diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e55c5b2f..704b3bc60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,7 +98,7 @@ endif() if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") message(STATUS "Debug build") - add_flags("-DDEBUG_RCU") + add_flags("-DDEBUG_RCU -D_DEBUG") else() message(STATUS "Release build") if((${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") OR (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")) diff --git a/conanfile.py b/conanfile.py index 4bedf8360..db9fc4a7a 100644 --- a/conanfile.py +++ b/conanfile.py @@ -5,7 +5,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "4.7.1" + version = "4.8.1" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/include/homestore/btree/btree.hpp b/src/include/homestore/btree/btree.hpp index 63056c413..428de8aa9 100644 --- a/src/include/homestore/btree/btree.hpp +++ b/src/include/homestore/btree/btree.hpp @@ -31,10 +31,6 @@ SISL_LOGGING_DECL(btree) namespace homestore { -typedef std::function< bool(const BtreeKey&, const BtreeValue&, const BtreeRequest&) > on_kv_read_t; -typedef std::function< bool(const BtreeKey&, const BtreeValue&, const BtreeRequest&) > on_kv_remove_t; -typedef std::function< bool(const BtreeKey&, const BtreeKey&, const BtreeValue&, const BtreeRequest&) > on_kv_update_t; - using BtreeNodePtr = boost::intrusive_ptr< BtreeNode >; struct BtreeThreadVariables { @@ -57,11 +53,6 @@ class Btree { std::atomic< uint64_t > m_req_id{0}; #endif - // Optional callback on various read or kv operations - on_kv_read_t m_on_read_cb{nullptr}; - on_kv_update_t m_on_update_cb{nullptr}; - on_kv_remove_t m_on_remove_cb{nullptr}; - // This workaround of BtreeThreadVariables is needed instead of directly declaring statics // to overcome the gcc bug, pointer here: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66944 static BtreeThreadVariables* bt_thread_vars() { @@ -72,13 +63,14 @@ class Btree { return fiber_map[this_id].get(); } + static bool is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info); + protected: BtreeConfig m_bt_cfg; public: /////////////////////////////////////// All External APIs ///////////////////////////// - Btree(const BtreeConfig& cfg, on_kv_read_t&& read_cb = nullptr, on_kv_update_t&& update_cb = nullptr, - on_kv_remove_t&& remove_cb = nullptr); + Btree(const BtreeConfig& cfg); virtual ~Btree(); virtual btree_status_t init(void* op_context); @@ -174,10 +166,6 @@ class Btree { void validate_sanity_child(const BtreeNodePtr& parent_node, uint32_t ind) const; void validate_sanity_next_child(const BtreeNodePtr& parent_node, uint32_t ind) const; void print_node(const bnodeid_t& bnodeid) const; - bool call_on_read_kv_cb(const BtreeNodePtr& node, uint32_t idx, const BtreeRequest& req) const; - bool call_on_remove_kv_cb(const BtreeNodePtr& node, uint32_t idx, const BtreeRequest& req) const; - bool call_on_update_kv_cb(const BtreeNodePtr& node, uint32_t idx, const BtreeKey& new_key, - const BtreeRequest& req) const; void append_route_trace(BtreeRequest& req, const BtreeNodePtr& node, btree_event_t event, uint32_t start_idx = 0, uint32_t end_idx = 0) const; @@ -194,10 +182,10 @@ class Btree { btree_status_t check_split_root(ReqT& req); template < typename ReqT > - bool is_split_needed(const BtreeNodePtr& node, const BtreeConfig& cfg, ReqT& req) const; + bool is_split_needed(const BtreeNodePtr& node, ReqT& req) const; btree_status_t split_node(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node, uint32_t parent_ind, - BtreeKey* 
out_split_key, void* context); + K* out_split_key, void* context); btree_status_t mutate_extents_in_leaf(const BtreeNodePtr& my_node, BtreeRangePutRequest< K >& rpreq); btree_status_t repair_split(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node1, uint32_t parent_split_idx, void* context); diff --git a/src/include/homestore/btree/btree.ipp b/src/include/homestore/btree/btree.ipp index 5ebe3e235..bb4018e05 100644 --- a/src/include/homestore/btree/btree.ipp +++ b/src/include/homestore/btree/btree.ipp @@ -36,14 +36,8 @@ namespace homestore { template < typename K, typename V > -Btree< K, V >::Btree(const BtreeConfig& cfg, on_kv_read_t&& read_cb, on_kv_update_t&& update_cb, - on_kv_remove_t&& remove_cb) : - m_metrics{cfg.name().c_str()}, - m_node_size{cfg.node_size()}, - m_on_read_cb{std::move(read_cb)}, - m_on_update_cb{std::move(update_cb)}, - m_on_remove_cb{std::move(remove_cb)}, - m_bt_cfg{cfg} { +Btree< K, V >::Btree(const BtreeConfig& cfg) : + m_metrics{cfg.name().c_str()}, m_node_size{cfg.node_size()}, m_bt_cfg{cfg} { m_bt_cfg.set_node_data_size(cfg.node_size() - sizeof(persistent_hdr_t)); } @@ -105,7 +99,7 @@ retry: if (ret != btree_status_t::success) { goto out; } is_leaf = root->is_leaf(); - if (is_split_needed(root, m_bt_cfg, put_req)) { + if (is_split_needed(root, put_req)) { // Time to do the split of root. unlock_node(root, acq_lock); m_btree_lock.unlock_shared(); @@ -143,8 +137,7 @@ out: #ifndef NDEBUG check_lock_debug(); #endif - if (ret != btree_status_t::success && ret != btree_status_t::fast_path_not_possible && - ret != btree_status_t::cp_mismatch) { + if (ret != btree_status_t::success && ret != btree_status_t::cp_mismatch) { BT_LOG(ERROR, "btree put failed {}", ret); COUNTER_INCREMENT(m_metrics, write_err_cnt, 1); } @@ -267,9 +260,9 @@ btree_status_t Btree< K, V >::query(BtreeQueryRequest< K >& qreq, std::vector< s if ((qreq.query_type() == BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY || qreq.query_type() == BtreeQueryType::TREE_TRAVERSAL_QUERY)) { if (out_values.size()) { - K& out_last_key = out_values.back().first; - qreq.set_cursor_key(out_last_key); + K out_last_key = out_values.back().first; if (out_last_key.compare(qreq.input_range().end_key()) >= 0) { ret = btree_status_t::success; } + qreq.shift_working_range(std::move(out_last_key), false /* non inclusive*/); } else { DEBUG_ASSERT_NE(ret, btree_status_t::has_more, "Query returned has_more, but no values added") } @@ -280,8 +273,7 @@ out: #ifndef NDEBUG check_lock_debug(); #endif - if (ret != btree_status_t::success && ret != btree_status_t::has_more && - ret != btree_status_t::fast_path_not_possible) { + if ((ret != btree_status_t::success) && (ret != btree_status_t::has_more)) { BT_LOG(ERROR, "btree query failed {}", ret); COUNTER_INCREMENT(m_metrics, query_err_cnt, 1); } @@ -354,11 +346,17 @@ template < typename K, typename V > bnodeid_t Btree< K, V >::root_node_id() const { return m_root_node_info.bnode_id(); } + template < typename K, typename V > uint64_t Btree< K, V >::root_link_version() const { return m_root_node_info.link_version(); } +template < typename K, typename V > +bool Btree< K, V >::is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info) { + return child_info.link_version() != child_node->link_version(); +} + // TODO: Commenting out flip till we figure out how to move flip dependency inside sisl package. 
#if 0 #ifdef _PRERELEASE diff --git a/src/include/homestore/btree/btree_kv.hpp b/src/include/homestore/btree/btree_kv.hpp index d54db46fd..18dd832a8 100644 --- a/src/include/homestore/btree/btree_kv.hpp +++ b/src/include/homestore/btree/btree_kv.hpp @@ -31,11 +31,10 @@ ENUM(MultiMatchOption, uint16_t, ) ENUM(btree_put_type, uint16_t, - INSERT_ONLY_IF_NOT_EXISTS, // Insert - REPLACE_ONLY_IF_EXISTS, // Update - REPLACE_IF_EXISTS_ELSE_INSERT, // Upsert - APPEND_ONLY_IF_EXISTS, // Update - APPEND_IF_EXISTS_ELSE_INSERT) + INSERT, // Insert only if it doesn't exist + UPDATE, // Update only if it exists + UPSERT // Update if exists, insert otherwise +) // The base class, btree library expects its key to be derived from class BtreeKey { @@ -45,23 +44,33 @@ class BtreeKey { // Deleting copy constructor forces the derived class to define its own copy constructor // BtreeKey(const BtreeKey& other) = delete; // BtreeKey(const sisl::blob& b) = delete; - BtreeKey(const BtreeKey& other) = default; + BtreeKey(BtreeKey const& other) = default; virtual ~BtreeKey() = default; - virtual BtreeKey& operator=(const BtreeKey& other) { - clone(other); - return *this; - }; - - virtual void clone(const BtreeKey& other) = 0; - virtual int compare(const BtreeKey& other) const = 0; + virtual int compare(BtreeKey const& other) const = 0; virtual sisl::blob serialize() const = 0; virtual uint32_t serialized_size() const = 0; - virtual void deserialize(const sisl::blob& b, bool copy) = 0; + virtual void deserialize(sisl::blob const& b, bool copy) = 0; virtual std::string to_string() const = 0; - virtual bool is_extent_key() const { return false; } + virtual bool is_interval_key() const { return false; } +}; + +// An extension of BtreeKey where each key is part of an interval range. 
Keys are not neccessarily only needs to be +// integers, but it needs to be able to get next or prev key from a given key in the key range +class BtreeIntervalKey : public BtreeKey { +public: + virtual void shift(int n) = 0; + virtual int distance(BtreeKey const& from) const = 0; + bool is_interval_key() const override { return true; } + + virtual sisl::blob serialize_prefix() const = 0; + virtual sisl::blob serialize_suffix() const = 0; + + virtual uint32_t serialized_prefix_size() const = 0; + virtual uint32_t serialized_suffix_size() const = 0; + virtual void deserialize(sisl::blob const& prefix, sisl::blob const& suffix, bool copy) = 0; }; template < typename K > @@ -69,13 +78,9 @@ class BtreeTraversalState; template < typename K > class BtreeKeyRange { -private: - K m_actual_start_key; - K m_actual_end_key; - public: - K* m_input_start_key{&m_actual_start_key}; - K* m_input_end_key{&m_actual_end_key}; + K m_start_key; + K m_end_key; bool m_start_incl{true}; bool m_end_incl{true}; MultiMatchOption m_multi_selector{MultiMatchOption::DO_NOT_CARE}; @@ -85,68 +90,35 @@ class BtreeKeyRange { public: BtreeKeyRange() = default; - BtreeKeyRange(const K& start_key, bool start_incl = true) : - m_actual_start_key{start_key}, - m_input_start_key{&m_actual_start_key}, - m_input_end_key{&m_actual_start_key}, - m_start_incl{start_incl}, - m_end_incl{true}, - m_multi_selector{MultiMatchOption::DO_NOT_CARE} {} - BtreeKeyRange(const K& start_key, bool start_incl, const K& end_key, bool end_incl = true, MultiMatchOption option = MultiMatchOption::DO_NOT_CARE) : - m_actual_start_key{start_key}, - m_actual_end_key{end_key}, - m_input_start_key{&m_actual_start_key}, - m_input_end_key{&m_actual_end_key}, + m_start_key{start_key}, + m_end_key{end_key}, m_start_incl{start_incl}, m_end_incl{end_incl}, m_multi_selector{option} {} BtreeKeyRange(const K& start_key, const K& end_key) : BtreeKeyRange(start_key, true, end_key, true) {} - BtreeKeyRange(const BtreeKeyRange& other) { copy(other); } - BtreeKeyRange(BtreeKeyRange&& other) { do_move(std::move(other)); } - BtreeKeyRange& operator=(const BtreeKeyRange< K >& other) { - this->copy(other); - return *this; - } - BtreeKeyRange& operator=(BtreeKeyRange< K >&& other) { - this->do_move(std::move(other)); - return *this; - } - - void copy(const BtreeKeyRange< K >& other) { - m_actual_start_key = other.m_actual_start_key; - m_actual_end_key = other.m_actual_end_key; - m_input_start_key = &m_actual_start_key; - m_input_end_key = - (other.m_input_end_key == &other.m_actual_start_key) ? &m_actual_start_key : &m_actual_end_key; - m_start_incl = other.m_start_incl; - m_end_incl = other.m_end_incl; - m_multi_selector = other.m_multi_selector; - } - - void do_move(BtreeKeyRange< K >&& other) { - m_input_start_key = &m_actual_start_key; - m_input_end_key = - (other.m_input_end_key == &other.m_actual_start_key) ? 
&m_actual_start_key : &m_actual_end_key; - m_actual_start_key = std::move(other.m_actual_start_key); - m_actual_end_key = std::move(other.m_actual_end_key); - m_start_incl = std::move(other.m_start_incl); - m_end_incl = std::move(other.m_end_incl); - m_multi_selector = std::move(other.m_multi_selector); - } + BtreeKeyRange(const BtreeKeyRange& other) = default; + BtreeKeyRange(BtreeKeyRange&& other) = default; + BtreeKeyRange& operator=(const BtreeKeyRange< K >& other) = default; + BtreeKeyRange& operator=(BtreeKeyRange< K >&& other) = default; void set_multi_option(MultiMatchOption o) { m_multi_selector = o; } - const K& start_key() const { return *m_input_start_key; } - const K& end_key() const { return *m_input_end_key; } + const K& start_key() const { return m_start_key; } + const K& end_key() const { return m_end_key; } bool is_start_inclusive() const { return m_start_incl; } bool is_end_inclusive() const { return m_end_incl; } MultiMatchOption multi_option() const { return m_multi_selector; } + void set_start_key(K&& key, bool incl) { + m_start_key = std::move(key); + m_start_incl = incl; + } + void set_end_key(K&& key, bool incl) { - m_actual_end_key = std::move(key); + m_end_key = std::move(key); m_end_incl = incl; } @@ -154,52 +126,6 @@ class BtreeKeyRange { return fmt::format("{}{}-{}{}", is_start_inclusive() ? '[' : '(', start_key().to_string(), end_key().to_string(), is_end_inclusive() ? ']' : ')'); } - -private: - const K& actual_start_key() const { return m_actual_start_key; } - const K& actual_end_key() const { return m_actual_end_key; } -}; - -/* - * This type is for keys which is range in itself i.e each key is having its own - * start() and end(). - */ -template < typename K > -class ExtentBtreeKey : public BtreeKey { -public: - ExtentBtreeKey() = default; - virtual ~ExtentBtreeKey() = default; - virtual bool is_extent_key() const { return true; } - - // Provide the length of the extent key, which is end - start + 1 - virtual uint32_t extent_length() const = 0; - - // Get the distance between the start of this key and start of other key. It returns equivalent of - // (other.start - this->start + 1) - virtual int64_t distance_start(const ExtentBtreeKey< K >& other) const = 0; - - // Get the distance between the end of this key and end of other key. It returns equivalent of - // (other.end - this->end + 1) - virtual int64_t distance_end(const ExtentBtreeKey< K >& other) const = 0; - - // Get the distance between the start of this key and end of other key. 
It returns equivalent of - // (other.end - this->start + 1) - virtual int64_t distance(const ExtentBtreeKey< K >& other) const = 0; - - // Extract a new extent key from the given offset upto this length from this key and optionally do a deep copy - virtual K extract(uint32_t offset, uint32_t length, bool copy) const = 0; - - // Merge this extent btree key with other extent btree key and return a new key - virtual K combine(const ExtentBtreeKey< K >& other) const = 0; - - // TODO: Evaluate if we need these 3 methods or we can manage with other methods - virtual int compare_start(const BtreeKey& other) const = 0; - virtual int compare_end(const BtreeKey& other) const = 0; - - /* we always compare the end key in case of extent */ - virtual int compare(const BtreeKey& other) const override { return (compare_end(other)); } - - K extract_end(bool copy) const { return extract(extent_length() - 1, 1, copy); } }; class BtreeValue { @@ -207,9 +133,6 @@ class BtreeValue { BtreeValue() = default; virtual ~BtreeValue() = default; - // Deleting copy constructor forces the derived class to define its own copy constructor - BtreeValue(const BtreeValue& other) = delete; - virtual sisl::blob serialize() const = 0; virtual uint32_t serialized_size() const = 0; virtual void deserialize(const sisl::blob& b, bool copy) = 0; @@ -217,30 +140,16 @@ class BtreeValue { virtual std::string to_string() const { return ""; } }; -template < typename V > -class ExtentBtreeValue : public BtreeValue { +class BtreeIntervalValue : public BtreeValue { public: - virtual ~ExtentBtreeValue() = default; - - // Extract a new extent value from the given offset upto this length from this value and optionally do a deep copy - virtual V extract(uint32_t offset, uint32_t length, bool copy) const = 0; + virtual void shift(int n) = 0; - // Returns the returns the serialized size if we were to extract other value from offset upto length - // This method is equivalent to: extract(offset, length, false).serialized_size() - // However, this method provides values to directly compute the extracted size without extracting - which is more - // efficient. - virtual uint32_t extracted_size(uint32_t offset, uint32_t length) const = 0; + virtual sisl::blob serialize_prefix() const = 0; + virtual sisl::blob serialize_suffix() const = 0; - // This method is similar to extract(0, length) along with moving the current values start to length. So for example - // if value has 0-100 and if shift(80) is called, this method returns a value from 0-79 and moves the start offset - // of current value to 80. - virtual V shift(uint32_t length, bool copy) = 0; - - // Given the length, report back how many extents from the current value can fit. - virtual uint32_t num_extents_fit(uint32_t length) const = 0; - - // Returns if every piece of extents are equally sized. 
- virtual bool is_equal_sized() const = 0; + virtual uint32_t serialized_prefix_size() const = 0; + virtual uint32_t serialized_suffix_size() const = 0; + virtual void deserialize(sisl::blob const& prefix, sisl::blob const& suffix, bool copy) = 0; }; struct BtreeLockTracker; @@ -262,53 +171,61 @@ class BtreeTraversalState { protected: const BtreeKeyRange< K > m_input_range; BtreeKeyRange< K > m_working_range; - BtreeKeyRange< K > m_next_range; - std::unique_ptr< BtreeQueryCursor< K > > m_cursor; + bool m_trimmed{false}; // Keep track of trimmed, so that a shift doesn't do unwanted copy of input_range + bool m_exhausted{false}; // The entire working range is exhausted public: - BtreeTraversalState(BtreeKeyRange< K >&& inp_range, bool paginated_query = false) : - m_input_range{std::move(inp_range)}, m_working_range{m_input_range} { - if (paginated_query) { m_cursor = std::make_unique< BtreeQueryCursor< K > >(); } - } + BtreeTraversalState(BtreeKeyRange< K >&& inp_range) : + m_input_range{std::move(inp_range)}, m_working_range{m_input_range} {} BtreeTraversalState(const BtreeTraversalState& other) = default; BtreeTraversalState(BtreeTraversalState&& other) = default; - const BtreeQueryCursor< K >* const_cursor() const { return m_cursor.get(); } - BtreeQueryCursor< K >* cursor() { return m_cursor.get(); } - bool is_cursor_valid() const { return (m_cursor != nullptr); } - - void set_cursor_key(const K& end_key) { - // no need to set cursor as user doesn't want to keep track of it - if (!m_cursor) { return; } - m_cursor->m_last_key = std::make_unique< K >(end_key); - } - const BtreeKeyRange< K >& input_range() const { return m_input_range; } - const BtreeKeyRange< K >& working_range() const { return m_working_range; } + const BtreeKeyRange< K >& working_range() const { + DEBUG_ASSERT_EQ(m_exhausted, false, "requested for working range on an exhausted traversal state"); + return m_working_range; + } // Returns the mutable reference to the end key, which caller can update it to trim down the end key - void trim_working_range(K&& end_key, bool end_incl) { m_working_range.set_end_key(std::move(end_key), end_incl); } - - const K& next_key() const { - return (m_cursor && m_cursor->m_last_key) ? *m_cursor->m_last_key : m_input_range.start_key(); + void trim_working_range(K&& end_key, bool end_incl) { + m_working_range.set_end_key(std::move(end_key), end_incl); + m_trimmed = true; } - const BtreeKeyRange< K >& next_range() { - if (m_cursor && m_cursor->m_last_key) { - m_next_range = BtreeKeyRange< K >(*m_cursor->m_last_key, false, m_input_range.end_key(), is_end_inclusive(), - m_input_range.multi_option()); - return m_next_range; + // Shift the working range start to previous working range end_key + void shift_working_range() { + if (m_trimmed) { + m_working_range.set_start_key(std::move(m_working_range.m_end_key), false); + m_working_range.m_end_key = m_input_range.end_key(); + m_working_range.m_end_incl = m_input_range.is_end_inclusive(); + m_trimmed = false; } else { - return m_input_range; + m_exhausted = true; } } -private: - bool is_start_inclusive() const { - // cursor always have the last key not included - return (m_cursor && m_cursor->m_last_key) ? 
false : m_input_range.is_start_inclusive(); + // Shift the working range start to specific end key + void shift_working_range(K&& start_key, bool start_incl) { + m_working_range.set_start_key(std::move(start_key), start_incl); + if (m_trimmed) { + m_working_range.m_end_key = m_input_range.end_key(); + m_working_range.m_end_incl = m_input_range.is_end_inclusive(); + m_trimmed = false; + } } + const K& first_key() const { return m_working_range.start_key(); } + + uint32_t first_key_size() const { + if (is_start_inclusive() || K::is_fixed_size()) { + return m_working_range.start_key().serialized_size(); + } else { + return K::get_max_size(); + } + } + +private: + bool is_start_inclusive() const { return m_input_range.is_start_inclusive(); } bool is_end_inclusive() const { return m_input_range.is_end_inclusive(); } }; diff --git a/src/include/homestore/btree/btree_req.hpp b/src/include/homestore/btree/btree_req.hpp index 6684c0be3..4e28dec8e 100644 --- a/src/include/homestore/btree/btree_req.hpp +++ b/src/include/homestore/btree/btree_req.hpp @@ -55,31 +55,24 @@ struct BtreeRangeRequest : public BtreeRequest { uint32_t batch_size() const { return m_batch_size; } void set_batch_size(uint32_t count) { m_batch_size = count; } - bool is_empty_cursor() const { - return ((m_search_state.const_cursor()->m_last_key == nullptr) && - (m_search_state.const_cursor()->m_locked_nodes == nullptr)); - } - BtreeTraversalState< K >& search_state() { return m_search_state; } - BtreeQueryCursor< K >* cursor() { return m_search_state.cursor(); } - const BtreeQueryCursor< K >* const_cursor() const { return m_search_state.const_cursor(); } - const BtreeKeyRange< K >& input_range() const { return m_search_state.input_range(); } - const BtreeKeyRange< K >& next_range() { return m_search_state.next_range(); } + void shift_working_range(K&& start_key, bool start_incl) { + m_search_state.shift_working_range(std::move(start_key), start_incl); + } + void shift_working_range() { m_search_state.shift_working_range(); } const BtreeKeyRange< K >& working_range() const { return m_search_state.working_range(); } - const K& next_key() const { return m_search_state.next_key(); } + const K& first_key() const { return m_search_state.first_key(); } + uint32_t first_key_size() const { return m_search_state.first_key_size(); } + void trim_working_range(K&& end_key, bool end_incl) { m_search_state.trim_working_range(std::move(end_key), end_incl); } - void set_cursor_key(const K& end_key) { return m_search_state.set_cursor_key(end_key); } protected: - BtreeRangeRequest(BtreeKeyRange< K >&& input_range, bool external_pagination = false, void* app_context = nullptr, - uint32_t batch_size = UINT32_MAX) : - BtreeRequest{app_context, nullptr}, - m_search_state{std::move(input_range), external_pagination}, - m_batch_size{batch_size} {} + BtreeRangeRequest(BtreeKeyRange< K >&& input_range, void* app_context = nullptr, uint32_t batch_size = UINT32_MAX) : + BtreeRequest{app_context, nullptr}, m_search_state{std::move(input_range)}, m_batch_size{batch_size} {} private: BtreeTraversalState< K > m_search_state; @@ -87,11 +80,14 @@ struct BtreeRangeRequest : public BtreeRequest { }; /////////////////////////// 1: Put Operations ///////////////////////////////////// +ENUM(put_filter_decision, uint8_t, keep, replace, remove); +using put_filter_cb_t = std::function< put_filter_decision(BtreeKey const&, BtreeValue const&, BtreeValue const&) >; + struct BtreeSinglePutRequest : public BtreeRequest { public: BtreeSinglePutRequest(const BtreeKey* k, const 
BtreeValue* v, btree_put_type put_type, - BtreeValue* existing_val = nullptr) : - m_k{k}, m_v{v}, m_put_type{put_type}, m_existing_val{existing_val} {} + BtreeValue* existing_val = nullptr, put_filter_cb_t filter_cb = nullptr) : + m_k{k}, m_v{v}, m_put_type{put_type}, m_existing_val{existing_val}, m_filter_cb{std::move(filter_cb)} {} const BtreeKey& key() const { return *m_k; } const BtreeValue& value() const { return *m_v; } @@ -100,19 +96,23 @@ struct BtreeSinglePutRequest : public BtreeRequest { const BtreeValue* m_v; const btree_put_type m_put_type; BtreeValue* m_existing_val; + put_filter_cb_t m_filter_cb; }; template < typename K > struct BtreeRangePutRequest : public BtreeRangeRequest< K > { public: BtreeRangePutRequest(BtreeKeyRange< K >&& inp_range, btree_put_type put_type, const BtreeValue* value, - void* app_context = nullptr, uint32_t batch_size = std::numeric_limits< uint32_t >::max()) : - BtreeRangeRequest< K >(std::move(inp_range), false, app_context, batch_size), + void* app_context = nullptr, uint32_t batch_size = std::numeric_limits< uint32_t >::max(), + put_filter_cb_t filter_cb = nullptr) : + BtreeRangeRequest< K >(std::move(inp_range), app_context, batch_size), m_put_type{put_type}, - m_newval{value} {} + m_newval{value}, + m_filter_cb{std::move(filter_cb)} {} - const btree_put_type m_put_type{btree_put_type::REPLACE_ONLY_IF_EXISTS}; + const btree_put_type m_put_type{btree_put_type::UPDATE}; const BtreeValue* m_newval; + put_filter_cb_t m_filter_cb; }; /////////////////////////// 2: Remove Operations ///////////////////////////////////// @@ -138,12 +138,18 @@ struct BtreeRemoveAnyRequest : public BtreeRequest { BtreeValue* m_outval; }; +using remove_filter_cb_t = std::function< bool(BtreeKey const&, BtreeValue const&) >; + template < typename K > struct BtreeRangeRemoveRequest : public BtreeRangeRequest< K > { +public: + remove_filter_cb_t m_filter_cb; + public: BtreeRangeRemoveRequest(BtreeKeyRange< K >&& inp_range, void* app_context = nullptr, - uint32_t batch_size = std::numeric_limits< uint32_t >::max()) : - BtreeRangeRequest< K >(std::move(inp_range), false, app_context, batch_size) {} + uint32_t batch_size = std::numeric_limits< uint32_t >::max(), + remove_filter_cb_t filter_cb = nullptr) : + BtreeRangeRequest< K >(std::move(inp_range), app_context, batch_size), m_filter_cb{std::move(filter_cb)} {} }; /////////////////////////// 3: Get Operations ///////////////////////////////////// @@ -191,21 +197,28 @@ ENUM(BtreeQueryType, uint8_t, // essentially create a serializable level of isolation. 
SERIALIZABLE_QUERY) +using get_filter_cb_t = std::function< bool(BtreeKey const&, BtreeValue const&) >; + template < typename K > struct BtreeQueryRequest : public BtreeRangeRequest< K > { public: BtreeQueryRequest(BtreeKeyRange< K >&& inp_range, BtreeQueryType query_type = BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, - uint32_t batch_size = UINT32_MAX, void* app_context = nullptr) : - BtreeRangeRequest< K >{std::move(inp_range), true, app_context, batch_size}, m_query_type{query_type} {} + uint32_t batch_size = UINT32_MAX, get_filter_cb_t filter_cb = nullptr, + void* app_context = nullptr) : + BtreeRangeRequest< K >{std::move(inp_range), app_context, batch_size}, + m_query_type{query_type}, + m_filter_cb{std::move(filter_cb)} {} ~BtreeQueryRequest() = default; // virtual bool is_serializable() const = 0; BtreeQueryType query_type() const { return m_query_type; } + get_filter_cb_t const& filter() const { return m_filter_cb; } + protected: - const BtreeQueryType m_query_type; // Type of the query - const std::unique_ptr< BtreeQueryCursor< K > > m_paginated_query; // Is it a paginated query + const BtreeQueryType m_query_type; // Type of the query + get_filter_cb_t m_filter_cb; }; /* This class is a top level class to keep track of the locks that are held currently. It is diff --git a/src/include/homestore/btree/detail/btree_common.ipp b/src/include/homestore/btree/detail/btree_common.ipp index bc03db7ba..44035d238 100644 --- a/src/include/homestore/btree/detail/btree_common.ipp +++ b/src/include/homestore/btree/detail/btree_common.ipp @@ -18,6 +18,8 @@ namespace homestore { +#define to_variant_node(n) boost::static_pointer_cast< VariantNode< K, V > >(n) + template < typename K, typename V > btree_status_t Btree< K, V >::post_order_traversal(locktype_t ltype, const auto& cb) { BtreeNodePtr root; @@ -258,37 +260,6 @@ done: BT_LOG(INFO, "Node: <{}>", buf); } -template < typename K, typename V > -bool Btree< K, V >::call_on_read_kv_cb(const BtreeNodePtr& node, uint32_t idx, const BtreeRequest& req) const { - if (m_on_read_cb) { - V v; - node->get_nth_value(idx, &v, false); - return m_on_read_cb(node->get_nth_key< K >(idx, false), v, req); - } - return true; -} - -template < typename K, typename V > -bool Btree< K, V >::call_on_remove_kv_cb(const BtreeNodePtr& node, uint32_t idx, const BtreeRequest& req) const { - if (m_on_remove_cb) { - V v; - node->get_nth_value(idx, &v, false); - return m_on_remove_cb(node->get_nth_key< K >(idx, false), v, req); - } - return true; -} - -template < typename K, typename V > -bool Btree< K, V >::call_on_update_kv_cb(const BtreeNodePtr& node, uint32_t idx, const BtreeKey& new_key, - const BtreeRequest& req) const { - if (m_on_update_cb) { - V v; - node->get_nth_value(idx, &v, false); - return m_on_update_cb(node->get_nth_key< K >(idx, false), new_key, v, req); - } - return true; -} - template < typename K, typename V > void Btree< K, V >::append_route_trace(BtreeRequest& req, const BtreeNodePtr& node, btree_event_t event, uint32_t start_idx, uint32_t end_idx) const { diff --git a/src/include/homestore/btree/detail/btree_get_impl.ipp b/src/include/homestore/btree/detail/btree_get_impl.ipp index f2081e22f..4f0c09732 100644 --- a/src/include/homestore/btree/detail/btree_get_impl.ipp +++ b/src/include/homestore/btree/detail/btree_get_impl.ipp @@ -26,11 +26,10 @@ btree_status_t Btree< K, V >::do_get(const BtreeNodePtr& my_node, ReqT& greq) co if (my_node->is_leaf()) { if constexpr (std::is_same_v< BtreeGetAnyRequest< K >, ReqT >) { - std::tie(found, idx) = 
my_node->get_any(greq.m_range, greq.m_outkey, greq.m_outval, true, true); - if (found) { call_on_read_kv_cb(my_node, idx, greq); } + std::tie(found, idx) = + to_variant_node(my_node)->get_any(greq.m_range, greq.m_outkey, greq.m_outval, true, true); } else if constexpr (std::is_same_v< BtreeSingleGetRequest, ReqT >) { std::tie(found, idx) = my_node->find(greq.key(), greq.m_outval, true); - if (found) { call_on_read_kv_cb(my_node, idx, greq); } } if (!found) { ret = btree_status_t::not_found; diff --git a/src/include/homestore/btree/detail/btree_internal.hpp b/src/include/homestore/btree/detail/btree_internal.hpp index 53ebc34e5..cec11deaf 100644 --- a/src/include/homestore/btree/detail/btree_internal.hpp +++ b/src/include/homestore/btree/detail/btree_internal.hpp @@ -201,9 +201,8 @@ VENUM(btree_node_type, uint32_t, FIXED = 0, VAR_VALUE = 1, VAR_KEY = 2, VAR_OBJE VENUM(btree_store_type, uint8_t, MEM = 0, SSD = 1) #endif -ENUM(btree_status_t, uint32_t, success, not_found, retry, has_more, read_failed, write_failed, stale_buf, - refresh_failed, put_failed, space_not_avail, split_failed, insert_failed, cp_mismatch, merge_not_required, - merge_failed, replay_not_needed, fast_path_not_possible, resource_full, crc_mismatch, not_supported, node_freed) +ENUM(btree_status_t, uint32_t, success, not_found, retry, has_more, node_read_failed, put_failed, space_not_avail, + cp_mismatch, merge_not_required, merge_failed, crc_mismatch, not_supported, node_freed) /*ENUM(btree_node_write_type, uint8_t, new_node, // Node write whenever a new node is created. @@ -300,7 +299,6 @@ class BtreeMetrics : public sisl::MetricsGroup { REGISTER_COUNTER(btree_int_node_count, "Btree Interior node count", "btree_node_count", {"node_type", "interior"}, _publish_as::publish_as_gauge); REGISTER_COUNTER(btree_split_count, "Total number of btree node splits"); - REGISTER_COUNTER(insert_failed_count, "Total number of inserts failed"); REGISTER_COUNTER(btree_merge_count, "Total number of btree node merges"); REGISTER_COUNTER(btree_depth, "Depth of btree", _publish_as::publish_as_gauge); @@ -316,7 +314,6 @@ class BtreeMetrics : public sisl::MetricsGroup { {"node_type", "leaf"}, HistogramBucketsType(LinearUpto128Buckets)); REGISTER_COUNTER(btree_retry_count, "number of retries"); REGISTER_COUNTER(write_err_cnt, "number of errors in write"); - REGISTER_COUNTER(split_failed, "split failed"); REGISTER_COUNTER(query_err_cnt, "number of errors in query"); REGISTER_COUNTER(read_node_count_in_write_ops, "number of nodes read in write_op"); REGISTER_COUNTER(read_node_count_in_query_ops, "number of nodes read in query_op"); diff --git a/src/include/homestore/btree/detail/btree_mutate_impl.ipp b/src/include/homestore/btree/detail/btree_mutate_impl.ipp index 7af504633..e5c6b832c 100644 --- a/src/include/homestore/btree/detail/btree_mutate_impl.ipp +++ b/src/include/homestore/btree/detail/btree_mutate_impl.ipp @@ -18,10 +18,6 @@ namespace homestore { -static bool is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info) { - return child_info.link_version() != child_node->link_version(); -} - /* This function does the heavy lifiting of co-ordinating inserts. It is a recursive function which walks * down the tree. 
* @@ -52,10 +48,10 @@ retry: uint32_t curr_idx; if constexpr (std::is_same_v< ReqT, BtreeRangePutRequest< K > >) { - const auto count = my_node->template get_all< K, V >(req.next_range(), UINT32_MAX, start_idx, end_idx); - if (count == 0) { - BT_NODE_LOG_ASSERT(false, my_node, "get_all returns 0 entries for interior node is not valid pattern"); - ret = btree_status_t::retry; + const auto matched = my_node->match_range(req.working_range(), start_idx, end_idx); + if (!matched) { + BT_NODE_LOG_ASSERT(false, my_node, "match_range returns 0 entries for interior node is not valid pattern"); + ret = btree_status_t::put_failed; goto out; } } else if constexpr (std::is_same_v< ReqT, BtreeSinglePutRequest >) { @@ -102,17 +98,7 @@ retry: // If the child and child_info link in the parent mismatch, we need to do btree repair, it might have // encountered a crash in-between the split or merge and only partial commit happened. - if (is_split_needed(child_node, m_bt_cfg, req) || is_repair_needed(child_node, child_info)) { - - // TODO remove the split_node retry logic and use key max size. - if (!my_node->can_accomodate(m_bt_cfg, K::get_estimate_max_size(), BtreeLinkInfo::get_fixed_size())) { - // Mark the parent_node itself to be split upon next retry. - bt_thread_vars()->force_split_node = my_node; - unlock_node(child_node, child_cur_lock); - ret = btree_status_t::retry; - goto out; - } - + if (is_split_needed(child_node, req) || is_repair_needed(child_node, child_info)) { ret = upgrade_node_locks(my_node, child_node, curlock, child_cur_lock, req.m_op_context); if (ret != btree_status_t::success) { BT_NODE_LOG(DEBUG, my_node, "Upgrade of node lock failed, retrying from root"); @@ -125,7 +111,6 @@ retry: if (is_repair_needed(child_node, child_info)) { BT_NODE_LOG(TRACE, child_node, "Node repair needed"); ret = repair_split(my_node, child_node, curr_idx, req.m_op_context); - } else { K split_key; BT_NODE_LOG(TRACE, my_node, "Split node needed"); @@ -148,11 +133,12 @@ retry: if (child_node->is_leaf()) { // We get the trimmed range only for leaf because this is where we will be inserting keys. In // interior nodes, keys are always propogated from the lower nodes. - bool is_inp_key_lesser = false; - K end_key = - my_node->min_of(s_cast< const K& >(req.input_range().end_key()), curr_idx, is_inp_key_lesser); - bool end_incl = is_inp_key_lesser ? 
req.input_range().is_end_inclusive() : true; - req.trim_working_range(std::move(end_key), end_incl); + if (curr_idx < my_node->total_entries()) { + K child_end_key = my_node->get_nth_key< K >(curr_idx, true); + if (child_end_key.compare(req.working_range().end_key()) < 0) { + req.trim_working_range(std::move(child_end_key), true /* inclusive child key */); + } + } BT_NODE_LOG(DEBUG, my_node, "Subrange:idx=[{}-{}],c={},working={}", start_idx, end_idx, curr_idx, req.working_range().to_string()); @@ -206,42 +192,17 @@ template < typename ReqT > btree_status_t Btree< K, V >::mutate_write_leaf_node(const BtreeNodePtr& my_node, ReqT& req) { btree_status_t ret = btree_status_t::success; if constexpr (std::is_same_v< ReqT, BtreeRangePutRequest< K > >) { - const BtreeKeyRange< K >& subrange = req.working_range(); - - if (subrange.start_key().is_extent_key()) { - ret = mutate_extents_in_leaf(my_node, req); - } else { - auto const [start_found, start_idx] = my_node->find(subrange.start_key(), nullptr, false); - auto const [end_found, end_idx] = my_node->find(subrange.end_key(), nullptr, false); - if (req.m_put_type != btree_put_type::REPLACE_ONLY_IF_EXISTS) { - BT_DBG_ASSERT(false, "For non-extent keys range-update should be really update and cannot insert"); - ret = btree_status_t::not_supported; - } else { - if (!end_found) { - if (end_idx == my_node->total_entries() || end_idx == start_idx) { - return btree_status_t::not_found; - } - K tail_key = my_node->get_nth_key< K >(end_idx, false); - if (tail_key.compare(subrange.end_key()) == 1) { return btree_status_t::not_found; } - } - if (!start_found && !end_found && end_idx >= start_idx) { return btree_status_t::not_found; } - if (end_idx < start_idx) { return btree_status_t::not_found; } - const auto new_val_size{(*req.m_newval).serialized_size()}; - V tmp_v; - for (auto idx{start_idx}; idx <= end_idx; ++idx) { - my_node->get_nth_value(idx, &tmp_v, false); - if (my_node->available_size(m_bt_cfg) + tmp_v.serialized_size() < new_val_size) { - req.set_cursor_key(my_node->get_nth_key< K >(idx, false)); - return btree_status_t::has_more; - } - my_node->update(idx, *req.m_newval); - } - } - // update cursor in intermediate search state - req.set_cursor_key(subrange.end_key()); + K last_failed_key; + ret = to_variant_node(my_node)->multi_put(req.working_range(), req.input_range().start_key(), *req.m_newval, + req.m_put_type, &last_failed_key, req.m_filter_cb); + if (ret == btree_status_t::has_more) { + req.shift_working_range(std::move(last_failed_key), true /* make it including last_failed_key */); + } else if (ret == btree_status_t::success) { + req.shift_working_range(); } } else if constexpr (std::is_same_v< ReqT, BtreeSinglePutRequest >) { - if (!my_node->put(req.key(), req.value(), req.m_put_type, req.m_existing_val)) { + if (!to_variant_node(my_node)->put(req.key(), req.value(), req.m_put_type, req.m_existing_val, + req.m_filter_cb)) { ret = btree_status_t::put_failed; } COUNTER_INCREMENT(m_metrics, btree_obj_count, 1); @@ -254,169 +215,6 @@ btree_status_t Btree< K, V >::mutate_write_leaf_node(const BtreeNodePtr& my_node return ret; } -template < typename K, typename V > -btree_status_t Btree< K, V >::mutate_extents_in_leaf(const BtreeNodePtr& node, BtreeRangePutRequest< K >& rpreq) { - if constexpr (std::is_base_of_v< ExtentBtreeKey< K >, K > && std::is_base_of_v< ExtentBtreeValue< V >, V >) { - const BtreeKeyRange< K >& subrange = rpreq.current_sub_range(); - const auto& start_key = static_cast< const ExtentBtreeKey< K >& >(subrange.start_key()); 
- const auto& end_key = static_cast< ExtentBtreeKey< K >& >(subrange.end_key()); - ExtentBtreeValue< V >* new_value = static_cast< ExtentBtreeValue< V >* >(rpreq.m_newval.get()); - btree_status_t ret{btree_status_t::success}; - - BT_DBG_ASSERT_EQ(start_key.extent_length(), 1, "Search range start key can't be multiple extents"); - BT_DBG_ASSERT_EQ(end_key.extent_length(), 1, "Search range end key can't be multiple extents"); - - if (!can_extents_auto_merge()) { - BT_REL_ASSERT(false, "Yet to support non-auto merge range of extents in range put"); - return btree_status_t::not_supported; - } - - bool retry{false}; - auto const [start_found, start_idx] = node->find(start_key, nullptr, false); - do { - auto const [end_found, end_idx] = node->find(end_key, nullptr, false); - ExtentBtreeKey const new_k = start_key.combine(end_key); - auto idx = start_idx; - - { // Scope this to avoid head_k and tail_k are used beyond - K h_k, t_k; - V h_v, t_v; - int64_t head_offset{0}; - int64_t tail_offset{0}; - ExtentBtreeKey< K >& head_k = static_cast< ExtentBtreeKey< K >& >(h_k); - ExtentBtreeKey< K >& tail_k = static_cast< ExtentBtreeKey< K >& >(t_k); - ExtentBtreeValue< V >& head_v = static_cast< ExtentBtreeValue< V >& >(h_v); - ExtentBtreeValue< V >& tail_v = static_cast< ExtentBtreeValue< V >& >(t_v); - - // Get the residue head and tail key first if it is present, before updating any fields, otherwise - // updating fields will modify the other entry. - if (start_found) { - head_k = node->get_nth_key< K >(start_idx, false); - head_offset = head_k.distance_start(start_key); - BT_NODE_DBG_ASSERT_GE(head_offset, 0, node, "Invalid start_key or head_k"); - if (head_offset > 0) { node->get_nth_value(start_idx, &head_v, false); } - } - if (end_found) { - tail_k = node->get_nth_key< K >(end_idx, false); - tail_offset = end_key.distance_end(tail_k); - BT_NODE_DBG_ASSERT_GE(tail_offset, 0, node, "Invalid end_key or tail_k"); - if (tail_offset > 0) { node->get_nth_value(end_idx, &tail_v, false); } - } - - // Shortcut to simple update of the existing range, which is a normal case. Its a simple update only - // if the value we are replacing is all equal sized for every extent piece (which is normal use - // cases of the extents) - if (start_found && end_found && (head_offset == 0) && (tail_offset == 0) && (start_idx == end_idx) && - new_value->is_equal_sized()) { - call_on_update_kv_cb(node, start_idx, new_k, rpreq); - node->update(start_idx, new_k, new_value->shift(new_k.extent_length(), false)); - break; - } - - // Do size check, first check if we can accomodate the keys if checked conservatively. Thats most - // common case and thus efficient. Next we go aggressively, the more aggressive the check, more - // performance impact. - // - // First level check: Try assuming the entire value + 2 keys + 2 records to be inserted. If there is - // a space available, no need any additional check. 
- auto const record_size = (2 * (new_k.serialized_size() + node->get_record_size())); - auto size_needed = new_value->extracted_size(0, new_k.extent_length()) + record_size; - - auto const available_space = node->available_size(m_bt_cfg); - if (size_needed > available_space) { - BT_NODE_DBG_ASSERT_EQ(retry, false, node, "Don't expect multiple attempts of size not available"); - - // Second level check: Take into account the head and tail overlapped space and see if it saves - // some - if (head_offset > 0) { - size_needed -= (head_v.serialized_size() - head_v.extracted_size(0, head_offset)); - } - if (tail_offset > 0) { size_needed -= tail_v.extracted_size(0, tail_offset); } - - if (size_needed > available_space) { - // Third level check: Walk through every entry in the about to remove list and account for - // theirs - V tmp_v; - for (auto i = start_idx; i < end_idx; ++i) { - node->get_nth_value(i, &tmp_v, false); - size_needed -= - (node->get_nth_key< K >(i, false).serialized_size() + tmp_v.serialized_size()); - } - - // If still size is not enough, no other option other than trimming down the keys and retry - if (size_needed > available_space) { - auto const nextents = new_value->num_extents_fit(available_space - record_size); - end_key = new_k.extract(0, nextents, true); - retry = true; - ret = btree_status_t::has_more; - continue; - } - } - } - retry = false; - - // Write partial head and tail kv. At this point we are committing and we can't go back and not - // update some of the extents. - if (end_idx == start_idx) { - // Special case - where there is a overlap and single entry is split into 3 - auto const tail_start = tail_k.extent_length() - tail_offset; - if (m_on_remove_cb) { - m_on_remove_cb(head_k.extract(head_offset, tail_start - head_offset, false), - head_v.extract(head_offset, tail_start - head_offset, false), rpreq); - } - - if (tail_offset > 0) { - node->insert(end_idx + 1, tail_k.extract(tail_start, tail_offset, false), - tail_v.extract(tail_start, tail_offset, false)); - COUNTER_INCREMENT(m_metrics, btree_obj_count, 1); - } - - if (head_offset > 0) { - node->update(idx++, head_k.extract(0, head_offset, false), - head_v.extract(0, head_offset, false)); - } - } else { - if (tail_offset > 0) { - auto const tail_start = tail_k.extent_length() - tail_offset; - auto const shrunk_k = tail_k.extract(tail_start, tail_offset, false); - call_on_update_kv_cb(node, end_idx, shrunk_k, rpreq); - node->update(end_idx, shrunk_k, tail_v.extract(tail_start, tail_offset, false)); - } else if (end_found) { - ++end_idx; - } - - if (head_offset > 0) { - auto const shrunk_k = head_k.extract(0, -head_offset, false); - call_on_update_kv_cb(node, idx, shrunk_k, rpreq); - node->update(idx++, shrunk_k, head_v.extract(0, -head_offset, false)); - } - } - } - - // Remove everything in-between - if (idx < end_idx) { - if (m_on_remove_cb) { - for (auto i{idx}; i <= end_idx; ++i) { - call_on_remove_kv_cb(node, i, rpreq); - } - } - node->remove(idx, end_idx - 1); - COUNTER_DECREMENT(m_metrics, btree_obj_count, end_idx - idx); - } - - // Now we should have enough room to insert the combined entry - node->insert(idx, new_k, new_value->shift(new_k.extent_length())); - COUNTER_INCREMENT(m_metrics, btree_obj_count, 1); - } while (retry); - - rpreq.set_cursor_key(end_key); - return ret; - } else { - BT_REL_ASSERT(false, "Don't expect mutate_extents to be called on non-extent code path"); - return btree_status_t::not_supported; - } -} - template < typename K, typename V > template < typename ReqT > 
btree_status_t Btree< K, V >::check_split_root(ReqT& req) { @@ -430,7 +228,7 @@ btree_status_t Btree< K, V >::check_split_root(ReqT& req) { ret = read_and_lock_node(m_root_node_info.bnode_id(), root, locktype_t::WRITE, locktype_t::WRITE, req.m_op_context); if (ret != btree_status_t::success) { goto done; } - if (!is_split_needed(root, m_bt_cfg, req) && !is_repair_needed(root, m_root_node_info)) { + if (!is_split_needed(root, req) && !is_repair_needed(root, m_root_node_info)) { unlock_node(root, locktype_t::WRITE); goto done; } @@ -482,7 +280,7 @@ done: template < typename K, typename V > btree_status_t Btree< K, V >::split_node(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node, - uint32_t parent_ind, BtreeKey* out_split_key, void* context) { + uint32_t parent_ind, K* out_split_key, void* context) { BtreeNodePtr child_node1 = child_node; BtreeNodePtr child_node2; child_node2.reset(child_node1->is_leaf() ? alloc_leaf_node().get() : alloc_interior_node().get()); @@ -494,7 +292,7 @@ btree_status_t Btree< K, V >::split_node(const BtreeNodePtr& parent_node, const child_node2->set_next_bnode(child_node1->next_bnode()); child_node1->set_next_bnode(child_node2->node_id()); child_node2->set_level(child_node1->level()); - uint32_t child1_filled_size = m_bt_cfg.node_data_size() - child_node1->available_size(m_bt_cfg); + uint32_t child1_filled_size = m_bt_cfg.node_data_size() - child_node1->available_size(); auto split_size = m_bt_cfg.split_size(child1_filled_size); uint32_t res = child_node1->move_out_to_right_by_size(m_bt_cfg, *child_node2, split_size); @@ -506,35 +304,13 @@ btree_status_t Btree< K, V >::split_node(const BtreeNodePtr& parent_node, const // Insert the last entry in first child to parent node *out_split_key = child_node1->get_last_key< K >(); - // In an unlikely case where parent node has no room to accomodate the child key, we need to un-split and then - // free up the new node. This situation could happen on variable key, where the key max size is purely - // an estimation. This logic allows the max size to be declared more optimistically than say 1/4 of node - // which will have substantially large number of splits and performance constraints. - if (!parent_node->can_accomodate(m_bt_cfg, out_split_key->serialized_size(), BtreeLinkInfo::get_fixed_size())) { - uint32_t move_in_res = child_node1->copy_by_entries(m_bt_cfg, *child_node2, 0, child_node2->total_entries()); - BT_NODE_REL_ASSERT_EQ(move_in_res, res, child_node1, - "The split key size is more than estimated parent available space, but when revert is " - "attempted it fails. Continuing can cause data loss, so crashing"); - free_node(child_node2, locktype_t::NONE, context); - - // Mark the parent_node itself to be split upon next retry. - bt_thread_vars()->force_split_node = parent_node; - return btree_status_t::retry; - } - BT_NODE_LOG(TRACE, parent_node, "Available space for split entry={}", parent_node->available_size(m_bt_cfg)); + BT_NODE_LOG(TRACE, parent_node, "Available space for split entry={}", parent_node->available_size()); child_node1->inc_link_version(); // Update the existing parent node entry to point to second child ptr. 
parent_node->update(parent_ind, child_node2->link_info()); - - // If key is extent then we always insert the tail portion of the extent key in the parent node - if (out_split_key->is_extent_key()) { - parent_node->insert(parent_ind, ((ExtentBtreeKey< K >*)out_split_key)->extract_end(false), - child_node1->link_info()); - } else { - parent_node->insert(parent_ind, *out_split_key, child_node1->link_info()); - } + parent_node->insert(parent_ind, *out_split_key, child_node1->link_info()); BT_NODE_DBG_ASSERT_GT(child_node2->get_first_key< K >().compare(*out_split_key), 0, child_node2); BT_NODE_LOG(DEBUG, parent_node, "Split child_node={} with new_child_node={}, split_key={}", child_node1->node_id(), @@ -551,34 +327,16 @@ btree_status_t Btree< K, V >::split_node(const BtreeNodePtr& parent_node, const template < typename K, typename V > template < typename ReqT > -bool Btree< K, V >::is_split_needed(const BtreeNodePtr& node, const BtreeConfig& cfg, ReqT& req) const { - if (bt_thread_vars()->force_split_node && (bt_thread_vars()->force_split_node == node)) { - bt_thread_vars()->force_split_node = nullptr; - return true; - } - - int64_t size_needed = 0; +bool Btree< K, V >::is_split_needed(const BtreeNodePtr& node, ReqT& req) const { if (!node->is_leaf()) { // if internal node, size is atmost one additional entry, size of K/V - size_needed = K::get_estimate_max_size() + BtreeLinkInfo::get_fixed_size() + node->get_record_size(); + return !node->has_room_for_put(btree_put_type::UPSERT, K::get_max_size(), BtreeLinkInfo::get_fixed_size()); } else if constexpr (std::is_same_v< ReqT, BtreeRangePutRequest< K > >) { - const BtreeKey& next_key = req.next_key(); - - if (next_key.is_extent_key()) { - // For extent keys we expect to write atleast first value in the req along with 2 possible keys - // in case of splitting existing key - auto val = static_cast< const ExtentBtreeValue< V >* >(req.m_newval); - size_needed = val->extracted_size(0, 1) + 2 * (next_key.serialized_size() + node->get_record_size()); - } else { - size_needed = req.m_newval->serialized_size(); - if (req.m_put_type != btree_put_type::REPLACE_ONLY_IF_EXISTS) { - size_needed += next_key.serialized_size() + node->get_record_size(); - } - } + return !node->has_room_for_put(req.m_put_type, req.first_key_size(), req.m_newval->serialized_size()); } else if constexpr (std::is_same_v< ReqT, BtreeSinglePutRequest >) { - size_needed = req.key().serialized_size() + req.value().serialized_size() + node->get_record_size(); + return !node->has_room_for_put(req.m_put_type, req.key().serialized_size(), req.value().serialized_size()); + } else { + return false; } - int64_t alreadyFilledSize = cfg.node_data_size() - node->available_size(cfg); - return (alreadyFilledSize + size_needed >= cfg.ideal_fill_size()); } template < typename K, typename V > @@ -588,54 +346,4 @@ btree_status_t Btree< K, V >::repair_split(const BtreeNodePtr& parent_node, cons parent_node->insert(parent_split_idx, child_node1->get_last_key< K >(), child_node1->link_info()); return write_node(parent_node, context); } - -#if 0 -template < typename K, typename V > -int64_t Btree< K, V >::compute_single_put_needed_size(const V& current_val, const V& new_val) const { - return new_val.serialized_size() - current_val.serialized_size(); -} - -template < typename K, typename V > -int64_t Btree< K, V >::compute_range_put_needed_size(const std::vector< std::pair< K, V > >& existing_kvs, - const V& new_val) const { - return new_val.serialized_size() * existing_kvs.size(); -} - -template < typename 
K, typename V > -btree_status_t -Btree< K, V >::custom_kv_select_for_write(uint8_t node_version, const std::vector< std::pair< K, V > >& match_kv, - std::vector< std::pair< K, V > >& replace_kv, const BtreeKeyRange& range, - const BtreeRangePutRequest& rpreq) const { - for (const auto& [k, v] : match_kv) { - replace_kv.push_back(std::make_pair(k, (V&)rpreq.m_newval)); - } - return btree_status_t::success; -} -#endif - -#if 0 -template < typename K, typename V > -btree_status_t Btree< K, V >::get_start_and_end_idx(const BtreeNodePtr& node, BtreeMutateRequest& req, - int& start_idx, int& end_idx) { - btree_status_t ret = btree_status_t::success; - if (is_range_put_req(req)) { - /* just get start/end index from get_all. We don't release the parent lock until this - * key range is not inserted from start_idx to end_idx. - */ - node->template get_all< V >(to_range_put_req(req).next_range(), UINT32_MAX, (uint32_t&)start_idx, - (uint32_t&)end_idx); - } else { - auto [found, idx] = node->find(to_single_put_req(req).key(), nullptr, true); - ASSERT_IS_VALID_INTERIOR_CHILD_INDX(found, idx, node); - end_idx = start_idx = (int)idx; - } - - if (start_idx > end_idx) { - BT_NODE_LOG_ASSERT(false, node, "start ind {} greater than end ind {}", start_idx, end_idx); - ret = btree_status_t::retry; - } - return ret; -} -#endif - } // namespace homestore diff --git a/src/include/homestore/btree/detail/btree_node.hpp b/src/include/homestore/btree/detail/btree_node.hpp index 509d97234..8f713e534 100644 --- a/src/include/homestore/btree/detail/btree_node.hpp +++ b/src/include/homestore/btree/detail/btree_node.hpp @@ -54,8 +54,7 @@ struct persistent_hdr_t { bnodeid_t node_id{empty_bnodeid}; bnodeid_t next_node{empty_bnodeid}; - uint32_t nentries : 27; - uint32_t node_type : 3; + uint32_t nentries : 30; uint32_t leaf : 1; uint32_t valid_node : 1; @@ -63,9 +62,11 @@ struct persistent_hdr_t { uint64_t link_version{0}; // Version of the link between its parent, updated if structure changes BtreeLinkInfo::bnode_link_info edge_info; // Edge entry information - uint16_t level; // Level of the node within the tree - uint16_t reserved1; - uint32_t reserved2; + uint16_t level; // Level of the node within the tree + uint8_t node_type; // Type of the node (simple vs varlen etc..) 
+ uint8_t reserved1; + uint16_t node_size; + uint16_t reserved2; persistent_hdr_t() : nentries{0}, leaf{0}, valid_node{1} {} std::string to_string() const { @@ -78,7 +79,7 @@ struct persistent_hdr_t { #pragma pack() class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { - typedef std::pair< bool, uint32_t > node_find_result_t; + using node_find_result_t = std::pair< bool, uint32_t >; public: sisl::atomic_counter< int32_t > m_refcount{0}; @@ -86,12 +87,13 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { uint8_t* m_phys_node_buf; public: - ~BtreeNode() = default; - BtreeNode(uint8_t* node_buf, bnodeid_t id, bool init_buf, bool is_leaf) : m_phys_node_buf{node_buf} { + BtreeNode(uint8_t* node_buf, bnodeid_t id, bool init_buf, bool is_leaf, BtreeConfig const& cfg) : + m_phys_node_buf{node_buf} { if (init_buf) { new (node_buf) persistent_hdr_t{}; set_node_id(id); set_leaf(is_leaf); + set_node_size(cfg.node_size()); } else { DEBUG_ASSERT_EQ(node_id(), id); DEBUG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC); @@ -99,11 +101,26 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { } m_trans_hdr.is_leaf_node = is_leaf; } + virtual ~BtreeNode() = default; // Identify if a node is a leaf node or not, from raw buffer, by just reading persistent_hdr_t static bool identify_leaf_node(uint8_t* buf) { return (r_cast< persistent_hdr_t* >(buf))->leaf; } - node_find_result_t find(const BtreeKey& key, BtreeValue* outval, bool copy_val) const { + /// @brief Finds the index of the entry with the specified key in the node. + /// + /// This method performs a binary search on the node to find the index of the entry with the specified key. + /// If the key is not found in the node, the method returns the index of the first entry greater than the key. + /// + /// @param key The key to search for. + /// @param outval [optional] A pointer to a BtreeValue object to store the value associated with the key. + /// @param copy_val If outval is non-null, is the value deserialized from node needs to be copy of the btree + /// internal buffer. Safest option is to set this true, it is ok to set it false, if find() is called and value is + /// accessed and used before subsequent node modification. + /// @return A pair of values representing the result of the search. + /// The first value is a boolean indicating whether the key was found in the node. + /// The second value is an integer representing the index of the entry with the specified key or the index + /// of the first entry greater than the key. + node_find_result_t find(BtreeKey const& key, BtreeValue* outval, bool copy_val) const { LOGMSG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC, "Magic mismatch on btree_node {}", get_persistent_header_const()->to_string()); @@ -120,134 +137,42 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { return std::make_pair(found, idx); } - template < typename K, typename V > - uint32_t get_all(const BtreeKeyRange< K >& range, uint32_t max_count, uint32_t& start_idx, uint32_t& end_idx, - std::vector< std::pair< K, V > >* out_values = nullptr) const { - LOGMSG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC, "Magic mismatch on btree_node {}", - get_persistent_header_const()->to_string()); - auto count = 0U; - bool sfound, efound; - // Get the start index of the search range. 
- std::tie(sfound, start_idx) = bsearch_node(range.start_key());
- if (sfound && !range.is_start_inclusive()) {
- ++start_idx;
- sfound = false;
- }
- if (start_idx == total_entries()) {
- end_idx = start_idx;
- if (is_leaf() || !has_valid_edge()) {
- return 0; // No result found
- } else {
- goto out;
- }
- }
-
- std::tie(efound, end_idx) = bsearch_node(range.end_key());
- if (efound && !range.is_end_inclusive()) {
- if (end_idx == 0) { return 0; }
- --end_idx;
- efound = false;
- }
-
- // If we point to same start and end without any match, it is hitting unavailable range
- if ((start_idx == end_idx) && is_leaf() && !sfound && !efound) { return 0; }
-
- if (end_idx == total_entries()) {
- DEBUG_ASSERT_GT(end_idx, 0); // At this point end_idx should never have been zero
- if (!has_valid_edge()) { --end_idx; }
- }
-
- out:
- count = std::min(end_idx - start_idx + 1, max_count);
- if (out_values) {
- /* get the keys and values */
- for (auto i{start_idx}; i < (start_idx + count); ++i) {
- add_nth_obj_to_list< K, V >(i, out_values, true);
- }
- }
- return count;
- }
- template < typename K >
- std::pair< bool, uint32_t > get_any(const BtreeKeyRange< K >& range, BtreeKey* out_key, BtreeValue* out_val,
- bool copy_key, bool copy_val) const {
+ bool match_range(BtreeKeyRange< K > const& range, uint32_t& start_idx, uint32_t& end_idx) const {
 LOGMSG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC, "Magic mismatch on btree_node {}",
 get_persistent_header_const()->to_string());
- uint32_t result_idx;
- const auto mm_opt = range.multi_option();
- bool efound;
- uint32_t end_idx;
+ bool sfound, efound;
 // Get the start index of the search range.
- auto [sfound, start_idx] = bsearch_node(range.start_key());
+ std::tie(sfound, start_idx) = this->bsearch_node(range.start_key());
 if (sfound && !range.is_start_inclusive()) {
 ++start_idx;
 sfound = false;
 }
- if (sfound && ((mm_opt == MultiMatchOption::DO_NOT_CARE) || (mm_opt == MultiMatchOption::LEFT_MOST))) {
- result_idx = start_idx;
- goto found_result;
- } else if (start_idx == total_entries()) {
- DEBUG_ASSERT(is_leaf() || has_valid_edge(), "Invalid node");
- return std::make_pair(false, 0); // out_of_range
- }
-
- std::tie(efound, end_idx) = bsearch_node(range.end_key());
- if (efound && !range.is_end_inclusive()) {
- if (end_idx == 0) { return std::make_pair(false, 0); }
- --end_idx;
- efound = false;
+ if (start_idx == this->total_entries()) {
+ // The start key is beyond the last entry; the only possible match is the edge entry
+ end_idx = start_idx;
+ return (!is_leaf() && this->has_valid_edge()); // No result found unless it's an edge node
 }
- if (end_idx > start_idx) {
- if (mm_opt == MultiMatchOption::RIGHT_MOST) {
- result_idx = end_idx;
- } else if (mm_opt == MultiMatchOption::MID) {
- result_idx = (end_idx - start_idx) / 2;
- } else {
- result_idx = start_idx;
+ // Get the end index of the search range.
+ std::tie(efound, end_idx) = this->bsearch_node(range.end_key());
+ if (is_leaf() || ((end_idx == this->total_entries()) && !has_valid_edge())) {
+ // Binary search will always return the index of the first key that is >= the given key (end_key in this
+ // case). Our goal in a leaf node is to find the last key that is less than end_key for a non-inclusive
+ // search, or less than or equal to it for an inclusive search.
+ if (!efound || !range.is_end_inclusive()) {
+ // If we are already on the first key, then obviously nothing has been matched.
+ if (end_idx == 0) { return false; }
+ --end_idx;
+ }
- } else if ((start_idx == end_idx) && ((sfound || efound))) {
- result_idx = start_idx;
- } else {
- return std::make_pair(false, 0);
- }
-
- found_result:
- if (out_key) { get_nth_key_internal(result_idx, *out_key, copy_key); }
- if (out_val) { get_nth_value(result_idx, out_val, copy_val); }
- return std::make_pair(true, result_idx);
- }
-
- bool put(const BtreeKey& key, const BtreeValue& val, btree_put_type put_type, BtreeValue* existing_val) {
- LOGMSG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC, "Magic mismatch on btree_node {}",
- get_persistent_header_const()->to_string());
- bool ret = true;
-
- const auto [found, idx] = find(key, nullptr, false);
- if (found && existing_val) { get_nth_value(idx, existing_val, true); }
- if (put_type == btree_put_type::INSERT_ONLY_IF_NOT_EXISTS) {
- if (found) {
- LOGDEBUG("Attempt to insert duplicate entry {}", key.to_string());
- return false;
- }
- ret = (insert(idx, key, val) == btree_status_t::success);
- } else if (put_type == btree_put_type::REPLACE_ONLY_IF_EXISTS) {
- if (!found) return false;
- update(idx, key, val);
- } else if (put_type == btree_put_type::REPLACE_IF_EXISTS_ELSE_INSERT) {
- (found) ? update(idx, key, val) : (void)insert(idx, key, val);
- } else if (put_type == btree_put_type::APPEND_ONLY_IF_EXISTS) {
- if (!found) return false;
- append(idx, key, val);
- } else if (put_type == btree_put_type::APPEND_IF_EXISTS_ELSE_INSERT) {
- (found) ? append(idx, key, val) : (void)insert(idx, key, val);
- } else {
- DEBUG_ASSERT(false, "Wrong put_type {}", put_type);
+ // If start has ended up past end, the search range does not overlap any entry in this node
+ if (start_idx > end_idx) { return false; }
 }
- return ret;
+
+ return true;
 }
 virtual btree_status_t insert(const BtreeKey& key, const BtreeValue& val) {
@@ -314,59 +239,6 @@
 }
 }
- template < typename K >
- K min_of(const K& cmp_key, uint32_t cmp_ind, bool& is_cmp_key_lesser) const {
- K min_key;
- int x{-1};
- is_cmp_key_lesser = false;
-
- if (cmp_ind < total_entries()) {
- get_nth_key_internal(cmp_ind, min_key, false);
- x = cmp_key.compare(min_key);
- }
-
- if (x < 0) {
- min_key = cmp_key;
- is_cmp_key_lesser = true;
- }
- return min_key;
- }
-
- /*BtreeKeyRange get_subrange(const BtreeKeyRange< K >& inp_range, int upto_ind) const {
- #ifndef NDEBUG
- if (upto_ind > 0) {
- // start of input range should always be more then the key in curr_ind - 1
- DEBUG_ASSERT_LE(get_nth_key< K >(upto_ind - 1, false).compare(inp_range.start_key()), 0, "[node={}]",
- to_string());
- }
- #endif
-
- // find end of subrange
- bool end_inc = true;
- K end_key;
-
- if (upto_ind < int_cast(total_entries())) {
- end_key = get_nth_key< K >(upto_ind, false);
- if (end_key.compare(inp_range.end_key()) >= 0) {
- // this is last index to process as end of range is smaller then key in this node
- end_key = inp_range.end_key();
- end_inc = inp_range.is_end_inclusive();
- } else {
- end_inc = true;
- }
- } else {
- // it is the edge node.
end key is the end of input range - LOGMSG_ASSERT_EQ(has_valid_edge(), true, "node={}", to_string()); - end_key = inp_range.end_key(); - end_inc = inp_range.is_end_inclusive(); - } - - BtreeKeyRangeSafe< K > subrange{inp_range.start_key(), inp_range.is_start_inclusive(), end_key, end_inc}; - RELEASE_ASSERT_LE(subrange.start_key().compare(subrange.end_key()), 0, "[node={}]", to_string()); - RELEASE_ASSERT_LE(subrange.start_key().compare(inp_range.end_key()), 0, "[node={}]", to_string()); - return subrange; - } */ - template < typename K > K get_nth_key(uint32_t idx, bool copy) const { K k; @@ -442,29 +314,17 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { void lock_acknowledge() { m_trans_hdr.upgraders.decrement(1); } bool any_upgrade_waiters() const { return (!m_trans_hdr.upgraders.testz()); } - bool can_accomodate(const BtreeConfig& cfg, uint32_t key_size, uint32_t value_size) const { - return ((key_size + value_size + get_record_size()) <= available_size(cfg)); - } - - template < typename K, typename V > - void add_nth_obj_to_list(uint32_t ind, std::vector< std::pair< K, V > >* vec, bool copy) const { - std::pair< K, V > kv; - vec->emplace_back(kv); - - auto* pkv = &vec->back(); - if (ind == total_entries() && !is_leaf()) { - pkv->second = edge_value_internal< V >(); - } else { - get_nth_key_internal(ind, pkv->first, copy); - get_nth_value(ind, &pkv->second, copy); - } - } - public: // Public method which needs to be implemented by variants + virtual btree_status_t insert(uint32_t ind, const BtreeKey& key, const BtreeValue& val) = 0; + virtual void remove(uint32_t ind) { remove(ind, ind); } + virtual void remove(uint32_t ind_s, uint32_t ind_e) = 0; + virtual void remove_all(const BtreeConfig& cfg) = 0; + virtual void update(uint32_t ind, const BtreeValue& val) = 0; + virtual void update(uint32_t ind, const BtreeKey& key, const BtreeValue& val) = 0; + virtual uint32_t move_out_to_right_by_entries(const BtreeConfig& cfg, BtreeNode& other_node, uint32_t nentries) = 0; virtual uint32_t move_out_to_right_by_size(const BtreeConfig& cfg, BtreeNode& other_node, uint32_t size) = 0; - virtual uint32_t num_entries_by_size(uint32_t start_idx, uint32_t size) const = 0; virtual uint32_t copy_by_size(const BtreeConfig& cfg, const BtreeNode& other_node, uint32_t start_idx, uint32_t size) = 0; virtual uint32_t copy_by_entries(const BtreeConfig& cfg, const BtreeNode& other_node, uint32_t start_idx, @@ -472,23 +332,17 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { /*virtual uint32_t move_in_from_right_by_entries(const BtreeConfig& cfg, BtreeNode& other_node, uint32_t nentries) = 0; virtual uint32_t move_in_from_right_by_size(const BtreeConfig& cfg, BtreeNode& other_node, uint32_t size) = 0;*/ - virtual uint32_t available_size(const BtreeConfig& cfg) const = 0; - virtual std::string to_string(bool print_friendly = false) const = 0; - virtual std::string to_string_keys(bool print_friendly = false) const = 0; - virtual void get_nth_value(uint32_t ind, BtreeValue* out_val, bool copy) const = 0; - virtual void get_nth_key_internal(uint32_t ind, BtreeKey& out_key, bool copykey) const = 0; - virtual btree_status_t insert(uint32_t ind, const BtreeKey& key, const BtreeValue& val) = 0; - virtual void remove(uint32_t ind) { remove(ind, ind); } - virtual void remove(uint32_t ind_s, uint32_t ind_e) = 0; - virtual void remove_all(const BtreeConfig& cfg) = 0; - virtual void update(uint32_t ind, const BtreeValue& val) = 0; - virtual void update(uint32_t ind, const BtreeKey& key, const 
BtreeValue& val) = 0;
- virtual void append(uint32_t ind, const BtreeKey& key, const BtreeValue& val) = 0;
+ virtual uint32_t available_size() const = 0;
+ virtual bool has_room_for_put(btree_put_type put_type, uint32_t key_size, uint32_t value_size) const = 0;
+ virtual uint32_t num_entries_by_size(uint32_t start_idx, uint32_t size) const = 0;
- virtual uint32_t get_nth_obj_size(uint32_t ind) const = 0;
- virtual uint16_t get_record_size() const = 0;
 virtual int compare_nth_key(const BtreeKey& cmp_key, uint32_t ind) const = 0;
+ virtual void get_nth_key_internal(uint32_t ind, BtreeKey& out_key, bool copykey) const = 0;
+ virtual uint32_t get_nth_key_size(uint32_t ind) const = 0;
+ virtual void get_nth_value(uint32_t ind, BtreeValue* out_val, bool copy) const = 0;
+ virtual uint32_t get_nth_value_size(uint32_t ind) const = 0;
+ virtual uint32_t get_nth_obj_size(uint32_t ind) const { return get_nth_key_size(ind) + get_nth_value_size(ind); }
 virtual uint8_t* get_node_context() = 0;
 // Method just to please compiler
@@ -497,7 +351,10 @@
 return V{edge_id()};
 }
-private:
+ virtual std::string to_string(bool print_friendly = false) const = 0;
+ virtual std::string to_string_keys(bool print_friendly = false) const = 0;
+
+protected:
 node_find_result_t bsearch_node(const BtreeKey& key) const {
 DEBUG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC);
 auto [found, idx] = bsearch(-1, total_entries(), key);
@@ -575,7 +432,11 @@
 void set_leaf(bool leaf) { get_persistent_header()->leaf = leaf; }
 void set_node_type(btree_node_type t) { get_persistent_header()->node_type = uint32_cast(t); }
+ // Stored as (size - 1) so that a full 64KB node size still fits in the 16-bit header field
+ void set_node_size(uint32_t size) { get_persistent_header()->node_size = s_cast< uint16_t >(size - 1); }
 uint64_t node_gen() const { return get_persistent_header_const()->node_gen; }
+ uint32_t node_size() const { return s_cast< uint32_t >(get_persistent_header_const()->node_size) + 1; }
+ uint32_t node_data_size() const { return node_size() - sizeof(persistent_hdr_t); }
+
 void inc_gen() { get_persistent_header()->node_gen++; }
 void set_gen(uint64_t g) { get_persistent_header()->node_gen = g; }
 uint64_t link_version() const { return get_persistent_header_const()->link_version; }
@@ -587,21 +448,19 @@
 BtreeLinkInfo link_info() const { return BtreeLinkInfo{node_id(), link_version()}; }
- virtual uint32_t occupied_size(const BtreeConfig& cfg) const {
- return (cfg.node_data_size() - available_size(cfg));
- }
+ virtual uint32_t occupied_size() const { return (node_data_size() - available_size()); }
 bool is_merge_needed(const BtreeConfig& cfg) const {
#if 0
#ifdef _PRERELEASE
- if (iomgr_flip::instance()->test_flip("btree_merge_node") && occupied_size(cfg) < node_area_size(cfg)) {
+ if (iomgr_flip::instance()->test_flip("btree_merge_node") && occupied_size() < node_data_size()) {
 return true;
 }
 auto ret = iomgr_flip::instance()->get_test_flip< uint64_t >("btree_merge_node_pct");
- if (ret && occupied_size(cfg) < (ret.get() * node_area_size(cfg) / 100)) { return true; }
+ if (ret && occupied_size() < (ret.get() * node_data_size() / 100)) { return true; }
#endif
#endif
- return (occupied_size(cfg) < cfg.suggested_min_size());
+ return (occupied_size() < cfg.suggested_min_size());
 }
 bnodeid_t next_bnode() const { return get_persistent_header_const()->next_node; }
diff --git a/src/include/homestore/btree/detail/btree_node_mgr.ipp
b/src/include/homestore/btree/detail/btree_node_mgr.ipp index a802c79c7..e81da107f 100644 --- a/src/include/homestore/btree/detail/btree_node_mgr.ipp +++ b/src/include/homestore/btree/detail/btree_node_mgr.ipp @@ -18,6 +18,7 @@ #include #include #include +#include #include // #include @@ -52,7 +53,7 @@ btree_status_t Btree< K, V >::read_and_lock_node(bnodeid_t id, BtreeNodePtr& nod locktype_t leaf_lock_type, void* context) const { auto ret = read_node_impl(id, node_ptr); if (node_ptr == nullptr) { - if (ret != btree_status_t::fast_path_not_possible) { BT_LOG(ERROR, "read failed, reason: {}", ret); } + BT_LOG(ERROR, "read failed, reason: {}", ret); return ret; } @@ -86,7 +87,7 @@ template < typename K, typename V > btree_status_t Btree< K, V >::write_node(const BtreeNodePtr& node, void* context) { COUNTER_INCREMENT_IF_ELSE(m_metrics, node->is_leaf(), btree_leaf_node_writes, btree_int_node_writes, 1); HISTOGRAM_OBSERVE_IF_ELSE(m_metrics, node->is_leaf(), btree_leaf_node_occupancy, btree_int_node_occupancy, - ((m_node_size - node->available_size(m_bt_cfg)) * 100) / m_node_size); + ((m_node_size - node->available_size()) * 100) / m_node_size); return (write_node_impl(node, context)); } @@ -287,6 +288,13 @@ BtreeNode* Btree< K, V >::init_node(uint8_t* node_buf, uint32_t node_ctx_size, b this->m_bt_cfg); break; + case btree_node_type::PREFIX: + n = is_leaf + ? create_node< FixedPrefixNode< K, V > >(node_ctx_size, node_buf, id, init_buf, true, this->m_bt_cfg) + : create_node< FixedPrefixNode< K, BtreeLinkInfo > >(node_ctx_size, node_buf, id, init_buf, false, + this->m_bt_cfg); + break; + default: BT_REL_ASSERT(false, "Unsupported node type {}", node_type); break; diff --git a/src/include/homestore/btree/detail/btree_query_impl.ipp b/src/include/homestore/btree/detail/btree_query_impl.ipp index c151f6e5e..8d21c26b7 100644 --- a/src/include/homestore/btree/detail/btree_query_impl.ipp +++ b/src/include/homestore/btree/detail/btree_query_impl.ipp @@ -36,12 +36,8 @@ btree_status_t Btree< K, V >::do_sweep_query(BtreeNodePtr& my_node, BtreeQueryRe uint32_t start_ind{0}; uint32_t end_ind{0}; - auto cur_count = - my_node->template get_all< K, V >(qreq.next_range(), qreq.batch_size() - count, start_ind, end_ind); - for (auto idx{start_ind}; idx < (start_ind + cur_count); ++idx) { - call_on_read_kv_cb(my_node, idx, qreq); - my_node->add_nth_obj_to_list(idx, &out_values, true); - } + auto cur_count = to_variant_node(my_node)->multi_get(qreq.working_range(), qreq.batch_size() - count, + start_ind, end_ind, &out_values, qreq.filter()); count += cur_count; if (qreq.route_tracing) { @@ -49,7 +45,7 @@ btree_status_t Btree< K, V >::do_sweep_query(BtreeNodePtr& my_node, BtreeQueryRe } // If this is not the last entry found, then surely we have reached the end of search criteria - if ((end_ind + 1) < my_node->total_entries()) { break; } + // if ((end_ind + 1) < my_node->total_entries()) { break; } // Keep querying sibling nodes if (count < qreq.batch_size()) { @@ -71,7 +67,7 @@ btree_status_t Btree< K, V >::do_sweep_query(BtreeNodePtr& my_node, BtreeQueryRe } BtreeLinkInfo start_child_info; - [[maybe_unused]] const auto [isfound, idx] = my_node->find(qreq.next_key(), &start_child_info, false); + [[maybe_unused]] const auto [isfound, idx] = my_node->find(qreq.first_key(), &start_child_info, false); ASSERT_IS_VALID_INTERIOR_CHILD_INDX(isfound, idx, my_node); if (qreq.route_tracing) { append_route_trace(qreq, my_node, btree_event_t::READ, idx, idx); } @@ -92,17 +88,11 @@ btree_status_t Btree< K, V 
>::do_traversal_query(const BtreeNodePtr& my_node, Bt if (my_node->is_leaf()) { BT_NODE_LOG_ASSERT_GT(qreq.batch_size(), 0, my_node); - uint32_t start_ind = 0, end_ind = 0; - auto cur_count = my_node->get_all(qreq.next_range(), qreq.batch_size() - (uint32_t)out_values.size(), start_ind, - end_ind, &out_values); - - if (cur_count) { - for (auto idx{start_ind}; idx < (start_ind + cur_count); ++idx) { - call_on_read_kv_cb(my_node, idx, qreq); - // my_node->add_nth_obj_to_list(idx, &out_values, true); - } - } - + uint32_t start_ind{0}; + uint32_t end_ind{0}; + auto cur_count = to_variant_node(my_node)->multi_get(qreq.working_range(), + qreq.batch_size() - uint32_cast(out_values.size()), + start_ind, end_ind, &out_values, qreq.filter()); if (qreq.route_tracing) { append_route_trace(qreq, my_node, btree_event_t::READ, start_ind, start_ind + cur_count); } @@ -114,7 +104,7 @@ btree_status_t Btree< K, V >::do_traversal_query(const BtreeNodePtr& my_node, Bt return ret; } - const auto [start_isfound, start_idx] = my_node->find(qreq.next_key(), nullptr, false); + const auto [start_isfound, start_idx] = my_node->find(qreq.first_key(), nullptr, false); auto [end_is_found, end_idx] = my_node->find(qreq.input_range().end_key(), nullptr, false); bool unlocked_already = false; @@ -241,9 +231,7 @@ btree_status_t do_serialzable_query(const BtreeNodePtr& my_node, BtreeSerializab return ret; } } -#endif -#ifdef SERIALIZABLE_QUERY_IMPLEMENTATION btree_status_t sweep_query(BtreeQueryRequest< K >& qreq, std::vector< std::pair< K, V > >& out_values) { COUNTER_INCREMENT(m_metrics, btree_read_ops_count, 1); qreq.init_batch_range(); diff --git a/src/include/homestore/btree/detail/btree_remove_impl.ipp b/src/include/homestore/btree/detail/btree_remove_impl.ipp index 0ecddc7d6..df0a22773 100644 --- a/src/include/homestore/btree/detail/btree_remove_impl.ipp +++ b/src/include/homestore/btree/detail/btree_remove_impl.ipp @@ -34,24 +34,9 @@ btree_status_t Btree< K, V >::do_remove(const BtreeNodePtr& my_node, locktype_t if constexpr (std::is_same_v< ReqT, BtreeSingleRemoveRequest >) { if ((modified = my_node->remove_one(req.key(), nullptr, req.m_outval))) { ++removed_count; } } else if constexpr (std::is_same_v< ReqT, BtreeRangeRemoveRequest< K > >) { - if (req.next_key().is_extent_key()) { - modified = remove_extents_in_leaf(my_node, req); - } else { - if (my_node->total_entries()) { - auto& subrange = req.working_range(); - auto const [start_found, start_idx] = my_node->find(subrange.start_key(), nullptr, false); - auto [end_found, end_idx] = my_node->find(subrange.end_key(), nullptr, false); - if (end_found) { ++end_idx; } - - removed_count = end_idx - start_idx; - for (uint32_t count = 0; count < removed_count; ++count) { - call_on_remove_kv_cb(my_node, start_idx, req); - // since start_idx is getting updated, always call remove_start_idx - my_node->remove(start_idx); - modified = true; - } - } - } + removed_count = to_variant_node(my_node)->multi_remove(req.working_range(), req.m_filter_cb); + modified = (removed_count != 0); + req.shift_working_range(); } else if constexpr (std::is_same_v< ReqT, BtreeRemoveAnyRequest< K > >) { if ((modified = my_node->remove_any(req.m_range, req.m_outkey, req.m_outval))) { ++removed_count; } } @@ -68,7 +53,6 @@ btree_status_t Btree< K, V >::do_remove(const BtreeNodePtr& my_node, locktype_t return modified ? 
btree_status_t::success : btree_status_t::not_found; } - bool go_to_out = false; retry: locktype_t child_cur_lock = locktype_t::NONE; uint32_t curr_idx; @@ -80,21 +64,21 @@ retry: auto const [found, idx] = my_node->find(req.key(), nullptr, false); ASSERT_IS_VALID_INTERIOR_CHILD_INDX(found, idx, my_node); end_idx = start_idx = idx; + if (false) { goto out_return; } // Please the compiler } else if constexpr (std::is_same_v< ReqT, BtreeRangeRemoveRequest< K > >) { - const auto count = my_node->template get_all< K, V >(req.next_range(), UINT32_MAX, start_idx, end_idx); - if (!count) { + auto const matched = my_node->match_range< K >(req.working_range(), start_idx, end_idx); + if (!matched) { ret = btree_status_t::not_found; - go_to_out = true; + goto out_return; } - // BT_NODE_REL_ASSERT_NE(count, 0, my_node, "get_all returns 0 entries for interior node is not valid - // pattern"); - } else if constexpr (std::is_same_v< ReqT, BtreeRemoveAnyRequest< K > >) { - const auto count = my_node->template get_all< V >(req.m_range, UINT32_MAX, start_idx, end_idx); - BT_NODE_REL_ASSERT_NE(count, 0, my_node, "get_all returns 0 entries for interior node is not valid pattern"); + auto const matched = my_node->match_range< K >(req.m_range, start_idx, end_idx); + if (!matched) { + ret = btree_status_t::not_found; + goto out_return; + } end_idx = start_idx = (end_idx - start_idx) / 2; // Pick the middle, TODO: Ideally we need to pick random } - if (go_to_out) { goto out_return; } if (req.route_tracing) { append_route_trace(req, my_node, btree_event_t::READ, start_idx, end_idx); } curr_idx = start_idx; @@ -155,14 +139,15 @@ retry: if (child_node->is_leaf()) { // We get the trimmed range only for leaf because this is where we will be removing keys. In interior // nodes, keys are always propogated from the lower nodes. - bool is_inp_key_lesser = false; - K end_key = - my_node->min_of(s_cast< const K& >(req.input_range().end_key()), curr_idx, is_inp_key_lesser); - bool end_incl = is_inp_key_lesser ? req.input_range().is_end_inclusive() : true; - req.trim_working_range(std::move(end_key), end_incl); - - BT_NODE_LOG(DEBUG, my_node, "Subrange:idx=[{}-{}],c={},working={}", start_idx, end_idx, curr_idx, - req.working_range().to_string()); + if (curr_idx < my_node->total_entries()) { + K child_end_key = my_node->get_nth_key< K >(curr_idx, true); + if (child_end_key.compare(req.working_range().end_key()) < 0) { + req.trim_working_range(std::move(child_end_key), true /* inclusive child key */); + } + + BT_NODE_LOG(DEBUG, my_node, "Subrange:idx=[{}-{}],c={},working={}", start_idx, end_idx, curr_idx, + req.working_range().to_string()); + } } } @@ -200,91 +185,6 @@ out_return: return (at_least_one_child_modified == btree_status_t::success) ? 
btree_status_t::success : ret; } -template < typename K, typename V > -bool Btree< K, V >::remove_extents_in_leaf(const BtreeNodePtr& node, BtreeRangeRemoveRequest< K >& rrreq) { - if constexpr (std::is_base_of_v< ExtentBtreeKey< K >, K > && std::is_base_of_v< ExtentBtreeValue< V >, V >) { - const BtreeKeyRange< K >& subrange = rrreq.working_range(); - const auto& start_key = static_cast< const ExtentBtreeKey< K >& >(subrange.start_key()); - const auto& end_key = static_cast< ExtentBtreeKey< K >& >(subrange.end_key()); - - auto const [start_found, start_idx] = node->find(start_key, nullptr, false); - auto const [end_found, end_idx] = node->find(end_key, nullptr, false); - - K h_k, t_k; - V h_v, t_v; - int64_t head_offset{0}; - int64_t tail_offset{0}; - ExtentBtreeKey< K >& head_k = static_cast< ExtentBtreeKey< K >& >(h_k); - ExtentBtreeKey< K >& tail_k = static_cast< ExtentBtreeKey< K >& >(t_k); - ExtentBtreeValue< V >& head_v = static_cast< ExtentBtreeValue< V >& >(h_v); - ExtentBtreeValue< V >& tail_v = static_cast< ExtentBtreeValue< V >& >(t_v); - - if (start_found) { - head_k = node->get_nth_key< K >(start_idx, false); - head_offset = head_k.distance_start(start_key); - BT_NODE_DBG_ASSERT_GE(head_offset, 0, node, "Invalid start_key or head_k"); - if (head_offset > 0) { node->get_nth_value(start_idx, &head_v, false); } - } - if (end_found) { - tail_k = node->get_nth_key< K >(end_idx, false); - tail_offset = end_key.distance_end(tail_k); - BT_NODE_DBG_ASSERT_GE(tail_offset, 0, node, "Invalid end_key or tail_k"); - if (tail_offset > 0) { node->get_nth_value(end_idx, &tail_v, false); } - } - - // Write partial head and tail kv. At this point we are committing and we can't go back and not update - // some of the extents. - auto idx = start_idx; - if (end_idx == start_idx) { - // Special case - where there is a overlap and single entry is split into 3 - auto const tail_start = tail_k.extent_length() - tail_offset; - if (m_on_remove_cb) { - m_on_remove_cb(head_k.extract(head_offset, tail_start - head_offset, false), - head_v.extract(head_offset, tail_start - head_offset, false), rrreq); - } - - if (tail_offset > 0) { - node->insert(end_idx + 1, tail_k.extract(tail_start, tail_offset, false), - tail_v.extract(tail_start, tail_offset, false)); - COUNTER_INCREMENT(m_metrics, btree_obj_count, 1); - } - - if (head_offset > 0) { - node->update(idx++, head_k.extract(0, head_offset, false), head_v.extract(0, head_offset, false)); - } - } else { - if (tail_offset > 0) { - auto const tail_start = tail_k.extent_length() - tail_offset; - auto const shrunk_k = tail_k.extract(tail_start, tail_offset, false); - call_on_update_kv_cb(node, end_idx, shrunk_k, rrreq); - node->update(end_idx, shrunk_k, tail_v.extract(tail_start, tail_offset, false)); - } else if (end_found) { - ++end_idx; - } - if (head_offset > 0) { - auto const shrunk_k = head_k.extract(0, -head_offset, false); - call_on_update_kv_cb(node, idx, shrunk_k, rrreq); - node->update(idx++, shrunk_k, head_v.extract(0, -head_offset, false)); - } - } - - // Remove everything in-between - if (idx < end_idx) { - if (m_on_remove_cb) { - for (auto i{idx}; i <= end_idx; ++i) { - call_on_remove_kv_cb(node, i, rrreq); - } - } - node->remove(idx, end_idx - 1); - COUNTER_DECREMENT(m_metrics, btree_obj_count, end_idx - idx); - } - return true; - } else { - BT_REL_ASSERT(false, "Don't expect remove_extents to be called on non-extent code path"); - return false; - } -} - template < typename K, typename V > template < typename ReqT > btree_status_t Btree< K, V 
>::check_collapse_root(ReqT& req) { @@ -352,7 +252,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const _leftmost_src_info leftmost_src; _src_cursor_info src_cursor; - total_size = leftmost_node->occupied_size(m_bt_cfg); + total_size = leftmost_node->occupied_size(); for (auto indx = start_idx + 1; indx <= end_idx; ++indx) { if (indx == parent_node->total_entries()) { BT_NODE_LOG_ASSERT(parent_node->has_valid_edge(), parent_node, @@ -368,7 +268,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const BT_NODE_LOG_ASSERT_EQ(child->is_valid_node(), true, child); old_nodes.push_back(child); - total_size += child->occupied_size(m_bt_cfg); + total_size += child->occupied_size(); } // Determine if packing the nodes would result in reducing the number of nodes, if so go with that. If else @@ -383,7 +283,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const } balanced_size = (total_size == 0) ? 0 : (total_size - 1) / num_nodes + 1; - if (leftmost_node->occupied_size(m_bt_cfg) > balanced_size) { + if (leftmost_node->occupied_size() > balanced_size) { // If for some reason balancing increases the current size, give up. // TODO: Is this a real case, isn't happening would mean some sort of bug in calculation of is_merge_needed? BT_NODE_DBG_ASSERT(false, leftmost_node, @@ -395,7 +295,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const // First try to see how many entries you can fit in the leftmost node within the balanced size. We are checking // leftmost node as special case without moving, because that is the only node which is modified in-place and hence // doing a dry run and if for some reason there is a problem in balancing the nodes, then it is easy to give up. - available_size = static_cast< int32_t >(balanced_size) - leftmost_node->occupied_size(m_bt_cfg); + available_size = static_cast< int32_t >(balanced_size) - leftmost_node->occupied_size(); src_cursor.ith_node = old_nodes.size(); for (uint32_t i{0}; (i < old_nodes.size() && available_size >= 0); ++i) { leftmost_src.ith_nodes.push_back(i); @@ -403,7 +303,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const // node contains one entry and the value size is much bigger than available size auto const nentries = old_nodes[i]->num_entries_by_size(0, available_size); if ((old_nodes[i]->total_entries() - nentries) == 0) { // Entire node goes in - available_size -= old_nodes[i]->occupied_size(m_bt_cfg); + available_size -= old_nodes[i]->occupied_size(); if (i >= old_nodes.size() - 1) { src_cursor.ith_node = i + 1; src_cursor.nth_entry = std::numeric_limits< uint32_t >::max(); @@ -439,7 +339,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const // Copied entire node ++src_cursor.ith_node; src_cursor.nth_entry = 0; - available_size = balanced_size - new_node->occupied_size(m_bt_cfg); + available_size = balanced_size - new_node->occupied_size(); } else { src_cursor.nth_entry += nentries; available_size = 0; @@ -456,8 +356,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const // There is a case where we are rebalancing and the second node which rebalanced didn't move any size, in that case // the first node is going to be exactly same and we will do again merge, so bail out here. 
- if ((new_nodes.size() == old_nodes.size()) &&
- (old_nodes[0]->occupied_size(m_bt_cfg) >= new_nodes[0]->occupied_size(m_bt_cfg))) {
+ if ((new_nodes.size() == old_nodes.size()) && (old_nodes[0]->occupied_size() >= new_nodes[0]->occupied_size())) {
 ret = btree_status_t::merge_not_required;
 goto out;
 }
@@ -465,27 +364,39 @@
 if (!K::is_fixed_size()) {
 // Lets see if we have enough room in parent node to accommodate changes. This is needed only if the key is not
 // fixed length. For fixed length node merge will always result in less or equal size
+ // One fewer parent entry is released when end_idx refers to the parent's edge
+ auto excess_releasing_nodes =
+ old_nodes.size() - new_nodes.size() - ((parent_node->total_entries() == end_idx) ? 1 : 0);
+ if (!parent_node->has_room_for_put(btree_put_type::INSERT, excess_releasing_nodes * K::get_max_size(),
+ excess_releasing_nodes * BtreeLinkInfo::get_fixed_size())) {
+ BT_NODE_LOG(DEBUG, parent_node,
+ "Merge is needed, however after merge, the parent MAY not have enough space to accommodate the "
+ "new keys, so not proceeding with merge");
+ ret = btree_status_t::merge_not_required;
+ goto out;
+ }
+#if 0
 // we first calculate the least amount of space being released after removing excess children. the key size
 // cannot be taken account; so we know for sure that value (i.e., linkinfo) and also its record will be freed.
 // If the end_idx is the parent's edge, the space is not released eventually.
 auto excess_releasing_nodes =
- old_nodes.size() - new_nodes.size() - parent_node->total_entries() == end_idx ? 1 : 0;
+ old_nodes.size() - new_nodes.size() - ((parent_node->total_entries() == end_idx) ? 1 : 0);
 auto minimum_releasing_excess_size =
 excess_releasing_nodes * (BtreeLinkInfo::get_fixed_size() + parent_node->get_record_size());
- // aside from releasing size due to excess node, K::get_estimate_max_size is needed for each updating element
+ // aside from releasing size due to excess node, K::get_max_size is needed for each updating element
 // at worst case (linkinfo and record remain the same for old and new nodes). The number of updating elements
 // are the size of the new nodes (the last key of the last new node is not getting updated; hence excluded) plus
 // the leftmost node.
- if (parent_node->available_size(m_bt_cfg) + minimum_releasing_excess_size <
- (1 + new_nodes.size() ? new_nodes.size() - 1 : 0) * K::get_estimate_max_size()) {
+ if (parent_node->available_size() + minimum_releasing_excess_size <
+ (1 + new_nodes.size() ? new_nodes.size() - 1 : 0) * K::get_max_size()) {
 BT_NODE_LOG(DEBUG, parent_node,
 "Merge is needed, however after merge, the parent MAY not have enough space to accommodate the "
 "new keys, so not proceeding with merge");
 ret = btree_status_t::merge_not_required;
 goto out;
 }
+#endif
 }
 // Now it is time to commit things and at this point no going back, since in-place write nodes are modified
diff --git a/src/include/homestore/btree/detail/prefix_node.hpp b/src/include/homestore/btree/detail/prefix_node.hpp
new file mode 100644
index 000000000..62003da7a
--- /dev/null
+++ b/src/include/homestore/btree/detail/prefix_node.hpp
@@ -0,0 +1,828 @@
+/*********************************************************************************
+ * Modifications Copyright 2017-2019 eBay Inc.
+ *
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ *********************************************************************************/
+
+#pragma once
+
+#include
+#include
+#include "btree_node.hpp"
+#include
+#include
+
+SISL_LOGGING_DECL(btree)
+
+namespace homestore {
+
+// Internal format of the prefix node:
+// [Persistent Header][prefix_node_header][prefix_area_bitset][KV Suffix][KV Suffix].. ... ... [KV Prefix][KV Prefix]
+//
+template < typename K, typename V >
+class FixedPrefixNode : public VariantNode< K, V > {
+ using BtreeNode::get_nth_key_internal;
+ using BtreeNode::get_nth_key_size;
+ using BtreeNode::get_nth_obj_size;
+ using BtreeNode::get_nth_value;
+ using BtreeNode::get_nth_value_size;
+ using BtreeNode::to_string;
+ using VariantNode< K, V >::get_nth_value;
+
+private:
+#pragma pack(1)
+ struct prefix_node_header {
+ uint16_t used_slots; // Number of slots actually used. TODO: We can deduce from set_bit_count of bitset
+ uint16_t tail_slot; // What is the tail slot number being used
+
+ std::string to_string() const { return fmt::format("slots_used={} tail_slot={} ", used_slots, tail_slot); }
+
+ static constexpr uint16_t min_holes_to_compact = 10;
+ // Followed by bitset
+ };
+
+ struct prefix_entry {
+ uint16_t ref_count{0};
+ // Followed by common prefix key
+ // Followed by common prefix value
+
+ static constexpr uint32_t size() { return key_size() + value_size() + sizeof(prefix_entry); }
+
+ static constexpr uint32_t key_size() {
+ if constexpr (std::is_base_of_v< BtreeIntervalKey, K >) {
+ return dummy_key< K >.serialized_prefix_size();
+ } else {
+ return 0u; // There is no prefix for a non-interval key
+ }
+ }
+
+ static constexpr uint32_t value_size() {
+ if constexpr (std::is_base_of_v< BtreeIntervalValue, V >) {
+ return dummy_value< V >.serialized_prefix_size();
+ } else {
+ return 0u; // There is no prefix for a non-interval value
+ }
+ }
+
+ void write_kv(BtreeKey const& key, BtreeValue const& val) {
+ if constexpr (std::is_base_of_v< BtreeIntervalKey, K > && std::is_base_of_v< BtreeIntervalValue, V >) {
+ sisl::blob const kblob = s_cast< K const& >(key).serialize_prefix();
+ sisl::blob const vblob = s_cast< V const& >(val).serialize_prefix();
+
+ DEBUG_ASSERT_EQ(kblob.size, key_size(), "Prefix key size mismatch with serialized prefix size");
+ DEBUG_ASSERT_EQ(vblob.size, value_size(), "Prefix value size mismatch with serialized prefix size");
+
+ uint8_t* cur_ptr = uintptr_cast(this) + sizeof(prefix_entry);
+ std::memcpy(cur_ptr, kblob.bytes, kblob.size);
+ cur_ptr += kblob.size;
+ std::memcpy(cur_ptr, vblob.bytes, vblob.size);
+ }
+ }
+
+ sisl::blob key_buf() const {
+ return sisl::blob{const_cast< uint8_t* >(r_cast< uint8_t const* >(this) + sizeof(prefix_entry)),
+ key_size()};
+ }
+ sisl::blob val_buf() const { return sisl::blob{key_buf().bytes + key_buf().size, value_size()}; }
+ };
+
+ struct suffix_entry {
+ uint16_t prefix_slot;
+ // Followed by suffix key
+ // Followed by suffix value
+
+ static constexpr uint32_t size() { return key_size() + value_size() + sizeof(suffix_entry); }
+
+ static constexpr uint32_t key_size() {
+ if constexpr (std::is_base_of_v< BtreeIntervalKey, K >) {
+
return dummy_key< K >.serialized_suffix_size();
+ } else {
+ return dummy_key< K >.serialized_size();
+ }
+ }
+
+ static constexpr uint32_t value_size() {
+ if constexpr (std::is_base_of_v< BtreeIntervalValue, V >) {
+ return dummy_value< V >.serialized_suffix_size();
+ } else {
+ return dummy_value< V >.serialized_size();
+ }
+ }
+
+ void write_kv(BtreeKey const& key, BtreeValue const& val) {
+ sisl::blob kblob;
+ sisl::blob vblob;
+
+ uint8_t* cur_ptr = uintptr_cast(this) + sizeof(suffix_entry);
+ if constexpr (std::is_base_of_v< BtreeIntervalKey, K > && std::is_base_of_v< BtreeIntervalValue, V >) {
+ kblob = s_cast< K const& >(key).serialize_suffix();
+ vblob = s_cast< V const& >(val).serialize_suffix();
+ } else {
+ kblob = key.serialize();
+ vblob = val.serialize();
+ }
+ DEBUG_ASSERT_EQ(kblob.size, key_size(), "Suffix key size mismatch with serialized suffix size");
+ DEBUG_ASSERT_EQ(vblob.size, value_size(), "Suffix value size mismatch with serialized suffix size");
+
+ std::memcpy(cur_ptr, kblob.bytes, kblob.size);
+ cur_ptr += kblob.size;
+ std::memcpy(cur_ptr, vblob.bytes, vblob.size);
+ }
+
+ sisl::blob key_buf() const {
+ return sisl::blob{const_cast< uint8_t* >(r_cast< uint8_t const* >(this) + sizeof(suffix_entry)),
+ key_size()};
+ }
+ sisl::blob val_buf() const { return sisl::blob{key_buf().bytes + key_buf().size, value_size()}; }
+ };
+#pragma pack()
+
+ sisl::CompactBitSet prefix_bitset_;
+
+public:
+ FixedPrefixNode(uint8_t* node_buf, bnodeid_t id, bool init, bool is_leaf, const BtreeConfig& cfg) :
+ VariantNode< K, V >(node_buf, id, init, is_leaf, cfg),
+ prefix_bitset_{sisl::blob{bitset_area(), reqd_bitset_size(cfg)}, init} {
+ if (init) {
+ auto phdr = prefix_header();
+ phdr->used_slots = 0;
+ phdr->tail_slot = 0;
+ }
+ }
+
+ virtual ~FixedPrefixNode() = default;
+
+ ///////////////////////////// All overrides of BtreeIntervalNode ///////////////////////////////////
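To make the prefix/suffix split above concrete: every distinct prefix is stored once and ref-counted, while each entry stores only a small suffix plus a 2-byte slot number. A standalone back-of-envelope sketch of the savings; all sizes here are invented for illustration and are not taken from this diff:

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical layout: a 16-byte flat key/value pair, of which 12 bytes are a
    // prefix shared by consecutive entries and 4 bytes are the per-entry suffix.
    uint32_t const prefix_sz = 2 /* ref_count */ + 12; // one prefix_entry-like record
    uint32_t const suffix_sz = 2 /* prefix_slot */ + 4; // one suffix_entry-like record
    uint32_t const flat_sz = 16;                        // the same entry with no sharing
    for (uint32_t n : {1u, 16u, 256u}) { // n entries sharing a single prefix
        std::printf("entries=%3u shared=%5u bytes flat=%5u bytes\n", n, prefix_sz + n * suffix_sz, n * flat_sz);
    }
    return 0;
}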
+ /// @brief Upserts a batch of entries into a prefix node.
+ ///
+ /// This method upserts all entries in the node whose keys fall within the specified range.
+ /// The method is supported only for leaf nodes.
+ ///
+ /// @param keys The range of keys to upsert.
+ /// @param first_input_key The first key of the overall input range, used to compute the shift applied to val
+ /// for each key in the range.
+ /// @param val The value to upsert.
+ /// @param put_type The type of put operation; INSERT skips existing entries, UPDATE requires them.
+ /// @param last_failed_key [optional] If the node ran out of space part way, receives the key that could not be
+ /// upserted.
+ /// @param filter_cb The callback invoked for each existing entry found within the range. It receives the key,
+ /// the existing value and the new value, and returns a put_filter_decision: replace upserts the entry with the
+ /// new value, remove deletes the entry from the node, and keep leaves the entry unmodified and moves on to the
+ /// next entry.
+ /// @return btree_status_t::success if all keys were upserted; btree_status_t::has_more if the node ran out of
+ /// space part way (with last_failed_key set); btree_status_t::space_not_avail if there was no room to begin
+ /// with; btree_status_t::not_supported for non-interval key/value types.
+ btree_status_t multi_put(BtreeKeyRange< K > const& keys, BtreeKey const& first_input_key, BtreeValue const& val,
+ btree_put_type put_type, K* last_failed_key,
+ put_filter_cb_t const& filter_cb = nullptr) override {
+ DEBUG_ASSERT_EQ(this->is_leaf(), true, "Multi put entries on node are supported only for leaf nodes");
+ if constexpr (std::is_base_of_v< BtreeIntervalKey, K > && std::is_base_of_v< BtreeIntervalValue, V >) {
+ uint32_t modified{0};
+
+ uint16_t prefix_slot{std::numeric_limits< uint16_t >::max()};
+ K cur_key = keys.start_key();
+
+ if (!keys.is_start_inclusive()) { cur_key.shift(1); }
+ if (!has_room(1u)) { return btree_status_t::space_not_avail; }
+ bool upserted_all{false};
+
+ auto [found, idx] = this->find(cur_key, nullptr, false);
+ do {
+ auto x = cur_key.compare(keys.end_key());
+ if ((x > 0) || ((x == 0) && !keys.is_end_inclusive())) {
+ upserted_all = true;
+ break;
+ }
+
+ put_filter_decision decision{put_filter_decision::replace};
+ if (found) {
+ if (put_type == btree_put_type::INSERT) { // Insert operation should skip existing entries
+ decision = put_filter_decision::keep;
+ } else if (filter_cb) {
+ decision = filter_cb(cur_key, get_nth_value(idx, false), val);
+ if (decision == put_filter_decision::remove) {
+ ++modified;
+ remove(idx);
+ }
+ }
+
+ // We found the entry and it will be replaced in next step, for now, we need to deref the prefix
+ // corresponding to this suffix entry
+ if (decision == put_filter_decision::replace) {
+ deref_remove_prefix(get_suffix_entry_c(idx)->prefix_slot);
+ }
+ } else {
+ if (put_type == btree_put_type::UPDATE) { // Update would need existing entries found
+ decision = put_filter_decision::keep;
+ } else {
+ std::memmove(get_suffix_entry(idx + 1), get_suffix_entry(idx),
+ (this->total_entries() - idx) * suffix_entry::size());
+ this->inc_entries();
+ }
+ }
+
+ if (decision == put_filter_decision::replace) {
+ if (prefix_slot == std::numeric_limits< uint16_t >::max()) {
+ prefix_slot = add_prefix(cur_key, val);
+ }
+ V new_val{s_cast< V const& >(val)};
+ new_val.shift(s_cast< K const& >(cur_key).distance(first_input_key));
+ write_suffix(idx, prefix_slot, cur_key, new_val);
+ ++modified;
+ }
+
+ cur_key.shift(1);
+ if (!has_room(1u)) { break; }
+
+ if (decision != put_filter_decision::remove) { ++idx; }
+ found =
+ (idx < this->total_entries() && (BtreeNode::get_nth_key< K >(idx, false).compare(cur_key) == 0));
+ } while (true);
+
+ if (modified) { this->inc_gen(); }
+#ifndef NDEBUG
+ validate_sanity();
+#endif
+ if (!upserted_all) {
+ if (last_failed_key) { *last_failed_key = cur_key; }
+ return btree_status_t::has_more;
+ } else {
+ return btree_status_t::success;
+ }
+ } else {
+ return btree_status_t::not_supported;
+ }
+ }
+
+ /**
+ * @brief Removes a batch of entries from a prefix node.
+ *
+ * This method removes all entries in the node that have keys within the specified range.
+ * The method is supported only for leaf nodes.
+ *
+ * @param keys The range of keys to remove.
+ * @param filter_cb The callback function to be called for each entry found within the range. The function should
+ * take two arguments: a key and a value, and return a boolean value. If the function returns true or if there is
+ * no callback function, the entry is removed from the node. If the function returns false, the entry is not
+ * removed and the method moves on to the next entry.
+ * + * @return Returns number of objects removed + */ + uint32_t multi_remove(BtreeKeyRange< K > const& keys, remove_filter_cb_t const& filter_cb = nullptr) override { + DEBUG_ASSERT_EQ(this->is_leaf(), true, "remove_batch api is supported only for leaf node"); + if constexpr (std::is_base_of_v< BtreeIntervalKey, K > && std::is_base_of_v< BtreeIntervalValue, V >) { + K cur_key = keys.start_key(); + if (!keys.is_start_inclusive()) { cur_key.shift(1); } + uint32_t num_removed{0}; + + auto [_, idx] = this->find(cur_key, nullptr, false); + while (idx < this->total_entries()) { + cur_key = BtreeNode::get_nth_key< K >(idx, false); + auto x = cur_key.compare(keys.end_key()); + if ((x > 0) || ((x == 0) && !keys.is_end_inclusive())) { break; } + + bool remove{true}; + if (!filter_cb || filter_cb(cur_key, get_nth_value(idx, false))) { + suffix_entry* sentry = get_suffix_entry(idx); + deref_remove_prefix(sentry->prefix_slot); + std::memmove(uintptr_cast(sentry), uintptr_cast(get_suffix_entry(idx + 1)), + (this->total_entries() - idx - 1) * suffix_entry::size()); + this->dec_entries(); + ++num_removed; + } else { + ++idx; + } + } + if (num_removed) { this->inc_gen(); } + +#ifndef NDEBUG + validate_sanity(); +#endif + return num_removed; + } else { + return 0; + } + } + + ///////////////////////////// All overrides of BtreeNode /////////////////////////////////// + void get_nth_key_internal(uint32_t idx, BtreeKey& out_key, bool) const override { + suffix_entry const* sentry = get_suffix_entry_c(idx); + prefix_entry const* pentry = get_prefix_entry_c(sentry->prefix_slot); + DEBUG_ASSERT(prefix_bitset_.is_bit_set(sentry->prefix_slot), + "Prefix slot number is in suffix entry, but corresponding bit is not set"); + s_cast< BtreeIntervalKey& >(out_key).deserialize(pentry->key_buf(), sentry->key_buf(), true); + } + + void get_nth_value(uint32_t idx, BtreeValue* out_val, bool) const override { + if (idx == this->total_entries()) { + DEBUG_ASSERT_EQ(this->is_leaf(), false, "get_nth_value out-of-bound"); + DEBUG_ASSERT_EQ(this->has_valid_edge(), true, "get_nth_value out-of-bound"); + *(r_cast< BtreeLinkInfo* >(out_val)) = this->get_edge_value(); + } else { + suffix_entry const* sentry = get_suffix_entry_c(idx); + prefix_entry const* pentry = get_prefix_entry_c(sentry->prefix_slot); + DEBUG_ASSERT(prefix_bitset_.is_bit_set(sentry->prefix_slot), + "Prefix slot number is in suffix entry, but corresponding bit is not set"); + s_cast< BtreeIntervalValue* >(out_val)->deserialize(pentry->val_buf(), sentry->val_buf(), true); + } + } + + uint32_t available_size() const override { + auto num_holes = num_prefix_holes(); + if (num_holes > prefix_node_header::min_holes_to_compact) { + return available_size_without_compaction() + (num_holes * prefix_entry::size()); + } else { + return available_size_without_compaction(); + } + } + + bool has_room_for_put(btree_put_type, uint32_t, uint32_t) const override { return has_room(1u); } + + uint32_t get_nth_key_size(uint32_t) const override { return dummy_key< K >.serialized_size(); } + + uint32_t get_nth_value_size(uint32_t) const override { return dummy_value< V >.serialized_size(); } + + uint32_t move_out_to_right_by_size(const BtreeConfig& cfg, BtreeNode& on, uint32_t size_to_move) override { + return move_out_to_right_internal(cfg, on, true /* by_size*/, size_to_move); + } + + uint32_t move_out_to_right_by_entries(const BtreeConfig& cfg, BtreeNode& on, uint32_t num_entries) override { + return move_out_to_right_internal(cfg, on, false /* by_size*/, num_entries); + } + + 
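The multi_remove() loop above is a filtered range-erase over a sorted collection: position at the start key, walk forward while within the (possibly non-inclusive) end bound, and let the filter decide each entry's fate. A minimal standalone analogue over std::map; the callback shape mirrors remove_filter_cb_t, everything else is illustrative:

#include <cstdint>
#include <functional>
#include <map>

using filter_t = std::function< bool(uint64_t key, uint64_t value) >;

// Erase every entry with key in [start, end] for which the filter returns true
// (or every entry, if no filter is given) and return the number erased.
uint32_t multi_remove_like(std::map< uint64_t, uint64_t >& m, uint64_t start, uint64_t end,
                           filter_t const& filter = nullptr) {
    uint32_t removed{0};
    for (auto it = m.lower_bound(start); (it != m.end()) && (it->first <= end);) {
        if (!filter || filter(it->first, it->second)) {
            it = m.erase(it);
            ++removed;
        } else {
            ++it;
        }
    }
    return removed;
}

int main() {
    std::map< uint64_t, uint64_t > m{{1, 10}, {2, 20}, {3, 30}, {4, 40}};
    // Remove the odd keys within [1, 3]; keys 2 and 4 survive.
    return (multi_remove_like(m, 1, 3, [](uint64_t k, uint64_t) { return (k % 2) == 1; }) == 2) ? 0 : 1;
}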
uint32_t move_out_to_right_internal(const BtreeConfig& cfg, BtreeNode& on, bool by_size, uint32_t limit) {
+ FixedPrefixNode& dst_node = s_cast< FixedPrefixNode& >(on);
+
+ uint32_t dst_node_size = dst_node.occupied_size();
+ uint32_t num_moved{0};
+
+ // Nothing to move
+ if (this->total_entries() == 0) { return by_size ? 0 : dst_node_size; }
+
+ // Step 1: Walk from the last idx towards the first and map each prefix slot in this node to a prefix slot in
+ // the dst node. The map serves both to translate slot numbers and to detect when a prefix slot is shared by
+ // multiple suffixes. At the end of this step, all prefixes that need to be moved have been moved, with the
+ // bitsets correctly updated on both source and destination
+ std::map< uint16_t, uint16_t > this_to_dst_prefix;
+ uint16_t idx = this->total_entries() - 1;
+ do {
+ if (by_size) {
+ if (dst_node_size > limit) { break; }
+ } else {
+ if (num_moved == limit) { break; }
+ }
+ suffix_entry* this_sentry = get_suffix_entry(idx);
+
+ auto const this_prefix_slot = this_sentry->prefix_slot;
+ auto const it = this_to_dst_prefix.find(this_prefix_slot);
+
+ if (it == this_to_dst_prefix.cend()) {
+ // Prefix not seen so far: allocate a slot in the dst node, copy the prefix entry over and update our
+ // suffix entry to point to that slot temporarily. The suffix memory itself is moved to the dst node
+ // all at once later.
+ uint16_t dst_prefix_slot = dst_node.alloc_prefix();
+ prefix_entry* dst_pentry = dst_node.get_prefix_entry(dst_prefix_slot);
+
+ std::memcpy(voidptr_cast(dst_pentry), c_voidptr_cast(get_prefix_entry_c(this_prefix_slot)),
+ prefix_entry::size());
+
+ dst_pentry->ref_count = 1;
+ this_sentry->prefix_slot = dst_prefix_slot;
+
+ this_to_dst_prefix.insert(std::pair(this_prefix_slot, dst_prefix_slot));
+ dst_node_size += prefix_entry::size();
+ } else {
+ prefix_entry* new_pentry = dst_node.get_prefix_entry(it->second);
+ ++new_pentry->ref_count;
+ this_sentry->prefix_slot = it->second;
+ }
+
+ // Remove a reference to this prefix slot, since the suffix will eventually be moved to the dst node
+ deref_remove_prefix(this_prefix_slot);
+ dst_node_size += suffix_entry::size();
+ ++num_moved;
+ } while (idx-- > 0);
+
+ // Step 2: Move the suffixes and adjust num_entries in source and destination. All suffixes being moved have
+ // already been pointed at their new prefix slots as part of Step 1
+ std::memmove(uintptr_cast(dst_node.get_suffix_entry(0)), uintptr_cast(get_suffix_entry(idx + 1)),
+ num_moved * suffix_entry::size());
+ this->sub_entries(num_moved);
+ dst_node.add_entries(num_moved);
+
+ // Step 3: Adjust the header parameters of both nodes. For the source node the other header parameters were
+ // already adjusted as part of Step 1; only the generation count remains
+ this->inc_gen();
+ dst_node.inc_gen();
+
+ if (!this->is_leaf() && (dst_node.total_entries() != 0)) {
+ // In case this node is an edge node, move the edge to the right-hand node
+ dst_node.set_edge_info(this->edge_info());
+ this->invalidate_edge();
+ }
+
+ // Step 4: Use this opportunity to compact the source node if it needs it. The destination node is written in
+ // compacted form anyway
+ if (is_compaction_suggested()) { compact(); }
+
+#ifndef NDEBUG
+ validate_sanity();
+ dst_node.validate_sanity();
+#endif
+ return by_size ? num_moved : dst_node_size;
+ }
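Stripped of the node bookkeeping, the core of move_out_to_right_internal() above (and of copy_internal() further below) is renaming shared slot ids while transferring entries: a destination slot is allocated only the first time a given source slot is seen. A minimal sketch of just that idea, with hypothetical types:

#include <cstdint>
#include <map>
#include <vector>

struct SuffixLike { uint16_t prefix_slot; };

// Re-point a batch of suffix-like records at destination slots, mirroring the
// role of the this_to_dst_prefix map in the function above.
std::vector< SuffixLike > remap_slots(std::vector< SuffixLike > const& src, uint16_t& next_free_slot) {
    std::map< uint16_t, uint16_t > slot_map;
    std::vector< SuffixLike > out;
    for (auto const& s : src) {
        auto it = slot_map.find(s.prefix_slot);
        if (it == slot_map.end()) { it = slot_map.emplace(s.prefix_slot, next_free_slot++).first; }
        out.push_back(SuffixLike{it->second});
    }
    return out;
}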
+
+ btree_status_t insert(uint32_t idx, BtreeKey const& key, BtreeValue const& val) override {
+ if (!has_room(1u)) { return btree_status_t::space_not_avail; }
+
+ std::memmove(get_suffix_entry(idx + 1), get_suffix_entry(idx),
+ (this->total_entries() - idx) * suffix_entry::size());
+
+ write_suffix(idx, add_prefix(key, val), key, val);
+ this->inc_entries();
+ this->inc_gen();
+
+#ifndef NDEBUG
+ validate_sanity();
+#endif
+ return btree_status_t::success;
+ }
+
+ void update(uint32_t idx, BtreeValue const& val) override {
+ update(idx, BtreeNode::get_nth_key< K >(idx, false), val);
+ }
+
+ void update(uint32_t idx, BtreeKey const& key, BtreeValue const& val) override {
+ // If we are updating the edge value, none of the other logic matters. Just update the edge value and move on
+ if (idx == this->total_entries()) {
+ DEBUG_ASSERT_EQ(this->is_leaf(), false);
+ this->set_edge_value(val);
+ this->inc_gen();
+ return;
+ }
+
+ if (!has_room(1u)) {
+ if (has_room_after_compaction(1u)) {
+ compact();
+ } else {
+ LOGMSG_ASSERT(false, "Even after compaction there is no room for update");
+ return;
+ }
+ }
+ // Drop the reference this suffix holds on its current prefix before overwriting it with a new one
+ deref_remove_prefix(get_suffix_entry(idx)->prefix_slot);
+ write_suffix(idx, add_prefix(key, val), key, val);
+ this->inc_gen();
+
+#ifndef NDEBUG
+ validate_sanity();
+#endif
+ }
+
+ void remove(uint32_t idx) override {
+ if (idx == this->total_entries()) {
+ DEBUG_ASSERT(!this->is_leaf() && this->has_valid_edge(),
+ "idx={} == num_entries={} for leaf or non-edge node", idx, this->total_entries());
+
+ if (idx == 0) {
+ this->invalidate_edge();
+ } else {
+ V last_1_val;
+ get_nth_value(idx - 1, &last_1_val, false);
+ this->set_edge_value(last_1_val);
+ }
+ } else {
+ suffix_entry* sentry = get_suffix_entry(idx);
+ deref_remove_prefix(sentry->prefix_slot);
+ std::memmove(uintptr_cast(sentry), uintptr_cast(get_suffix_entry(idx + 1)),
+ (this->total_entries() - idx - 1) * suffix_entry::size());
+ this->dec_entries();
+ }
+ this->inc_gen();
+ }
+
+ void remove(uint32_t idx_s, uint32_t idx_e) override {
+ // Remove from the highest index downwards (inclusive of idx_e), so that each removal does not shift the
+ // positions of the entries still pending removal
+ for (uint32_t idx{idx_e + 1}; idx > idx_s; --idx) {
+ remove(idx - 1);
+ }
+ }
+
+ void remove_all(BtreeConfig const& cfg) override {
+ this->sub_entries(this->total_entries());
+ this->invalidate_edge();
+ this->inc_gen();
+ // Reset the prefix area as well, keeping the header consistent with a freshly initialized node
+ auto phdr = prefix_header();
+ phdr->used_slots = 0;
+ phdr->tail_slot = 0;
+ prefix_bitset_ = sisl::CompactBitSet{sisl::blob{bitset_area(), reqd_bitset_size(cfg)}, true};
+
+#ifndef NDEBUG
+ validate_sanity();
+#endif
+ }
+
+ uint8_t* get_node_context() override { return uintptr_cast(this) + sizeof(FixedPrefixNode< K, V >); }
+
+ uint32_t get_nth_obj_size(uint32_t idx) const override { return get_nth_key_size(idx) + get_nth_value_size(idx); }
+
+ uint32_t num_entries_by_size(uint32_t start_idx, uint32_t size) const override {
+ uint32_t num_entries{0};
+ uint32_t cum_size{0};
+
+ std::unordered_set< uint16_t > prefixes;
+ for (auto idx{start_idx}; idx < this->total_entries(); ++idx) {
+ suffix_entry const* sentry = get_suffix_entry_c(idx);
+ if (prefixes.find(sentry->prefix_slot) == prefixes.cend()) {
+ prefixes.insert(sentry->prefix_slot);
+ cum_size += prefix_entry::size();
+ }
+ cum_size += suffix_entry::size();
+
+ if (cum_size > size) { return num_entries; }
+ ++num_entries;
+ }
+ return num_entries;
+ }
+
+ uint32_t copy_by_size(BtreeConfig const& cfg, BtreeNode const& o, uint32_t start_idx, uint32_t size) override {
+ return copy_internal(cfg, o, start_idx, true /* by_size*/, size);
+ }
+
+ uint32_t copy_by_entries(BtreeConfig const& cfg, BtreeNode const& o, uint32_t start_idx,
+ uint32_t nentries) override {
+ return copy_internal(cfg, o, start_idx, false /* by_size*/, nentries);
+ }
+
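A note on why remove(idx_s, idx_e) above walks downwards: remove(idx) compacts the suffix area with a memmove, so every entry past idx shifts one position left, and an ascending walk would skip entries as they shift. The same effect shown with a plain vector, purely for illustration:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    std::vector< int > v{10, 20, 30, 40, 50};
    // Erase the inclusive index range [1, 3], highest index first, so each erase
    // leaves the indices of the still-pending elements unchanged.
    for (uint32_t idx{3 + 1}; idx > 1; --idx) {
        v.erase(v.begin() + (idx - 1));
    }
    assert((v == std::vector< int >{10, 50}));
    return 0;
}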
+ uint32_t copy_internal(BtreeConfig const& cfg, BtreeNode const& o, uint32_t start_idx, bool by_size,
+ uint32_t limit) {
+ FixedPrefixNode const& src_node = s_cast< FixedPrefixNode const& >(o);
+
+ uint32_t copied_size{0};
+
+ // Walk forward from start_idx and map each source prefix slot to a prefix slot in this node. The map serves
+ // both to translate slot numbers and to detect when a prefix slot is shared by multiple suffixes; each prefix
+ // is copied the first time it is seen, with the bitset correctly updated on the destination
+ std::map< uint16_t, uint16_t > src_to_my_prefix;
+ uint16_t src_idx{s_cast< uint16_t >(start_idx)};
+ uint16_t my_prefix_slot{0};
+ uint16_t my_idx = this->total_entries();
+ uint32_t num_copied{0};
+
+ while ((src_idx < src_node.total_entries()) && has_room(1u)) {
+ if (!by_size && num_copied >= limit) { break; }
+
+ suffix_entry const* src_sentry = src_node.get_suffix_entry_c(src_idx);
+ auto const src_prefix_slot = src_sentry->prefix_slot;
+
+ // Map the prefix slot from src node to my node. If we don't have a prefix slot yet, we need to allocate one
+ // for the remote prefix slot and copy the prefix entry from src node to my node. If we have one, just
+ // continue to use that by incrementing the ref_count.
+ auto const it = src_to_my_prefix.find(src_prefix_slot);
+ if (it == src_to_my_prefix.cend()) {
+ copied_size += prefix_entry::size() + suffix_entry::size();
+ if (by_size && (copied_size > limit)) { break; }
+
+ my_prefix_slot = alloc_prefix();
+ prefix_entry* my_pentry = get_prefix_entry(my_prefix_slot);
+ std::memcpy(voidptr_cast(my_pentry), c_voidptr_cast(src_node.get_prefix_entry_c(src_prefix_slot)),
+ prefix_entry::size());
+ my_pentry->ref_count = 1;
+
+ src_to_my_prefix.insert(std::pair(src_prefix_slot, my_prefix_slot));
+ } else {
+ copied_size += suffix_entry::size();
+ if (by_size && (copied_size > limit)) { break; }
+
+ my_prefix_slot = it->second;
+ prefix_entry* my_pentry = get_prefix_entry(it->second);
+ ++my_pentry->ref_count;
+ }
+
+ suffix_entry* my_sentry = get_suffix_entry(my_idx++);
+ std::memcpy(voidptr_cast(my_sentry), c_voidptr_cast(src_sentry), suffix_entry::size());
+ my_sentry->prefix_slot = my_prefix_slot;
+
+ ++src_idx;
+ ++num_copied;
+ }
+
+ this->add_entries(num_copied);
+ this->inc_gen();
+
+ // If we copied everything from start_idx till the end and if it's an edge node, need to copy the edge id as
+ // well.
+ if (src_node.has_valid_edge() && ((start_idx + num_copied) == src_node.total_entries())) {
+ this->set_edge_info(src_node.edge_info());
+ }
+
+#ifndef NDEBUG
+ validate_sanity();
+#endif
+ return by_size ? num_copied : copied_size;
+ }
+
+ std::string to_string(bool print_friendly = false) const override {
+ auto str = fmt::format("{}id={} level={} nEntries={} {} next_node={} ",
+ (print_friendly ? "------------------------------------------------------------\n" : ""),
+ this->node_id(), this->level(), this->total_entries(),
+ (this->is_leaf() ? "LEAF" : "INTERIOR"), this->next_bnode());
+ if (!this->is_leaf() && (this->has_valid_edge())) {
+ fmt::format_to(std::back_inserter(str), "edge_id={}.{}", this->edge_info().m_bnodeid,
+ this->edge_info().m_link_version);
+ }
+
+ fmt::format_to(std::back_inserter(str), "{}Prefix_Hdr={}, Prefix_Bitmap=[{}]\n",
+ (print_friendly ?
"\n\t" : " "), cprefix_header()->to_string(), prefix_bitset_.to_string()); + + for (uint32_t i{0}; i < this->total_entries(); ++i) { + fmt::format_to(std::back_inserter(str), "{}Entry{} [Key={} Val={}]", (print_friendly ? "\n\t" : " "), i + 1, + BtreeNode::get_nth_key< K >(i, false).to_string(), + this->get_nth_value(i, false).to_string()); + } + return str; + } + + std::string to_string_keys(bool print_friendly = false) const override { return "NOT Supported"; } + +private: + uint16_t add_prefix(BtreeKey const& key, BtreeValue const& val) { + auto const slot_num = alloc_prefix(); + + // Layout the prefix key/value into the prefix slot allocated + prefix_entry* pentry = get_prefix_entry(slot_num); + pentry->ref_count = 0; // Num suffix referencing this prefix + pentry->write_kv(key, val); + + return slot_num; + } + + uint16_t alloc_prefix() { + auto const slot_num = prefix_bitset_.get_next_reset_bit(0); + if (slot_num == std::numeric_limits< uint16_t >::max()) { + DEBUG_ASSERT(false, "Unable to alloc slot, shouldn't be mutating in this node without splitting"); + return std::numeric_limits< uint16_t >::max(); + } + prefix_bitset_.set_bit(slot_num); + + auto phdr = prefix_header(); + ++phdr->used_slots; + if (slot_num > phdr->tail_slot) { phdr->tail_slot = slot_num; } + return slot_num; + } + + void ref_prefix(uint16_t slot_num) { ++(get_prefix_entry(slot_num)->ref_count); } + + void deref_remove_prefix(uint16_t slot_num) { + auto phdr = prefix_header(); + auto pentry = get_prefix_entry(slot_num); + DEBUG_ASSERT_GT(pentry->ref_count, 0, "Deref of prefix slot={} error: ref_count already 0", slot_num); + DEBUG_ASSERT_GT(phdr->used_slots, 0, "Deref of prefix slot={} error: used slot count is already 0", slot_num); + + if (--pentry->ref_count == 0) { + --phdr->used_slots; + prefix_bitset_.reset_bit(slot_num); + if ((slot_num != 0) && (slot_num == phdr->tail_slot)) { + uint16_t prev_slot = prefix_bitset_.get_prev_set_bit(slot_num); + if (prev_slot != std::numeric_limits< uint16_t >::max()) { phdr->tail_slot = prev_slot; } + } + } + } + + void write_suffix(uint16_t idx, uint16_t prefix_slot, BtreeKey const& key, BtreeValue const& val) { + suffix_entry* sentry = get_suffix_entry(idx); + sentry->prefix_slot = prefix_slot; + sentry->write_kv(key, val); + ref_prefix(prefix_slot); + } + + uint32_t available_size_without_compaction() const { + uint8_t const* suffix = r_cast< uint8_t const* >(get_suffix_entry_c(this->total_entries())); + uint8_t const* prefix = r_cast< uint8_t const* >(get_prefix_entry_c(cprefix_header()->tail_slot)); + + if (suffix <= prefix) { + return prefix - suffix; + } else { + DEBUG_ASSERT(false, "Node data is corrupted, suffix area is overlapping prefix area"); + return 0; + } + } + + uint32_t available_size_with_compaction() const { + return available_size_without_compaction() + (num_prefix_holes() * prefix_entry::size()); + } + + bool has_room(uint16_t for_nentries) const { + return (available_size_without_compaction() >= (prefix_entry::size() + (for_nentries * suffix_entry::size()))); + } + + bool has_room_after_compaction(uint16_t for_nentries) const { + return (available_size_with_compaction() >= (prefix_entry::size() + (for_nentries * suffix_entry::size()))); + } + + uint32_t num_prefix_holes() const { + auto phdr = cprefix_header(); + return (phdr->tail_slot + 1 - phdr->used_slots); + } + + bool is_compaction_suggested() const { return (num_prefix_holes() > prefix_node_header::min_holes_to_compact); } + + void compact() { + // Build reverse map from prefix to suffix + 
+        std::multimap< uint16_t, uint16_t > prefix_to_suffix;
+        for (uint16_t idx{0}; idx < this->total_entries(); ++idx) {
+            suffix_entry const* sentry = get_suffix_entry_c(idx);
+            prefix_to_suffix.insert(std::pair(sentry->prefix_slot, idx));
+        }
+
+        // Every live slot that sits at or beyond the used_slots count is moved into a free hole inside the
+        // compactable area, so that all live prefixes end up packed at the front.
+        uint16_t from_slot{prefix_header()->used_slots};
+        while (true) {
+            from_slot = prefix_bitset_.get_next_set_bit(from_slot);
+            if (from_slot == std::numeric_limits< uint16_t >::max()) { break; }
+
+            auto const to_slot = prefix_bitset_.get_next_reset_bit(0u);
+            DEBUG_ASSERT_NE(to_slot, std::numeric_limits< uint16_t >::max(),
+                            "Didn't find a free location on to compaction side, not expected");
+            DEBUG_ASSERT_LT(to_slot, prefix_header()->used_slots,
+                            "Couldn't find enough slots inside compactable area, not expected");
+
+            std::memcpy(uintptr_cast(get_prefix_entry(to_slot)), (void*)get_prefix_entry(from_slot),
+                        prefix_entry::size());
+            prefix_bitset_.reset_bit(from_slot);
+            prefix_bitset_.set_bit(to_slot);
+
+            // Move all the suffixes that are referencing this prefix to the new location
+            auto range = prefix_to_suffix.equal_range(from_slot);
+            for (auto it = range.first; it != range.second; ++it) {
+                suffix_entry* sentry = get_suffix_entry(it->second);
+                sentry->prefix_slot = to_slot;
+            }
+        }
+
+        // Finally, adjust the tail slot to the edge of the compacted area.
+        auto phdr = prefix_header();
+        phdr->tail_slot = phdr->used_slots;
+    }
+
+#ifdef _DEBUG
+    void validate_sanity() {
+        uint32_t i{0};
+        // validate if keys are in ascending order
+        K prevKey;
+        while (i < this->total_entries()) {
+            K key = BtreeNode::get_nth_key< K >(i, false);
+            uint64_t kp = *(uint64_t*)key.serialize().bytes;
+            if (i > 0 && prevKey.compare(key) > 0) {
+                DEBUG_ASSERT(false, "Found non sorted entry: {} -> {}", kp, to_string());
+            }
+            prevKey = key;
+            ++i;
+        }
+    }
+#endif
+
+    //////////////////////// All Helper methods section ////////////////////////
+    static uint32_t reqd_bitset_size(BtreeConfig const& cfg) {
+        return sisl::round_up(cfg.node_data_size() / (prefix_entry::key_size() + prefix_entry::value_size()) / 8,
+                              sisl::CompactBitSet::size_multiples());
+    }
+
+    prefix_node_header* prefix_header() { return r_cast< prefix_node_header* >(this->node_data_area()); }
+    prefix_node_header const* cprefix_header() const {
+        return r_cast< prefix_node_header const* >(this->node_data_area_const());
+    }
+
+    uint8_t* bitset_area() { return this->node_data_area() + sizeof(prefix_node_header); }
+    uint8_t const* cbitset_area() const { return this->node_data_area_const() + sizeof(prefix_node_header); }
+
+    uint8_t* suffix_kv_area() { return bitset_area() + (prefix_bitset_.size() / 8); }
+    uint8_t const* csuffix_kv_area() const { return cbitset_area() + (prefix_bitset_.size() / 8); }
+
+    prefix_entry* get_prefix_entry(uint16_t slot_num) {
+        return r_cast< prefix_entry* >(this->node_data_area() +
+                                       (this->node_data_size() - ((slot_num + 1) * prefix_entry::size())));
+    }
+
+    prefix_entry const* get_prefix_entry_c(uint16_t slot_num) const {
+        return r_cast< prefix_entry const* >(this->node_data_area_const() +
+                                             (this->node_data_size() - ((slot_num + 1) * prefix_entry::size())));
+    }
+
+    suffix_entry* get_suffix_entry(uint16_t idx) {
+        return r_cast< suffix_entry* >(suffix_kv_area() + (idx * suffix_entry::size()));
+    }
+    suffix_entry const* get_suffix_entry_c(uint16_t idx) const {
+        return r_cast< suffix_entry 
const* >(csuffix_kv_area() + (idx * suffix_entry::size())); + } + + static constexpr uint32_t get_key_size() { return prefix_entry::key_size() + suffix_entry::key_size(); } + static constexpr uint32_t get_value_size() { return prefix_entry::value_size() + suffix_entry::value_size(); } +}; +} // namespace homestore diff --git a/src/include/homestore/btree/detail/simple_node.hpp b/src/include/homestore/btree/detail/simple_node.hpp index f12302183..b8149374c 100644 --- a/src/include/homestore/btree/detail/simple_node.hpp +++ b/src/include/homestore/btree/detail/simple_node.hpp @@ -16,8 +16,8 @@ #pragma once #include -#include "btree_node.hpp" -#include "btree_internal.hpp" +#include +#include #include "homestore/index/index_internal.hpp" using namespace std; @@ -28,13 +28,21 @@ SISL_LOGGING_DECL(btree) namespace homestore { template < typename K, typename V > -class SimpleNode : public BtreeNode { +class SimpleNode : public VariantNode< K, V > { public: SimpleNode(uint8_t* node_buf, bnodeid_t id, bool init, bool is_leaf, const BtreeConfig& cfg) : - BtreeNode(node_buf, id, init, is_leaf) { + VariantNode< K, V >(node_buf, id, init, is_leaf, cfg) { this->set_node_type(btree_node_type::FIXED); } + using BtreeNode::get_nth_key_internal; + using BtreeNode::get_nth_key_size; + using BtreeNode::get_nth_obj_size; + using BtreeNode::get_nth_value; + using BtreeNode::get_nth_value_size; + using BtreeNode::to_string; + using VariantNode< K, V >::get_nth_value; + // Insert the key and value in provided index // Assumption: Node lock is already taken btree_status_t insert(uint32_t ind, const BtreeKey& key, const BtreeValue& val) override { @@ -83,7 +91,7 @@ class SimpleNode : public BtreeNode { // Set the last key/value as edge entry and by decrementing entry count automatically removed the last // entry. 
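In contrast to the prefix node, SimpleNode below relies on every record having the same size, so addressing is pure stride arithmetic. A small sketch with toy sizes (not the real node header or accessors):

    #include <cstdint>

    // Fixed-size node math: record i sits at a constant stride from the data
    // area, so lookup is O(1) arithmetic rather than record walking.
    constexpr uint32_t kKeySize = 8, kValueSize = 16;    // toy sizes
    constexpr uint32_t kRecSize = kKeySize + kValueSize; // ~get_nth_obj_size()

    inline uint8_t const* nth_obj(uint8_t const* data_area, uint32_t i) { return data_area + (i * kRecSize); }
    inline uint8_t const* nth_val(uint8_t const* data_area, uint32_t i) { return nth_obj(data_area, i) + kKeySize; }

    // A 4080-byte data area holds 4080 / 24 = 170 such records, which is what
    // get_available_entries() computes as available_size() / obj_size.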
BtreeLinkInfo new_edge; - get_nth_value(ind_s - 1, &new_edge, false); + this->get_nth_value(ind_s - 1, &new_edge, false); this->set_nth_value(total_entries, new_edge); this->sub_entries(total_entries - ind_s + 1); } else { @@ -107,15 +115,11 @@ class SimpleNode : public BtreeNode { #endif } - void append(uint32_t ind, const BtreeKey& key, const BtreeValue& val) override { - RELEASE_ASSERT(false, "Append operation is not supported on simple node"); - } - uint32_t move_out_to_right_by_entries(const BtreeConfig& cfg, BtreeNode& o, uint32_t nentries) override { auto& other_node = s_cast< SimpleNode< K, V >& >(o); // Minimum of whats to be moved out and how many slots available in other node - nentries = std::min({nentries, this->total_entries(), other_node.get_available_entries(cfg)}); + nentries = std::min({nentries, this->total_entries(), other_node.get_available_entries()}); uint32_t sz = nentries * get_nth_obj_size(0); if (sz != 0) { @@ -160,7 +164,7 @@ class SimpleNode : public BtreeNode { auto& other = s_cast< const SimpleNode< K, V >& >(o); nentries = std::min(nentries, other.total_entries() - start_idx); - nentries = std::min(nentries, this->get_available_entries(cfg)); + nentries = std::min(nentries, this->get_available_entries()); uint32_t sz = nentries * get_nth_obj_size(0); if (sz != 0) { std::memcpy(get_nth_obj(this->total_entries()), other.get_nth_obj_const(start_idx), sz); } this->add_entries(nentries); @@ -173,51 +177,15 @@ class SimpleNode : public BtreeNode { return nentries; } - /*uint32_t move_in_from_right_by_entries(const BtreeConfig& cfg, BtreeNode& o, uint32_t nentries) override { - auto& other_node = s_cast< SimpleNode< K, V >& >(o); - - // Minimum of whats to be moved and how many slots available - nentries = std::min({nentries, other_node.total_entries(), get_available_entries(cfg)}); - uint32_t sz = nentries * get_nth_obj_size(0); - if (sz != 0) { - uint32_t othersz = (other_node.total_entries() - nentries) * other_node.get_nth_obj_size(0); - std::memmove(get_nth_obj(this->total_entries()), other_node.get_nth_obj(0), sz); - std::memmove(other_node.get_nth_obj(0), other_node.get_nth_obj(nentries), othersz); - } - - other_node.sub_entries(nentries); - this->add_entries(nentries); - - // If next node does not have any more entries, but only a edge entry - // we need to move that to us, so that if need be next node could be freed. 
- if ((other_node.total_entries() == 0) && other_node.has_valid_edge()) { - DEBUG_ASSERT_EQ(this->has_valid_edge(), false, "node={}", to_string()); - this->set_edge_id(other_node.edge_id()); - other_node.invalidate_edge(); - } - - other_node.inc_gen(); - this->inc_gen(); - -#ifndef NDEBUG - validate_sanity(); -#endif - return nentries; - } - - uint32_t move_in_from_right_by_size(const BtreeConfig& cfg, BtreeNode& o, uint32_t size) override { - return (get_nth_obj_size(0) * move_in_from_right_by_entries(cfg, o, size / get_nth_obj_size(0))); - } */ - - uint32_t available_size(const BtreeConfig& cfg) const override { - return (cfg.node_data_size() - (this->total_entries() * get_nth_obj_size(0))); + uint32_t available_size() const override { + return (this->node_data_size() - (this->total_entries() * get_nth_obj_size(0))); } void get_nth_key_internal(uint32_t ind, BtreeKey& out_key, bool copy) const override { DEBUG_ASSERT_LT(ind, this->total_entries(), "node={}", to_string()); sisl::blob b; b.bytes = (uint8_t*)(this->node_data_area_const() + (get_nth_obj_size(ind) * ind)); - b.size = get_obj_key_size(ind); + b.size = get_nth_key_size(ind); out_key.deserialize(b, copy); } @@ -227,19 +195,18 @@ class SimpleNode : public BtreeNode { DEBUG_ASSERT_EQ(this->has_valid_edge(), true, "node={}", to_string()); *(BtreeLinkInfo*)out_val = this->get_edge_value(); } else { - sisl::blob b; - b.bytes = const_cast< uint8_t* >(reinterpret_cast< const uint8_t* >( - this->node_data_area_const() + (get_nth_obj_size(ind) * ind) + get_obj_key_size(ind))); - b.size = V::get_fixed_size(); + sisl::blob b{const_cast< uint8_t* >(this->node_data_area_const() + (get_nth_obj_size(ind) * ind) + + get_nth_key_size(ind)), + dummy_value< V >.serialized_size()}; out_val->deserialize(b, copy); } } - /*V get_nth_value(uint32_t ind, bool copy) const { - V val; - get_nth_value(ind, &val, copy); - return val; - }*/ + bool has_room_for_put(btree_put_type put_type, uint32_t key_size, uint32_t value_size) const override { + return ((put_type == btree_put_type::UPSERT) || (put_type == btree_put_type::INSERT)) + ? (get_available_entries() > 0) + : true; + } std::string to_string(bool print_friendly = false) const override { auto str = fmt::format("{}id={} level={} nEntries={} {} next_node={} ", @@ -252,10 +219,9 @@ class SimpleNode : public BtreeNode { } for (uint32_t i{0}; i < this->total_entries(); ++i) { - V val; - get_nth_value(i, &val, false); fmt::format_to(std::back_inserter(str), "{}Entry{} [Key={} Val={}]", (print_friendly ? "\n\t" : " "), i + 1, - get_nth_key< K >(i, false).to_string(), val.to_string()); + BtreeNode::get_nth_key< K >(i, false).to_string(), + this->get_nth_value(i, false).to_string()); } return str; } @@ -263,9 +229,11 @@ class SimpleNode : public BtreeNode { std::string to_string_keys(bool print_friendly = false) const override { #if 0 std::string delimiter = print_friendly ? "\n" : "\t"; + auto str = fmt::format("{}{}.{} nEntries={} {} ", auto str = fmt::format("{}{}.{} nEntries={} {} ", print_friendly ? "------------------------------------------------------------\n" : "", this->node_id(), this->link_version(), this->total_entries(), (this->is_leaf() ? "LEAF" : "INTERIOR")); + this->node_id(), this->link_version(), this->total_entries(), (this->is_leaf() ? 
"LEAF" : "INTERIOR")); if (!this->is_leaf() && (this->has_valid_edge())) { fmt::format_to(std::back_inserter(str), "edge_id={}.{}", this->edge_info().m_bnodeid, this->edge_info().m_link_version); @@ -277,7 +245,7 @@ class SimpleNode : public BtreeNode { if (!this->is_leaf()) { fmt::format_to(std::back_inserter(str), " ["); for (uint32_t i{0}; i < this->total_entries(); ++i) { - uint32_t cur_key = get_nth_key< K >(i, false).key(); + uint32_t cur_key = BtreeNode::get_nth_key< K >(i, false).key(); BtreeLinkInfo child_info; get_nth_value(i, &child_info, false /* copy */); fmt::format_to(std::back_inserter(str), "{}.{} {}", cur_key, child_info.link_version(), i == this->total_entries() - 1 ? "" : ", "); @@ -285,9 +253,9 @@ class SimpleNode : public BtreeNode { fmt::format_to(std::back_inserter(str), "]"); return str; } - uint32_t prev_key = get_nth_key< K >(0, false).key(); + uint32_t prev_key = BtreeNode::get_nth_key< K >(0, false).key(); uint32_t cur_key = prev_key; - uint32_t last_key = get_nth_key< K >(this->total_entries() - 1, false).key(); + uint32_t last_key = BtreeNode::get_nth_key< K >(this->total_entries() - 1, false).key(); if (last_key - prev_key == this->total_entries() - 1) { if (this->total_entries() == 1) fmt::format_to(std::back_inserter(str), "{}[{}]", delimiter, prev_key); @@ -298,7 +266,7 @@ class SimpleNode : public BtreeNode { fmt::format_to(std::back_inserter(str), "{}0 - [{}", delimiter, prev_key); uint32_t start_interval_key = prev_key; for (uint32_t i{1}; i < this->total_entries(); ++i) { - cur_key = get_nth_key< K >(i, false).key(); + cur_key = BtreeNode::get_nth_key< K >(i, false).key(); if (cur_key != prev_key + 1) { if (start_interval_key == prev_key) { fmt::format_to(std::back_inserter(str), "-{}]{}{}- [{}", prev_key, delimiter, i, cur_key); @@ -328,10 +296,10 @@ class SimpleNode : public BtreeNode { // validate if keys are in ascending order uint32_t i{1}; - K prevKey = get_nth_key< K >(0, false); + K prevKey = BtreeNode::get_nth_key< K >(0, false); while (i < this->total_entries()) { - K key = get_nth_key< K >(i, false); + K key = BtreeNode::get_nth_key< K >(i, false); if (i > 0 && prevKey.compare(key) > 0) { LOGINFO("non sorted entry : {} -> {} ", prevKey.to_string(), key.to_string()); DEBUG_ASSERT(false, "node={}", to_string()); @@ -343,16 +311,9 @@ class SimpleNode : public BtreeNode { #endif inline uint32_t get_nth_obj_size(uint32_t ind) const override { - return (get_obj_key_size(ind) + get_obj_value_size(ind)); + return (get_nth_key_size(ind) + get_nth_value_size(ind)); } - int compare_nth_key(const BtreeKey& cmp_key, uint32_t ind) const override { - return get_nth_key< K >(ind, false).compare(cmp_key); - } - - // Simple/Fixed node doesn't need a record to point key/value object - uint16_t get_record_size() const override { return 0; } - /*int compare_nth_key_range(const BtreeKeyRange& range, uint32_t ind) const override { return get_nth_key(ind, false).compare_range(range); }*/ @@ -371,11 +332,11 @@ class SimpleNode : public BtreeNode { } } - uint32_t get_available_entries(const BtreeConfig& cfg) const { return available_size(cfg) / get_nth_obj_size(0); } + uint32_t get_available_entries() const { return available_size() / get_nth_obj_size(0); } - inline uint32_t get_obj_key_size(uint32_t ind) const { return K::get_fixed_size(); } + uint32_t get_nth_key_size(uint32_t ind) const override { return dummy_key< K >.serialized_size(); } - inline uint32_t get_obj_value_size(uint32_t ind) const { return V::get_fixed_size(); } + uint32_t 
get_nth_value_size(uint32_t ind) const override { return dummy_value< V >.serialized_size(); } uint8_t* get_nth_obj(uint32_t ind) { return (this->node_data_area() + (get_nth_obj_size(ind) * ind)); } const uint8_t* get_nth_obj_const(uint32_t ind) const { @@ -396,7 +357,7 @@ class SimpleNode : public BtreeNode { "Invalid value size being set for non-leaf node"); this->set_edge_info(*r_cast< BtreeLinkInfo::bnode_link_info* >(b.bytes)); } else { - uint8_t* entry = this->node_data_area() + (get_nth_obj_size(ind) * ind) + get_obj_key_size(ind); + uint8_t* entry = this->node_data_area() + (get_nth_obj_size(ind) * ind) + get_nth_key_size(ind); std::memcpy(entry, b.bytes, b.size); } } diff --git a/src/include/homestore/btree/detail/variant_node.hpp b/src/include/homestore/btree/detail/variant_node.hpp new file mode 100644 index 000000000..b54dca2f8 --- /dev/null +++ b/src/include/homestore/btree/detail/variant_node.hpp @@ -0,0 +1,311 @@ +/********************************************************************************* + * Modifications Copyright 2017-2019 eBay Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + *********************************************************************************/ +#pragma once + +#include +#include + +namespace homestore { +template < typename K > +static K dummy_key; + +template < typename V > +static V dummy_value; + +template < typename K, typename V > +class VariantNode : public BtreeNode { +public: + using BtreeNode::get_nth_key_size; + using BtreeNode::get_nth_value; + + VariantNode(uint8_t* node_buf, bnodeid_t id, bool init_buf, bool is_leaf, BtreeConfig const& cfg) : + BtreeNode(node_buf, id, init_buf, is_leaf, cfg) {} + + ///////////////////////////////////////// Get related APIs of the node ///////////////////////////////////////// + + /// @brief Gets all entries in the node that have keys within the specified range. + /// + /// This method returns all entries in the node that have keys within the specified range. The method searches the + /// node using a binary search algorithm to find the first and last entries that have keys within the range. The + /// method returns the indices of these entries in the node and optionally returns the key-value pairs of the + /// entries. + /// + /// @tparam K The type of the keys in the node. + /// @tparam V The type of the values in the node. + /// @param range The range of keys to search for. + /// @param max_count The maximum number of entries to return. + /// @param start_idx [out] A reference to an integer to store the index of the first entry that has a key within the + /// range. + /// @param end_idx [out] A reference to an integer to store the index of the last entry that has a key within the + /// range. + /// @param out_values [optional] A pointer to a vector to store the key-value pairs of the entries if provided. Can + /// be nullptr + /// @param filter_cb [optional] A callback function to be called for each entry found in the node that has a key. 
+    /// The callback is expected to return true if the entry should be included in the result and false otherwise.
+    /// @return The number of entries within the range that also passed the filter callback (if any) and were
+    /// included in the result.
+    virtual uint32_t multi_get(BtreeKeyRange< K > const& range, uint32_t max_count, uint32_t& start_idx,
+                               uint32_t& end_idx, std::vector< std::pair< K, V > >* out_values = nullptr,
+                               get_filter_cb_t const& filter_cb = nullptr) const {
+        if (!match_range(range, start_idx, end_idx)) { return 0; }
+
+        uint32_t count = std::min(end_idx - start_idx + 1, max_count);
+        if (out_values || filter_cb) {
+            /* get the keys and values */
+            auto const upto_idx = start_idx + count;
+            for (auto i{start_idx}; i < upto_idx; ++i) {
+                K key = get_nth_key< K >(i, (out_values != nullptr) /* copy */);
+                V val = get_nth_value(i, (out_values != nullptr) /* copy */);
+                if (!filter_cb || filter_cb(key, val)) {
+                    if (out_values) { out_values->emplace_back(std::move(key), std::move(val)); }
+                } else {
+                    --count;
+                }
+            }
+        }
+        return count;
+    }
+
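A call site for multi_get might look like the following. TestKey/TestValue and their key() accessor are hypothetical stand-ins, and get_filter_cb_t is assumed to be invocable as bool(key, value), which is how multi_get invokes it above:

    // Hypothetical usage: fetch up to 100 pairs with keys in [10, 50], keeping
    // only even keys; start_idx/end_idx report back the matched index window.
    std::vector< std::pair< TestKey, TestValue > > out;
    uint32_t start_idx{0}, end_idx{0};
    BtreeKeyRange< TestKey > range{TestKey{10}, true /* incl */, TestKey{50}, true /* incl */};

    uint32_t n = node->multi_get(range, 100, start_idx, end_idx, &out,
                                 [](BtreeKey const& k, BtreeValue const&) {
                                     return (s_cast< TestKey const& >(k).key() % 2) == 0;
                                 });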
+    /// @brief Gets any entry in the node that has a key within the specified range.
+    ///
+    /// This method returns any entry in the node that has a key within the specified range. The method does a binary
+    /// search to find the first entry that has a key within the range. It returns the index of the entry in the node
+    /// and optionally returns the key and value of the entry.
+    ///
+    /// @param range The range of keys to search for.
+    /// @param out_key [optional] A pointer to a key to store the key of the entry if desired.
+    /// @param out_val [optional] A pointer to a value to store the value of the entry if desired.
+    /// @param copy_key Whether to copy the key of the entry to the output key. If not copied, the out key wraps the
+    /// node's internal buffer, so it is not advisable to set this to false in case the key is accessed after any
+    /// mutation on nodes.
+    /// @param copy_val Whether to copy the value of the entry to the output value. If not copied, the out value wraps
+    /// the node's internal buffer, so it is not advisable to set this to false in case the value is accessed after
+    /// any mutation on nodes.
+    /// @param filter_cb [optional] A callback function to be called for each entry found in the node that has a key.
+    /// The callback is expected to return true if the entry should be included in the result and false otherwise.
+    /// @return A pair of a boolean and an integer.
+    ///         The boolean indicates whether an entry was found within the range.
+    ///         The integer is the index of the entry in the node.
+    virtual std::pair< bool, uint32_t > get_any(BtreeKeyRange< K > const& range, BtreeKey* out_key, BtreeValue* out_val,
+                                                bool copy_key, bool copy_val,
+                                                get_filter_cb_t const& filter_cb = nullptr) const {
+        LOGMSG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC, "Magic mismatch on btree_node {}",
+                         get_persistent_header_const()->to_string());
+        uint32_t result_idx;
+        const auto mm_opt = range.multi_option();
+        bool efound;
+        uint32_t end_idx;
+
+        // Get the start index of the search range.
+        auto [sfound, start_idx] = bsearch_node(range.start_key());
+        if (sfound && !range.is_start_inclusive()) {
+            ++start_idx;
+            sfound = false;
+        }
+
+        if (sfound && ((mm_opt == MultiMatchOption::DO_NOT_CARE) || (mm_opt == MultiMatchOption::LEFT_MOST))) {
+            result_idx = start_idx;
+            goto found_result;
+        } else if (start_idx == total_entries()) {
+            DEBUG_ASSERT(is_leaf() || has_valid_edge(), "Invalid node");
+            return std::make_pair(false, 0); // out_of_range
+        }
+
+        std::tie(efound, end_idx) = bsearch_node(range.end_key());
+        if (efound && !range.is_end_inclusive()) {
+            if (end_idx == 0) { return std::make_pair(false, 0); }
+            --end_idx;
+            efound = false;
+        }
+
+        if (end_idx > start_idx) {
+            if (mm_opt == MultiMatchOption::RIGHT_MOST) {
+                result_idx = end_idx;
+            } else if (mm_opt == MultiMatchOption::MID) {
+                result_idx = start_idx + ((end_idx - start_idx) / 2);
+            } else {
+                result_idx = start_idx;
+            }
+        } else if ((start_idx == end_idx) && (sfound || efound)) {
+            result_idx = start_idx;
+        } else {
+            return std::make_pair(false, 0);
+        }
+
+    found_result:
+        K tmp_key;
+        if (filter_cb && !out_key) {
+            out_key = &tmp_key;
+            copy_key = false;
+        }
+
+        V tmp_val;
+        if (filter_cb && !out_val) {
+            out_val = &tmp_val;
+            copy_val = false;
+        }
+
+        if (out_key) { get_nth_key_internal(result_idx, *out_key, copy_key); }
+        if (out_val) { get_nth_value(result_idx, out_val, copy_val); }
+
+        return (!filter_cb || filter_cb(*out_key, *out_val)) ? std::make_pair(true, result_idx)
+                                                             : std::make_pair(false, 0u);
+    }
+
+    V get_nth_value(uint32_t idx, bool copy) const {
+        V out_val;
+        get_nth_value(idx, &out_val, copy);
+        return out_val;
+    }
+
+    int compare_nth_key(const BtreeKey& cmp_key, uint32_t ind) const override {
+        return get_nth_key< K >(ind, false).compare(cmp_key);
+    }
+
+    ///////////////////////////////////////// Put related APIs of the node /////////////////////////////////////////
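The put() method documented next reduces to a three-row decision table over (put_type, key found). A compact sketch, with do_insert/do_update as hypothetical stand-ins for the real insert()/update() calls:

    bool do_insert(); // hypothetical stand-ins for insert()/update() above
    bool do_update();

    // INSERT fails on duplicates, UPDATE fails on absent keys, UPSERT never fails.
    inline bool toy_put(btree_put_type put_type, bool found) {
        switch (put_type) {
        case btree_put_type::INSERT: return found ? false : do_insert();
        case btree_put_type::UPDATE: return found ? do_update() : false;
        case btree_put_type::UPSERT: return found ? do_update() : do_insert();
        default: return false;
        }
    }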
+    /// @brief Inserts or updates an entry with the specified key and value in the node.
+    ///
+    /// This method inserts or updates an entry with the specified key and value in the node. It binary searches
+    /// the node to find the index of the entry with the specified key. If an entry with the specified key is found, it
+    /// updates the value for the key according to the specified put type. If an entry with the specified key is not
+    /// found, it inserts a new entry with the specified key and value. The method optionally returns the value of the
+    /// existing entry if it was updated.
+    ///
+    /// NOTE: The operation fails if the put type is INSERT and an entry with the specified key already exists in the
+    /// node.
+    ///
+    /// @param key The key of the entry to insert or update.
+    /// @param val The value of the entry to insert or update.
+    /// @param put_type The type of put operation to perform if an entry with the specified key is found. put_type
+    /// translates into one of "Insert", "Update" or "Upsert".
+    /// @param existing_val [optional] A pointer to a value to store the value of the existing entry if it was updated.
+    /// @param filter_cb [optional] A callback function to be called for the entry found with the specified key. It
+    /// is used as a filter to skip anything that needn't be updated.
+    /// @return A boolean indicating whether the operation was successful.
+    ///
+    virtual bool put(BtreeKey const& key, BtreeValue const& val, btree_put_type put_type, BtreeValue* existing_val,
+                     put_filter_cb_t const& filter_cb = nullptr) {
+        LOGMSG_ASSERT_EQ(magic(), BTREE_NODE_MAGIC, "Magic mismatch on btree_node {}",
+                         get_persistent_header_const()->to_string());
+        bool ret = true;
+
+        DEBUG_ASSERT_EQ(
+            this->is_leaf(), true,
+            "Put operation on node is supported only for leaf nodes, interiors do use insert/update on index APIs");
+
+        const auto [found, idx] = find(key, nullptr, false);
+        if (found) {
+            if (existing_val) { get_nth_value(idx, existing_val, true); }
+            if (filter_cb &&
+                filter_cb(get_nth_key< K >(idx, false), get_nth_value(idx, false), val) !=
+                    put_filter_decision::replace) {
+                return false;
+            }
+        }
+
+        if (put_type == btree_put_type::INSERT) {
+            if (found) {
+                LOGDEBUG("Attempt to insert duplicate entry {}", key.to_string());
+                return false;
+            }
+            ret = (insert(idx, key, val) == btree_status_t::success);
+        } else if (put_type == btree_put_type::UPDATE) {
+            if (!found) { return false; }
+            update(idx, key, val);
+        } else if (put_type == btree_put_type::UPSERT) {
+            (found) ? update(idx, key, val) : (void)insert(idx, key, val);
+        } else {
+            DEBUG_ASSERT(false, "Wrong put_type {}", put_type);
+        }
+        return ret;
+    }
+
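For the batch variant multi_put that follows, a range-update call site with a filter might look like this. is_dead/is_tombstone are hypothetical predicates; the filter exercises all three put_filter_decision outcomes:

    // Hypothetical usage: update every key in [100, 200) to new_val, purging
    // entries whose current value is dead and leaving tombstones untouched.
    K last_failed;
    auto status = node->multi_put(
        BtreeKeyRange< K >{K{100}, true, K{200}, false}, K{} /* unused */, new_val,
        btree_put_type::UPDATE, &last_failed,
        [](BtreeKey const&, BtreeValue const& cur, BtreeValue const&) {
            if (is_dead(cur)) { return put_filter_decision::remove; }
            if (is_tombstone(cur)) { return put_filter_decision::keep; }
            return put_filter_decision::replace;
        });
    if (status == btree_status_t::has_more) { /* out of room: split and retry from last_failed */ }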
+    /// @brief Put a batch of key/values into this node
+    ///
+    /// This method updates all entries in the node that have keys within the specified range.
+    /// NOTE: The method is supported only for leaf nodes.
+    /// NOTE: This base class version only supports range updates.
+    ///
+    /// @param keys The range of keys to upsert.
+    /// @param val The value to upsert.
+    /// @param last_failed_key [optional] If non-null and there is not enough room to put all entries, receives the
+    /// key at which the put stopped.
+    /// @param filter_cb The callback function to be called for each entry found within the range. The function takes
+    /// three arguments, the key, the current value and the new value, and returns a put_filter_decision value. If the
+    /// function returns:
+    ///     put_filter_decision::replace, the entry is upserted with the new value.
+    ///     put_filter_decision::remove, the entry is removed from the node.
+    ///     put_filter_decision::keep, the entry is not modified and the method moves on to the next entry.
+    /// @return Btree status.
+    ///         If all keys were updated successfully, the method returns btree_status_t::success.
+    ///         If the method ran out of space in the node, it returns btree_status_t::has_more and reports the last
+    ///         key it could not put via last_failed_key.
+    virtual btree_status_t multi_put(BtreeKeyRange< K > const& keys, BtreeKey const&, BtreeValue const& val,
+                                     btree_put_type put_type, K* last_failed_key,
+                                     put_filter_cb_t const& filter_cb = nullptr) {
+        if (put_type != btree_put_type::UPDATE) {
+            DEBUG_ASSERT(false, "For non-interval keys multi-put should be really update and cannot insert");
+            return btree_status_t::not_supported;
+        }
+        DEBUG_ASSERT_EQ(this->is_leaf(), true, "Multi put entries on node are supported only for leaf nodes");
+
+        // Match the key range to get start and end idx. If none of the ranges matches, we have to return not_found.
+        uint32_t start_idx;
+        uint32_t end_idx;
+        if (!this->match_range(keys, start_idx, end_idx)) { return btree_status_t::not_found; }
+
+        const auto new_val_size = val.serialized_size();
+        for (auto idx{start_idx}; idx <= end_idx; ++idx) {
+            if (!has_room_for_put(put_type, get_nth_key_size(idx), new_val_size)) {
+                if (last_failed_key) { this->get_nth_key_internal(idx, *last_failed_key, true); }
+                return btree_status_t::has_more;
+            }
+            if (filter_cb) {
+                auto decision = filter_cb(get_nth_key< K >(idx, false), get_nth_value(idx, false), val);
+                if (decision == put_filter_decision::replace) {
+                    this->update(idx, val);
+                } else if (decision == put_filter_decision::remove) {
+                    this->remove(idx);
+                    // Entries to the right shifted left by one; shrink the range and revisit the same index.
+                    if (end_idx == 0) { break; }
+                    --end_idx;
+                    --idx;
+                }
+            } else {
+                update(idx, val);
+            }
+        }
+        return btree_status_t::success;
+    }
+
+    ///////////////////////////////////////// Remove related APIs of the node /////////////////////////////////////////
+    virtual uint32_t multi_remove(BtreeKeyRange< K > const& keys, remove_filter_cb_t const& filter_cb = nullptr) {
+        DEBUG_ASSERT_EQ(this->is_leaf(), true, "Multi remove entries on node are supported only for leaf nodes");
+
+        // Match the key range to get start and end idx. If none of the ranges matches, we have to return not_found.
+        uint32_t start_idx{0};
+        uint32_t end_idx{0};
+        if (!this->match_range(keys, start_idx, end_idx)) { return 0u; }
+
+        auto removed_count = end_idx - start_idx + 1;
+        auto ret = removed_count;
+        for (uint32_t count = 0; count < removed_count; ++count) {
+            if (!filter_cb || filter_cb(get_nth_key< K >(start_idx, false), get_nth_value(start_idx, false))) {
+                this->remove(start_idx);
+            } else {
+                ++start_idx; // Skipping the entry
+                --ret;
+            }
+        }
+        return ret;
+    }
+};
+} // namespace homestore
\ No newline at end of file
diff --git a/src/include/homestore/btree/detail/varlen_node.hpp b/src/include/homestore/btree/detail/varlen_node.hpp
index 29a955983..de155f9c5 100644
--- a/src/include/homestore/btree/detail/varlen_node.hpp
+++ b/src/include/homestore/btree/detail/varlen_node.hpp
@@ -17,7 +17,7 @@
 #pragma once
 
 #include
-#include "btree_node.hpp"
+#include
 #include
 #include "homestore/index/index_internal.hpp"
@@ -47,10 +47,18 @@ struct var_node_header
 // [Persistent Header][var node header][Record][Record].. ... ... [key][value][key][value]
 //
 template < typename K, typename V >
-class VariableNode : public BtreeNode {
+class VariableNode : public VariantNode< K, V > {
 public:
+    using BtreeNode::get_nth_key_internal;
+    using BtreeNode::get_nth_key_size;
+    using BtreeNode::get_nth_obj_size;
+    using BtreeNode::get_nth_value;
+    using BtreeNode::get_nth_value_size;
+    using BtreeNode::to_string;
+    using VariantNode< K, V >::get_nth_value;
+
     VariableNode(uint8_t* node_buf, bnodeid_t id, bool init, bool is_leaf, const BtreeConfig& cfg) :
-        BtreeNode(node_buf, id, init, is_leaf) {
+        VariantNode< K, V >(node_buf, id, init, is_leaf, cfg) {
         if (init) {
             // Tail arena points to the edge of the node as data arena grows backwards. 
Entire space is now available // except for the header itself @@ -63,10 +71,6 @@ class VariableNode : public BtreeNode { virtual ~VariableNode() = default; - uint32_t occupied_size(const BtreeConfig& cfg) const override { - return (cfg.node_data_size() - sizeof(var_node_header) - available_size(cfg)); - } - /* Insert the key and value in provided index * Assumption: Node lock is already taken */ btree_status_t insert(uint32_t ind, const BtreeKey& key, const BtreeValue& val) override { @@ -75,8 +79,7 @@ class VariableNode : public BtreeNode { #ifndef NDEBUG validate_sanity(); #endif - if (sz == 0) { return btree_status_t::insert_failed; } - return btree_status_t::success; + return (sz == 0) ? btree_status_t::space_not_avail : btree_status_t::success; } #ifndef NDEBUG @@ -85,7 +88,7 @@ class VariableNode : public BtreeNode { // validate if keys are in ascending order K prevKey; while (i < this->total_entries()) { - K key = get_nth_key< K >(i, false); + K key = BtreeNode::get_nth_key< K >(i, false); uint64_t kp = *(uint64_t*)key.serialize().bytes; if (i > 0 && prevKey.compare(key) > 0) { DEBUG_ASSERT(false, "Found non sorted entry: {} -> {}", kp, to_string()); @@ -105,7 +108,7 @@ class VariableNode : public BtreeNode { this->set_edge_value(val); this->inc_gen(); } else { - K key = get_nth_key< K >(ind, true); + K key = BtreeNode::get_nth_key< K >(ind, true); update(ind, key, val); } } @@ -167,13 +170,13 @@ class VariableNode : public BtreeNode { this->set_edge_value(last_1_val); for (uint32_t i = ind_s - 1; i < total_entries; i++) { - get_var_node_header()->m_available_space += get_nth_key_len(i) + get_nth_value_len(i) + recSize; + get_var_node_header()->m_available_space += get_nth_key_size(i) + get_nth_value_size(i) + recSize; } this->sub_entries(total_entries - ind_s + 1); } else { // claim available memory for (uint32_t i = ind_s; i <= ind_e; i++) { - get_var_node_header()->m_available_space += get_nth_key_len(i) + get_nth_value_len(i) + recSize; + get_var_node_header()->m_available_space += get_nth_key_size(i) + get_nth_value_size(i) + recSize; } uint8_t* rec_ptr = get_nth_record_mutable(ind_s); memmove(rec_ptr, rec_ptr + recSize * no_of_elem, (this->total_entries() - ind_e - 1) * recSize); @@ -223,11 +226,11 @@ class VariableNode : public BtreeNode { // Get the ith key and value blob and then remove the entry from here and insert to the other node sisl::blob kb; kb.bytes = (uint8_t*)get_nth_obj(ind); - kb.size = get_nth_key_len(ind); + kb.size = get_nth_key_size(ind); sisl::blob vb; vb.bytes = kb.bytes + kb.size; - vb.size = get_nth_value_len(ind); + vb.size = get_nth_value_size(ind); auto sz = other.insert(0, kb, vb); if (!sz) { break; } @@ -264,11 +267,11 @@ class VariableNode : public BtreeNode { while (ind > 0) { sisl::blob kb; kb.bytes = (uint8_t*)get_nth_obj(ind); - kb.size = get_nth_key_len(ind); + kb.size = get_nth_key_size(ind); sisl::blob vb; vb.bytes = kb.bytes + kb.size; - vb.size = get_nth_value_len(ind); + vb.size = get_nth_value_size(ind); if ((kb.size + vb.size + this->get_record_size()) > size_to_move) { // We reached threshold of how much we could move @@ -303,7 +306,7 @@ class VariableNode : public BtreeNode { uint32_t cum_size{0}; while (idx < this->total_entries()) { - uint32_t const rec_size = this->get_record_size() + get_nth_key_len(idx) + get_nth_value_len(idx); + uint32_t const rec_size = this->get_record_size() + get_nth_key_size(idx) + get_nth_value_size(idx); cum_size += rec_size; if (cum_size > size) { break; } ++idx; @@ -319,8 +322,8 @@ class VariableNode 
: public BtreeNode { auto idx = start_idx; uint32_t n = 0; while (idx < other.total_entries()) { - sisl::blob kb{(uint8_t*)other.get_nth_obj(idx), other.get_nth_key_len(idx)}; - sisl::blob vb{kb.bytes + kb.size, other.get_nth_value_len(idx)}; + sisl::blob kb{(uint8_t*)other.get_nth_obj(idx), other.get_nth_key_size(idx)}; + sisl::blob vb{kb.bytes + kb.size, other.get_nth_value_size(idx)}; // We reached threshold of how much we could move if ((kb.size + vb.size + other.get_record_size()) > copy_size) { break; } @@ -349,8 +352,8 @@ class VariableNode : public BtreeNode { auto idx = start_idx; uint32_t n = 0; while (n < nentries) { - sisl::blob kb{(uint8_t*)other.get_nth_obj(idx), other.get_nth_key_len(idx)}; - sisl::blob vb{kb.bytes + kb.size, other.get_nth_value_len(idx)}; + sisl::blob kb{(uint8_t*)other.get_nth_obj(idx), other.get_nth_key_size(idx)}; + sisl::blob vb{kb.bytes + kb.size, other.get_nth_value_size(idx)}; auto sz = insert(this->total_entries(), kb, vb); if (sz == 0) { break; } @@ -378,11 +381,11 @@ class VariableNode : public BtreeNode { // Get the ith key and value blob and then remove the entry from here and insert to the other node sisl::blob kb; kb.bytes = (uint8_t*)other.get_nth_obj(other_ind); - kb.size = other.get_nth_key_len(other_ind); + kb.size = other.get_nth_key_size(other_ind); sisl::blob vb; vb.bytes = kb.bytes + kb.size; - vb.size = other.get_nth_value_len(other_ind); + vb.size = other.get_nth_value_size(other_ind); auto sz = insert(this->total_entries(), kb, vb); if (!sz) { break; } @@ -419,11 +422,11 @@ class VariableNode : public BtreeNode { while (ind < this->total_entries()) { sisl::blob kb; kb.bytes = (uint8_t*)other.get_nth_obj(ind); - kb.size = other.get_nth_key_len(ind); + kb.size = other.get_nth_key_size(ind); sisl::blob vb; vb.bytes = kb.bytes + kb.size; - vb.size = other.get_nth_value_len(ind); + vb.size = other.get_nth_value_size(ind); if ((kb.size + vb.size + other.get_record_size()) > size_to_move) { // We reached threshold of how much we could move @@ -453,31 +456,30 @@ class VariableNode : public BtreeNode { return moved_size; } */ - void append(uint32_t ind, const BtreeKey& key, const BtreeValue& val) override { - RELEASE_ASSERT(false, "Append operation is not supported on var node"); - } - - uint32_t available_size(const BtreeConfig& cfg) const override { - return get_var_node_header_const()->m_available_space; - } - - uint32_t get_nth_obj_size(uint32_t ind) const override { return get_nth_key_len(ind) + get_nth_value_len(ind); } + uint32_t available_size() const override { return get_var_node_header_const()->m_available_space; } void set_nth_key(uint32_t ind, const BtreeKey& key) { const auto kb = key.serialize(); assert(ind < this->total_entries()); - assert(kb.size == get_nth_key_len(ind)); + assert(kb.size == get_nth_key_size(ind)); memcpy(uintptr_cast(get_nth_obj(ind)), kb.bytes, kb.size); } - virtual uint16_t get_nth_key_len(uint32_t ind) const = 0; - virtual uint16_t get_nth_value_len(uint32_t ind) const = 0; - virtual void set_nth_key_len(uint8_t* rec_ptr, uint16_t key_len) = 0; - virtual void set_nth_value_len(uint8_t* rec_ptr, uint16_t value_len) = 0; + bool has_room_for_put(btree_put_type put_type, uint32_t key_size, uint32_t value_size) const override { + auto needed_size = key_size + value_size; + if ((put_type == btree_put_type::UPSERT) || (put_type == btree_put_type::INSERT)) { + needed_size += get_record_size(); + } + return (available_size() >= needed_size); + } + + virtual uint32_t get_record_size() const = 0; + virtual void 
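The has_room_for_put() accounting above charges INSERT/UPSERT for one record header on top of the key and value bytes, while UPDATE pays only for the bytes it writes. A worked example with toy numbers:

    #include <cstdint>

    // Worked example of has_room_for_put() for a var-size node.
    inline void space_example() {
        uint32_t const key_size = 13, value_size = 30, record_size = 8; // toy numbers
        uint32_t const avail = 50;                                      // bytes left in arena
        bool const can_insert = (avail >= key_size + value_size + record_size); // 50 >= 51 -> false
        bool const can_update = (avail >= key_size + value_size);               // 50 >= 43 -> true
        (void)can_insert; (void)can_update;
    }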
set_nth_key_len(uint8_t* rec_ptr, uint32_t key_len) = 0; + virtual void set_nth_value_len(uint8_t* rec_ptr, uint32_t value_len) = 0; void get_nth_key_internal(uint32_t ind, BtreeKey& out_key, bool copy) const override { assert(ind < this->total_entries()); - sisl::blob b{const_cast< uint8_t* >(get_nth_obj(ind)), get_nth_key_len(ind)}; + sisl::blob b{const_cast< uint8_t* >(get_nth_obj(ind)), get_nth_key_size(ind)}; out_key.deserialize(b, copy); } @@ -487,17 +489,11 @@ class VariableNode : public BtreeNode { DEBUG_ASSERT_EQ(this->has_valid_edge(), true, "get_nth_value out-of-bound"); *(BtreeLinkInfo*)out_val = this->get_edge_value(); } else { - sisl::blob b{const_cast< uint8_t* >(get_nth_obj(ind)) + get_nth_key_len(ind), get_nth_value_len(ind)}; + sisl::blob b{const_cast< uint8_t* >(get_nth_obj(ind)) + get_nth_key_size(ind), get_nth_value_size(ind)}; out_val->deserialize(b, copy); } } - /*V get_nth_value(uint32_t ind, bool copy) const { - assert(ind < this->total_entries()); - sisl::blob b{const_cast< uint8_t* >(get_nth_obj(ind)) + get_nth_key_len(ind), get_nth_value_len(ind)}; - return V{b, copy}; - }*/ - std::string to_string(bool print_friendly = false) const override { auto str = fmt::format( "{}id={} level={} nEntries={} {} free_space={}{} ", @@ -513,7 +509,7 @@ class VariableNode : public BtreeNode { V val; get_nth_value(i, &val, false); fmt::format_to(std::back_inserter(str), "{}Entry{} [Key={} Val={}]", (print_friendly ? "\n\t" : " "), i + 1, - get_nth_key< K >(i, false).to_string(), val.to_string()); + BtreeNode::get_nth_key< K >(i, false).to_string(), val.to_string()); } return str; } @@ -521,9 +517,11 @@ class VariableNode : public BtreeNode { std::string to_string_keys(bool print_friendly = false) const override { #if 0 std::string delimiter = print_friendly ? "\n" : "\t"; + auto str = fmt::format("{}{}.{} nEntries={} {} ", auto str = fmt::format("{}{}.{} nEntries={} {} ", print_friendly ? "------------------------------------------------------------\n" : "", this->node_id(), this->link_version(), this->total_entries(), (this->is_leaf() ? "LEAF" : "INTERIOR")); + this->node_id(), this->link_version(), this->total_entries(), (this->is_leaf() ? "LEAF" : "INTERIOR")); if (!this->is_leaf() && (this->has_valid_edge())) { fmt::format_to(std::back_inserter(str), "edge_id={}.{}", this->edge_info().m_bnodeid, this->edge_info().m_link_version); @@ -535,7 +533,7 @@ class VariableNode : public BtreeNode { if (!this->is_leaf()) { fmt::format_to(std::back_inserter(str), " ["); for (uint32_t i{0}; i < this->total_entries(); ++i) { - uint32_t cur_key = get_nth_key< K >(i, false).key(); + uint32_t cur_key = BtreeNode::get_nth_key< K >(i, false).key(); BtreeLinkInfo child_info; get_nth_value(i, &child_info, false /* copy */); fmt::format_to(std::back_inserter(str), "{}.{} {}", cur_key, child_info.link_version(), i == this->total_entries() - 1 ? 
"" : ", "); @@ -543,9 +541,9 @@ class VariableNode : public BtreeNode { fmt::format_to(std::back_inserter(str), "]"); return str; } - uint32_t prev_key = get_nth_key< K >(0, false).key(); + uint32_t prev_key = BtreeNode::get_nth_key< K >(0, false).key(); uint32_t cur_key = prev_key; - uint32_t last_key = get_nth_key< K >(this->total_entries() - 1, false).key(); + uint32_t last_key = BtreeNode::get_nth_key< K >(this->total_entries() - 1, false).key(); if (last_key - prev_key == this->total_entries() - 1) { if (this->total_entries() == 1) fmt::format_to(std::back_inserter(str), "{}[{}]", delimiter, prev_key); @@ -556,7 +554,7 @@ class VariableNode : public BtreeNode { fmt::format_to(std::back_inserter(str), "{}0 - [{}", delimiter, prev_key); uint32_t start_interval_key = prev_key; for (uint32_t i{1}; i < this->total_entries(); ++i) { - cur_key = get_nth_key< K >(i, false).key(); + cur_key = BtreeNode::get_nth_key< K >(i, false).key(); if (cur_key != prev_key + 1) { if (start_interval_key == prev_key) { fmt::format_to(std::back_inserter(str), "-{}]{}{}- [{}", prev_key, delimiter, i, cur_key); @@ -580,10 +578,6 @@ class VariableNode : public BtreeNode { uint8_t* get_node_context() override { return uintptr_cast(this) + sizeof(VariableNode< K, V >); } - int compare_nth_key(const BtreeKey& cmp_key, uint32_t ind) const { - return get_nth_key< K >(ind, false).compare(cmp_key); - } - /*int compare_nth_key_range(const BtreeKeyRange& range, uint32_t ind) const { return get_nth_key(ind, false).compare_range(range); }*/ @@ -683,7 +677,7 @@ class VariableNode : public BtreeNode { // loop records while (ind < no_of_entries) { uint16_t total_key_value_len = - get_nth_key_len(rec[ind].orig_record_index) + get_nth_value_len(rec[ind].orig_record_index); + get_nth_key_size(rec[ind].orig_record_index) + get_nth_value_size(rec[ind].orig_record_index); sparce_space = last_offset - (rec[ind].m_obj_offset + total_key_value_len); if (sparce_space > 0) { // do compaction @@ -755,16 +749,18 @@ class VarKeySizeNode : public VariableNode< K, V > { } virtual ~VarKeySizeNode() = default; - uint16_t get_nth_key_len(uint32_t ind) const override { + uint32_t get_nth_key_size(uint32_t ind) const override { return r_cast< const var_key_record* >(this->get_nth_record(ind))->m_key_len; } - uint16_t get_nth_value_len(uint32_t ind) const override { return V::get_fixed_size(); } - uint16_t get_record_size() const override { return sizeof(var_key_record); } + uint32_t get_nth_value_size(uint32_t ind) const override { return dummy_value< V >.serialized_size(); } + uint32_t get_record_size() const override { return sizeof(var_key_record); } - void set_nth_key_len(uint8_t* rec_ptr, uint16_t key_len) override { + void set_nth_key_len(uint8_t* rec_ptr, uint32_t key_len) override { r_cast< var_key_record* >(rec_ptr)->m_key_len = key_len; } - void set_nth_value_len(uint8_t* rec_ptr, uint16_t value_len) override { assert(value_len == V::get_fixed_size()); } + void set_nth_value_len(uint8_t* rec_ptr, uint32_t value_len) override { + assert(value_len == dummy_value< V >.serialized_size()); + } private: #pragma pack(1) @@ -785,14 +781,16 @@ class VarValueSizeNode : public VariableNode< K, V > { } virtual ~VarValueSizeNode() = default; - uint16_t get_nth_key_len(uint32_t ind) const override { return K::get_fixed_size(); } - uint16_t get_nth_value_len(uint32_t ind) const override { + uint32_t get_nth_key_size(uint32_t ind) const override { return dummy_key< K >.serialized_size(); } + uint32_t get_nth_value_size(uint32_t ind) const override { 
return r_cast< const var_value_record* >(this->get_nth_record(ind))->m_value_len; } - uint16_t get_record_size() const override { return sizeof(var_value_record); } + uint32_t get_record_size() const override { return sizeof(var_value_record); } - void set_nth_key_len(uint8_t* rec_ptr, uint16_t key_len) override { assert(key_len == K::get_fixed_size()); } - void set_nth_value_len(uint8_t* rec_ptr, uint16_t value_len) override { + void set_nth_key_len(uint8_t* rec_ptr, uint32_t key_len) override { + assert(key_len == dummy_key< K >.serialized_size()); + } + void set_nth_value_len(uint8_t* rec_ptr, uint32_t value_len) override { r_cast< var_value_record* >(rec_ptr)->m_value_len = value_len; } @@ -815,18 +813,18 @@ class VarObjSizeNode : public VariableNode< K, V > { } virtual ~VarObjSizeNode() = default; - uint16_t get_nth_key_len(uint32_t ind) const override { + uint32_t get_nth_key_size(uint32_t ind) const override { return r_cast< const var_obj_record* >(this->get_nth_record(ind))->m_key_len; } - uint16_t get_nth_value_len(uint32_t ind) const override { + uint32_t get_nth_value_size(uint32_t ind) const override { return r_cast< const var_obj_record* >(this->get_nth_record(ind))->m_value_len; } - uint16_t get_record_size() const override { return sizeof(var_obj_record); } + uint32_t get_record_size() const override { return sizeof(var_obj_record); } - void set_nth_key_len(uint8_t* rec_ptr, uint16_t key_len) override { + void set_nth_key_len(uint8_t* rec_ptr, uint32_t key_len) override { r_cast< var_obj_record* >(rec_ptr)->m_key_len = key_len; } - void set_nth_value_len(uint8_t* rec_ptr, uint16_t value_len) override { + void set_nth_value_len(uint8_t* rec_ptr, uint32_t value_len) override { r_cast< var_obj_record* >(rec_ptr)->m_value_len = value_len; } diff --git a/src/include/homestore/index/index_table.hpp b/src/include/homestore/index/index_table.hpp index 7818490da..246557a62 100644 --- a/src/include/homestore/index/index_table.hpp +++ b/src/include/homestore/index/index_table.hpp @@ -34,9 +34,8 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { superblk< index_table_sb > m_sb; public: - IndexTable(uuid_t uuid, uuid_t parent_uuid, uint32_t user_sb_size, const BtreeConfig& cfg, - on_kv_read_t read_cb = nullptr, on_kv_update_t update_cb = nullptr, on_kv_remove_t remove_cb = nullptr) : - Btree< K, V >{cfg, std::move(read_cb), std::move(update_cb), std::move(remove_cb)}, m_sb{"index"} { + IndexTable(uuid_t uuid, uuid_t parent_uuid, uint32_t user_sb_size, const BtreeConfig& cfg) : + Btree< K, V >{cfg}, m_sb{"index"} { m_sb.create(sizeof(index_table_sb)); m_sb->uuid = uuid; m_sb->parent_uuid = parent_uuid; @@ -46,9 +45,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { if (status != btree_status_t::success) { throw std::runtime_error(fmt::format("Unable to create root node")); } } - IndexTable(const superblk< index_table_sb >& sb, const BtreeConfig& cfg, on_kv_read_t read_cb = nullptr, - on_kv_update_t update_cb = nullptr, on_kv_remove_t remove_cb = nullptr) : - Btree< K, V >{cfg, std::move(read_cb), std::move(update_cb), std::move(remove_cb)} { + IndexTable(const superblk< index_table_sb >& sb, const BtreeConfig& cfg) : Btree< K, V >{cfg} { m_sb = sb; Btree< K, V >::set_root_node_info(BtreeLinkInfo{m_sb->root_node, m_sb->link_version}); } @@ -160,7 +157,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { return BtreeNodePtr{n}; }); return btree_status_t::success; - } catch (std::exception& e) { return btree_status_t::read_failed; } + } 
catch (std::exception& e) { return btree_status_t::node_read_failed; } } btree_status_t refresh_node(const BtreeNodePtr& node, bool for_read_modify_write, void* context) const override { diff --git a/src/lib/blkalloc/bitmap_blk_allocator.cpp b/src/lib/blkalloc/bitmap_blk_allocator.cpp index 1967a988e..78f747c08 100644 --- a/src/lib/blkalloc/bitmap_blk_allocator.cpp +++ b/src/lib/blkalloc/bitmap_blk_allocator.cpp @@ -96,7 +96,7 @@ BlkAllocStatus BitmapBlkAllocator::alloc_on_disk(BlkId const& bid) { { auto lock{portion.portion_auto_lock()}; if (!hs()->is_initializing()) { - // During recovery we might try to alloc the entry which is already alloced while replaying the + // During recovery we might try to free the entry which is already freed while replaying the // journal, This assert is valid only post recovery. BLKALLOC_REL_ASSERT(m_disk_bm->is_bits_reset(b.blk_num(), b.blk_count()), "Expected disk blks to reset"); @@ -132,8 +132,8 @@ void BitmapBlkAllocator::free_on_disk(BlkId const& bid) { { auto lock{portion.portion_auto_lock()}; if (!hs()->is_initializing()) { - // During recovery we might try to free the entry which is already freed while replaying the journal, - // This assert is valid only post recovery. + // During recovery we might try to free the entry which is already freed while replaying the + // journal, This assert is valid only post recovery. if (!m_disk_bm->is_bits_set(b.blk_num(), b.blk_count())) { BLKALLOC_LOG(ERROR, "bit not set {} nblks {} chunk number {}", b.blk_num(), b.blk_count(), m_chunk_id); diff --git a/src/tests/btree_helpers/btree_test_helper.hpp b/src/tests/btree_helpers/btree_test_helper.hpp new file mode 100644 index 000000000..3bc943fc0 --- /dev/null +++ b/src/tests/btree_helpers/btree_test_helper.hpp @@ -0,0 +1,383 @@ +/********************************************************************************* + * Modifications Copyright 2017-2019 eBay Inc. + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + *********************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "test_common/range_scheduler.hpp" +#include "shadow_map.hpp" +#include "btree_test_kvs.hpp" + +static constexpr uint32_t g_node_size{4096}; + +template < typename TestType > +struct BtreeTestHelper : public testing::Test { + using T = TestType; + using K = typename TestType::KeyType; + using V = typename TestType::ValueType; + using mutex = iomgr::FiberManagerLib::shared_mutex; + using op_func_t = std::function< void(void) >; + + BtreeTestHelper() : testing::Test(), m_range_scheduler{SISL_OPTIONS["num_entries"].as< uint32_t >()} {} + + void SetUp() override { + m_cfg.m_leaf_node_type = T::leaf_node_type; + m_cfg.m_int_node_type = T::interior_node_type; + m_max_range_input = SISL_OPTIONS["num_entries"].as< uint32_t >(); + if (SISL_OPTIONS.count("disable_merge")) { m_cfg.m_merge_turned_on = false; } + + if (m_is_multi_threaded) { + std::mutex mtx; + iomanager.run_on_wait(iomgr::reactor_regex::all_io, [this, &mtx]() { + auto fv = iomanager.sync_io_capable_fibers(); + std::unique_lock lg(mtx); + m_fibers.insert(m_fibers.end(), fv.begin(), fv.end()); + }); + } + + m_operations["put"] = std::bind(&BtreeTestHelper::put_random, this); + m_operations["remove"] = std::bind(&BtreeTestHelper::remove_random, this); + m_operations["range_put"] = std::bind(&BtreeTestHelper::range_put_random, this); + m_operations["range_remove"] = std::bind(&BtreeTestHelper::range_remove_existing_random, this); + m_operations["query"] = std::bind(&BtreeTestHelper::query_random, this); + } + + void TearDown() override {} + +protected: + std::shared_ptr< typename T::BtreeType > m_bt; + ShadowMap< K, V > m_shadow_map; + BtreeConfig m_cfg{g_node_size}; + RangeScheduler m_range_scheduler; + uint32_t m_max_range_input{1000}; + bool m_is_multi_threaded{false}; + + std::map< std::string, op_func_t > m_operations; + std::vector< iomgr::io_fiber_t > m_fibers; + std::mutex m_test_done_mtx; + std::condition_variable m_test_done_cv; + + std::random_device m_re; + +public: + void preload(uint32_t preload_size) { + const auto chunk_size = preload_size / m_fibers.size(); + const auto last_chunk_size = preload_size % chunk_size ?: chunk_size; + auto test_count = m_fibers.size(); + + for (std::size_t i = 0; i < m_fibers.size(); ++i) { + const auto start_range = i * chunk_size; + const auto end_range = start_range + ((i == m_fibers.size() - 1) ? 
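The preload above splits the key space into one contiguous chunk per fiber. Written as a standalone helper, the partition arithmetic looks like this; a sketch (not the helper's actual code) in which the last range absorbs any remainder so every key is covered:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Partition [0, total) into nfibers contiguous ranges; the last range takes
    // any remainder so no key is left unloaded. Assumes nfibers >= 1.
    std::vector< std::pair< uint32_t, uint32_t > > partition(uint32_t total, uint32_t nfibers) {
        std::vector< std::pair< uint32_t, uint32_t > > ranges;
        uint32_t const chunk = total / nfibers;
        for (uint32_t i = 0; i < nfibers; ++i) {
            uint32_t const start = i * chunk;
            uint32_t const end = (i == nfibers - 1) ? total : start + chunk;
            ranges.emplace_back(start, end); // [start, end)
        }
        return ranges;
    }
    // partition(10, 3) -> {0,3}, {3,6}, {6,10}: the last fiber takes the extra key.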
last_chunk_size : chunk_size); + iomanager.run_on_forget(m_fibers[i], [this, start_range, end_range, &test_count]() { + for (uint32_t i = start_range; i < end_range; i++) { + put(i, btree_put_type::INSERT); + m_range_scheduler.put_key(i); + } + { + std::unique_lock lg(m_test_done_mtx); + if (--test_count == 0) { m_test_done_cv.notify_one(); } + } + }); + } + + { + std::unique_lock< std::mutex > lk(m_test_done_mtx); + m_test_done_cv.wait(lk, [&]() { return test_count == 0; }); + } + LOGINFO("Preload Done"); + } + + ////////////////////// All put operation variants /////////////////////////////// + void put(uint64_t k, btree_put_type put_type) { do_put(k, put_type, V::generate_rand()); } + + void put_random() { + auto [start_k, end_k] = m_range_scheduler.pick_random_non_existing_keys(1); + RELEASE_ASSERT_EQ(start_k, end_k, "Range scheduler pick_random_non_existing_keys issue"); + + do_put(start_k, btree_put_type::INSERT, V::generate_rand()); + } + + void range_put(uint32_t start_k, uint32_t end_k, V const& value, bool update) { + K start_key = K{start_k}; + K end_key = K{end_k}; + auto const nkeys = end_k - start_k + 1; + + auto preq = BtreeRangePutRequest< K >{BtreeKeyRange< K >{start_key, true, end_key, true}, + update ? btree_put_type::UPDATE : btree_put_type::UPSERT, &value}; + preq.enable_route_tracing(); + ASSERT_EQ(m_bt->put(preq), btree_status_t::success) << "range_put failed for " << start_k << "-" << end_k; + + if (update) { + m_shadow_map.range_update(start_key, nkeys, value); + m_range_scheduler.remove_keys_from_working(start_k, end_k); + } else { + m_shadow_map.range_upsert(start_k, nkeys, value); + m_range_scheduler.put_keys(start_k, end_k); + } + } + + void range_put_random() { + bool is_update{true}; + if constexpr (std::is_same_v< V, TestIntervalValue >) { is_update = false; } + + static thread_local std::uniform_int_distribution< uint32_t > s_rand_range_generator{1, 50}; + + auto const [start_k, end_k] = is_update + ? 
m_range_scheduler.pick_random_existing_keys(s_rand_range_generator(m_re))
+                                      : m_range_scheduler.pick_random_non_working_keys(s_rand_range_generator(m_re));
+
+        range_put(start_k, end_k, V::generate_rand(), is_update);
+    }
+
+    ////////////////////// All remove operation variants ///////////////////////////////
+    void remove_one(uint32_t k) {
+        auto existing_v = std::make_unique< V >();
+        auto pk = std::make_unique< K >(k);
+
+        auto rreq = BtreeSingleRemoveRequest{pk.get(), existing_v.get()};
+        bool removed = (m_bt->remove(rreq) == btree_status_t::success);
+
+        ASSERT_EQ(removed, m_shadow_map.exists(*pk))
+            << "Removal of key " << pk->key() << " status doesn't match with shadow";
+
+        if (removed) {
+            m_shadow_map.validate_data(rreq.key(), (const V&)rreq.value());
+            m_shadow_map.erase(rreq.key());
+        }
+        m_range_scheduler.remove_key(k);
+    }
+
+    void remove_random() {
+        auto const [start_k, end_k] = m_range_scheduler.pick_random_existing_keys(1);
+        RELEASE_ASSERT_EQ(start_k, end_k, "Range scheduler pick_random_existing_keys issue");
+
+        // remove_one() already releases the key from the range scheduler.
+        remove_one(start_k);
+    }
+
+    void range_remove_existing(uint32_t start_k, uint32_t count) {
+        auto [start_key, end_key] = m_shadow_map.pick_existing_range(K{start_k}, count);
+        do_range_remove(start_k, end_key.key(), true /* removing_all_existing */);
+    }
+
+    void range_remove_existing_random() {
+        static thread_local std::uniform_int_distribution< uint32_t > s_rand_range_generator{2, 5};
+
+        auto const [start_k, end_k] = m_range_scheduler.pick_random_existing_keys(s_rand_range_generator(m_re));
+        do_range_remove(start_k, end_k, true /* removing_all_existing */);
+    }
+
+    void range_remove_any(uint32_t start_k, uint32_t end_k) {
+        do_range_remove(start_k, end_k, false /* removing_all_existing */);
+    }
+
+    ////////////////////// All query operation variants ///////////////////////////////
+    void query_all() { do_query(0u, SISL_OPTIONS["num_entries"].as< uint32_t >() - 1, UINT32_MAX); }
+
+    void query_all_paginate(uint32_t batch_size) {
+        do_query(0u, SISL_OPTIONS["num_entries"].as< uint32_t >() - 1, batch_size);
+    }
+
+    void do_query(uint32_t start_k, uint32_t end_k, uint32_t batch_size) {
+        std::vector< std::pair< K, V > > out_vector;
+        uint32_t remaining = m_shadow_map.num_elems_in_range(start_k, end_k);
+        auto it = m_shadow_map.map_const().lower_bound(K{start_k});
+
+        BtreeQueryRequest< K > qreq{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true},
+                                    BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, batch_size};
+        while (remaining > 0) {
+            out_vector.clear();
+            qreq.enable_route_tracing();
+            auto const ret = m_bt->query(qreq, out_vector);
+            auto const expected_count = std::min(remaining, batch_size);
+
+            ASSERT_EQ(out_vector.size(), expected_count) << "Received incorrect number of entries on query pagination";
+            remaining -= expected_count;
+
+            if (remaining == 0) {
+                ASSERT_EQ(ret, btree_status_t::success) << "Expected success on query";
+            } else {
+                ASSERT_EQ(ret, btree_status_t::has_more) << "Expected query to return has_more";
+            }
+
+            for (size_t idx{0}; idx < out_vector.size(); ++idx) {
+                ASSERT_EQ(out_vector[idx].second, it->second)
+                    << "Range get doesn't return correct data for key=" << it->first << " idx=" << idx;
+                ++it;
+            }
+        }
+        out_vector.clear();
+        auto ret = m_bt->query(qreq, out_vector);
+        ASSERT_EQ(ret, btree_status_t::success) << "Expected success on query";
+        ASSERT_EQ(out_vector.size(), 0) << "Expected no entries on a fully drained query";
+
+        if (start_k < m_max_range_input) {
+            m_range_scheduler.remove_keys_from_working(start_k, std::min(end_k, m_max_range_input - 1));
+        }
+    }
+
+    void query_random() {
+        static thread_local std::uniform_int_distribution< uint32_t > s_rand_range_generator{1, 100};
+
+        auto const [start_k, end_k] = m_range_scheduler.pick_random_non_working_keys(s_rand_range_generator(m_re));
+        do_query(start_k, end_k, 79);
+    }
+
+    ////////////////////// All get operation variants ///////////////////////////////
+    void get_all() const {
+        for (const auto& [key, value] : m_shadow_map.map_const()) {
+            auto copy_key = std::make_unique< K >();
+            *copy_key = key;
+            auto out_v = std::make_unique< V >();
+            auto req = BtreeSingleGetRequest{copy_key.get(), out_v.get()};
+
+            const auto ret = m_bt->get(req);
+            ASSERT_EQ(ret, btree_status_t::success) << "Missing key " << key << " in btree but present in shadow map";
+            ASSERT_EQ((const V&)req.value(), value)
+                << "Found value in btree doesn't return correct data for key=" << key;
+        }
+    }
+
+    void get_specific(uint32_t k) const {
+        auto pk = std::make_unique< K >(k);
+        auto out_v = std::make_unique< V >();
+        auto req = BtreeSingleGetRequest{pk.get(), out_v.get()};
+
+        const auto status = m_bt->get(req);
+        if (status == btree_status_t::success) {
+            m_shadow_map.validate_data(req.key(), (const V&)req.value());
+        } else {
+            ASSERT_EQ(m_shadow_map.exists(req.key()), false)
+                << "Key " << k << " is present in the shadow map but missing in the btree";
+        }
+    }
+
+    void get_any(uint32_t start_k, uint32_t end_k) const {
+        auto out_k = std::make_unique< K >();
+        auto out_v = std::make_unique< V >();
+        auto req =
+            BtreeGetAnyRequest< K >{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true}, out_k.get(), out_v.get()};
+        const auto status = m_bt->get(req);
+        if (status == btree_status_t::success) {
+            ASSERT_EQ(m_shadow_map.exists_in_range(*(K*)req.m_outkey, start_k, end_k), true)
+                << "Get Any returned key=" << *(K*)req.m_outkey << " which is not in range " << start_k << "-"
+                << end_k << " according to shadow map";
+            m_shadow_map.validate_data(*(K*)req.m_outkey, *(V*)req.m_outval);
+        } else {
+            ASSERT_EQ(m_shadow_map.exists_in_range(*(K*)req.m_outkey, start_k, end_k), false)
+                << "Get Any couldn't find any key in the range " << start_k << "-" << end_k
+                << " but the shadow map has one";
+        }
+    }
+
+    void multi_op_execute(const std::vector< std::pair< std::string, int > >& op_list) {
+        preload(SISL_OPTIONS["preload_size"].as< uint32_t >());
+        print_keys();
+        run_in_parallel(op_list);
+        print_keys();
+    }
+
+    void print(const std::string& file = "") const { m_bt->print_tree(file); }
+    void print_keys() const { m_bt->print_tree_keys(); }
+
+    void compare_files(const std::string& before, const std::string& after) {
+        std::ifstream b(before);
+        std::ifstream a(after);
+        std::ostringstream ss_before, ss_after;
+        ss_before << b.rdbuf();
+        ss_after << a.rdbuf();
+        std::string s1 = ss_before.str();
+        std::string s2 = ss_after.str();
+        ASSERT_EQ(s1, s2) << "Mismatch in btree structure";
+    }
+
+private:
+    void do_put(uint64_t k, btree_put_type put_type, V const& value) {
+        auto existing_v = std::make_unique< V >();
+        K key = K{k};
+        auto sreq = BtreeSinglePutRequest{&key, &value, put_type, existing_v.get()};
+        bool done = (m_bt->put(sreq) == btree_status_t::success);
+
+        if (put_type == btree_put_type::INSERT) {
+            ASSERT_EQ(done, !m_shadow_map.exists(key));
+        } else {
+            ASSERT_EQ(done, m_shadow_map.exists(key));
+        }
+
+        m_shadow_map.put_and_check(key, value, *existing_v, done);
+        m_range_scheduler.put_key(k);
+    }
+
+    void do_range_remove(uint64_t start_k, uint64_t end_k, bool all_existing) {
+        K start_key = K{start_k};
+        K end_key = K{end_k};
+
+        auto rreq = BtreeRangeRemoveRequest< K >{BtreeKeyRange< K >{start_key, true, end_key, true}};
+        auto const ret = m_bt->remove(rreq);
+        m_shadow_map.range_erase(start_key, end_key);
+
+        if (all_existing) {
+            ASSERT_EQ(ret, btree_status_t::success)
+                << "not a successful remove op for range " << start_k << "-" << end_k;
+        }
+
+        if (start_k < m_max_range_input) {
+            m_range_scheduler.remove_keys(start_k, std::min(end_k, uint64_cast(m_max_range_input - 1)));
+        }
+    }
+
+    void run_in_parallel(const std::vector< std::pair< std::string, int > >& op_list) {
+        auto test_count = m_fibers.size();
+        for (auto it = m_fibers.begin(); it != m_fibers.end(); ++it) {
+            iomanager.run_on_forget(*it, [this, &test_count, op_list]() {
+                std::random_device rd{};
+                std::default_random_engine re{rd()};
+                const auto num_iters_per_thread =
+                    sisl::round_up(SISL_OPTIONS["num_iters"].as< uint32_t >() / m_fibers.size(), m_fibers.size());
+                std::vector< uint32_t > weights;
+                std::transform(op_list.begin(), op_list.end(), std::back_inserter(weights),
+                               [](const auto& pair) { return pair.second; });
+
+                // Construct a weighted distribution based on the input frequencies
+                std::discrete_distribution< uint32_t > s_rand_op_generator(weights.begin(), weights.end());
+
+                for (uint32_t i = 0; i < num_iters_per_thread; i++) {
+                    uint32_t op_idx = s_rand_op_generator(re);
+                    (this->m_operations[op_list[op_idx].first])();
+                }
+                {
+                    std::unique_lock lg(m_test_done_mtx);
+                    if (--test_count == 0) { m_test_done_cv.notify_one(); }
+                }
+            });
+        }
+
+        {
+            std::unique_lock< std::mutex > lk(m_test_done_mtx);
+            m_test_done_cv.wait(lk, [&]() { return test_count == 0; });
+        }
+        LOGINFO("ALL parallel jobs joined");
+    }
+};
\ No newline at end of file
diff --git a/src/tests/btree_test_kvs.hpp b/src/tests/btree_helpers/btree_test_kvs.hpp
similarity index 56%
rename from src/tests/btree_test_kvs.hpp
rename to src/tests/btree_helpers/btree_test_kvs.hpp
index 3822f8ec4..a5dada646 100644
--- a/src/tests/btree_test_kvs.hpp
+++ b/src/tests/btree_helpers/btree_test_kvs.hpp
@@ -60,19 +60,19 @@ using namespace homestore;
 class TestFixedKey : public BtreeKey {
 private:
-    uint32_t m_key{0};
+    uint64_t m_key{0};
 
 public:
     TestFixedKey() = default;
-    TestFixedKey(uint32_t k) : m_key{k} {}
+    TestFixedKey(uint64_t k) : m_key{k} {}
     TestFixedKey(const TestFixedKey& other) : TestFixedKey(other.serialize(), true) {}
     TestFixedKey(const BtreeKey& other) : TestFixedKey(other.serialize(), true) {}
-    TestFixedKey(const sisl::blob& b, bool copy) : BtreeKey(), m_key{*(r_cast< const uint32_t* >(b.bytes))} {}
-    TestFixedKey& operator=(const TestFixedKey& other) {
-        clone(other);
+    TestFixedKey(const sisl::blob& b, bool copy) : BtreeKey(), m_key{*(r_cast< const uint64_t* >(b.bytes))} {}
+    TestFixedKey& operator=(const TestFixedKey& other) = default;
+    TestFixedKey& operator=(BtreeKey const& other) {
+        m_key = s_cast< TestFixedKey const& >(other).m_key;
         return *this;
-    };
-    virtual void clone(const BtreeKey& other) override { m_key = ((TestFixedKey&)other).m_key; }
+    }
 
     virtual ~TestFixedKey() = default;
 
@@ -102,16 +102,16 @@ class TestFixedKey : public BtreeKey {
     }*/
 
     sisl::blob serialize() const override {
-        return sisl::blob{uintptr_cast(const_cast< uint32_t* >(&m_key)), uint32_cast(sizeof(uint32_t))};
+        return sisl::blob{uintptr_cast(const_cast< uint64_t* >(&m_key)), uint32_cast(sizeof(uint64_t))};
     }
     uint32_t serialized_size() const override { return get_fixed_size(); }
     static bool
is_fixed_size() { return true; } - static uint32_t get_fixed_size() { return (sizeof(uint32_t)); } + static uint32_t get_fixed_size() { return (sizeof(uint64_t)); } std::string to_string() const { return fmt::format("{}", m_key); } - void deserialize(const sisl::blob& b, bool copy) override { m_key = *(r_cast< const uint32_t* >(b.bytes)); } + void deserialize(const sisl::blob& b, bool copy) override { m_key = *(r_cast< const uint64_t* >(b.bytes)); } - static uint32_t get_estimate_max_size() { return get_fixed_size(); } + static uint32_t get_max_size() { return get_fixed_size(); } friend std::ostream& operator<<(std::ostream& os, const TestFixedKey& k) { os << k.to_string(); return os; @@ -120,12 +120,12 @@ class TestFixedKey : public BtreeKey { bool operator<(const TestFixedKey& o) const { return (compare(o) < 0); } bool operator==(const TestFixedKey& other) const { return (compare(other) == 0); } - uint32_t key() const { return m_key; } - uint32_t start_key(const BtreeKeyRange< TestFixedKey >& range) const { + uint64_t key() const { return m_key; } + uint64_t start_key(const BtreeKeyRange< TestFixedKey >& range) const { const TestFixedKey& k = (const TestFixedKey&)(range.start_key()); return k.m_key; } - uint32_t end_key(const BtreeKeyRange< TestFixedKey >& range) const { + uint64_t end_key(const BtreeKeyRange< TestFixedKey >& range) const { const TestFixedKey& k = (const TestFixedKey&)(range.end_key()); return k.m_key; } @@ -133,10 +133,10 @@ class TestFixedKey : public BtreeKey { class TestVarLenKey : public BtreeKey { private: - uint32_t m_key{0}; + uint64_t m_key{0}; - static uint32_t rand_key_size() { - return (uint32_cast(std::abs(std::round(g_randkeysize_generator(g_re)))) % g_max_keysize) + 1; + static uint64_t rand_key_size() { + return (uint64_cast(std::abs(std::round(g_randkeysize_generator(g_re)))) % g_max_keysize) + 1; } static std::shared_ptr< std::string > idx_to_key(uint32_t idx) { @@ -154,7 +154,7 @@ class TestVarLenKey : public BtreeKey { public: TestVarLenKey() = default; - TestVarLenKey(uint32_t k) : BtreeKey(), m_key{k} {} + TestVarLenKey(uint64_t k) : BtreeKey(), m_key{k} {} TestVarLenKey(const BtreeKey& other) : TestVarLenKey(other.serialize(), true) {} TestVarLenKey(const TestVarLenKey& other) = default; TestVarLenKey(TestVarLenKey&& other) = default; @@ -164,8 +164,6 @@ class TestVarLenKey : public BtreeKey { TestVarLenKey(const sisl::blob& b, bool copy) : BtreeKey() { deserialize(b, copy); } virtual ~TestVarLenKey() = default; - virtual void clone(const BtreeKey& other) override { m_key = ((TestVarLenKey&)other).m_key; } - sisl::blob serialize() const override { const auto& data = idx_to_key(m_key); return sisl::blob{(uint8_t*)(data->c_str()), (uint32_t)data->size()}; @@ -187,7 +185,7 @@ class TestVarLenKey : public BtreeKey { } // Add 8 bytes for preamble. 
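     // (Presumably the +8 covers the serialization preamble, making this a hard upper bound rather than an estimate.)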
- static uint32_t get_estimate_max_size() { return g_max_keysize + 8; } + static uint32_t get_max_size() { return g_max_keysize + 8; } int compare(const BtreeKey& o) const override { const TestVarLenKey& other = s_cast< const TestVarLenKey& >(o); @@ -224,17 +222,136 @@ class TestVarLenKey : public BtreeKey { bool operator<(const TestVarLenKey& o) const { return (compare(o) < 0); } bool operator==(const TestVarLenKey& other) const { return (compare(other) == 0); } - uint32_t key() const { return m_key; } - uint32_t start_key(const BtreeKeyRange< TestVarLenKey >& range) const { + uint64_t key() const { return m_key; } + uint64_t start_key(const BtreeKeyRange< TestVarLenKey >& range) const { const TestVarLenKey& k = (const TestVarLenKey&)(range.start_key()); return k.m_key; } - uint32_t end_key(const BtreeKeyRange< TestVarLenKey >& range) const { + uint64_t end_key(const BtreeKeyRange< TestVarLenKey >& range) const { const TestVarLenKey& k = (const TestVarLenKey&)(range.end_key()); return k.m_key; } }; +class TestIntervalKey : public BtreeIntervalKey { +private: +#pragma pack(1) + uint32_t m_base{0}; + uint32_t m_offset{0}; +#pragma pack() + +public: + TestIntervalKey() = default; + TestIntervalKey(uint64_t k) { + m_base = uint32_cast(k >> 32); + m_offset = uint32_cast(k & 0xFFFFFFFF); + } + TestIntervalKey(uint32_t b, uint32_t o) : m_base{b}, m_offset{o} {} + TestIntervalKey(const TestIntervalKey& other) = default; + TestIntervalKey(const BtreeKey& other) : TestIntervalKey(other.serialize(), true) {} + TestIntervalKey(const sisl::blob& b, bool copy) : BtreeIntervalKey() { + TestIntervalKey* other = r_cast< TestIntervalKey* >(b.bytes); + m_base = other->m_base; + m_offset = other->m_offset; + } + + TestIntervalKey& operator=(TestIntervalKey const& other) { + m_base = other.m_base; + m_offset = other.m_offset; + return *this; + }; + virtual ~TestIntervalKey() = default; + + /////////////////// Overriding methods of BtreeKey ///////////////// + int compare(BtreeKey const& o) const override { + TestIntervalKey const& other = s_cast< TestIntervalKey const& >(o); + if (m_base < other.m_base) { + return -1; + } else if (m_base > other.m_base) { + return 1; + } else if (m_offset < other.m_offset) { + return -1; + } else if (m_offset > other.m_offset) { + return 1; + } else { + return 0; + } + } + + sisl::blob serialize() const override { + return sisl::blob{uintptr_cast(const_cast< TestIntervalKey* >(this)), uint32_cast(sizeof(TestIntervalKey))}; + } + + uint32_t serialized_size() const override { return sizeof(TestIntervalKey); } + + void deserialize(sisl::blob const& b, bool copy) override { + assert(b.size == sizeof(TestIntervalKey)); + TestIntervalKey* other = r_cast< TestIntervalKey* >(b.bytes); + m_base = other->m_base; + m_offset = other->m_offset; + } + + std::string to_string() const override { return fmt::format("{}.{}", m_base, m_offset); } + + static uint32_t get_max_size() { return sizeof(TestIntervalKey); } + + static bool is_fixed_size() { return true; } + + static uint32_t get_fixed_size() { return sizeof(TestIntervalKey); } + + /////////////////// Overriding methods of BtreeIntervalKey ///////////////// + void shift(int n) override { m_offset += n; } + + int distance(BtreeKey const& f) const override { + TestIntervalKey const& from = s_cast< TestIntervalKey const& >(f); + DEBUG_ASSERT_EQ(m_base, from.m_base, "Invalid from key for distance"); + DEBUG_ASSERT_GE(m_offset, from.m_offset, "Invalid from key for distance"); + return m_offset - from.m_offset; + } + + bool is_interval_key() 
const override { return true; } + + sisl::blob serialize_prefix() const override { + return sisl::blob{uintptr_cast(const_cast< uint32_t* >(&m_base)), uint32_cast(sizeof(uint32_t))}; + } + + sisl::blob serialize_suffix() const override { + return sisl::blob{uintptr_cast(const_cast< uint32_t* >(&m_offset)), uint32_cast(sizeof(uint32_t))}; + } + + uint32_t serialized_prefix_size() const override { return uint32_cast(sizeof(uint32_t)); } + + uint32_t serialized_suffix_size() const override { return uint32_cast(sizeof(uint32_t)); }; + + void deserialize(sisl::blob const& prefix, sisl::blob const& suffix, bool) { + DEBUG_ASSERT_EQ(prefix.size, sizeof(uint32_t), "Invalid prefix size on deserialize"); + DEBUG_ASSERT_EQ(suffix.size, sizeof(uint32_t), "Invalid suffix size on deserialize"); + uint32_t* other_p = r_cast< uint32_t* >(prefix.bytes); + m_base = *other_p; + + uint32_t* other_s = r_cast< uint32_t* >(suffix.bytes); + m_offset = *other_s; + } + + /////////////////// Local methods for helping tests ////////////////// + bool operator<(const TestIntervalKey& o) const { return (compare(o) < 0); } + bool operator==(const TestIntervalKey& other) const { return (compare(other) == 0); } + + uint64_t key() const { return (uint64_cast(m_base) << 32) | m_offset; } + uint64_t start_key(const BtreeKeyRange< TestIntervalKey >& range) const { + const TestIntervalKey& k = (const TestIntervalKey&)(range.start_key()); + return k.key(); + } + uint64_t end_key(const BtreeKeyRange< TestIntervalKey >& range) const { + const TestIntervalKey& k = (const TestIntervalKey&)(range.end_key()); + return k.key(); + } + friend std::ostream& operator<<(std::ostream& os, const TestIntervalKey& k) { + os << k.to_string(); + return os; + } +}; + class TestFixedValue : public BtreeValue { private: public: @@ -327,3 +444,71 @@ class TestVarLenValue : public BtreeValue { private: std::string m_val; }; + +class TestIntervalValue : public BtreeIntervalValue { +private: +#pragma pack(1) + uint32_t m_base_val{0}; + uint16_t m_offset{0}; +#pragma pack() + +public: + TestIntervalValue(bnodeid_t val) { assert(0); } + TestIntervalValue(uint32_t val, uint16_t o) : BtreeIntervalValue(), m_base_val{val}, m_offset{o} {} + TestIntervalValue() = default; + TestIntervalValue(const TestIntervalValue& other) : + BtreeIntervalValue(), m_base_val{other.m_base_val}, m_offset{other.m_offset} {} + TestIntervalValue(const sisl::blob& b, bool copy) : BtreeIntervalValue() { this->deserialize(b, copy); } + virtual ~TestIntervalValue() = default; + + static TestIntervalValue generate_rand() { + return TestIntervalValue{g_randval_generator(g_re), s_cast< uint16_t >(0)}; + } + + ///////////////////////////// Overriding methods of BtreeValue ////////////////////////// + TestIntervalValue& operator=(const TestIntervalValue& other) = default; + sisl::blob serialize() const override { + sisl::blob b; + b.bytes = uintptr_cast(const_cast< TestIntervalValue* >(this)); + b.size = sizeof(TestIntervalValue); + return b; + } + + uint32_t serialized_size() const override { return sizeof(TestIntervalValue); } + static uint32_t get_fixed_size() { return sizeof(TestIntervalValue); } + void deserialize(const sisl::blob& b, bool) { + TestIntervalValue const* other = r_cast< TestIntervalValue const* >(b.bytes); + m_base_val = other->m_base_val; + m_offset = other->m_offset; + } + + std::string to_string() const override { return fmt::format("{}.{}", m_base_val, m_offset); } + + friend std::ostream& operator<<(std::ostream& os, const TestIntervalValue& v) { + os << 
v.to_string(); + return os; + } + + ///////////////////////////// Overriding methods of BtreeIntervalValue ////////////////////////// + void shift(int n) override { m_offset += n; } + + sisl::blob serialize_prefix() const override { + return sisl::blob{uintptr_cast(const_cast< uint32_t* >(&m_base_val)), uint32_cast(sizeof(uint32_t))}; + } + sisl::blob serialize_suffix() const override { + return sisl::blob{uintptr_cast(const_cast< uint16_t* >(&m_offset)), uint32_cast(sizeof(uint16_t))}; + } + uint32_t serialized_prefix_size() const override { return uint32_cast(sizeof(uint32_t)); } + uint32_t serialized_suffix_size() const override { return uint32_cast(sizeof(uint16_t)); } + + void deserialize(sisl::blob const& prefix, sisl::blob const& suffix, bool) override { + DEBUG_ASSERT_EQ(prefix.size, sizeof(uint32_t), "Invalid prefix size on deserialize"); + DEBUG_ASSERT_EQ(suffix.size, sizeof(uint16_t), "Invalid suffix size on deserialize"); + m_base_val = *(r_cast< uint32_t* >(prefix.bytes)); + m_offset = *(r_cast< uint16_t* >(suffix.bytes)); + } + + bool operator==(TestIntervalValue const& other) const { + return ((m_base_val == other.m_base_val) && (m_offset == other.m_offset)); + } +}; diff --git a/src/tests/btree_helpers/shadow_map.hpp b/src/tests/btree_helpers/shadow_map.hpp new file mode 100644 index 000000000..1e7418122 --- /dev/null +++ b/src/tests/btree_helpers/shadow_map.hpp @@ -0,0 +1,96 @@ +#include +#include + +#include "btree_test_kvs.hpp" + +template < typename K, typename V > +class ShadowMap { +private: + std::map< K, V > m_map; + +public: + void put_and_check(const K& key, const V& val, const V& old_val, bool expected_success) { + auto const [it, happened] = m_map.insert(std::make_pair(key, val)); + ASSERT_EQ(happened, expected_success) << "Testcase issue, expected inserted slots to be in shadow map"; + if (!happened) { + ASSERT_EQ(old_val, it->second) << "Put: Existing value doesn't return correct data for key: " << it->first; + } + } + + void range_upsert(uint64_t start_k, uint32_t count, const V& val) { + for (uint32_t i{0}; i < count; ++i) { + K key{start_k + i}; + V range_value{val}; + if constexpr (std::is_same_v< V, TestIntervalValue >) { range_value.shift(i); } + m_map.insert_or_assign(key, range_value); + } + } + + void range_update(const K& start_key, uint32_t count, const V& new_val) { + auto const start_it = m_map.lower_bound(start_key); + auto it = start_it; + uint32_t c = 0; + while ((it != m_map.end()) && (++c <= count)) { + it->second = new_val; + ++it; + } + } + + std::pair< K, K > pick_existing_range(const K& start_key, uint32_t max_count) const { + auto const start_it = m_map.lower_bound(start_key); + auto it = start_it; + uint32_t count = 0; + while ((it != m_map.cend()) && (++count < max_count)) { + ++it; + } + return std::pair(start_it->first, it->first); + } + + bool exists(const K& key) const { return m_map.find(key) != m_map.end(); } + + bool exists_in_range(const K& key, uint64_t start_k, uint64_t end_k) const { + const auto itlower = m_map.lower_bound(K{start_k}); + const auto itupper = m_map.upper_bound(K{end_k}); + auto it = itlower; + while (it != itupper) { + if (it->first == key) { return true; } + ++it; + } + return false; + } + + uint64_t size() const { return m_map.size(); } + + uint32_t num_elems_in_range(uint64_t start_k, uint64_t end_k) const { + const auto itlower = m_map.lower_bound(K{start_k}); + const auto itupper = m_map.upper_bound(K{end_k}); + return std::distance(itlower, itupper); + } + + void validate_data(const K& key, const V& 
btree_val) const {
+        const auto r = m_map.find(key);
+        ASSERT_NE(r, m_map.end()) << "Key " << key.to_string() << " is not present in shadow map";
+        ASSERT_EQ(btree_val, r->second) << "Found value in btree doesn't return correct data for key=" << r->first;
+    }
+
+    void erase(const K& key) { m_map.erase(key); }
+
+    void range_erase(const K& start_key, uint32_t count) {
+        auto it = m_map.lower_bound(start_key);
+        uint32_t i{0};
+        while ((it != m_map.cend()) && (i++ < count)) {
+            it = m_map.erase(it);
+        }
+    }
+
+    void range_erase(const K& start_key, const K& end_key) {
+        auto it = m_map.lower_bound(start_key);
+        auto const end_it = m_map.upper_bound(end_key);
+        while ((it != m_map.cend()) && (it != end_it)) {
+            it = m_map.erase(it);
+        }
+    }
+
+    std::map< K, V >& map() { return m_map; }
+    const std::map< K, V >& map_const() const { return m_map; }
+};
diff --git a/src/tests/test_btree_node.cpp b/src/tests/test_btree_node.cpp
index 1f06d8bdf..23109cd58 100644
--- a/src/tests/test_btree_node.cpp
+++ b/src/tests/test_btree_node.cpp
@@ -23,7 +23,8 @@
 #include
 #include
 #include
-#include "btree_test_kvs.hpp"
+#include
+#include "btree_helpers/btree_test_kvs.hpp"
 
 static constexpr uint32_t g_node_size{4096};
 static constexpr uint32_t g_max_keys{6000};
@@ -56,6 +57,12 @@ struct VarObjSizeNodeTest {
     using ValueType = TestVarLenValue;
 };
 
+struct PrefixIntervalBtreeTest {
+    using NodeType = FixedPrefixNode< TestIntervalKey, TestIntervalValue >;
+    using KeyType = TestIntervalKey;
+    using ValueType = TestIntervalValue;
+};
+
 template < typename TestType >
 struct NodeTest : public testing::Test {
     using T = TestType;
@@ -85,9 +92,7 @@ struct NodeTest : public testing::Test {
         bool done = m_node1->put(key, value, put_type, &existing_v);
 
         bool expected_done{true};
-        if (m_shadow_map.find(key) != m_shadow_map.end()) {
-            expected_done = (put_type != btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
-        }
+        if (m_shadow_map.find(key) != m_shadow_map.end()) { expected_done = (put_type != btree_put_type::INSERT); }
         ASSERT_EQ(done, expected_done) << "Expected put of key " << k << " of put_type " << enum_name(put_type)
                                        << " to be " << expected_done;
         if (expected_done) {
@@ -100,6 +105,39 @@
         }
     }
 
+    void put_range(uint32_t k, uint32_t count) {
+        btree_put_type put_type;
+        if constexpr (!std::is_same_v< V, TestIntervalValue >) {
+            // For non-interval values we support only update, so we need to first put the value
+            for (uint32_t i{0}; i < count; ++i) {
+                this->put(k + i, btree_put_type::UPSERT);
+            }
+            put_type = btree_put_type::UPDATE;
+        } else {
+            put_type = btree_put_type::UPSERT;
+        }
+
+        K start_key{k};
+        K end_key{k + count - 1};
+        V value{V::generate_rand()};
+        auto status = m_node1->multi_put(BtreeKeyRange{start_key, true, end_key, true}, start_key, value, put_type,
+                                         nullptr /* last_failed_key */);
+        ASSERT_EQ(status, btree_status_t::success) << "Expected range put of key " << k << " to " << k + count - 1
+                                                   << " of put_type " << enum_name(put_type) << " to be successful";
+
+        for (uint32_t i{0}; i < count; ++i) {
+            K key{k + i};
+            V range_value{value};
+            if constexpr (std::is_same_v< V, TestIntervalValue >) { range_value.shift(i); }
+
+            if (m_shadow_map.find(key) != m_shadow_map.end()) {
+                if (put_type != btree_put_type::INSERT) { m_shadow_map.insert_or_assign(key, range_value); }
+            } else {
+                m_shadow_map.insert(std::make_pair(key, range_value));
+            }
+        }
+    }
+
     void update(uint32_t k, bool validate_update = true) {
         K key{k};
         V value{V::generate_rand()};
@@ -140,6 +178,7 @@ struct NodeTest : public testing::Test {
         if (validate_remove) { validate_specific(k); }
     }
 
+#if 0
     void remove_range(uint32_t start_idx, uint32_t end_idx) {
         ASSERT_LT(end_idx, m_node1->total_entries());
         ASSERT_LT(start_idx, m_node1->total_entries());
@@ -168,15 +207,40 @@ struct NodeTest : public testing::Test {
                 << "end index key= " << head_k << " key[" << i << "]= " << m_node1->template get_nth_key< K >(i, false);
         }
     }
+#endif
+
+    void remove_range(uint32_t start_idx, uint32_t end_idx) {
+        ASSERT_LT(end_idx, m_node1->total_entries());
+        ASSERT_LT(start_idx, m_node1->total_entries());
+        ASSERT_GE(start_idx, 0);
+        ASSERT_GE(end_idx, start_idx);
+
+        auto num_entries = m_node1->total_entries();
+        auto expected_nremoved = std::distance(m_shadow_map.lower_bound(start_idx), m_shadow_map.upper_bound(end_idx));
+
+        uint32_t nremoved = m_node1->multi_remove(BtreeKeyRange< K >{K{start_idx}, true, K{end_idx}, true});
+        ASSERT_EQ(nremoved, expected_nremoved) << "multi_remove nremoved doesn't match what is expected";
+        auto new_num_entries = m_node1->total_entries();
+
+        ASSERT_EQ(new_num_entries, num_entries - nremoved)
+            << "Total deleted objects does not match! start_idx= " << start_idx << " end_idx= " << end_idx
+            << " expected delete: " << end_idx - start_idx + 1 << " original node entries: " << num_entries
+            << " current node entries: " << new_num_entries;
+
+        // Mirror the removal in the shadow map so later validations compare against the same contents.
+        for (uint32_t i = start_idx; i <= end_idx; ++i) {
+            m_shadow_map.erase(K{i});
+        }
+    }
 
     void validate_get_all() const {
         uint32_t start_ind{0};
         uint32_t end_ind{0};
         std::vector< std::pair< K, V > > out_vector;
-        auto ret = m_node1->get_all(BtreeKeyRange< K >{K{0u}, true, K{g_max_keys}, false}, g_max_keys, start_ind,
-                                    end_ind, &out_vector);
-        ret += m_node2->get_all(BtreeKeyRange< K >{K{0u}, true, K{g_max_keys}, false}, g_max_keys, start_ind, end_ind,
-                                &out_vector);
+        auto ret = m_node1->multi_get(BtreeKeyRange< K >{K{0u}, true, K{g_max_keys}, false}, g_max_keys, start_ind,
+                                      end_ind, &out_vector);
+        ret += m_node2->multi_get(BtreeKeyRange< K >{K{0u}, true, K{g_max_keys}, false}, g_max_keys, start_ind, end_ind,
+                                  &out_vector);
 
         ASSERT_EQ(ret, m_shadow_map.size()) << "Expected number of entries to be same with shadow_map size";
         ASSERT_EQ(out_vector.size(), m_shadow_map.size())
@@ -243,7 +307,7 @@ struct NodeTest : public testing::Test {
     void put_list(const std::vector< uint32_t >& keys) {
         for (const auto& k : keys) {
             if (!this->has_room()) { break; }
-            put(k, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+            put(k, btree_put_type::INSERT);
         }
     }
 
@@ -252,7 +316,7 @@ struct NodeTest : public testing::Test {
         LOGDEBUG("Node2:\n {}", m_node2->to_string(true));
     }
 
-    uint32_t remaining_space() const { return m_node1->available_size(m_cfg); }
+    uint32_t remaining_space() const { return m_node1->available_size(); }
     bool has_room() const { return remaining_space() > (g_max_keysize + g_max_valsize + 32); }
 
 private:
@@ -263,12 +327,13 @@ struct NodeTest : public testing::Test {
     }
 };
 
-using NodeTypes = testing::Types< FixedLenNodeTest, VarKeySizeNodeTest, VarValueSizeNodeTest, VarObjSizeNodeTest >;
+using NodeTypes = testing::Types< FixedLenNodeTest, VarKeySizeNodeTest, VarValueSizeNodeTest, VarObjSizeNodeTest,
+                                  PrefixIntervalBtreeTest >;
 TYPED_TEST_SUITE(NodeTest, NodeTypes);
 
 TYPED_TEST(NodeTest, SequentialInsert) {
     for (uint32_t i{0}; (i < 100 && this->has_room()); ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     this->print();
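     // put() with btree_put_type::INSERT is expected to fail on duplicates; the shadow-map check in put() relies on that.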
this->validate_get_all(); @@ -279,7 +344,7 @@ TYPED_TEST(NodeTest, SequentialInsert) { TYPED_TEST(NodeTest, ReverseInsert) { for (uint32_t i{100}; (i > 0 && this->has_room()); --i) { - this->put(i - 1, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); + this->put(i - 1, btree_put_type::INSERT); } this->print(); this->validate_get_all(); @@ -302,9 +367,17 @@ TYPED_TEST(NodeTest, Remove) { this->validate_get_any(g_max_keys / 2, g_max_keys - 1); } +TYPED_TEST(NodeTest, RangePutGet) { + for (uint32_t i = 0; i < 40; i += 5) { + this->put_range(i, 5); + } + + this->validate_get_all(); +} + TYPED_TEST(NodeTest, RemoveRangeIndex) { for (uint32_t i = 0; i < 20; i++) { - this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); + this->put(i, btree_put_type::INSERT); } this->print(); this->remove_range(5, 10); // size = 14 EXPECT: 0 1 2 3 4 [5 6 7 8 9 10] 11 12 13 14 15 16 17 18 19 @@ -329,7 +402,7 @@ TYPED_TEST(NodeTest, Update) { TYPED_TEST(NodeTest, RandomInsertRemoveUpdate) { uint32_t num_inserted{0}; while (this->has_room()) { - this->put(g_randkey_generator(g_re), btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); + this->put(g_randkey_generator(g_re), btree_put_type::INSERT); ++num_inserted; } LOGDEBUG("After random insertion of {} objects", num_inserted); diff --git a/src/tests/test_common/range_scheduler.hpp b/src/tests/test_common/range_scheduler.hpp index 57732aa51..5dc2e4d1b 100644 --- a/src/tests/test_common/range_scheduler.hpp +++ b/src/tests/test_common/range_scheduler.hpp @@ -20,19 +20,44 @@ #pragma once -#include -#include -#include -#include +#include #include + namespace homestore { -using namespace boost::icl; -typedef interval_set< uint32_t > set_t; -typedef set_t::interval_type ival; using mutex = iomgr::FiberManagerLib::shared_mutex; +static std::pair< uint64_t, uint64_t > get_next_contiguous_set_bits(const sisl::Bitset& bm, uint64_t search_start_bit, + uint64_t max_count) { + uint64_t first_set_bit{sisl::Bitset::npos}; + uint64_t set_count{0}; + uint64_t b; + while (((b = bm.get_next_set_bit(search_start_bit)) != sisl::Bitset::npos) && (set_count < max_count)) { + if (first_set_bit == sisl::Bitset::npos) { + first_set_bit = b; + } else if (b > search_start_bit) { + break; + } + ++set_count; + search_start_bit = b + 1; + } + + return std::pair(first_set_bit, set_count); +} + class RangeScheduler { +private: + sisl::Bitset m_existing_keys; + sisl::Bitset m_working_keys; + mutex m_set_lock; + std::uniform_int_distribution< uint32_t > m_rand_start_key_generator; + + std::random_device m_rd; + public: + RangeScheduler(uint32_t num_keys) : m_existing_keys{num_keys}, m_working_keys{num_keys} { + m_rand_start_key_generator = std::uniform_int_distribution< uint32_t >(0, num_keys - 1); + } + void remove_keys_from_working(uint32_t s, uint32_t e) { std::unique_lock< mutex > lk(m_set_lock); remove_from_working(s, e); @@ -62,165 +87,109 @@ class RangeScheduler { remove_from_working(start_key, end_key); } - int pick_random_non_existing_keys(uint32_t n_keys = 1, uint32_t max_range = 0) { + std::pair< uint32_t, uint32_t > pick_random_non_existing_keys(uint32_t max_keys) { + std::pair< uint32_t, uint32_t > ret; + do { + ret = try_pick_random_non_existing_keys(max_keys); + if (ret.first != UINT32_MAX) { break; } + } while (true); + + return ret; + } + + std::pair< uint32_t, uint32_t > pick_random_existing_keys(uint32_t max_keys) { + std::pair< uint32_t, uint32_t > ret; + do { + ret = try_pick_random_existing_keys(max_keys); + if (ret.first != UINT32_MAX) { break; } + } while (true); + + return ret; + } + + 
std::pair< uint32_t, uint32_t > pick_random_non_working_keys(uint32_t max_keys) {
+        std::pair< uint32_t, uint32_t > ret;
+        do {
+            ret = try_pick_random_non_working_keys(max_keys);
+            if (ret.first != UINT32_MAX) { break; }
+        } while (true);
+
+        return ret;
+    }
+
+private:
+    std::pair< uint32_t, uint32_t > try_pick_random_non_existing_keys(uint32_t max_keys) {
         std::unique_lock< mutex > lk(m_set_lock);
-        uint32_t working_range = max_range <= 0 ? std::numeric_limits< uint32_t >::max() : max_range;
-        uint32_t num_retry = 0;
-
-        auto num_intervals = static_cast< uint32_t >(m_existing_keys.iterative_size());
-        std::uniform_int_distribution< uint32_t > s_rand_interval_generator{0, num_intervals - 1};
-        uint32_t start_key = std::numeric_limits< uint32_t >::max();
-
-        while (num_retry < max_retries) {
-            // find a random interval
-            uint32_t next_lower = working_range;
-            uint32_t previous_upper = 0;
-            auto it = m_existing_keys.begin();
-            // if the selected interval is the last ... check size between this one and the working_range, rand n keys
-            // in (previous_upper, working_range] = [previous_upper+1, working_range] choose the gap between this upper
-            // and the next begin. and check the size! rand nkeys in [previous_upper, next_lower]
-            if (num_intervals != 0) {
-                uint32_t cur_interval_idx = s_rand_interval_generator(m_re);
-                std::advance(it, cur_interval_idx);
-                previous_upper = last(*it) + 1; // to be inclusivelast
-                it++;
-                if (it != m_existing_keys.end()) { next_lower = first(*it) - 1; }
-            }
-            if ((next_lower + 1) < (n_keys + previous_upper)) { // check < or <=
-                num_retry++;
-                continue;
-            }
-
-            // choose randomly n keys in [previous_upper, next_lower]
-            std::uniform_int_distribution< uint32_t > rand_key_generator{
-                previous_upper, next_lower - n_keys + 1}; // n_keys or n_keys +- (1)
-            start_key = rand_key_generator(m_re);
-            auto found = (m_working_keys & ival::closed(start_key, start_key + n_keys - 1));
-            if (found.empty()) {
-                auto validate = m_existing_keys & ival::closed(start_key, start_key + n_keys - 1);
-                assert(validate.empty());
-                break;
-            }
-            num_retry++;
-            continue;
+        if ((m_existing_keys.size() - m_existing_keys.get_set_count()) == 0) {
+            throw std::out_of_range("All keys already exist; no non-existing keys to pick");
+        }
+
+        uint32_t const search_start = m_rand_start_key_generator(m_rd);
+        auto bb = m_existing_keys.get_next_contiguous_n_reset_bits(search_start, max_keys);
+        if (bb.nbits && m_working_keys.is_bits_reset(bb.start_bit, bb.nbits)) {
+            uint32_t const start = uint32_cast(bb.start_bit);
+            uint32_t const end = uint32_cast(bb.start_bit + bb.nbits - 1);
+            add_to_working(start, end);
+            return std::pair(start, end);
+        } else {
+            return std::pair(UINT32_MAX, UINT32_MAX);
         }
-        if (num_retry == max_retries) { return -1; }
-        // add from working keys and return the start_key;
-        this->add_to_working(start_key, start_key + n_keys - 1);
-        assert(start_key + n_keys - 1 <= working_range);
-        return static_cast< int >(start_key);
     }
 
-    int pick_random_existing_keys(uint32_t n_keys = 1, uint32_t max_range = 0) {
+    std::pair< uint32_t, uint32_t > try_pick_random_existing_keys(uint32_t max_keys) {
         std::unique_lock< mutex > lk(m_set_lock);
-        uint32_t working_range = max_range <= 0 ? std::numeric_limits< uint32_t >::max() : max_range;
-        uint32_t num_retry = 0;
-
-        auto num_intervals = static_cast< uint32_t >(m_existing_keys.iterative_size());
-        // empty keys
-        if (num_intervals == 0) { return -1; }
-        std::uniform_int_distribution< uint32_t > s_rand_interval_generator{0, num_intervals - 1};
-        uint32_t start_key = std::numeric_limits< uint32_t >::max();
-
-        while (num_retry < max_retries) {
-            // find a random interval
-            auto it = m_existing_keys.begin();
-            uint32_t cur_interval_idx = s_rand_interval_generator(m_re);
-            std::advance(it, cur_interval_idx);
-            uint32_t upper = last(*it);
-            uint32_t lower = first(*it);
-            if ((upper + 1) < (n_keys + lower)) {
-                num_retry++;
-                continue;
-            }
-            // choose randomly n keys in [lower, upper]
-            std::uniform_int_distribution< uint32_t > rand_key_generator{lower, upper - n_keys + 1};
-            start_key = rand_key_generator(m_re);
-            auto found = (m_working_keys & ival::closed(start_key, start_key + n_keys - 1));
-            if (found.empty()) {
-                auto validate = m_existing_keys & ival::closed(start_key, start_key + n_keys - 1);
-                assert(!validate.empty());
-                break;
-            }
-            num_retry++;
-            continue;
+        if (m_existing_keys.get_set_count() == 0) {
+            DEBUG_ASSERT(false, "Couldn't find any existing keys");
+            throw std::out_of_range("Couldn't find any existing keys");
+        }
+
+        uint32_t const search_start = m_rand_start_key_generator(m_rd);
+        auto [s, count] = get_next_contiguous_set_bits(m_existing_keys, search_start, max_keys);
+
+        if (count && m_working_keys.is_bits_reset(s, count)) {
+            uint32_t const start = uint32_cast(s);
+            uint32_t const end = uint32_cast(s + count - 1);
+            add_to_working(start, end);
+            return std::pair(start, end);
+        } else {
+            return std::pair(UINT32_MAX, UINT32_MAX);
         }
-        if (num_retry == max_retries) { return -1; }
-        // add from working keys and return the start_key;
-        this->add_to_working(start_key, start_key + n_keys - 1);
-        assert(start_key + n_keys - 1 <= working_range);
-        return static_cast< int >(start_key);
     }
 
-    int pick_random_non_working_keys(uint32_t n_keys = 1, uint32_t max_range = 0) {
+    std::pair< uint32_t, uint32_t > try_pick_random_non_working_keys(uint32_t max_keys) {
         std::unique_lock< mutex > lk(m_set_lock);
-        uint32_t working_range = max_range <= 0 ?
std::numeric_limits< uint32_t >::max() : max_range; - uint32_t num_retry = 0; - - auto num_intervals = static_cast< uint32_t >(m_working_keys.iterative_size()); - // empty keys - if (num_intervals == 0) { return -1; } - std::uniform_int_distribution< uint32_t > s_rand_interval_generator{0, num_intervals - 1}; - uint32_t start_key = std::numeric_limits< uint32_t >::max(); - - while (num_retry < max_retries) { - // find a random interval - uint32_t next_lower = working_range; - uint32_t previous_upper = 0; - auto it = m_working_keys.begin(); - if (num_intervals != 0) { - uint32_t cur_interval_idx = s_rand_interval_generator(m_re); - std::advance(it, cur_interval_idx); - previous_upper = last(*it) + 1; // to be inclusivelast - it++; - if (it != m_working_keys.end()) { next_lower = first(*it) - 1; } - } - if ((next_lower + 1) < (n_keys + previous_upper)) { // check < or <= - num_retry++; - continue; - } - - // choose randomly n keys in [previous_upper, next_lower] - std::uniform_int_distribution< uint32_t > rand_key_generator{ - previous_upper, next_lower - n_keys + 1}; // n_keys or n_keys +- (1) - start_key = rand_key_generator(m_re); - break; + + uint32_t const search_start = m_rand_start_key_generator(m_rd); + auto bb = m_working_keys.get_next_contiguous_n_reset_bits(search_start, max_keys); + + if (bb.nbits) { + uint32_t const start = uint32_cast(bb.start_bit); + uint32_t const end = uint32_cast(bb.start_bit + bb.nbits - 1); + add_to_working(start, end); + return std::pair(start, end); + } else { + return std::pair(UINT32_MAX, UINT32_MAX); } - if (num_retry == max_retries) { return -1; } - // add from working keys and return the start_key; - this->add_to_working(start_key, start_key + n_keys - 1); - assert(start_key + n_keys - 1 <= working_range); - return static_cast< int >(start_key); } -private: void add_to_existing(uint32_t s) { add_to_existing(s, s); } void add_to_working(uint32_t s) { add_to_working(s, s); } - void add_to_existing(uint32_t s, uint32_t e) { m_existing_keys += ival::closed(s, e); } + void add_to_existing(uint32_t s, uint32_t e) { m_existing_keys.set_bits(s, e - s + 1); } - void add_to_working(uint32_t s, uint32_t e) { m_working_keys += ival::closed(s, e); } + void add_to_working(uint32_t s, uint32_t e) { m_working_keys.set_bits(s, e - s + 1); } - void remove_from_existing(uint32_t s, uint32_t e) { m_existing_keys -= ival::closed(s, e); } + void remove_from_existing(uint32_t s, uint32_t e) { m_existing_keys.reset_bits(s, e - s + 1); } void remove_from_existing(uint32_t s) { remove_from_existing(s, s); } void remove_from_working(uint32_t s) { remove_from_working(s, s); } - void remove_from_working(uint32_t s, uint32_t e) { m_working_keys -= ival::closed(s, e); } + void remove_from_working(uint32_t s, uint32_t e) { m_working_keys.reset_bits(s, e - s + 1); } - bool is_working(uint32_t cur_key) { return m_working_keys.find(cur_key) != m_working_keys.end(); } - - bool is_existing(uint32_t cur_key) { return m_existing_keys.find(cur_key) != m_existing_keys.end(); } - -private: - set_t m_existing_keys; - set_t m_working_keys; - mutex m_set_lock; + bool is_working(uint32_t cur_key) const { return m_working_keys.is_bits_set(cur_key, 1); } - std::random_device m_rd{}; - std::default_random_engine m_re{m_rd()}; - const uint32_t max_retries = 5; + bool is_existing(uint32_t cur_key) const { return m_existing_keys.is_bits_set(cur_key, 1); } }; }; // namespace homestore diff --git a/src/tests/test_index_btree.cpp b/src/tests/test_index_btree.cpp index 6805f2b0e..14062cd1f 100644 --- 
a/src/tests/test_index_btree.cpp +++ b/src/tests/test_index_btree.cpp @@ -24,7 +24,6 @@ #include #include #include -#include "btree_test_kvs.hpp" #include #include #include @@ -32,6 +31,9 @@ #include "common/homestore_config.hpp" #include "common/resource_mgr.hpp" #include "test_common/homestore_test_common.hpp" +#include "test_common/range_scheduler.hpp" +#include "btree_helpers/btree_test_kvs.hpp" +#include "btree_helpers/btree_test_helper.hpp" using namespace homestore; @@ -82,8 +84,16 @@ struct VarObjSizeBtreeTest { static constexpr btree_node_type interior_node_type = btree_node_type::VAR_OBJECT; }; +struct PrefixIntervalBtreeTest { + using BtreeType = IndexTable< TestIntervalKey, TestIntervalValue >; + using KeyType = TestIntervalKey; + using ValueType = TestIntervalValue; + static constexpr btree_node_type leaf_node_type = btree_node_type::PREFIX; + static constexpr btree_node_type interior_node_type = btree_node_type::FIXED; +}; + template < typename TestType > -struct BtreeTest : public testing::Test { +struct BtreeTest : public BtreeTestHelper< TestType > { using T = TestType; using K = typename TestType::KeyType; using V = typename TestType::ValueType; @@ -94,7 +104,7 @@ struct BtreeTest : public testing::Test { std::shared_ptr< IndexTableBase > on_index_table_found(const superblk< index_table_sb >& sb) override { LOGINFO("Index table recovered"); LOGINFO("Root bnode_id {} version {}", sb->root_node, sb->link_version); - m_test->m_bt = std::make_shared< typename T::BtreeType >(sb, *m_test->m_bt_cfg); + m_test->m_bt = std::make_shared< typename T::BtreeType >(sb, m_test->m_cfg); return m_test->m_bt; } @@ -102,10 +112,6 @@ struct BtreeTest : public testing::Test { BtreeTest* m_test; }; - std::shared_ptr< typename T::BtreeType > m_bt; - std::map< K, V > m_shadow_map; - std::unique_ptr< BtreeConfig > m_bt_cfg; - void SetUp() override { test_common::HSTestHelper::start_homestore( "test_index_btree", @@ -113,11 +119,7 @@ struct BtreeTest : public testing::Test { {HS_SERVICE::INDEX, {.size_pct = 70.0, .index_svc_cbs = new TestIndexServiceCallbacks(this)}}}); LOGINFO("Node size {} ", hs()->index_service().node_size()); - m_bt_cfg = std::make_unique< BtreeConfig >(hs()->index_service().node_size()); - m_bt_cfg->m_leaf_node_type = T::leaf_node_type; - m_bt_cfg->m_int_node_type = T::interior_node_type; - // TODO fix. SequentialRemove failing in case of VarObj test. - m_bt_cfg->m_merge_turned_on = false; + this->m_cfg = BtreeConfig(hs()->index_service().node_size()); auto uuid = boost::uuids::random_generator()(); auto parent_uuid = boost::uuids::random_generator()(); @@ -130,12 +132,14 @@ struct BtreeTest : public testing::Test { homestore::hs()->resource_mgr().reset_dirty_buf_qd(); // Create index table and attach to index service. 
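     // m_bt and m_cfg are now owned by BtreeTestHelper, so the table below is constructed from the shared helper config.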
- m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, *m_bt_cfg); - hs()->index_service().add_index_table(m_bt); + BtreeTestHelper< TestType >::SetUp(); + this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); + hs()->index_service().add_index_table(this->m_bt); LOGINFO("Added index table to index service"); } void TearDown() override { + BtreeTestHelper< TestType >::TearDown(); test_common::HSTestHelper::shutdown_homestore(); } @@ -146,202 +150,12 @@ struct BtreeTest : public testing::Test { nullptr, true /* restart */); } - void put(uint32_t k, btree_put_type put_type) { - auto existing_v = std::make_unique< V >(); - auto pk = std::make_unique< K >(k); - auto pv = std::make_unique< V >(V::generate_rand()); - auto sreq{BtreeSinglePutRequest{pk.get(), pv.get(), put_type, existing_v.get()}}; - sreq.enable_route_tracing(); - bool done = (m_bt->put(sreq) == btree_status_t::success); - - // auto& sreq = to_single_put_req(req); - bool expected_done{true}; - if (m_shadow_map.find(*sreq.m_k) != m_shadow_map.end()) { - expected_done = (put_type != btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); - } - ASSERT_EQ(done, expected_done) << "Expected put of key " << k << " of put_type " << enum_name(put_type) - << " to be " << expected_done; - if (expected_done) { - m_shadow_map.insert(std::make_pair((const K&)*sreq.m_k, (const V&)*sreq.m_v)); - } else { - const auto r = m_shadow_map.find(*sreq.m_k); - ASSERT_NE(r, m_shadow_map.end()) << "Testcase issue, expected inserted slots to be in shadow map"; - ASSERT_EQ((const V&)*sreq.m_existing_val, r->second) - << "Insert existing value doesn't return correct data for key " << r->first; - } - } - - void range_put(uint32_t max_count) { - const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >(); - static std::uniform_int_distribution< uint32_t > s_randkey_start_generator{1, num_entries}; - auto val = std::make_unique< V >(V::generate_rand()); - - retry: - auto const start_it = m_shadow_map.lower_bound(K{s_randkey_start_generator(g_re)}); - auto end_it = start_it; - auto it = start_it; - uint32_t count = 0; - while ((it != m_shadow_map.end()) && (count++ < max_count)) { - it->second = *val; - end_it = it++; - } - if (count == 0) { goto retry; } - - auto mreq = BtreeRangePutRequest< K >{BtreeKeyRange< K >{start_it->first, true, end_it->first, true}, - btree_put_type::REPLACE_ONLY_IF_EXISTS, val.get()}; - mreq.enable_route_tracing(); - ASSERT_EQ(m_bt->put(mreq), btree_status_t::success); - } - - void remove_one(uint32_t k) { - auto existing_v = std::make_unique< V >(); - auto pk = std::make_unique< K >(k); - - auto rreq = BtreeSingleRemoveRequest{pk.get(), existing_v.get()}; - rreq.enable_route_tracing(); - bool removed = (m_bt->remove(rreq) == btree_status_t::success); - - bool expected_removed = (m_shadow_map.find(rreq.key()) != m_shadow_map.end()); - ASSERT_EQ(removed, expected_removed) << "Expected remove of key " << k << " to be " << expected_removed; - - if (removed) { - validate_data(rreq.key(), (const V&)rreq.value()); - m_shadow_map.erase(rreq.key()); - } - } - - void query_all_validate() const { - query_validate(0u, SISL_OPTIONS["num_entries"].as< uint32_t >() - 1, UINT32_MAX); - } - void query_all_paginate_validate(uint32_t batch_size) const { - query_validate(0u, SISL_OPTIONS["num_entries"].as< uint32_t >() - 1, batch_size); - } - - void query_validate(uint32_t start_k, uint32_t end_k, uint32_t batch_size) const { - std::vector< std::pair< K, V > > out_vector; - uint32_t remaining = 
num_elems_in_range(start_k, end_k); - auto it = m_shadow_map.lower_bound(K{start_k}); - - BtreeQueryRequest< K > qreq{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true}, - BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, batch_size}; - qreq.enable_route_tracing(); - - do { - out_vector.clear(); - auto const ret = m_bt->query(qreq, out_vector); - auto const expected_count = std::min(remaining, batch_size); - - remaining -= expected_count; - if (remaining == 0) { - ASSERT_EQ(ret, btree_status_t::success) << "Expected success on query"; - } else { - ASSERT_EQ(ret, btree_status_t::has_more) << "Expected query to return has_more"; - } - ASSERT_EQ(out_vector.size(), expected_count) << "Received incorrect value on query pagination"; - - for (size_t idx{0}; idx < out_vector.size(); ++idx) { - ASSERT_EQ(out_vector[idx].second, it->second) - << "Range get doesn't return correct data for key=" << it->first << " idx=" << idx; - ++it; - } - } while (remaining > 0); -#if 0 - out_vector.clear(); - auto ret = m_bt->query(qreq, out_vector); - ASSERT_EQ(ret, btree_status_t::success) << "Expected success on query"; - ASSERT_EQ(out_vector.size(), 0) << "Received incorrect value on empty query pagination"; -#endif - } - - void get_all_validate() const { - for (const auto& [key, value] : m_shadow_map) { - auto copy_key = std::make_unique< K >(); - *copy_key = key; - auto out_v = std::make_unique< V >(); - auto req = BtreeSingleGetRequest{copy_key.get(), out_v.get()}; - req.enable_route_tracing(); - const auto ret = m_bt->get(req); - ASSERT_EQ(ret, btree_status_t::success) << "Missing key " << key << " in btree but present in shadow map"; - ASSERT_EQ((const V&)req.value(), value) - << "Found value in btree doesn't return correct data for key=" << key; - } - } - - void get_specific_validate(uint32_t k) const { - auto pk = std::make_unique< K >(k); - auto out_v = std::make_unique< V >(); - auto req = BtreeSingleGetRequest{pk.get(), out_v.get()}; - - const auto status = m_bt->get(req); - if (status == btree_status_t::success) { - validate_data(req.key(), (const V&)req.value()); - } else { - ASSERT_EQ((m_shadow_map.find(req.key()) == m_shadow_map.end()), true) - << "Node key " << k << " is missing in the btree"; - } - } - - void get_any_validate(uint32_t start_k, uint32_t end_k) const { - auto out_k = std::make_unique< K >(); - auto out_v = std::make_unique< V >(); - auto req = - BtreeGetAnyRequest< K >{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true}, out_k.get(), out_v.get()}; - const auto status = m_bt->get(req); - if (status == btree_status_t::success) { - ASSERT_EQ(found_in_range(*(K*)req.m_outkey, start_k, end_k), true) - << "Get Any returned key=" << *(K*)req.m_outkey << " which is not in range " << start_k << "-" << end_k - << "according to shadow map"; - validate_data(*(K*)req.m_outkey, *(V*)req.m_outval); - } else { - ASSERT_EQ(found_in_range(*(K*)req.m_outkey, start_k, end_k), false) - << "Get Any couldn't find key in the range " << start_k << "-" << end_k - << " but it present in shadow map"; - } - } - - void print(const std::string& file = "") const { m_bt->print_tree(file); } - void destroy_btree() { auto cpg = hs()->cp_mgr().cp_guard(); auto op_context = (void*)cpg.context(cp_consumer_t::INDEX_SVC); - const auto [ret, free_node_cnt] = m_bt->destroy_btree(op_context); + const auto [ret, free_node_cnt] = this->m_bt->destroy_btree(op_context); ASSERT_EQ(ret, btree_status_t::success) << "btree destroy failed"; - m_bt.reset(); - } - - void compare_files(const std::string& before, const 
std::string& after) { - std::ifstream b(before); - std::ifstream a(after); - std::ostringstream ss_before, ss_after; - ss_before << b.rdbuf(); - ss_after << a.rdbuf(); - std::string s1 = ss_before.str(); - std::string s2 = ss_after.str(); - ASSERT_EQ(s1, s2) << "Mismatch in btree structure"; - } - -private: - void validate_data(const K& key, const V& btree_val) const { - const auto r = m_shadow_map.find(key); - ASSERT_NE(r, m_shadow_map.end()) << "Node key is not present in shadow map"; - ASSERT_EQ(btree_val, r->second) << "Found value in btree doesn't return correct data for key=" << r->first; - } - - bool found_in_range(const K& key, uint32_t start_k, uint32_t end_k) const { - const auto itlower = m_shadow_map.lower_bound(K{start_k}); - const auto itupper = m_shadow_map.upper_bound(K{end_k}); - auto it = itlower; - while (it != itupper) { - if (it->first == key) { return true; } - ++it; - } - return false; - } - - uint32_t num_elems_in_range(uint32_t start_k, uint32_t end_k) const { - const auto itlower = m_shadow_map.lower_bound(K{start_k}); - const auto itupper = m_shadow_map.upper_bound(K{end_k}); - return std::distance(itlower, itupper); + this->m_bt.reset(); } }; @@ -361,36 +175,36 @@ TYPED_TEST(BtreeTest, SequentialInsert) { const auto entries_iter1 = num_entries / 2; LOGINFO("Step 1: Do Forward sequential insert for {} entries", entries_iter1); for (uint32_t i{0}; i < entries_iter1; ++i) { - this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); + this->put(i, btree_put_type::INSERT); // this->print(); } LOGINFO("Step 2: Query {} entries and validate with pagination of 75 entries", entries_iter1); - this->query_validate(0, entries_iter1 - 1, 75); + this->do_query(0, entries_iter1 - 1, 75); // Reverse sequential insert const auto entries_iter2 = num_entries - entries_iter1; LOGINFO("Step 3: Do Reverse sequential insert of remaining {} entries", entries_iter2); for (uint32_t i{num_entries - 1}; i >= entries_iter1; --i) { - this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); + this->put(i, btree_put_type::INSERT); } LOGINFO("Step 4: Query {} entries and validate with pagination of 90 entries", entries_iter2); - this->query_validate(entries_iter1, num_entries - 1, 90); + this->do_query(entries_iter1, num_entries - 1, 90); // Do validate all of them LOGINFO("Step 5: Query all entries and validate with no pagination"); - this->query_all_validate(); + this->query_all(); LOGINFO("Step 6: Query all entries and validate with pagination of 80 entries"); - this->query_all_paginate_validate(80); + this->query_all_paginate(80); LOGINFO("Step 7: Get all entries 1-by-1 and validate them"); - this->get_all_validate(); - this->get_any_validate(num_entries - 3, num_entries + 1); + this->get_all(); + this->get_any(num_entries - 3, num_entries + 1); // Negative cases LOGINFO("Step 8: Do incorrect input and validate errors"); - this->query_validate(num_entries + 100, num_entries + 500, 5); - this->get_any_validate(num_entries + 1, num_entries + 2); + this->do_query(num_entries + 100, num_entries + 500, 5); + this->get_any(num_entries + 1, num_entries + 2); // this->print(); LOGINFO("SequentialInsert test end"); @@ -406,9 +220,9 @@ TYPED_TEST(BtreeTest, RandomInsert) { std::random_shuffle(vec.begin(), vec.end()); LOGINFO("Step 1: Do forward random insert for {} entries", num_entries); for (uint32_t i{0}; i < num_entries; ++i) { - this->put(vec[i], btree_put_type::INSERT_ONLY_IF_NOT_EXISTS); + this->put(vec[i], btree_put_type::INSERT); } - this->get_all_validate(); + this->get_all(); } #if 0 @@ 
 #if 0
@@ -418,10 +232,10 @@ TYPED_TEST(BtreeTest, SequentialRemove) {
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     LOGINFO("Step 1: Do Forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     LOGINFO("Step 2: Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);

     const auto entries_iter1 = num_entries / 2;
     LOGINFO("Step 3: Do Forward sequential remove for {} entries", entries_iter1);
@@ -429,8 +243,8 @@ TYPED_TEST(BtreeTest, SequentialRemove) {
         this->remove_one(i);
     }
     LOGINFO("Step 4: Query {} entries and validate with pagination of 75 entries", entries_iter1);
-    this->query_validate(0, entries_iter1 - 1, 75);
-    this->query_validate(entries_iter1, num_entries - 1, 75);
+    this->do_query(0, entries_iter1 - 1, 75);
+    this->do_query(entries_iter1, num_entries - 1, 75);

     const auto entries_iter2 = num_entries - entries_iter1;
     LOGINFO("Step 5: Do Reverse sequential remove of remaining {} entries", entries_iter2);
@@ -439,9 +253,9 @@ TYPED_TEST(BtreeTest, SequentialRemove) {
     }

     LOGINFO("Step 6: Query the empty tree");
-    this->query_validate(0, num_entries - 1, 75);
-    this->get_any_validate(0, 1);
-    this->get_specific_validate(0);
+    this->do_query(0, num_entries - 1, 75);
+    this->get_any(0, 1);
+    this->get_specific(0);

     LOGINFO("SequentialRemove test end");
 }

@@ -452,7 +266,7 @@ TYPED_TEST(BtreeTest, RandomRemove) {

     LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }

     std::vector< uint32_t > vec(num_entries);
@@ -464,7 +278,7 @@ TYPED_TEST(BtreeTest, RandomRemove) {
     for (uint32_t i{0}; i < num_iters; ++i) {
         this->remove_one(vec[i]);
     }
-    this->get_all_validate();
+    this->get_all();
 }
 #endif

@@ -474,17 +288,16 @@ TYPED_TEST(BtreeTest, RangeUpdate) {
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     LOGINFO("Step 1: Do Forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }

     LOGINFO("Step 2: Do Range Update of random intervals between [1-50] for 100 times with random key ranges");
-    static std::uniform_int_distribution< uint32_t > s_rand_key_count_generator{1, 50};
     for (uint32_t i{0}; i < 100; ++i) {
-        this->range_put(s_rand_key_count_generator(g_re));
+        this->range_put_random();
     }

     LOGINFO("Step 2: Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);
     LOGINFO("RangeUpdate test end");
 }

@@ -494,10 +307,10 @@ TYPED_TEST(BtreeTest, CpFlush) {
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     LOGINFO("Do Forward sequential insert for {} entries", num_entries / 2);
     for (uint32_t i = 0; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     LOGINFO("Query {} entries and validate with pagination of 75 entries", num_entries / 2);
-    this->query_validate(0, num_entries / 2 - 1, 75);
+    this->do_query(0, num_entries / 2 - 1, 75);

     this->print(std::string("before.txt"));

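The compare_files() helper removed from this fixture reduces to a byte-for-byte comparison of two tree dumps such as the before.txt written above. A small sketch of that idea with plain iostreams (slurp and same_dump are illustrative names):

```cpp
#include <fstream>
#include <sstream>
#include <string>

// Reads an entire file into a string; yields an empty string if it cannot open.
std::string slurp(const std::string& path) {
    std::ifstream in(path);
    std::ostringstream ss;
    ss << in.rdbuf();
    return ss.str();
}

// Byte-for-byte structural comparison of two dumped trees.
bool same_dump(const std::string& before, const std::string& after) {
    return slurp(before) == slurp(after);
}
```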
@@ -505,7 +318,7 @@ TYPED_TEST(BtreeTest, CpFlush) {
     test_common::HSTestHelper::trigger_cp(true /* wait */);

     LOGINFO("Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);

     this->destroy_btree();

@@ -518,7 +331,7 @@ TYPED_TEST(BtreeTest, CpFlush) {
     this->print(std::string("after.txt"));

     LOGINFO("Query {} entries", num_entries);
-    this->query_validate(0, num_entries - 1, 1000);
+    this->do_query(0, num_entries - 1, 1000);

     this->compare_files("before.txt", "after.txt");
     LOGINFO("CpFlush test end");
@@ -530,7 +343,7 @@ TYPED_TEST(BtreeTest, MultipleCpFlush) {
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     LOGINFO("Do Forward sequential insert for {} entries", num_entries / 2);
     for (uint32_t i = 0; i < num_entries / 2; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
         if (i % 500 == 0) {
             LOGINFO("Trigger checkpoint flush wait=false.");
             test_common::HSTestHelper::trigger_cp(false /* wait */);
@@ -541,7 +354,7 @@ TYPED_TEST(BtreeTest, MultipleCpFlush) {
     test_common::HSTestHelper::trigger_cp(false /* wait */);

     for (uint32_t i = num_entries / 2; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }

     LOGINFO("Trigger checkpoint flush wait=false.");
@@ -551,7 +364,7 @@ TYPED_TEST(BtreeTest, MultipleCpFlush) {
     test_common::HSTestHelper::trigger_cp(true /* wait */);

     LOGINFO("Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);

     this->print(std::string("before.txt"));

@@ -567,7 +380,7 @@ TYPED_TEST(BtreeTest, MultipleCpFlush) {
     this->compare_files("before.txt", "after.txt");

     LOGINFO("Query {} entries and validate with pagination of 1000 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 1000);
+    this->do_query(0, num_entries - 1, 1000);
     LOGINFO("MultipleCpFlush test end");
 }

@@ -579,7 +392,7 @@ TYPED_TEST(BtreeTest, ThreadedCpFlush) {
     auto io_thread = std::thread([this, num_entries] {
         LOGINFO("Do Forward sequential insert for {} entries", num_entries);
         for (uint32_t i = 0; i < num_entries; ++i) {
-            this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+            this->put(i, btree_put_type::INSERT);
         }
     });

@@ -599,7 +412,7 @@ TYPED_TEST(BtreeTest, ThreadedCpFlush) {
     test_common::HSTestHelper::trigger_cp(true /* wait */);

     LOGINFO("Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);

     this->print(std::string("before.txt"));
     this->destroy_btree();

@@ -614,7 +427,7 @@ TYPED_TEST(BtreeTest, ThreadedCpFlush) {
     this->compare_files("before.txt", "after.txt");

     LOGINFO("Query {} entries and validate with pagination of 1000 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 1000);
+    this->do_query(0, num_entries - 1, 1000);
     LOGINFO("ThreadedCpFlush test end");
 }
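The CpFlush family of tests leans on HSTestHelper::trigger_cp(bool wait) to either fire-and-forget a checkpoint or block until it completes. A toy sketch of that wait/no-wait trigger shape using a condition variable; Flusher is a stand-in illustration, not the HomeStore implementation, and it assumes the object outlives all detached workers:

```cpp
#include <algorithm>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

class Flusher {
    std::mutex m_mtx;
    std::condition_variable m_cv;
    uint64_t m_submitted{0}, m_completed{0};

public:
    void trigger(bool wait) {
        uint64_t my_id;
        {
            std::lock_guard lg(m_mtx);
            my_id = ++m_submitted;  // ticket for this checkpoint request
        }
        std::thread([this, my_id] {
            // ... flush dirty buffers here ...
            std::lock_guard lg(m_mtx);
            m_completed = std::max(m_completed, my_id);
            m_cv.notify_all();
        }).detach();
        if (wait) {  // trigger_cp(true): block until this ticket is durable
            std::unique_lock lk(m_mtx);
            m_cv.wait(lk, [&] { return m_completed >= my_id; });
        }
    }
};
```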
 * You may obtain a copy of the License at
@@ -22,13 +21,15 @@
 #include
 #include
 #include
-#include "btree_test_kvs.hpp"
+
 #include
 #include
+#include
 #include

 #include "test_common/range_scheduler.hpp"
+#include "btree_helpers/btree_test_kvs.hpp"
+#include "btree_helpers/btree_test_helper.hpp"

-static constexpr uint32_t g_node_size{4096};
 using namespace homestore;
 SISL_LOGGING_INIT(btree, iomgr, io_wd, flip)
@@ -81,262 +82,29 @@ struct VarObjSizeBtreeTest {
     static constexpr btree_node_type interior_node_type = btree_node_type::VAR_OBJECT;
 };

+struct PrefixIntervalBtreeTest {
+    using BtreeType = MemBtree< TestIntervalKey, TestIntervalValue >;
+    using KeyType = TestIntervalKey;
+    using ValueType = TestIntervalValue;
+    static constexpr btree_node_type leaf_node_type = btree_node_type::PREFIX;
+    static constexpr btree_node_type interior_node_type = btree_node_type::FIXED;
+};
+
 template < typename TestType >
-struct BtreeTest : public testing::Test {
+struct BtreeTest : public BtreeTestHelper< TestType > {
     using T = TestType;
     using K = typename TestType::KeyType;
     using V = typename TestType::ValueType;

-    std::unique_ptr< typename T::BtreeType > m_bt;
-    std::map< K, V > m_shadow_map;
-    BtreeConfig m_cfg{g_node_size};
-
     void SetUp() override {
-        m_cfg.m_leaf_node_type = T::leaf_node_type;
-        m_cfg.m_int_node_type = T::interior_node_type;
-        if (SISL_OPTIONS.count("disable_merge")) m_cfg.m_merge_turned_on = false;
-        m_bt = std::make_unique< typename T::BtreeType >(m_cfg);
-        m_bt->init(nullptr);
-    }
-
-    void put(uint32_t k, btree_put_type put_type) {
-        auto existing_v = std::make_unique< V >();
-        auto pk = std::make_unique< K >(k);
-        auto pv = std::make_unique< V >(V::generate_rand());
-        auto sreq{BtreeSinglePutRequest{pk.get(), pv.get(), put_type, existing_v.get()}};
-        bool done = (m_bt->put(sreq) == btree_status_t::success);
-
-        // auto& sreq = to_single_put_req(req);
-        bool expected_done{true};
-        if (m_shadow_map.find(*sreq.m_k) != m_shadow_map.end()) {
-            expected_done = (put_type != btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
-        }
-        ASSERT_EQ(done, expected_done) << "Expected put of key " << k << " of put_type " << enum_name(put_type)
-                                       << " to be " << expected_done;
-        if (expected_done) {
-            m_shadow_map.insert(std::make_pair((const K&)*sreq.m_k, (const V&)*sreq.m_v));
-        } else {
-            const auto r = m_shadow_map.find(*sreq.m_k);
-            ASSERT_NE(r, m_shadow_map.end()) << "Testcase issue, expected inserted slots to be in shadow map";
-            ASSERT_EQ((const V&)*sreq.m_existing_val, r->second)
-                << "Insert existing value doesn't return correct data for key " << r->first;
-        }
-    }
-
-    void range_put(uint32_t start_entry, uint32_t end_entry, bool expected) {
-        auto val = std::make_unique< V >(V::generate_rand());
-        auto mreq = BtreeRangePutRequest< K >{BtreeKeyRange< K >{start_entry, true, end_entry, true},
-                                              btree_put_type::REPLACE_ONLY_IF_EXISTS, val.get()};
-        ASSERT_EQ(m_bt->put(mreq) == btree_status_t::success, expected);
-    }
-
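Each entry in BtreeTypes is a small traits struct naming the btree, key, value and node types, which is exactly how PrefixIntervalBtreeTest slots in above. The same pattern in miniature with GoogleTest typed tests; VecCfg and MapCfg are stand-in traits, not part of this codebase, and the snippet assumes linking against gtest_main:

```cpp
#include <gtest/gtest.h>
#include <map>
#include <vector>

// Traits-driven typed test: each config struct names the container under test.
template <typename T>
struct StoreTest : public testing::Test {
    typename T::StoreType store;
};

struct VecCfg { using StoreType = std::vector<int>; };
struct MapCfg { using StoreType = std::map<int, int>; };

using StoreTypes = testing::Types<VecCfg, MapCfg>;
TYPED_TEST_SUITE(StoreTest, StoreTypes);

// Runs once per entry in StoreTypes, just like the BtreeTest suite above.
TYPED_TEST(StoreTest, StartsEmpty) { EXPECT_TRUE(this->store.empty()); }
```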
-    void range_put(uint32_t max_count) {
-        const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
-        static thread_local std::uniform_int_distribution< uint32_t > s_randkey_start_generator{1, num_entries};
-        auto val = std::make_unique< V >(V::generate_rand());
-
-    retry:
-        auto const start_it = m_shadow_map.lower_bound(K{s_randkey_start_generator(g_re)});
-        auto end_it = start_it;
-        auto it = start_it;
-        uint32_t count = 0;
-        while ((it != m_shadow_map.end()) && (count++ < max_count)) {
-            it->second = *val;
-            end_it = it++;
-        }
-        if (count == 0) { goto retry; }
-
-        auto mreq = BtreeRangePutRequest< K >{BtreeKeyRange< K >{start_it->first, true, end_it->first, true},
-                                              btree_put_type::REPLACE_ONLY_IF_EXISTS, val.get()};
-        ASSERT_EQ(m_bt->put(mreq), btree_status_t::success);
-    }
-
-    void remove_one(uint32_t k) {
-        auto existing_v = std::make_unique< V >();
-        auto pk = std::make_unique< K >(k);
-
-        auto rreq = BtreeSingleRemoveRequest{pk.get(), existing_v.get()};
-        bool removed = (m_bt->remove(rreq) == btree_status_t::success);
-
-        bool expected_removed = (m_shadow_map.find(rreq.key()) != m_shadow_map.end());
-        ASSERT_EQ(removed, expected_removed) << "Expected remove of key " << k << " to be " << expected_removed;
-
-        if (removed) {
-            validate_data(rreq.key(), (const V&)rreq.value());
-            m_shadow_map.erase(rreq.key());
-        }
-    }
-
-    void range_remove(uint32_t start_key, uint32_t end_key) {
-
-        auto start_it = m_shadow_map.lower_bound(K{start_key});
-        auto end_it = m_shadow_map.lower_bound(K{end_key});
-        auto fount_it = m_shadow_map.find(K{end_key});
-        bool expected = (start_it != m_shadow_map.end()) && (std::distance(start_it, end_it) >= 0);
-        if (start_it == end_it && fount_it == m_shadow_map.end()) { expected = false; }
-        auto range = BtreeKeyRange< K >{K{start_key}, true, K{end_key}, true};
-        auto mreq = BtreeRangeRemoveRequest< K >{std::move(range)};
-
-        size_t original_ts = get_tree_size();
-        size_t original_ms = m_shadow_map.size();
-
-        auto ret = m_bt->remove(mreq);
-        ASSERT_EQ(expected, ret == btree_status_t::success)
-            << " not a successful remove op for range " << range.to_string()
-            << "start_it!=m_shadow_map.end(): " << (start_it != m_shadow_map.end())
-            << " and std::distance(start_it,end_it) >= 0 : " << (std::distance(start_it, end_it) >= 0);

-        K out_key;
-        V out_value;
-        auto qret = get_num_elements_in_tree(start_key, end_key, out_key, out_value);
-        ASSERT_EQ(qret, btree_status_t::not_found)
-            << " At least one element found! [" << out_key << "] = " << out_value;
-
-        if (expected) { m_shadow_map.erase(start_it, fount_it != m_shadow_map.end() ? ++end_it : end_it); }
-        size_t ms = m_shadow_map.size();
-        size_t ts = get_tree_size();
-        ASSERT_EQ(original_ms - ms, original_ts - ts) << " number of removed from map is " << original_ms - ms
-                                                      << " whereas number of existing keys is " << original_ts - ts;
-
-        ASSERT_EQ(ts, ms) << " size of tree is " << ts << " vs number of existing keys are " << ms;
-    }
-
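The size bookkeeping asserted at the end of range_remove above boils down to two invariants: the number of keys that left the shadow map must equal the number that left the tree, and the two totals must agree afterwards. Restated standalone (check_remove_accounting is an illustrative name):

```cpp
#include <cassert>
#include <cstddef>

// Sizes are captured before and after the remove op, as in range_remove above.
void check_remove_accounting(size_t map_before, size_t map_after,
                             size_t tree_before, size_t tree_after) {
    assert(map_before - map_after == tree_before - tree_after);  // same delta
    assert(tree_after == map_after);                             // same totals
}
```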
-    void query_all_validate() const {
-        query_validate(0u, SISL_OPTIONS["num_entries"].as< uint32_t >() - 1, UINT32_MAX);
-    }
-
-    void query_all_paginate_validate(uint32_t batch_size) const {
-        query_validate(0u, SISL_OPTIONS["num_entries"].as< uint32_t >() - 1, batch_size);
-    }
-
-    void query_validate(uint32_t start_k, uint32_t end_k, uint32_t batch_size) const {
-        std::vector< std::pair< K, V > > out_vector;
-        uint32_t remaining = num_elems_in_range(start_k, end_k);
-        auto it = m_shadow_map.lower_bound(K{start_k});
-
-        BtreeQueryRequest< K > qreq{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true},
-                                    BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, batch_size};
-        while (remaining > 0) {
-            out_vector.clear();
-            auto const ret = m_bt->query(qreq, out_vector);
-            auto const expected_count = std::min(remaining, batch_size);
-
-            ASSERT_EQ(out_vector.size(), expected_count) << "Received incorrect value on query pagination";
-            remaining -= expected_count;
-
-            if (remaining == 0) {
-                ASSERT_EQ(ret, btree_status_t::success) << "Expected success on query";
-            } else {
-                ASSERT_EQ(ret, btree_status_t::has_more) << "Expected query to return has_more";
-            }
-
-            for (size_t idx{0}; idx < out_vector.size(); ++idx) {
-                ASSERT_EQ(out_vector[idx].second, it->second)
-                    << "Range get doesn't return correct data for key=" << it->first << " idx=" << idx;
-                ++it;
-            }
-        }
-        out_vector.clear();
-        auto ret = m_bt->query(qreq, out_vector);
-        ASSERT_EQ(ret, btree_status_t::success) << "Expected success on query";
-        ASSERT_EQ(out_vector.size(), 0) << "Received incorrect value on empty query pagination";
-    }
-
-    void get_all_validate() const {
-        for (const auto& [key, value] : m_shadow_map) {
-            auto copy_key = std::make_unique< K >();
-            *copy_key = key;
-            auto out_v = std::make_unique< V >();
-            auto req = BtreeSingleGetRequest{copy_key.get(), out_v.get()};
-
-            const auto ret = m_bt->get(req);
-            ASSERT_EQ(ret, btree_status_t::success) << "Missing key " << key << " in btree but present in shadow map";
-            ASSERT_EQ((const V&)req.value(), value)
-                << "Found value in btree doesn't return correct data for key=" << key;
-        }
-    }
-
-    void get_specific_validate(uint32_t k) const {
-        auto pk = std::make_unique< K >(k);
-        auto out_v = std::make_unique< V >();
-        auto req = BtreeSingleGetRequest{pk.get(), out_v.get()};
-
-        const auto status = m_bt->get(req);
-        if (status == btree_status_t::success) {
-            validate_data(req.key(), (const V&)req.value());
-        } else {
-            ASSERT_EQ((m_shadow_map.find(req.key()) == m_shadow_map.end()), true)
-                << "Node key " << k << " is missing in the btree";
-        }
-    }
-
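The found_in_range()/num_elems_in_range() helpers used by these validation paths bound an inclusive key window with lower_bound/upper_bound: lower_bound gives the first key >= start_k, upper_bound the first key > end_k, so the pair spans exactly the in-range entries. Restated standalone over a plain std::map:

```cpp
#include <cstdint>
#include <iterator>
#include <map>
#include <string>

bool found_in_range(const std::map<uint32_t, std::string>& shadow, uint32_t key,
                    uint32_t start_k, uint32_t end_k) {
    auto lo = shadow.lower_bound(start_k);  // first key >= start_k
    auto hi = shadow.upper_bound(end_k);    // first key >  end_k
    for (auto it = lo; it != hi; ++it) {
        if (it->first == key) { return true; }
    }
    return false;
}

size_t num_elems_in_range(const std::map<uint32_t, std::string>& shadow,
                          uint32_t start_k, uint32_t end_k) {
    return std::distance(shadow.lower_bound(start_k), shadow.upper_bound(end_k));
}
```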
-    void get_any_validate(uint32_t start_k, uint32_t end_k) const {
-        auto out_k = std::make_unique< K >();
-        auto out_v = std::make_unique< V >();
-        auto req =
-            BtreeGetAnyRequest< K >{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true}, out_k.get(), out_v.get()};
-        const auto status = m_bt->get(req);
-        if (status == btree_status_t::success) {
-            ASSERT_EQ(found_in_range(*(K*)req.m_outkey, start_k, end_k), true)
-                << "Get Any returned key=" << *(K*)req.m_outkey << " which is not in range " << start_k << "-" << end_k
-                << "according to shadow map";
-            validate_data(*(K*)req.m_outkey, *(V*)req.m_outval);
-        } else {
-            ASSERT_EQ(found_in_range(*(K*)req.m_outkey, start_k, end_k), false)
-                << "Get Any couldn't find key in the range " << start_k << "-" << end_k
-                << " but it present in shadow map";
-        }
-    }
-
-    void print() const { m_bt->print_tree(); }
-
-    void print_keys() const { m_bt->print_tree_keys(); }
-
-    size_t get_tree_size() {
-        BtreeQueryRequest< K > qreq{
-            BtreeKeyRange< K >{K{0}, true, K{SISL_OPTIONS["num_entries"].as< uint32_t >()}, true},
-            BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, UINT32_MAX};
-        std::vector< std::pair< K, V > > out_vector;
-        auto const ret = m_bt->query(qreq, out_vector);
-        return out_vector.size();
-    }
-
-    btree_status_t get_num_elements_in_tree(uint32_t start_k, uint32_t end_k, K& out_key, V& out_value) const {
-        auto k = std::make_unique< K >();
-        auto v = std::make_unique< V >();
-        auto req = BtreeGetAnyRequest< K >{BtreeKeyRange< K >{K{start_k}, true, K{end_k}, true}, k.get(), v.get()};
-        auto ret = m_bt->get(req);
-        out_key = *((K*)req.m_outkey);
-        out_value = *((V*)req.m_outval);
-        return ret;
-    }
-
-private:
-    void validate_data(const K& key, const V& btree_val) const {
-        const auto r = m_shadow_map.find(key);
-        ASSERT_NE(r, m_shadow_map.end()) << "Node key is not present in shadow map";
-        ASSERT_EQ(btree_val, r->second) << "Found value in btree doesn't return correct data for key=" << r->first;
-    }
-
-    bool found_in_range(const K& key, uint32_t start_k, uint32_t end_k) const {
-        const auto itlower = m_shadow_map.lower_bound(K{start_k});
-        const auto itupper = m_shadow_map.upper_bound(K{end_k});
-        auto it = itlower;
-        while (it != itupper) {
-            if (it->first == key) { return true; }
-            ++it;
-        }
-        return false;
-    }
-
-    uint32_t num_elems_in_range(uint32_t start_k, uint32_t end_k) const {
-        const auto itlower = m_shadow_map.lower_bound(K{start_k});
-        const auto itupper = m_shadow_map.upper_bound(K{end_k});
-        return std::distance(itlower, itupper);
+        BtreeTestHelper< TestType >::SetUp();
+        this->m_bt = std::make_shared< typename T::BtreeType >(this->m_cfg);
+        this->m_bt->init(nullptr);
     }
 };

-using BtreeTypes = testing::Types< FixedLenBtreeTest, VarKeySizeBtreeTest, VarValueSizeBtreeTest, VarObjSizeBtreeTest >;
+using BtreeTypes = testing::Types< PrefixIntervalBtreeTest, FixedLenBtreeTest, VarKeySizeBtreeTest,
+                                   VarValueSizeBtreeTest, VarObjSizeBtreeTest >;

 TYPED_TEST_SUITE(BtreeTest, BtreeTypes);

 TYPED_TEST(BtreeTest, SequentialInsert) {
@@ -345,35 +113,35 @@ TYPED_TEST(BtreeTest, SequentialInsert) {
     const auto entries_iter1 = num_entries / 2;
     LOGINFO("Step 1: Do forward sequential insert for {} entries", entries_iter1);
     for (uint32_t i{0}; i < entries_iter1; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     LOGINFO("Step 2: Query {} entries and validate with pagination of 75 entries", entries_iter1);
-    this->query_validate(0, entries_iter1 - 1, 75);
+    this->do_query(0, entries_iter1 - 1, 75);

     // Reverse sequential insert
     const auto entries_iter2 = num_entries - entries_iter1;
     LOGINFO("Step 3: Do reverse sequential insert of remaining {} entries", entries_iter2);
     for (uint32_t i{num_entries - 1}; i >= entries_iter1; --i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     LOGINFO("Step 4: Query {} entries and validate with pagination of 90 entries", entries_iter2);
-    this->query_validate(entries_iter1, num_entries - 1, 90);
+    this->do_query(entries_iter1, num_entries - 1, 90);

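get_any_validate above accepts any key of the window: on success the returned key must lie in range and carry the shadow value, on failure the window must be empty in the shadow map. The same semantics with a std::map stand-in (get_any here is illustrative; returning the first in-range entry is simply the easiest valid answer):

```cpp
#include <cstdint>
#include <map>
#include <optional>
#include <string>
#include <utility>

std::optional<std::pair<uint32_t, std::string>>
get_any(const std::map<uint32_t, std::string>& store, uint32_t start_k, uint32_t end_k) {
    auto it = store.lower_bound(start_k);
    if (it == store.end() || it->first > end_k) { return std::nullopt; }  // window empty
    return *it;  // any entry of [start_k, end_k] is a valid answer
}
```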
     // Do validate all of them
     LOGINFO("Step 5: Query all entries and validate with no pagination");
-    this->query_all_validate();
+    this->query_all();

     LOGINFO("Step 6: Query all entries and validate with pagination of 80 entries");
-    this->query_all_paginate_validate(80);
+    this->query_all_paginate(80);

     LOGINFO("Step 7: Get all entries 1-by-1 and validate them");
-    this->get_all_validate();
-    this->get_any_validate(num_entries - 3, num_entries + 1);
+    this->get_all();
+    this->get_any(num_entries - 3, num_entries + 1);

     // Negative cases
     LOGINFO("Step 8: Do incorrect input and validate errors");
-    this->query_validate(num_entries + 100, num_entries + 500, 5);
-    this->get_any_validate(num_entries + 1, num_entries + 2);
+    this->do_query(num_entries + 100, num_entries + 500, 5);
+    this->get_any(num_entries + 1, num_entries + 2);
 }

 TYPED_TEST(BtreeTest, SequentialRemove) {
@@ -381,10 +149,10 @@ TYPED_TEST(BtreeTest, SequentialRemove) {
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     LOGINFO("Step 2: Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);

     const auto entries_iter1 = num_entries / 2;
     LOGINFO("Step 3: Do forward sequential remove for {} entries", entries_iter1);
@@ -392,7 +160,7 @@ TYPED_TEST(BtreeTest, SequentialRemove) {
         this->remove_one(i);
     }
     LOGINFO("Step 4: Query {} entries and validate with pagination of 75 entries", entries_iter1);
-    this->query_validate(0, entries_iter1 - 1, 75);
+    this->do_query(0, entries_iter1 - 1, 75);

     const auto entries_iter2 = num_entries - entries_iter1;
     LOGINFO("Step 5: Do reverse sequential remove of remaining {} entries", entries_iter2);
@@ -401,9 +169,9 @@ TYPED_TEST(BtreeTest, SequentialRemove) {
     }

     LOGINFO("Step 6: Query the empty tree");
-    this->query_validate(0, num_entries, 75);
-    this->get_any_validate(0, 1);
-    this->get_specific_validate(0);
+    this->do_query(0, num_entries, 75);
+    this->get_any(0, 1);
+    this->get_specific(0);
 }

 TYPED_TEST(BtreeTest, RandomInsert) {
@@ -416,9 +184,9 @@ TYPED_TEST(BtreeTest, RandomInsert) {
     std::random_shuffle(vec.begin(), vec.end());
     LOGINFO("Step 1: Do forward random insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(vec[i], btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(vec[i], btree_put_type::INSERT);
     }
-    this->get_all_validate();
+    this->get_all();
 }

 TYPED_TEST(BtreeTest, RangeUpdate) {
@@ -426,17 +194,16 @@ TYPED_TEST(BtreeTest, RangeUpdate) {
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }

     LOGINFO("Step 2: Do range update of random intervals between [1-50] for 100 times with random key ranges");
-    static thread_local std::uniform_int_distribution< uint32_t > s_rand_key_count_generator{1, 50};
     for (uint32_t i{0}; i < 100; ++i) {
-        this->range_put(s_rand_key_count_generator(g_re));
+        this->range_put_random();
     }

     LOGINFO("Step 3: Query {} entries and validate with pagination of 75 entries", num_entries);
-    this->query_validate(0, num_entries - 1, 75);
+    this->do_query(0, num_entries - 1, 75);
 }

 TYPED_TEST(BtreeTest, SimpleRemoveRange) {
@@ -444,24 +211,24 @@ TYPED_TEST(BtreeTest, SimpleRemoveRange) {
     const auto num_entries = 20;
     LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     LOGINFO("Step 2: Do range remove for {} entries", num_entries);
     // this->print_keys(); // EXPECT size = 20 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
-    this->range_remove(5, 10);
+    this->range_remove_any(5, 10);
     // this->print_keys(); // EXPECT size = 14 : 0 1 2 3 4 [5 6 7 8 9 10] 11 12 13 14 15 16 17 18 19
-    this->range_remove(0, 2);
+    this->range_remove_any(0, 2);
     // this->print_keys(); // EXPECT size = 11 : [0 1 2] 3 4 11 12 13 14 15 16 17 18 19
-    this->range_remove(18, 19);
+    this->range_remove_any(18, 19);
     // this->print_keys(); // EXPECT size = 9 : 3 4 11 12 13 14 15 16 17 [18 19]
-    this->range_remove(17, 17);
+    this->range_remove_any(17, 17);
     // this->print_keys(); // EXPECT size = 8 : 3 4 11 12 13 14 15 16 [17]
-    this->range_remove(1, 5);
+    this->range_remove_any(1, 5);
     // this->print_keys(); // EXPECT size = 6 : [3 4] 11 12 13 14 15 16
-    this->range_remove(1, 20);
+    this->range_remove_any(1, 20);
     // this->print_keys(); // EXPECT size = 0 : [11 12 13 14 15 16]

-    this->query_all_validate();
+    this->query_all();
     // this->query_validate(0, num_entries , 75);
 }

@@ -472,7 +239,7 @@ TYPED_TEST(BtreeTest, RandomRemove) {

     LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }

     std::vector< uint32_t > vec(num_entries);
@@ -485,387 +252,66 @@ TYPED_TEST(BtreeTest, RandomRemove) {
         this->remove_one(vec[i]);
     }

-    this->get_all_validate();
+    this->get_all();
 }

 TYPED_TEST(BtreeTest, RandomRemoveRange) {
-    // Forward sequential insert
     const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
     const auto num_iters = SISL_OPTIONS["num_iters"].as< uint32_t >();

     LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
     for (uint32_t i{0}; i < num_entries; ++i) {
-        this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
+        this->put(i, btree_put_type::INSERT);
     }
     // generate keys including out of bound
     static thread_local std::uniform_int_distribution< uint32_t > s_rand_key_generator{0, 2 * num_entries};
     // this->print_keys();
     LOGINFO("Step 2: Do range remove for maximum of {} iterations", num_iters);
-    for (uint32_t i{0}; i< num_iters&& this->m_shadow_map.size() > 0; ++i) {
+    for (uint32_t i{0}; (i < num_iters) && this->m_shadow_map.size(); ++i) {
         uint32_t key1 = s_rand_key_generator(g_re);
         uint32_t key2 = s_rand_key_generator(g_re);
-        uint32_t start_key = std::min(key1, key2);
-        uint32_t end_key = std::max(key1, key2);

         // LOGINFO("Step 2 - {}: Do Range Remove of maximum [{},{}] keys ", i, start_key, end_key);
-        this->range_remove(std::min(key1, key2), std::max(key1, key2));
+        this->range_remove_any(std::min(key1, key2), std::max(key1, key2));
         // this->print_keys();
     }

-    this->query_all_validate();
+    this->query_all();
 }

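The range_remove_any(lo, hi) behaviour exercised by SimpleRemoveRange and RandomRemoveRange above erases whichever keys of the inclusive window actually exist; per the expected-size comments, removing [1, 20] from {3, 4, 11..16} drops all eight remaining keys. A std::map sketch of those semantics:

```cpp
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <string>

// Erase every existing key of the inclusive window [lo, hi]; returns how many
// entries actually went away (possibly zero if the window was already empty).
size_t range_remove_any(std::map<uint32_t, std::string>& store, uint32_t lo, uint32_t hi) {
    auto first = store.lower_bound(lo);   // first key >= lo
    auto last = store.upper_bound(hi);    // first key >  hi
    size_t n = std::distance(first, last);
    store.erase(first, last);
    return n;
}
```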
 template < typename TestType >
-class BtreeConcurrentTest : public testing::Test {
-    using op_func = void (BtreeConcurrentTest::*)(void);
+struct BtreeConcurrentTest : public BtreeTestHelper< TestType > {
     using T = TestType;
     using K = typename TestType::KeyType;
     using V = typename TestType::ValueType;
-    using mutex = iomgr::FiberManagerLib::shared_mutex;
-
-public:
-    void SetUp() override {
-        m_cfg.m_leaf_node_type = T::leaf_node_type;
-        m_cfg.m_int_node_type = T::interior_node_type;
-        m_max_range_input = SISL_OPTIONS["num_entries"].as< uint32_t >();
-        if (SISL_OPTIONS.count("disable_merge")) m_cfg.m_merge_turned_on = false;
-        m_fibers.clear();
-        m_bt = std::make_unique< typename T::BtreeType >(m_cfg);
-        m_bt->init(nullptr);
-    }
-
-    void TearDown() override { iomanager.stop(); }
-    void print() const { m_bt->print_tree(); }
-    void print_keys() const { m_bt->print_tree_keys(); }
+    BtreeConcurrentTest() { this->m_is_multi_threaded = true; }

-    void execute(const std::vector< std::pair< std::string, int > >& op_list) {
+    void SetUp() override {
         LOGINFO("Starting iomgr with {} threads", SISL_OPTIONS["n_threads"].as< uint32_t >());
         ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = SISL_OPTIONS["n_threads"].as< uint32_t >(),
                                                      .is_spdk = false,
                                                      .num_fibers = 1 + SISL_OPTIONS["n_fibers"].as< uint32_t >(),
                                                      .app_mem_size_mb = 0,
                                                      .hugepage_size_mb = 0});
-        std::mutex mtx;
-        iomanager.run_on_wait(iomgr::reactor_regex::all_io, [this, &mtx]() {
-            auto fv = iomanager.sync_io_capable_fibers();
-            std::unique_lock lg(mtx);
-            m_fibers.insert(m_fibers.end(), fv.begin(), fv.end());
-        });
-
-        preload(SISL_OPTIONS["preload_size"].as< uint32_t >());
-        runInParallel(op_list);
-    }
-
-private:
-    void random_range_remove() {
-        static std::uniform_int_distribution< uint32_t > s_rand_range_generator{2, 5};
-        std::random_device g_re{};
-        uint32_t nkeys = s_rand_range_generator(g_re);
-        int key = m_range_scheduler.pick_random_existing_keys(nkeys, m_max_range_input);
-        if (key == -1) { return; }
-        range_remove(key, key + nkeys - 1);
-        m_range_scheduler.remove_keys(static_cast< uint32_t >(key), static_cast< uint32_t >(key + nkeys - 1));
-    }
-
-    void range_remove(uint32_t start_key, uint32_t end_key) {
-        auto range = BtreeKeyRange< K >{K{start_key}, true, K{end_key}, true};
-        auto out_vector = query(start_key, end_key);
-        auto rreq = BtreeRangeRemoveRequest< K >{std::move(range)};
-        rreq.enable_route_tracing();
-        bool removed = (m_bt->remove(rreq) == btree_status_t::success);
-        bool expected_removed = m_shadow_map.range_remove(start_key, end_key - start_key + 1, out_vector);
-        ASSERT_EQ(removed, expected_removed) << "not a successful remove op for range " << range.to_string();
-    }
-
-    void random_query(uint32_t start_key, uint32_t end_key) {
-        auto range = BtreeKeyRange< K >{K{start_key}, true, K{end_key}, true};
-        auto out_map = query(start_key, end_key);
-        bool expected = m_shadow_map.range_get(start_key, end_key - start_key + 1, out_map);
-        ASSERT_TRUE(expected) << "not a successful query op for range " << range.to_string();
-    }
-
-    std::unordered_map< uint32_t, std::string > query(uint32_t start_k, uint32_t end_k) const {
-        std::unordered_map< uint32_t, std::string > result;
-        for (auto cur = start_k; cur <= end_k; cur++) {
-            auto key = std::make_unique< K >(cur);
-            auto value = std::make_unique< V >();
-            auto req = BtreeSingleGetRequest{key.get(), value.get()};
-            const auto status = m_bt->get(req);
-            if (status == btree_status_t::success) { result[cur] = ((const V&)req.value()).to_string(); }
-        }
-        return result;
-    }
-
-    void random_get() {
-        static thread_local std::uniform_int_distribution< uint32_t > s_rand_range_generator{1, 100};
-        std::random_device g_re{};
-        uint32_t nkeys = s_rand_range_generator(g_re);
-        int key = -1;
-        key = m_range_scheduler.pick_random_non_working_keys(nkeys, m_max_range_input);
-        if (key == -1) { return; }
-        random_query(key, key + nkeys - 1);
-        m_range_scheduler.remove_keys_from_working(static_cast< uint32_t >(key),
-                                                   static_cast< uint32_t >(key + nkeys - 1));
-    }
-
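The deleted query() helper above snapshots a window by issuing one point get per key rather than a single range query; one plausible reading is that point gets stay individually meaningful even while other fibers are concurrently mutating neighbouring keys. A stand-in version over a std::map (snapshot_window is an illustrative name):

```cpp
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>

std::unordered_map<uint32_t, std::string>
snapshot_window(const std::map<uint32_t, std::string>& store, uint32_t start_k, uint32_t end_k) {
    std::unordered_map<uint32_t, std::string> result;
    for (uint32_t cur = start_k; cur <= end_k; ++cur) {
        // One point lookup per key; missing keys are simply skipped.
        if (auto it = store.find(cur); it != store.end()) { result.emplace(cur, it->second); }
    }
    return result;
}
```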
-    void random_put() {
-        int key = m_range_scheduler.pick_random_non_existing_keys(1, m_max_range_input);
-        if (key == -1) { return; }
-        auto value = V::generate_rand();
-        put(key, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS, value);
-        m_range_scheduler.put_key(static_cast< uint32_t >(key));
-    }
-
-    void put(uint32_t key, btree_put_type put_type, V value) {
-        auto existing_v = std::make_unique< V >();
-        auto v = std::make_unique< V >(value);
-        auto k = std::make_unique< K >(key);
-        auto sreq{BtreeSinglePutRequest{k.get(), v.get(), put_type, existing_v.get()}};
-        sreq.enable_route_tracing();
-        bool done = (m_bt->put(sreq) == btree_status_t::success);
-        auto expected_done = m_shadow_map.put(key, value.to_string());
-        ASSERT_EQ(done, expected_done) << "Expected put of key " << key << " of put_type " << enum_name(put_type)
-                                       << " to be " << expected_done;
+        BtreeTestHelper< TestType >::SetUp();
+        this->m_bt = std::make_shared< typename T::BtreeType >(this->m_cfg);
+        this->m_bt->init(nullptr);
     }

-    void remove(uint32_t key) {
-        auto existing_v = std::make_unique< V >();
-        auto k = std::make_unique< K >(key);
-        auto rreq = BtreeSingleRemoveRequest{k.get(), existing_v.get()};
-        rreq.enable_route_tracing();
-
-        bool removed = (m_bt->remove(rreq) == btree_status_t::success);
-        auto expected_done = m_shadow_map.remove(key, ((const V&)rreq.value()).to_string());
-        ASSERT_EQ(removed, expected_done) << "Expected remove of key " << key << " to be " << expected_done;
+    void TearDown() override {
+        BtreeTestHelper< TestType >::TearDown();
+        iomanager.stop();
     }
-
-    void random_remove() {
-        int key = m_range_scheduler.pick_random_existing_keys(1, m_max_range_input);
-        if (key == -1) { return; }
-        remove(key);
-        m_range_scheduler.remove_key(static_cast< uint32_t >(key));
-    }
-
-    void random_range_put() { random_range_put_update(false); }
-
-    void random_range_update() { random_range_put_update(true); }
-
-    void random_range_put_update(bool replace = false) {
-        static thread_local std::uniform_int_distribution< uint32_t > s_rand_range_generator{2, 5};
-        std::random_device g_re{};
-        uint32_t nkeys = s_rand_range_generator(g_re);
-        int key = -1;
-
-        if (replace) {
-            key = m_range_scheduler.pick_random_existing_keys(nkeys, m_max_range_input);
-        } else {
-            key = m_range_scheduler.pick_random_non_existing_keys(nkeys, m_max_range_input);
-        }
-
-        if (key == -1) { return; }
-        auto value = V::generate_rand();
-        range_put(key, key + nkeys - 1, value, replace);
-        if (replace) {
-            m_range_scheduler.remove_keys_from_working(static_cast< uint32_t >(key),
-                                                       static_cast< uint32_t >(key + nkeys - 1));
-        } else {
-            m_range_scheduler.put_keys(static_cast< uint32_t >(key), static_cast< uint32_t >(key + nkeys - 1));
-        }
-    }
-
-    void range_put(uint32_t start_key, uint32_t end_key, V value, bool update) {
-        auto val = std::make_unique< V >(value);
-        auto preq = BtreeRangePutRequest< K >{
-            BtreeKeyRange< K >{start_key, true, end_key, true},
-            update ? btree_put_type::REPLACE_ONLY_IF_EXISTS : btree_put_type::INSERT_ONLY_IF_NOT_EXISTS, val.get()};
-        preq.enable_route_tracing();
-        bool done = (m_bt->put(preq) == btree_status_t::success);
-        auto expected_done = m_shadow_map.range_put(start_key, end_key - start_key + 1, value.to_string(), update);
-        ASSERT_EQ(done, expected_done);
-    }
-
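The put() oracle above in miniature: an insert-if-not-exists style put must succeed exactly when the key is absent from the shadow map, and the map is updated only on success so both sides stay in lockstep (checked_insert is an illustrative name):

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

// store_said_ok is the status the store reported; the shadow map predicts it.
bool checked_insert(std::map<uint32_t, std::string>& shadow, uint32_t key,
                    const std::string& val, bool store_said_ok) {
    const bool expected_ok = (shadow.find(key) == shadow.end());
    assert(store_said_ok == expected_ok);          // oracle check
    if (expected_ok) { shadow.emplace(key, val); } // keep shadow in lockstep
    return expected_ok;
}
```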
-    void runInParallel(const std::vector< std::pair< std::string, int > >& op_list) {
-        auto test_count = m_fibers.size();
-        for (auto it = m_fibers.begin(); it < m_fibers.end(); ++it) {
-            iomanager.run_on_forget(*it, [this, &test_count, op_list]() {
-                std::random_device g_rd{};
-                std::default_random_engine re{g_rd()};
-                const auto num_iters_per_thread =
-                    sisl::round_up(SISL_OPTIONS["num_iters"].as< uint32_t >() / m_fibers.size(), m_fibers.size());
-                std::vector< uint32_t > weights;
-                std::transform(op_list.begin(), op_list.end(), std::back_inserter(weights),
-                               [](const auto& pair) { return pair.second; });
-
-                // Construct a weighted distribution based on the input frequencies
-                std::discrete_distribution< uint32_t > s_rand_op_generator(weights.begin(), weights.end());
-
-                for (uint32_t i = 0; i < num_iters_per_thread; i++) {
-                    uint32_t op_idx = s_rand_op_generator(re);
-                    (this->*m_operations[op_list[op_idx].first])();
-                }
-                {
-                    std::unique_lock lg(m_test_done_mtx);
-                    if (--test_count == 0) { m_test_done_cv.notify_one(); }
-                }
-            });
-        }
-
-        {
-            std::unique_lock< std::mutex > lk(m_test_done_mtx);
-            m_test_done_cv.wait(lk, [&]() { return test_count == 0; });
-        }
-        LOGINFO("ALL parallel jobs joined");
-    }
-
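runInParallel above turns the (name, frequency) list into a weighted operation stream with std::discrete_distribution, which draws index i with probability weight[i] / sum(weights). That core idea, isolated (run_weighted is an illustrative name; ops[i] is assumed to correspond to op_list[i]):

```cpp
#include <cstdint>
#include <functional>
#include <random>
#include <string>
#include <utility>
#include <vector>

void run_weighted(const std::vector<std::pair<std::string, int>>& op_list,
                  const std::vector<std::function<void()>>& ops, uint32_t iters) {
    std::vector<uint32_t> weights;
    for (auto const& [name, freq] : op_list) { weights.push_back(freq); }

    std::mt19937 re{std::random_device{}()};
    // Index i is drawn with probability weights[i] / sum(weights).
    std::discrete_distribution<uint32_t> pick(weights.begin(), weights.end());

    for (uint32_t i = 0; i < iters; ++i) {
        ops[pick(re)]();
    }
}
```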
-    void preload(uint32_t preload_size) {
-        const auto chunk_size = preload_size / m_fibers.size();
-        const auto last_chunk_size = preload_size % chunk_size ?: chunk_size;
-        auto test_count = m_fibers.size();
-
-        for (std::size_t i = 0; i < m_fibers.size(); ++i) {
-            const auto start_range = i * chunk_size;
-            const auto end_range = start_range + ((i == m_fibers.size() - 1) ? last_chunk_size : chunk_size);
-            iomanager.run_on_forget(m_fibers[i], [this, start_range, end_range, &test_count]() {
-                for (uint32_t i = start_range; i < end_range; i++) {
-                    auto value = V::generate_rand();
-                    put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS, value);
-                    m_range_scheduler.put_key(i);
-                }
-                {
-                    std::unique_lock lg(m_test_done_mtx);
-                    if (--test_count == 0) { m_test_done_cv.notify_one(); }
-                }
-            });
-        }
-
-        {
-            std::unique_lock< std::mutex > lk(m_test_done_mtx);
-            m_test_done_cv.wait(lk, [&]() { return test_count == 0; });
-        }
-        LOGINFO("Preload Done");
-    }
-
-private:
-    std::unique_ptr< typename T::BtreeType > m_bt;
-    struct ShadowMap {
-    public:
-        bool put(uint32_t key, std::string value, bool update = false) {
-            std::unique_lock< mutex > lk(map_lock);
-            auto it = data.find(key);
-            if ((it == data.end() && update) || (it != data.end() && !update)) { return false; }
-            data[key] = value;
-            return true;
-        }
-
-        bool range_put(uint32_t key, uint32_t nkeys, std::string value, bool update = false) {
-            std::unique_lock< mutex > lk(map_lock);
-            if (update) {
-                if (!all_existed(key, nkeys)) { return false; }
-            } else {
-                if (!none_of_them_existed(key, nkeys)) { return false; }
-            }
-            for (auto cur = key; cur < key + nkeys; cur++) {
-                data[cur] = value;
-            }
-            return true;
-        }
-
-        bool remove(uint32_t key) {
-            std::unique_lock< mutex > lk(map_lock);
-            if (none_of_them_existed(key, 1)) { return false; }
-            auto it = data.find(key);
-            if (it == data.end()) { return false; }
-            data.erase(it);
-            return true;
-        }
-
-        bool remove(uint32_t key, std::string value) {
-            std::unique_lock< mutex > lk(map_lock);
-            if (none_of_them_existed(key, 1)) { return false; }
-            auto it = data.find(key);
-            if (it == data.end()) { return false; }
-            if (it->second != value) { return false; }
-            data.erase(it);
-            return true;
-        }
-
-        bool range_remove(uint32_t key, uint32_t nkeys) {
-            std::unique_lock< mutex > lk(map_lock);
-            if (none_of_them_existed(key, nkeys)) { return false; }
-            auto first_it = data.find(key);
-            auto last_it = data.upper_bound(key + nkeys - 1);
-            data.erase(first_it, last_it);
-            return true;
-        }
-
-        bool range_get(uint32_t key, uint32_t nkeys, std::unordered_map< uint32_t, std::string > val_map) {
-            std::unique_lock< mutex > lk(map_lock);
-            if (none_of_them_existed(key, nkeys) && val_map.size()) { return false; }
-            if (none_of_them_existed(key, nkeys) && val_map.size() == 0) { return true; }
-            uint32_t count = 0;
-            for (auto cur = key; cur < key + nkeys; cur++) {
-                if (data.find(cur) != data.end()) {
-                    if (val_map[cur] != data[cur]) { return false; }
-                    if (val_map[cur] == data[cur]) { count++; }
-                }
-            }
-            if (count != val_map.size()) { return false; }
-            return true;
-        }
-
-        bool range_remove(uint32_t key, uint32_t nkeys, std::unordered_map< uint32_t, std::string > val_map) {
-            std::unique_lock< mutex > lk(map_lock);
-            if (none_of_them_existed(key, nkeys)) { return false; }
-            for (auto cur = key; cur < key + nkeys; cur++)
-                if (data.find(cur) != data.end() && val_map[cur] != data[cur]) { return false; }
-            auto first_it = data.find(key);
-            auto last_it = data.upper_bound(key + nkeys - 1);
-            data.erase(first_it, last_it);
-            return true;
-        }
-
-        std::string to_string(uint32_t key, uint32_t nkeys) {
-            std::unique_lock< mutex > lk(map_lock);
-            std::string x = "";
-            for (auto cur = key; cur < key + nkeys; cur++) {
-                if (data.find(cur) != data.end()) x += fmt::format("[{}]={}\n", cur, data[cur]);
-            }
-            return x;
-        }
-
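preload() above hands each fiber one contiguous chunk of the key space. A simplified sketch of that partitioning; the deleted code computes the tail chunk with a modulo and the GNU `?:` extension, whereas here the last worker simply absorbs the remainder, and workers is assumed to be at least 1:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Split [0, total) into one half-open [start, end) range per worker; the
// final worker takes whatever is left so nothing is dropped.
std::vector<std::pair<uint32_t, uint32_t>> partition(uint32_t total, uint32_t workers) {
    std::vector<std::pair<uint32_t, uint32_t>> ranges;
    const uint32_t chunk = total / workers;
    for (uint32_t w = 0; w < workers; ++w) {
        const uint32_t start = w * chunk;
        const uint32_t end = (w + 1 == workers) ? total : start + chunk;
        ranges.emplace_back(start, end);
    }
    return ranges;
}
```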
-    private:
-        bool none_of_them_existed(uint32_t key, uint32_t nkeys) {
-            for (auto cur = key; cur < key + nkeys; cur++)
-                if (data.find(cur) != data.end()) return false;
-            return true;
-        }
-
-        bool all_existed(uint32_t key, uint32_t nkeys) {
-            for (auto cur = key; cur < key + nkeys; cur++)
-                if (data.find(cur) == data.end()) return false;
-            return true;
-        }
-
-        std::map< uint32_t, std::string > data;
-        mutex map_lock;
-    };
-    ShadowMap m_shadow_map;
-    RangeScheduler m_range_scheduler;
-    uint32_t m_max_range_input{1000};
-    BtreeConfig m_cfg{g_node_size};
-    std::map< std::string, op_func > m_operations = {{"put", &BtreeConcurrentTest::random_put},
-                                                     {"remove", &BtreeConcurrentTest::random_remove},
-                                                     {"range_update", &BtreeConcurrentTest::random_range_update},
-                                                     {"range_remove", &BtreeConcurrentTest::random_range_remove},
-                                                     {"query", &BtreeConcurrentTest::random_get}};
-    std::vector< iomgr::io_fiber_t > m_fibers;
-    std::mutex m_test_done_mtx;
-    std::condition_variable m_test_done_cv;
 };
+
 TYPED_TEST_SUITE(BtreeConcurrentTest, BtreeTypes);

-TYPED_TEST(BtreeConcurrentTest, AllTree) {
+TYPED_TEST(BtreeConcurrentTest, ConcurrentAllOps) {
     // range put is not supported for non-extent keys
-    std::vector< std::string > input_ops = {"put:20", "remove:20", "range_update:20", "range_remove:20", "query:20"};
+    std::vector< std::string > input_ops = {"put:20", "remove:20", "range_put:20", "range_remove:20", "query:20"};
     std::vector< std::pair< std::string, int > > ops;

     if (SISL_OPTIONS.count("operation_list")) {
@@ -897,7 +343,7 @@ TYPED_TEST(BtreeConcurrentTest, ConcurrentAllOps) {
         return std::make_pair(std::string(), 0);
     });

-    this->execute(ops);
+    this->multi_op_execute(ops);
 }

 int main(int argc, char* argv[]) {
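The operation list feeding multi_op_execute above consists of "<op>:<frequency>" strings such as "put:20". A sketch of that parsing (parse_ops is an illustrative name; it assumes numeric frequencies, since std::stoi throws on anything else):

```cpp
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int>> parse_ops(const std::vector<std::string>& input) {
    std::vector<std::pair<std::string, int>> ops;
    for (auto const& s : input) {
        auto pos = s.find(':');
        if (pos == std::string::npos) { continue; }  // skip malformed entries
        ops.emplace_back(s.substr(0, pos), std::stoi(s.substr(pos + 1)));
    }
    return ops;
}
```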