Skip to content

Commit

Permalink
Fix split, merge, varlen remove, (parent size estimation), simple remove
Browse files Browse the repository at this point in the history
  • Loading branch information
shosseinimotlagh committed Oct 5, 2023
1 parent bf8e6fd commit 920845b
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 57 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "4.5.3"
version = "4.5.4"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
2 changes: 1 addition & 1 deletion src/include/homestore/btree/btree.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ retry:
m_btree_lock.unlock_shared();

ret = check_collapse_root(req);
if (ret != btree_status_t::success) {
if (ret != btree_status_t::success && ret != btree_status_t::merge_not_required) {
LOGERROR("check collapse read failed btree name {}", m_bt_cfg.name());
goto out;
}
Expand Down
3 changes: 2 additions & 1 deletion src/include/homestore/btree/detail/btree_mutate_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ btree_status_t Btree< K, V >::split_node(const BtreeNodePtr& parent_node, const
}
BT_NODE_LOG(TRACE, parent_node, "Available space for split entry={}", parent_node->available_size(m_bt_cfg));

// child_node1->inc_link_version();
// child_node1->inc_link_version();

// Update the existing parent node entry to point to second child ptr.
parent_node->update(parent_ind, child_node2->link_info());
Expand Down Expand Up @@ -552,6 +552,7 @@ btree_status_t Btree< K, V >::split_node(const BtreeNodePtr& parent_node, const
template < typename K, typename V >
template < typename ReqT >
bool Btree< K, V >::is_split_needed(const BtreeNodePtr& node, const BtreeConfig& cfg, ReqT& req) const {
if (node->total_entries() <= 1) { return false; }
if (bt_thread_vars()->force_split_node && (bt_thread_vars()->force_split_node == node)) {
bt_thread_vars()->force_split_node = nullptr;
return true;
Expand Down
87 changes: 72 additions & 15 deletions src/include/homestore/btree/detail/btree_remove_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ bool Btree< K, V >::remove_extents_in_leaf(const BtreeNodePtr& node, BtreeRangeR
template < typename K, typename V >
template < typename ReqT >
btree_status_t Btree< K, V >::check_collapse_root(ReqT& req) {
if (!m_bt_cfg.m_merge_turned_on) { return btree_status_t::merge_not_required; }
BtreeNodePtr child;
BtreeNodePtr root;
btree_status_t ret = btree_status_t::success;
Expand Down Expand Up @@ -355,7 +356,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
for (auto indx = start_idx + 1; indx <= end_idx; ++indx) {
if (indx == parent_node->total_entries()) {
BT_NODE_LOG_ASSERT(parent_node->has_valid_edge(), parent_node,
"Assertion failure, expected valid edge for parent_node: {}");
"Assertion failure, expected valid edge for parent_node");
}

BtreeLinkInfo child_info;
Expand Down Expand Up @@ -465,26 +466,79 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
// Lets see if we have enough room in parent node to accommodate changes. This is needed only if the key is not
// fixed length. For fixed length node merge will always result in lesser or equal size
int64_t post_merge_size{0};
auto& old_node = old_nodes[leftmost_src.ith_nodes.back()];
if (old_node->total_entries()) {
post_merge_size += old_node->get_nth_obj_size(
std::min(leftmost_src.last_node_upto, old_node->total_entries() - 1)); // New leftmost entry
auto excess = old_nodes.size() - new_nodes.size();
int64_t p_available_estimation = static_cast< int64_t >(parent_node->available_size(m_bt_cfg));
// step 1 : return back the size of excess nodes
if (excess) {
for (uint32_t s = start_idx + 1; s <= start_idx + excess; s++) {
// the last one is edge, so space will not get released.
if (start_idx + excess != parent_node->total_entries()) {
p_available_estimation +=
(static_cast< int64_t >(parent_node->get_nth_obj_size(s)) + parent_node->get_record_size());
}
}
}
post_merge_size -= parent_node->get_nth_obj_size(start_idx); // Previous left entry

for (auto& node : new_nodes) {
if (node->total_entries()) { post_merge_size += node->get_nth_obj_size(node->total_entries() - 1); }
if (p_available_estimation <= 0) {
BT_NODE_LOG(DEBUG, parent_node,
"Merge is needed, however after merge, freeing excess nodes keys would cause lack of "
"available_size={} bytes, so not proceeding with merge [this means there is a bug in "
"calculation of free space]",
p_available_estimation);
ret = btree_status_t::merge_not_required;
goto out;
}

for (auto& node : old_nodes) {
if (node->total_entries()) { post_merge_size -= node->get_nth_obj_size(node->total_entries() - 1); }
// step 2 : check if at any time (in the same actual order) there exists enough space to accommodate the new
// keys in the parent node.
for (int32_t s = new_nodes.size() - 1; s >= 0; s--) {
if (new_nodes[s]->total_entries()) {
int64_t old_parent_key_size = 0;
int64_t new_parent_key_size = 0;
if (parent_node->total_entries() != s + start_idx + excess + 1) {
old_parent_key_size =
parent_node->get_nth_key< K >(s + start_idx + excess + 1, false).serialized_size();
new_parent_key_size = new_nodes[s]->get_last_key< K >().serialized_size();
p_available_estimation += static_cast< int64_t >(old_parent_key_size) - new_parent_key_size;
}

if (p_available_estimation <= 0) {
BT_NODE_LOG(DEBUG, parent_node,
"Merge is needed, however after merge, the new key for the new_node {} update would "
"cause a transient lack of "
"available_size={} bytes [{} released vs {} needed ], so not proceeding with merge",
new_nodes[s]->node_id(), p_available_estimation, old_parent_key_size,
new_parent_key_size);
ret = btree_status_t::merge_not_required;
goto out;
}
}
}

if (post_merge_size > parent_node->available_size(m_bt_cfg)) {
// step 3 : check if there exists enough space to accommodate the updated last key of leftmost node in its
// parent.
int64_t old_parent_key_size = 0;
int64_t new_parent_key_size = 0;
if (start_idx != parent_node->total_entries()) {
BtreeNodePtr old_node = old_nodes[leftmost_src.ith_nodes.back()];
if (leftmost_src.last_node_upto == 0 && leftmost_src.ith_nodes.size() > 1) {
old_node.reset(old_nodes[leftmost_src.ith_nodes.back() - 1].get());
}
if (old_node->total_entries()) {
uint32_t old_node_idx =
std::min(leftmost_src.last_node_upto == 0 ? old_node->total_entries() : leftmost_src.last_node_upto,
old_node->total_entries()) -
1;
new_parent_key_size =
old_node->template get_nth_key< K >(old_node_idx, false).serialized_size(); // New leftmost entry
}
old_parent_key_size = parent_node->get_nth_key< K >(start_idx, false).serialized_size();
}
p_available_estimation += static_cast< int64_t >(old_parent_key_size) - new_parent_key_size;
if (p_available_estimation <= 0) {
BT_NODE_LOG(DEBUG, parent_node,
"Merge is needed, however after merge it will add {} bytes which is more than "
"available_size={}, so not proceeding with merge",
post_merge_size, parent_node->available_size(m_bt_cfg));
"Merge is needed, however after merge, the new leftmost key update would cause lack of "
"available_size={} bytes [{} released vs {} needed ], so not proceeding with merge",
p_available_estimation, old_parent_key_size, new_parent_key_size);
ret = btree_status_t::merge_not_required;
goto out;
}
Expand Down Expand Up @@ -591,13 +645,16 @@ out:
// Do free/unlock based on success/failure in reverse order
if (ret == btree_status_t::success) {
for (auto& node : old_nodes) {
BT_NODE_LOG(DEBUG, node, "Freeing this node as part of successful merge");
free_node(node, locktype_t::WRITE, context);
}
} else {
for (auto& node : old_nodes) {
BT_NODE_LOG(DEBUG, node, "Unlocking this node as part of unsuccessful merge");
unlock_node(node, locktype_t::WRITE);
}
for (auto& node : new_nodes) {
BT_NODE_LOG(DEBUG, node, "Freeing this new node as part of unsuccessful merge");
free_node(node, locktype_t::NONE, context);
}
}
Expand Down
3 changes: 1 addition & 2 deletions src/include/homestore/btree/detail/simple_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ class SimpleNode : public BtreeNode {
}

uint32_t num_entries_by_size(uint32_t start_idx, uint32_t size) const override {
uint32_t possible_entries = (size == 0) ? 0 : (size - 1) / get_nth_obj_size(0) + 1;
return std::min(possible_entries, this->total_entries() - start_idx);
return std::min(size / get_nth_obj_size(0), this->total_entries() - start_idx);
}

uint32_t copy_by_size(const BtreeConfig& cfg, const BtreeNode& o, uint32_t start_idx, uint32_t size) override {
Expand Down
13 changes: 7 additions & 6 deletions src/include/homestore/btree/detail/varlen_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class VariableNode : public BtreeNode {
get_nth_value(ind_s - 1, &last_1_val, false);
this->set_edge_value(last_1_val);

for (uint32_t i = ind_s; i < total_entries; i++) {
for (uint32_t i = ind_s - 1; i < total_entries; i++) {
get_var_node_header()->m_available_space += get_nth_key_len(i) + get_nth_value_len(i) + recSize;
}
this->sub_entries(total_entries - ind_s + 1);
Expand Down Expand Up @@ -270,15 +270,15 @@ class VariableNode : public BtreeNode {
vb.bytes = kb.bytes + kb.size;
vb.size = get_nth_value_len(ind);

auto sz = other.insert(0, kb, vb); // Keep on inserting on the first index, thus moving everything to right
if (!sz) break;

--ind;
++nmoved;
if ((kb.size + vb.size + this->get_record_size()) > size_to_move) {
// We reached threshold of how much we could move
break;
}

auto sz = other.insert(0, kb, vb); // Keep on inserting on the first index, thus moving everything to right

--ind;
++nmoved;
size_to_move -= sz;
}
remove(ind + 1, this->total_entries() - 1);
Expand Down Expand Up @@ -329,6 +329,7 @@ class VariableNode : public BtreeNode {
if (sz == 0) { break; }
++n;
++idx;
copy_size -= sz;
}
this->set_gen(this_gen + 1);

Expand Down
7 changes: 4 additions & 3 deletions src/tests/btree_test_kvs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include <array>
#include <homestore/btree/btree_kv.hpp>

static constexpr uint32_t g_max_keysize{120};
static constexpr uint32_t g_max_valsize{120};
static constexpr uint32_t g_max_keysize{100}; // for node size = 512 : free space : 442 => 100+100+6(record size) = 46%
static constexpr uint32_t g_max_valsize{100};
static std::random_device g_rd{};
static std::default_random_engine g_re{g_rd()};
static std::normal_distribution<> g_randkeysize_generator{32, 24};
Expand All @@ -49,7 +49,8 @@ static std::string gen_random_string(size_t len, uint32_t preamble = std::numeri
static thread_local std::random_device rd{};
static thread_local std::default_random_engine re{rd()};
std::uniform_int_distribution< size_t > rand_char{0, alphanum.size() - 1};
for (size_t i{0}; i < len; ++i) {
if (len < str.size()) { len = str.size(); }
for (size_t i{0}; i < len - str.size(); ++i) {
str += alphanum[rand_char(re)];
}
str += '\0';
Expand Down
28 changes: 0 additions & 28 deletions src/tests/test_mem_btree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,32 +439,6 @@ TYPED_TEST(BtreeTest, RangeUpdate) {
this->query_validate(0, num_entries - 1, 75);
}

TYPED_TEST(BtreeTest, SimpleRemoveRange) {
// Forward sequential insert
const auto num_entries = 20;
LOGINFO("Step 1: Do forward sequential insert for {} entries", num_entries);
for (uint32_t i{0}; i < num_entries; ++i) {
this->put(i, btree_put_type::INSERT_ONLY_IF_NOT_EXISTS);
}
LOGINFO("Step 2: Do range remove for {} entries", num_entries);
// this->print_keys(); // EXPECT size = 20 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
this->range_remove(5, 10);
// this->print_keys(); // EXPECT size = 14 : 0 1 2 3 4 [5 6 7 8 9 10] 11 12 13 14 15 16 17 18 19
this->range_remove(0, 2);
// this->print_keys(); // EXPECT size = 11 : [0 1 2] 3 4 11 12 13 14 15 16 17 18 19
this->range_remove(18, 19);
// this->print_keys(); // EXPECT size = 9 : 3 4 11 12 13 14 15 16 17 [18 19]
this->range_remove(17, 17);
// this->print_keys(); // EXPECT size = 8 : 3 4 11 12 13 14 15 16 [17]
this->range_remove(1, 5);
// this->print_keys(); // EXPECT size = 6 : [3 4] 11 12 13 14 15 16
this->range_remove(1, 20);
// this->print_keys(); // EXPECT size = 0 : [11 12 13 14 15 16]

this->query_all_validate();
// this->query_validate(0, num_entries , 75);
}

TYPED_TEST(BtreeTest, RandomRemove) {
// Forward sequential insert
const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >();
Expand Down Expand Up @@ -556,9 +530,7 @@ class BtreeConcurrentTest : public testing::Test {
});

preload(SISL_OPTIONS["preload_size"].as< uint32_t >());
print_keys();
runInParallel(op_list);
print_keys();
}

private:
Expand Down

0 comments on commit 920845b

Please sign in to comment.