Skip to content

Commit

Permalink
Fix split, merge, varlen remove, (parent size estimation), simple rem…
Browse files Browse the repository at this point in the history
…ove (#181)
  • Loading branch information
shosseinimotlagh authored Oct 23, 2023
1 parent 4fb8fed commit 5ee291c
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 44 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "4.5.6"
version = "4.5.7"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
2 changes: 1 addition & 1 deletion src/include/homestore/btree/btree.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ retry:
m_btree_lock.unlock_shared();

ret = check_collapse_root(req);
if (ret != btree_status_t::success) {
if (ret != btree_status_t::success && ret != btree_status_t::merge_not_required) {
LOGERROR("check collapse read failed btree name {}", m_bt_cfg.name());
goto out;
}
Expand Down
57 changes: 29 additions & 28 deletions src/include/homestore/btree/detail/btree_remove_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ bool Btree< K, V >::remove_extents_in_leaf(const BtreeNodePtr& node, BtreeRangeR
template < typename K, typename V >
template < typename ReqT >
btree_status_t Btree< K, V >::check_collapse_root(ReqT& req) {
if (!m_bt_cfg.m_merge_turned_on) { return btree_status_t::merge_not_required; }
BtreeNodePtr child;
BtreeNodePtr root;
btree_status_t ret = btree_status_t::success;
Expand Down Expand Up @@ -355,7 +356,7 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
for (auto indx = start_idx + 1; indx <= end_idx; ++indx) {
if (indx == parent_node->total_entries()) {
BT_NODE_LOG_ASSERT(parent_node->has_valid_edge(), parent_node,
"Assertion failure, expected valid edge for parent_node: {}");
"Assertion failure, expected valid edge for parent_node");
}

BtreeLinkInfo child_info;
Expand Down Expand Up @@ -463,28 +464,25 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const

if (!K::is_fixed_size()) {
// Lets see if we have enough room in parent node to accommodate changes. This is needed only if the key is not
// fixed length. For fixed length node merge will always result in lesser or equal size
int64_t post_merge_size{0};
auto& old_node = old_nodes[leftmost_src.ith_nodes.back()];
if (old_node->total_entries()) {
post_merge_size += old_node->get_nth_obj_size(
std::min(leftmost_src.last_node_upto, old_node->total_entries() - 1)); // New leftmost entry
}
post_merge_size -= parent_node->get_nth_obj_size(start_idx); // Previous left entry

for (auto& node : new_nodes) {
if (node->total_entries()) { post_merge_size += node->get_nth_obj_size(node->total_entries() - 1); }
}

for (auto& node : old_nodes) {
if (node->total_entries()) { post_merge_size -= node->get_nth_obj_size(node->total_entries() - 1); }
}

if (post_merge_size > parent_node->available_size(m_bt_cfg)) {
// fixed length. For fixed length node merge will always result in less or equal size

// we first calculate the least amount of space being released after removing excess children. the key size
// cannot be taken account; so we know for sure that value (i.e., linkinfo) and also its record will be freed.
// If the end_idx is the parent's edge, the space is not released eventually.
auto excess_releasing_nodes =
old_nodes.size() - new_nodes.size() - parent_node->total_entries() == end_idx ? 1 : 0;
auto minimum_releasing_excess_size =
excess_releasing_nodes * (BtreeLinkInfo::get_fixed_size() + parent_node->get_record_size());

// aside from releasing size due to excess node, K::get_estimate_max_size is needed for each updating element
// at worst case (linkinfo and record remain the same for old and new nodes). The number of updating elements
// are the size of the new nodes (the last key of the last new node is not getting updated; hence excluded) plus
// the leftmost node.
if (parent_node->available_size(m_bt_cfg) + minimum_releasing_excess_size <
(1 + new_nodes.size() ? new_nodes.size() - 1 : 0) * K::get_estimate_max_size()) {
BT_NODE_LOG(DEBUG, parent_node,
"Merge is needed, however after merge it will add {} bytes which is more than "
"available_size={}, so not proceeding with merge",
post_merge_size, parent_node->available_size(m_bt_cfg));
"Merge is needed, however after merge, the parent MAY not have enough space to accommodate the "
"new keys, so not proceeding with merge");
ret = btree_status_t::merge_not_required;
goto out;
}
Expand Down Expand Up @@ -613,15 +611,18 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
out:
// Do free/unlock based on success/failure in reverse order
if (ret == btree_status_t::success) {
for (auto& node : old_nodes) {
free_node(node, locktype_t::WRITE, context);
for (auto it = old_nodes.rbegin(); it != old_nodes.rend(); ++it) {
BT_NODE_LOG(DEBUG, (*it).get(), "Freeing this node as part of successful merge");
free_node(*it, locktype_t::WRITE, context);
}
} else {
for (auto& node : old_nodes) {
unlock_node(node, locktype_t::WRITE);
for (auto it = old_nodes.rbegin(); it != old_nodes.rend(); ++it) {
BT_NODE_LOG(DEBUG, (*it).get(), "Unlocking this node as part of unsuccessful merge");
unlock_node(*it, locktype_t::WRITE);
}
for (auto& node : new_nodes) {
free_node(node, locktype_t::NONE, context);
for (auto it = new_nodes.rbegin(); it != new_nodes.rend(); ++it) {
BT_NODE_LOG(DEBUG, (*it).get(), "Freeing this new node as part of unsuccessful merge");
free_node(*it, locktype_t::NONE, context);
}
}
return ret;
Expand Down
3 changes: 1 addition & 2 deletions src/include/homestore/btree/detail/simple_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ class SimpleNode : public BtreeNode {
}

uint32_t num_entries_by_size(uint32_t start_idx, uint32_t size) const override {
uint32_t possible_entries = (size == 0) ? 0 : (size - 1) / get_nth_obj_size(0) + 1;
return std::min(possible_entries, this->total_entries() - start_idx);
return std::min(size / get_nth_obj_size(0), this->total_entries() - start_idx);
}

uint32_t copy_by_size(const BtreeConfig& cfg, const BtreeNode& o, uint32_t start_idx, uint32_t size) override {
Expand Down
13 changes: 7 additions & 6 deletions src/include/homestore/btree/detail/varlen_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class VariableNode : public BtreeNode {
get_nth_value(ind_s - 1, &last_1_val, false);
this->set_edge_value(last_1_val);

for (uint32_t i = ind_s; i < total_entries; i++) {
for (uint32_t i = ind_s - 1; i < total_entries; i++) {
get_var_node_header()->m_available_space += get_nth_key_len(i) + get_nth_value_len(i) + recSize;
}
this->sub_entries(total_entries - ind_s + 1);
Expand Down Expand Up @@ -270,15 +270,15 @@ class VariableNode : public BtreeNode {
vb.bytes = kb.bytes + kb.size;
vb.size = get_nth_value_len(ind);

auto sz = other.insert(0, kb, vb); // Keep on inserting on the first index, thus moving everything to right
if (!sz) break;

--ind;
++nmoved;
if ((kb.size + vb.size + this->get_record_size()) > size_to_move) {
// We reached threshold of how much we could move
break;
}

auto sz = other.insert(0, kb, vb); // Keep on inserting on the first index, thus moving everything to right

--ind;
++nmoved;
size_to_move -= sz;
}
remove(ind + 1, this->total_entries() - 1);
Expand Down Expand Up @@ -329,6 +329,7 @@ class VariableNode : public BtreeNode {
if (sz == 0) { break; }
++n;
++idx;
copy_size -= sz;
}
this->set_gen(this_gen + 1);

Expand Down
8 changes: 4 additions & 4 deletions src/tests/btree_test_kvs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include <array>
#include <homestore/btree/btree_kv.hpp>

static constexpr uint32_t g_max_keysize{120};
static constexpr uint32_t g_max_valsize{120};
static constexpr uint32_t g_max_keysize{100}; // for node size = 512 : free space : 442 => 100+100+6(record size) = 46%
static constexpr uint32_t g_max_valsize{100};
static std::random_device g_rd{};
static std::default_random_engine g_re{g_rd()};
static std::normal_distribution<> g_randkeysize_generator{32, 24};
Expand All @@ -49,10 +49,10 @@ static std::string gen_random_string(size_t len, uint32_t preamble = std::numeri
static thread_local std::random_device rd{};
static thread_local std::default_random_engine re{rd()};
std::uniform_int_distribution< size_t > rand_char{0, alphanum.size() - 1};
for (size_t i{0}; i < len; ++i) {
if (len < str.size()) { len = str.size(); }
for (size_t i{0}; i < len - str.size(); ++i) {
str += alphanum[rand_char(re)];
}
str += '\0';
return str;
}

Expand Down
2 changes: 0 additions & 2 deletions src/tests/test_mem_btree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,7 @@ class BtreeConcurrentTest : public testing::Test {
});

preload(SISL_OPTIONS["preload_size"].as< uint32_t >());
print_keys();
runInParallel(op_list);
print_keys();
}

private:
Expand Down

0 comments on commit 5ee291c

Please sign in to comment.