Commit

Merge and resolved conflicts, fixed review comments
hkadayam committed Nov 9, 2023
1 parent fa5f885 commit a5b1e8f
Showing 8 changed files with 84 additions and 104 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
@@ -5,7 +5,7 @@

 class HomestoreConan(ConanFile):
     name = "homestore"
-    version = "4.7.1"
+    version = "4.8.1"

     homepage = "https://github.com/eBay/Homestore"
     description = "HomeStore Storage Engine"
2 changes: 2 additions & 0 deletions src/include/homestore/btree/btree.hpp
@@ -63,6 +63,8 @@ class Btree {
         return fiber_map[this_id].get();
     }

+    static bool is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info);
+
 protected:
     BtreeConfig m_bt_cfg;
6 changes: 6 additions & 0 deletions src/include/homestore/btree/btree.ipp
@@ -346,11 +346,17 @@ template < typename K, typename V >
 bnodeid_t Btree< K, V >::root_node_id() const {
     return m_root_node_info.bnode_id();
 }

 template < typename K, typename V >
 uint64_t Btree< K, V >::root_link_version() const {
     return m_root_node_info.link_version();
 }

+template < typename K, typename V >
+bool Btree< K, V >::is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info) {
+    return child_info.link_version() != child_node->link_version();
+}
+
 // TODO: Commenting out flip till we figure out how to move flip dependency inside sisl package.
 #if 0
 #ifdef _PRERELEASE
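For context on the helper this commit hoists into Btree< K, V >: each parent entry stores a BtreeLinkInfo, and a mismatch between its recorded link_version and the child node's own link_version means the child was restructured (split/merged) after the parent entry was written, so the link needs repair before descending. A minimal self-contained sketch of that check, using simplified stand-ins for the real homestore types:

#include <cstdint>
#include <memory>

// Hypothetical stand-ins; the real classes live in the homestore/btree headers.
struct BtreeLinkInfo {
    uint64_t m_link_version{0};
    uint64_t link_version() const { return m_link_version; }
};
struct BtreeNode {
    uint64_t m_link_version{0};
    uint64_t link_version() const { return m_link_version; }
};
using BtreeNodePtr = std::shared_ptr< BtreeNode >;

// Same predicate the commit moves into Btree< K, V >: the parent's stored link
// is stale iff its recorded version no longer matches the child's version.
static bool is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info) {
    return child_info.link_version() != child_node->link_version();
}

int main() {
    BtreeLinkInfo parent_view{.m_link_version = 2};
    auto child = std::make_shared< BtreeNode >();
    child->m_link_version = 3;  // child restructured after the parent entry was written
    return is_repair_needed(child, parent_view) ? 0 : 1;  // repair needed -> exits 0
}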
4 changes: 0 additions & 4 deletions src/include/homestore/btree/detail/btree_mutate_impl.ipp
@@ -18,10 +18,6 @@

 namespace homestore {

-static bool is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info) {
-    return child_info.link_version() != child_node->link_version();
-}
-
 /* This function does the heavy lifiting of co-ordinating inserts. It is a recursive function which walks
  * down the tree.
  *
22 changes: 16 additions & 6 deletions src/include/homestore/btree/detail/btree_remove_impl.ipp
@@ -53,7 +53,6 @@ btree_status_t Btree< K, V >::do_remove(const BtreeNodePtr& my_node, locktype_t
         return modified ? btree_status_t::success : btree_status_t::not_found;
     }

-    // bool go_to_out = false;
 retry:
     locktype_t child_cur_lock = locktype_t::NONE;
     uint32_t curr_idx;
@@ -80,7 +79,6 @@
         }
         end_idx = start_idx = (end_idx - start_idx) / 2; // Pick the middle, TODO: Ideally we need to pick random
     }
-    // if (go_to_out) { goto out_return; }

     if (req.route_tracing) { append_route_trace(req, my_node, btree_event_t::READ, start_idx, end_idx); }
     curr_idx = start_idx;
@@ -366,27 +364,39 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
     if (!K::is_fixed_size()) {
         // Lets see if we have enough room in parent node to accommodate changes. This is needed only if the key is not
         // fixed length. For fixed length node merge will always result in less or equal size
+        auto excess_releasing_nodes =
+            old_nodes.size() - new_nodes.size() - (parent_node->total_entries() == end_idx) ? 1 : 0;
+        if (!parent_node->has_room_for_put(btree_put_type::INSERT, excess_releasing_nodes * K::get_max_size(),
+                                           excess_releasing_nodes * BtreeLinkInfo::get_fixed_size())) {
+            BT_NODE_LOG(DEBUG, parent_node,
+                        "Merge is needed, however after merge, the parent MAY not have enough space to accommodate the "
+                        "new keys, so not proceeding with merge");
+            ret = btree_status_t::merge_not_required;
+            goto out;
+        }
+
+#if 0
         // we first calculate the least amount of space being released after removing excess children. the key size
         // cannot be taken account; so we know for sure that value (i.e., linkinfo) and also its record will be freed.
        // If the end_idx is the parent's edge, the space is not released eventually.
         auto excess_releasing_nodes =
-            old_nodes.size() - new_nodes.size() - parent_node->total_entries() == end_idx ? 1 : 0;
+            old_nodes.size() - new_nodes.size() - (parent_node->total_entries() == end_idx) ? 1 : 0;
         auto minimum_releasing_excess_size =
             excess_releasing_nodes * (BtreeLinkInfo::get_fixed_size() + parent_node->get_record_size());

-        // aside from releasing size due to excess node, K::get_estimate_max_size is needed for each updating element
+        // aside from releasing size due to excess node, K::get_max_size is needed for each updating element
         // at worst case (linkinfo and record remain the same for old and new nodes). The number of updating elements
         // are the size of the new nodes (the last key of the last new node is not getting updated; hence excluded) plus
         // the leftmost node.
-        if (parent_node->available_size(m_bt_cfg) + minimum_releasing_excess_size <
-            (1 + new_nodes.size() ? new_nodes.size() - 1 : 0) * K::get_estimate_max_size()) {
+        if (parent_node->available_size() + minimum_releasing_excess_size <
+            (1 + new_nodes.size() ? new_nodes.size() - 1 : 0) * K::get_max_size()) {
             BT_NODE_LOG(DEBUG, parent_node,
                         "Merge is needed, however after merge, the parent MAY not have enough space to accommodate the "
                         "new keys, so not proceeding with merge");
             ret = btree_status_t::merge_not_required;
             goto out;
         }
+#endif
     }

     // Now it is time to commit things and at this point no going back, since in-place write nodes are modified
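A note for readers of the `excess_releasing_nodes` expression above: in C++ the conditional operator binds looser than `-`, so `a - b - (cond) ? 1 : 0` parses as `(a - b - cond) ? 1 : 0`, collapsing the whole difference to 0 or 1, rather than "subtract one more when cond holds". A minimal standalone sketch of the two parses (variable names here are illustrative, not from the commit):

#include <iostream>

int main() {
    std::size_t old_sz = 6, new_sz = 3;
    bool at_edge = true;
    // Parses as ((old_sz - new_sz - at_edge) ? 1 : 0): yields 1 whenever the
    // subtraction is non-zero.
    auto collapsed = old_sz - new_sz - at_edge ? 1 : 0;        // 6-3-1 != 0 -> 1
    // Parenthesizing the conditional gives the "subtract one more if at the
    // parent's edge" reading.
    auto intended = old_sz - new_sz - (at_edge ? 1 : 0);       // 6-3-1 -> 2
    std::cout << collapsed << " vs " << intended << "\n";      // prints "1 vs 2"
    return 0;
}

The same pattern appears in `(1 + new_nodes.size() ? new_nodes.size() - 1 : 0)` inside the #if 0 block, which parses as `(1 + new_nodes.size()) ? ... : 0`.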
2 changes: 1 addition & 1 deletion src/include/homestore/btree/detail/varlen_node.hpp
@@ -467,7 +467,7 @@ class VariableNode : public VariantNode< K, V > {

     bool has_room_for_put(btree_put_type put_type, uint32_t key_size, uint32_t value_size) const override {
         auto needed_size = key_size + value_size;
-        if ((put_type == btree_put_type::UPSERT) + (put_type == btree_put_type::INSERT)) {
+        if ((put_type == btree_put_type::UPSERT) || (put_type == btree_put_type::INSERT)) {
             needed_size += get_record_size();
         }
         return (available_size() >= needed_size);
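The one-character fix above swaps `+` for `||` between two bool comparisons. Adding two bools yields an int of 0, 1, or 2, so as a condition it happens to be truthy in exactly the same cases; `||` still matters because it states the intent and short-circuits. A standalone sketch (the enum here is a simplified stand-in):

#include <cassert>

enum class btree_put_type { INSERT, UPSERT, UPDATE };

int main() {
    auto put_type = btree_put_type::UPDATE;
    // Sum of two bools: 0/1/2, truthy iff at least one comparison holds.
    bool via_plus = (put_type == btree_put_type::UPSERT) + (put_type == btree_put_type::INSERT);
    // Logical-or: same truth table for this condition, but idiomatic and
    // short-circuiting; it also survives refactors that reuse the expression.
    bool via_or = (put_type == btree_put_type::UPSERT) || (put_type == btree_put_type::INSERT);
    assert(via_plus == via_or);
    return 0;
}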
116 changes: 57 additions & 59 deletions src/lib/blkalloc/bitmap_blk_allocator.cpp
@@ -127,75 +127,73 @@ void BitmapBlkAllocator::free_on_disk(BlkId const& bid) {
     // this api should be called only on persistent blk allocator
     DEBUG_ASSERT_EQ(is_persistent(), true, "free_on_disk called for non-persistent blk allocator");

-    auto unset_on_disk_bm =
-        [this](auto& b) {
-            BlkAllocPortion& portion = blknum_to_portion(b.blk_num());
-            {
-                auto lock{portion.portion_auto_lock()};
-                if (!hs()->is_initializing()) {
-                    // During recovery we might try to free the entry which is already freed while replaying the
-                    // journal, This assert is valid only post recovery.
-                    if (!m_disk_bm->is_bits_set(b.blk_num(), b.blk_count())) {
-                        BLKALLOC_LOG(ERROR, "bit not set {} nblks {} chunk number {}", b.blk_num(), b.blk_count(),
-                                     m_chunk_id);
-                        for (blk_count_t i{0}; i < b.blk_count(); ++i) {
-                            if (!m_disk_bm->is_bits_set(b.blk_num() + i, 1)) {
-                                BLKALLOC_LOG(ERROR, "bit not set {}", b.blk_num() + i);
-                            }
-                        }
-                        BLKALLOC_REL_ASSERT(m_disk_bm->is_bits_set(b.blk_num(), b.blk_count()),
-                                            "Expected disk bits to set blk num {} num blks {}", b.blk_num(),
-                                            b.blk_count());
-                    }
-                }
-                m_disk_bm->reset_bits(b.blk_num(), b.blk_count());
-            }
-        };
+    auto unset_on_disk_bm = [this](auto& b) {
+        BlkAllocPortion& portion = blknum_to_portion(b.blk_num());
+        {
+            auto lock{portion.portion_auto_lock()};
+            if (!hs()->is_initializing()) {
+                // During recovery we might try to free the entry which is already freed while replaying the
+                // journal, This assert is valid only post recovery.
+                if (!m_disk_bm->is_bits_set(b.blk_num(), b.blk_count())) {
+                    BLKALLOC_LOG(ERROR, "bit not set {} nblks {} chunk number {}", b.blk_num(), b.blk_count(),
+                                 m_chunk_id);
+                    for (blk_count_t i{0}; i < b.blk_count(); ++i) {
+                        if (!m_disk_bm->is_bits_set(b.blk_num() + i, 1)) {
+                            BLKALLOC_LOG(ERROR, "bit not set {}", b.blk_num() + i);
+                        }
+                    }
+                    BLKALLOC_REL_ASSERT(m_disk_bm->is_bits_set(b.blk_num(), b.blk_count()),
+                                        "Expected disk bits to set blk num {} num blks {}", b.blk_num(),
+                                        b.blk_count());
+                }
+            }
+            m_disk_bm->reset_bits(b.blk_num(), b.blk_count());
+        }
+    };

     if (bid.is_multi()) {
         MultiBlkId const& mbid = r_cast< MultiBlkId const& >(bid);
         auto it = mbid.iterate();
         while (auto const b = it.next()) {
             unset_on_disk_bm(*b);
         }
     } else {
         unset_on_disk_bm(bid);
     }
 }

-sisl::byte_array
-BitmapBlkAllocator::acquire_underlying_buffer() {
+sisl::byte_array BitmapBlkAllocator::acquire_underlying_buffer() {
     // prepare and temporary alloc list, where blkalloc is accumulated till underlying buffer is released.
     // RCU will wait for all I/Os that are still in critical section (allocating on disk bm) to complete and exit;
     auto alloc_list_ptr = new sisl::ThreadVector< BlkId >();
     auto old_alloc_list_ptr = rcu_xchg_pointer(&m_alloc_blkid_list, alloc_list_ptr);
     synchronize_rcu();

     BLKALLOC_REL_ASSERT(old_alloc_list_ptr == nullptr, "Multiple acquires concurrently?");
     return (m_disk_bm->serialize(m_align_size));
 }

 void BitmapBlkAllocator::release_underlying_buffer() {
     // set to nullptr, so that alloc will go to disk bm directly
     // wait for all I/Os in critical section (still accumulating bids) to complete and exit;
     auto old_alloc_list_ptr = rcu_xchg_pointer(&m_alloc_blkid_list, nullptr);
     synchronize_rcu();

     // at this point, no I/O will be pushing back to the list (old_alloc_list_ptr);
     auto it = old_alloc_list_ptr->begin(true /* latest */);
     const BlkId* bid{nullptr};
     while ((bid = old_alloc_list_ptr->next(it)) != nullptr) {
         alloc_on_disk(*bid);
     }
     old_alloc_list_ptr->clear();
     delete (old_alloc_list_ptr);
 }

 /* Get status */
 nlohmann::json BitmapBlkAllocator::get_status(int) const { return nlohmann::json{}; }

 sisl::ThreadVector< BlkId >* BitmapBlkAllocator::get_alloc_blk_list() {
     auto p = rcu_dereference(m_alloc_blkid_list);
     return p;
 }

 } // namespace homestore
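The acquire/release pair above relies on a swap-then-wait RCU idiom: exchange the shared list pointer, then call synchronize_rcu() so any writer still inside its read-side critical section drains out before the old list (or the serialized bitmap) is touched. A minimal sketch of the same idiom, assuming the userspace-rcu (liburcu) API the file already uses, a plain std::vector in place of sisl::ThreadVector, and with per-thread rcu_register_thread() calls elided for brevity:

#include <urcu.h>   // liburcu: rcu_read_lock/unlock, rcu_dereference, rcu_xchg_pointer, synchronize_rcu
#include <vector>

static std::vector< int >* g_pending{nullptr};  // stand-in for m_alloc_blkid_list

// Writer side (hypothetical): record an id while inside an RCU read-side section.
void record_pending(int id) {
    rcu_read_lock();
    auto* lst = rcu_dereference(g_pending);
    if (lst) { lst->push_back(id); }  // the real code uses a thread-safe vector
    rcu_read_unlock();
}

// "Acquire" side: swap in a fresh list; once synchronize_rcu() returns, no
// writer can still be appending to the returned (old) list, so it is safe to
// drain and delete.
std::vector< int >* detach_pending() {
    auto* fresh = new std::vector< int >();
    auto* old = rcu_xchg_pointer(&g_pending, fresh);
    synchronize_rcu();
    return old;
}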
34 changes: 1 addition & 33 deletions src/tests/test_mem_btree.cpp
@@ -289,44 +289,12 @@ struct BtreeConcurrentTest : public BtreeTestHelper< TestType > {
     BtreeConcurrentTest() { this->m_is_multi_threaded = true; }

     void SetUp() override {
-        m_cfg.m_leaf_node_type = T::leaf_node_type;
-        m_cfg.m_int_node_type = T::interior_node_type;
-        m_max_range_input = SISL_OPTIONS["num_entries"].as< uint32_t >();
-        if (SISL_OPTIONS.count("disable_merge")) m_cfg.m_merge_turned_on = false;
-        m_fibers.clear();
-        m_bt = std::make_unique< typename T::BtreeType >(m_cfg);
-        m_bt->init(nullptr);
-    }
-
-    void TearDown() override { iomanager.stop(); }
-
-    void print() const { m_bt->print_tree(); }
-    void print_keys() const { m_bt->print_tree_keys(); }
-
-    void execute(const std::vector< std::pair< std::string, int > >& op_list) {
-
-        m_cfg.m_leaf_node_type = T::leaf_node_type;
-        m_cfg.m_int_node_type = T::interior_node_type;
-        m_max_range_input = SISL_OPTIONS["num_entries"].as< uint32_t >();
-        if (SISL_OPTIONS.count("disable_merge")) m_cfg.m_merge_turned_on = false;
-        m_fibers.clear();
-        m_bt = std::make_unique< typename T::BtreeType >(m_cfg);
-        m_bt->init(nullptr);
-    }
-
-    void TearDown() override { iomanager.stop(); }
-
-    void print() const { m_bt->print_tree(); }
-    void print_keys() const { m_bt->print_tree_keys(); }
-
-    void execute(const std::vector< std::pair< std::string, int > >& op_list) {
         LOGINFO("Starting iomgr with {} threads", SISL_OPTIONS["n_threads"].as< uint32_t >());
         ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = SISL_OPTIONS["n_threads"].as< uint32_t >(),
                                                      .is_spdk = false,
                                                      .num_fibers = 1 + SISL_OPTIONS["n_fibers"].as< uint32_t >(),
                                                      .app_mem_size_mb = 0,
-                                                     0});
+                                                     .hugepage_size_mb = 0});

         BtreeTestHelper< TestType >::SetUp();
         this->m_bt = std::make_shared< typename T::BtreeType >(this->m_cfg);
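The `.hugepage_size_mb = 0` change above replaces a bare positional `0` at the tail of a designated-initializer list, which is ill-formed: C++20 does not allow mixing designated and non-designated initializers, and designators must follow declaration order. A standalone sketch, using a hypothetical struct that mirrors the fields of iomgr::iomgr_params seen in the diff:

#include <cstdint>

// Hypothetical stand-in for iomgr::iomgr_params; field names come from the diff.
struct iomgr_params {
    uint32_t num_threads{0};
    bool is_spdk{false};
    uint32_t num_fibers{1};
    uint32_t app_mem_size_mb{0};
    uint32_t hugepage_size_mb{0};
};

int main() {
    // OK: every initializer designated, in declaration order.
    iomgr_params p{.num_threads = 4, .is_spdk = false, .num_fibers = 3,
                   .app_mem_size_mb = 0, .hugepage_size_mb = 0};
    // iomgr_params q{.app_mem_size_mb = 0, 0};  // ill-formed: mixes designated
    //                                           // and positional initializers
    return static_cast< int >(p.hugepage_size_mb);
}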
