Skip to content

Commit

Permalink
Index crash recovery (#437)
Browse files Browse the repository at this point in the history
This PR changes the way WritebackCache handles index crash recovery. A detailed README is provided as part of this PR.

Co-authored-by: shosseinimotlagh <[email protected]>
  • Loading branch information
hkadayam and shosseinimotlagh authored Jun 11, 2024
1 parent 30ed777 commit 509260c
Show file tree
Hide file tree
Showing 53 changed files with 2,046 additions and 1,146 deletions.
Binary file added docs/imgs/.DS_Store
Binary file not shown.
Binary file added docs/imgs/Btree_Node_Split.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/imgs/Child_Node_Merge_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/imgs/Existing_Parent_Node_Split_3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/imgs/Multi_Existing_Node_Split_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/imgs/Multi_New_Node_Split_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/imgs/Simple_Split_Graph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/imgs/Typical_Btree.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 48 additions & 20 deletions src/include/homestore/btree/btree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,42 @@ SISL_LOGGING_DECL(btree)
namespace homestore {

using BtreeNodePtr = boost::intrusive_ptr< BtreeNode >;
using BtreeNodeList = folly::small_vector< BtreeNodePtr, 3 >;

// Bundle of btree bookkeeping state that is handed out by Btree::bt_thread_vars().
// NOTE(review): presumably one instance exists per thread/fiber - confirm against bt_thread_vars().
struct BtreeThreadVariables {
// Info records for nodes currently held with a write lock (presumably by the calling context - confirm).
std::vector< btree_locked_node_info > wr_locked_nodes;
// Info records for nodes currently held with a read lock (presumably by the calling context - confirm).
std::vector< btree_locked_node_info > rd_locked_nodes;
// Node singled out for a forced split; null when no node is marked.
// NOTE(review): the consumer of this field is not visible here - verify its exact use.
BtreeNodePtr force_split_node{nullptr};
};

// Bit mask of named "flip" points that can be switched on for a btree instance.
// Each flip has a bit constant and a lower-case textual name; flips can be set either way and
// are never cleared by this type.
// NOTE(review): what each flip triggers is decided by the code that tests these bits, which is
// not part of this struct - confirm the semantics there.
struct BTREE_FLIPS {
    // One bit per flip point.
    static constexpr uint32_t INDEX_PARENT_NON_ROOT = 1 << 0;
    static constexpr uint32_t INDEX_PARENT_ROOT = 1 << 1;
    static constexpr uint32_t INDEX_LEFT_SIBLING = 1 << 2;
    static constexpr uint32_t INDEX_RIGHT_SIBLING = 1 << 3;

    // OR-ed combination of the constants above that are currently set.
    uint32_t flips;

    BTREE_FLIPS() : flips{0} {}

    // Returns the names of all flips that are set, in bit order. Every name - including the last
    // one - is followed by a ','; the result is empty when no flip is set.
    std::string list() const {
        std::string str;
        if (flips & INDEX_PARENT_NON_ROOT) { str += "index_parent_non_root,"; }
        if (flips & INDEX_PARENT_ROOT) { str += "index_parent_root,"; }
        if (flips & INDEX_LEFT_SIBLING) { str += "index_left_sibling,"; }
        if (flips & INDEX_RIGHT_SIBLING) { str += "index_right_sibling,"; }
        return str;
    }

    // Turns on the given flip bit(s); bits that are already set stay set.
    void set_flip(uint32_t flip) { flips |= flip; }

    // Turns on the flip with the given textual name (the same names list() produces, without the
    // trailing comma). A name that matches no known flip is ignored without any error.
    // The name is taken by const reference so that no string copy is made per call, and the
    // comparisons stop at the first match since the names are mutually exclusive.
    void set_flip(const std::string& flip) {
        if (flip == "index_parent_non_root") {
            set_flip(INDEX_PARENT_NON_ROOT);
        } else if (flip == "index_parent_root") {
            set_flip(INDEX_PARENT_ROOT);
        } else if (flip == "index_left_sibling") {
            set_flip(INDEX_LEFT_SIBLING);
        } else if (flip == "index_right_sibling") {
            set_flip(INDEX_RIGHT_SIBLING);
        }
    }
};

template < typename K, typename V >
class Btree {
private:
protected:
mutable iomgr::FiberManagerLib::shared_mutex m_btree_lock;
BtreeLinkInfo m_root_node_info;

Expand All @@ -52,7 +78,9 @@ class Btree {
#ifndef NDEBUG
std::atomic< uint64_t > m_req_id{0};
#endif

#ifdef _PRERELEASE
BTREE_FLIPS m_flips;
#endif
// This workaround of BtreeThreadVariables is needed instead of directly declaring statics
// to overcome the gcc bug, pointer here: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66944
static BtreeThreadVariables* bt_thread_vars() {
Expand All @@ -63,16 +91,13 @@ class Btree {
return fiber_map[this_id].get();
}

static bool is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info);

protected:
BtreeConfig m_bt_cfg;

public:
/////////////////////////////////////// All External APIs /////////////////////////////
Btree(const BtreeConfig& cfg);
virtual ~Btree();
virtual btree_status_t init(void* op_context);

template < typename ReqT >
btree_status_t put(ReqT& put_req);
Expand All @@ -95,29 +120,35 @@ class Btree {

nlohmann::json get_metrics_in_json(bool updated = true);
bnodeid_t root_node_id() const;

uint64_t root_link_version() const;
void set_root_node_info(const BtreeLinkInfo& info);

// static void set_io_flip();
// static void set_error_flip();
#ifdef _PRERELEASE
void set_flip_point(std::string flip) { m_flips.set_flip(flip); }
void set_flips(std::vector< std::string > flips) {
for (const auto& flip : flips) {
set_flip_point(flip);
}
}
std::string flip_list() const { return m_flips.list(); }
#endif

protected:
/////////////////////////// Methods the underlying store is expected to handle ///////////////////////////
virtual BtreeNodePtr alloc_node(bool is_leaf) = 0;
virtual BtreeNode* init_node(uint8_t* node_buf, uint32_t node_ctx_size, bnodeid_t id, bool init_buf,
bool is_leaf) const;
virtual BtreeNode* init_node(uint8_t* node_buf, bnodeid_t id, bool init_buf, bool is_leaf) const;
virtual btree_status_t read_node_impl(bnodeid_t id, BtreeNodePtr& node) const = 0;
virtual btree_status_t write_node_impl(const BtreeNodePtr& node, void* context) = 0;
virtual btree_status_t refresh_node(const BtreeNodePtr& node, bool for_read_modify_write, void* context) const = 0;
virtual void free_node_impl(const BtreeNodePtr& node, void* context) = 0;
virtual btree_status_t prepare_node_txn(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node,
void* context) = 0;
virtual btree_status_t transact_write_nodes(const folly::small_vector< BtreeNodePtr, 3 >& new_nodes,
const BtreeNodePtr& child_node, const BtreeNodePtr& parent_node,
void* context) = 0;

virtual btree_status_t transact_nodes(const BtreeNodeList& new_nodes, const BtreeNodeList& freed_nodes,
const BtreeNodePtr& left_child_node, const BtreeNodePtr& parent_node,
void* context) = 0;
virtual btree_status_t on_root_changed(BtreeNodePtr const& root, void* context) = 0;
virtual std::string btree_store_type() const = 0;
virtual void update_new_root_info(bnodeid_t root_node, uint64_t version) = 0;

/////////////////////////// Methods the application use case is expected to handle ///////////////////////////

Expand All @@ -137,8 +168,8 @@ class Btree {
BtreeNodePtr& child_node, locktype_t int_lock_type,
locktype_t leaf_lock_type, void* context) const;
btree_status_t upgrade_node_locks(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node,
locktype_t parent_cur_lock, locktype_t child_cur_lock, void* context);
btree_status_t upgrade_node(const BtreeNodePtr& node, locktype_t prev_lock, void* context, uint64_t prev_gen);
locktype_t& parent_cur_lock, locktype_t& child_cur_lock, void* context);
btree_status_t upgrade_node_lock(const BtreeNodePtr& node, locktype_t& cur_lock, void* context);
btree_status_t _lock_node(const BtreeNodePtr& node, locktype_t type, void* context, const char* fname,
int line) const;
void unlock_node(const BtreeNodePtr& node, locktype_t type) const;
Expand Down Expand Up @@ -167,6 +198,7 @@ class Btree {
void validate_sanity_child(const BtreeNodePtr& parent_node, uint32_t ind) const;
void validate_sanity_next_child(const BtreeNodePtr& parent_node, uint32_t ind) const;
void print_node(const bnodeid_t& bnodeid) const;

void append_route_trace(BtreeRequest& req, const BtreeNodePtr& node, btree_event_t event, uint32_t start_idx = 0,
uint32_t end_idx = 0) const;

Expand All @@ -188,8 +220,6 @@ class Btree {
btree_status_t split_node(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node, uint32_t parent_ind,
K* out_split_key, void* context);
btree_status_t mutate_extents_in_leaf(const BtreeNodePtr& my_node, BtreeRangePutRequest< K >& rpreq);
btree_status_t repair_split(const BtreeNodePtr& parent_node, const BtreeNodePtr& child_node1,
uint32_t parent_split_idx, void* context);

///////// Remove Impl Methods
template < typename ReqT >
Expand All @@ -201,8 +231,6 @@ class Btree {
btree_status_t merge_nodes(const BtreeNodePtr& parent_node, const BtreeNodePtr& leftmost_node, uint32_t start_indx,
uint32_t end_indx, void* context);
bool remove_extents_in_leaf(const BtreeNodePtr& node, BtreeRangeRemoveRequest< K >& rrreq);
btree_status_t repair_merge(const BtreeNodePtr& parent_node, const BtreeNodePtr& left_child,
uint32_t parent_merge_idx, void* context);

///////// Query Impl Methods
btree_status_t do_sweep_query(BtreeNodePtr& my_node, BtreeQueryRequest< K >& qreq,
Expand Down
14 changes: 2 additions & 12 deletions src/include/homestore/btree/btree.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ Btree< K, V >::Btree(const BtreeConfig& cfg) :
// Destructor: compiler-generated, so it performs no work beyond destroying the members.
template < typename K, typename V >
Btree< K, V >::~Btree() = default;

// Initializes the btree by creating its root node.
// op_context is passed through untouched to create_root_node(); the status that call reports
// is returned to the caller as is.
template < typename K, typename V >
btree_status_t Btree< K, V >::init(void* op_context) {
    const btree_status_t root_create_status = create_root_node(op_context);
    return root_create_status;
}

template < typename K, typename V >
void Btree< K, V >::set_root_node_info(const BtreeLinkInfo& info) {
m_root_node_info = info;
Expand Down Expand Up @@ -195,7 +190,7 @@ retry:
goto out;
}

BT_NODE_LOG_ASSERT_EQ(root->has_valid_edge(), true, root, "Orphaned root with no entries and edge");
BT_NODE_LOG_ASSERT_EQ(root->has_valid_edge(), true, root, "Orphaned root with no entries and no edge");
unlock_node(root, acq_lock);
m_btree_lock.unlock_shared();

Expand Down Expand Up @@ -334,7 +329,7 @@ void Btree< K, V >::print_tree_keys() const {
to_string_keys(m_root_node_info.bnode_id(), buf);
m_btree_lock.unlock_shared();

BT_LOG(INFO, "Pre order traversal of tree:\n<{}>", buf);
LOGINFO("Pre order traversal of tree:\n<{}>", buf);
}

template < typename K, typename V >
Expand All @@ -352,11 +347,6 @@ uint64_t Btree< K, V >::root_link_version() const {
return m_root_node_info.link_version();
}

// Tells whether the link version carried by child_info disagrees with the link version stored on
// child_node itself. Returns true on a mismatch, false when both versions are equal.
// NOTE(review): presumably a mismatch means the link to the child is stale and must be repaired - confirm
// with the callers.
template < typename K, typename V >
bool Btree< K, V >::is_repair_needed(const BtreeNodePtr& child_node, const BtreeLinkInfo& child_info) {
    const bool versions_differ = (child_info.link_version() != child_node->link_version());
    return versions_differ;
}

// TODO: Commenting out flip till we figure out how to move flip dependency inside sisl package.
#if 0
#ifdef _PRERELEASE
Expand Down
21 changes: 9 additions & 12 deletions src/include/homestore/btree/detail/btree_internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,35 +254,32 @@ struct BtreeConfig {
public:
BtreeConfig(uint32_t node_size, const std::string& btree_name = "") :
m_node_size{node_size},
m_node_data_size{node_size},
m_btree_name{btree_name.empty() ? std::string("btree") : btree_name} {
m_ideal_fill_size = (uint32_t)(m_node_data_size * m_ideal_fill_pct) / 100;
m_suggested_min_size = (uint32_t)(m_node_data_size * m_suggested_min_pct) / 100;
m_node_size{node_size}, m_btree_name{btree_name.empty() ? std::string("btree") : btree_name} {
set_node_data_size(node_size - 512); // Just put estimate at this point of time.
}
virtual ~BtreeConfig() = default;
uint32_t node_size() const { return m_node_size; };
uint32_t split_size(uint32_t filled_size) const { return uint32_cast(filled_size * m_split_pct) / 100; }
uint32_t ideal_fill_size() const { return m_ideal_fill_size; }
uint32_t suggested_min_size() const { return m_suggested_min_size; }
uint32_t node_data_size() const { return m_node_data_size; }
void set_node_data_size(uint32_t data_size) {
m_node_data_size = data_size;
m_ideal_fill_size = (uint32_t)(m_node_data_size * m_ideal_fill_pct) / 100; // Recompute the values
m_suggested_min_size = (uint32_t)(m_node_data_size * m_suggested_min_pct) / 100;
}
uint32_t split_size(uint32_t filled_size) const { return uint32_cast(filled_size * m_split_pct) / 100; }
uint32_t ideal_fill_size() const { return m_ideal_fill_size; }
uint32_t suggested_min_size() const { return m_suggested_min_size; }
uint32_t node_data_size() const { return m_node_data_size; }
void set_ideal_fill_pct(uint8_t pct) {
m_ideal_fill_pct = pct;
m_ideal_fill_size = (uint32_t)(m_node_data_size * m_ideal_fill_pct) / 100;
m_ideal_fill_size = (uint32_t)(node_data_size() * m_ideal_fill_pct) / 100;
}
void set_suggested_min_size(uint8_t pct) {
m_suggested_min_pct = pct;
m_suggested_min_size = (uint32_t)(m_node_data_size * m_suggested_min_pct) / 100;
m_suggested_min_size = (uint32_t)(node_data_size() * m_suggested_min_pct) / 100;
}
const std::string& name() const { return m_btree_name; }
Expand Down
Loading

0 comments on commit 509260c

Please sign in to comment.