From c4a06976f335faf28af6b2a6055eb56d03426fa0 Mon Sep 17 00:00:00 2001
From: Jie Yao
Date: Tue, 17 Dec 2024 19:14:35 -0700
Subject: [PATCH] fix double call of leave()

leave() will be called twice on a removed member: once when the
config-change log (excluding this member) is committed, and again when
the journal_type_t::HS_CTRL_DESTROY log entry is committed. The destroy
future's value is therefore set twice, which leads to an error. This PR
fixes that bug.
---
 conanfile.py                                   |  2 +-
 .../replication/repl_dev/raft_repl_dev.cpp     | 31 +++++++++----------
 src/lib/replication/repl_dev/raft_repl_dev.h   |  4 ++-
 src/tests/test_common/raft_repl_test_base.hpp  | 10 ++----
 4 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/conanfile.py b/conanfile.py
index a8d70345a..f241149e6 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomestoreConan(ConanFile):
     name = "homestore"
-    version = "6.5.26"
+    version = "6.5.27"
     homepage = "https://github.com/eBay/Homestore"
     description = "HomeStore Storage Engine"

diff --git a/src/lib/replication/repl_dev/raft_repl_dev.cpp b/src/lib/replication/repl_dev/raft_repl_dev.cpp
index da8535602..d92464c8b 100644
--- a/src/lib/replication/repl_dev/raft_repl_dev.cpp
+++ b/src/lib/replication/repl_dev/raft_repl_dev.cpp
@@ -1183,7 +1183,7 @@ nuraft::ptr< nuraft::log_store > RaftReplDev::load_log_store() { return m_data_j
 int32_t RaftReplDev::server_id() { return m_raft_server_id; }

-bool RaftReplDev::is_destroy_pending() const { return (m_rd_sb->destroy_pending == 0x1); }
+bool RaftReplDev::is_destroy_pending() const { return (*m_stage.access().get() == repl_dev_stage_t::DESTROYED); }

 bool RaftReplDev::is_destroyed() const { return (*m_stage.access().get() == repl_dev_stage_t::PERMANENT_DESTROYED); }

 /////////////////////////////////// nuraft_mesg::mesg_state_mgr overrides ////////////////////////////////////
@@ -1207,6 +1207,19 @@ void RaftReplDev::permanent_destroy() {
 }

 void RaftReplDev::leave() {
+    // leave() will be called in 3 cases:
+    // 1. when a log entry of journal_type_t::HS_CTRL_DESTROY is committed;
+    // 2. when this node is removed from the cluster and the new config (excluding this node) is committed on it;
+    // 3. when this node is removed from the cluster while it is down, so the new config log (excluding this node)
+    // is never replicated to it. When the node restarts, the leader will not send it any append entries, since it
+    // is no longer a member of the raft group. The node will become a candidate and send request-vote requests to
+    // the other members of the raft group, and a member will send RemovedFromCluster to this node once it finds
+    // the node is no longer a member of the raft group.
+
+    // leave() will never be called concurrently, since config changes and journal_type_t::HS_CTRL_DESTROY are both
+    // log entries, which are committed sequentially.
+    if (is_destroy_pending()) return;
+
     // We update that this repl_dev in destroyed state, actual clean up of resources happen in reaper thread later
     m_stage.update([](auto* stage) { *stage = repl_dev_stage_t::DESTROYED; });
     m_destroyed_time = Clock::now();
@@ -1288,21 +1301,7 @@ std::pair< bool, nuraft::cb_func::ReturnCode > RaftReplDev::handle_raft_event(nu
         return {true, ret};
     }

-    case nuraft::cb_func::Type::RemovedFromCluster: {
-        // a node will reach here when :
-        // 1. it is removed from the cluster and the new config(excluding this node) is being committed on this node
-        // 2. it is removed from the cluster , but the node is down and new config log(excluding this node) is not
-        // replicated to this removed node. when the node restart, leader will not send any append entry to this node,
-        // since it is not a member of the raft group. it will become a condidate and send request-vote request to other
-        // members of this raft group. a member will send RemovedFromCluster to the node if this member finds the node
-        // is no longer a member of the raft group.
-
-        // this will lazily cleanup the group
-        // TODO:cleanup this repl dev ASAP if necessary.
-        leave();
-
-        return {true, ret};
-    }
+    // RemovedFromCluster is handled in nuraft_mesg::generic_raft_event_handler, where leave() is called

     // TODO: Add more type handler if necessary
     default:

diff --git a/src/lib/replication/repl_dev/raft_repl_dev.h b/src/lib/replication/repl_dev/raft_repl_dev.h
index 0550858cf..9e29a5737 100644
--- a/src/lib/replication/repl_dev/raft_repl_dev.h
+++ b/src/lib/replication/repl_dev/raft_repl_dev.h
@@ -229,7 +229,9 @@ class RaftReplDev : public ReplDev,
      *
      * @param num_reserved_entries The number of reserved entries of the replication log.
      */
-    void truncate(uint32_t num_reserved_entries) { m_data_journal->truncate(num_reserved_entries, m_compact_lsn.load()); }
+    void truncate(uint32_t num_reserved_entries) {
+        m_data_journal->truncate(num_reserved_entries, m_compact_lsn.load());
+    }

     void wait_for_logstore_ready() { m_data_journal->wait_for_log_store_ready(); }

diff --git a/src/tests/test_common/raft_repl_test_base.hpp b/src/tests/test_common/raft_repl_test_base.hpp
index 7445568b8..21d5fa3f2 100644
--- a/src/tests/test_common/raft_repl_test_base.hpp
+++ b/src/tests/test_common/raft_repl_test_base.hpp
@@ -182,16 +182,12 @@ class TestReplicatedDB : public homestore::ReplDevListener {
         return make_async_success<>();
     }

-    static int64_t get_next_lsn(uint64_t& obj_id) {
-        return obj_id & ((1ULL << 63) - 1);
-    }
-    static void set_resync_msg_type_bit(uint64_t& obj_id) {
-        obj_id |= 1ULL << 63;
-    }
+    static int64_t get_next_lsn(uint64_t& obj_id) { return obj_id & ((1ULL << 63) - 1); }
+    static void set_resync_msg_type_bit(uint64_t& obj_id) { obj_id |= 1ULL << 63; }

     int read_snapshot_obj(shared< snapshot_context > context, shared< snapshot_obj > snp_data) override {
         auto s = std::dynamic_pointer_cast< nuraft_snapshot_context >(context)->nuraft_snapshot();
-        if(RaftStateMachine::is_hs_snp_obj(snp_data->offset)) {
+        if (RaftStateMachine::is_hs_snp_obj(snp_data->offset)) {
             LOGERRORMOD(replication, "invalid snapshot offset={}", snp_data->offset);
             return -1;
         }
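-- 

For illustration, below is a minimal self-contained sketch of the failure mode this patch guards against. FakeReplDev, m_destroy_promise, and the mutex are hypothetical stand-ins, not Homestore's actual types; in std::promise terms, setting the destroy future's value twice throws std::future_error, which matches the error described in the commit message. The stage-based guard makes the second leave() a no-op:

#include <future>
#include <iostream>
#include <mutex>

// Hypothetical stand-ins for Homestore's stage enum and its thread-safe
// wrapper; only what this sketch needs.
enum class repl_dev_stage_t { ACTIVE, DESTROYED, PERMANENT_DESTROYED };

class FakeReplDev {
public:
    // Mirrors the patched is_destroy_pending(): derived from the in-memory
    // stage rather than a superblock flag, so it turns true as soon as the
    // first leave() has run.
    bool is_destroy_pending() const {
        std::lock_guard< std::mutex > lg(m_mtx);
        return m_stage == repl_dev_stage_t::DESTROYED;
    }

    // leave() can be triggered twice on a removed member: once when the new
    // config (RemovedFromCluster) commits and once when HS_CTRL_DESTROY
    // commits. Without the guard, the second call would set_value() on an
    // already-fulfilled promise and throw std::future_error.
    void leave() {
        if (is_destroy_pending()) return; // second call short-circuits here
        {
            std::lock_guard< std::mutex > lg(m_mtx);
            m_stage = repl_dev_stage_t::DESTROYED;
        }
        m_destroy_promise.set_value(true); // fulfilled exactly once
    }

    std::future< bool > destroy_future() { return m_destroy_promise.get_future(); }

private:
    mutable std::mutex m_mtx;
    repl_dev_stage_t m_stage{repl_dev_stage_t::ACTIVE};
    std::promise< bool > m_destroy_promise;
};

int main() {
    FakeReplDev rd;
    auto fut = rd.destroy_future();
    rd.leave(); // config-change commit path
    rd.leave(); // HS_CTRL_DESTROY commit path: now a harmless no-op
    std::cout << std::boolalpha << "destroy future value: " << fut.get() << '\n';
    return 0;
}

Per the comment added in leave(), the two commit paths are serialized by the raft log, so the mutex in this sketch is only defensive; without that serialization the check-then-set would race.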