Skip to content

Commit

Permalink
fix destroy repl dev
Browse files Browse the repository at this point in the history
  • Loading branch information
JacksonYao287 committed Aug 25, 2024
1 parent f83dcf1 commit ac17767
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
8 changes: 8 additions & 0 deletions src/lib/replication/repl_dev/raft_repl_dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,14 @@ class RaftReplDev : public ReplDev,
*/
void on_restart();

/**
* \brief Forces this repl_dev to leave its group without waiting for the destroy message to be committed.
*
* Intended for a repl_dev that is a stale member of an already-destroyed group: the stale member never received
* the destroy message, and because the group is already destroyed, no leader will send that message to it again.
* Force-leaving prevents the stale member from lingering as a part of the group.
*
* This is a thin public wrapper that simply delegates to leave().
*/
void force_leave() { leave(); }

protected:
//////////////// All nuraft::state_mgr overrides ///////////////////////
nuraft::ptr< nuraft::cluster_config > load_config() override;
Expand Down
3 changes: 2 additions & 1 deletion src/lib/replication/service/raft_repl_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,8 @@ void RaftReplService::start_reaper_thread() {
m_reaper_fiber = iomanager.iofiber_self();

// Schedule the rdev garbage collector timer
LOGINFOMOD(replication, "Reaper Thread: scheduling GC every {} seconds", HS_DYNAMIC_CONFIG(generic.repl_dev_cleanup_interval_sec));
LOGINFOMOD(replication, "Reaper Thread: scheduling GC every {} seconds",
HS_DYNAMIC_CONFIG(generic.repl_dev_cleanup_interval_sec));
m_rdev_gc_timer_hdl = iomanager.schedule_thread_timer(
HS_DYNAMIC_CONFIG(generic.repl_dev_cleanup_interval_sec) * 1000 * 1000 * 1000, true /* recurring */,
nullptr, [this](void*) {
Expand Down
12 changes: 12 additions & 0 deletions src/tests/test_raft_repl_dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,11 +400,23 @@ class RaftReplDevTest : public testing::Test {
for (auto const& db : dbs_) {
if (db->is_zombie()) { continue; }
auto repl_dev = std::dynamic_pointer_cast< RaftReplDev >(db->repl_dev());
int i = 0;
bool force_leave = false;
do {
std::this_thread::sleep_for(std::chrono::seconds(1));
auto& raft_repl_svc = dynamic_cast< RaftReplService& >(hs()->repl_service());
raft_repl_svc.gc_repl_devs();
LOGINFO("Waiting for repl dev to get destroyed");

// TODO: if the leader is destroyed but a follower never receives the notification, the follower will never
// be destroyed. We need to handle this in raft_repl_dev; revisit here after making changes on the
// raft_repl_dev side to handle this case. This is a workaround to avoid an infinite loop for now.
if (i++ > 10 && !force_leave) {
LOGWARN("Waiting for repl dev to get destroyed and it is leader, so do a force leave");
repl_dev->force_leave();
force_leave = true;
}

} while (!repl_dev->is_destroyed());
}
}
Expand Down

0 comments on commit ac17767

Please sign in to comment.