Skip to content

Commit

Permalink
fix get peer info (#107)
Browse files Browse the repository at this point in the history
1 Fix a bug where a peer does not exist in the configuration of the leader.

In the remove_member case, the leader first removes the member from its peer_list and then applies the new cluster configuration, which excludes the removed member. If the leader fetches the peer_list before the to-be-removed node is dropped from it, but calls get_srv_config only after the new configuration (without that node) has been applied, it cannot find the peer in the configuration.

2 Allow a follower to get its own peer info, which is useful for the upper layer to retrieve the last appended log index of a follower.
  • Loading branch information
JacksonYao287 authored Nov 18, 2024
1 parent a277e8a commit ec3ca39
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 28 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class NuRaftMesgConan(ConanFile):
name = "nuraft_mesg"
version = "3.6.2"
version = "3.6.3"

homepage = "https://github.com/eBay/nuraft_mesg"
description = "A gRPC service for NuRAFT"
Expand Down
39 changes: 25 additions & 14 deletions src/lib/repl_service_ctx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,24 +89,35 @@ const std::string& repl_service_ctx::raft_leader_id() const {

std::vector< peer_info > repl_service_ctx::get_raft_status() const {
std::vector< peer_info > peers;
if (!is_raft_leader()) return peers;
if (!_server) return peers;

auto pinfo_all = _server->get_peer_info_all();
// add leader to the list
nuraft::raft_server::peer_info pi_leader;
pi_leader.id_ = _server->get_id();
pi_leader.last_log_idx_ = _server->get_last_log_idx();
pinfo_all.emplace_back(pi_leader);

for (auto const& pinfo : pinfo_all) {
std::string_view peer_id;
if (auto srv_config = _server->get_srv_config(pinfo.id_); nullptr != srv_config) {
peer_id = srv_config->get_endpoint();
if (is_raft_leader()) {
// leader can get all the peer info
auto pinfo_all = _server->get_peer_info_all();
// get all the peer info except the leader
for (auto const& pinfo : pinfo_all) {
std::string_view peer_id;
if (auto srv_config = _server->get_srv_config(pinfo.id_); nullptr != srv_config) {
peer_id = srv_config->get_endpoint();
}
if (peer_id.empty()) {
// if remove_member case , leader will first remove the member from peer_list, and then apply the new
// cluster configuraiton which excludes the removed member, so there is case that the peer_id is not in
// the config of leader.
LOGW("do not find peer id {} in the conifg of leader", pinfo.id_);
continue;
}
peers.emplace_back(peer_info{std::string(peer_id), pinfo.last_log_idx_, pinfo.last_succ_resp_us_});
}
DEBUG_ASSERT(!peer_id.empty(), "Unknown peer in config");
peers.emplace_back(peer_info{std::string(peer_id), pinfo.last_log_idx_, pinfo.last_succ_resp_us_});
}

auto my_id = _server->get_id();
auto my_peer_id = _server->get_srv_config(my_id)->get_endpoint();

// add the peer info of itself(leader or follower) , which is useful for upper layer
// from the view a node itself, last_succ_resp_us_ make no sense, so set it to 0
peers.emplace_back(my_peer_id, _server->get_last_log_idx(), 0);

return peers;
}

Expand Down
27 changes: 14 additions & 13 deletions src/tests/data_service_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ TEST_F(DataServiceFixture, BasicTest1) {
return folly::Unit();
}));


auto repl_ctx1 = sm1->get_repl_context();
for (auto svr : repl_ctx1->_server->get_config()->get_servers()) {
if (svr->get_endpoint() == to_string(app_1_->id_)) continue;
Expand All @@ -113,25 +112,26 @@ TEST_F(DataServiceFixture, BasicTest1) {
io_blob_list_t big_cli_buf;
test_state_mgr::fill_data_vec_big(big_cli_buf, 4 * 1024 * 1024);
sm1->data_service_request_unidirectional(nuraft_mesg::role_regex::ALL, SEND_DATA, big_cli_buf)
.deferValue([](auto e) -> NullResult {
EXPECT_TRUE(e.hasValue());
return folly::Unit();
}).get();
.deferValue([](auto e) -> NullResult {
EXPECT_TRUE(e.hasValue());
return folly::Unit();
})
.get();
LOGINFO("End large object write test")
LOGINFO("Starting large object read test")

sm4_1->data_service_request_bidirectional(nuraft_mesg::role_regex::LEADER, REQUEST_DATA, big_cli_buf)
.deferValue([](auto e) -> NullResult {
EXPECT_TRUE(e.hasValue());
test_state_mgr::verify_data(e.value().response_blob());
return folly::Unit();
}).get();
.deferValue([](auto e) -> NullResult {
EXPECT_TRUE(e.hasValue());
test_state_mgr::verify_data(e.value().response_blob());
return folly::Unit();
})
.get();
LOGINFO("End large object read test")
for (auto& buf : big_cli_buf) {
buf.buf_free();
}


// add a new member to data_service_test_group and check if repl_ctx4 sends data to newly added member
auto add_3 = app_4->instance_->add_member(data_group, app_3_->id_);
std::this_thread::sleep_for(std::chrono::seconds(1));
Expand Down Expand Up @@ -180,12 +180,13 @@ TEST_F(DataServiceFixture, BasicTest2) {
auto repl_ctx_2 = app_2_->state_mgr_map_[group_id_]->get_repl_context();
EXPECT_TRUE(repl_ctx_2 && !repl_ctx_2->is_raft_leader());
EXPECT_TRUE(repl_ctx_2 && repl_ctx_2->raft_leader_id() == to_string(app_1_->id_));
EXPECT_TRUE(repl_ctx && repl_ctx_2->get_raft_status().size() == 0);
// if it`s a follower, it should have only one peer info of itself
EXPECT_TRUE(repl_ctx && repl_ctx_2->get_raft_status().size() == 1);

auto repl_ctx_3 = app_3_->state_mgr_map_[group_id_]->get_repl_context();
EXPECT_TRUE(repl_ctx_3 && !repl_ctx_3->is_raft_leader());
EXPECT_TRUE(repl_ctx_3 && repl_ctx_3->raft_leader_id() == to_string(app_1_->id_));
EXPECT_TRUE(repl_ctx && repl_ctx_3->get_raft_status().size() == 0);
EXPECT_TRUE(repl_ctx && repl_ctx_3->get_raft_status().size() == 1);

std::list< nuraft_mesg::replica_config > cluster_config;
repl_ctx->get_cluster_config(cluster_config);
Expand Down

0 comments on commit ec3ca39

Please sign in to comment.