Skip to content

Commit

Permalink
Merge pull request #566 from yuwmao/raft
Browse files Browse the repository at this point in the history
Add cert watcher and restart raft service when cert is updated
  • Loading branch information
yuwmao authored Oct 17, 2024
2 parents 5e6bf9d + f88317d commit 9be2a49
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "6.4.63"
version = "6.4.64"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
48 changes: 48 additions & 0 deletions src/lib/replication/service/raft_repl_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ void RaftReplService::start() {
LOGINFO("Starting RaftReplService with server_uuid={} port={}", boost::uuids::to_string(params.server_uuid_),
params.mesg_port_);

//check if ssl cert files are provided, if yes, monitor the changes
if (!params.ssl_key_.empty() && !params.ssl_cert_.empty()) {
ioenvironment.with_file_watcher();
monitor_cert_changes();
}


// Step 2: Register all RAFT parameters. At the end of this step, raft is ready to be created/join group
auto r_params = nuraft::raft_params()
.with_election_timeout_lower(HS_DYNAMIC_CONFIG(consensus.elect_to_low_ms))
Expand Down Expand Up @@ -175,6 +182,47 @@ void RaftReplService::stop() {
hs()->logstore_service().stop();
}

void RaftReplService::monitor_cert_changes() {
auto fw = ioenvironment.get_file_watcher();
auto cert_change_cb = [this](const std::string filepath, const bool deleted) {
LOGINFO("file change event for {}, deleted? {}", filepath, deleted)
// do not block file_watcher thread
std::thread restart_svc(&RaftReplService::restart_raft_svc, this, filepath, deleted);
restart_svc.detach();
};

//monitor ssl cert file
if (!fw->register_listener(ioenvironment.get_ssl_cert(), "hs_ssl_cert_watcher", cert_change_cb)) {
LOGERROR("Failed to register listner, {} to watch file {}, Not monitoring cert files",
"hs_ssl_cert_watcher", ioenvironment.get_ssl_cert());
}
//monitor ssl key file
if (!fw->register_listener(ioenvironment.get_ssl_key(), "hs_ssl_key_watcher", cert_change_cb)) {
LOGERROR("Failed to register listner, {} to watch file {}, Not monitoring cert files",
"hs_ssl_key_watcher", ioenvironment.get_ssl_key());
}
}

void RaftReplService::restart_raft_svc(const std::string filepath, const bool deleted){
if (deleted && !wait_for_cert(filepath)) {
LOGINFO("file {} deleted, ", filepath)
// wait for the deleted file to be added again
throw std::runtime_error(fmt::format("file {} not found! Can not start grpc server", filepath));
}
const std::unique_lock lock(raft_restart_mutex);
m_msg_mgr->restart_server();
if (deleted) { monitor_cert_changes(); }
}

// Polls until `filepath` exists or the attempt budget is used up.
//
// The number of attempts is cert_change_timeout / cert_check_sleep, with a pause of cert_check_sleep
// between consecutive checks.
//
// @param filepath  path to look for
// @return true as soon as the path exists, false if it never appeared within the attempt budget
bool RaftReplService::wait_for_cert(const std::string& filepath) {
    const auto max_attempts = cert_change_timeout / cert_check_sleep;
    for (auto remaining = max_attempts; remaining > 0; --remaining) {
        // Use the non-throwing overload: a filesystem error while probing is treated as "not there yet"
        // rather than raising std::filesystem::filesystem_error out of the polling loop.
        std::error_code ec;
        if (std::filesystem::exists(filepath, ec)) { return true; }
        // No point in sleeping after the final check; we are about to give up anyway.
        if (remaining > 1) { std::this_thread::sleep_for(cert_check_sleep); }
    }
    return false;
}

RaftReplDev* RaftReplService::raft_group_config_found(sisl::byte_view const& buf, void* meta_cookie) {
json_superblk group_config;
auto& js = group_config.load(buf, meta_cookie);
Expand Down
9 changes: 8 additions & 1 deletion src/lib/replication/service/raft_repl_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@

namespace homestore {

// Total time budget used when waiting for a deleted cert/key file to reappear; divided by
// cert_check_sleep it gives the number of existence checks performed.
constexpr std::chrono::seconds cert_change_timeout{1200};
// Pause between two consecutive existence checks while waiting for the file.
constexpr std::chrono::seconds cert_check_sleep{1};

struct repl_dev_superblk;
class RaftReplDev;

Expand All @@ -47,7 +50,8 @@ class RaftReplService : public GenericReplService,
iomgr::timer_handle_t m_rdev_gc_timer_hdl;
iomgr::timer_handle_t m_flush_durable_commit_timer_hdl;
iomgr::io_fiber_t m_reaper_fiber;

std::mutex raft_restart_mutex;

public:
RaftReplService(cshared< ReplApplication >& repl_app);

Expand Down Expand Up @@ -80,6 +84,9 @@ class RaftReplService : public GenericReplService,
void gc_repl_devs();
void gc_repl_reqs();
void flush_durable_commit_lsn();
void monitor_cert_changes();
void restart_raft_svc(const std::string filepath, const bool deleted);
bool wait_for_cert(const std::string& filepath);
};

// cp context for repl_dev, repl_dev cp_lsn is critical cursor in the system,
Expand Down

0 comments on commit 9be2a49

Please sign in to comment.