diff --git a/conanfile.py b/conanfile.py
index f34a966c7..51d8e4923 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@
 
 class HomestoreConan(ConanFile):
     name = "homestore"
-    version = "6.4.63"
+    version = "6.4.64"
 
     homepage = "https://github.com/eBay/Homestore"
     description = "HomeStore Storage Engine"
diff --git a/src/lib/replication/service/raft_repl_service.cpp b/src/lib/replication/service/raft_repl_service.cpp
index d862c2098..c4aefe1ca 100644
--- a/src/lib/replication/service/raft_repl_service.cpp
+++ b/src/lib/replication/service/raft_repl_service.cpp
@@ -85,6 +85,12 @@ void RaftReplService::start() {
     LOGINFO("Starting RaftReplService with server_uuid={} port={}",
             boost::uuids::to_string(params.server_uuid_), params.mesg_port_);
 
+    // Check if ssl cert files are provided; if yes, monitor them for changes
+    if (!params.ssl_key_.empty() && !params.ssl_cert_.empty()) {
+        ioenvironment.with_file_watcher();
+        monitor_cert_changes();
+    }
+
     // Step 2: Register all RAFT parameters. At the end of this step, raft is ready to be created/join group
     auto r_params = nuraft::raft_params()
                         .with_election_timeout_lower(HS_DYNAMIC_CONFIG(consensus.elect_to_low_ms))
@@ -175,6 +181,60 @@ void RaftReplService::stop() {
     hs()->logstore_service().stop();
 }
 
+// Registers file-watcher listeners on the ssl cert and key files. Every change event restarts the messaging
+// server on its own thread (see restart_raft_svc).
+// NOTE(review): the restart thread is detached and captures `this`; nothing in this function ties its lifetime
+// to the service - confirm the service outlives any in-flight restart.
+void RaftReplService::monitor_cert_changes() {
+    auto fw = ioenvironment.get_file_watcher();
+    auto cert_change_cb = [this](const std::string filepath, const bool deleted) {
+        LOGINFO("file change event for {}, deleted? {}", filepath, deleted);
+        // do not block file_watcher thread
+        std::thread restart_svc(&RaftReplService::restart_raft_svc, this, filepath, deleted);
+        restart_svc.detach();
+    };
+
+    // monitor ssl cert file
+    if (!fw->register_listener(ioenvironment.get_ssl_cert(), "hs_ssl_cert_watcher", cert_change_cb)) {
+        LOGERROR("Failed to register listener, {} to watch file {}, Not monitoring cert files",
+                 "hs_ssl_cert_watcher", ioenvironment.get_ssl_cert());
+    }
+    // monitor ssl key file
+    if (!fw->register_listener(ioenvironment.get_ssl_key(), "hs_ssl_key_watcher", cert_change_cb)) {
+        LOGERROR("Failed to register listener, {} to watch file {}, Not monitoring cert files",
+                 "hs_ssl_key_watcher", ioenvironment.get_ssl_key());
+    }
+}
+
+// Handles one cert/key file event by restarting the messaging server. Runs on a detached thread, where an
+// escaping exception would call std::terminate, so a missing file is reported via the log instead of thrown.
+void RaftReplService::restart_raft_svc(const std::string filepath, const bool deleted) {
+    // a deleted file gets a bounded amount of time to be added again
+    if (deleted && !wait_for_cert(filepath)) {
+        LOGERROR("file {} not found! Can not start grpc server", filepath);
+        return;
+    }
+    // cert and key events arrive on separate threads; restart one at a time
+    const std::unique_lock lock(m_raft_restart_mutex);
+    m_msg_mgr->restart_server();
+    // NOTE(review): presumably the watcher drops its watch once the file is deleted, hence the re-registration;
+    // confirm, and check whether re-registering the surviving file's listener logs a spurious error.
+    if (deleted) { monitor_cert_changes(); }
+}
+
+// Polls every cert_check_sleep, for up to cert_change_timeout, until filepath exists.
+// Returns true as soon as the file is found, false once the attempts are exhausted.
+bool RaftReplService::wait_for_cert(const std::string& filepath) {
+    // error_code overload: a filesystem error counts as "not there yet" instead of throwing on this thread
+    std::error_code ec;
+    auto attempts = cert_change_timeout / cert_check_sleep;
+    for (auto i = attempts; i > 0; --i) {
+        if (std::filesystem::exists(filepath, ec)) { return true; }
+        std::this_thread::sleep_for(cert_check_sleep);
+    }
+    return false;
+}
+
 RaftReplDev* RaftReplService::raft_group_config_found(sisl::byte_view const& buf, void* meta_cookie) {
     json_superblk group_config;
     auto& js = group_config.load(buf, meta_cookie);
diff --git a/src/lib/replication/service/raft_repl_service.h b/src/lib/replication/service/raft_repl_service.h
index 44ed06332..e0d1e6718 100644
--- a/src/lib/replication/service/raft_repl_service.h
+++ b/src/lib/replication/service/raft_repl_service.h
@@ -31,6 +31,10 @@
 
 namespace homestore {
 
+// How long to wait for a deleted ssl cert/key file to reappear, and how often to poll for it.
+constexpr auto cert_change_timeout = std::chrono::seconds(1200);
+constexpr auto cert_check_sleep = std::chrono::seconds(1);
+
 struct repl_dev_superblk;
 class RaftReplDev;
 
@@ -47,7 +51,9 @@ class RaftReplService : public GenericReplService,
     iomgr::timer_handle_t m_rdev_gc_timer_hdl;
     iomgr::timer_handle_t m_flush_durable_commit_timer_hdl;
     iomgr::io_fiber_t m_reaper_fiber;
+    // held while the messaging server is restarted after a cert/key file event
+    std::mutex m_raft_restart_mutex;
 
 public:
     RaftReplService(cshared< ReplApplication >& repl_app);
 
@@ -80,6 +86,9 @@ class RaftReplService : public GenericReplService,
     void gc_repl_devs();
     void gc_repl_reqs();
     void flush_durable_commit_lsn();
+    void monitor_cert_changes();
+    void restart_raft_svc(const std::string filepath, const bool deleted);
+    bool wait_for_cert(const std::string& filepath);
 };
 
 // cp context for repl_dev, repl_dev cp_lsn is critical cursor in the system,