From 0276bd8ac6ab84150c9a3b9c03ed25340d9f1d49 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Fri, 10 May 2024 02:33:50 -0700 Subject: [PATCH] fix logstore recovery issue --- conanfile.py | 2 +- src/lib/logstore/log_store_service.cpp | 19 ++++++++----------- .../log_store/home_raft_log_store.cpp | 15 ++++++++++----- .../log_store/home_raft_log_store.h | 3 +++ src/lib/replication/repl_dev/raft_repl_dev.h | 2 ++ .../replication/service/raft_repl_service.cpp | 1 + 6 files changed, 25 insertions(+), 17 deletions(-) diff --git a/conanfile.py b/conanfile.py index 661ffef8d..5bd20c576 100644 --- a/conanfile.py +++ b/conanfile.py @@ -5,7 +5,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "6.4.4" + version = "6.4.5" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/logstore/log_store_service.cpp b/src/lib/logstore/log_store_service.cpp index c319f475e..e5d8530e1 100644 --- a/src/lib/logstore/log_store_service.cpp +++ b/src/lib/logstore/log_store_service.cpp @@ -51,7 +51,7 @@ LogStoreService::LogStoreService() { [this](meta_blk* mblk, sisl::byte_view buf, size_t size) { rollback_super_blk_found(std::move(buf), voidptr_cast(mblk)); }, - nullptr); + nullptr, true, std::optional< meta_subtype_vec_t >({logdev_sb_meta_name})); } folly::Future< std::error_code > LogStoreService::create_vdev(uint64_t size, HSDevType devType, uint32_t chunk_size) { @@ -175,7 +175,9 @@ void LogStoreService::open_logdev(logdev_id_t logdev_id) { m_id_reserver->reserve(logdev_id); auto logdev = std::make_shared< LogDev >(logdev_id, m_logdev_vdev.get()); m_id_logdev_map.emplace(logdev_id, logdev); + LOGDEBUGMOD(logstore, "log_dev={} does not exist, created!", logdev_id); } + m_unopened_logdev.erase(logdev_id); LOGDEBUGMOD(logstore, "Opened log_dev={}", logdev_id); } @@ -206,17 +208,17 @@ void LogStoreService::logdev_super_blk_found(const sisl::byte_view& buf, void* m auto id = sb->logdev_id; LOGDEBUGMOD(logstore, "Log dev superblk found logdev={}", id); const auto it = m_id_logdev_map.find(id); - if (it == m_id_logdev_map.end()) { - LOGERROR("logdev={} found but not opened yet, it will be discarded after logstore is started", id); - m_unopened_logdev.insert(id); - } - // We could update the logdev map either with logdev or rollback superblks found callbacks. if (it != m_id_logdev_map.end()) { logdev = it->second; } else { logdev = std::make_shared< LogDev >(id, m_logdev_vdev.get()); m_id_logdev_map.emplace(id, logdev); + // when recover logdev meta blk, we get all the logdevs from the superblk. we put them in m_unopened_logdev + // too. after logdev meta blks are all recovered, when a client opens a logdev, we remove it from + // m_unopened_logdev. so that when we start log service, all the left items in m_unopened_logdev are those + // not open, which can be destroyed + m_unopened_logdev.insert(id); } logdev->log_dev_meta().logdev_super_blk_found(buf, meta_cookie); @@ -235,11 +237,6 @@ void LogStoreService::rollback_super_blk_found(const sisl::byte_view& buf, void* auto id = rollback_sb->logdev_id; LOGDEBUGMOD(logstore, "Log dev rollback superblk found logdev={}", id); const auto it = m_id_logdev_map.find(id); - if (it == m_id_logdev_map.end()) { - LOGERROR("logdev={} found but not opened yet, it will be discarded after logstore is started", id); - m_unopened_logdev.insert(id); - } - if (it != m_id_logdev_map.end()) { logdev = it->second; } else { diff --git a/src/lib/replication/log_store/home_raft_log_store.cpp b/src/lib/replication/log_store/home_raft_log_store.cpp index 1bef25d36..e5c7e34ef 100644 --- a/src/lib/replication/log_store/home_raft_log_store.cpp +++ b/src/lib/replication/log_store/home_raft_log_store.cpp @@ -104,11 +104,13 @@ HomeRaftLogStore::HomeRaftLogStore(logdev_id_t logdev_id, logstore_id_t logstore m_logstore_id = logstore_id; LOGDEBUGMOD(replication, "Opening existing home log_dev={} log_store={}", m_logdev_id, logstore_id); logstore_service().open_logdev(m_logdev_id); - logstore_service().open_log_store(m_logdev_id, logstore_id, true).thenValue([this](auto log_store) { - m_log_store = std::move(log_store); - DEBUG_ASSERT_EQ(m_logstore_id, m_log_store->get_store_id(), "Mismatch in passed and create logstore id"); - REPL_STORE_LOG(DEBUG, "Home Log store created/opened successfully"); - }); + m_log_store_future = + logstore_service().open_log_store(m_logdev_id, logstore_id, true).thenValue([this](auto log_store) { + m_log_store = std::move(log_store); + DEBUG_ASSERT_EQ(m_logstore_id, m_log_store->get_store_id(), + "Mismatch in passed and create logstore id"); + REPL_STORE_LOG(DEBUG, "Home Log store created/opened successfully"); + }); } } @@ -309,4 +311,7 @@ ulong HomeRaftLogStore::last_durable_index() { m_last_durable_lsn = m_log_store->get_contiguous_completed_seq_num(m_last_durable_lsn); return to_repl_lsn(m_last_durable_lsn); } + +void HomeRaftLogStore::wait_for_log_store_ready() { m_log_store_future.wait(); } + } // namespace homestore diff --git a/src/lib/replication/log_store/home_raft_log_store.h b/src/lib/replication/log_store/home_raft_log_store.h index e3da2b379..8108c85c5 100644 --- a/src/lib/replication/log_store/home_raft_log_store.h +++ b/src/lib/replication/log_store/home_raft_log_store.h @@ -188,11 +188,14 @@ class HomeRaftLogStore : public nuraft::log_store { */ void truncate(uint32_t num_reserved_cnt, repl_lsn_t compact_lsn); + void wait_for_log_store_ready(); + private: logstore_id_t m_logstore_id; logdev_id_t m_logdev_id; shared< HomeLogStore > m_log_store; nuraft::ptr< nuraft::log_entry > m_dummy_log_entry; store_lsn_t m_last_durable_lsn{-1}; + folly::Future< folly::Unit > m_log_store_future; }; } // namespace homestore diff --git a/src/lib/replication/repl_dev/raft_repl_dev.h b/src/lib/replication/repl_dev/raft_repl_dev.h index 8a46993e7..696286f10 100644 --- a/src/lib/replication/repl_dev/raft_repl_dev.h +++ b/src/lib/replication/repl_dev/raft_repl_dev.h @@ -171,6 +171,8 @@ class RaftReplDev : public ReplDev, nuraft::ptr< nuraft::snapshot > get_last_snapshot() { return m_last_snapshot; } + void wait_for_logstore_ready() { m_data_journal->wait_for_log_store_ready(); } + protected: //////////////// All nuraft::state_mgr overrides /////////////////////// nuraft::ptr< nuraft::cluster_config > load_config() override; diff --git a/src/lib/replication/service/raft_repl_service.cpp b/src/lib/replication/service/raft_repl_service.cpp index 6f2cb0703..5a288d01d 100644 --- a/src/lib/replication/service/raft_repl_service.cpp +++ b/src/lib/replication/service/raft_repl_service.cpp @@ -128,6 +128,7 @@ void RaftReplService::start() { // Step 6: Iterate all the repl dev and ask each one of the join the raft group. for (auto it = m_rd_map.begin(); it != m_rd_map.end();) { auto rdev = std::dynamic_pointer_cast< RaftReplDev >(it->second); + rdev->wait_for_logstore_ready(); if (!rdev->join_group()) { it = m_rd_map.erase(it); } else {