From 1746bcc87043e165f2152899ca4670f380229a9d Mon Sep 17 00:00:00 2001 From: Hooper Date: Tue, 29 Oct 2024 03:21:42 -0700 Subject: [PATCH 1/5] implement fix pg size --- conanfile.py | 4 +- .../homestore_backend/heap_chunk_selector.cpp | 234 ++++++++++++++---- .../homestore_backend/heap_chunk_selector.h | 40 ++- src/lib/homestore_backend/hs_homeobject.cpp | 2 +- src/lib/homestore_backend/hs_homeobject.hpp | 28 ++- src/lib/homestore_backend/hs_pg_manager.cpp | 60 ++++- .../homestore_backend/hs_shard_manager.cpp | 11 +- .../replication_state_machine.cpp | 15 +- .../tests/homeobj_fixture.hpp | 12 +- .../tests/test_heap_chunk_selector.cpp | 191 ++++++++++---- 10 files changed, 459 insertions(+), 138 deletions(-) diff --git a/conanfile.py b/conanfile.py index e9edacac..fe81d8e7 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "2.1.6" + version = "2.1.7" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeReplication" @@ -49,7 +49,7 @@ def build_requirements(self): def requirements(self): self.requires("sisl/[^12.2]@oss/master", transitive_headers=True) - self.requires("homestore/[^6.4]@oss/master") + self.requires("homestore/[^6.5]@oss/master") self.requires("iomgr/[^11.3]@oss/master") self.requires("lz4/1.9.4", override=True) self.requires("openssl/3.3.1", override=True) diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp index 897ab2ad..6e2a1492 100644 --- a/src/lib/homestore_backend/heap_chunk_selector.cpp +++ b/src/lib/homestore_backend/heap_chunk_selector.cpp @@ -31,7 +31,7 @@ void HeapChunkSelector::add_chunk_internal(const chunk_num_t chunkID, bool add_t auto pdevID = vchunk.get_pdev_id(); // add this find here, since we don`t want to call make_shared in try_emplace every time. auto it = m_per_dev_heap.find(pdevID); - if (it == m_per_dev_heap.end()) { it = m_per_dev_heap.emplace(pdevID, std::make_shared< PerDevHeap >()).first; } + if (it == m_per_dev_heap.end()) { it = m_per_dev_heap.emplace(pdevID, std::make_shared< ChunkHeap >()).first; } // build total blks for every chunk on this device; it->second->m_total_blks += vchunk.get_total_blks(); @@ -59,31 +59,20 @@ csharedChunk HeapChunkSelector::select_chunk(homestore::blk_count_t count, const return nullptr; } - // shardid -> chunkid map is maintained by ShardManager - // pg_id->pdev_id map is maintained by PgManager - // chunselector will not take care of the two maps for now. 
-    uint32_t pdevID = 0;
-    auto& pdevIdHint = hint.pdev_id_hint;
-    if (!pdevIdHint.has_value()) {
-        // this is the first shard of this pg, select a pdev with the most available blocks for it
-        auto&& it =
-            std::max_element(m_per_dev_heap.begin(), m_per_dev_heap.end(),
-                             [](const std::pair< const uint32_t, std::shared_ptr< PerDevHeap > >& lhs,
-                                const std::pair< const uint32_t, std::shared_ptr< PerDevHeap > >& rhs) {
-                                 return lhs.second->available_blk_count.load() < rhs.second->available_blk_count.load();
-                             });
-        if (it == m_per_dev_heap.end()) {
-            LOGWARNMOD(homeobject, "No pdev found for new pg");
-            return nullptr;
-        }
-        pdevID = it->first;
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    // FIXME @Hooper: Temporary bypass using pdev_id_hint to represent pg_id_hint, "identical layout" will change it
+    pg_id_t pg_id = 0;
+    auto& pg_id_hint = hint.pdev_id_hint;
+    if (!pg_id_hint.has_value()) {
+        LOGWARNMOD(homeobject, "should not allocate a chunk without an existing pg_id in the hint!");
+        return nullptr;
     } else {
-        pdevID = pdevIdHint.value();
+        pg_id = pg_id_hint.value();
     }
 
-    auto it = m_per_dev_heap.find(pdevID);
-    if (it == m_per_dev_heap.end()) {
-        LOGWARNMOD(homeobject, "No pdev found for pdev {}", pdevID);
+    auto it = m_per_pg_heap.find(pg_id);
+    if (it == m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
         return nullptr;
     }
 
@@ -99,29 +88,29 @@ csharedChunk HeapChunkSelector::select_chunk(homestore::blk_count_t count, const
         avalableBlkCounter.fetch_sub(vchunk.available_blks());
         remove_chunk_from_defrag_heap(vchunk.get_chunk_id());
     } else {
-        LOGWARNMOD(homeobject, "No pdev found for pdev {}", pdevID);
+        LOGWARNMOD(homeobject, "no available chunks left for pg {}", pg_id);
     }
 
     return vchunk.get_internal_chunk();
 }
 
-csharedChunk HeapChunkSelector::select_specific_chunk(const chunk_num_t chunkID) {
+csharedChunk HeapChunkSelector::select_specific_chunk(const pg_id_t pg_id, const chunk_num_t chunkID) {
     if (m_chunks.find(chunkID) == m_chunks.end()) {
         // sanity check
         LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID);
         return nullptr;
     }
 
-    auto const pdevID = VChunk(m_chunks[chunkID]).get_pdev_id();
-    auto it = m_per_dev_heap.find(pdevID);
-    if (it == m_per_dev_heap.end()) {
-        LOGWARNMOD(homeobject, "No pdev found for pdev {}", pdevID);
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    auto pg_it = m_per_pg_heap.find(pg_id);
+    if (pg_it == m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
         return nullptr;
     }
 
-    auto vchunk = VChunk(nullptr);
-    auto& heap = it->second->m_heap;
-    if (auto lock_guard = std::lock_guard< std::mutex >(it->second->mtx); !heap.empty()) {
+    VChunk vchunk(nullptr);
+    auto& heap = pg_it->second->m_heap;
+    if (auto lock_guard = std::lock_guard< std::mutex >(pg_it->second->mtx); !heap.empty()) {
         std::vector< VChunk > chunks;
         chunks.reserve(heap.size());
         while (!heap.empty()) {
@@ -133,14 +122,13 @@ csharedChunk HeapChunkSelector::select_specific_chunk(const chunk_num_t chunkID)
             }
             chunks.push_back(std::move(c));
         }
-
         for (auto& c : chunks) { heap.emplace(c); }
     }
 
     if (vchunk.get_internal_chunk()) {
-        auto& avalableBlkCounter = it->second->available_blk_count;
+        auto& avalableBlkCounter = pg_it->second->available_blk_count;
         avalableBlkCounter.fetch_sub(vchunk.available_blks());
         remove_chunk_from_defrag_heap(vchunk.get_chunk_id());
     }
@@ -148,20 +136,21 @@ csharedChunk HeapChunkSelector::select_specific_chunk(const chunk_num_t chunkID)
     return vchunk.get_internal_chunk();
 }
 
+// Temporarily commented out, the subsequent GC implementation needs to be adapted to the fixed pg size
 // most_defrag_chunk will only be called when GC is triggered, and will return the chunk with the most
 // defrag blocks
 csharedChunk HeapChunkSelector::most_defrag_chunk() {
-    chunk_num_t chunkID{0};
+    // chunk_num_t chunkID{0};
     // the chunk might be seleted for creating shard. if this happens, we need to select another chunk
-    for (;;) {
-        {
-            std::lock_guard< std::mutex > lg(m_defrag_mtx);
-            if (m_defrag_heap.empty()) break;
-            chunkID = m_defrag_heap.top().get_chunk_id();
-        }
-        auto chunk = select_specific_chunk(chunkID);
-        if (chunk) return chunk;
-    }
+    // for (;;) {
+    //     {
+    //         std::lock_guard< std::mutex > lg(m_defrag_mtx);
+    //         if (m_defrag_heap.empty()) break;
+    //         chunkID = m_defrag_heap.top().get_chunk_id();
+    //     }
+    //     auto chunk = select_specific_chunk(chunkID);
+    //     if (chunk) return chunk;
+    // }
     return nullptr;
 }
 
@@ -186,22 +175,155 @@ void HeapChunkSelector::foreach_chunks(std::function< void(csharedChunk&) >&& cb
                   [cb = std::move(cb)](auto& p) { cb(p.second); });
 }
 
-void HeapChunkSelector::release_chunk(const chunk_num_t chunkID) {
-    const auto& it = m_chunks.find(chunkID);
-    if (it == m_chunks.end()) {
+void HeapChunkSelector::release_chunk(const pg_id_t pg_id, const chunk_num_t chunkID) {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    if (m_chunks.find(chunkID) == m_chunks.end()) {
         // sanity check
         LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID);
-    } else {
-        add_chunk_internal(chunkID);
+        return;
+    }
+
+    auto pg_it = m_per_pg_heap.find(pg_id);
+    if (pg_it == m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
+        return;
     }
+
+    const auto& chunk = m_chunks[chunkID];
+    VChunk vchunk(chunk);
+    {
+        std::lock_guard< std::mutex > l(pg_it->second->mtx);
+        auto& pg_heap = pg_it->second->m_heap;
+        pg_heap.emplace(chunk);
+    }
+    auto& avalableBlkCounter = pg_it->second->available_blk_count;
+    avalableBlkCounter += vchunk.available_blks();
+
+}
+
+uint32_t HeapChunkSelector::get_chunk_size() const {
+    const auto& chunk = m_chunks.begin()->second;
+    auto vchunk = VChunk(chunk);
+    return vchunk.size();
+}
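As a reference for the sizing math used by select_chunks_for_pg() below: the chunk count for a pg is derived by rounding the requested pg size down to a whole number of chunks. A minimal standalone sketch of that arithmetic (illustrative only, not part of the patch; the plain round_down here stands in for sisl::round_down, assumed to round down to the nearest multiple):

#include <cassert>
#include <cstdint>

// Stand-in for sisl::round_down(value, multiple): the largest multiple of
// 'multiple' that is <= 'value'.
static uint64_t round_down(uint64_t value, uint64_t multiple) { return value - value % multiple; }

int main() {
    const uint64_t chunk_size = 16ull * 1024 * 1024; // hypothetical 16 MiB chunks
    const uint64_t pg_size = 50ull * 1024 * 1024;    // requested 50 MiB pg
    const uint32_t num_chunk = round_down(pg_size, chunk_size) / chunk_size;
    assert(num_chunk == 3); // the pg gets 3 whole chunks; the 2 MiB remainder is simply not allocated
    return 0;
}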
-void HeapChunkSelector::build_per_dev_chunk_heap(const std::unordered_set< chunk_num_t >& excludingChunks) {
-    for (const auto& p : m_chunks) {
+std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id, u_int64_t pg_size) {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    if (m_per_pg_heap.find(pg_id) != m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "PG has already been created, pg_id {}", pg_id);
+        return std::nullopt;
+    }
+
+    const auto chunk_size = get_chunk_size();
+    const uint32_t num_chunk = sisl::round_down(pg_size, chunk_size) / chunk_size;
+
+    // Select the pdev with the most available chunks
+    auto &&most_avail_dev_it =
+        std::max_element(m_per_dev_heap.begin(), m_per_dev_heap.end(),
+                         [](const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& lhs,
+                            const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& rhs) {
+                             return lhs.second->size() < rhs.second->size();
+                         });
+    auto& pdev_heap = most_avail_dev_it->second;
+    if (num_chunk > pdev_heap->size()) {
+        LOGWARNMOD(homeobject, "Pdev does not have enough space to create pg {} with num_chunk {}", pg_id, num_chunk);
+        return std::nullopt;
+    }
+    auto vchunk = VChunk(nullptr);
+    auto it = m_per_pg_heap.emplace(pg_id, std::make_shared< ChunkHeap >()).first;
+    auto v2r_vector = m_v2r_chunk_map.emplace(pg_id, std::make_shared< std::vector < chunk_num_t > >()).first->second;
+    auto r2v_map = m_r2v_chunk_map.emplace(pg_id, std::make_shared< ChunkIdMap >()).first->second;
+
+    auto& pg_heap = it->second;
+    std::scoped_lock lock(pdev_heap->mtx, pg_heap->mtx);
+    v2r_vector->reserve(num_chunk);
+    for (chunk_num_t i = 0; i < num_chunk; ++i) {
+        vchunk = pdev_heap->m_heap.top();
+        //sanity check
+        RELEASE_ASSERT(vchunk.get_total_blks() == vchunk.available_blks(), "vchunk should be empty");
+        pdev_heap->m_heap.pop();
+        pdev_heap->available_blk_count -= vchunk.available_blks();
+
+        pg_heap->m_heap.emplace(vchunk);
+        pg_heap->m_total_blks += vchunk.get_total_blks();
+        pg_heap->available_blk_count += vchunk.available_blks();
+        // v_chunk_id starts from 0.
+        chunk_num_t v_chunk_id = i;
+        chunk_num_t r_chunk_id = vchunk.get_chunk_id();
+        v2r_vector->emplace_back(r_chunk_id);
+        r2v_map->emplace(r_chunk_id, v_chunk_id);
+    }
+
+    return num_chunk;
+}
+
+void HeapChunkSelector::set_pg_chunks(pg_id_t pg_id, std::vector<chunk_num_t>&& chunk_ids) {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    if (m_v2r_chunk_map.find(pg_id) != m_v2r_chunk_map.end()) {
+        LOGWARNMOD(homeobject, "PG {} has already been recovered", pg_id);
+        return;
+    }
+
+    auto v2r_vector = m_v2r_chunk_map.emplace(pg_id, std::make_shared< std::vector < chunk_num_t > >(std::move(chunk_ids))).first->second;
+    auto r2v_map = m_r2v_chunk_map.emplace(pg_id, std::make_shared< ChunkIdMap >()).first->second;
+
+    for (chunk_num_t i = 0; i < v2r_vector->size(); ++i) {
+        // v_chunk_id starts from 0.
+        chunk_num_t v_chunk_id = i;
+        chunk_num_t r_chunk_id = (*v2r_vector)[i];
+        r2v_map->emplace(r_chunk_id, v_chunk_id);
+    }
+}
+
+void HeapChunkSelector::recover_per_dev_chunk_heap() {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    for (const auto& [chunk_id, _] : m_chunks) {
         bool add_to_heap = true;
-        if (excludingChunks.find(p.first) != excludingChunks.end()) { add_to_heap = false; }
-        add_chunk_internal(p.first, add_to_heap);
-    };
+        for (const auto& [_, chunk_map] : m_r2v_chunk_map) {
+            if (chunk_map->find(chunk_id) != chunk_map->end()) {
+                add_to_heap = false;
+                break;
+            }
+        }
+        add_chunk_internal(chunk_id, add_to_heap);
+
+    }
 }
 
+void HeapChunkSelector::recover_pg_chunk_heap(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excludingChunks)
+{
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    if (m_per_pg_heap.find(pg_id) != m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "Pg_heap {} has already been recovered", pg_id);
+        return;
+    }
+    auto it = m_v2r_chunk_map.find(pg_id);
+    if (it == m_v2r_chunk_map.end()) {
+        LOGWARNMOD(homeobject, "Pg_chunk_map {} was never recovered", pg_id);
+        return;
+    }
+    const auto& chunk_ids = it->second;
+    auto& pg_heap = m_per_pg_heap.emplace(pg_id, std::make_shared< ChunkHeap >()).first->second;
+    for (const auto& chunk_id : *chunk_ids) {
+        if (excludingChunks.find(chunk_id) == excludingChunks.end()) {
+            const auto& chunk = m_chunks[chunk_id];
+            auto vchunk = VChunk(chunk);
+            pg_heap->m_heap.emplace(vchunk);
+            pg_heap->m_total_blks += vchunk.get_total_blks();
+            pg_heap->available_blk_count += vchunk.available_blks();
+        }
+    }
+}
+
+std::shared_ptr< const std::vector<chunk_num_t> > HeapChunkSelector::get_pg_chunks(pg_id_t pg_id) const {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    auto it = m_v2r_chunk_map.find(pg_id);
+    if (it != m_v2r_chunk_map.end()) {
+        return it->second;
+    } else {
+        LOGWARNMOD(homeobject, "PG {} was never created", pg_id);
+        return nullptr;
+    }
+}
 
 homestore::blk_alloc_hints HeapChunkSelector::chunk_to_hints(chunk_num_t chunk_id) const {
@@ -217,6 +339,7
@@ homestore::blk_alloc_hints HeapChunkSelector::chunk_to_hints(chunk_num_t chunk_i // return the maximum number of chunks that can be allocated on pdev uint32_t HeapChunkSelector::most_avail_num_chunks() const { + std::shared_lock lock_guard(m_chunk_selector_mtx); uint32_t max_avail_num_chunks = 0ul; for (auto const& [_, pdev_heap] : m_per_dev_heap) { max_avail_num_chunks = std::max(max_avail_num_chunks, pdev_heap->size()); @@ -226,6 +349,7 @@ uint32_t HeapChunkSelector::most_avail_num_chunks() const { } uint32_t HeapChunkSelector::avail_num_chunks(uint32_t dev_id) const { + std::shared_lock lock_guard(m_chunk_selector_mtx); auto it = m_per_dev_heap.find(dev_id); if (it == m_per_dev_heap.end()) { LOGWARNMOD(homeobject, "No pdev found for pdev {}", dev_id); @@ -238,6 +362,7 @@ uint32_t HeapChunkSelector::avail_num_chunks(uint32_t dev_id) const { uint32_t HeapChunkSelector::total_chunks() const { return m_chunks.size(); } uint64_t HeapChunkSelector::avail_blks(std::optional< uint32_t > dev_it) const { + std::shared_lock lock_guard(m_chunk_selector_mtx); if (!dev_it.has_value()) { uint64_t max_avail_blks = 0ull; for (auto const& [_, heap] : m_per_dev_heap) { @@ -257,6 +382,7 @@ uint64_t HeapChunkSelector::avail_blks(std::optional< uint32_t > dev_it) const { } uint64_t HeapChunkSelector::total_blks(uint32_t dev_id) const { + std::shared_lock lock_guard(m_chunk_selector_mtx); auto it = m_per_dev_heap.find(dev_id); if (it == m_per_dev_heap.end()) { LOGWARNMOD(homeobject, "No pdev found for pdev {}", dev_id); diff --git a/src/lib/homestore_backend/heap_chunk_selector.h b/src/lib/homestore_backend/heap_chunk_selector.h index 1ccf5d15..259ecfb5 100644 --- a/src/lib/homestore_backend/heap_chunk_selector.h +++ b/src/lib/homestore_backend/heap_chunk_selector.h @@ -35,9 +35,10 @@ class HeapChunkSelector : public homestore::ChunkSelector { using VChunkHeap = std::priority_queue< VChunk, std::vector< VChunk >, VChunkComparator >; using VChunkDefragHeap = std::priority_queue< VChunk, std::vector< VChunk >, VChunkDefragComparator >; + using ChunkIdMap = std::unordered_map < homestore::chunk_num_t, homestore::chunk_num_t >; // used for real chunk id -> virtual chunk id map using chunk_num_t = homestore::chunk_num_t; - struct PerDevHeap { + struct ChunkHeap { std::mutex mtx; VChunkHeap m_heap; std::atomic_size_t available_blk_count; @@ -46,22 +47,41 @@ class HeapChunkSelector : public homestore::ChunkSelector { }; void add_chunk(csharedChunk&) override; + void foreach_chunks(std::function< void(csharedChunk&) >&& cb) override; + csharedChunk select_chunk([[maybe_unused]] homestore::blk_count_t nblks, const homestore::blk_alloc_hints& hints); // this function will be used by GC flow or recovery flow to mark one specific chunk to be busy, caller should be // responsible to use release_chunk() interface to release it when no longer to use the chunk anymore. - csharedChunk select_specific_chunk(const chunk_num_t); + csharedChunk select_specific_chunk(const pg_id_t pg_id, const chunk_num_t); // this function will be used by GC flow to select a chunk for GC csharedChunk most_defrag_chunk(); // this function is used to return a chunk back to ChunkSelector when sealing a shard, and will only be used by // Homeobject. - void release_chunk(const chunk_num_t); + void release_chunk(const pg_id_t pg_id, const chunk_num_t); + + /** + * select chunks for pg, chunks need to be in same pdev. + * + * @param pg_id The ID of the pg. + * @param pg_size The fix pg size. 
+ * @return An optional uint32_t value representing num_chunk, or std::nullopt if no space left. + */ + std::optional< uint32_t > select_chunks_for_pg(pg_id_t pg_id, u_int64_t pg_size); + + std::shared_ptr< const std::vector > get_pg_chunks(pg_id_t pg_id) const; + + // this should be called on each pg meta blk found + void set_pg_chunks(pg_id_t pg_id, std::vector&& chunk_ids); + + // this should be called after all pg meta blk recovered + void recover_per_dev_chunk_heap(); // this should be called after ShardManager is initialized and get all the open shards - void build_per_dev_chunk_heap(const std::unordered_set< chunk_num_t >& excludingChunks); + void recover_pg_chunk_heap(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excludingChunks); /** * Retrieves the block allocation hints for a given chunk. @@ -112,12 +132,22 @@ class HeapChunkSelector : public homestore::ChunkSelector { */ uint32_t total_chunks() const; + uint32_t get_chunk_size() const; + private: - std::unordered_map< uint32_t, std::shared_ptr< PerDevHeap > > m_per_dev_heap; + std::unordered_map< uint32_t, std::shared_ptr< ChunkHeap > > m_per_dev_heap; + std::unordered_map< pg_id_t, std::shared_ptr< ChunkHeap > > m_per_pg_heap; + + // These mappings ensure "identical layout" by providing bidirectional indexing between virtual and real chunk IDs. + // m_v2r_chunk_map: Maps each pg_id to a vector of real chunk IDs (r_chunk_id). The index in the vector corresponds to the virtual chunk ID (v_chunk_id). + std::unordered_map< pg_id_t, std::shared_ptr< std::vector > > m_v2r_chunk_map; + // m_r2v_chunk_map: Maps each pg_id to a map that inversely maps real chunk IDs (r_chunk_id) to virtual chunk IDs (v_chunk_id). + std::unordered_map< pg_id_t, std::shared_ptr< ChunkIdMap > > m_r2v_chunk_map; // hold all the chunks , selected or not std::unordered_map< chunk_num_t, csharedChunk > m_chunks; + mutable std::shared_mutex m_chunk_selector_mtx; void add_chunk_internal(const chunk_num_t, bool add_to_heap = true); VChunkDefragHeap m_defrag_heap; diff --git a/src/lib/homestore_backend/hs_homeobject.cpp b/src/lib/homestore_backend/hs_homeobject.cpp index b313a508..85945c91 100644 --- a/src/lib/homestore_backend/hs_homeobject.cpp +++ b/src/lib/homestore_backend/hs_homeobject.cpp @@ -226,7 +226,7 @@ void HSHomeObject::on_replica_restart() { [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) { on_pg_meta_blk_found(std::move(buf), voidptr_cast(mblk)); }, - nullptr, true); + [this](bool success) { on_pg_meta_blk_recover_completed(success); }, true); HomeStore::instance()->meta_service().read_sub_sb(_pg_meta_name); // recover shard diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp index 357f0317..d2a46892 100644 --- a/src/lib/homestore_backend/hs_homeobject.hpp +++ b/src/lib/homestore_backend/hs_homeobject.hpp @@ -78,15 +78,24 @@ class HSHomeObject : public HomeObjectImpl { struct pg_info_superblk { pg_id_t id; uint32_t num_members; + uint32_t num_chunks; peer_id_t replica_set_uuid; + uint64_t pg_size; homestore::uuid_t index_table_uuid; blob_id_t blob_sequence_num; uint64_t active_blob_count; // Total number of active blobs uint64_t tombstone_blob_count; // Total number of tombstones uint64_t total_occupied_blk_count; // Total number of occupied blocks - pg_members members[1]; // ISO C++ forbids zero-size array + char data[1]; // ISO C++ forbids zero-size array + // Data layout inside 'data': + // First, an array of 'pg_members' structures: + // | pg_members[0] | 
pg_members[1] | ... | pg_members[num_members-1] |
+        // Immediately followed by an array of 'chunk_num_t' values (representing r_chunk_ids):
+        // | chunk_num_t[0] | chunk_num_t[1] | ... | chunk_num_t[num_chunks-1] |
+        // Here, 'chunk_num_t[i]' represents the r_chunk_id for the v_chunk_id 'i', where v_chunk_id starts from 0 and increases sequentially.
 
-        uint32_t size() const { return sizeof(pg_info_superblk) + ((num_members - 1) * sizeof(pg_members)); }
+
+        uint32_t size() const { return sizeof(pg_info_superblk) - sizeof(char) + num_members * sizeof(pg_members) + num_chunks * sizeof(homestore::chunk_num_t); }
         static std::string name() { return _pg_meta_name; }
 
         pg_info_superblk() = default;
@@ -95,14 +104,24 @@ class HSHomeObject : public HomeObjectImpl {
         pg_info_superblk& operator=(pg_info_superblk const& rhs) {
             id = rhs.id;
             num_members = rhs.num_members;
+            num_chunks = rhs.num_chunks;
+            pg_size = rhs.pg_size;
             replica_set_uuid = rhs.replica_set_uuid;
             index_table_uuid = rhs.index_table_uuid;
             blob_sequence_num = rhs.blob_sequence_num;
-            memcpy(members, rhs.members, sizeof(pg_members) * num_members);
+
+            memcpy(get_pg_members_mutable(), rhs.get_pg_members(), sizeof(pg_members) * num_members);
+            memcpy(get_chunk_ids_mutable(), rhs.get_chunk_ids(), sizeof(homestore::chunk_num_t) * num_chunks);
             return *this;
         }
 
         void copy(pg_info_superblk const& rhs) { *this = rhs; }
+
+        pg_members* get_pg_members_mutable() { return reinterpret_cast<pg_members*>(data); }
+        const pg_members* get_pg_members() const { return reinterpret_cast<const pg_members*>(data); }
+
+        homestore::chunk_num_t* get_chunk_ids_mutable() { return reinterpret_cast<homestore::chunk_num_t*>(data + num_members * sizeof(pg_members)); }
+        const homestore::chunk_num_t* get_chunk_ids() const { return reinterpret_cast<const homestore::chunk_num_t*>(data + num_members * sizeof(pg_members)); }
     };
 
     struct DataHeader {
@@ -195,7 +214,7 @@ class HSHomeObject : public HomeObjectImpl {
         std::shared_ptr< BlobIndexTable > index_table_;
         PGMetrics metrics_;
 
-        HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table);
+        HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, std::shared_ptr< const std::vector<homestore::chunk_num_t> > pg_chunk_ids);
         HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev);
         ~HS_PG() override = default;
 
@@ -335,6 +354,7 @@ class HSHomeObject : public HomeObjectImpl {
     // recover part
     void register_homestore_metablk_callback();
    void on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
+    void on_pg_meta_blk_recover_completed(bool success);
     void on_shard_meta_blk_found(homestore::meta_blk* mblk, sisl::byte_view buf);
     void on_shard_meta_blk_recover_completed(bool success);
 
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 5a90bb93..0cd9f749 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -60,6 +60,15 @@ PGManager::NullAsyncResult HSHomeObject::_create_pg(PGInfo&& pg_info, std::set<
     auto pg_id = pg_info.id;
     if (auto lg = std::shared_lock(_pg_lock); _pg_map.end() != _pg_map.find(pg_id)) return folly::Unit();
 
+    const auto most_avail_num_chunks = chunk_selector()->most_avail_num_chunks();
+    const auto chunk_size = chunk_selector()->get_chunk_size();
+    const auto needed_num_chunks = sisl::round_down(pg_info.size, chunk_size) / chunk_size;
+    if (needed_num_chunks > most_avail_num_chunks) {
+        LOGW("Not enough space to create pg, pg_id {}, needed_num_chunks {}, most_avail_num_chunks {}", pg_id,
+             needed_num_chunks, most_avail_num_chunks);
+        return folly::makeUnexpected(PGError::NO_SPACE_LEFT);
+    }
+
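The pg_info_superblk above packs two variable-length arrays behind a single char data[1] tail, which is why size() subtracts sizeof(char) before adding both array footprints. A minimal standalone sketch of the same layout arithmetic (illustrative only, with simplified stand-in types; not part of the patch):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

struct member_t { uint64_t id; char name[32]; int32_t priority; }; // simplified stand-in for pg_members
using chunk_num_t = uint16_t;

struct superblk_t {
    uint32_t num_members;
    uint32_t num_chunks;
    char data[1]; // member_t[num_members] followed by chunk_num_t[num_chunks]

    uint32_t size() const {
        return sizeof(superblk_t) - sizeof(char) + num_members * sizeof(member_t) +
            num_chunks * sizeof(chunk_num_t);
    }
    member_t* members() { return reinterpret_cast<member_t*>(data); }
    chunk_num_t* chunk_ids() { return reinterpret_cast<chunk_num_t*>(data + num_members * sizeof(member_t)); }
};

int main() {
    superblk_t hdr{3, 4, {0}}; // 3 members, 4 chunks
    superblk_t* sb = static_cast<superblk_t*>(std::malloc(hdr.size()));
    sb->num_members = hdr.num_members;
    sb->num_chunks = hdr.num_chunks;
    sb->chunk_ids()[0] = 42; // chunk_ids()[i] holds the r_chunk_id for v_chunk_id i
    std::printf("superblk occupies %u bytes\n", sb->size());
    std::free(sb);
    return 0;
}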
     pg_info.replica_set_uuid = boost::uuids::random_generator()();
     return hs_repl_service()
         .create_repl_dev(pg_info.replica_set_uuid, peers)
@@ -127,12 +136,21 @@ void HSHomeObject::on_create_pg_message_commit(int64_t lsn, sisl::blob const& he
         return;
     }
 
+    // select chunks for pg
+    auto const num_chunk = chunk_selector()->select_chunks_for_pg(pg_id, pg_info.size);
+    if (!num_chunk.has_value()) {
+        LOGW("select chunks for pg failed, pg_id {}", pg_id);
+        if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(PGError::NO_SPACE_LEFT)); }
+        return;
+    }
+    auto chunk_ids = chunk_selector()->get_pg_chunks(pg_id);
+
     // create index table and pg
     // TODO create index table during create shard.
     auto index_table = create_index_table();
     auto uuid_str = boost::uuids::to_string(index_table->uuid());
 
-    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table);
+    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table, chunk_ids);
     std::scoped_lock lock_guard(index_lock_);
     RELEASE_ASSERT(index_table_pg_map_.count(uuid_str) == 0, "duplicate index table found");
     index_table_pg_map_[uuid_str] = PgIndexTable{pg_id, index_table};
@@ -193,11 +211,11 @@ void HSHomeObject::on_pg_replace_member(homestore::group_id_t group_id, const re
             pg->pg_info_.members.emplace(PGMember(member_in.id, member_in.name, member_in.priority));
 
             uint32_t i{0};
+            pg_members* sb_members = hs_pg->pg_sb_->get_pg_members_mutable();
             for (auto const& m : pg->pg_info_.members) {
-                hs_pg->pg_sb_->members[i].id = m.id;
-                std::strncpy(hs_pg->pg_sb_->members[i].name, m.name.c_str(),
-                             std::min(m.name.size(), pg_members::max_name_len));
-                hs_pg->pg_sb_->members[i].priority = m.priority;
+                sb_members[i].id = m.id;
+                std::strncpy(sb_members[i].name, m.name.c_str(), std::min(m.name.size(), pg_members::max_name_len));
+                sb_members[i].priority = m.priority;
                 ++i;
             }
 
@@ -226,6 +244,7 @@ void HSHomeObject::add_pg_to_map(unique< HS_PG > hs_pg) {
 std::string HSHomeObject::serialize_pg_info(const PGInfo& pginfo) {
     nlohmann::json j;
     j["pg_info"]["pg_id_t"] = pginfo.id;
+    j["pg_info"]["pg_size"] = pginfo.size;
     j["pg_info"]["repl_uuid"] = boost::uuids::to_string(pginfo.replica_set_uuid);
 
     nlohmann::json members_j{};
@@ -244,6 +263,7 @@ PGInfo HSHomeObject::deserialize_pg_info(const unsigned char* json_str, size_t s
     auto pg_json = nlohmann::json::parse(json_str, json_str + size);
 
     PGInfo pg_info(pg_json["pg_info"]["pg_id_t"].get< pg_id_t >());
+    pg_info.size = pg_json["pg_info"]["pg_size"].get< u_int64_t >();
     pg_info.replica_set_uuid = boost::uuids::string_generator()(pg_json["pg_info"]["repl_uuid"].get< std::string >());
 
     for (auto const& m : pg_json["pg_info"]["members"]) {
@@ -267,6 +287,8 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
         return;
     }
     auto pg_id = pg_sb->id;
+    std::vector<chunk_num_t> chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks);
+    chunk_selector_->set_pg_chunks(pg_id, std::move(chunk_ids));
     auto uuid_str = boost::uuids::to_string(pg_sb->index_table_uuid);
     auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value()));
     // During PG recovery check if index is already recoverd else
@@ -280,37 +302,51 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     add_pg_to_map(std::move(hs_pg));
 }
 
+void HSHomeObject::on_pg_meta_blk_recover_completed(bool success) {
+    chunk_selector_->recover_per_dev_chunk_heap();
+}
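For reference, the bidirectional bookkeeping that on_pg_meta_blk_found() hands to set_pg_chunks() can be pictured in isolation: the v2r vector maps a virtual chunk id (its index) to a real chunk id, and the r2v map is its inverse. A minimal sketch of that invariant (illustrative only, not part of the patch; the chunk ids are made up):

#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

using chunk_num_t = uint16_t;

int main() {
    // v2r: index is the v_chunk_id, value is the r_chunk_id (as persisted in the superblock).
    const std::vector< chunk_num_t > v2r{12, 7, 31};
    // r2v: inverse map, rebuilt from v2r exactly the way set_pg_chunks() does.
    std::unordered_map< chunk_num_t, chunk_num_t > r2v;
    for (size_t v = 0; v < v2r.size(); ++v) { r2v.emplace(v2r[v], static_cast< chunk_num_t >(v)); }

    // Round-trip invariant relied on by the "identical layout" scheme: v -> r -> v.
    for (size_t v = 0; v < v2r.size(); ++v) { assert(r2v.at(v2r[v]) == v); }
    return 0;
}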
+ PGInfo HSHomeObject::HS_PG::pg_info_from_sb(homestore::superblk< pg_info_superblk > const& sb) { PGInfo pginfo{sb->id}; + const pg_members* sb_members = sb->get_pg_members(); for (uint32_t i{0}; i < sb->num_members; ++i) { - pginfo.members.emplace(sb->members[i].id, std::string(sb->members[i].name), sb->members[i].priority); + pginfo.members.emplace(sb_members[i].id, std::string(sb_members[i].name), sb_members[i].priority); } + pginfo.size = sb->pg_size; pginfo.replica_set_uuid = sb->replica_set_uuid; return pginfo; } -HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table) : +HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, std::shared_ptr< const std::vector > pg_chunk_ids) : PG{std::move(info)}, pg_sb_{_pg_meta_name}, repl_dev_{std::move(rdev)}, index_table_{std::move(index_table)}, metrics_{*this} { - pg_sb_.create(sizeof(pg_info_superblk) + ((pg_info_.members.size() - 1) * sizeof(pg_members))); + RELEASE_ASSERT(pg_chunk_ids != nullptr, "PG chunks null"); + const uint32_t num_chunks = pg_chunk_ids->size(); + pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members)+ num_chunks * sizeof(homestore::chunk_num_t)); pg_sb_->id = pg_info_.id; pg_sb_->num_members = pg_info_.members.size(); + pg_sb_->num_chunks = num_chunks; + pg_sb_->pg_size = pg_info_.size; pg_sb_->replica_set_uuid = repl_dev_->group_id(); pg_sb_->index_table_uuid = index_table_->uuid(); pg_sb_->active_blob_count = 0; pg_sb_->tombstone_blob_count = 0; pg_sb_->total_occupied_blk_count = 0; - uint32_t i{0}; + pg_members* pg_sb_members = pg_sb_->get_pg_members_mutable(); for (auto const& m : pg_info_.members) { - pg_sb_->members[i].id = m.id; - std::strncpy(pg_sb_->members[i].name, m.name.c_str(), std::min(m.name.size(), pg_members::max_name_len)); - pg_sb_->members[i].priority = m.priority; + pg_sb_members[i].id = m.id; + std::strncpy(pg_sb_members[i].name, m.name.c_str(), std::min(m.name.size(), pg_members::max_name_len)); + pg_sb_members[i].priority = m.priority; ++i; } + chunk_num_t* pg_sb_chunk_ids = pg_sb_->get_chunk_ids_mutable(); + for (i = 0; i < num_chunks; ++i) { + pg_sb_chunk_ids[i] = pg_chunk_ids->at(i); + } pg_sb_.write(); } diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp index 8378087b..938c64ec 100644 --- a/src/lib/homestore_backend/hs_shard_manager.cpp +++ b/src/lib/homestore_backend/hs_shard_manager.cpp @@ -324,12 +324,12 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom std::scoped_lock lock_guard(_shard_lock); shard_exist = (_shard_map.find(shard_info.id) != _shard_map.end()); } - if (!shard_exist) { add_new_shard_to_map(std::make_unique< HS_Shard >(shard_info, blkids.chunk_num())); // select_specific_chunk() will do something only when we are relaying journal after restart, during the // runtime flow chunk is already been be mark busy when we write the shard info to the repldev. 
- chunk_selector_->select_specific_chunk(blkids.chunk_num()); + auto pg_id = shard_info.placement_group; + chunk_selector_->select_specific_chunk(pg_id, blkids.chunk_num()); } if (ctx) { ctx->promise_.setValue(ShardManager::Result< ShardInfo >(shard_info)); } @@ -362,9 +362,10 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom } if (state == ShardInfo::State::SEALED) { + auto pg_id = shard_info.placement_group; auto chunk_id = get_shard_chunk(shard_info.id); RELEASE_ASSERT(chunk_id.has_value(), "Chunk id not found"); - chunk_selector()->release_chunk(chunk_id.value()); + chunk_selector()->release_chunk(pg_id, chunk_id.value()); update_shard_in_map(shard_info); } else LOGW("try to commit SEAL_SHARD_MSG but shard state is not sealed, shard_id: {}", shard_info.id); @@ -387,13 +388,15 @@ void HSHomeObject::on_shard_meta_blk_recover_completed(bool success) { std::unordered_set< homestore::chunk_num_t > excluding_chunks; std::scoped_lock lock_guard(_pg_lock); for (auto& pair : _pg_map) { + excluding_chunks.clear(); + excluding_chunks.reserve(pair.second->shards_.size()); for (auto& shard : pair.second->shards_) { if (shard->info.state == ShardInfo::State::OPEN) { excluding_chunks.emplace(d_cast< HS_Shard* >(shard.get())->sb_->chunk_id); } } + chunk_selector_->recover_pg_chunk_heap(pair.first, excluding_chunks); } - chunk_selector_->build_per_dev_chunk_heap(excluding_chunks); } void HSHomeObject::add_new_shard_to_map(ShardPtr&& shard) { diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp index 7f9164f5..ac3c6114 100644 --- a/src/lib/homestore_backend/replication_state_machine.cpp +++ b/src/lib/homestore_backend/replication_state_machine.cpp @@ -121,17 +121,10 @@ ReplicationStateMachine::get_blk_alloc_hints(sisl::blob const& header, uint32_t const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes()); switch (msg_header->msg_type) { case ReplicationMessageType::CREATE_SHARD_MSG: { - auto const [pg_found, shards_found, chunk_id] = home_object_->get_any_chunk_id(msg_header->pg_id); - if (!pg_found) { - LOGW("Requesting a chunk for an unknown pg={}, letting the caller retry after sometime", msg_header->pg_id); - return folly::makeUnexpected(homestore::ReplServiceError::RESULT_NOT_EXIST_YET); - } else if (!shards_found) { - // pg is empty without any shards, we leave the decision the HeapChunkSelector to select a pdev - // with most available space and then select one chunk based on that pdev - } else { - return home_object_->chunk_selector()->chunk_to_hints(chunk_id); - } - break; + // Since chunks are selected when a pg is created, the chunkselector selects one of the chunks owned by the pg + homestore::blk_alloc_hints hints; + hints.pdev_id_hint = msg_header->pg_id; // FIXME @Hooper: Temporary bypass using pdev_id_hint to represent pg_id_hint, "identical layout" will change it + return hints; } case ReplicationMessageType::SEAL_SHARD_MSG: { diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp index c0f3bb66..441475c8 100644 --- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp +++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp @@ -61,6 +61,7 @@ class HomeObjectFixture : public ::testing::Test { auto memebers = g_helper->members(); auto name = g_helper->name(); auto info = homeobject::PGInfo(pg_id); + info.size = 50 * Mi; for (const auto& member : memebers) { if (replica_num 
== member.second) { // by default, leader is the first member @@ -293,6 +294,8 @@ class HomeObjectFixture : public ::testing::Test { EXPECT_EQ(lhs->id, rhs->id); EXPECT_EQ(lhs->num_members, rhs->num_members); + EXPECT_EQ(lhs->num_chunks, rhs->num_chunks); + EXPECT_EQ(lhs->pg_size, rhs->pg_size); EXPECT_EQ(lhs->replica_set_uuid, rhs->replica_set_uuid); EXPECT_EQ(lhs->index_table_uuid, rhs->index_table_uuid); EXPECT_EQ(lhs->blob_sequence_num, rhs->blob_sequence_num); @@ -301,9 +304,12 @@ class HomeObjectFixture : public ::testing::Test { EXPECT_EQ(lhs->total_occupied_blk_count, rhs->total_occupied_blk_count); EXPECT_EQ(lhs->tombstone_blob_count, rhs->tombstone_blob_count); for (uint32_t i = 0; i < lhs->num_members; i++) { - EXPECT_EQ(lhs->members[i].id, rhs->members[i].id); - EXPECT_EQ(lhs->members[i].priority, rhs->members[i].priority); - EXPECT_EQ(0, std::strcmp(lhs->members[i].name, rhs->members[i].name)); + EXPECT_EQ(lhs->get_pg_members()[i].id, rhs->get_pg_members()[i].id); + EXPECT_EQ(lhs->get_pg_members()[i].priority, rhs->get_pg_members()[i].priority); + EXPECT_EQ(0, std::strcmp(lhs->get_pg_members()[i].name, rhs->get_pg_members()[i].name)); + } + for (homestore::chunk_num_t i = 0; i < lhs->num_chunks; ++i) { + EXPECT_EQ(lhs->get_chunk_ids()[i], rhs->get_chunk_ids()[i]); } } diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp index 5358a56a..0f3a1c1f 100644 --- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp +++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp @@ -1,5 +1,3 @@ -#include "lib/homestore_backend/heap_chunk_selector.h" - #include #include @@ -8,6 +6,11 @@ #include +#include "homeobject/common.hpp" +#define protected public +#define private public +#include "lib/homestore_backend/heap_chunk_selector.h" + SISL_LOGGING_DEF(HOMEOBJECT_LOG_MODS) SISL_LOGGING_INIT(HOMEOBJECT_LOG_MODS) SISL_OPTIONS_ENABLE(logging) @@ -35,9 +38,10 @@ class Chunk : public std::enable_shared_from_this< Chunk > { uint16_t get_chunk_id() const { return m_chunk_id; } - blk_num_t get_total_blks() const { return 0; } + blk_num_t get_total_blks() const { return m_available_blks; } void set_chunk_id(uint16_t chunk_id) { m_chunk_id = chunk_id; } const std::shared_ptr< Chunk > get_internal_chunk() { return shared_from_this(); } + uint64_t size() const { return 1 * Mi; } Chunk(uint32_t pdev_id, uint16_t chunk_id, uint32_t available_blks, uint32_t defrag_nblks) { m_available_blks = available_blks; @@ -69,6 +73,8 @@ uint16_t VChunk::get_chunk_id() const { return m_internal_chunk->get_chunk_id(); blk_num_t VChunk::get_total_blks() const { return m_internal_chunk->get_total_blks(); } +uint64_t VChunk::size() const { return m_internal_chunk->size(); } + cshared< Chunk > VChunk::get_internal_chunk() const { return m_internal_chunk->get_internal_chunk(); } } // namespace homestore @@ -90,10 +96,47 @@ class HeapChunkSelectorTest : public ::testing::Test { HCS.add_chunk(std::make_shared< Chunk >(3, 7, 1, 3)); HCS.add_chunk(std::make_shared< Chunk >(3, 8, 2, 2)); HCS.add_chunk(std::make_shared< Chunk >(3, 9, 3, 1)); - std::unordered_set< chunk_num_t > excludingChunks; - HCS.build_per_dev_chunk_heap(excludingChunks); + HCS.recover_per_dev_chunk_heap(); + prepare_pg(); }; + void prepare_pg() { + const uint32_t chunk_size = HCS.get_chunk_size(); // may problem + const u_int64_t pg_size = chunk_size * 3; + for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { + HCS.select_chunks_for_pg(pg_id, pg_size); + uint32_t 
last_pdev_id = 0; + // test pg heap + auto pg_heap_it = HCS.m_per_pg_heap.find(pg_id); + ASSERT_NE(pg_heap_it, HCS.m_per_pg_heap.end()); + ASSERT_EQ(pg_heap_it->second->size(), 3); + + // test chunk_map + auto v2r_chunk_map_it = HCS.m_v2r_chunk_map.find(pg_id); + ASSERT_NE(v2r_chunk_map_it, HCS.m_v2r_chunk_map.end()); + ASSERT_EQ(v2r_chunk_map_it->second->size(), 3); + + auto r2v_chunk_map_it = HCS.m_r2v_chunk_map.find(pg_id); + ASSERT_NE(r2v_chunk_map_it, HCS.m_r2v_chunk_map.end()); + ASSERT_EQ(r2v_chunk_map_it->second->size(), 3); + for (int i = 0; i < 3; ++i) { + auto r_chunk_id = v2r_chunk_map_it->second->at(i); + ASSERT_EQ(i, r2v_chunk_map_it->second->at(r_chunk_id)); + auto pdev_id = HCS.m_chunks[r_chunk_id]->get_pdev_id(); + if (last_pdev_id != 0) { + ASSERT_EQ(last_pdev_id, pdev_id); + } else { + last_pdev_id = pdev_id; + } + + auto pdev_it = HCS.m_per_dev_heap.find(pdev_id); + ASSERT_NE(pdev_it, HCS.m_per_dev_heap.end()); + ASSERT_EQ(pdev_it->second->size(), 0); + } + } + } + + public: HeapChunkSelector HCS; }; @@ -107,80 +150,144 @@ TEST_F(HeapChunkSelectorTest, test_for_each_chunk) { TEST_F(HeapChunkSelectorTest, test_select_chunk) { homestore::blk_count_t count = 1; homestore::blk_alloc_hints hints; - for (uint32_t i = 1; i < 4; i++) { - hints.pdev_id_hint = i; - for (int j = 3; j > 0; j--) { + auto chunk = HCS.select_chunk(count, hints); + ASSERT_EQ(chunk, nullptr); + + for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { + hints.pdev_id_hint = pg_id; // tmp bypass using pdev_id_hint present pg_id + for (int j = 3; j > 0; --j) { auto chunk = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk->get_pdev_id(), i); + ASSERT_NE(chunk, nullptr); + ASSERT_EQ(chunk->get_pdev_id(), pg_id); ASSERT_EQ(chunk->available_blks(), j); } } } + TEST_F(HeapChunkSelectorTest, test_select_specific_chunk) { - const chunk_num_t chunk_id = 3; - auto chunk = HCS.select_specific_chunk(chunk_id); - ASSERT_EQ(chunk->get_pdev_id(), 1); + const uint16_t pg_id = 1; + auto chunk_ids = HCS.get_pg_chunks(pg_id); + ASSERT_NE(chunk_ids, nullptr); + const chunk_num_t chunk_id = chunk_ids->at(0); + + auto chunk = HCS.select_specific_chunk(pg_id, chunk_id); ASSERT_EQ(chunk->get_chunk_id(), chunk_id); + auto pdev_id = chunk->get_pdev_id(); + + // make sure pg chunk map + auto pg_heap_it = HCS.m_per_pg_heap.find(pg_id); + ASSERT_NE(pg_heap_it, HCS.m_per_pg_heap.end()); + ASSERT_EQ(pg_heap_it->second->size(), 2); + + // test chunk_map stable + auto v2r_chunk_map_it = HCS.m_v2r_chunk_map.find(pg_id); + ASSERT_NE(v2r_chunk_map_it, HCS.m_v2r_chunk_map.end()); + ASSERT_EQ(v2r_chunk_map_it->second->size(), 3); + + auto r2v_chunk_map_it = HCS.m_r2v_chunk_map.find(pg_id); + ASSERT_NE(r2v_chunk_map_it, HCS.m_r2v_chunk_map.end()); + ASSERT_EQ(r2v_chunk_map_it->second->size(), 3); // select the rest chunks to make sure specific chunk does not exist in HeapChunkSelector anymore. 
homestore::blk_count_t count = 1; homestore::blk_alloc_hints hints; - for (uint32_t i = 1; i < 4; i++) { - hints.pdev_id_hint = i; - auto chunk_num = 3; - if (i == 1) { --chunk_num; } - for (int j = chunk_num; j > 0; j--) { - auto chunk = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk->get_pdev_id(), i); - ASSERT_EQ(chunk->available_blks(), j); - } + hints.pdev_id_hint = pg_id; + for (int j = 2; j > 0; --j) { + auto chunk = HCS.select_chunk(count, hints); + ASSERT_EQ(chunk->get_pdev_id(), pdev_id); } // release this chunk to HeapChunkSelector - HCS.release_chunk(chunk_id); - chunk = HCS.select_chunk(1, homestore::blk_alloc_hints()); + HCS.release_chunk(pg_id, chunk_id); + chunk = HCS.select_chunk(1, hints); ASSERT_EQ(1, chunk->get_pdev_id()); ASSERT_EQ(chunk_id, chunk->get_chunk_id()); -} -TEST_F(HeapChunkSelectorTest, test_most_defrag_chunk) { - for (uint32_t i = 1; i < 6; i++) { - auto chunk = HCS.most_defrag_chunk(); - // should always select the chunk with the most defrag blocks - ASSERT_EQ(chunk->get_chunk_id(), i); - } - - // after release a chunk with the most defrag blocks, most_defrag_chunk should select this chunk. - HCS.release_chunk(1); - auto chunk = HCS.most_defrag_chunk(); - ASSERT_EQ(chunk->get_chunk_id(), 1); } + TEST_F(HeapChunkSelectorTest, test_release_chunk) { homestore::blk_count_t count = 1; homestore::blk_alloc_hints hints; - hints.pdev_id_hint = 1; + const uint16_t pg_id = 1; + hints.pdev_id_hint = pg_id; auto chunk1 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk1->get_pdev_id(), 1); + auto pdev_id = chunk1->get_pdev_id(); + + ASSERT_EQ(chunk1->get_pdev_id(), pdev_id); ASSERT_EQ(chunk1->available_blks(), 3); auto chunk2 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk2->get_pdev_id(), 1); + ASSERT_EQ(chunk2->get_pdev_id(), pdev_id); ASSERT_EQ(chunk2->available_blks(), 2); - HCS.release_chunk(chunk1->get_chunk_id()); - HCS.release_chunk(chunk2->get_chunk_id()); + HCS.release_chunk(pg_id, chunk1->get_chunk_id()); + HCS.release_chunk(pg_id, chunk2->get_chunk_id()); chunk1 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk1->get_pdev_id(), 1); + ASSERT_EQ(chunk1->get_pdev_id(), pdev_id); ASSERT_EQ(chunk1->available_blks(), 3); chunk2 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk2->get_pdev_id(), 1); + ASSERT_EQ(chunk2->get_pdev_id(), pdev_id); ASSERT_EQ(chunk2->available_blks(), 2); } +TEST_F(HeapChunkSelectorTest, test_recovery) { + HeapChunkSelector HCS_recovery; + HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 1, 1, 9)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 2, 2, 8)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 3, 3, 7)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 4, 1, 6)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 5, 2, 5)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 6, 3, 4)); + + std::vector chunk_ids {1,2,3}; + const uint16_t pg_id = 1; + // test recover chunk map + HCS_recovery.set_pg_chunks(pg_id, std::move(chunk_ids)); + auto v2r_chunk_map_it = HCS_recovery.m_v2r_chunk_map.find(pg_id); + ASSERT_NE(v2r_chunk_map_it, HCS_recovery.m_v2r_chunk_map.end()); + ASSERT_EQ(v2r_chunk_map_it->second->size(), 3); + + auto r2v_chunk_map_it = HCS_recovery.m_r2v_chunk_map.find(pg_id); + ASSERT_NE(r2v_chunk_map_it, HCS_recovery.m_r2v_chunk_map.end()); + ASSERT_EQ(r2v_chunk_map_it->second->size(), 3); + // test recover pdev map + HCS_recovery.recover_per_dev_chunk_heap(); + auto pdev_it = HCS_recovery.m_per_dev_heap.find(1); + ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end()); + 
ASSERT_EQ(pdev_it->second->size(), 0);
+
+    pdev_it = HCS_recovery.m_per_dev_heap.find(2);
+    ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end());
+    ASSERT_EQ(pdev_it->second->size(), 3);
+    auto &pdev_heap = pdev_it->second->m_heap;
+    auto vchunk = homestore::VChunk(nullptr);
+    for (int i = 6; i > 3; --i) {
+        vchunk = pdev_heap.top();
+        pdev_heap.pop();
+        ASSERT_EQ(vchunk.get_chunk_id(), i);
+    }
+
+    // test recover pg heap
+    std::unordered_set< homestore::chunk_num_t > excluding_chunks;
+    excluding_chunks.emplace(1);
+    HCS_recovery.recover_pg_chunk_heap(pg_id, excluding_chunks);
+    auto pg_heap_it = HCS_recovery.m_per_pg_heap.find(pg_id);
+    ASSERT_NE(pg_heap_it, HCS_recovery.m_per_pg_heap.end());
+    ASSERT_EQ(pg_heap_it->second->size(), 2);
+
+    homestore::blk_alloc_hints hints;
+    hints.pdev_id_hint = pg_id;
+    for (int j = 3; j > 1; --j) {
+        auto chunk = HCS_recovery.select_chunk(1, hints);
+        ASSERT_EQ(chunk->get_pdev_id(), 1);
+        ASSERT_EQ(chunk->available_blks(), j);
+    }
+}
+
 int main(int argc, char* argv[]) {
     int parsed_argc = argc;
     ::testing::InitGoogleTest(&parsed_argc, argv);

From 20d84b4acf51b040b9928394101b352ed210cf4e Mon Sep 17 00:00:00 2001
From: Jie Yao
Date: Fri, 15 Nov 2024 09:32:41 +0800
Subject: [PATCH 2/5] support spare replicas in raft test framework (#226)

This PR aims to support spare replicas in the raft test framework, which is
essential for testing pg move. What's more, a basic replace_member UT is added.
---
 conanfile.py                                  |   2 +-
 src/include/homeobject/pg_manager.hpp         |   2 +-
 src/lib/homestore_backend/CMakeLists.txt      |  12 ++
 src/lib/homestore_backend/hs_homeobject.cpp   |   5 +
 src/lib/homestore_backend/hs_homeobject.hpp   |  26 +++-
 src/lib/homestore_backend/hs_pg_manager.cpp   |  12 +-
 src/lib/homestore_backend/index_kv.cpp        |   5 +-
 .../replication_state_machine.cpp             |  22 ++-
 .../homestore_backend/tests/CMakeLists.txt    |   9 ++
 .../tests/homeobj_fixture.hpp                 |  76 +++++++--
 .../tests/hs_repl_test_helper.hpp             |  23 ++-
 .../tests/test_homestore_backend.cpp          |   8 +-
 .../tests/test_homestore_backend_dynamic.cpp  | 147 ++++++++++++++++++
 13 files changed, 299 insertions(+), 50 deletions(-)
 create mode 100644 src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp

diff --git a/conanfile.py b/conanfile.py
index fe81d8e7..f2702f34 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "2.1.7"
+    version = "2.1.8"
 
     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeReplication"
diff --git a/src/include/homeobject/pg_manager.hpp b/src/include/homeobject/pg_manager.hpp
index 698ef9eb..8c74f3dc 100644
--- a/src/include/homeobject/pg_manager.hpp
+++ b/src/include/homeobject/pg_manager.hpp
@@ -41,7 +41,7 @@ struct PGInfo {
     pg_id_t id;
     mutable MemberSet members;
     peer_id_t replica_set_uuid;
-    u_int64_t size;
+    uint64_t size;
 
     auto operator<=>(PGInfo const& rhs) const { return id <=> rhs.id; }
     auto operator==(PGInfo const& rhs) const { return id == rhs.id; }
diff --git a/src/lib/homestore_backend/CMakeLists.txt b/src/lib/homestore_backend/CMakeLists.txt
index e8b88994..bd3d224f 100644
--- a/src/lib/homestore_backend/CMakeLists.txt
+++ b/src/lib/homestore_backend/CMakeLists.txt
@@ -57,3 +57,15 @@ target_link_libraries(homestore_test PUBLIC
 )
 add_test(NAME HomestoreTest COMMAND homestore_test -csv error --executor immediate --config_path ./ --override_config homestore_config.consensus.snapshot_freq_distance:0)
 set_property(TEST HomestoreTest PROPERTY RUN_SERIAL 1)
+
+add_executable
(homestore_test_dynamic) +target_sources(homestore_test_dynamic PRIVATE + $ +) +target_link_libraries(homestore_test_dynamic PUBLIC + homeobject_homestore + ${COMMON_TEST_DEPS} +) + +add_test(NAME HomestoreTestDynamic COMMAND homestore_test_dynamic -csv error --executor immediate --config_path ./ --override_config homestore_config.consensus.snapshot_freq_distance:0) + diff --git a/src/lib/homestore_backend/hs_homeobject.cpp b/src/lib/homestore_backend/hs_homeobject.cpp index 85945c91..34d0bc89 100644 --- a/src/lib/homestore_backend/hs_homeobject.cpp +++ b/src/lib/homestore_backend/hs_homeobject.cpp @@ -323,4 +323,9 @@ sisl::io_blob_safe& HSHomeObject::get_pad_buf(uint32_t pad_len) { return zpad_bufs_[idx]; } +bool HSHomeObject::pg_exists(pg_id_t pg_id) const { + std::shared_lock lock_guard(_pg_lock); + return _pg_map.contains(pg_id); +} + } // namespace homeobject diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp index d2a46892..3a723cf5 100644 --- a/src/lib/homestore_backend/hs_homeobject.hpp +++ b/src/lib/homestore_backend/hs_homeobject.hpp @@ -92,10 +92,13 @@ class HSHomeObject : public HomeObjectImpl { // | pg_members[0] | pg_members[1] | ... | pg_members[num_members-1] | // Immediately followed by an array of 'chunk_num_t' values (representing r_chunk_ids): // | chunk_num_t[0] | chunk_num_t[1] | ... | chunk_num_t[num_chunks-1] | - // Here, 'chunk_num_t[i]' represents the r_chunk_id for the v_chunk_id 'i', where v_chunk_id starts from 0 and increases sequentially. + // Here, 'chunk_num_t[i]' represents the r_chunk_id for the v_chunk_id 'i', where v_chunk_id starts from 0 and + // increases sequentially. - - uint32_t size() const { return sizeof(pg_info_superblk) - sizeof(char) + num_members * sizeof(pg_members) + num_chunks * sizeof(homestore::chunk_num_t); } + uint32_t size() const { + return sizeof(pg_info_superblk) - sizeof(char) + num_members * sizeof(pg_members) + + num_chunks * sizeof(homestore::chunk_num_t); + } static std::string name() { return _pg_meta_name; } pg_info_superblk() = default; @@ -117,11 +120,15 @@ class HSHomeObject : public HomeObjectImpl { void copy(pg_info_superblk const& rhs) { *this = rhs; } - pg_members* get_pg_members_mutable() { return reinterpret_cast(data); } - const pg_members* get_pg_members() const { return reinterpret_cast(data); } + pg_members* get_pg_members_mutable() { return reinterpret_cast< pg_members* >(data); } + const pg_members* get_pg_members() const { return reinterpret_cast< const pg_members* >(data); } - homestore::chunk_num_t* get_chunk_ids_mutable() { return reinterpret_cast(data + num_members * sizeof(pg_members)); } - const homestore::chunk_num_t* get_chunk_ids() const { return reinterpret_cast(data + num_members * sizeof(pg_members)); } + homestore::chunk_num_t* get_chunk_ids_mutable() { + return reinterpret_cast< homestore::chunk_num_t* >(data + num_members * sizeof(pg_members)); + } + const homestore::chunk_num_t* get_chunk_ids() const { + return reinterpret_cast< const homestore::chunk_num_t* >(data + num_members * sizeof(pg_members)); + } }; struct DataHeader { @@ -214,7 +221,8 @@ class HSHomeObject : public HomeObjectImpl { std::shared_ptr< BlobIndexTable > index_table_; PGMetrics metrics_; - HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, std::shared_ptr< const std::vector > pg_chunk_ids); + HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, + std::shared_ptr< const std::vector< 
homestore::chunk_num_t > > pg_chunk_ids);
         HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev);
         ~HS_PG() override = default;
 
@@ -447,6 +455,8 @@ class HSHomeObject : public HomeObjectImpl {
      */
     std::tuple< bool, bool, homestore::chunk_num_t > get_any_chunk_id(pg_id_t pg);
 
+    bool pg_exists(pg_id_t pg_id) const;
+
     cshared< HeapChunkSelector > chunk_selector() const { return chunk_selector_; }
 
     // Blob manager related.
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 0cd9f749..7e0fe734 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -287,7 +287,7 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
         return;
     }
     auto pg_id = pg_sb->id;
-    std::vector<chunk_num_t> chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks);
+    std::vector< chunk_num_t > chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks);
     chunk_selector_->set_pg_chunks(pg_id, std::move(chunk_ids));
     auto uuid_str = boost::uuids::to_string(pg_sb->index_table_uuid);
     auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value()));
@@ -302,9 +302,7 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     add_pg_to_map(std::move(hs_pg));
 }
 
-void HSHomeObject::on_pg_meta_blk_recover_completed(bool success) {
-    chunk_selector_->recover_per_dev_chunk_heap();
-}
+void HSHomeObject::on_pg_meta_blk_recover_completed(bool success) { chunk_selector_->recover_per_dev_chunk_heap(); }
 
 PGInfo HSHomeObject::HS_PG::pg_info_from_sb(homestore::superblk< pg_info_superblk > const& sb) {
     PGInfo pginfo{sb->id};
@@ -317,7 +315,8 @@ PGInfo HSHomeObject::HS_PG::pg_info_from_sb(homestore::superblk< pg_info_superbl
     return pginfo;
 }
 
-HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, std::shared_ptr< const std::vector > pg_chunk_ids) :
+HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table,
+                           std::shared_ptr< const std::vector< chunk_num_t > > pg_chunk_ids) :
     PG{std::move(info)},
     pg_sb_{_pg_meta_name},
     repl_dev_{std::move(rdev)},
@@ -325,7 +324,8 @@ HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, share
     metrics_{*this} {
     RELEASE_ASSERT(pg_chunk_ids != nullptr, "PG chunks null");
     const uint32_t num_chunks = pg_chunk_ids->size();
-    pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members)+ num_chunks * sizeof(homestore::chunk_num_t));
+    pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members) +
+                  num_chunks * sizeof(homestore::chunk_num_t));
     pg_sb_->id = pg_info_.id;
     pg_sb_->num_members = pg_info_.members.size();
     pg_sb_->num_chunks = num_chunks;
diff --git a/src/lib/homestore_backend/index_kv.cpp b/src/lib/homestore_backend/index_kv.cpp
index 00e63bbc..d02e941e 100644
--- a/src/lib/homestore_backend/index_kv.cpp
+++ b/src/lib/homestore_backend/index_kv.cpp
@@ -114,7 +114,10 @@ void HSHomeObject::print_btree_index(pg_id_t pg_id) {
 shared< BlobIndexTable > HSHomeObject::get_index_table(pg_id_t pg_id) {
     std::shared_lock lock_guard(_pg_lock);
     auto iter = _pg_map.find(pg_id);
-    RELEASE_ASSERT(iter != _pg_map.end(), "PG not found");
+    if (iter == _pg_map.end()) {
+        LOGW("PG not found for pg_id={} when getting index table", pg_id);
+        return nullptr;
+    }
     auto hs_pg = static_cast<
HSHomeObject::HS_PG* >(iter->second.get()); RELEASE_ASSERT(hs_pg->index_table_ != nullptr, "Index table not found for PG"); return hs_pg->index_table_; diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp index ac3c6114..3ff25ad8 100644 --- a/src/lib/homestore_backend/replication_state_machine.cpp +++ b/src/lib/homestore_backend/replication_state_machine.cpp @@ -5,6 +5,8 @@ #include "generated/resync_pg_shard_generated.h" #include "generated/resync_blob_data_generated.h" +#include +#include namespace homeobject { void ReplicationStateMachine::on_commit(int64_t lsn, const sisl::blob& header, const sisl::blob& key, @@ -40,6 +42,16 @@ bool ReplicationStateMachine::on_pre_commit(int64_t lsn, sisl::blob const& heade // For shard creation, since homestore repldev inside will write shard header to data service first before this // function is called. So there is nothing is needed to do and we can get the binding chunk_id with the newly shard // from the blkid in on_commit() + if (ctx->op_code() == homestore::journal_type_t::HS_CTRL_REPLACE) { + LOGI("pre_commit replace member log entry, lsn:{}", lsn); + return true; + } + + if (ctx->op_code() == homestore::journal_type_t::HS_CTRL_DESTROY) { + LOGI("pre_commit destroy member log entry, lsn:{}", lsn); + return true; + } + const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes()); if (msg_header->corrupted()) { LOGE("corrupted message in pre_commit, lsn:{}", lsn); @@ -121,9 +133,17 @@ ReplicationStateMachine::get_blk_alloc_hints(sisl::blob const& header, uint32_t const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes()); switch (msg_header->msg_type) { case ReplicationMessageType::CREATE_SHARD_MSG: { + auto& pg_id = msg_header->pg_id; + // check whether the pg exists + if (!home_object_->pg_exists(pg_id)) { + LOGI("can not find pg {} when getting blk_alloc_hint", pg_id); + // TODO:: add error code to indicate the pg not found in homestore side + return folly::makeUnexpected(homestore::ReplServiceError::NO_SPACE_LEFT); + } // Since chunks are selected when a pg is created, the chunkselector selects one of the chunks owned by the pg homestore::blk_alloc_hints hints; - hints.pdev_id_hint = msg_header->pg_id; // FIXME @Hooper: Temporary bypass using pdev_id_hint to represent pg_id_hint, "identical layout" will change it + hints.pdev_id_hint = pg_id; // FIXME @Hooper: Temporary bypass using pdev_id_hint to represent + // pg_id_hint, "identical layout" will change it return hints; } diff --git a/src/lib/homestore_backend/tests/CMakeLists.txt b/src/lib/homestore_backend/tests/CMakeLists.txt index 21e2fe45..898e7380 100644 --- a/src/lib/homestore_backend/tests/CMakeLists.txt +++ b/src/lib/homestore_backend/tests/CMakeLists.txt @@ -17,7 +17,16 @@ target_link_libraries(homestore_tests ${COMMON_TEST_DEPS} ) + +add_library(homestore_tests_dynamic OBJECT) +target_sources(homestore_tests_dynamic PRIVATE test_homestore_backend_dynamic.cpp) +target_link_libraries(homestore_tests_dynamic + homeobject_homestore + ${COMMON_TEST_DEPS} + ) + add_executable (test_heap_chunk_selector) target_sources(test_heap_chunk_selector PRIVATE test_heap_chunk_selector.cpp ../heap_chunk_selector.cpp) target_link_libraries(test_heap_chunk_selector homestore::homestore ${COMMON_TEST_DEPS}) add_test(NAME HeapChunkSelectorTest COMMAND test_heap_chunk_selector) + diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp 
b/src/lib/homestore_backend/tests/homeobj_fixture.hpp index 441475c8..d54bac13 100644 --- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp +++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp @@ -55,18 +55,36 @@ class HomeObjectFixture : public ::testing::Test { std::this_thread::sleep_for(std::chrono::seconds(5)); } - // schedule create_pg to replica_num - void create_pg(pg_id_t pg_id, uint32_t replica_num = 0) { - if (replica_num == g_helper->replica_num()) { - auto memebers = g_helper->members(); - auto name = g_helper->name(); + /** + * \brief create pg with a given id. + * + * \param pg_id pg id that will be newly created. + * \param leader_replica_num the replica number that will be the initial leader of repl dev of this pg + * \param excluding_replicas_in_pg the set of replicas that will be excluded in the initial members of this pg. this + * means all the started replicas that are not in this set will be the initial members of this pg. + */ + void create_pg(pg_id_t pg_id, uint8_t leader_replica_num = 0, + std::optional< std::unordered_set< uint8_t > > excluding_replicas_in_pg = std::nullopt) { + std::unordered_set< uint8_t > excluding_pg_replicas; + if (excluding_replicas_in_pg.has_value()) excluding_pg_replicas = excluding_replicas_in_pg.value(); + if (excluding_pg_replicas.contains(leader_replica_num)) + RELEASE_ASSERT(false, "fail to create pg, leader_replica_num {} is excluded in the pg", leader_replica_num); + + auto my_replica_num = g_helper->replica_num(); + if (excluding_pg_replicas.contains(my_replica_num)) return; + + auto pg_size = SISL_OPTIONS["pg_size"].as< uint64_t >() * Mi; + auto name = g_helper->test_name(); + + if (leader_replica_num == my_replica_num) { + auto members = g_helper->members(); auto info = homeobject::PGInfo(pg_id); - info.size = 50 * Mi; - for (const auto& member : memebers) { - if (replica_num == member.second) { + info.size = pg_size; + for (const auto& member : members) { + if (leader_replica_num == member.second) { // by default, leader is the first member info.members.insert(homeobject::PGMember{member.first, name + std::to_string(member.second), 1}); - } else { + } else if (!excluding_pg_replicas.contains(member.second)) { info.members.insert(homeobject::PGMember{member.first, name + std::to_string(member.second), 0}); } } @@ -84,6 +102,7 @@ class HomeObjectFixture : public ::testing::Test { ShardInfo create_shard(pg_id_t pg_id, uint64_t size_bytes) { g_helper->sync(); + if (!am_i_in_pg(pg_id)) return {}; // schedule create_shard only on leader run_on_pg_leader(pg_id, [&]() { auto s = _obj_inst->shard_manager()->create_shard(pg_id, size_bytes).get(); @@ -113,10 +132,9 @@ class HomeObjectFixture : public ::testing::Test { } ShardInfo seal_shard(shard_id_t shard_id) { - // before seal shard, we need to wait all the memebers to complete shard state verification g_helper->sync(); auto r = _obj_inst->shard_manager()->get_shard(shard_id).get(); - RELEASE_ASSERT(!!r, "failed to get shard {}", shard_id); + if (!r) return {}; auto pg_id = r.value().placement_group; run_on_pg_leader(pg_id, [&]() { @@ -135,10 +153,10 @@ class HomeObjectFixture : public ::testing::Test { } } - void put_blob(shard_id_t shard_id, Blob&& blob) { + void put_blob(shard_id_t shard_id, Blob&& blob, bool need_sync_before_start = true) { g_helper->sync(); auto r = _obj_inst->shard_manager()->get_shard(shard_id).get(); - RELEASE_ASSERT(!!r, "failed to get shard {}", shard_id); + if (!r) return; auto pg_id = r.value().placement_group; run_on_pg_leader(pg_id, [&]() { @@ 
-160,9 +178,11 @@ class HomeObjectFixture : public ::testing::Test { // TODO:make this run in parallel void put_blobs(std::map< pg_id_t, std::vector< shard_id_t > > const& pg_shard_id_vec, - uint64_t const num_blobs_per_shard, std::map< pg_id_t, blob_id_t >& pg_blob_id) { + uint64_t const num_blobs_per_shard, std::map< pg_id_t, blob_id_t >& pg_blob_id, + bool need_sync_before_start = true) { g_helper->sync(); for (const auto& [pg_id, shard_vec] : pg_shard_id_vec) { + if (!am_i_in_pg(pg_id)) continue; // the blob_id of a pg is a continuous number starting from 0 and increasing by 1 blob_id_t current_blob_id{pg_blob_id[pg_id]}; @@ -195,6 +215,7 @@ class HomeObjectFixture : public ::testing::Test { pg_blob_id[pg_id] = current_blob_id; auto last_blob_id = pg_blob_id[pg_id] - 1; while (!blob_exist(shard_id, last_blob_id)) { + LOGINFO("waiting for pg_id {} blob {} to be created locally", pg_id, last_blob_id); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); } LOGINFO("shard {} blob {} is created locally, which means all the blob before {} are created", shard_id, @@ -207,6 +228,7 @@ class HomeObjectFixture : public ::testing::Test { uint64_t const num_blobs_per_shard, std::map< pg_id_t, blob_id_t >& pg_blob_id) { g_helper->sync(); for (const auto& [pg_id, shard_vec] : pg_shard_id_vec) { + if (!am_i_in_pg(pg_id)) continue; run_on_pg_leader(pg_id, [&]() { blob_id_t current_blob_id{0}; for (; current_blob_id < pg_blob_id[pg_id];) { @@ -235,6 +257,7 @@ class HomeObjectFixture : public ::testing::Test { uint32_t off = 0, len = 0; for (const auto& [pg_id, shard_vec] : pg_shard_id_vec) { + if (!am_i_in_pg(pg_id)) continue; blob_id_t current_blob_id{0}; for (const auto& shard_id : shard_vec) { for (uint64_t k = 0; k < num_blobs_per_shard; k++) { @@ -272,6 +295,7 @@ class HomeObjectFixture : public ::testing::Test { uint32_t exp_tombstone_blobs = deleted_all ? 
shards_per_pg * blobs_per_shard : 0; for (uint32_t i = 1; i <= num_pgs; ++i) { + if (!am_i_in_pg(i)) continue; PGStats stats; _obj_inst->pg_manager()->get_stats(i, stats); ASSERT_EQ(stats.num_active_objects, exp_active_blobs) @@ -330,11 +354,33 @@ class HomeObjectFixture : public ::testing::Test { void run_on_pg_leader(pg_id_t pg_id, auto&& lambda) { PGStats pg_stats; auto res = _obj_inst->pg_manager()->get_stats(pg_id, pg_stats); - RELEASE_ASSERT(res, "can not get pg {} stats", pg_id); + if (!res) return; if (g_helper->my_replica_id() == pg_stats.leader_id) { lambda(); } // TODO: add logic for check and retry of leader change if necessary } + void run_if_in_pg(pg_id_t pg_id, auto&& lambda) { + if (am_i_in_pg(pg_id)) lambda(); + } + + bool am_i_in_pg(pg_id_t pg_id) { + PGStats pg_stats; + auto res = _obj_inst->pg_manager()->get_stats(pg_id, pg_stats); + if (!res) return false; + for (const auto& member : pg_stats.members) { + if (std::get< 0 >(member) == g_helper->my_replica_id()) return true; + } + return false; + } + + // wait for the last blob to be created locally, which means all the blob before this blob are created + void wait_for_all(shard_id_t shard_id, blob_id_t blob_id) { + while (true) { + if (blob_exist(shard_id, blob_id)) return; + std::this_thread::sleep_for(1s); + } + } + private: bool pg_exist(pg_id_t pg_id) { std::vector< pg_id_t > pg_ids; diff --git a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp index d6135b87..7fb638e9 100644 --- a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp +++ b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp @@ -49,8 +49,7 @@ namespace test_common { class HSReplTestHelper { protected: struct IPCData { - void sync(uint64_t sync_point, uint32_t max_count = 0) { - if (max_count == 0) { max_count = SISL_OPTIONS["replicas"].as< uint8_t >(); } + void sync(uint64_t sync_point, uint32_t max_count) { std::unique_lock< bip::interprocess_mutex > lg(mtx_); ++homeobject_replica_count_; if (homeobject_replica_count_ == max_count) { @@ -132,13 +131,13 @@ class HSReplTestHelper { friend class TestReplApplication; HSReplTestHelper(std::string const& name, std::vector< std::string > const& args, char** argv) : - name_{name}, args_{args}, argv_{argv} {} + test_name_{name}, args_{args}, argv_{argv} {} - void setup(uint32_t num_replicas) { - num_replicas_ = num_replicas; - replica_num_ = SISL_OPTIONS["replica_num"].as< uint16_t >(); + void setup(uint8_t num_replicas) { + total_replicas_nums_ = num_replicas; + replica_num_ = SISL_OPTIONS["replica_num"].as< uint8_t >(); - sisl::logging::SetLogger(name_ + std::string("_replica_") + std::to_string(replica_num_)); + sisl::logging::SetLogger(test_name_ + std::string("_replica_") + std::to_string(replica_num_)); sisl::logging::SetLogPattern("[%D %T%z] [%^%L%$] [%n] [%t] %v"); boost::uuids::string_generator gen; @@ -171,9 +170,7 @@ class HSReplTestHelper { dev_list_.emplace_back(dev); } } - name_ += std::to_string(replica_num_); - - // prepare_devices(); + name_ = test_name_ + std::to_string(replica_num_); if (replica_num_ == 0) { // Erase previous shmem and create a new shmem with IPCData structure @@ -252,10 +249,11 @@ class HSReplTestHelper { std::map< peer_id_t, uint32_t > const& members() const { return members_; } std::string name() const { return name_; } + std::string test_name() const { return test_name_; } void teardown() { sisl::GrpcAsyncClientWorker::shutdown_all(); } - void sync(uint32_t num_members = 0) { 
ipc_data_->sync(sync_point_num++, num_members); } + void sync() { ipc_data_->sync(sync_point_num++, total_replicas_nums_); } void set_uint64_id(uint64_t uint64_id) { ipc_data_->set_uint64_id(uint64_id); } uint64_t get_uint64_id() { return ipc_data_->get_uint64_id(); } @@ -329,11 +327,12 @@ class HSReplTestHelper { private: uint8_t replica_num_; + uint8_t total_replicas_nums_; uint64_t sync_point_num{0}; std::string name_; + std::string test_name_; std::vector< std::string > args_; char** argv_; - uint32_t num_replicas_; std::vector< std::string > generated_devs; std::vector< std::string > dev_list_; std::shared_ptr< homeobject::HomeObject > homeobj_; diff --git a/src/lib/homestore_backend/tests/test_homestore_backend.cpp b/src/lib/homestore_backend/tests/test_homestore_backend.cpp index 73269875..35dd6079 100644 --- a/src/lib/homestore_backend/tests/test_homestore_backend.cpp +++ b/src/lib/homestore_backend/tests/test_homestore_backend.cpp @@ -19,6 +19,8 @@ SISL_OPTION_GROUP( (spdk, "", "spdk", "spdk", ::cxxopts::value< bool >()->default_value("false"), "true or false"), (dev_size_mb, "", "dev_size_mb", "size of each device in MB", ::cxxopts::value< uint64_t >()->default_value("2048"), "number"), + (pg_size, "", "pg_size", "default size of pg in MB", ::cxxopts::value< uint64_t >()->default_value("100"), + "number"), (num_threads, "", "num_threads", "number of threads", ::cxxopts::value< uint32_t >()->default_value("2"), "number"), (num_devs, "", "num_devs", "number of devices to create", ::cxxopts::value< uint32_t >()->default_value("3"), "number"), @@ -27,21 +29,17 @@ SISL_OPTION_GROUP( (init_device, "", "init_device", "init real device", ::cxxopts::value< bool >()->default_value("false"), "true or false"), (replicas, "", "replicas", "Total number of replicas", ::cxxopts::value< uint8_t >()->default_value("3"), "number"), - (spare_replicas, "", "spare_replicas", "Additional number of spare replicas not part of repldev", - ::cxxopts::value< uint32_t >()->default_value("1"), "number"), (base_port, "", "base_port", "Port number of first replica", ::cxxopts::value< uint16_t >()->default_value("4000"), "number"), (replica_num, "", "replica_num", "Internal replica num (used to lauch multi process) - don't override", - ::cxxopts::value< uint16_t >()->default_value("0"), "number"), + ::cxxopts::value< uint8_t >()->default_value("0"), "number"), (replica_dev_list, "", "replica_dev_list", "Device list for all replicas", ::cxxopts::value< std::vector< std::string > >(), "path [...]"), - (num_io, "", "num_io", "number of IO operations", ::cxxopts::value< uint64_t >()->default_value("300"), "number"), (qdepth, "", "qdepth", "Max outstanding operations", ::cxxopts::value< uint32_t >()->default_value("8"), "number"), (num_pgs, "", "num_pgs", "number of pgs", ::cxxopts::value< uint64_t >()->default_value("2"), "number"), (num_shards, "", "num_shards", "number of shards", ::cxxopts::value< uint64_t >()->default_value("4"), "number"), (num_blobs, "", "num_blobs", "number of blobs", ::cxxopts::value< uint64_t >()->default_value("20"), "number")); -// SISL_LOGGING_INIT(HOMEOBJECT_LOG_MODS) SISL_LOGGING_INIT(homeobject) #define test_options logging, config, homeobject, test_homeobject_repl_common SISL_OPTIONS_ENABLE(test_options) diff --git a/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp b/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp new file mode 100644 index 00000000..681782b6 --- /dev/null +++ b/src/lib/homestore_backend/tests/test_homestore_backend_dynamic.cpp @@ 
-0,0 +1,147 @@
+/*********************************************************************************
+ * Modifications Copyright 2017-2019 eBay Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ *********************************************************************************/
+/*
+ * Homeobject Replication testing binaries shared common definitions, APIs and data structures
+ */
+#include "homeobj_fixture.hpp"
+
+TEST_F(HomeObjectFixture, ReplaceMember) {
+    LOGINFO("HomeObject replica={} setup completed", g_helper->replica_num());
+    auto spare_num_replicas = SISL_OPTIONS["spare_replicas"].as< uint8_t >();
+    ASSERT_TRUE(spare_num_replicas > 0) << "we need spare replicas for homestore backend dynamic tests";
+
+    // step 1: Create a pg without spare replicas.
+    auto num_replicas = SISL_OPTIONS["replicas"].as< uint8_t >();
+    std::unordered_set< uint8_t > excluding_replicas_in_pg;
+    for (size_t i = num_replicas; i < num_replicas + spare_num_replicas; i++)
+        excluding_replicas_in_pg.insert(i);
+
+    pg_id_t pg_id{1};
+    create_pg(pg_id, 0 /* pg_leader */, excluding_replicas_in_pg);
+
+    // step 2: create shards and put blobs in the pg
+    auto num_shards_per_pg = SISL_OPTIONS["num_shards"].as< uint64_t >();
+    auto num_blobs_per_shard = SISL_OPTIONS["num_blobs"].as< uint64_t >() / num_shards_per_pg;
+
+    for (uint64_t j = 0; j < num_shards_per_pg; j++)
+        create_shard(pg_id, 64 * Mi);
+
+    // we cannot share all the shard_ids and blob_ids among the replicas (including the spare ones), so we need to
+    // derive them by calculation, as sketched below.
+    // since shard_id = pg_id + shard_sequence_num, we can derive the shard_ids for all the shards in this pg; this
+    // derived info is used by all replicas (including the newly added member) to verify the blobs.
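A minimal sketch of the derivation the comment above relies on, assuming make_new_shard_id() packs the pg id into the upper 16 bits of a 64-bit shard id above a 48-bit per-pg sequence number; the packing is defined elsewhere in HomeObject, not by this patch, so treat this as an illustration only:

#include <cstdint>

using pg_id_t = uint16_t;
using shard_id_t = uint64_t;

// Illustrative sketch: pg id in the upper 16 bits, shard sequence number
// (starting at 1 within each pg) in the lower 48 bits.
constexpr shard_id_t derive_shard_id(pg_id_t pg_id, uint64_t sequence_num) {
    return (static_cast< uint64_t >(pg_id) << 48) | sequence_num;
}

// e.g. the second shard ever created in pg 1:
static_assert(derive_shard_id(1, 2) == ((1ull << 48) | 2), "packing mismatch");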
+    std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec;
+    std::map< pg_id_t, blob_id_t > pg_blob_id;
+    for (shard_id_t shard_id = 1; shard_id <= num_shards_per_pg; shard_id++) {
+        auto derived_shard_id = make_new_shard_id(pg_id, shard_id);
+        pg_shard_id_vec[pg_id].emplace_back(derived_shard_id);
+    }
+
+    // TODO:: if we add a delete-blobs case to baseline resync, we also need to derive the last blob_id in this pg
+    // for spare replicas
+    pg_blob_id[pg_id] = 0;
+
+    // put and verify blobs in the pg, excluding the spare replicas
+    put_blobs(pg_shard_id_vec, num_blobs_per_shard, pg_blob_id);
+
+    verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
+    verify_obj_count(1, num_blobs_per_shard, num_shards_per_pg, false);
+
+    // all the replicas, including the spare ones, sync at this point
+    g_helper->sync();
+
+    // step 3: replace a member
+    auto out_member_id = g_helper->replica_id(num_replicas - 1);
+    auto in_member_id = g_helper->replica_id(num_replicas); /*spare replica*/
+
+    run_on_pg_leader(pg_id, [&]() {
+        auto r = _obj_inst->pg_manager()
+                     ->replace_member(pg_id, out_member_id, PGMember{in_member_id, "new_member", 0})
+                     .get();
+        ASSERT_TRUE(r);
+    });
+
+    // the new member should wait until it joins the pg and all the blobs are replicated to it
+    if (in_member_id == g_helper->my_replica_id()) {
+        while (!am_i_in_pg(pg_id)) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+            LOGINFO("new member is waiting to become a member of pg {}", pg_id);
+        }
+
+        wait_for_all(pg_shard_id_vec[pg_id].back() /*the last shard id in this pg*/,
+                     num_shards_per_pg * num_blobs_per_shard - 1 /*the last blob id in this pg*/);
+    }
+
+    // step 4: after completing the member replacement, verify the blobs on all the members of this pg, including
+    // the newly added spare replica.
+    run_if_in_pg(pg_id, [&]() {
+        verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
+        verify_obj_count(1, num_blobs_per_shard, num_shards_per_pg, false);
+    });
+}
+
+SISL_OPTION_GROUP(
+    test_homeobject_repl_common,
+    (spdk, "", "spdk", "spdk", ::cxxopts::value< bool >()->default_value("false"), "true or false"),
+    (dev_size_mb, "", "dev_size_mb", "size of each device in MB", ::cxxopts::value< uint64_t >()->default_value("2048"),
+     "number"),
+    (pg_size, "", "pg_size", "default size of pg in MB", ::cxxopts::value< uint64_t >()->default_value("100"),
+     "number"),
+    (num_threads, "", "num_threads", "number of threads", ::cxxopts::value< uint32_t >()->default_value("2"), "number"),
+    (num_devs, "", "num_devs", "number of devices to create", ::cxxopts::value< uint32_t >()->default_value("3"),
+     "number"),
+    (use_file, "", "use_file", "use file instead of real drive", ::cxxopts::value< bool >()->default_value("false"),
+     "true or false"),
+    (init_device, "", "init_device", "init real device", ::cxxopts::value< bool >()->default_value("false"),
+     "true or false"),
+    (replicas, "", "replicas", "Total number of replicas", ::cxxopts::value< uint8_t >()->default_value("3"), "number"),
+    (spare_replicas, "", "spare_replicas", "Additional number of spare replicas not part of repldev",
+     ::cxxopts::value< uint8_t >()->default_value("1"), "number"),
+    (base_port, "", "base_port", "Port number of first replica", ::cxxopts::value< uint16_t >()->default_value("4000"),
+     "number"),
+    (replica_num, "", "replica_num", "Internal replica num (used to launch multi-process) - don't override",
+     ::cxxopts::value< uint8_t >()->default_value("0"), "number"),
+    (replica_dev_list, "", "replica_dev_list", "Device list for all replicas",
::cxxopts::value< std::vector< std::string > >(), "path [...]"), + (qdepth, "", "qdepth", "Max outstanding operations", ::cxxopts::value< uint32_t >()->default_value("8"), "number"), + (num_pgs, "", "num_pgs", "number of pgs", ::cxxopts::value< uint64_t >()->default_value("2"), "number"), + (num_shards, "", "num_shards", "number of shards", ::cxxopts::value< uint64_t >()->default_value("4"), "number"), + (num_blobs, "", "num_blobs", "number of blobs", ::cxxopts::value< uint64_t >()->default_value("20"), "number")); + +SISL_LOGGING_INIT(homeobject) +#define test_options logging, config, homeobject, test_homeobject_repl_common +SISL_OPTIONS_ENABLE(test_options) + +std::unique_ptr< test_common::HSReplTestHelper > g_helper; + +int main(int argc, char* argv[]) { + int parsed_argc = argc; + char** orig_argv = argv; + std::vector< std::string > args; + for (int i = 0; i < argc; ++i) { + args.emplace_back(argv[i]); + } + ::testing::InitGoogleTest(&parsed_argc, argv); + SISL_OPTIONS_LOAD(parsed_argc, argv, test_options); + + g_helper = std::make_unique< test_common::HSReplTestHelper >("test_homeobject_dynamic", args, orig_argv); + // We spawn spare replica's, which is used for testing baseline resync + // TODO: handle overflow for the sum of two uint8_t + auto total_replicas = SISL_OPTIONS["replicas"].as< uint8_t >() + SISL_OPTIONS["spare_replicas"].as< uint8_t >(); + g_helper->setup(total_replicas); + auto ret = RUN_ALL_TESTS(); + g_helper->teardown(); + return ret; +} \ No newline at end of file From 26e1984f706597b41cb6ea2797cdbb4701024884 Mon Sep 17 00:00:00 2001 From: Hooper Date: Thu, 7 Nov 2024 03:27:34 -0700 Subject: [PATCH 3/5] 1. Implement shard identical layout using blk_alloc_hints and add v_chunk_id field to shard_info_superblk. 2. Enhance defensive checks in ChunkSelector for input validation and exception handling. 3. Adapt unit tests and introduce PGExceedSpaceTest. 
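A note on the overflow TODO in the test's main() above: both operands of `replicas + spare_replicas` are uint8_t, which C++ promotes to int before the addition, so the sum itself cannot wrap; the hazard is the implicit narrowing back to uint8_t when the total is passed to setup(). A checked conversion along these lines would make the TODO concrete (the helper name is hypothetical, not part of the patch):

#include <cstdint>
#include <limits>
#include <stdexcept>

// Hypothetical helper: refuse totals that no longer fit in uint8_t.
inline uint8_t checked_total_replicas(uint8_t replicas, uint8_t spares) {
    int const total = replicas + spares; // integer promotion: cannot overflow here
    if (total > std::numeric_limits< uint8_t >::max()) {
        throw std::out_of_range("replicas + spare_replicas exceeds uint8_t range");
    }
    return static_cast< uint8_t >(total);
}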
--- conanfile.py | 2 +- src/include/homeobject/pg_manager.hpp | 1 + src/include/homeobject/shard_manager.hpp | 2 +- .../homestore_backend/heap_chunk_selector.cpp | 439 ++++++++---------- .../homestore_backend/heap_chunk_selector.h | 106 +++-- src/lib/homestore_backend/hs_blob_manager.cpp | 4 +- src/lib/homestore_backend/hs_homeobject.hpp | 54 ++- src/lib/homestore_backend/hs_pg_manager.cpp | 54 +-- .../homestore_backend/hs_shard_manager.cpp | 110 ++++- .../replication_state_machine.cpp | 34 +- .../homestore_backend/tests/hs_pg_tests.cpp | 38 ++ .../tests/hs_shard_tests.cpp | 40 +- .../tests/test_heap_chunk_selector.cpp | 328 ++++++++----- 13 files changed, 690 insertions(+), 522 deletions(-) diff --git a/conanfile.py b/conanfile.py index f2702f34..8a044105 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "2.1.8" + version = "2.1.9" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeReplication" diff --git a/src/include/homeobject/pg_manager.hpp b/src/include/homeobject/pg_manager.hpp index 8c74f3dc..4819052a 100644 --- a/src/include/homeobject/pg_manager.hpp +++ b/src/include/homeobject/pg_manager.hpp @@ -42,6 +42,7 @@ struct PGInfo { mutable MemberSet members; peer_id_t replica_set_uuid; uint64_t size; + uint64_t chunk_size; auto operator<=>(PGInfo const& rhs) const { return id <=> rhs.id; } auto operator==(PGInfo const& rhs) const { return id == rhs.id; } diff --git a/src/include/homeobject/shard_manager.hpp b/src/include/homeobject/shard_manager.hpp index 812ebe9f..19ee7e2b 100644 --- a/src/include/homeobject/shard_manager.hpp +++ b/src/include/homeobject/shard_manager.hpp @@ -10,7 +10,7 @@ namespace homeobject { ENUM(ShardError, uint16_t, UNKNOWN = 1, TIMEOUT, INVALID_ARG, NOT_LEADER, UNSUPPORTED_OP, UNKNOWN_PG, UNKNOWN_SHARD, - PG_NOT_READY, CRC_MISMATCH); + PG_NOT_READY, CRC_MISMATCH, NO_SPACE_LEFT); struct ShardInfo { enum class State : uint8_t { diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp index 6e2a1492..a4d46706 100644 --- a/src/lib/homestore_backend/heap_chunk_selector.cpp +++ b/src/lib/homestore_backend/heap_chunk_selector.cpp @@ -17,37 +17,27 @@ namespace homeobject { // 2 the key collection of m_chunks will never change // this should only be called when initializing HeapChunkSelector in Homestore -void HeapChunkSelector::add_chunk(csharedChunk& chunk) { m_chunks.emplace(VChunk(chunk).get_chunk_id(), chunk); } +void HeapChunkSelector::add_chunk(csharedChunk& chunk) { + m_chunks.emplace(VChunk(chunk).get_chunk_id(), std::make_shared< ExtendedVChunk >(chunk)); +} -void HeapChunkSelector::add_chunk_internal(const chunk_num_t chunkID, bool add_to_heap) { - if (m_chunks.find(chunkID) == m_chunks.end()) { - // sanity check - LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID); - return; - } +void HeapChunkSelector::add_chunk_internal(const chunk_num_t p_chunk_id, bool add_to_heap) { + // private function p_chunk_id must belong to m_chunks - const auto& chunk = m_chunks[chunkID]; - VChunk vchunk(chunk); - auto pdevID = vchunk.get_pdev_id(); + auto chunk = m_chunks[p_chunk_id]; + auto pdevID = chunk->get_pdev_id(); // add this find here, since we don`t want to call make_shared in try_emplace every time. 
     auto it = m_per_dev_heap.find(pdevID);
     if (it == m_per_dev_heap.end()) { it = m_per_dev_heap.emplace(pdevID, std::make_shared< ChunkHeap >()).first; }
 
     // build total blks for every chunk on this device;
-    it->second->m_total_blks += vchunk.get_total_blks();
+    it->second->m_total_blks += chunk->get_total_blks();
 
     if (add_to_heap) {
-        auto& avalableBlkCounter = it->second->available_blk_count;
-        avalableBlkCounter.fetch_add(vchunk.available_blks());
-
-        auto& heapLock = it->second->mtx;
+        std::lock_guard< std::mutex > l(it->second->mtx);
         auto& heap = it->second->m_heap;
-        {
-            std::lock_guard< std::mutex > l(m_defrag_mtx);
-            m_defrag_heap.emplace(chunk);
-        }
-        std::lock_guard< std::mutex > l(heapLock);
         heap.emplace(chunk);
+        it->second->available_blk_count += chunk->available_blks();
     }
 }
 
@@ -59,157 +49,83 @@ csharedChunk HeapChunkSelector::select_chunk(homestore::blk_count_t count, const
         return nullptr;
     }
 
-    std::shared_lock lock_guard(m_chunk_selector_mtx);
-    // FIXME @Hooper: Temporary bypass using pdev_id_hint to represent pg_id_hint, "identical layout" will change it
-    pg_id_t pg_id = 0;
-    auto& pg_id_hint = hint.pdev_id_hint;
-    if (!pg_id_hint.has_value()) {
-        LOGWARNMOD(homeobject, "should not allocated a chunk without exiting pg_id in hint!");
+    if (!hint.application_hint.has_value()) {
+        LOGWARNMOD(homeobject, "should not allocate a chunk without an existing application_hint in hint!");
         return nullptr;
     } else {
-        pg_id = pg_id_hint.value();
+        // Both chunk_num_t and pg_id_t are of type uint16_t.
+        static_assert(std::is_same< pg_id_t, uint16_t >::value, "pg_id_t is not uint16_t");
+        static_assert(std::is_same< homestore::chunk_num_t, uint16_t >::value, "chunk_num_t is not uint16_t");
+        uint32_t application_hint = hint.application_hint.value();
+        pg_id_t pg_id = (uint16_t)(application_hint >> 16 & 0xFFFF);
+        homestore::chunk_num_t v_chunk_id = (uint16_t)(application_hint & 0xFFFF);
+        return select_specific_chunk(pg_id, v_chunk_id);
     }
-
-    auto it = m_per_pg_heap.find(pg_id);
-    if (it == m_per_pg_heap.end()) {
-        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
-        return nullptr;
-    }
-
-    auto vchunk = VChunk(nullptr);
-    auto& heap = it->second->m_heap;
-    if (auto lock_guard = std::lock_guard< std::mutex >(it->second->mtx); !heap.empty()) {
-        vchunk = heap.top();
-        heap.pop();
-    }
-
-    if (vchunk.get_internal_chunk()) {
-        auto& avalableBlkCounter = it->second->available_blk_count;
-        avalableBlkCounter.fetch_sub(vchunk.available_blks());
-        remove_chunk_from_defrag_heap(vchunk.get_chunk_id());
-    } else {
-        LOGWARNMOD(homeobject, "no available chunks left for pg {}", pg_id);
-    }
-
-    return vchunk.get_internal_chunk();
 }
 
-csharedChunk HeapChunkSelector::select_specific_chunk(const pg_id_t pg_id, const chunk_num_t chunkID) {
-    if (m_chunks.find(chunkID) == m_chunks.end()) {
-        // sanity check
-        LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID);
-        return nullptr;
-    }
-
+csharedChunk HeapChunkSelector::select_specific_chunk(const pg_id_t pg_id, const chunk_num_t v_chunk_id) {
     std::shared_lock lock_guard(m_chunk_selector_mtx);
-    auto pg_it = m_per_pg_heap.find(pg_id);
-    if (pg_it == m_per_pg_heap.end()) {
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
         LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
         return nullptr;
     }
 
-    VChunk vchunk(nullptr);
-    auto& heap = pg_it->second->m_heap;
-    if (auto lock_guard = std::lock_guard< std::mutex >(pg_it->second->mtx); !heap.empty()) {
-        std::vector< VChunk > chunks;
-        chunks.reserve(heap.size());
-
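The decode in select_chunk() above implies that the proposer packs pg_id and v_chunk_id into the 32-bit application_hint the same way. A round-trip sketch of that encoding; only the decode above is defined by this patch, the encode helper is an illustration:

#include <cstdint>

using pg_id_t = uint16_t;
using chunk_num_t = uint16_t;

// pg_id occupies the upper 16 bits of the hint, v_chunk_id the lower 16 bits.
constexpr uint32_t encode_hint(pg_id_t pg_id, chunk_num_t v_chunk_id) {
    return (static_cast< uint32_t >(pg_id) << 16) | v_chunk_id;
}
constexpr pg_id_t hint_pg_id(uint32_t hint) { return static_cast< pg_id_t >(hint >> 16 & 0xFFFF); }
constexpr chunk_num_t hint_v_chunk_id(uint32_t hint) { return static_cast< chunk_num_t >(hint & 0xFFFF); }

static_assert(hint_pg_id(encode_hint(3, 7)) == 3, "pg_id round trip");
static_assert(hint_v_chunk_id(encode_hint(3, 7)) == 7, "v_chunk_id round trip");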
while (!heap.empty()) { - auto c = heap.top(); - heap.pop(); - if (c.get_chunk_id() == chunkID) { - vchunk = c; - break; - } - chunks.push_back(std::move(c)); - } - for (auto& c : chunks) { - heap.emplace(c); - } + auto pg_chunk_collection = pg_it->second; + auto& pg_chunks = pg_chunk_collection->m_pg_chunks; + std::scoped_lock lock(pg_chunk_collection->mtx); + if (v_chunk_id >= pg_chunks.size()) { + LOGWARNMOD(homeobject, "No chunk found for v_chunk_id {}", v_chunk_id); + return nullptr; } - - if (vchunk.get_internal_chunk()) { - auto& avalableBlkCounter = pg_it->second->available_blk_count; - avalableBlkCounter.fetch_sub(vchunk.available_blks()); - remove_chunk_from_defrag_heap(vchunk.get_chunk_id()); + auto chunk = pg_chunks[v_chunk_id]; + if (chunk->m_state == ChunkState::AVAILABLE) { + chunk->m_state = ChunkState::INUSE; + --pg_chunk_collection->available_num_chunks; + pg_chunk_collection->available_blk_count -= chunk->available_blks(); } - return vchunk.get_internal_chunk(); -} - -// Temporarily commented out, the subsequent GC implementation needs to be adapted to fix pg size -// most_defrag_chunk will only be called when GC is triggered, and will return the chunk with the most -// defrag blocks -csharedChunk HeapChunkSelector::most_defrag_chunk() { - // chunk_num_t chunkID{0}; - // the chunk might be seleted for creating shard. if this happens, we need to select another chunk - // for (;;) { - // { - // std::lock_guard< std::mutex > lg(m_defrag_mtx); - // if (m_defrag_heap.empty()) break; - // chunkID = m_defrag_heap.top().get_chunk_id(); - // } - // auto chunk = select_specific_chunk(chunkID); - // if (chunk) return chunk; - // } - return nullptr; -} - -void HeapChunkSelector::remove_chunk_from_defrag_heap(const chunk_num_t chunkID) { - std::vector< VChunk > chunks; - std::lock_guard< std::mutex > lg(m_defrag_mtx); - chunks.reserve(m_defrag_heap.size()); - while (!m_defrag_heap.empty()) { - auto c = m_defrag_heap.top(); - m_defrag_heap.pop(); - if (c.get_chunk_id() == chunkID) break; - chunks.emplace_back(std::move(c)); - } - for (auto& c : chunks) { - m_defrag_heap.emplace(c); - } + return chunk->get_internal_chunk(); } void HeapChunkSelector::foreach_chunks(std::function< void(csharedChunk&) >&& cb) { // we should call `cb` on all the chunks, selected or not std::for_each(std::execution::par_unseq, m_chunks.begin(), m_chunks.end(), - [cb = std::move(cb)](auto& p) { cb(p.second); }); + [cb = std::move(cb)](auto& p) { cb(p.second->get_internal_chunk()); }); } -void HeapChunkSelector::release_chunk(const pg_id_t pg_id, const chunk_num_t chunkID) { +bool HeapChunkSelector::release_chunk(const pg_id_t pg_id, const chunk_num_t v_chunk_id) { std::shared_lock lock_guard(m_chunk_selector_mtx); - if (m_chunks.find(chunkID) == m_chunks.end()) { - // sanity check - LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID); - return; - } - - auto pg_it = m_per_pg_heap.find(pg_id); - if (pg_it == m_per_pg_heap.end()) { + auto pg_it = m_per_pg_chunks.find(pg_id); + if (pg_it == m_per_pg_chunks.end()) { LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id); - return; + return false; } - const auto& chunk = m_chunks[chunkID]; - VChunk vchunk(chunk); - { - std::lock_guard< std::mutex > l(pg_it->second->mtx); - auto& pg_heap = pg_it->second->m_heap; - pg_heap.emplace(chunk); + auto pg_chunk_collection = pg_it->second; + auto& pg_chunks = pg_chunk_collection->m_pg_chunks; + if (v_chunk_id >= pg_chunks.size()) { + LOGWARNMOD(homeobject, "No chunk found for v_chunk_id {}", v_chunk_id); + 
return false; } - auto& avalableBlkCounter = pg_it->second->available_blk_count; - avalableBlkCounter += vchunk.available_blks(); - + std::scoped_lock lock(pg_chunk_collection->mtx); + auto chunk = pg_chunks[v_chunk_id]; + if (chunk->m_state == ChunkState::INUSE) { + chunk->m_state = ChunkState::AVAILABLE; + ++pg_chunk_collection->available_num_chunks; + pg_chunk_collection->available_blk_count += chunk->available_blks(); + } + return true; } uint32_t HeapChunkSelector::get_chunk_size() const { - const auto& chunk = m_chunks.begin()->second; - auto vchunk = VChunk(chunk); - return vchunk.size(); + const auto chunk = m_chunks.begin()->second; + return chunk->size(); } -std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id, u_int64_t pg_size) { +std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id, uint64_t pg_size) { std::unique_lock lock_guard(m_chunk_selector_mtx); - if (m_per_pg_heap.find(pg_id) != m_per_pg_heap.end()) { + if (m_per_pg_chunks.find(pg_id) != m_per_pg_chunks.end()) { LOGWARNMOD(homeobject, "PG had already created, pg_id {}", pg_id); return std::nullopt; } @@ -217,124 +133,172 @@ std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id, const auto chunk_size = get_chunk_size(); const uint32_t num_chunk = sisl::round_down(pg_size, chunk_size) / chunk_size; - //Select a pdev with the most available num chunk - auto &&most_avail_dev_it = - std::max_element(m_per_dev_heap.begin(), m_per_dev_heap.end(), - [](const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& lhs, - const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& rhs) { - return lhs.second->size() < rhs.second->size(); - }); + // Select a pdev with the most available num chunk + auto most_avail_dev_it = std::max_element(m_per_dev_heap.begin(), m_per_dev_heap.end(), + [](const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& lhs, + const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& rhs) { + return lhs.second->size() < rhs.second->size(); + }); auto& pdev_heap = most_avail_dev_it->second; if (num_chunk > pdev_heap->size()) { LOGWARNMOD(homeobject, "Pdev has no enough space to create pg {} with num_chunk {}", pg_id, num_chunk); return std::nullopt; } - auto vchunk = VChunk(nullptr); - auto it = m_per_pg_heap.emplace(pg_id, std::make_shared< ChunkHeap >()).first; - auto v2r_vector = m_v2r_chunk_map.emplace(pg_id, std::make_shared< std::vector < chunk_num_t > >()).first->second; - auto r2v_map = m_r2v_chunk_map.emplace(pg_id, std::make_shared< ChunkIdMap >()).first->second; - - auto& pg_heap = it->second; - std::scoped_lock lock(pdev_heap->mtx, pg_heap->mtx); - v2r_vector->reserve(num_chunk); - for (chunk_num_t i = 0; i < num_chunk; ++i) { - vchunk = pdev_heap->m_heap.top(); - //sanity check - RELEASE_ASSERT(vchunk.get_total_blks() == vchunk.available_blks(), "vchunk should be empty"); + + auto pg_it = m_per_pg_chunks.emplace(pg_id, std::make_shared< PGChunkCollection >()).first; + auto pg_chunk_collection = pg_it->second; + auto& pg_chunks = pg_chunk_collection->m_pg_chunks; + std::scoped_lock lock(pdev_heap->mtx, pg_chunk_collection->mtx); + pg_chunks.reserve(num_chunk); + + // v_chunk_id start from 0. 
+    for (chunk_num_t v_chunk_id = 0; v_chunk_id < num_chunk; ++v_chunk_id) {
+        auto chunk = pdev_heap->m_heap.top();
+        // sanity check
+        RELEASE_ASSERT(chunk->get_total_blks() == chunk->available_blks(), "chunk should be empty");
+        RELEASE_ASSERT(chunk->available(), "chunk state should be available");
         pdev_heap->m_heap.pop();
-        pdev_heap->available_blk_count -= vchunk.available_blks();
-
-        pg_heap->m_heap.emplace(vchunk);
-        pg_heap->m_total_blks += vchunk.get_total_blks();
-        pg_heap->available_blk_count += vchunk.available_blks();
-        // v_chunk_id start from 0.
-        chunk_num_t v_chunk_id = i;
-        chunk_num_t r_chunk_id = vchunk.get_chunk_id();
-        v2r_vector->emplace_back(r_chunk_id);
-        r2v_map->emplace(r_chunk_id, v_chunk_id);
+        pdev_heap->available_blk_count -= chunk->available_blks();
+
+        chunk->m_pg_id = pg_id;
+        chunk->m_v_chunk_id = v_chunk_id;
+        pg_chunks.emplace_back(chunk);
+        ++pg_chunk_collection->available_num_chunks;
+        pg_chunk_collection->m_total_blks += chunk->get_total_blks();
+        pg_chunk_collection->available_blk_count += chunk->available_blks();
     }
     return num_chunk;
 }
 
-void HeapChunkSelector::set_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& chunk_ids) {
+bool HeapChunkSelector::recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids) {
     std::unique_lock lock_guard(m_chunk_selector_mtx);
-    if (m_v2r_chunk_map.find(pg_id) != m_v2r_chunk_map.end()) {
+    // check the pg exists
+    if (m_per_pg_chunks.find(pg_id) != m_per_pg_chunks.end()) {
         LOGWARNMOD(homeobject, "PG {} had been recovered", pg_id);
-        return;
+        return false;
     }
 
-    auto v2r_vector = m_v2r_chunk_map.emplace(pg_id, std::make_shared< std::vector < chunk_num_t > >(std::move(chunk_ids))).first->second;
-    auto r2v_map = m_r2v_chunk_map.emplace(pg_id, std::make_shared< ChunkIdMap >()).first->second;
+    // check the chunks are valid: they must belong to m_chunks and share the same pdev_id
+    std::optional< uint32_t > last_pdev_id;
+    for (auto p_chunk_id : p_chunk_ids) {
+        auto it = m_chunks.find(p_chunk_id);
+        if (it == m_chunks.end()) {
+            LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", p_chunk_id);
+            return false;
+        }
+        auto chunk = it->second;
+        if (last_pdev_id.has_value() && last_pdev_id.value() != chunk->get_pdev_id()) {
+            LOGWARNMOD(homeobject, "The pdev value is different, last_pdev_id={}, pdev_id={}", last_pdev_id.value(),
+                       chunk->get_pdev_id());
+            return false;
+        } else {
+            last_pdev_id = chunk->get_pdev_id();
+        }
+    }
 
-    for (chunk_num_t i = 0; i < v2r_vector->size(); ++i) {
-        // v_chunk_id start from 0.
-        chunk_num_t v_chunk_id = i;
-        chunk_num_t r_chunk_id = (*v2r_vector)[i];
-        r2v_map->emplace(r_chunk_id, v_chunk_id);
+    auto pg_it = m_per_pg_chunks.emplace(pg_id, std::make_shared< PGChunkCollection >()).first;
+    auto pg_chunk_collection = pg_it->second;
+    auto& pg_chunks = pg_chunk_collection->m_pg_chunks;
+    std::scoped_lock lock(pg_chunk_collection->mtx);
+    pg_chunks.reserve(p_chunk_ids.size());
+
+    // v_chunk_id start from 0.
+    for (chunk_num_t v_chunk_id = 0; v_chunk_id < p_chunk_ids.size(); ++v_chunk_id) {
+        chunk_num_t p_chunk_id = p_chunk_ids[v_chunk_id];
+        auto chunk = m_chunks[p_chunk_id];
+        chunk->m_pg_id = pg_id;
+        chunk->m_v_chunk_id = v_chunk_id;
+        pg_chunks.emplace_back(chunk);
     }
+    return true;
 }
 
 void HeapChunkSelector::recover_per_dev_chunk_heap() {
     std::unique_lock lock_guard(m_chunk_selector_mtx);
-    for (const auto& [chunk_id, _] : m_chunks) {
-        bool add_to_heap = true;
-        for (const auto& [_, chunk_map] : m_r2v_chunk_map) {
-            if (chunk_map->find(chunk_id) != chunk_map->end()) {
-                add_to_heap = false;
-                break;
-            }
-        }
-        add_chunk_internal(chunk_id, add_to_heap);
-
+    for (auto [p_chunk_id, chunk] : m_chunks) {
+        // if the chunk is already selected for a pg, do not add it to the pdev heap.
+        bool add_to_heap = !chunk->m_pg_id.has_value();
+        add_chunk_internal(p_chunk_id, add_to_heap);
     }
 }
 
-void HeapChunkSelector::recover_pg_chunk_heap(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excludingChunks)
-{
+bool HeapChunkSelector::recover_pg_chunks_states(pg_id_t pg_id,
+                                                 const std::unordered_set< chunk_num_t >& excluding_v_chunk_ids) {
     std::unique_lock lock_guard(m_chunk_selector_mtx);
-    if (m_per_pg_heap.find(pg_id) != m_per_pg_heap.end()) {
-        LOGWARNMOD(homeobject, "Pg_heap {} had been recovered", pg_id);
-        return;
-    }
-    auto it = m_v2r_chunk_map.find(pg_id);
-    if (it == m_v2r_chunk_map.end()) {
-        LOGWARNMOD(homeobject, "Pg_chunk_map {} had never been recovered", pg_id);
-        return;
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
+        LOGWARNMOD(homeobject, "PG chunks should be recovered beforehand, pg_id={}", pg_id);
+        return false;
     }
-    const auto& chunk_ids = it->second;
-    auto& pg_heap = m_per_pg_heap.emplace(pg_id, std::make_shared< ChunkHeap >()).first->second;
-    for (const auto& chunk_id : *chunk_ids) {
-        if (excludingChunks.find(chunk_id) == excludingChunks.end()) {
-            const auto& chunk = m_chunks[chunk_id];
-            auto vchunk = VChunk(chunk);
-            pg_heap->m_heap.emplace(vchunk);
-            pg_heap->m_total_blks += vchunk.get_total_blks();
-            pg_heap->available_blk_count += vchunk.available_blks();
+
+    auto pg_chunk_collection = pg_it->second;
+    auto& pg_chunks = pg_chunk_collection->m_pg_chunks;
+    std::scoped_lock lock(pg_chunk_collection->mtx);
+
+    for (size_t v_chunk_id = 0; v_chunk_id < pg_chunks.size(); ++v_chunk_id) {
+        auto chunk = pg_chunks[v_chunk_id];
+        pg_chunk_collection->m_total_blks += chunk->get_total_blks();
+        if (excluding_v_chunk_ids.find(v_chunk_id) == excluding_v_chunk_ids.end()) {
+            chunk->m_state = ChunkState::AVAILABLE;
+            ++pg_chunk_collection->available_num_chunks;
+            pg_chunk_collection->available_blk_count += chunk->available_blks();
+
+        } else {
+            chunk->m_state = ChunkState::INUSE;
         }
     }
+    return true;
 }
 
-std::shared_ptr< const std::vector< chunk_num_t > > HeapChunkSelector::get_pg_chunks(pg_id_t pg_id) const {
+std::shared_ptr< const std::vector< homestore::chunk_num_t > > HeapChunkSelector::get_pg_chunks(pg_id_t pg_id) const {
     std::shared_lock lock_guard(m_chunk_selector_mtx);
-    auto it = m_v2r_chunk_map.find(pg_id);
-    if (it != m_v2r_chunk_map.end()) {
-        return it->second;
-    } else {
+    auto pg_it = m_per_pg_chunks.find(pg_id);
+    if (pg_it == m_per_pg_chunks.end()) {
         LOGWARNMOD(homeobject, "PG {} had never been created", pg_id);
         return nullptr;
     }
+
+    auto pg_chunk_collection = pg_it->second;
+    auto& pg_chunks = pg_chunk_collection->m_pg_chunks;
+    std::scoped_lock lock(pg_chunk_collection->mtx);
+    auto p_chunk_ids = std::make_shared< std::vector< homestore::chunk_num_t > >();
p_chunk_ids->reserve(pg_chunks.size()); + for (auto chunk : pg_chunks) { + p_chunk_ids->emplace_back(chunk->get_chunk_id()); + } + return p_chunk_ids; } -homestore::blk_alloc_hints HeapChunkSelector::chunk_to_hints(chunk_num_t chunk_id) const { - auto iter = m_chunks.find(chunk_id); - if (iter == m_chunks.end()) { - LOGWARNMOD(homeobject, "No chunk found for chunk_id {}, will return default blk alloc hints", chunk_id); - return homestore::blk_alloc_hints(); +std::optional< homestore::chunk_num_t > HeapChunkSelector::get_most_available_blk_chunk(pg_id_t pg_id) const { + std::shared_lock lock_guard(m_chunk_selector_mtx); + auto pg_it = m_per_pg_chunks.find(pg_id); + if (pg_it == m_per_pg_chunks.end()) { + LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id); + return std::nullopt; + } + if (pg_it->second->available_num_chunks == 0) { + LOGWARNMOD(homeobject, "No available chunk for pg {}", pg_id); + return std::nullopt; } - homestore::blk_alloc_hints hints; - hints.pdev_id_hint = VChunk(iter->second).get_pdev_id(); - return hints; + + std::scoped_lock lock(pg_it->second->mtx); + auto pg_chunk_collection = pg_it->second; + auto& pg_chunks = pg_chunk_collection->m_pg_chunks; + auto max_it = + std::max_element(pg_chunks.begin(), pg_chunks.end(), + [](const std::shared_ptr< ExtendedVChunk >& a, const std::shared_ptr< ExtendedVChunk >& b) { + if (a->available() && b->available()) { return a->available_blks() < b->available_blks(); } + if (!a->available() && b->available()) { return true; } + if (a->available() && !b->available()) { return false; } + return false; + }); + + auto v_chunk_id = std::distance(pg_chunks.begin(), max_it); + pg_chunks[v_chunk_id]->m_state = ChunkState::INUSE; + --pg_chunk_collection->available_num_chunks; + pg_chunk_collection->available_blk_count -= pg_chunks[v_chunk_id]->available_blks(); + return v_chunk_id; } // return the maximum number of chunks that can be allocated on pdev @@ -348,37 +312,26 @@ uint32_t HeapChunkSelector::most_avail_num_chunks() const { return max_avail_num_chunks; } -uint32_t HeapChunkSelector::avail_num_chunks(uint32_t dev_id) const { +uint32_t HeapChunkSelector::avail_num_chunks(pg_id_t pg_id) const { std::shared_lock lock_guard(m_chunk_selector_mtx); - auto it = m_per_dev_heap.find(dev_id); - if (it == m_per_dev_heap.end()) { - LOGWARNMOD(homeobject, "No pdev found for pdev {}", dev_id); + auto pg_it = m_per_pg_chunks.find(pg_id); + if (pg_it == m_per_pg_chunks.end()) { + LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id); return 0; } - - return it->second->size(); + return pg_it->second->available_num_chunks.load(); } uint32_t HeapChunkSelector::total_chunks() const { return m_chunks.size(); } -uint64_t HeapChunkSelector::avail_blks(std::optional< uint32_t > dev_it) const { +uint64_t HeapChunkSelector::avail_blks(pg_id_t pg_id) const { std::shared_lock lock_guard(m_chunk_selector_mtx); - if (!dev_it.has_value()) { - uint64_t max_avail_blks = 0ull; - for (auto const& [_, heap] : m_per_dev_heap) { - std::scoped_lock lock(heap->mtx); - max_avail_blks = std::max(max_avail_blks, static_cast< uint64_t >(heap->available_blk_count.load())); - } - return max_avail_blks; - } else { - auto it = m_per_dev_heap.find(dev_it.value()); - std::scoped_lock lock(it->second->mtx); - if (it == m_per_dev_heap.end()) { - LOGWARNMOD(homeobject, "No pdev found for pdev {}", dev_it.value()); - return 0; - } - return it->second->available_blk_count.load(); + auto pg_it = m_per_pg_chunks.find(pg_id); + if (pg_it == m_per_pg_chunks.end()) { + 
LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id); + return 0; } + return pg_it->second->available_blk_count.load(); } uint64_t HeapChunkSelector::total_blks(uint32_t dev_id) const { diff --git a/src/lib/homestore_backend/heap_chunk_selector.h b/src/lib/homestore_backend/heap_chunk_selector.h index 259ecfb5..cbcbbd2a 100644 --- a/src/lib/homestore_backend/heap_chunk_selector.h +++ b/src/lib/homestore_backend/heap_chunk_selector.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,8 @@ namespace homeobject { +ENUM(ChunkState, uint8_t, AVAILABLE = 0, INUSE); + using csharedChunk = homestore::cshared< homestore::Chunk >; class HeapChunkSelector : public homestore::ChunkSelector { @@ -23,45 +26,58 @@ class HeapChunkSelector : public homestore::ChunkSelector { ~HeapChunkSelector() = default; using VChunk = homestore::VChunk; - class VChunkComparator { + using chunk_num_t = homestore::chunk_num_t; + + class ExtendedVChunk : public VChunk { public: - bool operator()(VChunk& lhs, VChunk& rhs) { return lhs.available_blks() < rhs.available_blks(); } + ExtendedVChunk(csharedChunk const& chunk) : + VChunk(chunk), m_state(ChunkState::AVAILABLE), m_pg_id(), m_v_chunk_id() {} + ~ExtendedVChunk() = default; + ChunkState m_state; + std::optional< pg_id_t > m_pg_id; + std::optional< chunk_num_t > m_v_chunk_id; + bool available() const { return m_state == ChunkState::AVAILABLE; } }; - class VChunkDefragComparator { + class ExtendedVChunkComparator { public: - bool operator()(VChunk& lhs, VChunk& rhs) { return lhs.get_defrag_nblks() < rhs.get_defrag_nblks(); } + bool operator()(std::shared_ptr< ExtendedVChunk >& lhs, std::shared_ptr< ExtendedVChunk >& rhs) { + return lhs->available_blks() < rhs->available_blks(); + } }; - - using VChunkHeap = std::priority_queue< VChunk, std::vector< VChunk >, VChunkComparator >; - using VChunkDefragHeap = std::priority_queue< VChunk, std::vector< VChunk >, VChunkDefragComparator >; - using ChunkIdMap = std::unordered_map < homestore::chunk_num_t, homestore::chunk_num_t >; // used for real chunk id -> virtual chunk id map - using chunk_num_t = homestore::chunk_num_t; + using ExtendedVChunkHeap = + std::priority_queue< std::shared_ptr< ExtendedVChunk >, std::vector< std::shared_ptr< ExtendedVChunk > >, + ExtendedVChunkComparator >; struct ChunkHeap { std::mutex mtx; - VChunkHeap m_heap; + ExtendedVChunkHeap m_heap; std::atomic_size_t available_blk_count; uint64_t m_total_blks{0}; // initlized during boot, and will not change during runtime; uint32_t size() const { return m_heap.size(); } }; + struct PGChunkCollection { + std::mutex mtx; + std::vector< std::shared_ptr< ExtendedVChunk > > m_pg_chunks; + std::atomic_size_t available_num_chunks; + std::atomic_size_t available_blk_count; + uint64_t m_total_blks{0}; // initlized during boot, and will not change during runtime; + }; + void add_chunk(csharedChunk&) override; void foreach_chunks(std::function< void(csharedChunk&) >&& cb) override; csharedChunk select_chunk([[maybe_unused]] homestore::blk_count_t nblks, const homestore::blk_alloc_hints& hints); - // this function will be used by GC flow or recovery flow to mark one specific chunk to be busy, caller should be - // responsible to use release_chunk() interface to release it when no longer to use the chunk anymore. 
-    csharedChunk select_specific_chunk(const pg_id_t pg_id, const chunk_num_t);
-
-    // this function will be used by GC flow to select a chunk for GC
-    csharedChunk most_defrag_chunk();
+    // this function will be used by the create-shard or recovery flow to mark one specific chunk as busy; the caller
+    // is responsible for calling the release_chunk() interface once the chunk is no longer in use.
+    csharedChunk select_specific_chunk(const pg_id_t pg_id, const chunk_num_t v_chunk_id);
 
-    // this function is used to return a chunk back to ChunkSelector when sealing a shard, and will only be used by
-    // Homeobject.
-    void release_chunk(const pg_id_t pg_id, const chunk_num_t);
+    // This function returns a chunk back to ChunkSelector.
+    // It is used in two scenarios: 1. seal shard 2. create shard rollback
+    bool release_chunk(const pg_id_t pg_id, const chunk_num_t v_chunk_id);
 
     /**
      * select chunks for pg, chunks need to be in same pdev.
@@ -70,26 +86,27 @@ class HeapChunkSelector : public homestore::ChunkSelector {
      * @param pg_size The fix pg size.
      * @return An optional uint32_t value representing num_chunk, or std::nullopt if no space left.
      */
-    std::optional< uint32_t > select_chunks_for_pg(pg_id_t pg_id, u_int64_t pg_size);
+    std::optional< uint32_t > select_chunks_for_pg(pg_id_t pg_id, uint64_t pg_size);
 
-    std::shared_ptr< const std::vector< chunk_num_t > > get_pg_chunks(pg_id_t pg_id) const;
+    // this function is used by the pg info superblk to persist the v_chunk_id <-> p_chunk_id mapping
+    std::shared_ptr< const std::vector< chunk_num_t > > get_pg_chunks(pg_id_t pg_id) const;
+
+    /**
+     * pop the pg's top chunk
+     *
+     * @param pg_id The ID of the pg.
+     * @return An optional chunk_num_t value representing v_chunk_id, or std::nullopt if no space left.
+     */
+    std::optional< chunk_num_t > get_most_available_blk_chunk(pg_id_t pg_id) const;
 
     // this should be called on each pg meta blk found
-    void set_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& chunk_ids);
+    bool recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids);
 
     // this should be called after all pg meta blk recovered
     void recover_per_dev_chunk_heap();
 
     // this should be called after ShardManager is initialized and get all the open shards
-    void recover_pg_chunk_heap(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excludingChunks);
-
-    /**
-     * Retrieves the block allocation hints for a given chunk.
-     *
-     * @param chunk_id The ID of the chunk.
-     * @return The block allocation hints for the specified chunk.
-     */
-    homestore::blk_alloc_hints chunk_to_hints(chunk_num_t chunk_id) const;
+    bool recover_pg_chunks_states(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excluding_v_chunk_ids);
 
     /**
      * Returns the number of available blocks of the given device id.
@@ -97,7 +114,7 @@ class HeapChunkSelector : public homestore::ChunkSelector {
      * @param dev_id (optional) The device ID. if nullopt, it returns the maximum available blocks among all devices.
      * @return The number of available blocks.
     */
-    uint64_t avail_blks(std::optional< uint32_t > dev_id) const;
+    uint64_t avail_blks(pg_id_t pg_id) const;
 
     /**
      * Returns the total number of blocks of the given device;
@@ -116,12 +133,12 @@ class HeapChunkSelector : public homestore::ChunkSelector {
     uint32_t most_avail_num_chunks() const;
 
     /**
-     * Returns the number of available chunks for a given device ID.
+     * Returns the number of available chunks for a given pg id.
     *
-     * @param dev_id The device ID.
+     * @param pg_id The pg id.
     * @return The number of available chunks.
     */
-    uint32_t avail_num_chunks(uint32_t dev_id) const;
+    uint32_t avail_num_chunks(pg_id_t pg_id) const;
 
     /**
      * @brief Returns the total number of chunks.
@@ -135,24 +152,15 @@ class HeapChunkSelector : public homestore::ChunkSelector {
     uint32_t get_chunk_size() const;
 
 private:
-    std::unordered_map< uint32_t, std::shared_ptr< ChunkHeap > > m_per_dev_heap;
-    std::unordered_map< pg_id_t, std::shared_ptr< ChunkHeap > > m_per_pg_heap;
+    void add_chunk_internal(const chunk_num_t, bool add_to_heap = true);
 
-    // These mappings ensure "identical layout" by providing bidirectional indexing between virtual and real chunk IDs.
-    // m_v2r_chunk_map: Maps each pg_id to a vector of real chunk IDs (r_chunk_id). The index in the vector corresponds to the virtual chunk ID (v_chunk_id).
-    std::unordered_map< pg_id_t, std::shared_ptr< std::vector< chunk_num_t > > > m_v2r_chunk_map;
-    // m_r2v_chunk_map: Maps each pg_id to a map that inversely maps real chunk IDs (r_chunk_id) to virtual chunk IDs (v_chunk_id).
-    std::unordered_map< pg_id_t, std::shared_ptr< ChunkIdMap > > m_r2v_chunk_map;
+private:
+    std::unordered_map< uint32_t, std::shared_ptr< ChunkHeap > > m_per_dev_heap;
+    std::unordered_map< pg_id_t, std::shared_ptr< PGChunkCollection > > m_per_pg_chunks;
     // hold all the chunks, selected or not
-    std::unordered_map< chunk_num_t, csharedChunk > m_chunks;
+    std::unordered_map< chunk_num_t, homestore::cshared< ExtendedVChunk > > m_chunks;
 
     mutable std::shared_mutex m_chunk_selector_mtx;
-    void add_chunk_internal(const chunk_num_t, bool add_to_heap = true);
-
-    VChunkDefragHeap m_defrag_heap;
-    std::mutex m_defrag_mtx;
-
-    void remove_chunk_from_defrag_heap(const chunk_num_t);
 };
 } // namespace homeobject
diff --git a/src/lib/homestore_backend/hs_blob_manager.cpp b/src/lib/homestore_backend/hs_blob_manager.cpp
index c0aa81e5..2283eb8a 100644
--- a/src/lib/homestore_backend/hs_blob_manager.cpp
+++ b/src/lib/homestore_backend/hs_blob_manager.cpp
@@ -351,10 +351,10 @@ HSHomeObject::blob_put_get_blk_alloc_hints(sisl::blob const& header, cintrusive<
     }
 
     auto hs_shard = d_cast< HS_Shard* >((*shard_iter->second).get());
-    BLOGD(msg_header->shard_id, "n/a", "Picked chunk_id={}", hs_shard->sb_->chunk_id);
+    BLOGD(msg_header->shard_id, "n/a", "Picked p_chunk_id={}", hs_shard->sb_->p_chunk_id);
 
     homestore::blk_alloc_hints hints;
-    hints.chunk_id_hint = hs_shard->sb_->chunk_id;
+    hints.chunk_id_hint = hs_shard->sb_->p_chunk_id;
     return hints;
 }
 
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index 3a723cf5..d586bea9 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -90,9 +90,9 @@ class HSHomeObject : public HomeObjectImpl {
         // Data layout inside 'data':
         // First, an array of 'pg_members' structures:
         // | pg_members[0] | pg_members[1] | ... | pg_members[num_members-1] |
-        // Immediately followed by an array of 'chunk_num_t' values (representing r_chunk_ids):
+        // Immediately followed by an array of 'chunk_num_t' values (representing physical chunkID):
         // | chunk_num_t[0] | chunk_num_t[1] | ... | chunk_num_t[num_chunks-1] |
-        // Here, 'chunk_num_t[i]' represents the r_chunk_id for the v_chunk_id 'i', where v_chunk_id starts from 0 and
+        // Here, 'chunk_num_t[i]' represents the p_chunk_id for the v_chunk_id 'i', where v_chunk_id starts from 0 and
         // increases sequentially.
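Spelled out, the layout comment above implies offset arithmetic along the following lines; an editor's sketch with simplified stand-in types (the real pg_members record and superblk fields live in this header, so this is illustration only):

#include <cstddef>
#include <cstdint>

using chunk_num_t = uint16_t;
struct pg_members { char name[32]; }; // stand-in for the real member record

// 'data' holds num_members pg_members entries, immediately followed by
// num_chunks chunk_num_t entries; the index into that trailing array is the
// v_chunk_id and the stored value is the corresponding p_chunk_id.
inline chunk_num_t* chunk_ids(uint8_t* data, uint32_t num_members) {
    return reinterpret_cast< chunk_num_t* >(data + num_members * sizeof(pg_members));
}

inline size_t data_size(uint32_t num_members, uint32_t num_chunks) {
    return num_members * sizeof(pg_members) + num_chunks * sizeof(chunk_num_t);
}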
         uint32_t size() const {
@@ -148,7 +148,8 @@ class HSHomeObject : public HomeObjectImpl {
 
     struct shard_info_superblk : public DataHeader {
         ShardInfo info;
-        homestore::chunk_num_t chunk_id;
+        homestore::chunk_num_t p_chunk_id;
+        homestore::chunk_num_t v_chunk_id;
     };
 #pragma pack()
 
@@ -217,7 +218,6 @@ class HSHomeObject : public HomeObjectImpl {
     public:
         homestore::superblk< pg_info_superblk > pg_sb_;
         shared< homestore::ReplDev > repl_dev_;
-        std::optional< homestore::chunk_num_t > any_allocated_chunk_id_{};
         std::shared_ptr< BlobIndexTable > index_table_;
         PGMetrics metrics_;
 
@@ -240,24 +240,16 @@ class HSHomeObject : public HomeObjectImpl {
          * Returns the number of open shards on this PG.
          */
        uint32_t open_shards() const;
-
-        /**
-         * Retrieves the device hint associated with this PG(if any shard is created).
-         *
-         * @param selector The HeapChunkSelector object.
-         * @return An optional uint32_t value representing the device hint, or std::nullopt if no hint is available.
-         */
-        std::optional< uint32_t > dev_hint(cshared< HeapChunkSelector >) const;
     };
 
     struct HS_Shard : public Shard {
         homestore::superblk< shard_info_superblk > sb_;
-        HS_Shard(ShardInfo info, homestore::chunk_num_t chunk_id);
+        HS_Shard(ShardInfo info, homestore::chunk_num_t p_chunk_id);
         HS_Shard(homestore::superblk< shard_info_superblk >&& sb);
         ~HS_Shard() override = default;
 
         void update_info(const ShardInfo& info);
-        auto chunk_id() const { return sb_->chunk_id; }
+        auto p_chunk_id() const { return sb_->p_chunk_id; }
     };
 
 #pragma pack(1)
@@ -437,9 +429,17 @@ class HSHomeObject : public HomeObjectImpl {
      * @brief Retrieves the chunk number associated with the given shard ID.
      *
      * @param id The ID of the shard to retrieve the chunk number for.
-     * @return An optional chunk number if the shard ID is valid, otherwise an empty optional.
+     * @return An optional chunk number holding the shard's p_chunk_id if the shard ID is valid, otherwise an empty
+     * optional.
+     */
+    std::optional< homestore::chunk_num_t > get_shard_p_chunk_id(shard_id_t id) const;
+
+    /**
+     * @brief Retrieves the chunk number associated with the given shard ID.
+     *
+     * @param id The ID of the shard to retrieve the chunk number for.
+     * @return An optional chunk number holding the shard's v_chunk_id if the shard ID is valid, otherwise an empty
+     * optional.
      */
-    std::optional< homestore::chunk_num_t > get_shard_chunk(shard_id_t id) const;
+    std::optional< homestore::chunk_num_t > get_shard_v_chunk_id(shard_id_t id) const;
 
     /**
      * @brief recover PG and shard from the superblock.
@@ -448,13 +448,27 @@
     void on_replica_restart();
 
     /**
-     * @brief Returns any chunk number for the given pg ID.
+     * @brief Extracts the physical chunk ID for create shard from the message.
+     *
+     * @param header The message header that includes the shard_info_superblk, which contains the data necessary for
+     * extracting and mapping the chunk ID.
+     * @return An optional virtual chunk id if the extraction and mapping process is successful, otherwise an empty
+     * optional.
+     */
+    std::optional< homestore::chunk_num_t > resolve_v_chunk_id_from_msg(sisl::blob const& header);
+
+    /**
+     * @brief Releases a chunk based on the information provided in a CREATE_SHARD message.
+     *
+     * This function is invoked during log rollback or when the proposer encounters an error.
+     * Its primary purpose is to ensure that the state of pg_chunks is reverted to the correct state.
      *
-     * @param pg The pg ID to get the chunk number for.
-     * @return A tuple of < bool, bool, homestore::chunk_num_t >.
+ * @param header The message header that includes the shard_info_superblk, which contains the data necessary for + * extracting and mapping the chunk ID. + * @return Returns true if the chunk was successfully released, false otherwise. */ - std::tuple< bool, bool, homestore::chunk_num_t > get_any_chunk_id(pg_id_t pg); + bool release_chunk_based_on_create_shard_message(sisl::blob const& header); bool pg_exists(pg_id_t pg_id) const; cshared< HeapChunkSelector > chunk_selector() const { return chunk_selector_; } diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp index 7e0fe734..58bacd57 100644 --- a/src/lib/homestore_backend/hs_pg_manager.cpp +++ b/src/lib/homestore_backend/hs_pg_manager.cpp @@ -69,6 +69,7 @@ PGManager::NullAsyncResult HSHomeObject::_create_pg(PGInfo&& pg_info, std::set< return folly::makeUnexpected(PGError::NO_SPACE_LEFT); } + pg_info.chunk_size = chunk_size; pg_info.replica_set_uuid = boost::uuids::random_generator()(); return hs_repl_service() .create_repl_dev(pg_info.replica_set_uuid, peers) @@ -136,15 +137,27 @@ void HSHomeObject::on_create_pg_message_commit(int64_t lsn, sisl::blob const& he return; } + auto local_chunk_size = chunk_selector()->get_chunk_size(); + if (pg_info.chunk_size != local_chunk_size) { + LOGE("Chunk sizes are inconsistent, leader_chunk_size={}, local_chunk_size={}", pg_info.chunk_size, + local_chunk_size); + if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(PGError::UNKNOWN)); } + return; + } + // select chunks for pg auto const num_chunk = chunk_selector()->select_chunks_for_pg(pg_id, pg_info.size); if (!num_chunk.has_value()) { - LOGW("select chunks for pg failed, pg_id {}", pg_id); + LOGW("Failed to select chunks for pg {}", pg_id); if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(PGError::NO_SPACE_LEFT)); } return; } auto chunk_ids = chunk_selector()->get_pg_chunks(pg_id); - + if (chunk_ids == nullptr) { + LOGW("Failed to get pg chunks, pg_id {}", pg_id); + if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(PGError::NO_SPACE_LEFT)); } + return; + } // create index table and pg // TODO create index table during create shard. 
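    // Why chunk_size rides along in PGInfo and is validated above (numbers here
    // are hypothetical): with pg_size = 12 GiB, a leader using 4 GiB chunks
    // selects round_down(12 GiB, 4 GiB) / 4 GiB = 3 chunks, while a follower
    // using 2 GiB chunks would select 6. The per-pg v_chunk_id -> p_chunk_id
    // mapping would then diverge across replicas and break "identical layout",
    // so a follower whose local chunk size differs must fail the commit.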
auto index_table = create_index_table(); @@ -245,6 +258,7 @@ std::string HSHomeObject::serialize_pg_info(const PGInfo& pginfo) { nlohmann::json j; j["pg_info"]["pg_id_t"] = pginfo.id; j["pg_info"]["pg_size"] = pginfo.size; + j["pg_info"]["chunk_size"] = pginfo.chunk_size; j["pg_info"]["repl_uuid"] = boost::uuids::to_string(pginfo.replica_set_uuid); nlohmann::json members_j{}; @@ -263,7 +277,8 @@ PGInfo HSHomeObject::deserialize_pg_info(const unsigned char* json_str, size_t s auto pg_json = nlohmann::json::parse(json_str, json_str + size); PGInfo pg_info(pg_json["pg_info"]["pg_id_t"].get< pg_id_t >()); - pg_info.size = pg_json["pg_info"]["pg_size"].get< u_int64_t >(); + pg_info.size = pg_json["pg_info"]["pg_size"].get< uint64_t >(); + pg_info.chunk_size = pg_json["pg_info"]["chunk_size"].get< uint64_t >(); pg_info.replica_set_uuid = boost::uuids::string_generator()(pg_json["pg_info"]["repl_uuid"].get< std::string >()); for (auto const& m : pg_json["pg_info"]["members"]) { @@ -287,8 +302,9 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c return; } auto pg_id = pg_sb->id; - std::vector< chunk_num_t > chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks); - chunk_selector_->set_pg_chunks(pg_id, std::move(chunk_ids)); + std::vector< chunk_num_t > p_chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks); + bool set_pg_chunks_res = chunk_selector_->recover_pg_chunks(pg_id, std::move(p_chunk_ids)); + RELEASE_ASSERT(set_pg_chunks_res, "Failed to set pg={} chunks", pg_id); auto uuid_str = boost::uuids::to_string(pg_sb->index_table_uuid); auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value())); // During PG recovery check if index is already recoverd else @@ -365,13 +381,6 @@ uint32_t HSHomeObject::HS_PG::open_shards() const { return std::count_if(shards_.begin(), shards_.end(), [](auto const& s) { return s->is_open(); }); } -std::optional< uint32_t > HSHomeObject::HS_PG::dev_hint(cshared< HeapChunkSelector > chunk_sel) const { - if (shards_.empty()) { return std::nullopt; } - auto const hs_shard = d_cast< HS_Shard* >(shards_.front().get()); - auto const hint = chunk_sel->chunk_to_hints(hs_shard->chunk_id()); - return hint.pdev_id_hint; -} - bool HSHomeObject::_get_stats(pg_id_t id, PGStats& stats) const { auto lg = std::shared_lock(_pg_lock); auto it = _pg_map.find(id); @@ -404,24 +413,9 @@ bool HSHomeObject::_get_stats(pg_id_t id, PGStats& stats) const { stats.members.emplace_back(std::make_tuple(m.id, m.name, last_commit_lsn, last_succ_resp_us)); } - auto const pdev_id_hint = hs_pg->dev_hint(chunk_selector()); - if (pdev_id_hint.has_value()) { - stats.avail_open_shards = chunk_selector()->avail_num_chunks(pdev_id_hint.value()); - stats.avail_bytes = chunk_selector()->avail_blks(pdev_id_hint) * blk_size; - stats.used_bytes = - hs_pg->durable_entities().total_occupied_blk_count.load(std::memory_order_relaxed) * blk_size; - } else { - // if no shard has been created on this PG yet, it means this PG could arrive on any drive that has the most - // available open shards; - stats.avail_open_shards = chunk_selector()->most_avail_num_chunks(); - - // if no shards is created yet on this PG, set used bytes to zero; - stats.used_bytes = 0ul; - - // if no shard has been created on this PG yet, it means this PG could arrive on any drive that has the most - // available space; - stats.avail_bytes = chunk_selector()->avail_blks(std::nullopt) * blk_size; - } + stats.avail_open_shards = 
chunk_selector()->avail_num_chunks(hs_pg->pg_info_.id);
+    stats.avail_bytes = chunk_selector()->avail_blks(hs_pg->pg_info_.id) * blk_size;
+    stats.used_bytes = hs_pg->durable_entities().total_occupied_blk_count.load(std::memory_order_relaxed) * blk_size;
 
     return true;
 }
diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 938c64ec..67681923 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -111,6 +111,13 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_create_shard(pg_id_t pg_ow
     auto new_shard_id = generate_new_shard_id(pg_owner);
     auto create_time = get_current_timestamp();
 
+    // select chunk for shard.
+    const auto v_chunkID = chunk_selector()->get_most_available_blk_chunk(pg_owner);
+    if (!v_chunkID.has_value()) {
+        LOGW("no available chunk left to create shard for pg [{}]", pg_owner);
+        return folly::makeUnexpected(ShardError::NO_SPACE_LEFT);
+    }
+
     // Prepare the shard info block
     sisl::io_blob_safe sb_blob(sisl::round_up(sizeof(shard_info_superblk), repl_dev->get_blk_size()), io_align);
     shard_info_superblk* sb = new (sb_blob.bytes()) shard_info_superblk();
@@ -124,7 +131,8 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_create_shard(pg_id_t pg_ow
                             .available_capacity_bytes = size_bytes,
                             .total_capacity_bytes = size_bytes,
                             .deleted_capacity_bytes = 0};
-    sb->chunk_id = 0;
+    sb->p_chunk_id = 0;
+    sb->v_chunk_id = v_chunkID.value();
 
     auto req = repl_result_ctx< ShardManager::Result< ShardInfo > >::make(
         sizeof(shard_info_superblk) /* header_extn_size */, 0u /* key_size */);
@@ -172,7 +180,9 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_seal_shard(ShardInfo const
     shard_info_superblk* sb = new (sb_blob.bytes()) shard_info_superblk();
     sb->type = DataHeader::data_type_t::SHARD_INFO;
     sb->info = tmp_info;
-    sb->chunk_id = 0;
+    // p_chunk_id and v_chunk_id will never be used in seal shard workflow.
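+    // (the seal workflow resolves both ids from the already-installed shard
+    // instead: get_shard_p_chunk_id() supplies the blk_alloc hint and
+    // get_shard_v_chunk_id() drives the chunk release on commit, so the zeros
+    // written below are never read back)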
+ sb->p_chunk_id = 0; + sb->v_chunk_id = 0; auto req = repl_result_ctx< ShardManager::Result< ShardInfo > >::make( sizeof(shard_info_superblk) /* header_extn_size */, 0u /* key_size */); @@ -252,6 +262,11 @@ void HSHomeObject::on_shard_message_rollback(int64_t lsn, sisl::blob const& head } switch (msg_header->msg_type) { + case ReplicationMessageType::CREATE_SHARD_MSG: { + bool res = release_chunk_based_on_create_shard_message(header); + if (!res) { LOGW("failed to release chunk based on create shard msg"); } + break; + } case ReplicationMessageType::SEAL_SHARD_MSG: { auto sb = r_cast< shard_info_superblk const* >(header.cbytes() + sizeof(ReplicationMessageHeader)); auto const shard_info = sb->info; @@ -269,6 +284,7 @@ void HSHomeObject::on_shard_message_rollback(int64_t lsn, sisl::blob const& head shard_info.id); } } + break; } default: { break; @@ -363,9 +379,10 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom if (state == ShardInfo::State::SEALED) { auto pg_id = shard_info.placement_group; - auto chunk_id = get_shard_chunk(shard_info.id); - RELEASE_ASSERT(chunk_id.has_value(), "Chunk id not found"); - chunk_selector()->release_chunk(pg_id, chunk_id.value()); + auto v_chunkID = get_shard_v_chunk_id(shard_info.id); + RELEASE_ASSERT(v_chunkID.has_value(), "v_chunk id not found"); + bool res = chunk_selector()->release_chunk(pg_id, v_chunkID.value()); + RELEASE_ASSERT(res, "Failed to release chunk {}, pg_id {}", v_chunkID.value(), pg_id); update_shard_in_map(shard_info); } else LOGW("try to commit SEAL_SHARD_MSG but shard state is not sealed, shard_id: {}", shard_info.id); @@ -392,10 +409,11 @@ void HSHomeObject::on_shard_meta_blk_recover_completed(bool success) { excluding_chunks.reserve(pair.second->shards_.size()); for (auto& shard : pair.second->shards_) { if (shard->info.state == ShardInfo::State::OPEN) { - excluding_chunks.emplace(d_cast< HS_Shard* >(shard.get())->sb_->chunk_id); + excluding_chunks.emplace(d_cast< HS_Shard* >(shard.get())->sb_->v_chunk_id); } } - chunk_selector_->recover_pg_chunk_heap(pair.first, excluding_chunks); + bool res = chunk_selector_->recover_pg_chunks_states(pair.first, excluding_chunks); + RELEASE_ASSERT(res, "Failed to recover pg chunk heap, pg={}", pair.first); } } @@ -424,37 +442,83 @@ void HSHomeObject::update_shard_in_map(const ShardInfo& shard_info) { hs_shard->update_info(shard_info); } -std::optional< homestore::chunk_num_t > HSHomeObject::get_shard_chunk(shard_id_t id) const { +std::optional< homestore::chunk_num_t > HSHomeObject::get_shard_p_chunk_id(shard_id_t id) const { std::scoped_lock lock_guard(_shard_lock); auto shard_iter = _shard_map.find(id); if (shard_iter == _shard_map.end()) { return std::nullopt; } auto hs_shard = d_cast< HS_Shard* >((*shard_iter->second).get()); - return std::make_optional< homestore::chunk_num_t >(hs_shard->sb_->chunk_id); + return std::make_optional< homestore::chunk_num_t >(hs_shard->sb_->p_chunk_id); } -std::tuple< bool, bool, homestore::chunk_num_t > HSHomeObject::get_any_chunk_id(pg_id_t pg_id) { - std::scoped_lock lock_guard(_pg_lock); - auto pg_iter = _pg_map.find(pg_id); - if (pg_iter == _pg_map.end()) { return {false /* pg_found */, false /* shards_found */, 0 /* chunk_id */}; } +std::optional< homestore::chunk_num_t > HSHomeObject::get_shard_v_chunk_id(shard_id_t id) const { + std::scoped_lock lock_guard(_shard_lock); + auto shard_iter = _shard_map.find(id); + if (shard_iter == _shard_map.end()) { return std::nullopt; } + auto hs_shard = d_cast< HS_Shard* 
>((*shard_iter->second).get()); + return std::make_optional< homestore::chunk_num_t >(hs_shard->sb_->v_chunk_id); +} - HS_PG* pg = static_cast< HS_PG* >(pg_iter->second.get()); - if (pg->any_allocated_chunk_id_.has_value()) { // it is already cached and use it; - return {true /* pg_found */, true /* shards_found */, *pg->any_allocated_chunk_id_}; +std::optional< homestore::chunk_num_t > HSHomeObject::resolve_v_chunk_id_from_msg(sisl::blob const& header) { + const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes()); + if (msg_header->corrupted()) { + LOGW("replication message header is corrupted with crc error"); + return std::nullopt; } - auto& shards = pg->shards_; - if (shards.empty()) { return {true /* pg_found */, false /* shards_found */, 0 /* chunk_id */}; } + switch (msg_header->msg_type) { + case ReplicationMessageType::CREATE_SHARD_MSG: { + const pg_id_t pg_id = msg_header->pg_id; + std::scoped_lock lock_guard(_pg_lock); + auto pg_iter = _pg_map.find(pg_id); + if (pg_iter == _pg_map.end()) { + LOGW("Requesting a chunk for an unknown pg={}", pg_id); + return std::nullopt; + } + auto sb = r_cast< shard_info_superblk const* >(header.cbytes() + sizeof(ReplicationMessageHeader)); + return sb->v_chunk_id; + } + default: { + LOGW("Unexpected message type encountered: {}. This function should only be called with 'CREATE_SHARD_MSG'.", + msg_header->msg_type); + return std::nullopt; + } + } +} + +bool HSHomeObject::release_chunk_based_on_create_shard_message(sisl::blob const& header) { + const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes()); + if (msg_header->corrupted()) { + LOGW("replication message header is corrupted with crc error"); + return false; + } - auto hs_shard = d_cast< HS_Shard* >(shards.front().get()); - pg->any_allocated_chunk_id_ = hs_shard->sb_->chunk_id; // cache it; - return {true /* pg_found */, true /* shards_found */, *pg->any_allocated_chunk_id_}; + switch (msg_header->msg_type) { + case ReplicationMessageType::CREATE_SHARD_MSG: { + const pg_id_t pg_id = msg_header->pg_id; + std::scoped_lock lock_guard(_pg_lock); + auto pg_iter = _pg_map.find(pg_id); + if (pg_iter == _pg_map.end()) { + LOGW("Requesting a chunk for an unknown pg={}", pg_id); + return false; + } + auto sb = r_cast< shard_info_superblk const* >(header.cbytes() + sizeof(ReplicationMessageHeader)); + bool res = chunk_selector_->release_chunk(sb->info.placement_group, sb->v_chunk_id); + if (!res) { LOGW("Failed to release chunk {} to pg {}", sb->v_chunk_id, sb->info.placement_group); } + return res; + } + default: { + LOGW("Unexpected message type encountered: {}. 
This function should only be called with 'CREATE_SHARD_MSG'.", + msg_header->msg_type); + return false; + } + } } -HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t chunk_id) : +HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t p_chunk_id) : Shard(std::move(shard_info)), sb_(_shard_meta_name) { sb_.create(sizeof(shard_info_superblk)); sb_->info = info; - sb_->chunk_id = chunk_id; + sb_->p_chunk_id = p_chunk_id; sb_.write(); } diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp index 3ff25ad8..35f5deb3 100644 --- a/src/lib/homestore_backend/replication_state_machine.cpp +++ b/src/lib/homestore_backend/replication_state_machine.cpp @@ -78,6 +78,10 @@ void ReplicationStateMachine::on_rollback(int64_t lsn, sisl::blob const& header, return; } switch (msg_header->msg_type) { + case ReplicationMessageType::CREATE_SHARD_MSG: { + home_object_->on_shard_message_rollback(lsn, header, key, ctx); + break; + } case ReplicationMessageType::SEAL_SHARD_MSG: { home_object_->on_shard_message_rollback(lsn, header, key, ctx); break; @@ -102,7 +106,13 @@ void ReplicationStateMachine::on_error(ReplServiceError error, const sisl::blob& result_ctx->promise_.setValue(folly::makeUnexpected(homeobject::toPgError(error))); break; } - case ReplicationMessageType::CREATE_SHARD_MSG: + case ReplicationMessageType::CREATE_SHARD_MSG: { + bool res = home_object_->release_chunk_based_on_create_shard_message(header); + if (!res) { LOGW("failed to release chunk based on create shard msg"); } + auto result_ctx = boost::static_pointer_cast< repl_result_ctx< ShardManager::Result< ShardInfo > > >(ctx).get(); + result_ctx->promise_.setValue(folly::makeUnexpected(toShardError(error))); + break; + } case ReplicationMessageType::SEAL_SHARD_MSG: { auto result_ctx = boost::static_pointer_cast< repl_result_ctx< ShardManager::Result< ShardInfo > > >(ctx).get(); result_ctx->promise_.setValue(folly::makeUnexpected(toShardError(error))); @@ -133,25 +143,33 @@ ReplicationStateMachine::get_blk_alloc_hints(sisl::blob const& header, uint32_t const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes()); switch (msg_header->msg_type) { case ReplicationMessageType::CREATE_SHARD_MSG: { - auto& pg_id = msg_header->pg_id; + pg_id_t pg_id = msg_header->pg_id; // check whether the pg exists if (!home_object_->pg_exists(pg_id)) { LOGI("can not find pg {} when getting blk_alloc_hint", pg_id); // TODO:: add error code to indicate the pg not found in homestore side return folly::makeUnexpected(homestore::ReplServiceError::NO_SPACE_LEFT); } - // Since chunks are selected when a pg is created, the chunkselector selects one of the chunks owned by the pg + + auto v_chunkID = home_object_->resolve_v_chunk_id_from_msg(header); + if (!v_chunkID.has_value()) { + LOGW("can not resolve v_chunk_id from msg"); + return folly::makeUnexpected(homestore::ReplServiceError::FAILED); + } homestore::blk_alloc_hints hints; - hints.pdev_id_hint = pg_id; // FIXME @Hooper: Temporary bypass using pdev_id_hint to represent - // pg_id_hint, "identical layout" will change it + // Both chunk_num_t and pg_id_t are of type uint16_t. 
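+            // so the (pg_id, v_chunk_id) pair packs losslessly into the 64-bit
+            // application_hint. A sketch of the encoding used below, with its
+            // arithmetic inverse for reference:
+            //   hint       = ((uint64_t)pg_id << 16) | v_chunk_id;
+            //   pg_id      = (pg_id_t)(hint >> 16);
+            //   v_chunk_id = (homestore::chunk_num_t)(hint & 0xFFFF);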
+            static_assert(std::is_same< pg_id_t, uint16_t >::value, "pg_id_t is not uint16_t");
+            static_assert(std::is_same< homestore::chunk_num_t, uint16_t >::value, "chunk_num_t is not uint16_t");
+            homestore::chunk_num_t v_chunk_id = v_chunkID.value();
+            hints.application_hint = ((uint64_t)pg_id << 16) | v_chunk_id;
 
             return hints;
         }
 
     case ReplicationMessageType::SEAL_SHARD_MSG: {
-        auto chunk_id = home_object_->get_shard_chunk(msg_header->shard_id);
-        RELEASE_ASSERT(chunk_id.has_value(), "unknown shard id to get binded chunk");
+        auto p_chunkID = home_object_->get_shard_p_chunk_id(msg_header->shard_id);
+        RELEASE_ASSERT(p_chunkID.has_value(), "unknown shard id to get binded chunk");
         homestore::blk_alloc_hints hints;
-        hints.chunk_id_hint = chunk_id.value();
+        hints.chunk_id_hint = p_chunkID.value();
         return hints;
     }
 
diff --git a/src/lib/homestore_backend/tests/hs_pg_tests.cpp b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
index b8a4499f..3a0dd89f 100644
--- a/src/lib/homestore_backend/tests/hs_pg_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
@@ -46,6 +46,44 @@ TEST_F(HomeObjectFixture, PGStatsTest) {
     LOGINFO("HomeObj stats: {}", stats.to_string());
 }
 
+TEST_F(HomeObjectFixture, PGExceedSpaceTest) {
+    LOGINFO("HomeObject replica={} setup completed", g_helper->replica_num());
+    pg_id_t pg_id{1};
+    if (0 == g_helper->replica_num()) { // leader
+        auto members = g_helper->members();
+        auto name = g_helper->name();
+        auto info = homeobject::PGInfo(pg_id);
+        info.size = 500 * Gi; // exceeds the locally available space
+        for (const auto& member : members) {
+            if (0 == member.second) {
+                // by default, leader is the first member
+                info.members.insert(homeobject::PGMember{member.first, name + std::to_string(member.second), 1});
+            } else {
+                info.members.insert(homeobject::PGMember{member.first, name + std::to_string(member.second), 0});
+            }
+        }
+        auto p = _obj_inst->pg_manager()->create_pg(std::move(info)).get();
+        ASSERT_TRUE(p.hasError());
+        PGError error = p.error();
+        ASSERT_EQ(PGError::NO_SPACE_LEFT, error);
+    } else {
+        auto start_time = std::chrono::steady_clock::now();
+        bool res = true;
+        // the follower needs to wait for pg creation
+        while (!pg_exist(pg_id)) {
+            auto current_time = std::chrono::steady_clock::now();
+            auto duration = std::chrono::duration_cast< std::chrono::seconds >(current_time - start_time).count();
+            if (duration >= 20) {
+                LOGINFO("pg {} not created at follower, as expected", pg_id);
+                res = false;
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+        ASSERT_FALSE(res);
+    }
+}
+
 TEST_F(HomeObjectFixture, PGRecoveryTest) {
     // create 10 pg
     for (pg_id_t i = 1; i < 11; i++) {
diff --git a/src/lib/homestore_backend/tests/hs_shard_tests.cpp b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
index b9161093..7d3feb77 100644
--- a/src/lib/homestore_backend/tests/hs_shard_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
@@ -6,18 +6,22 @@ TEST_F(HomeObjectFixture, CreateMultiShards) {
     auto _shard_1 = create_shard(pg_id, 64 * Mi);
     auto _shard_2 = create_shard(pg_id, 64 * Mi);
 
-    auto chunk_num_1 = _obj_inst->get_shard_chunk(_shard_1.id);
+    auto chunk_num_1 = _obj_inst->get_shard_p_chunk_id(_shard_1.id);
     ASSERT_TRUE(chunk_num_1.has_value());
 
-    auto chunk_num_2 = _obj_inst->get_shard_chunk(_shard_2.id);
+    auto chunk_num_2 = _obj_inst->get_shard_p_chunk_id(_shard_2.id);
     ASSERT_TRUE(chunk_num_2.has_value());
 
-    // check if both chunks are on the same pdev;
-    auto alloc_hint1 = _obj_inst->chunk_selector()->chunk_to_hints(chunk_num_1.value());
-    auto alloc_hint2 = _obj_inst->chunk_selector()->chunk_to_hints(chunk_num_2.value());
-    ASSERT_TRUE(alloc_hint1.pdev_id_hint.has_value());
-    ASSERT_TRUE(alloc_hint2.pdev_id_hint.has_value());
-    ASSERT_TRUE(alloc_hint1.pdev_id_hint.value() == alloc_hint2.pdev_id_hint.value());
+    // check if both chunks are on the same pg and pdev;
+    auto chunks = _obj_inst->chunk_selector()->m_chunks;
+    ASSERT_TRUE(chunks.find(chunk_num_1.value()) != chunks.end());
+    ASSERT_TRUE(chunks.find(chunk_num_2.value()) != chunks.end());
+    auto chunk_1 = chunks[chunk_num_1.value()];
+    auto chunk_2 = chunks[chunk_num_2.value()];
+    ASSERT_TRUE(chunk_1->m_pg_id.has_value());
+    ASSERT_TRUE(chunk_2->m_pg_id.has_value());
+    ASSERT_TRUE(chunk_1->m_pg_id.value() == chunk_2->m_pg_id.value());
+    ASSERT_TRUE(chunk_1->get_pdev_id() == chunk_2->get_pdev_id());
 }
 
 TEST_F(HomeObjectFixture, CreateMultiShardsOnMultiPG) {
@@ -30,20 +34,24 @@ TEST_F(HomeObjectFixture, CreateMultiShardsOnMultiPG) {
 
     for (const auto pg : pgs) {
         auto shard_info = create_shard(pg, Mi);
-        auto chunk_num_1 = _obj_inst->get_shard_chunk(shard_info.id);
+        auto chunk_num_1 = _obj_inst->get_shard_p_chunk_id(shard_info.id);
         ASSERT_TRUE(chunk_num_1.has_value());
 
         // create another shard again.
         shard_info = create_shard(pg, Mi);
-        auto chunk_num_2 = _obj_inst->get_shard_chunk(shard_info.id);
+        auto chunk_num_2 = _obj_inst->get_shard_p_chunk_id(shard_info.id);
         ASSERT_TRUE(chunk_num_2.has_value());
 
-        // check if both chunks are on the same pdev;
-        auto alloc_hint1 = _obj_inst->chunk_selector()->chunk_to_hints(chunk_num_1.value());
-        auto alloc_hint2 = _obj_inst->chunk_selector()->chunk_to_hints(chunk_num_2.value());
-        ASSERT_TRUE(alloc_hint1.pdev_id_hint.has_value());
-        ASSERT_TRUE(alloc_hint2.pdev_id_hint.has_value());
-        ASSERT_TRUE(alloc_hint1.pdev_id_hint.value() == alloc_hint2.pdev_id_hint.value());
+        // check if both chunks are on the same pg and pdev;
+        auto chunks = _obj_inst->chunk_selector()->m_chunks;
+        ASSERT_TRUE(chunks.find(chunk_num_1.value()) != chunks.end());
+        ASSERT_TRUE(chunks.find(chunk_num_2.value()) != chunks.end());
+        auto chunk_1 = chunks[chunk_num_1.value()];
+        auto chunk_2 = chunks[chunk_num_2.value()];
+        ASSERT_TRUE(chunk_1->m_pg_id.has_value());
+        ASSERT_TRUE(chunk_2->m_pg_id.has_value());
+        ASSERT_TRUE(chunk_1->m_pg_id.value() == chunk_2->m_pg_id.value());
+        ASSERT_TRUE(chunk_1->get_pdev_id() == chunk_2->get_pdev_id());
     }
 }
diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
index 0f3a1c1f..0bdb86a4 100644
--- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
@@ -40,7 +40,6 @@ class Chunk : public std::enable_shared_from_this< Chunk > {
     blk_num_t get_total_blks() const { return m_available_blks; }
     void set_chunk_id(uint16_t chunk_id) { m_chunk_id = chunk_id; }
 
-    const std::shared_ptr< Chunk > get_internal_chunk() { return shared_from_this(); }
     uint64_t size() const { return 1 * Mi; }
 
     Chunk(uint32_t pdev_id, uint16_t chunk_id, uint32_t available_blks, uint32_t defrag_nblks) {
@@ -75,15 +74,20 @@ blk_num_t VChunk::get_total_blks() const { return m_internal_chunk->get_total_blks(); }
 
 uint64_t VChunk::size() const { return m_internal_chunk->size(); }
 
-cshared< Chunk > VChunk::get_internal_chunk() const { return m_internal_chunk->get_internal_chunk(); }
+cshared< Chunk > VChunk::get_internal_chunk() const { return m_internal_chunk; }
 
 } // namespace homestore
 
+using homeobject::ChunkState;
 using
homeobject::csharedChunk; using homeobject::HeapChunkSelector; +using homeobject::pg_id_t; using homestore::Chunk; using homestore::chunk_num_t; +const pg_id_t FAKE_PG_ID = UINT16_MAX; +const chunk_num_t FAKE_CHUNK_ID = UINT16_MAX; + class HeapChunkSelectorTest : public ::testing::Test { protected: void SetUp() override { @@ -101,34 +105,34 @@ class HeapChunkSelectorTest : public ::testing::Test { }; void prepare_pg() { - const uint32_t chunk_size = HCS.get_chunk_size(); // may problem + const uint32_t chunk_size = HCS.get_chunk_size(); const u_int64_t pg_size = chunk_size * 3; for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { - HCS.select_chunks_for_pg(pg_id, pg_size); + ASSERT_EQ(HCS.select_chunks_for_pg(pg_id, pg_size), 3); uint32_t last_pdev_id = 0; // test pg heap - auto pg_heap_it = HCS.m_per_pg_heap.find(pg_id); - ASSERT_NE(pg_heap_it, HCS.m_per_pg_heap.end()); - ASSERT_EQ(pg_heap_it->second->size(), 3); - - // test chunk_map - auto v2r_chunk_map_it = HCS.m_v2r_chunk_map.find(pg_id); - ASSERT_NE(v2r_chunk_map_it, HCS.m_v2r_chunk_map.end()); - ASSERT_EQ(v2r_chunk_map_it->second->size(), 3); - - auto r2v_chunk_map_it = HCS.m_r2v_chunk_map.find(pg_id); - ASSERT_NE(r2v_chunk_map_it, HCS.m_r2v_chunk_map.end()); - ASSERT_EQ(r2v_chunk_map_it->second->size(), 3); + auto pg_it = HCS.m_per_pg_chunks.find(pg_id); + ASSERT_NE(pg_it, HCS.m_per_pg_chunks.end()); + auto pg_chunk_collection = pg_it->second; + auto& pg_chunks = pg_chunk_collection->m_pg_chunks; + ASSERT_EQ(pg_chunk_collection->available_num_chunks, 3); + ASSERT_EQ(pg_chunk_collection->available_blk_count, 1 + 2 + 3); + ASSERT_EQ(pg_chunk_collection->m_total_blks, 1 + 2 + 3); + for (int i = 0; i < 3; ++i) { - auto r_chunk_id = v2r_chunk_map_it->second->at(i); - ASSERT_EQ(i, r2v_chunk_map_it->second->at(r_chunk_id)); - auto pdev_id = HCS.m_chunks[r_chunk_id]->get_pdev_id(); + // test chunk information + auto p_chunk_id = pg_chunks[i]->get_chunk_id(); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_pg_id.value(), pg_id); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_v_chunk_id.value(), i); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::AVAILABLE); + // test pg chunks must belong to same pdev + auto pdev_id = HCS.m_chunks[p_chunk_id]->get_pdev_id(); if (last_pdev_id != 0) { ASSERT_EQ(last_pdev_id, pdev_id); } else { last_pdev_id = pdev_id; } - + // pdev heap should be empty at this point because all chunks have already been given to pg. 
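+                // (the fixture registers three chunks per pdev with 1/2/3
+                // available blks each, and this pg consumed all three chunks of
+                // one pdev above, so that pdev's heap is fully drained)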
auto pdev_it = HCS.m_per_dev_heap.find(pdev_id);
                ASSERT_NE(pdev_it, HCS.m_per_dev_heap.end());
                ASSERT_EQ(pdev_it->second->size(), 0);
@@ -136,7 +140,6 @@ class HeapChunkSelectorTest : public ::testing::Test {
         }
     }
 
-
 public:
     HeapChunkSelector HCS;
 };
@@ -147,6 +150,59 @@ TEST_F(HeapChunkSelectorTest, test_for_each_chunk) {
     ASSERT_EQ(size.load(), 18);
 }
 
+TEST_F(HeapChunkSelectorTest, test_identical_layout) {
+    const homestore::blk_count_t count = 1;
+    homestore::blk_alloc_hints hints;
+    for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+        chunk_num_t p_chunk_id = 0;
+        auto pg_chunk_collection = HCS.m_per_pg_chunks[pg_id];
+        auto start_available_blk_count = 1 + 2 + 3;
+        for (int j = 3; j > 0; --j) {
+            ASSERT_EQ(pg_chunk_collection->available_blk_count, start_available_blk_count);
+
+            const auto v_chunkID = HCS.get_most_available_blk_chunk(pg_id);
+            ASSERT_TRUE(v_chunkID.has_value());
+            p_chunk_id = pg_chunk_collection->m_pg_chunks[v_chunkID.value()]->get_chunk_id();
+            ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::INUSE);
+            ASSERT_EQ(pg_chunk_collection->available_num_chunks, j - 1);
+            ASSERT_EQ(pg_chunk_collection->available_blk_count, start_available_blk_count - j);
+
+            const auto v_chunkID2 = HCS.m_chunks[p_chunk_id]->m_v_chunk_id;
+            ASSERT_TRUE(v_chunkID2.has_value());
+            ASSERT_EQ(v_chunkID.value(), v_chunkID2.value());
+            hints.application_hint = ((uint64_t)pg_id << 16) | v_chunkID.value();
+
+            // mock leader on_commit
+            ASSERT_NE(HCS.select_chunk(count, hints), nullptr);
+            ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::INUSE);
+            ASSERT_EQ(pg_chunk_collection->available_num_chunks, j - 1);
+            ASSERT_EQ(pg_chunk_collection->available_blk_count, start_available_blk_count - j);
+
+            // mock leader rollback or on_error
+            ASSERT_TRUE(HCS.release_chunk(pg_id, v_chunkID.value()));
+            ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::AVAILABLE);
+            ASSERT_EQ(pg_chunk_collection->available_num_chunks, j);
+            ASSERT_EQ(pg_chunk_collection->available_blk_count, start_available_blk_count);
+
+            // mock follower rollback or on_error
+            ASSERT_TRUE(HCS.release_chunk(pg_id, v_chunkID.value()));
+            ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::AVAILABLE);
+            ASSERT_EQ(pg_chunk_collection->available_num_chunks, j);
+            ASSERT_EQ(pg_chunk_collection->available_blk_count, start_available_blk_count);
+
+            // mock follower on_commit
+            ASSERT_NE(HCS.select_chunk(count, hints), nullptr); // applies the same selection as the leader
+            ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::INUSE);
+            ASSERT_EQ(pg_chunk_collection->available_num_chunks, j - 1);
+            ASSERT_EQ(pg_chunk_collection->available_blk_count, start_available_blk_count - j);
+
+            start_available_blk_count -= j;
+        }
+        // all chunks have been given out
+        ASSERT_FALSE(HCS.get_most_available_blk_chunk(pg_id).has_value());
+    }
+}
+
 TEST_F(HeapChunkSelectorTest, test_select_chunk) {
     homestore::blk_count_t count = 1;
     homestore::blk_alloc_hints hints;
@@ -154,84 +210,66 @@
     ASSERT_EQ(chunk, nullptr);
 
     for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
-        hints.pdev_id_hint = pg_id; // tmp bypass using pdev_id_hint present pg_id
         for (int j = 3; j > 0; --j) {
+            chunk_num_t v_chunk_id = 3 - j;
+            hints.application_hint = ((uint64_t)pg_id << 16) | v_chunk_id;
             auto chunk = HCS.select_chunk(count, hints);
             ASSERT_NE(chunk, nullptr);
-            ASSERT_EQ(chunk->get_pdev_id(), pg_id);
+            ASSERT_EQ(chunk->get_pdev_id(), pg_id); // in this UT, pg_id is the same as the pdev id
             ASSERT_EQ(chunk->available_blks(), j);
         }
    }
 }
 
-
-TEST_F(HeapChunkSelectorTest, test_select_specific_chunk) { - const uint16_t pg_id = 1; - auto chunk_ids = HCS.get_pg_chunks(pg_id); - ASSERT_NE(chunk_ids, nullptr); - const chunk_num_t chunk_id = chunk_ids->at(0); - - auto chunk = HCS.select_specific_chunk(pg_id, chunk_id); - ASSERT_EQ(chunk->get_chunk_id(), chunk_id); - auto pdev_id = chunk->get_pdev_id(); - - // make sure pg chunk map - auto pg_heap_it = HCS.m_per_pg_heap.find(pg_id); - ASSERT_NE(pg_heap_it, HCS.m_per_pg_heap.end()); - ASSERT_EQ(pg_heap_it->second->size(), 2); - - // test chunk_map stable - auto v2r_chunk_map_it = HCS.m_v2r_chunk_map.find(pg_id); - ASSERT_NE(v2r_chunk_map_it, HCS.m_v2r_chunk_map.end()); - ASSERT_EQ(v2r_chunk_map_it->second->size(), 3); - - auto r2v_chunk_map_it = HCS.m_r2v_chunk_map.find(pg_id); - ASSERT_NE(r2v_chunk_map_it, HCS.m_r2v_chunk_map.end()); - ASSERT_EQ(r2v_chunk_map_it->second->size(), 3); - - // select the rest chunks to make sure specific chunk does not exist in HeapChunkSelector anymore. - homestore::blk_count_t count = 1; - homestore::blk_alloc_hints hints; - hints.pdev_id_hint = pg_id; - for (int j = 2; j > 0; --j) { - auto chunk = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk->get_pdev_id(), pdev_id); +TEST_F(HeapChunkSelectorTest, test_select_specific_chunk_and_release_chunk) { + for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { + // test fake + ASSERT_FALSE(HCS.release_chunk(FAKE_PG_ID, FAKE_CHUNK_ID)); + ASSERT_FALSE(HCS.release_chunk(pg_id, FAKE_CHUNK_ID)); + ASSERT_EQ(nullptr, HCS.select_specific_chunk(FAKE_PG_ID, FAKE_CHUNK_ID)); + ASSERT_EQ(nullptr, HCS.select_specific_chunk(pg_id, FAKE_CHUNK_ID)); + + auto chunk_ids = HCS.get_pg_chunks(pg_id); + ASSERT_NE(chunk_ids, nullptr); + const chunk_num_t v_chunk_id = 0; + const chunk_num_t p_chunk_id = chunk_ids->at(v_chunk_id); + + auto pg_chunk_collection = HCS.m_per_pg_chunks[pg_id]; + auto chunk = HCS.select_specific_chunk(pg_id, v_chunk_id); + ASSERT_NE(nullptr, chunk); + ASSERT_EQ(chunk->get_chunk_id(), p_chunk_id); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::INUSE); + ASSERT_EQ(pg_chunk_collection->available_num_chunks, 2); + ASSERT_EQ(pg_chunk_collection->available_blk_count, 1 + 2); + + // test select an INUSE chunk + chunk = HCS.select_specific_chunk(pg_id, v_chunk_id); + ASSERT_NE(nullptr, chunk); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::INUSE); + ASSERT_EQ(pg_chunk_collection->available_num_chunks, 2); + ASSERT_EQ(pg_chunk_collection->available_blk_count, 1 + 2); + + // release this chunk to HeapChunkSelector + ASSERT_TRUE(HCS.release_chunk(pg_id, v_chunk_id)); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::AVAILABLE); + ASSERT_EQ(pg_chunk_collection->available_num_chunks, 3); + ASSERT_EQ(pg_chunk_collection->available_blk_count, 1 + 2 + 3); + + // test release an AVAILABLE chunk + ASSERT_TRUE(HCS.release_chunk(pg_id, v_chunk_id)); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::AVAILABLE); + ASSERT_EQ(pg_chunk_collection->available_num_chunks, 3); + ASSERT_EQ(pg_chunk_collection->available_blk_count, 1 + 2 + 3); + + // select again + chunk = HCS.select_specific_chunk(pg_id, v_chunk_id); + ASSERT_NE(nullptr, chunk); + ASSERT_EQ(HCS.m_chunks[p_chunk_id]->m_state, ChunkState::INUSE); + ASSERT_EQ(pg_chunk_collection->available_num_chunks, 2); + ASSERT_EQ(pg_chunk_collection->available_blk_count, 1 + 2); + ASSERT_EQ(pg_id, chunk->get_pdev_id()); // in this ut, pg_id is same as pdev id + ASSERT_EQ(p_chunk_id, chunk->get_chunk_id()); } - - // release this chunk to 
HeapChunkSelector - HCS.release_chunk(pg_id, chunk_id); - chunk = HCS.select_chunk(1, hints); - ASSERT_EQ(1, chunk->get_pdev_id()); - ASSERT_EQ(chunk_id, chunk->get_chunk_id()); - -} - - -TEST_F(HeapChunkSelectorTest, test_release_chunk) { - homestore::blk_count_t count = 1; - homestore::blk_alloc_hints hints; - const uint16_t pg_id = 1; - hints.pdev_id_hint = pg_id; - auto chunk1 = HCS.select_chunk(count, hints); - auto pdev_id = chunk1->get_pdev_id(); - - ASSERT_EQ(chunk1->get_pdev_id(), pdev_id); - ASSERT_EQ(chunk1->available_blks(), 3); - - auto chunk2 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk2->get_pdev_id(), pdev_id); - ASSERT_EQ(chunk2->available_blks(), 2); - - HCS.release_chunk(pg_id, chunk1->get_chunk_id()); - HCS.release_chunk(pg_id, chunk2->get_chunk_id()); - - chunk1 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk1->get_pdev_id(), pdev_id); - ASSERT_EQ(chunk1->available_blks(), 3); - - chunk2 = HCS.select_chunk(count, hints); - ASSERT_EQ(chunk2->get_pdev_id(), pdev_id); - ASSERT_EQ(chunk2->available_blks(), 2); } TEST_F(HeapChunkSelectorTest, test_recovery) { @@ -242,49 +280,81 @@ TEST_F(HeapChunkSelectorTest, test_recovery) { HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 4, 1, 6)); HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 5, 2, 5)); HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 6, 3, 4)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(3, 7, 1, 3)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(3, 8, 2, 2)); + HCS_recovery.add_chunk(std::make_shared< Chunk >(3, 9, 3, 1)); - std::vector chunk_ids {1,2,3}; - const uint16_t pg_id = 1; - // test recover chunk map - HCS_recovery.set_pg_chunks(pg_id, std::move(chunk_ids)); - auto v2r_chunk_map_it = HCS_recovery.m_v2r_chunk_map.find(pg_id); - ASSERT_NE(v2r_chunk_map_it, HCS_recovery.m_v2r_chunk_map.end()); - ASSERT_EQ(v2r_chunk_map_it->second->size(), 3); - - auto r2v_chunk_map_it = HCS_recovery.m_r2v_chunk_map.find(pg_id); - ASSERT_NE(r2v_chunk_map_it, HCS_recovery.m_r2v_chunk_map.end()); - ASSERT_EQ(r2v_chunk_map_it->second->size(), 3); - // test recover pdev map - HCS_recovery.recover_per_dev_chunk_heap(); - auto pdev_it = HCS_recovery.m_per_dev_heap.find(1); - ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end()); - ASSERT_EQ(pdev_it->second->size(), 0); - - pdev_it = HCS_recovery.m_per_dev_heap.find(2); - ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end()); - ASSERT_EQ(pdev_it->second->size(), 3); - auto &pdev_heap = pdev_it->second->m_heap; - auto vchunk = homestore::VChunk(nullptr); - for (int i = 6; i > 3; --i) { - vchunk = pdev_heap.top(); - pdev_heap.pop(); - ASSERT_EQ(vchunk.get_chunk_id(), i); + // on_pg_meta_blk_found + for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { + std::vector< chunk_num_t > chunk_ids{1, 2}; + std::vector< chunk_num_t > chunk_ids_for_twice{1, 2}; + std::vector< chunk_num_t > chunk_ids_not_valid{1, 20}; + std::vector< chunk_num_t > chunk_ids_not_same_pdev{1, 6}; + for (chunk_num_t j = 0; j < 2; ++j) { + chunk_ids[j] += (pg_id - 1) * 3; + chunk_ids_for_twice[j] += (pg_id - 1) * 3; + chunk_ids_not_valid[j] += (pg_id - 1) * 3; + chunk_ids_not_same_pdev[j] += ((pg_id - 1) * 3) % 9; + } + + // test recover chunk map + ASSERT_FALSE(HCS_recovery.recover_pg_chunks(pg_id, std::move(chunk_ids_not_valid))); + ASSERT_FALSE(HCS_recovery.recover_pg_chunks(pg_id, std::move(chunk_ids_not_same_pdev))); + + ASSERT_TRUE(HCS_recovery.recover_pg_chunks(pg_id, std::move(chunk_ids))); + // can't set pg chunks twice + ASSERT_FALSE(HCS_recovery.recover_pg_chunks(pg_id, 
std::move(chunk_ids_for_twice)));
+
+        auto pg_it = HCS_recovery.m_per_pg_chunks.find(pg_id);
+        ASSERT_NE(pg_it, HCS_recovery.m_per_pg_chunks.end());
+        auto pg_chunk_collection = pg_it->second;
+        ASSERT_EQ(pg_chunk_collection->m_pg_chunks.size(), 2);
+        for (chunk_num_t v_chunk_id = 0; v_chunk_id < 2; ++v_chunk_id) {
+            ASSERT_EQ(pg_chunk_collection->m_pg_chunks[v_chunk_id]->m_pg_id, pg_id);
+            ASSERT_EQ(pg_chunk_collection->m_pg_chunks[v_chunk_id]->m_v_chunk_id, v_chunk_id);
+            ASSERT_EQ(pg_chunk_collection->m_pg_chunks[v_chunk_id]->get_chunk_id(), chunk_ids[v_chunk_id]);
+        }
    }
 
-    // test recover pg heap
-    std::unordered_set< homestore::chunk_num_t > excluding_chunks;
-    excluding_chunks.emplace(1);
-    HCS_recovery.recover_pg_chunk_heap(pg_id, excluding_chunks);
-    auto pg_heap_it = HCS_recovery.m_per_pg_heap.find(pg_id);
-    ASSERT_NE(pg_heap_it, HCS_recovery.m_per_pg_heap.end());
-    ASSERT_EQ(pg_heap_it->second->size(), 2);
+    // on_pg_meta_blk_recover_completed
+    HCS_recovery.recover_per_dev_chunk_heap();
+    for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+        // test recover pdev map size
+        auto pdev_it = HCS_recovery.m_per_dev_heap.find(pg_id);
+        ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end());
+        ASSERT_EQ(pdev_it->second->size(), 1); // 1 = 3(all) - 2(pg)
+
+        auto& pdev_heap = pdev_it->second->m_heap;
+        auto chunk = pdev_heap.top();
+        ASSERT_EQ(chunk->get_chunk_id(), 3 + (pg_id - 1) * 3);
+    }
 
-    homestore::blk_alloc_hints hints;
-    hints.pdev_id_hint = pg_id;
-    for (int j = 3; j > 1; --j) {
-        auto chunk = HCS_recovery.select_chunk(1, hints);
-        ASSERT_EQ(chunk->get_pdev_id(), 1);
-        ASSERT_EQ(chunk->available_blks(), j);
+    // on_shard_meta_blk_recover_completed
+    for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+        // test recover pg heap
+        std::unordered_set< homestore::chunk_num_t > excluding_chunks;
+        excluding_chunks.emplace(0);
+
+        ASSERT_FALSE(HCS_recovery.recover_pg_chunks_states(FAKE_PG_ID, excluding_chunks));
+        ASSERT_TRUE(HCS_recovery.recover_pg_chunks_states(pg_id, excluding_chunks));
+
+        auto pg_it = HCS_recovery.m_per_pg_chunks.find(pg_id);
+        ASSERT_NE(pg_it, HCS_recovery.m_per_pg_chunks.end());
+        auto pg_chunk_collection = pg_it->second;
+        ASSERT_EQ(pg_chunk_collection->m_pg_chunks.size(), 2); // size won't change
+        ASSERT_EQ(pg_chunk_collection->available_num_chunks, 1);
+        ASSERT_EQ(pg_chunk_collection->available_blk_count, 2); // only v_chunk_id=1 is left
+
+        ASSERT_EQ(pg_chunk_collection->m_pg_chunks[0]->m_state, ChunkState::INUSE);
+        ASSERT_EQ(pg_chunk_collection->m_pg_chunks[1]->m_state, ChunkState::AVAILABLE);
+
+        const auto v_chunkID = HCS_recovery.get_most_available_blk_chunk(pg_id);
+        ASSERT_TRUE(v_chunkID.has_value());
+        auto chunk = HCS_recovery.select_specific_chunk(pg_id, v_chunkID.value());
+        ASSERT_NE(chunk, nullptr);
+        ASSERT_EQ(chunk->get_pdev_id(), pg_id);
+        ASSERT_EQ(chunk->available_blks(), 2);
+        ASSERT_EQ(pg_chunk_collection->m_pg_chunks[1]->m_state, ChunkState::INUSE);
    }
 }

From d33b601d426da71c5d57b604177e45d5b8a5a063 Mon Sep 17 00:00:00 2001
From: Hooper
Date: Tue, 19 Nov 2024 20:25:56 -0700
Subject: [PATCH 4/5] Fix HeapChunkSelector::get_most_available_blk_chunk

1. Add a check to ensure max_it points to an available chunk. If it does not,
   there are no available chunks left.
2. Add safeguards in the create and recover pg paths to ensure the pg size
   cannot be zero.
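For reference, the comparator behind fix (1), written out as a sketch (the
lambda is quoted from the diff below; the truth table is added commentary).
Unavailable chunks rank below every available chunk, so std::max_element can
only land on an unavailable chunk when the pg has no available chunk at all,
which the new check after the scan detects:

    // cmp(a, b) == true  <=>  a ranks below b:
    //   a unavailable                -> true   (a sorts below everything)
    //   a available, b unavailable   -> false  (a sorts above b)
    //   both available               -> order by available_blks()
    auto cmp = [](const std::shared_ptr< ExtendedVChunk >& a, const std::shared_ptr< ExtendedVChunk >& b) {
        return !a->available() || (b->available() && a->available_blks() < b->available_blks());
    };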
--- conanfile.py | 2 +- .../homestore_backend/heap_chunk_selector.cpp | 23 +++++++++++-------- src/lib/homestore_backend/hs_pg_manager.cpp | 5 ++++ .../tests/test_heap_chunk_selector.cpp | 6 ++++- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/conanfile.py b/conanfile.py index 8a044105..fdd1682f 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "2.1.9" + version = "2.1.10" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeReplication" diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp index a4d46706..a7dcd6f5 100644 --- a/src/lib/homestore_backend/heap_chunk_selector.cpp +++ b/src/lib/homestore_backend/heap_chunk_selector.cpp @@ -129,6 +129,10 @@ std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id, LOGWARNMOD(homeobject, "PG had already created, pg_id {}", pg_id); return std::nullopt; } + if (pg_size == 0) { + LOGWARNMOD(homeobject, "Not supported to create empty PG, pg_id {}, pg_size {}", pg_id, pg_size); + return std::nullopt; + } const auto chunk_size = get_chunk_size(); const uint32_t num_chunk = sisl::round_down(pg_size, chunk_size) / chunk_size; @@ -178,6 +182,10 @@ bool HeapChunkSelector::recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_ LOGWARNMOD(homeobject, "PG {} had been recovered", pg_id); return false; } + if (p_chunk_ids.size() == 0) { + LOGWARNMOD(homeobject, "Unexpected empty PG {}", pg_id); + return false; + } // check chunks valid, must belong to m_chunks and have same pdev_id std::optional< uint32_t > last_pdev_id; @@ -277,23 +285,18 @@ std::optional< homestore::chunk_num_t > HeapChunkSelector::get_most_available_bl LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id); return std::nullopt; } - if (pg_it->second->available_num_chunks == 0) { - LOGWARNMOD(homeobject, "No available chunk for pg {}", pg_id); - return std::nullopt; - } - std::scoped_lock lock(pg_it->second->mtx); auto pg_chunk_collection = pg_it->second; auto& pg_chunks = pg_chunk_collection->m_pg_chunks; auto max_it = std::max_element(pg_chunks.begin(), pg_chunks.end(), [](const std::shared_ptr< ExtendedVChunk >& a, const std::shared_ptr< ExtendedVChunk >& b) { - if (a->available() && b->available()) { return a->available_blks() < b->available_blks(); } - if (!a->available() && b->available()) { return true; } - if (a->available() && !b->available()) { return false; } - return false; + return !a->available() || (b->available() && a->available_blks() < b->available_blks()); }); - + if (!(*max_it)->available()) { + LOGWARNMOD(homeobject, "No available chunk for PG {}", pg_id); + return std::nullopt; + } auto v_chunk_id = std::distance(pg_chunks.begin(), max_it); pg_chunks[v_chunk_id]->m_state = ChunkState::INUSE; --pg_chunk_collection->available_num_chunks; diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp index 58bacd57..1d03ec6c 100644 --- a/src/lib/homestore_backend/hs_pg_manager.cpp +++ b/src/lib/homestore_backend/hs_pg_manager.cpp @@ -60,6 +60,11 @@ PGManager::NullAsyncResult HSHomeObject::_create_pg(PGInfo&& pg_info, std::set< auto pg_id = pg_info.id; if (auto lg = std::shared_lock(_pg_lock); _pg_map.end() != _pg_map.find(pg_id)) return folly::Unit(); + if (pg_info.size == 0) { + LOGW("Not supported to create empty PG, pg_id {}, pg_size {}", pg_id, pg_info.size); + return folly::makeUnexpected(PGError::INVALID_ARG); + } 
+ const auto most_avail_num_chunks = chunk_selector()->most_avail_num_chunks(); const auto chunk_size = chunk_selector()->get_chunk_size(); const auto needed_num_chunks = sisl::round_down(pg_info.size, chunk_size) / chunk_size; diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp index 0bdb86a4..20fa7109 100644 --- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp +++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp @@ -108,7 +108,9 @@ class HeapChunkSelectorTest : public ::testing::Test { const uint32_t chunk_size = HCS.get_chunk_size(); const u_int64_t pg_size = chunk_size * 3; for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { - ASSERT_EQ(HCS.select_chunks_for_pg(pg_id, pg_size), 3); + // not supported to create empty pg + ASSERT_FALSE(HCS.select_chunks_for_pg(pg_id, 0).has_value()); + ASSERT_EQ(HCS.select_chunks_for_pg(pg_id, pg_size).value(), 3); uint32_t last_pdev_id = 0; // test pg heap auto pg_it = HCS.m_per_pg_chunks.find(pg_id); @@ -287,6 +289,7 @@ TEST_F(HeapChunkSelectorTest, test_recovery) { // on_pg_meta_blk_found for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) { std::vector< chunk_num_t > chunk_ids{1, 2}; + std::vector< chunk_num_t > empty_chunk_ids{}; std::vector< chunk_num_t > chunk_ids_for_twice{1, 2}; std::vector< chunk_num_t > chunk_ids_not_valid{1, 20}; std::vector< chunk_num_t > chunk_ids_not_same_pdev{1, 6}; @@ -298,6 +301,7 @@ TEST_F(HeapChunkSelectorTest, test_recovery) { } // test recover chunk map + ASSERT_FALSE(HCS_recovery.recover_pg_chunks(pg_id, std::move(empty_chunk_ids))); ASSERT_FALSE(HCS_recovery.recover_pg_chunks(pg_id, std::move(chunk_ids_not_valid))); ASSERT_FALSE(HCS_recovery.recover_pg_chunks(pg_id, std::move(chunk_ids_not_same_pdev))); From 160f8c1023474bb825c891a2b81468d42f9c5574 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Mon, 25 Nov 2024 20:23:13 -0700 Subject: [PATCH 5/5] add is leader check before any raft write operation 1 change on_destroy signature to adapt to homestore 2 add is leader check before any raft write operation --- conanfile.py | 2 +- src/lib/homestore_backend/hs_blob_manager.cpp | 10 ++++++++++ src/lib/homestore_backend/hs_shard_manager.cpp | 10 ++++++++++ .../homestore_backend/replication_state_machine.cpp | 2 +- .../homestore_backend/replication_state_machine.hpp | 2 +- 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/conanfile.py b/conanfile.py index fdd1682f..29884417 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "2.1.10" + version = "2.1.11" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeReplication" diff --git a/src/lib/homestore_backend/hs_blob_manager.cpp b/src/lib/homestore_backend/hs_blob_manager.cpp index 2283eb8a..b4715144 100644 --- a/src/lib/homestore_backend/hs_blob_manager.cpp +++ b/src/lib/homestore_backend/hs_blob_manager.cpp @@ -99,6 +99,11 @@ BlobManager::AsyncResult< blob_id_t > HSHomeObject::_put_blob(ShardInfo const& s RELEASE_ASSERT(repl_dev != nullptr, "Repl dev instance null"); + if (!repl_dev->is_leader()) { + LOGW("failed to put blob for pg [{}], shard [{}], not leader", pg_id, shard.id); + return folly::makeUnexpected(BlobErrorCode::NOT_LEADER); + } + // Create a put_blob request which allocates for header, key and blob_header, user_key. 
Data sgs are added later auto req = put_blob_req_ctx::make(sizeof(BlobHeader) + blob.user_key.size()); req->header()->msg_type = ReplicationMessageType::PUT_BLOB_MSG; @@ -371,6 +376,11 @@ BlobManager::NullAsyncResult HSHomeObject::_del_blob(ShardInfo const& shard, blo RELEASE_ASSERT(repl_dev != nullptr, "Repl dev instance null"); + if (!repl_dev->is_leader()) { + LOGW("failed to del blob for pg [{}], shard [{}], blob_id [{}], not leader", pg_id, shard.id, blob_id); + return folly::makeUnexpected(BlobErrorCode::NOT_LEADER); + } + // Create an unaligned header request unaligned auto req = repl_result_ctx< BlobManager::Result< BlobInfo > >::make(0u /* header_extn */, sizeof(blob_id_t) /* key_size */); diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp index 67681923..74102f7f 100644 --- a/src/lib/homestore_backend/hs_shard_manager.cpp +++ b/src/lib/homestore_backend/hs_shard_manager.cpp @@ -108,6 +108,11 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_create_shard(pg_id_t pg_ow return folly::makeUnexpected(ShardError::PG_NOT_READY); } + if (!repl_dev->is_leader()) { + LOGW("failed to create shard for pg [{}], not leader", pg_owner); + return folly::makeUnexpected(ShardError::NOT_LEADER); + } + auto new_shard_id = generate_new_shard_id(pg_owner); auto create_time = get_current_timestamp(); @@ -172,6 +177,11 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_seal_shard(ShardInfo const RELEASE_ASSERT(repl_dev != nullptr, "Repl dev null"); } + if (!repl_dev->is_leader()) { + LOGW("failed to seal shard for shard [{}], not leader", shard_id); + return folly::makeUnexpected(ShardError::NOT_LEADER); + } + ShardInfo tmp_info = info; tmp_info.state = ShardInfo::State::SEALED; diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp index 35f5deb3..2b8d5f57 100644 --- a/src/lib/homestore_backend/replication_state_machine.cpp +++ b/src/lib/homestore_backend/replication_state_machine.cpp @@ -189,7 +189,7 @@ void ReplicationStateMachine::on_replace_member(const homestore::replica_member_ home_object_->on_pg_replace_member(repl_dev()->group_id(), member_out, member_in); } -void ReplicationStateMachine::on_destroy() { +void ReplicationStateMachine::on_destroy(const homestore::group_id_t& group_id) { // TODO:: add the logic to handle destroy LOGI("replica destroyed"); } diff --git a/src/lib/homestore_backend/replication_state_machine.hpp b/src/lib/homestore_backend/replication_state_machine.hpp index 25556e29..eb74eac0 100644 --- a/src/lib/homestore_backend/replication_state_machine.hpp +++ b/src/lib/homestore_backend/replication_state_machine.hpp @@ -173,7 +173,7 @@ class ReplicationStateMachine : public homestore::ReplDevListener { const homestore::replica_member_info& member_in) override; /// @brief Called when the replica is being destroyed by nuraft; - void on_destroy() override; + void on_destroy(const homestore::group_id_t& group_id) override; /// Not Implemented /// @brief Called when the snapshot is being created by nuraft;