diff --git a/conanfile.py b/conanfile.py
index e9edacac..fe81d8e7 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "2.1.6"
+    version = "2.1.7"
     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeReplication"
@@ -49,7 +49,7 @@ def build_requirements(self):
     def requirements(self):
         self.requires("sisl/[^12.2]@oss/master", transitive_headers=True)
-        self.requires("homestore/[^6.4]@oss/master")
+        self.requires("homestore/[^6.5]@oss/master")
         self.requires("iomgr/[^11.3]@oss/master")
         self.requires("lz4/1.9.4", override=True)
         self.requires("openssl/3.3.1", override=True)
diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp
index 897ab2ad..6e2a1492 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/heap_chunk_selector.cpp
@@ -31,7 +31,7 @@ void HeapChunkSelector::add_chunk_internal(const chunk_num_t chunkID, bool add_t
     auto pdevID = vchunk.get_pdev_id();
     // add this find here, since we don`t want to call make_shared in try_emplace every time.
     auto it = m_per_dev_heap.find(pdevID);
-    if (it == m_per_dev_heap.end()) { it = m_per_dev_heap.emplace(pdevID, std::make_shared< PerDevHeap >()).first; }
+    if (it == m_per_dev_heap.end()) { it = m_per_dev_heap.emplace(pdevID, std::make_shared< ChunkHeap >()).first; }

     // build total blks for every chunk on this device;
     it->second->m_total_blks += vchunk.get_total_blks();
@@ -59,31 +59,20 @@ csharedChunk HeapChunkSelector::select_chunk(homestore::blk_count_t count, const
         return nullptr;
     }

-    // shardid -> chunkid map is maintained by ShardManager
-    // pg_id->pdev_id map is maintained by PgManager
-    // chunselector will not take care of the two maps for now.
-    uint32_t pdevID = 0;
-    auto& pdevIdHint = hint.pdev_id_hint;
-    if (!pdevIdHint.has_value()) {
-        // this is the first shard of this pg, select a pdev with the most available blocks for it
-        auto&& it =
-            std::max_element(m_per_dev_heap.begin(), m_per_dev_heap.end(),
-                             [](const std::pair< const uint32_t, std::shared_ptr< PerDevHeap > >& lhs,
-                                const std::pair< const uint32_t, std::shared_ptr< PerDevHeap > >& rhs) {
-                                 return lhs.second->available_blk_count.load() < rhs.second->available_blk_count.load();
-                             });
-        if (it == m_per_dev_heap.end()) {
-            LOGWARNMOD(homeobject, "No pdev found for new pg");
-            return nullptr;
-        }
-        pdevID = it->first;
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    // FIXME @Hooper: Temporarily bypass using pdev_id_hint to represent pg_id_hint; "identical layout" will change it
+    pg_id_t pg_id = 0;
+    auto& pg_id_hint = hint.pdev_id_hint;
+    if (!pg_id_hint.has_value()) {
+        LOGWARNMOD(homeobject, "should not allocate a chunk without an existing pg_id in the hint!");
+        return nullptr;
     } else {
-        pdevID = pdevIdHint.value();
+        pg_id = pg_id_hint.value();
     }

-    auto it = m_per_dev_heap.find(pdevID);
-    if (it == m_per_dev_heap.end()) {
-        LOGWARNMOD(homeobject, "No pdev found for pdev {}", pdevID);
+    auto it = m_per_pg_heap.find(pg_id);
+    if (it == m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
         return nullptr;
     }

@@ -99,29 +88,29 @@ csharedChunk HeapChunkSelector::select_chunk(homestore::blk_count_t count, const
         avalableBlkCounter.fetch_sub(vchunk.available_blks());
         remove_chunk_from_defrag_heap(vchunk.get_chunk_id());
     } else {
-        LOGWARNMOD(homeobject, "No pdev found for pdev {}", pdevID);
+        LOGWARNMOD(homeobject, "no available chunks left for pg {}", pg_id);
     }

     return vchunk.get_internal_chunk();
 }

-csharedChunk HeapChunkSelector::select_specific_chunk(const chunk_num_t chunkID) {
+csharedChunk HeapChunkSelector::select_specific_chunk(const pg_id_t pg_id, const chunk_num_t chunkID) {
     if (m_chunks.find(chunkID) == m_chunks.end()) {
         // sanity check
         LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID);
         return nullptr;
     }

-    auto const pdevID = VChunk(m_chunks[chunkID]).get_pdev_id();
-    auto it = m_per_dev_heap.find(pdevID);
-    if (it == m_per_dev_heap.end()) {
-        LOGWARNMOD(homeobject, "No pdev found for pdev {}", pdevID);
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    auto pg_it = m_per_pg_heap.find(pg_id);
+    if (pg_it == m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
         return nullptr;
     }

-    auto vchunk = VChunk(nullptr);
-    auto& heap = it->second->m_heap;
-    if (auto lock_guard = std::lock_guard< std::mutex >(it->second->mtx); !heap.empty()) {
+    VChunk vchunk(nullptr);
+    auto& heap = pg_it->second->m_heap;
+    if (auto lock_guard = std::lock_guard< std::mutex >(pg_it->second->mtx); !heap.empty()) {
         std::vector< VChunk > chunks;
         chunks.reserve(heap.size());
         while (!heap.empty()) {
@@ -133,14 +122,13 @@ csharedChunk HeapChunkSelector::select_specific_chunk(const chunk_num_t chunkID)
             }
             chunks.push_back(std::move(c));
         }
-
         for (auto& c : chunks) { heap.emplace(c); }
     }

     if (vchunk.get_internal_chunk()) {
-        auto& avalableBlkCounter = it->second->available_blk_count;
+        auto& avalableBlkCounter = pg_it->second->available_blk_count;
         avalableBlkCounter.fetch_sub(vchunk.available_blks());
         remove_chunk_from_defrag_heap(vchunk.get_chunk_id());
     }
@@ -148,20 +136,21 @@ csharedChunk HeapChunkSelector::select_specific_chunk(const chunk_num_t chunkID)
     return vchunk.get_internal_chunk();
 }

+// Temporarily commented out; the subsequent GC implementation needs to be adapted to the fixed pg size
 // most_defrag_chunk will only be called when GC is triggered, and will return the chunk with the most
 // defrag blocks
 csharedChunk HeapChunkSelector::most_defrag_chunk() {
-    chunk_num_t chunkID{0};
+    // chunk_num_t chunkID{0};
     // the chunk might be seleted for creating shard. if this happens, we need to select another chunk
-    for (;;) {
-        {
-            std::lock_guard< std::mutex > lg(m_defrag_mtx);
-            if (m_defrag_heap.empty()) break;
-            chunkID = m_defrag_heap.top().get_chunk_id();
-        }
-        auto chunk = select_specific_chunk(chunkID);
-        if (chunk) return chunk;
-    }
+    // for (;;) {
+    //     {
+    //         std::lock_guard< std::mutex > lg(m_defrag_mtx);
+    //         if (m_defrag_heap.empty()) break;
+    //         chunkID = m_defrag_heap.top().get_chunk_id();
+    //     }
+    //     auto chunk = select_specific_chunk(chunkID);
+    //     if (chunk) return chunk;
+    // }
     return nullptr;
 }
@@ -186,22 +175,155 @@ void HeapChunkSelector::foreach_chunks(std::function< void(csharedChunk&) >&& cb
                   [cb = std::move(cb)](auto& p) { cb(p.second); });
 }

-void HeapChunkSelector::release_chunk(const chunk_num_t chunkID) {
-    const auto& it = m_chunks.find(chunkID);
-    if (it == m_chunks.end()) {
+void HeapChunkSelector::release_chunk(const pg_id_t pg_id, const chunk_num_t chunkID) {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    if (m_chunks.find(chunkID) == m_chunks.end()) {
         // sanity check
         LOGWARNMOD(homeobject, "No chunk found for ChunkID {}", chunkID);
-    } else {
-        add_chunk_internal(chunkID);
+        return;
+    }
+
+    auto pg_it = m_per_pg_heap.find(pg_id);
+    if (pg_it == m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "No pg found for pg_id {}", pg_id);
+        return;
     }
+
+    const auto& chunk = m_chunks[chunkID];
+    VChunk vchunk(chunk);
+    {
+        std::lock_guard< std::mutex > l(pg_it->second->mtx);
+        auto& pg_heap = pg_it->second->m_heap;
+        pg_heap.emplace(chunk);
+    }
+    auto& avalableBlkCounter = pg_it->second->available_blk_count;
+    avalableBlkCounter += vchunk.available_blks();
+}
+
+uint32_t HeapChunkSelector::get_chunk_size() const {
+    const auto& chunk = m_chunks.begin()->second;
+    auto vchunk = VChunk(chunk);
+    return vchunk.size();
+}

-void HeapChunkSelector::build_per_dev_chunk_heap(const std::unordered_set< chunk_num_t >& excludingChunks) {
-    for (const auto& p : m_chunks) {
+std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id, uint64_t pg_size) {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    if (m_per_pg_heap.find(pg_id) != m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "PG has already been created, pg_id {}", pg_id);
+        return std::nullopt;
+    }
+
+    const auto chunk_size = get_chunk_size();
+    const uint32_t num_chunk = sisl::round_down(pg_size, chunk_size) / chunk_size;
+
+    // Select the pdev with the most available chunks
+    auto&& most_avail_dev_it =
+        std::max_element(m_per_dev_heap.begin(), m_per_dev_heap.end(),
+                         [](const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& lhs,
+                            const std::pair< const uint32_t, std::shared_ptr< ChunkHeap > >& rhs) {
+                             return lhs.second->size() < rhs.second->size();
+                         });
+    auto& pdev_heap = most_avail_dev_it->second;
+    if (num_chunk > pdev_heap->size()) {
+        LOGWARNMOD(homeobject, "Pdev does not have enough space to create pg {} with num_chunk {}", pg_id, num_chunk);
+        return std::nullopt;
+    }
+    auto vchunk = VChunk(nullptr);
+    auto it = m_per_pg_heap.emplace(pg_id, std::make_shared< ChunkHeap >()).first;
+    auto v2r_vector = m_v2r_chunk_map.emplace(pg_id, std::make_shared< std::vector< chunk_num_t > >()).first->second;
+    auto r2v_map = m_r2v_chunk_map.emplace(pg_id, std::make_shared< ChunkIdMap >()).first->second;
+
+    auto& pg_heap = it->second;
+    std::scoped_lock lock(pdev_heap->mtx, pg_heap->mtx);
+    v2r_vector->reserve(num_chunk);
+    for (chunk_num_t i = 0; i < num_chunk; ++i) {
+        vchunk = pdev_heap->m_heap.top();
+        // sanity check
+        RELEASE_ASSERT(vchunk.get_total_blks() == vchunk.available_blks(), "vchunk should be empty");
+        pdev_heap->m_heap.pop();
+        pdev_heap->available_blk_count -= vchunk.available_blks();
+
+        pg_heap->m_heap.emplace(vchunk);
+        pg_heap->m_total_blks += vchunk.get_total_blks();
+        pg_heap->available_blk_count += vchunk.available_blks();
+        // v_chunk_id starts from 0.
+        chunk_num_t v_chunk_id = i;
+        chunk_num_t r_chunk_id = vchunk.get_chunk_id();
+        v2r_vector->emplace_back(r_chunk_id);
+        r2v_map->emplace(r_chunk_id, v_chunk_id);
+    }
+
+    return num_chunk;
+}
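A note on the two maps built above: for each pg, m_v2r_chunk_map stores real chunk ids indexed by virtual chunk id, and m_r2v_chunk_map holds the inverse. A minimal, self-contained sketch of that invariant follows (plain uint16_t stands in for homestore::chunk_num_t; this is illustrative only, not the HomeObject code):

#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

int main() {
    using chunk_num_t = uint16_t;
    // v2r: the index is the virtual chunk id, the value is the real chunk id.
    std::vector< chunk_num_t > v2r{42, 7, 19};
    // r2v: inverse lookup, built the same way set_pg_chunks() builds it.
    std::unordered_map< chunk_num_t, chunk_num_t > r2v;
    for (chunk_num_t v = 0; v < v2r.size(); ++v) { r2v.emplace(v2r[v], v); }
    // Round-trip invariant relied on by the "identical layout" scheme.
    for (chunk_num_t v = 0; v < v2r.size(); ++v) { assert(r2v.at(v2r[v]) == v); }
    return 0;
}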
+
+void HeapChunkSelector::set_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& chunk_ids) {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    if (m_v2r_chunk_map.find(pg_id) != m_v2r_chunk_map.end()) {
+        LOGWARNMOD(homeobject, "PG {} has already been recovered", pg_id);
+        return;
+    }
+
+    auto v2r_vector = m_v2r_chunk_map.emplace(pg_id, std::make_shared< std::vector< chunk_num_t > >(std::move(chunk_ids))).first->second;
+    auto r2v_map = m_r2v_chunk_map.emplace(pg_id, std::make_shared< ChunkIdMap >()).first->second;
+
+    for (chunk_num_t i = 0; i < v2r_vector->size(); ++i) {
+        // v_chunk_id starts from 0.
+        chunk_num_t v_chunk_id = i;
+        chunk_num_t r_chunk_id = (*v2r_vector)[i];
+        r2v_map->emplace(r_chunk_id, v_chunk_id);
+    }
+}
+
+void HeapChunkSelector::recover_per_dev_chunk_heap() {
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    for (const auto& [chunk_id, _] : m_chunks) {
         bool add_to_heap = true;
-        if (excludingChunks.find(p.first) != excludingChunks.end()) { add_to_heap = false; }
-        add_chunk_internal(p.first, add_to_heap);
-    };
+        for (const auto& [_, chunk_map] : m_r2v_chunk_map) {
+            if (chunk_map->find(chunk_id) != chunk_map->end()) {
+                add_to_heap = false;
+                break;
+            }
+        }
+        add_chunk_internal(chunk_id, add_to_heap);
+    }
+}
+
+void HeapChunkSelector::recover_pg_chunk_heap(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excludingChunks)
+{
+    std::unique_lock lock_guard(m_chunk_selector_mtx);
+    if (m_per_pg_heap.find(pg_id) != m_per_pg_heap.end()) {
+        LOGWARNMOD(homeobject, "Pg_heap {} has already been recovered", pg_id);
+        return;
+    }
+    auto it = m_v2r_chunk_map.find(pg_id);
+    if (it == m_v2r_chunk_map.end()) {
+        LOGWARNMOD(homeobject, "Pg_chunk_map {} has never been recovered", pg_id);
+        return;
+    }
+    const auto& chunk_ids = it->second;
+    auto& pg_heap = m_per_pg_heap.emplace(pg_id, std::make_shared< ChunkHeap >()).first->second;
+    for (const auto& chunk_id : *chunk_ids) {
+        if (excludingChunks.find(chunk_id) == excludingChunks.end()) {
+            const auto& chunk = m_chunks[chunk_id];
+            auto vchunk = VChunk(chunk);
+            pg_heap->m_heap.emplace(vchunk);
+            pg_heap->m_total_blks += vchunk.get_total_blks();
+            pg_heap->available_blk_count += vchunk.available_blks();
+        }
+    }
+}
+
+std::shared_ptr< const std::vector< chunk_num_t > > HeapChunkSelector::get_pg_chunks(pg_id_t pg_id) const {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
+    auto it = m_v2r_chunk_map.find(pg_id);
+    if (it != m_v2r_chunk_map.end()) {
+        return it->second;
+    } else {
+        LOGWARNMOD(homeobject, "PG {} has never been created", pg_id);
+        return nullptr;
+    }
 }

 homestore::blk_alloc_hints HeapChunkSelector::chunk_to_hints(chunk_num_t chunk_id) const {
@@ -217,6 +339,7 @@ homestore::blk_alloc_hints HeapChunkSelector::chunk_to_hints(chunk_num_t chunk_i

 // return the maximum number of chunks that can be allocated on pdev
 uint32_t HeapChunkSelector::most_avail_num_chunks() const {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
     uint32_t max_avail_num_chunks = 0ul;
     for (auto const& [_, pdev_heap] : m_per_dev_heap) {
         max_avail_num_chunks = std::max(max_avail_num_chunks, pdev_heap->size());
@@ -226,6 +349,7 @@ uint32_t HeapChunkSelector::most_avail_num_chunks() const {
 }

 uint32_t HeapChunkSelector::avail_num_chunks(uint32_t dev_id) const {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
     auto it = m_per_dev_heap.find(dev_id);
     if (it == m_per_dev_heap.end()) {
         LOGWARNMOD(homeobject, "No pdev found for pdev {}", dev_id);
@@ -238,6 +362,7 @@ uint32_t HeapChunkSelector::avail_num_chunks(uint32_t dev_id) const {
 uint32_t HeapChunkSelector::total_chunks() const { return m_chunks.size(); }

 uint64_t HeapChunkSelector::avail_blks(std::optional< uint32_t > dev_it) const {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
     if (!dev_it.has_value()) {
         uint64_t max_avail_blks = 0ull;
         for (auto const& [_, heap] : m_per_dev_heap) {
@@ -257,6 +382,7 @@ uint64_t HeapChunkSelector::avail_blks(std::optional< uint32_t > dev_it) const {
 }

 uint64_t HeapChunkSelector::total_blks(uint32_t dev_id) const {
+    std::shared_lock lock_guard(m_chunk_selector_mtx);
     auto it = m_per_dev_heap.find(dev_id);
     if (it == m_per_dev_heap.end()) {
         LOGWARNMOD(homeobject, "No pdev found for pdev {}", dev_id);
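Both select_chunks_for_pg() above and _create_pg() later derive the chunk count by rounding the pg size down to whole chunks. A hedged stand-alone sketch of that arithmetic (round_down mimics sisl::round_down; the 16 MiB chunk size and 50 MiB pg size are assumed example values):

#include <cstdint>
#include <iostream>

// Stand-in for sisl::round_down: largest multiple of 'multiple' not above 'val'.
static uint64_t round_down(uint64_t val, uint64_t multiple) { return (val / multiple) * multiple; }

int main() {
    const uint64_t chunk_size = 16ull * 1024 * 1024; // assumed chunk size
    const uint64_t pg_size = 50ull * 1024 * 1024;    // assumed pg size
    const uint32_t num_chunk = round_down(pg_size, chunk_size) / chunk_size;
    std::cout << num_chunk << "\n"; // prints 3: a pg never receives a partial chunk
    return 0;
}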
diff --git a/src/lib/homestore_backend/heap_chunk_selector.h b/src/lib/homestore_backend/heap_chunk_selector.h
index 1ccf5d15..259ecfb5 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.h
+++ b/src/lib/homestore_backend/heap_chunk_selector.h
@@ -35,9 +35,10 @@ class HeapChunkSelector : public homestore::ChunkSelector {
     using VChunkHeap = std::priority_queue< VChunk, std::vector< VChunk >, VChunkComparator >;
     using VChunkDefragHeap = std::priority_queue< VChunk, std::vector< VChunk >, VChunkDefragComparator >;
+    using ChunkIdMap = std::unordered_map< homestore::chunk_num_t, homestore::chunk_num_t >; // used for the real chunk id -> virtual chunk id map
     using chunk_num_t = homestore::chunk_num_t;

-    struct PerDevHeap {
+    struct ChunkHeap {
         std::mutex mtx;
         VChunkHeap m_heap;
         std::atomic_size_t available_blk_count;
@@ -46,22 +47,41 @@ class HeapChunkSelector : public homestore::ChunkSelector {
     };

     void add_chunk(csharedChunk&) override;
+
     void foreach_chunks(std::function< void(csharedChunk&) >&& cb) override;
+
     csharedChunk select_chunk([[maybe_unused]] homestore::blk_count_t nblks, const homestore::blk_alloc_hints& hints);

     // this function will be used by GC flow or recovery flow to mark one specific chunk to be busy, caller should be
     // responsible to use release_chunk() interface to release it when no longer to use the chunk anymore.
-    csharedChunk select_specific_chunk(const chunk_num_t);
+    csharedChunk select_specific_chunk(const pg_id_t pg_id, const chunk_num_t);

     // this function will be used by GC flow to select a chunk for GC
     csharedChunk most_defrag_chunk();

     // this function is used to return a chunk back to ChunkSelector when sealing a shard, and will only be used by
     // Homeobject.
-    void release_chunk(const chunk_num_t);
+    void release_chunk(const pg_id_t pg_id, const chunk_num_t);
+
+    /**
+     * select chunks for a pg; the chunks need to be on the same pdev.
+     *
+     * @param pg_id The ID of the pg.
+     * @param pg_size The fixed pg size.
+     * @return An optional uint32_t value representing num_chunk, or std::nullopt if no space is left.
+     */
+    std::optional< uint32_t > select_chunks_for_pg(pg_id_t pg_id, uint64_t pg_size);
+
+    std::shared_ptr< const std::vector< chunk_num_t > > get_pg_chunks(pg_id_t pg_id) const;
+
+    // this should be called on each pg meta blk found
+    void set_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& chunk_ids);
+
+    // this should be called after all pg meta blks are recovered
+    void recover_per_dev_chunk_heap();

     // this should be called after ShardManager is initialized and get all the open shards
-    void build_per_dev_chunk_heap(const std::unordered_set< chunk_num_t >& excludingChunks);
+    void recover_pg_chunk_heap(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excludingChunks);

     /**
      * Retrieves the block allocation hints for a given chunk.
@@ -112,12 +132,22 @@ class HeapChunkSelector : public homestore::ChunkSelector {
      */
     uint32_t total_chunks() const;

+    uint32_t get_chunk_size() const;
+
 private:
-    std::unordered_map< uint32_t, std::shared_ptr< PerDevHeap > > m_per_dev_heap;
+    std::unordered_map< uint32_t, std::shared_ptr< ChunkHeap > > m_per_dev_heap;
+    std::unordered_map< pg_id_t, std::shared_ptr< ChunkHeap > > m_per_pg_heap;
+
+    // These mappings ensure "identical layout" by providing bidirectional indexing between virtual and real chunk IDs.
+    // m_v2r_chunk_map: maps each pg_id to a vector of real chunk IDs (r_chunk_id); the index in the vector is the virtual chunk ID (v_chunk_id).
+    std::unordered_map< pg_id_t, std::shared_ptr< std::vector< chunk_num_t > > > m_v2r_chunk_map;
+    // m_r2v_chunk_map: maps each pg_id to a map that inversely maps real chunk IDs (r_chunk_id) to virtual chunk IDs (v_chunk_id).
+    std::unordered_map< pg_id_t, std::shared_ptr< ChunkIdMap > > m_r2v_chunk_map;

     // hold all the chunks , selected or not
     std::unordered_map< chunk_num_t, csharedChunk > m_chunks;

+    mutable std::shared_mutex m_chunk_selector_mtx;
     void add_chunk_internal(const chunk_num_t, bool add_to_heap = true);

     VChunkDefragHeap m_defrag_heap;
diff --git a/src/lib/homestore_backend/hs_homeobject.cpp b/src/lib/homestore_backend/hs_homeobject.cpp
index b313a508..85945c91 100644
--- a/src/lib/homestore_backend/hs_homeobject.cpp
+++ b/src/lib/homestore_backend/hs_homeobject.cpp
@@ -226,7 +226,7 @@ void HSHomeObject::on_replica_restart() {
             [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
                 on_pg_meta_blk_found(std::move(buf), voidptr_cast(mblk));
             },
-            nullptr, true);
+            [this](bool success) { on_pg_meta_blk_recover_completed(success); }, true);
         HomeStore::instance()->meta_service().read_sub_sb(_pg_meta_name);

         // recover shard
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index 357f0317..d2a46892 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -78,15 +78,24 @@ class HSHomeObject : public HomeObjectImpl {
     struct pg_info_superblk {
         pg_id_t id;
         uint32_t num_members;
+        uint32_t num_chunks;
         peer_id_t replica_set_uuid;
+        uint64_t pg_size;
         homestore::uuid_t index_table_uuid;
         blob_id_t blob_sequence_num;
         uint64_t active_blob_count;        // Total number of active blobs
         uint64_t tombstone_blob_count;     // Total number of tombstones
         uint64_t total_occupied_blk_count; // Total number of occupied blocks
-        pg_members members[1];             // ISO C++ forbids zero-size array
+        char data[1];                      // ISO C++ forbids zero-size array
+        // Data layout inside 'data':
+        // First, an array of 'pg_members' structures:
+        // | pg_members[0] | pg_members[1] | ... | pg_members[num_members-1] |
+        // Immediately followed by an array of 'chunk_num_t' values (representing r_chunk_ids):
+        // | chunk_num_t[0] | chunk_num_t[1] | ... | chunk_num_t[num_chunks-1] |
+        // Here, 'chunk_num_t[i]' is the r_chunk_id for v_chunk_id 'i', where v_chunk_id starts from 0 and increases sequentially.

-        uint32_t size() const { return sizeof(pg_info_superblk) + ((num_members - 1) * sizeof(pg_members)); }
+        uint32_t size() const { return sizeof(pg_info_superblk) - sizeof(char) + num_members * sizeof(pg_members) + num_chunks * sizeof(homestore::chunk_num_t); }
         static std::string name() { return _pg_meta_name; }

         pg_info_superblk() = default;
@@ -95,14 +104,24 @@ class HSHomeObject : public HomeObjectImpl {
         pg_info_superblk& operator=(pg_info_superblk const& rhs) {
             id = rhs.id;
             num_members = rhs.num_members;
+            num_chunks = rhs.num_chunks;
+            pg_size = rhs.pg_size;
             replica_set_uuid = rhs.replica_set_uuid;
             index_table_uuid = rhs.index_table_uuid;
             blob_sequence_num = rhs.blob_sequence_num;
-            memcpy(members, rhs.members, sizeof(pg_members) * num_members);
+
+            memcpy(get_pg_members_mutable(), rhs.get_pg_members(), sizeof(pg_members) * num_members);
+            memcpy(get_chunk_ids_mutable(), rhs.get_chunk_ids(), sizeof(homestore::chunk_num_t) * num_chunks);
             return *this;
         }

         void copy(pg_info_superblk const& rhs) { *this = rhs; }
+
+        pg_members* get_pg_members_mutable() { return reinterpret_cast< pg_members* >(data); }
+        const pg_members* get_pg_members() const { return reinterpret_cast< const pg_members* >(data); }
+
+        homestore::chunk_num_t* get_chunk_ids_mutable() { return reinterpret_cast< homestore::chunk_num_t* >(data + num_members * sizeof(pg_members)); }
+        const homestore::chunk_num_t* get_chunk_ids() const { return reinterpret_cast< const homestore::chunk_num_t* >(data + num_members * sizeof(pg_members)); }
     };

     struct DataHeader {
@@ -195,7 +214,7 @@ class HSHomeObject : public HomeObjectImpl {
         std::shared_ptr< BlobIndexTable > index_table_;
         PGMetrics metrics_;

-        HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table);
+        HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, std::shared_ptr< const std::vector< homestore::chunk_num_t > > pg_chunk_ids);
         HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev);
         ~HS_PG() override = default;
@@ -335,6 +354,7 @@ class HSHomeObject : public HomeObjectImpl {
     // recover part
     void register_homestore_metablk_callback();
     void on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
+    void on_pg_meta_blk_recover_completed(bool success);
     void on_shard_meta_blk_found(homestore::meta_blk* mblk, sisl::byte_view buf);
     void on_shard_meta_blk_recover_completed(bool success);
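To make the new trailing-blob layout of pg_info_superblk concrete, here is a compilable toy version with a trimmed field set and simplified types; offsets and sizes are illustrative only, not the real on-disk format:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

struct pg_members_t { uint64_t id; char name[32]; int32_t priority; };

struct pg_info_superblk_t {
    uint32_t num_members;
    uint32_t num_chunks;
    char data[1]; // pg_members_t[num_members] followed by chunk ids[num_chunks]

    static uint32_t size_for(uint32_t members, uint32_t chunks) {
        return sizeof(pg_info_superblk_t) - sizeof(char) + members * sizeof(pg_members_t) +
               chunks * sizeof(uint16_t);
    }
    pg_members_t* members_mutable() { return reinterpret_cast< pg_members_t* >(data); }
    uint16_t* chunk_ids_mutable() {
        return reinterpret_cast< uint16_t* >(data + num_members * sizeof(pg_members_t));
    }
};

int main() {
    auto* sb = static_cast< pg_info_superblk_t* >(std::malloc(pg_info_superblk_t::size_for(2, 3)));
    sb->num_members = 2;
    sb->num_chunks = 3;
    sb->members_mutable()[1].priority = 1; // second member slot
    sb->chunk_ids_mutable()[2] = 42;       // r_chunk_id stored for v_chunk_id 2
    std::printf("superblk bytes: %u\n", pg_info_superblk_t::size_for(2, 3));
    std::free(sb);
    return 0;
}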
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 5a90bb93..0cd9f749 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -60,6 +60,15 @@ PGManager::NullAsyncResult HSHomeObject::_create_pg(PGInfo&& pg_info, std::set<
     auto pg_id = pg_info.id;
     if (auto lg = std::shared_lock(_pg_lock); _pg_map.end() != _pg_map.find(pg_id)) return folly::Unit();

+    const auto most_avail_num_chunks = chunk_selector()->most_avail_num_chunks();
+    const auto chunk_size = chunk_selector()->get_chunk_size();
+    const auto needed_num_chunks = sisl::round_down(pg_info.size, chunk_size) / chunk_size;
+    if (needed_num_chunks > most_avail_num_chunks) {
+        LOGW("Not enough space to create pg, pg_id {}, needed_num_chunks {}, most_avail_num_chunks {}", pg_id,
+             needed_num_chunks, most_avail_num_chunks);
+        return folly::makeUnexpected(PGError::NO_SPACE_LEFT);
+    }
+
     pg_info.replica_set_uuid = boost::uuids::random_generator()();
     return hs_repl_service()
         .create_repl_dev(pg_info.replica_set_uuid, peers)
@@ -127,12 +136,21 @@ void HSHomeObject::on_create_pg_message_commit(int64_t lsn, sisl::blob const& he
         return;
     }

+    // select chunks for pg
+    auto const num_chunk = chunk_selector()->select_chunks_for_pg(pg_id, pg_info.size);
+    if (!num_chunk.has_value()) {
+        LOGW("select chunks for pg failed, pg_id {}", pg_id);
+        if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(PGError::NO_SPACE_LEFT)); }
+        return;
+    }
+    auto chunk_ids = chunk_selector()->get_pg_chunks(pg_id);
+
     // create index table and pg
     // TODO create index table during create shard.
     auto index_table = create_index_table();
     auto uuid_str = boost::uuids::to_string(index_table->uuid());

-    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table);
+    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table, chunk_ids);
     std::scoped_lock lock_guard(index_lock_);
     RELEASE_ASSERT(index_table_pg_map_.count(uuid_str) == 0, "duplicate index table found");
     index_table_pg_map_[uuid_str] = PgIndexTable{pg_id, index_table};
@@ -193,11 +211,11 @@ void HSHomeObject::on_pg_replace_member(homestore::group_id_t group_id, const re
     pg->pg_info_.members.emplace(PGMember(member_in.id, member_in.name, member_in.priority));

     uint32_t i{0};
+    pg_members* sb_members = hs_pg->pg_sb_->get_pg_members_mutable();
     for (auto const& m : pg->pg_info_.members) {
-        hs_pg->pg_sb_->members[i].id = m.id;
-        std::strncpy(hs_pg->pg_sb_->members[i].name, m.name.c_str(),
-                     std::min(m.name.size(), pg_members::max_name_len));
-        hs_pg->pg_sb_->members[i].priority = m.priority;
+        sb_members[i].id = m.id;
+        std::strncpy(sb_members[i].name, m.name.c_str(), std::min(m.name.size(), pg_members::max_name_len));
+        sb_members[i].priority = m.priority;
         ++i;
     }

@@ -226,6 +244,7 @@ void HSHomeObject::add_pg_to_map(unique< HS_PG > hs_pg) {
 std::string HSHomeObject::serialize_pg_info(const PGInfo& pginfo) {
     nlohmann::json j;
     j["pg_info"]["pg_id_t"] = pginfo.id;
+    j["pg_info"]["pg_size"] = pginfo.size;
     j["pg_info"]["repl_uuid"] = boost::uuids::to_string(pginfo.replica_set_uuid);

     nlohmann::json members_j{};
@@ -244,6 +263,7 @@ PGInfo HSHomeObject::deserialize_pg_info(const unsigned char* json_str, size_t s
     auto pg_json = nlohmann::json::parse(json_str, json_str + size);

     PGInfo pg_info(pg_json["pg_info"]["pg_id_t"].get< pg_id_t >());
+    pg_info.size = pg_json["pg_info"]["pg_size"].get< uint64_t >();
     pg_info.replica_set_uuid = boost::uuids::string_generator()(pg_json["pg_info"]["repl_uuid"].get< std::string >());

     for (auto const& m : pg_json["pg_info"]["members"]) {
@@ -267,6 +287,8 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
         return;
     }
     auto pg_id = pg_sb->id;
+    std::vector< chunk_num_t > chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks);
+    chunk_selector_->set_pg_chunks(pg_id, std::move(chunk_ids));
     auto uuid_str = boost::uuids::to_string(pg_sb->index_table_uuid);
     auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value()));
     // During PG recovery check if index is already recoverd else
@@ -280,37 +302,51 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     add_pg_to_map(std::move(hs_pg));
 }

+void HSHomeObject::on_pg_meta_blk_recover_completed(bool success) {
+    chunk_selector_->recover_per_dev_chunk_heap();
+}
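Recovery ordering is the point of the new callback: set_pg_chunks() fires once per pg superblk found, recover_per_dev_chunk_heap() only after the last one (via on_pg_meta_blk_recover_completed), and recover_pg_chunk_heap() later still, from shard recovery. A stubbed sketch of that sequence; StubSelector is hypothetical and only the call order mirrors the patch:

#include <cstdint>
#include <unordered_set>
#include <vector>

using pg_id_t = uint16_t;
using chunk_num_t = uint16_t;

struct StubSelector {
    void set_pg_chunks(pg_id_t, std::vector< chunk_num_t >&&) {} // per pg meta blk
    void recover_per_dev_chunk_heap() {}                         // after all pg meta blks
    void recover_pg_chunk_heap(pg_id_t, const std::unordered_set< chunk_num_t >&) {}
};

int main() {
    StubSelector selector;
    // 1. Each pg superblk found replays its v2r chunk list.
    selector.set_pg_chunks(1, {4, 5, 6});
    // 2. Once every pg meta blk is seen, leftover chunks go to the per-pdev heaps.
    selector.recover_per_dev_chunk_heap();
    // 3. After shard recovery, each pg heap is rebuilt, excluding chunks of OPEN shards.
    selector.recover_pg_chunk_heap(1, std::unordered_set< chunk_num_t >{5});
    return 0;
}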
+
 PGInfo HSHomeObject::HS_PG::pg_info_from_sb(homestore::superblk< pg_info_superblk > const& sb) {
     PGInfo pginfo{sb->id};
+    const pg_members* sb_members = sb->get_pg_members();
     for (uint32_t i{0}; i < sb->num_members; ++i) {
-        pginfo.members.emplace(sb->members[i].id, std::string(sb->members[i].name), sb->members[i].priority);
+        pginfo.members.emplace(sb_members[i].id, std::string(sb_members[i].name), sb_members[i].priority);
     }
+    pginfo.size = sb->pg_size;
     pginfo.replica_set_uuid = sb->replica_set_uuid;
     return pginfo;
 }

-HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table) :
+HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, std::shared_ptr< const std::vector< homestore::chunk_num_t > > pg_chunk_ids) :
         PG{std::move(info)},
         pg_sb_{_pg_meta_name},
         repl_dev_{std::move(rdev)},
         index_table_{std::move(index_table)},
         metrics_{*this} {
-    pg_sb_.create(sizeof(pg_info_superblk) + ((pg_info_.members.size() - 1) * sizeof(pg_members)));
+    RELEASE_ASSERT(pg_chunk_ids != nullptr, "PG chunks null");
+    const uint32_t num_chunks = pg_chunk_ids->size();
+    pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members) + num_chunks * sizeof(homestore::chunk_num_t));
     pg_sb_->id = pg_info_.id;
     pg_sb_->num_members = pg_info_.members.size();
+    pg_sb_->num_chunks = num_chunks;
+    pg_sb_->pg_size = pg_info_.size;
     pg_sb_->replica_set_uuid = repl_dev_->group_id();
     pg_sb_->index_table_uuid = index_table_->uuid();
     pg_sb_->active_blob_count = 0;
     pg_sb_->tombstone_blob_count = 0;
     pg_sb_->total_occupied_blk_count = 0;

     uint32_t i{0};
+    pg_members* pg_sb_members = pg_sb_->get_pg_members_mutable();
     for (auto const& m : pg_info_.members) {
-        pg_sb_->members[i].id = m.id;
-        std::strncpy(pg_sb_->members[i].name, m.name.c_str(), std::min(m.name.size(), pg_members::max_name_len));
-        pg_sb_->members[i].priority = m.priority;
+        pg_sb_members[i].id = m.id;
+        std::strncpy(pg_sb_members[i].name, m.name.c_str(), std::min(m.name.size(), pg_members::max_name_len));
+        pg_sb_members[i].priority = m.priority;
         ++i;
     }
+    chunk_num_t* pg_sb_chunk_ids = pg_sb_->get_chunk_ids_mutable();
+    for (i = 0; i < num_chunks; ++i) {
+        pg_sb_chunk_ids[i] = pg_chunk_ids->at(i);
+    }

     pg_sb_.write();
 }
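The hs_shard_manager.cpp changes below thread the owning pg through a chunk's busy/free transitions: journal replay of a shard-create re-marks the chunk busy, and sealing returns it to the same pg heap. Sketched with placeholder stubs (method names mirror HeapChunkSelector; the bodies are intentionally empty):

#include <cstdint>

using pg_id_t = uint16_t;
using chunk_num_t = uint16_t;

struct StubSelector {
    // replay of CREATE_SHARD re-marks the shard's chunk busy inside its pg heap
    void select_specific_chunk(pg_id_t, chunk_num_t) {}
    // SEAL_SHARD returns the chunk to the same pg heap, never to the pdev heap
    void release_chunk(pg_id_t, chunk_num_t) {}
};

int main() {
    StubSelector selector;
    const pg_id_t pg_id = 1;
    const chunk_num_t chunk_id = 4;
    selector.select_specific_chunk(pg_id, chunk_id); // journal replay after restart
    selector.release_chunk(pg_id, chunk_id);         // shard sealed
    return 0;
}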
diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 8378087b..938c64ec 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -324,12 +324,12 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom
             std::scoped_lock lock_guard(_shard_lock);
             shard_exist = (_shard_map.find(shard_info.id) != _shard_map.end());
         }
-
         if (!shard_exist) {
             add_new_shard_to_map(std::make_unique< HS_Shard >(shard_info, blkids.chunk_num()));
             // select_specific_chunk() will do something only when we are relaying journal after restart, during the
             // runtime flow chunk is already been be mark busy when we write the shard info to the repldev.
-            chunk_selector_->select_specific_chunk(blkids.chunk_num());
+            auto pg_id = shard_info.placement_group;
+            chunk_selector_->select_specific_chunk(pg_id, blkids.chunk_num());
         }
         if (ctx) { ctx->promise_.setValue(ShardManager::Result< ShardInfo >(shard_info)); }
@@ -362,9 +362,10 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom
         }

         if (state == ShardInfo::State::SEALED) {
+            auto pg_id = shard_info.placement_group;
             auto chunk_id = get_shard_chunk(shard_info.id);
             RELEASE_ASSERT(chunk_id.has_value(), "Chunk id not found");
-            chunk_selector()->release_chunk(chunk_id.value());
+            chunk_selector()->release_chunk(pg_id, chunk_id.value());
             update_shard_in_map(shard_info);
         } else
             LOGW("try to commit SEAL_SHARD_MSG but shard state is not sealed, shard_id: {}", shard_info.id);
@@ -387,13 +388,15 @@ void HSHomeObject::on_shard_meta_blk_recover_completed(bool success) {
     std::unordered_set< homestore::chunk_num_t > excluding_chunks;
     std::scoped_lock lock_guard(_pg_lock);
     for (auto& pair : _pg_map) {
+        excluding_chunks.clear();
+        excluding_chunks.reserve(pair.second->shards_.size());
         for (auto& shard : pair.second->shards_) {
             if (shard->info.state == ShardInfo::State::OPEN) {
                 excluding_chunks.emplace(d_cast< HS_Shard* >(shard.get())->sb_->chunk_id);
             }
         }
+        chunk_selector_->recover_pg_chunk_heap(pair.first, excluding_chunks);
     }
-    chunk_selector_->build_per_dev_chunk_heap(excluding_chunks);
 }

 void HSHomeObject::add_new_shard_to_map(ShardPtr&& shard) {
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index 7f9164f5..ac3c6114 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -121,17 +121,10 @@ ReplicationStateMachine::get_blk_alloc_hints(sisl::blob const& header, uint32_t
     const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes());

     switch (msg_header->msg_type) {
     case ReplicationMessageType::CREATE_SHARD_MSG: {
-        auto const [pg_found, shards_found, chunk_id] = home_object_->get_any_chunk_id(msg_header->pg_id);
-        if (!pg_found) {
-            LOGW("Requesting a chunk for an unknown pg={}, letting the caller retry after sometime", msg_header->pg_id);
-            return folly::makeUnexpected(homestore::ReplServiceError::RESULT_NOT_EXIST_YET);
-        } else if (!shards_found) {
-            // pg is empty without any shards, we leave the decision the HeapChunkSelector to select a pdev
-            // with most available space and then select one chunk based on that pdev
-        } else {
-            return home_object_->chunk_selector()->chunk_to_hints(chunk_id);
-        }
-        break;
+        // Since chunks are selected when a pg is created, the chunk selector picks one of the chunks owned by the pg
+        homestore::blk_alloc_hints hints;
+        hints.pdev_id_hint = msg_header->pg_id; // FIXME @Hooper: Temporarily bypass using pdev_id_hint to represent pg_id_hint; "identical layout" will change it
+        return hints;
     }

     case ReplicationMessageType::SEAL_SHARD_MSG: {
diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
index c0f3bb66..441475c8 100644
--- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp
+++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
@@ -61,6 +61,7 @@ class HomeObjectFixture : public ::testing::Test {
         auto memebers = g_helper->members();
         auto name = g_helper->name();
         auto info = homeobject::PGInfo(pg_id);
+        info.size = 50 * Mi;
         for (const auto& member : memebers) {
             if (replica_num == member.second) {
                 // by default, leader is the first member
@@ -293,6 +294,8 @@ class HomeObjectFixture : public ::testing::Test {
         EXPECT_EQ(lhs->id, rhs->id);
         EXPECT_EQ(lhs->num_members, rhs->num_members);
+        EXPECT_EQ(lhs->num_chunks, rhs->num_chunks);
+        EXPECT_EQ(lhs->pg_size, rhs->pg_size);
         EXPECT_EQ(lhs->replica_set_uuid, rhs->replica_set_uuid);
         EXPECT_EQ(lhs->index_table_uuid, rhs->index_table_uuid);
         EXPECT_EQ(lhs->blob_sequence_num, rhs->blob_sequence_num);
@@ -301,9 +304,12 @@ class HomeObjectFixture : public ::testing::Test {
         EXPECT_EQ(lhs->total_occupied_blk_count, rhs->total_occupied_blk_count);
         EXPECT_EQ(lhs->tombstone_blob_count, rhs->tombstone_blob_count);
         for (uint32_t i = 0; i < lhs->num_members; i++) {
-            EXPECT_EQ(lhs->members[i].id, rhs->members[i].id);
-            EXPECT_EQ(lhs->members[i].priority, rhs->members[i].priority);
-            EXPECT_EQ(0, std::strcmp(lhs->members[i].name, rhs->members[i].name));
+            EXPECT_EQ(lhs->get_pg_members()[i].id, rhs->get_pg_members()[i].id);
+            EXPECT_EQ(lhs->get_pg_members()[i].priority, rhs->get_pg_members()[i].priority);
+            EXPECT_EQ(0, std::strcmp(lhs->get_pg_members()[i].name, rhs->get_pg_members()[i].name));
+        }
+        for (homestore::chunk_num_t i = 0; i < lhs->num_chunks; ++i) {
+            EXPECT_EQ(lhs->get_chunk_ids()[i], rhs->get_chunk_ids()[i]);
         }
     }
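Until a dedicated pg_id_hint field exists, callers, including the unit tests below, smuggle the pg id through blk_alloc_hints::pdev_id_hint. A minimal sketch of that bypass with a simplified, hypothetical hints struct:

#include <cstdint>
#include <optional>

struct blk_alloc_hints { std::optional< uint32_t > pdev_id_hint; };
using pg_id_t = uint16_t;

static blk_alloc_hints make_pg_hints(pg_id_t pg_id) {
    blk_alloc_hints hints;
    hints.pdev_id_hint = pg_id; // FIXME-style bypass: the field name says pdev, the value is a pg id
    return hints;
}

int main() {
    auto hints = make_pg_hints(1);
    return hints.pdev_id_hint.value() == 1 ? 0 : 1;
}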
diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
index 5358a56a..0f3a1c1f 100644
--- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
@@ -1,5 +1,3 @@
-#include "lib/homestore_backend/heap_chunk_selector.h"
-
 #include <gtest/gtest.h>
 #include <memory>

@@ -8,6 +6,11 @@

 #include <sisl/logging/logging.h>

+#include "homeobject/common.hpp"
+#define protected public
+#define private public
+#include "lib/homestore_backend/heap_chunk_selector.h"
+
 SISL_LOGGING_DEF(HOMEOBJECT_LOG_MODS)
 SISL_LOGGING_INIT(HOMEOBJECT_LOG_MODS)
 SISL_OPTIONS_ENABLE(logging)
@@ -35,9 +38,10 @@ class Chunk : public std::enable_shared_from_this< Chunk > {

     uint16_t get_chunk_id() const { return m_chunk_id; }

-    blk_num_t get_total_blks() const { return 0; }
+    blk_num_t get_total_blks() const { return m_available_blks; }
     void set_chunk_id(uint16_t chunk_id) { m_chunk_id = chunk_id; }
     const std::shared_ptr< Chunk > get_internal_chunk() { return shared_from_this(); }
+    uint64_t size() const { return 1 * Mi; }

     Chunk(uint32_t pdev_id, uint16_t chunk_id, uint32_t available_blks, uint32_t defrag_nblks) {
         m_available_blks = available_blks;
@@ -69,6 +73,8 @@ uint16_t VChunk::get_chunk_id() const { return m_internal_chunk->get_chunk_id();

 blk_num_t VChunk::get_total_blks() const { return m_internal_chunk->get_total_blks(); }

+uint64_t VChunk::size() const { return m_internal_chunk->size(); }
+
 cshared< Chunk > VChunk::get_internal_chunk() const { return m_internal_chunk->get_internal_chunk(); }

 } // namespace homestore
@@ -90,10 +96,47 @@ class HeapChunkSelectorTest : public ::testing::Test {
         HCS.add_chunk(std::make_shared< Chunk >(3, 7, 1, 3));
         HCS.add_chunk(std::make_shared< Chunk >(3, 8, 2, 2));
         HCS.add_chunk(std::make_shared< Chunk >(3, 9, 3, 1));
-        std::unordered_set< chunk_num_t > excludingChunks;
-        HCS.build_per_dev_chunk_heap(excludingChunks);
+        HCS.recover_per_dev_chunk_heap();
+        prepare_pg();
     };

+    void prepare_pg() {
+        const uint32_t chunk_size = HCS.get_chunk_size(); // may be a problem
+        const uint64_t pg_size = chunk_size * 3;
+        for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+            HCS.select_chunks_for_pg(pg_id, pg_size);
+            uint32_t last_pdev_id = 0;
+            // test pg heap
+            auto pg_heap_it = HCS.m_per_pg_heap.find(pg_id);
+            ASSERT_NE(pg_heap_it, HCS.m_per_pg_heap.end());
+            ASSERT_EQ(pg_heap_it->second->size(), 3);
+
+            // test chunk_map
+            auto v2r_chunk_map_it = HCS.m_v2r_chunk_map.find(pg_id);
+            ASSERT_NE(v2r_chunk_map_it, HCS.m_v2r_chunk_map.end());
+            ASSERT_EQ(v2r_chunk_map_it->second->size(), 3);
+
+            auto r2v_chunk_map_it = HCS.m_r2v_chunk_map.find(pg_id);
+            ASSERT_NE(r2v_chunk_map_it, HCS.m_r2v_chunk_map.end());
+            ASSERT_EQ(r2v_chunk_map_it->second->size(), 3);
+            for (int i = 0; i < 3; ++i) {
+                auto r_chunk_id = v2r_chunk_map_it->second->at(i);
+                ASSERT_EQ(i, r2v_chunk_map_it->second->at(r_chunk_id));
+                auto pdev_id = HCS.m_chunks[r_chunk_id]->get_pdev_id();
+                if (last_pdev_id != 0) {
+                    ASSERT_EQ(last_pdev_id, pdev_id);
+                } else {
+                    last_pdev_id = pdev_id;
+                }
+
+                auto pdev_it = HCS.m_per_dev_heap.find(pdev_id);
+                ASSERT_NE(pdev_it, HCS.m_per_dev_heap.end());
+                ASSERT_EQ(pdev_it->second->size(), 0);
+            }
+        }
+    }
+
 public:
     HeapChunkSelector HCS;
 };
@@ -107,80 +150,144 @@ TEST_F(HeapChunkSelectorTest, test_for_each_chunk) {

 TEST_F(HeapChunkSelectorTest, test_select_chunk) {
     homestore::blk_count_t count = 1;
     homestore::blk_alloc_hints hints;
-    for (uint32_t i = 1; i < 4; i++) {
-        hints.pdev_id_hint = i;
-        for (int j = 3; j > 0; j--) {
+    auto chunk = HCS.select_chunk(count, hints);
+    ASSERT_EQ(chunk, nullptr);
+
+    for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
+        hints.pdev_id_hint = pg_id; // temporary bypass: use pdev_id_hint to carry the pg_id
+        for (int j = 3; j > 0; --j) {
             auto chunk = HCS.select_chunk(count, hints);
-            ASSERT_EQ(chunk->get_pdev_id(), i);
+            ASSERT_NE(chunk, nullptr);
+            ASSERT_EQ(chunk->get_pdev_id(), pg_id);
             ASSERT_EQ(chunk->available_blks(), j);
         }
     }
 }

+
 TEST_F(HeapChunkSelectorTest, test_select_specific_chunk) {
-    const chunk_num_t chunk_id = 3;
-    auto chunk = HCS.select_specific_chunk(chunk_id);
-    ASSERT_EQ(chunk->get_pdev_id(), 1);
+    const uint16_t pg_id = 1;
+    auto chunk_ids = HCS.get_pg_chunks(pg_id);
+    ASSERT_NE(chunk_ids, nullptr);
+    const chunk_num_t chunk_id = chunk_ids->at(0);
+
+    auto chunk = HCS.select_specific_chunk(pg_id, chunk_id);
     ASSERT_EQ(chunk->get_chunk_id(), chunk_id);
+    auto pdev_id = chunk->get_pdev_id();
+
+    // make sure the pg heap is updated
+    auto pg_heap_it = HCS.m_per_pg_heap.find(pg_id);
+    ASSERT_NE(pg_heap_it, HCS.m_per_pg_heap.end());
+    ASSERT_EQ(pg_heap_it->second->size(), 2);
+
+    // make sure the chunk maps stay stable
+    auto v2r_chunk_map_it = HCS.m_v2r_chunk_map.find(pg_id);
+    ASSERT_NE(v2r_chunk_map_it, HCS.m_v2r_chunk_map.end());
+    ASSERT_EQ(v2r_chunk_map_it->second->size(), 3);
+
+    auto r2v_chunk_map_it = HCS.m_r2v_chunk_map.find(pg_id);
+    ASSERT_NE(r2v_chunk_map_it, HCS.m_r2v_chunk_map.end());
+    ASSERT_EQ(r2v_chunk_map_it->second->size(), 3);

     // select the rest chunks to make sure specific chunk does not exist in HeapChunkSelector anymore.
     homestore::blk_count_t count = 1;
     homestore::blk_alloc_hints hints;
-    for (uint32_t i = 1; i < 4; i++) {
-        hints.pdev_id_hint = i;
-        auto chunk_num = 3;
-        if (i == 1) { --chunk_num; }
-        for (int j = chunk_num; j > 0; j--) {
-            auto chunk = HCS.select_chunk(count, hints);
-            ASSERT_EQ(chunk->get_pdev_id(), i);
-            ASSERT_EQ(chunk->available_blks(), j);
-        }
+    hints.pdev_id_hint = pg_id;
+    for (int j = 2; j > 0; --j) {
+        auto chunk = HCS.select_chunk(count, hints);
+        ASSERT_EQ(chunk->get_pdev_id(), pdev_id);
     }

     // release this chunk to HeapChunkSelector
-    HCS.release_chunk(chunk_id);
-    chunk = HCS.select_chunk(1, homestore::blk_alloc_hints());
+    HCS.release_chunk(pg_id, chunk_id);
+    chunk = HCS.select_chunk(1, hints);
     ASSERT_EQ(1, chunk->get_pdev_id());
     ASSERT_EQ(chunk_id, chunk->get_chunk_id());
-}
-
-TEST_F(HeapChunkSelectorTest, test_most_defrag_chunk) {
-    for (uint32_t i = 1; i < 6; i++) {
-        auto chunk = HCS.most_defrag_chunk();
-        // should always select the chunk with the most defrag blocks
-        ASSERT_EQ(chunk->get_chunk_id(), i);
-    }
-
-    // after release a chunk with the most defrag blocks, most_defrag_chunk should select this chunk.
-    HCS.release_chunk(1);
-    auto chunk = HCS.most_defrag_chunk();
-    ASSERT_EQ(chunk->get_chunk_id(), 1);
 }

+
 TEST_F(HeapChunkSelectorTest, test_release_chunk) {
     homestore::blk_count_t count = 1;
     homestore::blk_alloc_hints hints;
-    hints.pdev_id_hint = 1;
+    const uint16_t pg_id = 1;
+    hints.pdev_id_hint = pg_id;
     auto chunk1 = HCS.select_chunk(count, hints);
-    ASSERT_EQ(chunk1->get_pdev_id(), 1);
+    auto pdev_id = chunk1->get_pdev_id();
+
+    ASSERT_EQ(chunk1->get_pdev_id(), pdev_id);
     ASSERT_EQ(chunk1->available_blks(), 3);

     auto chunk2 = HCS.select_chunk(count, hints);
-    ASSERT_EQ(chunk2->get_pdev_id(), 1);
+    ASSERT_EQ(chunk2->get_pdev_id(), pdev_id);
     ASSERT_EQ(chunk2->available_blks(), 2);

-    HCS.release_chunk(chunk1->get_chunk_id());
-    HCS.release_chunk(chunk2->get_chunk_id());
+    HCS.release_chunk(pg_id, chunk1->get_chunk_id());
+    HCS.release_chunk(pg_id, chunk2->get_chunk_id());
     chunk1 = HCS.select_chunk(count, hints);
-    ASSERT_EQ(chunk1->get_pdev_id(), 1);
+    ASSERT_EQ(chunk1->get_pdev_id(), pdev_id);
     ASSERT_EQ(chunk1->available_blks(), 3);
     chunk2 = HCS.select_chunk(count, hints);
-    ASSERT_EQ(chunk2->get_pdev_id(), 1);
+    ASSERT_EQ(chunk2->get_pdev_id(), pdev_id);
     ASSERT_EQ(chunk2->available_blks(), 2);
 }

+TEST_F(HeapChunkSelectorTest, test_recovery) {
+    HeapChunkSelector HCS_recovery;
+    HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 1, 1, 9));
+    HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 2, 2, 8));
+    HCS_recovery.add_chunk(std::make_shared< Chunk >(1, 3, 3, 7));
+    HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 4, 1, 6));
+    HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 5, 2, 5));
+    HCS_recovery.add_chunk(std::make_shared< Chunk >(2, 6, 3, 4));
+
+    std::vector< chunk_num_t > chunk_ids{1, 2, 3};
+    const uint16_t pg_id = 1;
+    // test recover chunk map
+    HCS_recovery.set_pg_chunks(pg_id, std::move(chunk_ids));
+    auto v2r_chunk_map_it = HCS_recovery.m_v2r_chunk_map.find(pg_id);
+    ASSERT_NE(v2r_chunk_map_it, HCS_recovery.m_v2r_chunk_map.end());
+    ASSERT_EQ(v2r_chunk_map_it->second->size(), 3);
+
+    auto r2v_chunk_map_it = HCS_recovery.m_r2v_chunk_map.find(pg_id);
+    ASSERT_NE(r2v_chunk_map_it, HCS_recovery.m_r2v_chunk_map.end());
+    ASSERT_EQ(r2v_chunk_map_it->second->size(), 3);
+    // test recover pdev map
+    HCS_recovery.recover_per_dev_chunk_heap();
+    auto pdev_it = HCS_recovery.m_per_dev_heap.find(1);
+    ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end());
+    ASSERT_EQ(pdev_it->second->size(), 0);
+
+    pdev_it = HCS_recovery.m_per_dev_heap.find(2);
+    ASSERT_NE(pdev_it, HCS_recovery.m_per_dev_heap.end());
+    ASSERT_EQ(pdev_it->second->size(), 3);
+    auto& pdev_heap = pdev_it->second->m_heap;
+    auto vchunk = homestore::VChunk(nullptr);
+    for (int i = 6; i > 3; --i) {
+        vchunk = pdev_heap.top();
+        pdev_heap.pop();
+        ASSERT_EQ(vchunk.get_chunk_id(), i);
+    }
+
+    // test recover pg heap
+    std::unordered_set< homestore::chunk_num_t > excluding_chunks;
+    excluding_chunks.emplace(1);
+    HCS_recovery.recover_pg_chunk_heap(pg_id, excluding_chunks);
+    auto pg_heap_it = HCS_recovery.m_per_pg_heap.find(pg_id);
+    ASSERT_NE(pg_heap_it, HCS_recovery.m_per_pg_heap.end());
+    ASSERT_EQ(pg_heap_it->second->size(), 2);
+
+    homestore::blk_alloc_hints hints;
+    hints.pdev_id_hint = pg_id;
+    for (int j = 3; j > 1; --j) {
+        auto chunk = HCS_recovery.select_chunk(1, hints);
+        ASSERT_EQ(chunk->get_pdev_id(), 1);
+        ASSERT_EQ(chunk->available_blks(), j);
+    }
+}
+
 int main(int argc, char* argv[]) {
     int parsed_argc = argc;
     ::testing::InitGoogleTest(&parsed_argc, argv);