diff --git a/conanfile.py b/conanfile.py
index 59fedf6..211d344 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@
 class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "2.1.12"
+    version = "2.1.13"

     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeReplication"
diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp
index a7dcd6f..95d07f7 100644
--- a/src/lib/homestore_backend/heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/heap_chunk_selector.cpp
@@ -56,7 +56,7 @@ csharedChunk HeapChunkSelector::select_chunk(homestore::blk_count_t count, const
     // Both chunk_num_t and pg_id_t are of type uint16_t.
     static_assert(std::is_same< pg_id_t, uint16_t >::value, "pg_id_t is not uint16_t");
     static_assert(std::is_same< homestore::chunk_num_t, uint16_t >::value, "chunk_num_t is not uint16_t");
-    uint32_t application_hint = hint.application_hint.value();
+    auto application_hint = hint.application_hint.value();
     pg_id_t pg_id = (uint16_t)(application_hint >> 16 & 0xFFFF);
     homestore::chunk_num_t v_chunk_id = (uint16_t)(application_hint & 0xFFFF);
     return select_specific_chunk(pg_id, v_chunk_id);
@@ -129,12 +129,13 @@ std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id,
         LOGWARNMOD(homeobject, "PG has already been created, pg_id {}", pg_id);
         return std::nullopt;
     }
-    if (pg_size == 0) {
-        LOGWARNMOD(homeobject, "Not supported to create empty PG, pg_id {}, pg_size {}", pg_id, pg_size);
+
+    const auto chunk_size = get_chunk_size();
+    if (pg_size < chunk_size) {
+        LOGWARNMOD(homeobject, "pg_size {} is less than chunk_size {}", pg_size, chunk_size);
         return std::nullopt;
     }
-    const auto chunk_size = get_chunk_size();
     const uint32_t num_chunk = sisl::round_down(pg_size, chunk_size) / chunk_size;

     // Select a pdev with the most available num chunk
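Note on the select_chunk() hunk: the unpack code only makes sense if callers pack pg_id into the upper 16 bits and v_chunk_id into the lower 16 bits of the application hint, and switching `uint32_t` to `auto` lets the declared type follow whatever width `hint.application_hint` actually carries (it appears to be a 64-bit optional). A minimal sketch of the packing scheme implied by the unpack side; `pack_hint`/`unpack_hint` are illustrative names, not part of the codebase:

```cpp
#include <cstdint>
#include <utility>

// pg_id occupies bits 16..31, v_chunk_id bits 0..15; both are uint16_t per the
// static_asserts above. The pack side is inferred from the unpack side shown in the diff.
inline uint64_t pack_hint(uint16_t pg_id, uint16_t v_chunk_id) {
    return (static_cast< uint64_t >(pg_id) << 16) | v_chunk_id;
}

inline std::pair< uint16_t, uint16_t > unpack_hint(uint64_t application_hint) {
    uint16_t pg_id = static_cast< uint16_t >((application_hint >> 16) & 0xFFFF);
    uint16_t v_chunk_id = static_cast< uint16_t >(application_hint & 0xFFFF);
    return {pg_id, v_chunk_id};
}
```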
diff --git a/src/lib/homestore_backend/hs_homeobject.cpp b/src/lib/homestore_backend/hs_homeobject.cpp
index 34d0bc8..305f5d7 100644
--- a/src/lib/homestore_backend/hs_homeobject.cpp
+++ b/src/lib/homestore_backend/hs_homeobject.cpp
@@ -98,6 +98,8 @@ class HSReplApplication : public homestore::ReplApplication {
 //
 // This should assert if we cannot initialize HomeStore.
 //
+uint64_t HSHomeObject::_hs_chunk_size = HS_CHUNK_SIZE;
+
 DevType HSHomeObject::get_device_type(string const& devname) {
     const iomgr::drive_type dtype = iomgr::DriveInterface::get_drive_type(devname);
     if (dtype == iomgr::drive_type::block_hdd || dtype == iomgr::drive_type::file_on_hdd) { return DevType::HDD; }
@@ -168,7 +170,8 @@ void HSHomeObject::init_homestore() {
                      {HS_SERVICE::REPLICATION,
                       hs_format_params{.dev_type = HSDevType::Data,
                                        .size_pct = 99.0,
-                                       .num_chunks = 60000,
+                                       .num_chunks = 0,
+                                       .chunk_size = _hs_chunk_size,
                                        .block_size = _data_block_size,
                                        .alloc_type = blk_allocator_type_t::append,
                                        .chunk_sel_type = chunk_selector_type_t::CUSTOM}},
@@ -185,7 +188,8 @@ void HSHomeObject::init_homestore() {
                      {HS_SERVICE::REPLICATION,
                       hs_format_params{.dev_type = run_on_type,
                                        .size_pct = 79.0,
-                                       .num_chunks = 60000,
+                                       .num_chunks = 0,
+                                       .chunk_size = _hs_chunk_size,
                                        .block_size = _data_block_size,
                                        .alloc_type = blk_allocator_type_t::append,
                                        .chunk_sel_type = chunk_selector_type_t::CUSTOM}},
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index d586bea..43da954 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -32,11 +32,14 @@ BlobError toBlobError(homestore::ReplServiceError const&);
 ShardError toShardError(homestore::ReplServiceError const&);

 class HSHomeObject : public HomeObjectImpl {
+private:
     /// NOTE: Be wary to change these as they affect the on-disk format!
     inline static auto const _svc_meta_name = std::string("HomeObject");
     inline static auto const _pg_meta_name = std::string("PGManager");
     inline static auto const _shard_meta_name = std::string("ShardManager");
+    static constexpr uint64_t HS_CHUNK_SIZE = 2 * Gi;
     static constexpr uint32_t _data_block_size = 1024;
+    static uint64_t _hs_chunk_size;

     ///
     /// Overridable Helpers
@@ -244,7 +247,7 @@ class HSHomeObject : public HomeObjectImpl {
     struct HS_Shard : public Shard {
         homestore::superblk< shard_info_superblk > sb_;

-        HS_Shard(ShardInfo info, homestore::chunk_num_t p_chunk_id);
+        HS_Shard(ShardInfo info, homestore::chunk_num_t p_chunk_id, homestore::chunk_num_t v_chunk_id);
         HS_Shard(homestore::superblk< shard_info_superblk >&& sb);
         ~HS_Shard() override = default;
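The format parameters now pin the chunk size instead of the chunk count: with `.num_chunks = 0` and an explicit `.chunk_size`, the count presumably falls out of the service capacity (that derivation happens inside HomeStore and is an assumption here, not shown in this diff). `_hs_chunk_size` is a mutable static defaulting to `HS_CHUNK_SIZE` (2 GiB) chiefly so tests can shrink it; the fixture below sets 20 MiB. A rough sketch of the proportionality, under that assumption:

```cpp
#include <cstdint>

constexpr uint64_t Gi = 1024ull * 1024 * 1024;

// Assumption: with num_chunks == 0, HomeStore derives the count from capacity.
// Exact rounding is HomeStore's business; this only shows the proportionality.
constexpr uint64_t chunk_size = 2 * Gi;                           // HS_CHUNK_SIZE
constexpr uint64_t svc_capacity = 10ull * 1024 * Gi * 99 / 100;   // 99% of an illustrative 10 TiB pdev
constexpr uint64_t derived_num_chunks = svc_capacity / chunk_size; // ~5068 chunks here

static_assert(derived_num_chunks >= 1, "device smaller than one chunk");
```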
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index 1d03ec6..72a3cc2 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -60,13 +60,13 @@ PGManager::NullAsyncResult HSHomeObject::_create_pg(PGInfo&& pg_info, std::set<
     auto pg_id = pg_info.id;
     if (auto lg = std::shared_lock(_pg_lock); _pg_map.end() != _pg_map.find(pg_id)) return folly::Unit();

-    if (pg_info.size == 0) {
-        LOGW("Not supported to create empty PG, pg_id {}, pg_size {}", pg_id, pg_info.size);
+    const auto most_avail_num_chunks = chunk_selector()->most_avail_num_chunks();
+    const auto chunk_size = chunk_selector()->get_chunk_size();
+    if (pg_info.size < chunk_size) {
+        LOGW("Not supported to create a PG whose pg_size {} < chunk_size {}", pg_info.size, chunk_size);
         return folly::makeUnexpected(PGError::INVALID_ARG);
     }
-    const auto most_avail_num_chunks = chunk_selector()->most_avail_num_chunks();
-    const auto chunk_size = chunk_selector()->get_chunk_size();
     const auto needed_num_chunks = sisl::round_down(pg_info.size, chunk_size) / chunk_size;
     if (needed_num_chunks > most_avail_num_chunks) {
         LOGW("Not enough space to create pg, pg_id {}, needed_num_chunks {}, most_avail_num_chunks {}", pg_id,
              needed_num_chunks, most_avail_num_chunks);
@@ -173,7 +173,8 @@ void HSHomeObject::on_create_pg_message_commit(int64_t lsn, sisl::blob const& he
     RELEASE_ASSERT(index_table_pg_map_.count(uuid_str) == 0, "duplicate index table found");
     index_table_pg_map_[uuid_str] = PgIndexTable{pg_id, index_table};

-    LOGI("Index table created for pg {} uuid {}", pg_id, uuid_str);
+    LOGI("created pg {} successfully, index table uuid={} pg_size={} num_chunk={}", pg_id, uuid_str, pg_info.size,
+         num_chunk.value());
     hs_pg->index_table_ = index_table;
     // Add to index service, so that it gets cleaned up when index service is shutdown.
     homestore::hs()->index_service().add_index_table(index_table);
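Taken together with the selector hunk above, PG admission now has two gates: a PG smaller than one chunk is rejected as INVALID_ARG, and a PG needing more chunks than the best pdev can supply is rejected for space. A simplified mirror of that logic; the enum and function names are illustrative, the real code returns `folly::makeUnexpected` and uses `sisl::round_down`, which for integer arguments reduces to plain division here:

```cpp
#include <cstdint>

enum class PgCheck { OK, INVALID_ARG, NO_SPACE_LEFT };   // illustrative only

PgCheck check_pg_size(uint64_t pg_size, uint64_t chunk_size, uint32_t most_avail_num_chunks) {
    if (pg_size < chunk_size) return PgCheck::INVALID_ARG;   // was: pg_size == 0
    // sisl::round_down(pg_size, chunk_size) / chunk_size == pg_size / chunk_size
    uint64_t needed_num_chunks = pg_size / chunk_size;
    if (needed_num_chunks > most_avail_num_chunks) return PgCheck::NO_SPACE_LEFT;
    return PgCheck::OK;
}
// e.g. with chunk_size = 2 GiB: pg_size = 3 GiB -> 1 chunk; pg_size = 1 GiB -> INVALID_ARG.
```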
diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 74102f7..6c0fbe3 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -343,6 +343,7 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom
     case ReplicationMessageType::CREATE_SHARD_MSG: {
         auto sb = r_cast< shard_info_superblk const* >(h.cbytes() + sizeof(ReplicationMessageHeader));
         auto shard_info = sb->info;
+        auto v_chunk_id = sb->v_chunk_id;
         shard_info.lsn = lsn;

         bool shard_exist = false;
@@ -351,11 +352,12 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom
             shard_exist = (_shard_map.find(shard_info.id) != _shard_map.end());
         }
         if (!shard_exist) {
-            add_new_shard_to_map(std::make_unique< HS_Shard >(shard_info, blkids.chunk_num()));
+            add_new_shard_to_map(std::make_unique< HS_Shard >(shard_info, blkids.chunk_num(), v_chunk_id));
             // select_specific_chunk() does something only when we are replaying the journal after a restart; in the
             // runtime flow the chunk has already been marked busy when we wrote the shard info to the repldev.
             auto pg_id = shard_info.placement_group;
-            chunk_selector_->select_specific_chunk(pg_id, blkids.chunk_num());
+            auto chunk = chunk_selector_->select_specific_chunk(pg_id, v_chunk_id);
+            RELEASE_ASSERT(chunk != nullptr, "chunk selection failed with v_chunk_id: {} in PG: {}", v_chunk_id, pg_id);
         }
         if (ctx) { ctx->promise_.setValue(ShardManager::Result< ShardInfo >(shard_info)); }
@@ -524,11 +526,13 @@ bool HSHomeObject::release_chunk_based_on_create_shard_message(sisl::blob const&
     }
 }

-HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t p_chunk_id) :
+HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t p_chunk_id,
+                                 homestore::chunk_num_t v_chunk_id) :
         Shard(std::move(shard_info)), sb_(_shard_meta_name) {
     sb_.create(sizeof(shard_info_superblk));
     sb_->info = info;
     sb_->p_chunk_id = p_chunk_id;
+    sb_->v_chunk_id = v_chunk_id;
     sb_.write();
 }
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index 2d4ca50..a730a49 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -167,7 +167,10 @@ ReplicationStateMachine::get_blk_alloc_hints(sisl::blob const& header, uint32_t
     case ReplicationMessageType::SEAL_SHARD_MSG: {
         auto p_chunkID = home_object_->get_shard_p_chunk_id(msg_header->shard_id);
-        RELEASE_ASSERT(p_chunkID.has_value(), "unknown shard id to get binded chunk");
+        if (!p_chunkID.has_value()) {
+            LOGW("shard {} does not exist, underlying engine will retry this later", msg_header->shard_id);
+            return folly::makeUnexpected(homestore::ReplServiceError::RESULT_NOT_EXIST_YET);
+        }
         homestore::blk_alloc_hints hints;
         hints.chunk_id_hint = p_chunkID.value();
         return hints;
diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
index d54bac1..250016b 100644
--- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp
+++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
@@ -36,6 +36,7 @@ class HomeObjectFixture : public ::testing::Test {
     HomeObjectFixture() : rand_blob_size{1u, 16 * 1024}, rand_user_key_size{1u, 1024} {}

     void SetUp() override {
+        HSHomeObject::_hs_chunk_size = 20 * Mi;
         _obj_inst = std::dynamic_pointer_cast< HSHomeObject >(g_helper->build_new_homeobject());
         g_helper->sync();
     }
@@ -126,6 +127,19 @@
         std::this_thread::sleep_for(std::chrono::milliseconds(1000));
     }

+    // publish the leader's v_chunk_id via IPC
+    run_on_pg_leader(pg_id, [&]() {
+        auto v_chunkID = _obj_inst->get_shard_v_chunk_id(shard_id);
+        RELEASE_ASSERT(v_chunkID.has_value(), "failed to get shard v_chunk_id");
+        g_helper->set_v_chunk_id(v_chunkID.value());
+    });
+
+    // fetch the leader's v_chunk_id from IPC and compare it with the local one
+    auto leader_v_chunk_id = g_helper->get_v_chunk_id();
+    auto local_v_chunkID = _obj_inst->get_shard_v_chunk_id(shard_id);
+    RELEASE_ASSERT(local_v_chunkID.has_value(), "failed to get shard v_chunk_id");
+    RELEASE_ASSERT(leader_v_chunk_id == local_v_chunkID, "v_chunk_id is supposed to be identical across replicas");
+
     auto r = _obj_inst->shard_manager()->get_shard(shard_id).get();
     RELEASE_ASSERT(!!r, "failed to get shard {}", shard_id);
     return r.value();
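The shard superblock now persists the PG-local virtual chunk id alongside the physical one, which is what lets `on_shard_message_commit()` re-mark the right chunk busy during journal replay and lets `get_blk_alloc_hints()` degrade from a hard assert to a retryable error. The full `shard_info_superblk` definition is not part of this diff; the sketch below shows only the fields these hunks read and write, with types narrowed to keep it self-contained:

```cpp
#include <cstdint>

using chunk_num_t = uint16_t;   // matches the static_asserts in heap_chunk_selector.cpp

// Sketch only: the real struct lives in hs_homeobject.hpp and starts with the
// ShardInfo payload (elided here). New in this change: v_chunk_id.
struct shard_info_superblk_sketch {
    // ShardInfo info;            // shard metadata written via sb_->info
    chunk_num_t p_chunk_id;       // physical chunk backing the shard
    chunk_num_t v_chunk_id;       // PG-local virtual chunk id, persisted so replay
                                  // can call select_specific_chunk(pg_id, v_chunk_id)
};
```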
diff --git a/src/lib/homestore_backend/tests/hs_pg_tests.cpp b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
index 3a0dd89..b9ac28c 100644
--- a/src/lib/homestore_backend/tests/hs_pg_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_pg_tests.cpp
@@ -84,6 +84,45 @@ TEST_F(HomeObjectFixture, PGExceedSpaceTest) {
     }
 }

+TEST_F(HomeObjectFixture, PGSizeLessThanChunkTest) {
+    LOGINFO("HomeObject replica={} setup completed", g_helper->replica_num());
+    g_helper->sync();
+    pg_id_t pg_id{1};
+    if (0 == g_helper->replica_num()) { // leader
+        auto members = g_helper->members();
+        auto name = g_helper->name();
+        auto info = homeobject::PGInfo(pg_id);
+        info.size = 1; // less than chunk size
+        for (const auto& member : members) {
+            if (0 == member.second) {
+                // by default, the leader is the first member
+                info.members.insert(homeobject::PGMember{member.first, name + std::to_string(member.second), 1});
+            } else {
+                info.members.insert(homeobject::PGMember{member.first, name + std::to_string(member.second), 0});
+            }
+        }
+        auto p = _obj_inst->pg_manager()->create_pg(std::move(info)).get();
+        ASSERT_TRUE(p.hasError());
+        PGError error = p.error();
+        ASSERT_EQ(PGError::INVALID_ARG, error);
+    } else {
+        auto start_time = std::chrono::steady_clock::now();
+        bool res = true;
+        // followers need to wait for pg creation
+        while (!pg_exist(pg_id)) {
+            auto current_time = std::chrono::steady_clock::now();
+            auto duration = std::chrono::duration_cast< std::chrono::seconds >(current_time - start_time).count();
+            if (duration >= 20) {
+                LOGINFO("Failed to create pg {} at follower", pg_id);
+                res = false;
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+        ASSERT_FALSE(res);
+    }
+}
+
 TEST_F(HomeObjectFixture, PGRecoveryTest) {
     // create 10 pg
     for (pg_id_t i = 1; i < 11; i++) {
diff --git a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
index 7fb638e..549655a 100644
--- a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
+++ b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp
@@ -43,6 +43,7 @@ namespace bip = boost::interprocess;
 using namespace homeobject;

 #define INVALID_UINT64_ID UINT64_MAX
+#define INVALID_CHUNK_NUM UINT16_MAX

 namespace test_common {
@@ -56,6 +57,7 @@ class HSReplTestHelper {
             sync_point_num_ = sync_point;
             homeobject_replica_count_ = 0;
             uint64_id_ = INVALID_UINT64_ID;
+            v_chunk_id_ = INVALID_CHUNK_NUM;
             cv_.notify_all();
         } else {
             cv_.wait(lg, [this, sync_point]() { return sync_point_num_ == sync_point; });
@@ -72,6 +74,16 @@ class HSReplTestHelper {
         return uint64_id_;
     }

+    void set_v_chunk_id(homestore::chunk_num_t input_v_chunk_id) {
+        std::unique_lock< bip::interprocess_mutex > lg(mtx_);
+        v_chunk_id_ = input_v_chunk_id;
+    }
+
+    homestore::chunk_num_t get_v_chunk_id() {
+        std::unique_lock< bip::interprocess_mutex > lg(mtx_);
+        return v_chunk_id_;
+    }
+
 private:
     bip::interprocess_mutex mtx_;
     bip::interprocess_condition cv_;
@@ -80,6 +92,9 @@ class HSReplTestHelper {
     // the following variables are used to share shard_id and blob_id among different replicas
     uint64_t uint64_id_{0};

+    // used to verify an identical chunk layout across replicas
+    homestore::chunk_num_t v_chunk_id_{0};
+
     // the nth synchronization point, that is how many times different replicas have synced
     uint64_t sync_point_num_{UINT64_MAX};
 };
@@ -256,6 +271,8 @@ class HSReplTestHelper {
     void sync() { ipc_data_->sync(sync_point_num++, total_replicas_nums_); }
     void set_uint64_id(uint64_t uint64_id) { ipc_data_->set_uint64_id(uint64_id); }
     uint64_t get_uint64_id() { return ipc_data_->get_uint64_id(); }
+    void set_v_chunk_id(homestore::chunk_num_t v_chunk_id) { ipc_data_->set_v_chunk_id(v_chunk_id); }
+    homestore::chunk_num_t get_v_chunk_id() { return ipc_data_->get_v_chunk_id(); }

     void check_and_kill(int port) {
         std::string command = "lsof -t -i:" + std::to_string(port);
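The helper shares the leader's v_chunk_id with follower processes through the same interprocess-mutex-guarded slot pattern already used for `uint64_id_`, with UINT16_MAX as the "unset" sentinel. A minimal standalone sketch of that pattern; in the helper the struct lives in shared memory so every replica process sees one instance:

```cpp
#include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <cstdint>
#include <mutex>

// Sketch of the shared-slot idiom: a sentinel marks "not yet published" and an
// interprocess mutex guards the value (works with std::unique_lock since the
// mutex is BasicLockable).
struct SharedChunkId {
    static constexpr uint16_t INVALID = UINT16_MAX;   // INVALID_CHUNK_NUM
    boost::interprocess::interprocess_mutex mtx;
    uint16_t v_chunk_id{INVALID};

    void set(uint16_t id) {
        std::unique_lock< boost::interprocess::interprocess_mutex > lg(mtx);
        v_chunk_id = id;
    }
    uint16_t get() {
        std::unique_lock< boost::interprocess::interprocess_mutex > lg(mtx);
        return v_chunk_id;
    }
};
```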
diff --git a/src/lib/homestore_backend/tests/hs_shard_tests.cpp b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
index 7d3feb7..884065d 100644
--- a/src/lib/homestore_backend/tests/hs_shard_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_shard_tests.cpp
@@ -34,24 +34,30 @@ TEST_F(HomeObjectFixture, CreateMultiShardsOnMultiPG) {
     for (const auto pg : pgs) {
         auto shard_info = create_shard(pg, Mi);
-        auto chunk_num_1 = _obj_inst->get_shard_p_chunk_id(shard_info.id);
-        ASSERT_TRUE(chunk_num_1.has_value());
+        auto p_chunk_ID1 = _obj_inst->get_shard_p_chunk_id(shard_info.id);
+        auto v_chunk_ID1 = _obj_inst->get_shard_v_chunk_id(shard_info.id);
+        ASSERT_TRUE(p_chunk_ID1.has_value());
+        ASSERT_TRUE(v_chunk_ID1.has_value());

         // create another shard again.
         shard_info = create_shard(pg, Mi);
-        auto chunk_num_2 = _obj_inst->get_shard_p_chunk_id(shard_info.id);
-        ASSERT_TRUE(chunk_num_2.has_value());
+        auto p_chunk_ID2 = _obj_inst->get_shard_p_chunk_id(shard_info.id);
+        auto v_chunk_ID2 = _obj_inst->get_shard_v_chunk_id(shard_info.id);
+        ASSERT_TRUE(p_chunk_ID2.has_value());
+        ASSERT_TRUE(v_chunk_ID2.has_value());
+        // the two v_chunk_ids should not be the same
+        ASSERT_NE(v_chunk_ID1.value(), v_chunk_ID2.value());

         // check if both chunks are on the same pg and pdev
         auto chunks = _obj_inst->chunk_selector()->m_chunks;
-        ASSERT_TRUE(chunks.find(chunk_num_1.value()) != chunks.end());
-        ASSERT_TRUE(chunks.find(chunk_num_2.value()) != chunks.end());
-        auto chunk_1 = chunks[chunk_num_1.value()];
-        auto chunk_2 = chunks[chunk_num_2.value()];
+        ASSERT_TRUE(chunks.find(p_chunk_ID1.value()) != chunks.end());
+        ASSERT_TRUE(chunks.find(p_chunk_ID2.value()) != chunks.end());
+        auto chunk_1 = chunks[p_chunk_ID1.value()];
+        auto chunk_2 = chunks[p_chunk_ID2.value()];
         ASSERT_TRUE(chunk_1->m_pg_id.has_value());
         ASSERT_TRUE(chunk_2->m_pg_id.has_value());
-        ASSERT_TRUE(chunk_1->m_pg_id.value() == chunk_2->m_pg_id.value());
-        ASSERT_TRUE(chunk_1->get_pdev_id() == chunk_2->get_pdev_id());
+        ASSERT_EQ(chunk_1->m_pg_id.value(), chunk_2->m_pg_id.value());
+        ASSERT_EQ(chunk_1->get_pdev_id(), chunk_2->get_pdev_id());
     }
 }
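The hunk above asserts that two shards in one PG land on distinct virtual chunks yet on the same pg and pdev. The SealShard extension below, like PGSizeLessThanChunkTest earlier, leans on an inline poll-with-timeout idiom for followers; distilled here as a hypothetical helper (the tests inline this pattern rather than calling anything like it):

```cpp
#include <chrono>
#include <functional>
#include <thread>

// Wait up to `timeout` for `done()` to become true, polling once per second.
// Mirrors the 20-second loops in the tests; returns false on timeout, which is
// what the negative tests assert on.
inline bool wait_until(std::function< bool() > done,
                       std::chrono::seconds timeout = std::chrono::seconds(20)) {
    auto start = std::chrono::steady_clock::now();
    while (!done()) {
        if (std::chrono::steady_clock::now() - start >= timeout) return false;
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    }
    return true;
}
```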
@@ -64,6 +70,51 @@ TEST_F(HomeObjectFixture, SealShard) {
     // seal the shard
     shard_info = seal_shard(shard_info.id);
     ASSERT_EQ(ShardInfo::State::SEALED, shard_info.state);
+
+    // seal the shard again
+    shard_info = seal_shard(shard_info.id);
+    ASSERT_EQ(ShardInfo::State::SEALED, shard_info.state);
+
+    // create shards until no space is left; we have 5 chunks in one pg
+    for (auto i = 0; i < 5; i++) {
+        shard_info = create_shard(pg_id, 64 * Mi);
+        ASSERT_EQ(ShardInfo::State::OPEN, shard_info.state);
+    }
+
+    {
+        g_helper->sync();
+
+        // expect shard creation to fail
+        run_on_pg_leader(pg_id, [&]() {
+            auto s = _obj_inst->shard_manager()->create_shard(pg_id, 64 * Mi).get();
+            ASSERT_TRUE(s.hasError());
+            ASSERT_EQ(ShardError::NO_SPACE_LEFT, s.error());
+        });
+
+        auto start_time = std::chrono::steady_clock::now();
+        bool res = true;
+
+        while (g_helper->get_uint64_id() == INVALID_UINT64_ID) {
+            auto current_time = std::chrono::steady_clock::now();
+            auto duration = std::chrono::duration_cast< std::chrono::seconds >(current_time - start_time).count();
+            if (duration >= 20) {
+                LOGINFO("Failed to create shard at pg {}", pg_id);
+                res = false;
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+        ASSERT_FALSE(res);
+    }
+
+    shard_info = seal_shard(shard_info.id);
+    ASSERT_EQ(ShardInfo::State::SEALED, shard_info.state);
+
+    shard_info = create_shard(pg_id, 64 * Mi);
+    ASSERT_EQ(ShardInfo::State::OPEN, shard_info.state);
+
+    shard_info = seal_shard(shard_info.id);
+    ASSERT_EQ(ShardInfo::State::SEALED, shard_info.state);
 }

 TEST_F(HomeObjectFixture, ShardManagerRecovery) {
diff --git a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
index 20fa710..3a7a39d 100644
--- a/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
+++ b/src/lib/homestore_backend/tests/test_heap_chunk_selector.cpp
@@ -108,7 +108,7 @@ class HeapChunkSelectorTest : public ::testing::Test {
     const uint32_t chunk_size = HCS.get_chunk_size();
     const u_int64_t pg_size = chunk_size * 3;
     for (uint16_t pg_id = 1; pg_id < 4; ++pg_id) {
-        // not supported to create empty pg
+        // not supported to create a pg whose pg_size < chunk_size
         ASSERT_FALSE(HCS.select_chunks_for_pg(pg_id, 0).has_value());
         ASSERT_EQ(HCS.select_chunks_for_pg(pg_id, pg_size).value(), 3);
         uint32_t last_pdev_id = 0;
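For the selector unit test, the expected chunk count follows directly from the sizing rule introduced earlier in this change; a self-contained restatement (`expected_num_chunks` is illustrative, not an API):

```cpp
#include <cstdint>
#include <optional>

// A PG of pg_size gets floor(pg_size / chunk_size) chunks; anything below one
// chunk is rejected. select_chunks_for_pg() itself additionally checks pdev
// availability, which this sketch omits.
std::optional< uint32_t > expected_num_chunks(uint64_t pg_size, uint64_t chunk_size) {
    if (pg_size < chunk_size) return std::nullopt;   // covers the pg_size == 0 assertion
    return static_cast< uint32_t >(pg_size / chunk_size);
}
// e.g. expected_num_chunks(3 * chunk_size, chunk_size) == 3, matching the ASSERT_EQ above.
```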