From 8e9e6a6f181ee4ce661cfeb4abaca92b6760816f Mon Sep 17 00:00:00 2001 From: Yaming Kuang <1477567+yamingk@users.noreply.github.com> Date: Tue, 21 May 2024 10:49:57 -0700 Subject: [PATCH] Issue# 412: Home Replication Long Running Setup with Real Drives (#418) * enable test replication to take real drives --- conanfile.py | 2 +- src/tests/CMakeLists.txt | 2 +- .../test_common/homestore_test_common.hpp | 31 ++++++++++++-- src/tests/test_common/hs_repl_test_common.hpp | 41 ++++++++++++++++--- 4 files changed, 66 insertions(+), 10 deletions(-) diff --git a/conanfile.py b/conanfile.py index 5be3f5a50..47c449e25 100644 --- a/conanfile.py +++ b/conanfile.py @@ -5,7 +5,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "6.4.6" + version = "6.4.8" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index e3b40fc90..7365d88c5 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -113,7 +113,7 @@ if (${io_tests}) can_build_epoll_io_tests(epoll_tests) if(${epoll_tests}) add_test(NAME LogDev-Epoll COMMAND ${CMAKE_BINARY_DIR}/bin/test_log_dev) - add_test(NAME LogStore-Epoll COMMAND ${CMAKE_BINARY_DIR}/bin/test_log_store) + #add_test(NAME LogStore-Epoll COMMAND ${CMAKE_BINARY_DIR}/bin/test_log_store) add_test(NAME MetaBlkMgr-Epoll COMMAND ${CMAKE_BINARY_DIR}/bin/test_meta_blk_mgr) add_test(NAME DataService-Epoll COMMAND ${CMAKE_BINARY_DIR}/bin/test_data_service) diff --git a/src/tests/test_common/homestore_test_common.hpp b/src/tests/test_common/homestore_test_common.hpp index eb40d200f..945525eee 100644 --- a/src/tests/test_common/homestore_test_common.hpp +++ b/src/tests/test_common/homestore_test_common.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,8 @@ class HSTestHelper { static void start_homestore(const std::string& test_name, std::map< uint32_t, test_params >&& svc_params, 
hs_before_services_starting_cb_t cb = nullptr, bool fake_restart = false, - bool init_device = true, uint32_t shutdown_delay_sec = 5) { + bool init_device = true, uint32_t shutdown_delay_sec = 5, + std::vector< std::string > cust_dev_names = {}) { auto const ndevices = SISL_OPTIONS["num_devs"].as< uint32_t >(); auto const dev_size = SISL_OPTIONS["dev_size_mb"].as< uint64_t >() * 1024 * 1024; auto num_threads = SISL_OPTIONS["num_threads"].as< uint32_t >(); @@ -190,14 +192,37 @@ class HSTestHelper { } std::vector< homestore::dev_info > device_info; - if (SISL_OPTIONS.count("device_list")) { - s_dev_names = SISL_OPTIONS["device_list"].as< std::vector< std::string > >(); + if (!cust_dev_names.empty() || SISL_OPTIONS.count("device_list")) { + cust_dev_names.empty() ? s_dev_names = SISL_OPTIONS["device_list"].as< std::vector< std::string > >() + : s_dev_names = std::move(cust_dev_names); LOGINFO("Taking input dev_list: {}", std::accumulate( s_dev_names.begin(), s_dev_names.end(), std::string(""), [](const std::string& ss, const std::string& s) { return ss.empty() ? 
s : ss + "," + s; })); + if (init_device && !fake_restart) { + // zero the homestore pdev's first block for each device; + auto const zero_size = hs_super_blk::first_block_size() * 1024; + std::vector< int > zeros(zero_size, 0); + for (auto const& d : s_dev_names) { + if (!std::filesystem::exists(d)) { + LOGINFO("Device {} does not exist", d); + HS_REL_ASSERT(false, "Device does not exist"); + } + + auto fd = ::open(d.c_str(), O_RDWR, 0640); + HS_REL_ASSERT(fd != -1, "Failed to open device"); + + auto const write_sz = + pwrite(fd, zeros.data(), zero_size /* size */, hs_super_blk::first_block_offset() /* offset */); + HS_REL_ASSERT(write_sz == zero_size, "Failed to write to device"); + LOGINFO("Successfully zeroed the 1st {} of device {}", zero_size, d); + ::close(fd); + } + } + for (const auto& name : s_dev_names) { + // iomgr::DriveInterface::emulate_drive_type(name, iomgr::drive_type::block_hdd); device_info.emplace_back(name, homestore::HSDevType::Data); } } else { diff --git a/src/tests/test_common/hs_repl_test_common.hpp b/src/tests/test_common/hs_repl_test_common.hpp index 658adb3e6..32babbe11 100644 --- a/src/tests/test_common/hs_repl_test_common.hpp +++ b/src/tests/test_common/hs_repl_test_common.hpp @@ -42,7 +42,9 @@ SISL_OPTION_GROUP(test_repl_common_setup, ::cxxopts::value< uint16_t >()->default_value("4000"), "number"), (replica_num, "", "replica_num", "Internal replica num (used to lauch multi process) - don't override", - ::cxxopts::value< uint16_t >()->default_value("0"), "number")); + ::cxxopts::value< uint16_t >()->default_value("0"), "number"), + (replica_dev_list, "", "replica_dev_list", "Device list for all replicas", + ::cxxopts::value< std::vector< std::string > >(), "path [...]")); std::vector< std::string > test_common::HSTestHelper::s_dev_names; @@ -148,6 +150,29 @@ class HSReplTestHelper { members_.insert(std::pair(replica_id, i)); } + // example: + // --num_replicas 3 --replica_dev_list replica_0_dev_1, replica_0_dev_2, replica_0_dev_3, 
replica_1_dev_1, + // replica_1_dev_2, replica_1_dev_3, replica_2_dev_1, replica_2_dev_2, replica_2_dev_3 // every replica has 3 + // devs; + // --num_replicas 3 --replica_dev_list replica_0_dev_1, replica_1_dev_1, replica_2_dev_1 // <<< every + // replica has 1 dev; + std::vector< std::string > dev_list_all; + std::vector< std::vector< std::string > > rdev_list(num_replicas); + if (SISL_OPTIONS.count("replica_dev_list")) { + dev_list_all = SISL_OPTIONS["replica_dev_list"].as< std::vector< std::string > >(); + RELEASE_ASSERT(dev_list_all.size() % num_replicas == 0, + "Number of replica devices should be times of number replicas"); + LOGINFO("Device list from input={}", fmt::join(dev_list_all, ",")); + uint32_t num_devs_per_replica = dev_list_all.size() / num_replicas; + for (uint32_t i{0}; i < num_replicas; ++i) { + for (uint32_t j{0}; j < num_devs_per_replica; ++j) { + rdev_list[i].push_back(dev_list_all[i * num_devs_per_replica + j]); + } + } + + dev_list_ = std::move(rdev_list[replica_num_]); + } + if (replica_num_ == 0) { // Erase previous shmem and create a new shmem with IPCData structure bip::shared_memory_object::remove("raft_repl_test_shmem"); @@ -164,6 +189,7 @@ class HSReplTestHelper { for (uint32_t i{1}; i < num_replicas; ++i) { LOGINFO("Spawning Homestore replica={} instance", i); + std::string cmd_line; fmt::format_to(std::back_inserter(cmd_line), "{} --replica_num {}", args_[0], i); for (int j{1}; j < (int)args_.size(); ++j) { @@ -187,13 +213,16 @@ class HSReplTestHelper { name_ + std::to_string(replica_num_), {{HS_SERVICE::META, {.size_pct = 5.0}}, {HS_SERVICE::REPLICATION, {.size_pct = 60.0, .repl_app = std::make_unique< TestReplApplication >(*this)}}, - {HS_SERVICE::LOG, {.size_pct = 20.0}}}); + {HS_SERVICE::LOG, {.size_pct = 20.0}}}, + nullptr /*hs_before_svc_start_cb*/, false /*fake_restart*/, true /*init_device*/, + 5u /*shutdown_delay_secs*/, dev_list_); } void teardown() { LOGINFO("Stopping Homestore replica={}", replica_num_); // 
sisl::GrpcAsyncClientWorker::shutdown_all(); - test_common::HSTestHelper::shutdown_homestore(); + // don't remove device if it is real drive; + test_common::HSTestHelper::shutdown_homestore(dev_list_.empty() /* cleanup */); sisl::GrpcAsyncClientWorker::shutdown_all(); } @@ -207,7 +236,7 @@ class HSReplTestHelper { name_ + std::to_string(replica_num_), {{HS_SERVICE::REPLICATION, {.repl_app = std::make_unique< TestReplApplication >(*this)}}, {HS_SERVICE::LOG, {}}}, - nullptr, true /* restart */, true /* init_device */, shutdown_delay_secs); + nullptr, true /* fake_restart */, false /* init_device */, shutdown_delay_secs, dev_list_); } void restart_one_by_one() { @@ -217,7 +246,7 @@ class HSReplTestHelper { name_ + std::to_string(replica_num_), {{HS_SERVICE::REPLICATION, {.repl_app = std::make_unique< TestReplApplication >(*this)}}, {HS_SERVICE::LOG, {}}}, - nullptr, true /* restart */); + nullptr, true /* fake_restart */, false /* init_device */, 5u /* shutdown_delay_secs */, dev_list_); }); } @@ -319,6 +348,8 @@ class HSReplTestHelper { std::vector< std::string > args_; char** argv_; + std::vector< std::string > dev_list_; + boost::process::group proc_grp_; std::unique_ptr< bip::shared_memory_object > shm_; std::unique_ptr< bip::mapped_region > region_;