From b89f2751092bbf3fe8d92228fab444c9c7ad7ba0 Mon Sep 17 00:00:00 2001 From: Mehdi Hosseini <116847813+shosseinimotlagh@users.noreply.github.com> Date: Fri, 26 Jan 2024 10:29:18 -0800 Subject: [PATCH] Enable clean restart for index UTs (#292) --- conanfile.py | 2 +- src/include/homestore/btree/btree.hpp | 1 + .../homestore/btree/detail/btree_common.ipp | 22 ++++++++ src/tests/btree_helpers/btree_test_helper.hpp | 7 ++- src/tests/btree_helpers/btree_test_kvs.hpp | 46 ++++++++++++++++ src/tests/btree_helpers/shadow_map.hpp | 39 ++++++++++++++ .../test_common/homestore_test_common.hpp | 7 +-- src/tests/test_index_btree.cpp | 51 ++++++++++++++++-- src/tests/test_scripts/btree_test.py | 52 ++++++++++++++++--- 9 files changed, 206 insertions(+), 21 deletions(-) diff --git a/conanfile.py b/conanfile.py index f8f3cfb78..a742ba048 100644 --- a/conanfile.py +++ b/conanfile.py @@ -5,7 +5,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "5.0.6" + version = "5.0.7" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" topics = ("ebay", "nublox") diff --git a/src/include/homestore/btree/btree.hpp b/src/include/homestore/btree/btree.hpp index 428de8aa9..93d4c2b2b 100644 --- a/src/include/homestore/btree/btree.hpp +++ b/src/include/homestore/btree/btree.hpp @@ -91,6 +91,7 @@ class Btree { void print_tree(const std::string& file = "") const; void print_tree_keys() const; + uint64_t count_keys(bnodeid_t bnodeid) const; nlohmann::json get_metrics_in_json(bool updated = true); bnodeid_t root_node_id() const; diff --git a/src/include/homestore/btree/detail/btree_common.ipp b/src/include/homestore/btree/detail/btree_common.ipp index 44035d238..edd895d89 100644 --- a/src/include/homestore/btree/detail/btree_common.ipp +++ b/src/include/homestore/btree/detail/btree_common.ipp @@ -168,6 +168,28 @@ void Btree< K, V >::to_string_keys(bnodeid_t bnodeid, std::string& buf) const { unlock_node(node, acq_lock); } +template < typename K, typename V > +uint64_t Btree< K, V >::count_keys(bnodeid_t bnodeid) const { + BtreeNodePtr node; + locktype_t acq_lock = locktype_t::READ; + if (read_and_lock_node(bnodeid, node, acq_lock, acq_lock, nullptr) != btree_status_t::success) { return 0; } + uint64_t result = 0; + if (!node->is_leaf()) { + uint32_t i = 0; + while (i < node->total_entries()) { + BtreeLinkInfo p; + node->get_nth_value(i, &p, false); + result += count_keys(p.bnode_id()); + ++i; + } + if (node->has_valid_edge()) { result += count_keys(node->edge_id()); } + } else { + result = node->total_entries(); + } + unlock_node(node, acq_lock); + return result; +} + template < typename K, typename V > void Btree< K, V >::validate_sanity_child(const BtreeNodePtr& parent_node, uint32_t ind) const { BtreeLinkInfo child_info; diff --git a/src/tests/btree_helpers/btree_test_helper.hpp b/src/tests/btree_helpers/btree_test_helper.hpp index 2cb03bca1..9c142ffe5 100644 --- a/src/tests/btree_helpers/btree_test_helper.hpp +++ b/src/tests/btree_helpers/btree_test_helper.hpp @@ -78,7 +78,6 @@ struct BtreeTestHelper { std::vector< iomgr::io_fiber_t > m_fibers; std::mutex m_test_done_mtx; std::condition_variable m_test_done_cv; - std::random_device m_re; std::atomic< uint32_t > m_num_ops{0}; @@ -298,8 +297,8 @@ struct BtreeTestHelper { } } - void multi_op_execute(const std::vector< std::pair< std::string, int > >& op_list) { - preload(SISL_OPTIONS["preload_size"].as< uint32_t >()); + void multi_op_execute(const std::vector< std::pair< std::string, int > >& op_list, bool skip_preload = false) { + if (!skip_preload) { preload(SISL_OPTIONS["preload_size"].as< uint32_t >()); } run_in_parallel(op_list); } @@ -389,7 +388,7 @@ struct BtreeTestHelper { [](const auto& pair) { return pair.second; }); double progress_interval = (double)num_iters_this_fiber / 20; // 5% of the total number of iterations - double progress_thresh = progress_interval; // threshold for progress interval + double progress_thresh = progress_interval; // threshold for progress interval double elapsed_time, progress_percent, last_progress_time = 0; // Construct a weighted distribution based on the input frequencies diff --git a/src/tests/btree_helpers/btree_test_kvs.hpp b/src/tests/btree_helpers/btree_test_kvs.hpp index 4d4481136..0aebc77bc 100644 --- a/src/tests/btree_helpers/btree_test_kvs.hpp +++ b/src/tests/btree_helpers/btree_test_kvs.hpp @@ -122,6 +122,13 @@ class TestFixedKey : public BtreeKey { return os; } + friend std::istream& operator>>(std::istream& is, TestFixedKey& k) { + uint64_t key; + is >> key; + k = TestFixedKey{key}; + return is; + } + bool operator<(const TestFixedKey& o) const { return (compare(o) < 0); } bool operator==(const TestFixedKey& other) const { return (compare(other) == 0); } @@ -224,6 +231,13 @@ class TestVarLenKey : public BtreeKey { return os; } + friend std::istream& operator>>(std::istream& is, TestVarLenKey& k) { + uint64_t key; + is >> key; + k = TestVarLenKey{key}; + return is; + } + bool operator<(const TestVarLenKey& o) const { return (compare(o) < 0); } bool operator==(const TestVarLenKey& other) const { return (compare(other) == 0); } @@ -355,6 +369,15 @@ class TestIntervalKey : public BtreeIntervalKey { os << k.to_string(); return os; } + + friend std::istream& operator>>(std::istream& is, TestIntervalKey& k) { + uint32_t m_base; + uint32_t m_offset; + char dummy; + is >> m_base >> dummy >> m_offset; + k = TestIntervalKey{m_base, m_offset}; + return is; + } }; class TestFixedValue : public BtreeValue { @@ -390,6 +413,13 @@ class TestFixedValue : public BtreeValue { return os; } + friend std::istream& operator>>(std::istream& is, TestFixedValue& v) { + uint32_t value; + is >> value; + v = TestFixedValue{value}; + return is; + } + // This is not mandatory overridden method for BtreeValue, but for testing comparision bool operator==(const TestFixedValue& other) const { return (m_val == other.m_val); } @@ -438,6 +468,13 @@ class TestVarLenValue : public BtreeValue { return os; } + friend std::istream& operator>>(std::istream& is, TestVarLenValue& v) { + std::string value; + is >> value; + v = TestVarLenValue{value}; + return is; + } + // This is not mandatory overridden method for BtreeValue, but for testing comparision bool operator==(const TestVarLenValue& other) const { return (m_val == other.m_val); } @@ -489,6 +526,15 @@ class TestIntervalValue : public BtreeIntervalValue { return os; } + friend std::istream& operator>>(std::istream& is, TestIntervalValue& v) { + uint32_t m_base_val; + uint16_t m_offset; + char dummy; + is >> m_base_val >> dummy >> m_offset; + v = TestIntervalValue{m_base_val, m_offset}; + return is; + } + ///////////////////////////// Overriding methods of BtreeIntervalValue ////////////////////////// void shift(int n) override { m_offset += n; } diff --git a/src/tests/btree_helpers/shadow_map.hpp b/src/tests/btree_helpers/shadow_map.hpp index edd6e567f..e2c86bfda 100644 --- a/src/tests/btree_helpers/shadow_map.hpp +++ b/src/tests/btree_helpers/shadow_map.hpp @@ -138,6 +138,21 @@ class ShadowMap { func(key, value); } } + std::string to_string() const { + std::string result; + std::stringstream ss; + const int key_width = 20; + + // Format the key-value pairs and insert them into the result string + ss << std::left << std::setw(key_width) << "KEY" + << " " + << "VaLUE" << '\n'; + foreach ([&](const auto& key, const auto& value) { + ss << std::left << std::setw(key_width) << key.to_string() << " " << value.to_string() << '\n'; + }); + result = ss.str(); + return result; + } std::pair< uint32_t, uint32_t > pick_random_non_existing_keys(uint32_t max_keys) { do { @@ -172,4 +187,28 @@ class ShadowMap { std::lock_guard lock{m_mutex}; m_range_scheduler.remove_keys(start_key, end_key); } + + void save(const std::string& filename) { + std::lock_guard lock{m_mutex}; + std::ofstream file(filename); + for (const auto& [key, value] : m_map) { + file << key << " " << value << '\n'; + } + file.close(); + } + + void load(const std::string& filename) { + std::lock_guard lock{m_mutex}; + std::ifstream file(filename); + if (file.is_open()) { + m_map.clear(); + K key; + V value; + while (file >> key >> value) { + m_map.emplace(key, std::move(value)); + m_range_scheduler.put_key(key.key()); + } + file.close(); + } + } }; diff --git a/src/tests/test_common/homestore_test_common.hpp b/src/tests/test_common/homestore_test_common.hpp index 758becb58..b108358dc 100644 --- a/src/tests/test_common/homestore_test_common.hpp +++ b/src/tests/test_common/homestore_test_common.hpp @@ -180,14 +180,15 @@ class HSTestHelper { bool default_data_svc_alloc_type = true); #endif static void start_homestore(const std::string& test_name, std::map< uint32_t, test_params >&& svc_params, - hs_before_services_starting_cb_t cb = nullptr, bool restart = false) { + hs_before_services_starting_cb_t cb = nullptr, bool fake_restart = false, + bool init_device = true) { auto const ndevices = SISL_OPTIONS["num_devs"].as< uint32_t >(); auto const dev_size = SISL_OPTIONS["dev_size_mb"].as< uint64_t >() * 1024 * 1024; auto num_threads = SISL_OPTIONS["num_threads"].as< uint32_t >(); auto num_fibers = SISL_OPTIONS["num_fibers"].as< uint32_t >(); auto is_spdk = SISL_OPTIONS["spdk"].as< bool >(); - if (restart) { + if (fake_restart) { shutdown_homestore(false); std::this_thread::sleep_for(std::chrono::seconds{5}); } @@ -210,7 +211,7 @@ class HSTestHelper { s_dev_names.emplace_back(std::string{"/tmp/" + test_name + "_" + std::to_string(i + 1)}); } - if (!restart) { init_files(s_dev_names, dev_size); } + if (!fake_restart && init_device) { init_files(s_dev_names, dev_size); } for (const auto& fname : s_dev_names) { device_info.emplace_back(std::filesystem::canonical(fname).string(), homestore::HSDevType::Data); } diff --git a/src/tests/test_index_btree.cpp b/src/tests/test_index_btree.cpp index eb4028fc4..ea39d4e12 100644 --- a/src/tests/test_index_btree.cpp +++ b/src/tests/test_index_btree.cpp @@ -48,6 +48,9 @@ SISL_OPTION_GROUP( ::cxxopts::value< std::vector< std::string > >(), "operations [...]"), (preload_size, "", "preload_size", "number of entries to preload tree with", ::cxxopts::value< uint32_t >()->default_value("1000"), "number"), + (init_device, "", "init_device", "init device", ::cxxopts::value< bool >()->default_value("1"), ""), + (cleanup_after_shutdown, "", "cleanup_after_shutdown", "cleanup after shutdown", + ::cxxopts::value< bool >()->default_value("1"), ""), (seed, "", "seed", "random engine seed, use random if not defined", ::cxxopts::value< uint64_t >()->default_value("0"), "number")) @@ -421,6 +424,9 @@ struct BtreeConcurrentTest : public BtreeTestHelper< TestType >, public ::testin std::shared_ptr< IndexTableBase > on_index_table_found(superblk< index_table_sb >&& sb) override { LOGINFO("Index table recovered"); LOGINFO("Root bnode_id {} version {}", sb->root_node, sb->link_version); + m_test->m_cfg = BtreeConfig(hs()->index_service().node_size()); + m_test->m_cfg.m_leaf_node_type = T::leaf_node_type; + m_test->m_cfg.m_int_node_type = T::interior_node_type; m_test->m_bt = std::make_shared< typename T::BtreeType >(std::move(sb), m_test->m_cfg); return m_test->m_bt; } @@ -431,11 +437,19 @@ struct BtreeConcurrentTest : public BtreeTestHelper< TestType >, public ::testin BtreeConcurrentTest() : testing::Test() { this->m_is_multi_threaded = true; } + void restart_homestore() { + test_common::HSTestHelper::start_homestore( + "test_index_btree", + {{HS_SERVICE::META, {}}, {HS_SERVICE::INDEX, {.index_svc_cbs = new TestIndexServiceCallbacks(this)}}}, + nullptr, true /* restart */); + } + void SetUp() override { test_common::HSTestHelper::start_homestore( "test_index_btree", {{HS_SERVICE::META, {.size_pct = 10.0}}, - {HS_SERVICE::INDEX, {.size_pct = 70.0, .index_svc_cbs = new TestIndexServiceCallbacks(this)}}}); + {HS_SERVICE::INDEX, {.size_pct = 70.0, .index_svc_cbs = new TestIndexServiceCallbacks(this)}}}, + nullptr, false, SISL_OPTIONS["init_device"].as< bool >()); LOGINFO("Node size {} ", hs()->index_service().node_size()); this->m_cfg = BtreeConfig(hs()->index_service().node_size()); @@ -452,27 +466,54 @@ struct BtreeConcurrentTest : public BtreeTestHelper< TestType >, public ::testin // Create index table and attach to index service. BtreeTestHelper< TestType >::SetUp(); - this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); + if (this->m_bt == nullptr) { + this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); + } else { + populate_shadow_map(); + } + hs()->index_service().add_index_table(this->m_bt); LOGINFO("Added index table to index service"); } + void populate_shadow_map() { + this->m_shadow_map.load(m_shadow_filename); + ASSERT_EQ(this->m_shadow_map.size(), this->m_bt->count_keys(this->m_bt->root_node_id())) + << "shadow map size and tree size mismatch"; + this->get_all(); + } void TearDown() override { + bool cleanup = SISL_OPTIONS["cleanup_after_shutdown"].as< bool >(); + LOGINFO("cleanup the dump map and index data? {}", cleanup); + if (!cleanup) { + this->m_shadow_map.save(m_shadow_filename); + } else { + if (std::filesystem::remove(m_shadow_filename)) { + LOGINFO("File {} removed successfully", m_shadow_filename); + } else { + LOGINFO("Error: failed to remove {}", m_shadow_filename); + } + } + LOGINFO("Teardown with Root bnode_id {} tree size: {}", this->m_bt->root_node_id(), + this->m_bt->count_keys(this->m_bt->root_node_id())); BtreeTestHelper< TestType >::TearDown(); - test_common::HSTestHelper::shutdown_homestore(); + test_common::HSTestHelper::shutdown_homestore(cleanup); } + +private: + const std::string m_shadow_filename = "shadow_map.txt"; }; TYPED_TEST_SUITE(BtreeConcurrentTest, BtreeTypes); TYPED_TEST(BtreeConcurrentTest, ConcurrentAllOps) { // range put is not supported for non-extent keys - std::vector< std::string > input_ops = {"put:19", "remove:14", "range_put:20", "range_remove:2", "query:10"}; + std::vector< std::string > input_ops = {"put:18", "remove:14", "range_put:20", "range_remove:2", "query:10"}; if (SISL_OPTIONS.count("operation_list")) { input_ops = SISL_OPTIONS["operation_list"].as< std::vector< std::string > >(); } auto ops = this->build_op_list(input_ops); - this->multi_op_execute(ops); + this->multi_op_execute(ops, !SISL_OPTIONS["init_device"].as< bool >()); } int main(int argc, char* argv[]) { diff --git a/src/tests/test_scripts/btree_test.py b/src/tests/test_scripts/btree_test.py index 55d257955..b7db33259 100755 --- a/src/tests/test_scripts/btree_test.py +++ b/src/tests/test_scripts/btree_test.py @@ -6,6 +6,8 @@ import sys import getopt import sys +import time +import random sys.stdout.flush() import requests @@ -19,8 +21,8 @@ log_mods = "" threads = " --num_threads=5" fibers = " --num_fibers=5" -preload_size = " --preload_size=262144" -num_entries = " --num_entries=1048576" +preload_size = " --preload_size=262144" # 256K +num_entries = " --num_entries=2097152" # 2M num_iters = " --num_iters=100000000" run_time = " --run_time=36000" dev_list = "" @@ -70,20 +72,54 @@ addln_opts += ' --device_list ' addln_opts += dev_list -btree_options = num_entries + num_iters + preload_size + fibers + threads + operations + run_time + addln_opts +btree_options = num_entries + num_iters + preload_size + fibers + threads + operations + addln_opts -def normal(): - print("normal test started with (%s)" % btree_options) +def long_runnig_index(): + print("normal test started with (%s)" % (btree_options+ " " + run_time)) # " --operation_list=query:20 --operation_list=put:20 --operation_list=remove:20" - cmd_opts = " --gtest_filter=BtreeConcurrentTest/0.ConcurrentAllOps --gtest_break_on_failure " + btree_options + " "+log_mods + cmd_opts = "--gtest_filter=BtreeConcurrentTest/0.ConcurrentAllOps --gtest_break_on_failure " + btree_options + " "+log_mods + run_time subprocess.check_call(dirpath + "test_index_btree " + cmd_opts, stderr=subprocess.STDOUT, shell=True) + print("Long running test completed") + +def function_normal(runtime, cleanup_after_shutdown=False, init_device=False): + normal_options = "--gtest_filter=BtreeConcurrentTest/0.ConcurrentAllOps --gtest_break_on_failure " + btree_options + " " + log_mods + " --run_time " + str(runtime) + cmd_opts = normal_options + " --cleanup_after_shutdown=" + str(cleanup_after_shutdown) + " --init_device=" + str(init_device) + print("normal test started with (%s)" % cmd_opts) + subprocess.check_call(dirpath + "test_index_btree " + + cmd_opts, stderr=subprocess.STDOUT, shell=True) print("normal test completed") +def function_crash(runtime, cleanup_after_shutdown=False, init_device=False): + normal_options =" --gtest_filter=BtreeConcurrentTest/0.ConcurrentAllOps --gtest_break_on_failure " + btree_options + " "+log_mods +" --enable_crash" + cmd_opts = normal_options +" --cleanup_after_shutdown=" + str(cleanup_after_shutdown) + " --init_device="+str(init_device) +" --run_time " + str(runtime) + subprocess.check_call(dirpath + "test_index_btree " + cmd_opts, stderr=subprocess.STDOUT, shell=True) + print("crash test completed") -def nightly(): - normal() +def crash_recovery_framework(): + total_run_time = 30 * 3600 + normal_run_time = 10 * 60 + crash_run_time = 10 * 60 + crash_execution_frequency = 0 + function_normal(normal_run_time, False, True) + elapsed_time = normal_run_time + + while elapsed_time <= total_run_time: + start_time = time.time() + p = random.randint(0, 100) # some distribution + if p < crash_execution_frequency: + function_crash(crash_run_time, False, False) + else: + function_normal(min(normal_run_time, total_run_time - elapsed_time), False, False) + end_time = time.time() + elapsed_time += end_time - start_time + function_normal(0, True, False) #cleanup after shutdown + print("crash recovery test completed") + +def nightly(): + long_runnig_index() + # crash_recovery_framework() # The name of the method to be called is the var test_suits eval(f"{test_suits}()")