From 41fe8193a9cfa9be8fadca48dd17681d108d07e0 Mon Sep 17 00:00:00 2001 From: shosseinimotlagh Date: Fri, 9 Feb 2024 11:23:21 -0800 Subject: [PATCH] Enable zero detecting for requests Added a manual io job test --- conanfile.py | 2 +- src/engine/blkalloc/blk.h | 2 +- src/homeblks/homeblks_config.fbs | 3 - src/homeblks/volume/tests/vol_gtest.cpp | 326 +++++++++++++++++++----- src/homeblks/volume/volume.cpp | 160 ++++-------- src/homeblks/volume/volume.hpp | 9 - 6 files changed, 322 insertions(+), 180 deletions(-) diff --git a/conanfile.py b/conanfile.py index 61d77e40d..0fb9488de 100644 --- a/conanfile.py +++ b/conanfile.py @@ -2,7 +2,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "3.7.1" + version = "3.7.2" homepage = "https://github.corp.ebay.com/SDS/homestore" description = "HomeStore" diff --git a/src/engine/blkalloc/blk.h b/src/engine/blkalloc/blk.h index 40262f154..5429f77bf 100644 --- a/src/engine/blkalloc/blk.h +++ b/src/engine/blkalloc/blk.h @@ -102,7 +102,7 @@ struct BlkId { bool operator==(const BlkId& other) noexcept { return (compare(*this, other) == 0); } void invalidate() { set(blk_num_t{0}, blk_count_t{0}, s_chunk_num_mask); } - // return invalid_blk_id() { return blk_count_t{0}; } + [[nodiscard]] bool is_valid() const { return (m_chunk_num != s_chunk_num_mask); } [[nodiscard]] BlkId get_blkid_at(const uint32_t offset, const uint32_t pagesz) const { diff --git a/src/homeblks/homeblks_config.fbs b/src/homeblks/homeblks_config.fbs index a99da66d2..42566a9c4 100644 --- a/src/homeblks/homeblks_config.fbs +++ b/src/homeblks/homeblks_config.fbs @@ -34,9 +34,6 @@ table GeneralConfig { // These fields should only be changed by agent through workflow boot_restricted_mode: bool = false; boot_safe_mode: bool = false; - - // This field is for enabling thin provisioing on booting - boot_thin_provisioning: bool = true; } table HomeBlksSettings { diff --git a/src/homeblks/volume/tests/vol_gtest.cpp b/src/homeblks/volume/tests/vol_gtest.cpp index 932b2fa7f..b7992c6bd 100644 --- a/src/homeblks/volume/tests/vol_gtest.cpp +++ b/src/homeblks/volume/tests/vol_gtest.cpp @@ -174,6 +174,9 @@ struct TestCfg { uint32_t p_vol_files_space; std::string flip_name; std::string vol_copy_file_path; + uint32_t p_zero_buffer; + uint32_t zero_buffer_period; + bool thin_provision_enable{false}; bool verify_csum() { return verify_type == verify_type_t::csum; } bool verify_data() { return verify_type == verify_type_t::data; } @@ -575,6 +578,7 @@ class VolTest : public ::testing::Test { friend class VolCreateDeleteJob; friend class IOTestJob; friend class VolVerifyJob; + friend class IOManualTestJob; protected: std::atomic< size_t > outstanding_ios; @@ -620,12 +624,20 @@ class VolTest : public ::testing::Test { // vol_create_del_test = false; // move_verify_to_done = false; print_startTime = Clock::now(); + if (tcfg.thin_provision_enable) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; }); + HS_SETTINGS_FACTORY().save(); + } // outstanding_ios = 0; } virtual ~VolTest() override { if (init_buf) { iomanager.iobuf_free(static_cast< uint8_t* >(init_buf)); } + if (tcfg.thin_provision_enable) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; }); + HS_SETTINGS_FACTORY().save(); + } } VolTest(const VolTest&) = delete; @@ -1675,8 +1687,7 @@ class IOTestJob : public TestJob { // lba: [0, max_vol_blks - max_blks) std::uniform_int_distribution< uint64_t > lba_random{0, vinfo->max_vol_blks - max_blks - 1}; // nlbas: [1, max_blks] -// std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; - std::uniform_int_distribution< uint32_t > nlbas_random{1, 5}; + std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; // we won't be writing more then 128 blocks in one io uint32_t attempt{1}; @@ -1816,22 +1827,22 @@ class IOTestJob : public TestJob { const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)}; const uint64_t size{nlbas * page_size}; + static std::atomic< uint32_t > remaining_period{tcfg.zero_buffer_period}; + uint32_t zero_counts_per_period = tcfg.p_zero_buffer * tcfg.zero_buffer_period / 100; boost::intrusive_ptr< io_req_t > vreq{}; - - static thread_local std::random_device rd{}; - static thread_local std::default_random_engine engine{rd()}; - static thread_local std::uniform_int_distribution< uint8_t > dist{0, 1}; - if (tcfg.write_cache) { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; HS_REL_ASSERT_NOTNULL(wbuf); populate_buf(wbuf, size, lba, vinfo.get()); - populate_zero_buf(wbuf, size, vinfo.get()); + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + populate_zero_buf(wbuf, size); + } vreq = boost::intrusive_ptr< io_req_t >( new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); } else { - static bool send_iovec{false}; + static bool send_iovec{true}; std::vector< iovec > iovecs{}; if (send_iovec) { for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { @@ -1840,7 +1851,14 @@ class IOTestJob : public TestJob { iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; iovecs.emplace_back(std::move(iov)); populate_buf(wbuf, page_size, lba + lba_num, vinfo.get()); - populate_zero_buf(wbuf, size, vinfo.get()); + } + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + for (const auto& iovec : iovecs) { + auto data = static_cast< uint8_t* >(iovec.iov_base); + const size_t size = iovec.iov_len; + populate_zero_buf(data, size); + } } vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, @@ -1848,13 +1866,17 @@ class IOTestJob : public TestJob { } else { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; populate_buf(wbuf, size, lba, vinfo.get()); - populate_zero_buf(wbuf, size, vinfo.get()); + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + populate_zero_buf(wbuf, size); + } HS_REL_ASSERT_NOTNULL(wbuf); vreq = boost::intrusive_ptr< io_req_t >{ new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; } - // send_iovec = !send_iovec; + if (remaining_period.load() == 0) { remaining_period.store(tcfg.zero_buffer_period); } + send_iovec = !send_iovec; } vreq->cookie = static_cast< void* >(this); @@ -1869,40 +1891,6 @@ class IOTestJob : public TestJob { return true; } - void populate_zero_buf(uint8_t* buf, const uint64_t size, const vol_info_t* const vinfo) { - auto page_size = VolInterface::get_instance()->get_page_size(vinfo->vol); - auto nlbas = size / page_size; - static thread_local std::random_device rd{}; - static thread_local std::default_random_engine engine{rd()}; - static thread_local std::uniform_int_distribution< uint8_t > dist{0, 100}; -// std::fill_n(buf + nlbas/2 * page_size, page_size, 0); -// { -// // first zero -// std::fill_n(buf, page_size, 0); -// } - { - // first x lbas the non_zero the rest zero - - if (nlbas >= 2) - std::fill_n(buf + page_size, (nlbas -1) *page_size, 0); - } -// { -// // randomly 5% of lbas can be zero -// for (long unsigned int i = 0; i < nlbas; ++i) { -// if (dist(engine) < 5) { std::fill_n(buf + i * page_size, page_size, 0); } -// } -// } -// { -// // one lba in the middle can be zero (two sub non empty ranges) -// std::uniform_int_distribution< uint8_t > ran_lba{1, nlbas-1}; -// auto l1= ran_lba(engine); -// auto l2= ran_lba(engine); -// auto lb1 = std::min(l1,l2); -// auto lb2 = std::max(l1,l2); -// std::fill_n(buf + l1 * page_size, (lb2 -lb1 +1) *page_size, 0); -// } - } - void populate_buf(uint8_t* const buf, const uint64_t size, const uint64_t lba, const vol_info_t* const vinfo) { static thread_local std::random_device rd{}; static thread_local std::default_random_engine engine{rd()}; @@ -1921,8 +1909,9 @@ class IOTestJob : public TestJob { } } + void populate_zero_buf(uint8_t* buf, const uint64_t size) { std::fill_n(buf, size, 0); } + bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) { - return true; const auto vinfo{m_voltest->m_vol_info[cur]}; const auto vol{vinfo->vol}; if (vol == nullptr) { return false; } @@ -2000,8 +1989,6 @@ class IOTestJob : public TestJob { } bool verify(const boost::intrusive_ptr< io_req_t >& req, const bool can_panic = true) const { - return true; -#if 0 const auto& vol_req{static_cast< vol_interface_req_ptr >(req)}; const auto verify_buffer{[this, &req, &can_panic](const uint8_t* const validate_buffer, @@ -2107,9 +2094,200 @@ class IOTestJob : public TestJob { tcfg.verify_csum() ? (HS_REL_ASSERT_EQ(total_size_read_csum, req->verify_size)) : (HS_REL_ASSERT_EQ(total_size_read, req->original_size)); return true; -#endif } +}; + +// This test job is used to test the IOs with manual requests. For sake of simplicity, we will use the same volume for +// all requests. The caller needs to load the requests before starting the job. The requests are loaded in the form of +// Write with three or four parameters and Read with three parameters. The value is optional and is used only for write +// requests. +class IOManualTestJob : public TestJob { +public: + using TupleVariant = std::variant< std::tuple< std::string, uint64_t, uint32_t >, + std::tuple< std::string, uint64_t, uint32_t, uint8_t > >; + using RequestVector = std::vector< IOManualTestJob::TupleVariant >; + IOManualTestJob(VolTest* const test) : TestJob(test, 1, true) { + vol = m_voltest->m_vol_info[0]->vol; + vinfo = m_voltest->m_vol_info[0]; + page_size = VolInterface::get_instance()->get_page_size(vol); + const auto vol_size = VolInterface::get_instance()->get_size(vol); + const auto max_lbas = vol_size / page_size; + m_validate_buf.resize(max_lbas); + std::fill(m_validate_buf.begin(), m_validate_buf.end(), 0); + LOGINFO("Manual volume size {} max_lbas {}", vol_size, max_lbas); + } + virtual ~IOManualTestJob() override = default; + IOManualTestJob(const IOManualTestJob&) = delete; + IOManualTestJob(IOManualTestJob&&) noexcept = delete; + IOManualTestJob& operator=(const IOManualTestJob&) = delete; + IOManualTestJob& operator=(IOManualTestJob&&) noexcept = delete; + + virtual void run_one_iteration() override { + if (m_outstanding_ios.load() == 0 && m_current_request < m_requests.size()) { + const auto& request = m_requests[m_current_request]; + if (std::holds_alternative< std::tuple< std::string, uint64_t, uint32_t > >(request)) { + auto& tuple = std::get< std::tuple< std::string, uint64_t, uint32_t > >(request); + auto start_lba = std::get< 1 >(tuple); + auto nlbas = std::get< 2 >(tuple); + if (std::get< 0 >(tuple) == "write") { + write_vol(start_lba, nlbas); + auto it = m_validate_buf.begin() + start_lba; + std::fill(it, it + nlbas, 0); + } else { + read_vol(start_lba, nlbas); + } + } else if (std::holds_alternative< std::tuple< std::string, uint64_t, uint32_t, uint8_t > >(request)) { + auto& tuple = std::get< std::tuple< std::string, uint64_t, uint32_t, uint8_t > >(request); + auto start_lba = std::get< 1 >(tuple); + auto nlbas = std::get< 2 >(tuple); + auto value = std::get< 3 >(tuple); + if (std::get< 0 >(tuple) == "write") { + write_vol(start_lba, nlbas, value); + auto it = m_validate_buf.begin() + start_lba; + std::fill(it, it + nlbas, value); + } else { + // in case, the caller mistakenly added a value for a read request, we will ignore the value + read_vol(start_lba, nlbas); + } + } + } + } + + void on_one_iteration_completed(const boost::intrusive_ptr< io_req_t >& req) override { + --m_outstanding_ios; + if (req->op_type == Op_type::READ) { verify_request(req); } + req->vol_info->ref_cnt.decrement_testz(1); + } + uint64_t read_buffer(std::vector< iovec >& iovecs, uint8_t* buf) { + uint8_t* current_position = buf; + for (const auto& iov : iovecs) { + std::memcpy(current_position, iov.iov_base, iov.iov_len); + current_position += iov.iov_len; + } + return static_cast< uint64_t >(current_position - buf); + } + void verify_request(const boost::intrusive_ptr< io_req_t >& req) { + std::shared_ptr< uint8_t > buf(new uint8_t[req->nlbas * page_size]); + std::fill_n(buf.get(), req->nlbas * page_size, 0); + auto total_size_read = read_buffer(req->iovecs, buf.get()); + HS_REL_ASSERT_EQ(req->nlbas * page_size, total_size_read); + auto raw_buf = buf.get(); + for (size_t i = 0; i < req->nlbas; i++) { + HS_REL_ASSERT_EQ(raw_buf[i * page_size], m_validate_buf[req->lba + i]); + } + } + bool time_to_stop() const override { return m_current_request == m_requests.size(); } + + virtual bool is_job_done() const override { return (m_outstanding_ios == 0); } + bool is_async_job() const override { return true; } + std::string job_name() const { return "IO Manual Job"; } + void load_requests(RequestVector& requests) { m_requests = requests; } + +protected: + VolumePtr vol; + std::shared_ptr< vol_info_t > vinfo; + uint64_t page_size; + std::atomic< uint64_t > m_outstanding_ios{0}; + std::atomic< uint64_t > m_current_request{0}; + std::vector< uint8_t > m_validate_buf; + RequestVector m_requests; + + bool write_vol(const uint64_t lba, const uint32_t nlbas, const uint8_t value = 0) { + ++m_current_request; + ++m_outstanding_ios; + const uint64_t size{nlbas * page_size}; + boost::intrusive_ptr< io_req_t > vreq{}; + if (tcfg.write_cache) { + uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; + populate_buf(wbuf, size, value); + vreq = boost::intrusive_ptr< io_req_t >( + new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); + } else { + static bool send_iovec{true}; + std::vector< iovec > iovecs{}; + if (send_iovec) { + for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { + uint8_t* const wbuf{iomanager.iobuf_alloc(512, page_size)}; + iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; + iovecs.emplace_back(std::move(iov)); + populate_buf(wbuf, page_size, value); + } + vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, + nlbas, tcfg.verify_csum(), tcfg.write_cache)); + } else { + uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; + populate_buf(wbuf, size, value); + vreq = boost::intrusive_ptr< io_req_t >{ + new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; + } + send_iovec = !send_iovec; + } + vreq->cookie = static_cast< void* >(this); + ++m_voltest->output.write_cnt; + vinfo->ref_cnt.increment(1); + const auto ret_io{VolInterface::get_instance()->write(vol, vreq)}; + LOGDEBUG("Wrote lba: {}, nlbas: {} outstanding_ios={}, iovec(s)={}, cache={}", lba, nlbas, + m_outstanding_ios.load(), (tcfg.write_iovec != 0 ? true : false), + (tcfg.write_cache != 0 ? true : false)); + if (ret_io != no_error) { return false; } + return true; + } + + void populate_buf(uint8_t* buf, const uint64_t size, const uint8_t value = 0) { std::fill_n(buf, size, value); } + + bool read_vol(const uint64_t lba, const uint32_t nlbas) { + ++m_current_request; + if (read_vol_internal(vinfo, vol, lba, nlbas, false)) { return true; } + return false; + } + + boost::intrusive_ptr< io_req_t > read_vol_internal(std::shared_ptr< vol_info_t > vinfo, VolumePtr vol, + const uint64_t lba, const uint32_t nlbas, + const bool sync = false) { + boost::intrusive_ptr< io_req_t > vreq{}; + if (tcfg.read_cache) { + vreq = boost::intrusive_ptr< io_req_t >{ + new io_req_t{vinfo, Op_type::READ, nullptr, lba, nlbas, tcfg.verify_csum(), tcfg.read_cache, sync}}; + } else { + static bool send_iovec{true}; + if (send_iovec) { + std::vector< iovec > iovecs{}; + for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { + uint8_t* const rbuf{iomanager.iobuf_alloc(512, page_size)}; + std::memset(static_cast< void* >(rbuf), 0, page_size); + + HS_REL_ASSERT_NOTNULL(rbuf); + iovec iov{static_cast< void* >(rbuf), static_cast< size_t >(page_size)}; + iovecs.emplace_back(std::move(iov)); + } + + vreq = boost::intrusive_ptr< io_req_t >{new io_req_t{vinfo, Op_type::READ, std::move(iovecs), lba, + nlbas, tcfg.verify_csum(), tcfg.read_cache, sync}}; + } else { + uint8_t* const rbuf{iomanager.iobuf_alloc(512, nlbas * page_size)}; + std::memset(static_cast< void* >(rbuf), 0, nlbas * page_size); + vreq = boost::intrusive_ptr< io_req_t >{ + new io_req_t{vinfo, Op_type::READ, rbuf, lba, nlbas, tcfg.verify_csum(), tcfg.read_cache, sync}}; + } + send_iovec = !send_iovec; + } + vreq->cookie = static_cast< void* >(this); + + ++m_voltest->output.read_cnt; + ++m_outstanding_ios; + vinfo->ref_cnt.increment(1); + const auto ret_io{VolInterface::get_instance()->read(vol, vreq)}; + LOGDEBUG("Read lba: {}, nlbas: {} outstanding_ios={}, iovec(s)={}, cache={}", lba, nlbas, + m_outstanding_ios.load(), (tcfg.read_iovec != 0 ? true : false), + (tcfg.read_cache != 0 ? true : false)); + if (sync) { + --m_outstanding_ios; + vinfo->ref_cnt.decrement(1); + } + if (ret_io != no_error) { return nullptr; } + return vreq; + } }; class VolVerifyJob : public IOTestJob { @@ -2269,20 +2447,40 @@ TEST_F(VolTest, init_io_test) { this->shutdown(); if (tcfg.remove_file_on_shutdown) { this->remove_files(); } } - TEST_F(VolTest, thin_test) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; }); + HS_SETTINGS_FACTORY().save(); + tcfg.max_vols = 1; + tcfg.verify_type = static_cast< verify_type_t >(3); + tcfg.max_disk_capacity = 1 * (1ul << 30); // 1GB + tcfg.p_volume_size = 1; // 1% of 2 (devices) * 1G = 20 MB volume + output.print("thin_test"); + this->start_homestore(); - std::unique_ptr< VolCreateDeleteJob > cdjob; - if (tcfg.create_del_with_io || tcfg.delete_with_io) { - cdjob = std::make_unique< VolCreateDeleteJob >(this); - this->start_job(cdjob.get(), wait_type::no_wait); - } - this->start_io_job(); - output.print("init_io_test"); + std::unique_ptr< IOManualTestJob > job; + job = std::make_unique< IOManualTestJob >(this); + IOManualTestJob::RequestVector reqs = {// Case one: normal read (no zero padding) + std::make_tuple("write", 0, 100, 4), std::make_tuple("read", 5, 20), + // Case two: zero padding, read after write + std::make_tuple("write", 1, 10), std::make_tuple("read", 1, 20), + std::make_tuple("read", 5, 3), + // Case three: zero padding, overlapping for read + std::make_tuple("write", 100, 200), std::make_tuple("read", 150, 250), + // Case four: no write + std::make_tuple("read", 800, 5)}; + job->load_requests(reqs); + + this->start_job(job.get(), wait_type::for_completion); - if (tcfg.create_del_with_io || tcfg.delete_with_io) { cdjob->wait_for_completion(); } + LOGINFO("All volumes are deleted, do a shutdown of homestore"); this->shutdown(); + + LOGINFO("Shutdown of homestore is completed, removing files"); + this->remove_files(); + + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; }); + HS_SETTINGS_FACTORY().save(); } /*! @@ -2743,6 +2941,13 @@ SISL_OPTION_GROUP( (io_size, "", "io_size", "io size in KB", ::cxxopts::value< uint32_t >()->default_value("4"), "io_size"), (vol_copy_file_path, "", "vol_copy_file_path", "file path for copied volume", ::cxxopts::value< std::string >()->default_value(""), "path [...]"), + (p_zero_buffer, "", "p_zero_buffer", + "percentage of zero buffer occurrence for testing thin provisioning within period", + ::cxxopts::value< uint32_t >()->default_value("70"), "0 to 100"), + (zero_buffer_period, "", "zero_buffer_period", " the period of consecutive zero buffer occurrence", + ::cxxopts::value< uint32_t >()->default_value("100"), "0 to 100"), + (thin_provision_enable, "", "thin_provision_enable", " enable thin provisioning", + ::cxxopts::value< uint32_t >()->default_value("0"), "flag"), (unmap_frequency, "", "unmap_frequency", "do unmap for every N", ::cxxopts::value< uint64_t >()->default_value("100"), "unmap_frequency")) @@ -2819,6 +3024,9 @@ int main(int argc, char* argv[]) { gcfg.app_mem_size_in_gb = SISL_OPTIONS["app_mem_size_in_gb"].as< uint32_t >(); gcfg.vol_copy_file_path = SISL_OPTIONS["vol_copy_file_path"].as< std::string >(); const auto io_size_in_kb = SISL_OPTIONS["io_size"].as< uint32_t >(); + gcfg.p_zero_buffer = SISL_OPTIONS["p_zero_buffer"].as< uint32_t >(); + gcfg.zero_buffer_period = SISL_OPTIONS["zero_buffer_period"].as< uint32_t >(); + gcfg.thin_provision_enable = SISL_OPTIONS["thin_provision_enable"].as< uint32_t >() != 0 ? true : false; gcfg.io_size = io_size_in_kb * 1024; HS_REL_ASSERT(io_size_in_kb && (io_size_in_kb % 4 == 0), diff --git a/src/homeblks/volume/volume.cpp b/src/homeblks/volume/volume.cpp index 5220c455f..43dd0c007 100644 --- a/src/homeblks/volume/volume.cpp +++ b/src/homeblks/volume/volume.cpp @@ -334,18 +334,7 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info) SnapMgr::add_read_tracker, &cp_info); return static_cast< indx_tbl* >(tbl); } -static std::vector< bool > find_non_zero_data(const uint8_t* buf, size_t size, uint32_t nlbas) { - std::vector< bool > empty_blocks; - auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { - return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); - }; - for (uint32_t count{0}; count < nlbas; ++count) { - empty_blocks.push_back(!is_buf_empty(buf, size)); - buf += size; - } - return empty_blocks; -} static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) { std::vector< std::pair< int, int > > result; @@ -360,24 +349,48 @@ static std::vector< std::pair< int, int > > get_true_intervals(const std::vector } } } - if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); } - return result; } +static bool is_buf_zero(const uint8_t* buf, size_t size) { + // TODO: subsample the buffer to detect zero request instead of working on the whole buffer to achieve constant + // processing time for large buffer size requests. Needs to investigate the performance impact of this change + // in end2end testing. + static std::vector< uint8_t > read_buf(size, 0); + static const auto zero_crc = crc16_t10dif(init_crc_16, read_buf.data(), size); + const auto crc = crc16_t10dif(init_crc_16, buf, size); + return (crc == zero_crc) ? (buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1)) : false; +} + +static bool is_iovec_zero(const std::vector< iovec >& iovecs) { + for (const auto& iovec : iovecs) { + auto data = static_cast< uint8_t* >(iovec.iov_base); + const size_t size = iovec.iov_len; + if (!is_buf_zero(data, size)) { return false; } + } + return true; +} + +static bool is_zero_request(const vol_interface_req_ptr& iface_req, uint32_t page_size) { + if (iface_req->iovecs.empty()) { + return is_buf_zero(static_cast< uint8_t* >(iface_req->buffer), iface_req->nlbas * page_size); + } + return is_iovec_zero(iface_req->iovecs); +} + +#if 0 +// TODO: use these functions for near future optimization of write path for thin provisioning volumes to enable skipping +// writing empty blocks in subrange intervals for requested buffer instead of detecting the all-zero-buffer requests. static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size, uint32_t nlbas, bool empty_blocks = false) { std::vector< std::pair< int, int > > intervals; bool in_empty_region = false; int current_range_start = -1; int current_range_length = 1; - auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { - return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); - }; for (uint32_t i = 0; i < nlbas; i++) { const uint8_t* page_start = buf + (i * page_size); - bool is_page_empty = (empty_blocks == is_buf_empty(page_start, page_size)); + bool is_page_empty = (empty_blocks == is_buf_zero(page_start, page_size)); if (is_page_empty) { if (!in_empty_region) { current_range_start = i; @@ -395,16 +408,7 @@ static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_ return intervals; } -std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { - if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ - return write_internal(iface_req); - } - std::error_condition ret{no_error}; - auto buf = static_cast< uint8_t* >(iface_req->buffer); - auto nlbas = iface_req->nlbas; - auto start_lba = iface_req->lba; - auto non_empty_blocks = compute_range_intervals(buf, get_page_size(), nlbas, false); -// auto vreq = volume_req::make(iface_req); +static std::string print_ranges(lba_t start_lba, const std::vector< std::pair< int, int > >& intervals) { auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string { std::vector< std::string > result_strings; std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings), @@ -416,67 +420,29 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { }); return std::accumulate(result_strings.begin(), result_strings.end(), std::string("")); }; - LOGINFO("original req <{}, {}> => [{}]", iface_req->lba, iface_req->nlbas, intervals_to_string(non_empty_blocks)); - for (const auto &interval : non_empty_blocks) { -//#if 0 - iface_req->lba = start_lba + interval.first; - iface_req->nlbas = interval.second; - iface_req->buffer = buf + (interval.first * get_page_size()); - iface_req->iovecs.clear(); - - - ret = write_internal(iface_req); - if (ret != no_error) { - return ret; - } -//#endif -#if 0 - auto lba = start_lba + interval.first; - auto nlbas = interval.second; - const auto buffer = buf + (interval.first * get_page_size()); - auto req = std::make_unique(buffer, lba, nlbas, iface_req->sync, iface_req->cache); - - req->vol_instance = shared_from_this(); - req->part_of_batch = iface_req->part_of_batch; - req->op_type = Op_type::WRITE; - LOGINFO("sending request to write_internal with lba: {}, nlbas: {} buffer :{}", req->lba, req->nlbas, req->buffer); - //extra - req->read_buf_list = iface_req->read_buf_list; - req->err = iface_req->err; - req->request_id = iface_req->request_id; - req->cache = iface_req->cache; - req->sync = iface_req->sync; - req->is_fail_completed = iface_req->is_fail_completed.load(); - req->cookie = iface_req->cookie; - - ret = write_internal(req.get()); - for (auto x: iface_req->read_buf_list) { - req->read_buf_list.push_back(x); - - } - for (auto p: iface_req->iovecs) { - req->iovecs.push_back(p); + return intervals_to_string(intervals); +} +#endif +std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { + std::error_condition ret{no_error}; + if (!HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning)) { + return write_internal(iface_req); + } else { + bool zeroRequest = is_zero_request(iface_req, get_page_size()); + if (zeroRequest) { + THIS_VOL_LOG(TRACE, volume, iface_req, "zero request <{}, {}>", iface_req->lba, iface_req->nlbas); + iface_req->op_type = Op_type::UNMAP; + ret = unmap(iface_req); + } else { + ret = write_internal(iface_req); } - // vol_interface_req i_req(buffer, start_lba, nlbas, iface_req->sync, iface_req->cache); -// i_req.request_id = iface_req->request_id; -// auto ret = write_internal(&i_req); -// if (ret != no_error) { -// return ret; -// } -#endif } - iface_req->buffer = (void*)(buf); - iface_req->nlbas = nlbas; - iface_req->lba = start_lba; -// check_and_complete_req(vreq, ret); -// interface_req_done(iface_req); + iface_req->op_type = Op_type::WRITE; return ret; } -//std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { -// -//} - std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { + +std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { static thread_local std::vector< BlkId > bid{}; std::error_condition ret{no_error}; @@ -485,9 +451,6 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { auto vreq = volume_req::make(iface_req); THIS_VOL_LOG(TRACE, volume, vreq, "write: lba={}, nlbas={}, cache={}", vreq->lba(), vreq->nlbas(), vreq->use_cache()); - LOGINFO("\nwrite: lba={}, nlbas={}, cache={} buffer= {}", vreq->lba(), vreq->nlbas(), - vreq->use_cache(), iface_req->buffer); - print_tree(); COUNTER_INCREMENT(m_metrics, volume_outstanding_data_write_count, 1); // Sanity checks @@ -515,7 +478,6 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { uint64_t start_lba{vreq->lba()}; for (size_t i{0}; i < bid.size(); ++i) { - LOGINFO("bid[{}]: {}", i, bid[i].to_string()); if (bid[i].get_nblks() == 0) { // It should not happen. But it happened once so adding a safe check in case it happens again VOL_LOG_ASSERT(0, vreq, "{}", bid[i].to_string()); @@ -548,10 +510,7 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } } else { // scatter/gather write - const auto& iovecs{std::get< volume_req::IoVecData >(vreq->data)}; - LOGINFO("write: lba={}, nlbas={}, data size/pagesize: {} iovec[0]_len {} buffer{} iovecs.iov_data {} size {}", vreq->lba(), vreq->nlbas(), - data_size/get_page_size(), static_cast< uint64_t >(iovecs.get().at(0).iov_len)/4096, iface_req->buffer, iovecs.get().at(0).iov_base, iovecs.get().size()); const auto write_iovecs{get_next_iovecs(write_transversal, iovecs, data_size)}; // TO DO: Add option to insert into cache if write cache option true @@ -597,11 +556,7 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } done: -// if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ - LOGINFO("done calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); - check_and_complete_req(vreq, ret); -// } - + check_and_complete_req(vreq, ret); return ret; } @@ -736,8 +691,6 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error vreq->state = volume_req_state::journal_io; vreq->indx_start_time = Clock::now(); auto ireq = boost::static_pointer_cast< indx_req >(vreq); - LOGINFO("complete write? {}: lba={}, nlbas={}, cache={}", vreq->is_write(), vreq->lba(), vreq->nlbas(), - vreq->use_cache()); (vreq->is_unmap()) ? m_indx_mgr->unmap(ireq) : m_indx_mgr->update_indx(ireq); COUNTER_INCREMENT(m_metrics, volume_outstanding_metadata_write_count, 1); } @@ -781,12 +734,7 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error } #endif THIS_VOL_LOG(TRACE, volume, vreq, "IO DONE"); - if (vreq->is_write() && HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ - - } - else{ - interface_req_done(vreq->iface_req); - } + interface_req_done(vreq->iface_req); } shutdown_if_needed(); } @@ -815,7 +763,7 @@ void Volume::process_indx_completions(const indx_req_ptr& ireq, std::error_condi THIS_VOL_LOG(TRACE, volume, vreq, "metadata_complete: status={}", vreq->err().message()); HISTOGRAM_OBSERVE(m_metrics, volume_map_write_latency, get_elapsed_time_us(vreq->indx_start_time)); - LOGINFO("process_indx_completions calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); + check_and_complete_req(vreq, err); } @@ -924,7 +872,7 @@ mapping* Volume::get_active_indx() { void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req >& ireq, std::error_condition err) { auto ret = no_error; auto vreq = boost::static_pointer_cast< volume_req >(ireq); - LOGINFO("process_read_indx_completions calls for check and complete read? {}: lba={}, nlbas={}", vreq->is_read_op(), vreq->lba(), vreq->nlbas()); + // if there is error or nothing to read anymore, complete this req; if (err != no_error) { ret = err; @@ -1048,7 +996,6 @@ void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req /* It is not lock protected. It should be called only by thread for a vreq */ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume_req_ptr& vreq, const uint64_t start_lba, const lba_count_t nlbas) { - volume_child_req_ptr vc_req = volume_child_req::make_request(); vc_req->parent_req = vreq; vc_req->is_read = vreq->is_read_op(); @@ -1059,7 +1006,6 @@ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume vc_req->use_cache = vreq->use_cache(); vc_req->part_of_batch = vreq->iface_req->part_of_batch; vc_req->request_id = vreq->request_id; - LOGINFO("create_vol_child_req calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); assert((bid.data_size(HomeBlks::instance()->get_data_pagesz()) % get_page_size()) == 0); vc_req->nlbas = nlbas; diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp index 76a905257..2617dddc7 100644 --- a/src/homeblks/volume/volume.hpp +++ b/src/homeblks/volume/volume.hpp @@ -465,15 +465,6 @@ class Volume : public std::enable_shared_from_this< Volume > { */ std::error_condition write(const vol_interface_req_ptr& hb_req); std::error_condition write_internal(const vol_interface_req_ptr& hb_req); - std::error_condition write_thin_provisioning(const vol_interface_req_ptr& hb_req); - - - - /* Write to lba - * @param hb_req :- it expects this request to be created - * @return :- no_error if there is no error. It doesn't throw any exception - */ - std::error_condition write_internal(const vol_interface_req_ptr& hb_req); /* Read from lba * @param hb_req :- it expects this request to be created