diff --git a/src/.clang-format b/src/.clang-format index 2f7712008..fdfa11f5e 100644 --- a/src/.clang-format +++ b/src/.clang-format @@ -18,7 +18,6 @@ AlignOperands: false AlignTrailingComments: true AllowShortBlocksOnASingleLine: true AllowShortIfStatementsOnASingleLine: true -AllowShortBlocksOnASingleLine: true AllowShortCaseLabelsOnASingleLine: false # AllowShortFunctionsOnASingleLine: InlineOnly # AllowShortLoopsOnASingleLine: false diff --git a/src/engine/blkalloc/blk.h b/src/engine/blkalloc/blk.h index 5429f77bf..40262f154 100644 --- a/src/engine/blkalloc/blk.h +++ b/src/engine/blkalloc/blk.h @@ -102,7 +102,7 @@ struct BlkId { bool operator==(const BlkId& other) noexcept { return (compare(*this, other) == 0); } void invalidate() { set(blk_num_t{0}, blk_count_t{0}, s_chunk_num_mask); } - + // return invalid_blk_id() { return blk_count_t{0}; } [[nodiscard]] bool is_valid() const { return (m_chunk_num != s_chunk_num_mask); } [[nodiscard]] BlkId get_blkid_at(const uint32_t offset, const uint32_t pagesz) const { diff --git a/src/engine/common/homestore_config.fbs b/src/engine/common/homestore_config.fbs index e6dfee051..9b219abe4 100644 --- a/src/engine/common/homestore_config.fbs +++ b/src/engine/common/homestore_config.fbs @@ -142,6 +142,8 @@ table Generic { // percentage of cache used to create indx mempool. It should be more than 100 to // take into account some floating buffers in writeback cache. 
indx_mempool_percent : uint32 = 110; + + boot_thin_provisioning: bool = false; } table ResourceLimits { diff --git a/src/homeblks/homeblks_config.fbs b/src/homeblks/homeblks_config.fbs index 42566a9c4..a99da66d2 100644 --- a/src/homeblks/homeblks_config.fbs +++ b/src/homeblks/homeblks_config.fbs @@ -34,6 +34,9 @@ table GeneralConfig { // These fields should only be changed by agent through workflow boot_restricted_mode: bool = false; boot_safe_mode: bool = false; + + // This field is for enabling thin provisioning on booting + boot_thin_provisioning: bool = true; } table HomeBlksSettings { diff --git a/src/homeblks/volume/tests/vol_gtest.cpp b/src/homeblks/volume/tests/vol_gtest.cpp index d8aa4ef17..fa6d55e88 100644 --- a/src/homeblks/volume/tests/vol_gtest.cpp +++ b/src/homeblks/volume/tests/vol_gtest.cpp @@ -174,6 +174,9 @@ struct TestCfg { uint32_t p_vol_files_space; std::string flip_name; std::string vol_copy_file_path; + uint32_t p_zero_buffer; + uint32_t zero_buffer_period; + bool thin_provision_enable{false}; bool verify_csum() { return verify_type == verify_type_t::csum; } bool verify_data() { return verify_type == verify_type_t::data; } @@ -620,12 +623,20 @@ class VolTest : public ::testing::Test { // vol_create_del_test = false; // move_verify_to_done = false; print_startTime = Clock::now(); + if (tcfg.thin_provision_enable) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; }); + HS_SETTINGS_FACTORY().save(); + } // outstanding_ios = 0; } virtual ~VolTest() override { if (init_buf) { iomanager.iobuf_free(static_cast< uint8_t* >(init_buf)); } + if (tcfg.thin_provision_enable) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; }); + HS_SETTINGS_FACTORY().save(); + } } VolTest(const VolTest&) = delete; @@ -1815,13 +1826,18 @@ class IOTestJob : public TestJob { const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)}; const uint64_t 
size{nlbas * page_size}; + static std::atomic< uint32_t > remaining_period{tcfg.zero_buffer_period}; + uint32_t zero_counts_per_period = tcfg.p_zero_buffer * tcfg.zero_buffer_period / 100; boost::intrusive_ptr< io_req_t > vreq{}; if (tcfg.write_cache) { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; HS_REL_ASSERT_NOTNULL(wbuf); populate_buf(wbuf, size, lba, vinfo.get()); - + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + populate_zero_buf(wbuf, size); + } vreq = boost::intrusive_ptr< io_req_t >( new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); } else { @@ -1833,20 +1849,32 @@ class IOTestJob : public TestJob { HS_REL_ASSERT_NOTNULL(wbuf); iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; iovecs.emplace_back(std::move(iov)); - populate_buf(wbuf, page_size, lba + lba_num, vinfo.get()); } + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + for (const auto& iovec : iovecs) { + auto data = static_cast< uint8_t* >(iovec.iov_base); + const size_t size = iovec.iov_len; + populate_zero_buf(data, size); + } + } vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); } else { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; populate_buf(wbuf, size, lba, vinfo.get()); + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + populate_zero_buf(wbuf, size); + } HS_REL_ASSERT_NOTNULL(wbuf); vreq = boost::intrusive_ptr< io_req_t >{ new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; } + if (remaining_period.load() == 0) { remaining_period.store(tcfg.zero_buffer_period); } send_iovec = !send_iovec; } vreq->cookie = static_cast< void* >(this); @@ -1880,6 +1908,8 @@ 
class IOTestJob : public TestJob { } } + void populate_zero_buf(uint8_t* buf, const uint64_t size) { std::fill_n(buf, size, 0); } + bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) { const auto vinfo{m_voltest->m_vol_info[cur]}; const auto vol{vinfo->vol}; @@ -2224,6 +2254,16 @@ TEST_F(VolTest, init_io_test) { if (tcfg.remove_file_on_shutdown) { this->remove_files(); } } +TEST_F(VolTest, thin_provisioning_test) { + tcfg.precreate_volume = false; + this->start_homestore(); + + auto cdjob{std::make_unique< VolCreateDeleteJob >(this)}; + this->start_job(cdjob.get(), wait_type::for_completion); + this->shutdown(); + if (tcfg.remove_file_on_shutdown) { this->remove_files(); } +} + /*! @test recovery_io_test @brief Tests which does recovery. End up with a clean shutdown @@ -2682,6 +2722,13 @@ SISL_OPTION_GROUP( (io_size, "", "io_size", "io size in KB", ::cxxopts::value< uint32_t >()->default_value("4"), "io_size"), (vol_copy_file_path, "", "vol_copy_file_path", "file path for copied volume", ::cxxopts::value< std::string >()->default_value(""), "path [...]"), + (p_zero_buffer, "", "p_zero_buffer", + "percentage of zero buffer occurrence for testing thin provisioning within period", + ::cxxopts::value< uint32_t >()->default_value("70"), "0 to 100"), + (zero_buffer_period, "", "zero_buffer_period", " the period of consecutive zero buffer occurrence", + ::cxxopts::value< uint32_t >()->default_value("100"), "0 to 100"), + (thin_provision_enable, "", "thin_provision_enable", " enable thin provisioning", + ::cxxopts::value< uint32_t >()->default_value("0"), "flag"), (unmap_frequency, "", "unmap_frequency", "do unmap for every N", ::cxxopts::value< uint64_t >()->default_value("100"), "unmap_frequency")) @@ -2758,6 +2805,9 @@ int main(int argc, char* argv[]) { gcfg.app_mem_size_in_gb = SISL_OPTIONS["app_mem_size_in_gb"].as< uint32_t >(); gcfg.vol_copy_file_path = SISL_OPTIONS["vol_copy_file_path"].as< std::string >(); const auto io_size_in_kb = 
SISL_OPTIONS["io_size"].as< uint32_t >(); + gcfg.p_zero_buffer = SISL_OPTIONS["p_zero_buffer"].as< uint32_t >(); + gcfg.zero_buffer_period = SISL_OPTIONS["zero_buffer_period"].as< uint32_t >(); + gcfg.thin_provision_enable = SISL_OPTIONS["thin_provision_enable"].as< uint32_t >() != 0 ? true : false; gcfg.io_size = io_size_in_kb * 1024; HS_REL_ASSERT(io_size_in_kb && (io_size_in_kb % 4 == 0), diff --git a/src/homeblks/volume/volume.cpp b/src/homeblks/volume/volume.cpp index 77ff6fee7..43dd0c007 100644 --- a/src/homeblks/volume/volume.cpp +++ b/src/homeblks/volume/volume.cpp @@ -171,7 +171,7 @@ Volume::Volume(const vol_params& params) : throw std::runtime_error("shutdown in progress"); } m_sobject = m_hb->sobject_mgr()->create_object("volume", params.vol_name, - std::bind(&Volume::get_status, this, std::placeholders::_1)); + std::bind(&Volume::get_status, this, std::placeholders::_1)); m_state = vol_state::UNINITED; } @@ -190,7 +190,7 @@ Volume::Volume(meta_blk* mblk_cookie, sisl::byte_view sb_buf) : HS_REL_ASSERT_EQ(sb->magic, vol_sb_magic, "magic mismatch"); m_hb = HomeBlks::safe_instance(); m_sobject = m_hb->sobject_mgr()->create_object("volume", sb->vol_name, - std::bind(&Volume::get_status, this, std::placeholders::_1)); + std::bind(&Volume::get_status, this, std::placeholders::_1)); } void Volume::init() { @@ -335,7 +335,114 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info) return static_cast< indx_tbl* >(tbl); } +static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) { + std::vector< std::pair< int, int > > result; + + int start = -1; + for (std::size_t i = 0; i < empty_blocks.size(); ++i) { + if (empty_blocks[i]) { + if (start == -1) { start = i; } + } else { + if (start != -1) { + result.emplace_back(start, i - start); + start = -1; + } + } + } + if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); } + return result; +} + +static bool is_buf_zero(const 
uint8_t* buf, size_t size) { + // TODO: subsample the buffer to detect zero request instead of working on the whole buffer to achieve constant + // processing time for large buffer size requests. Needs to investigate the performance impact of this change + // in end2end testing. NOTE(review): read_buf/zero_crc below are statics sized by the FIRST call's size, so any call with a different size always fails the crc check and is reported non-zero — confirm all callers pass a single fixed request size, or drop the statics. + static std::vector< uint8_t > read_buf(size, 0); + static const auto zero_crc = crc16_t10dif(init_crc_16, read_buf.data(), size); + const auto crc = crc16_t10dif(init_crc_16, buf, size); + return (crc == zero_crc) ? (buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1)) : false; +} + +static bool is_iovec_zero(const std::vector< iovec >& iovecs) { + for (const auto& iovec : iovecs) { + auto data = static_cast< uint8_t* >(iovec.iov_base); + const size_t size = iovec.iov_len; + if (!is_buf_zero(data, size)) { return false; } + } + return true; +} + +static bool is_zero_request(const vol_interface_req_ptr& iface_req, uint32_t page_size) { + if (iface_req->iovecs.empty()) { + return is_buf_zero(static_cast< uint8_t* >(iface_req->buffer), iface_req->nlbas * page_size); + } + return is_iovec_zero(iface_req->iovecs); +} + +#if 0 +// TODO: use these functions for near future optimization of write path for thin provisioning volumes to enable skipping +// writing empty blocks in subrange intervals for requested buffer instead of detecting the all-zero-buffer requests. 
+static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size, + uint32_t nlbas, bool empty_blocks = false) { + std::vector< std::pair< int, int > > intervals; + bool in_empty_region = false; + int current_range_start = -1; + int current_range_length = 1; + for (uint32_t i = 0; i < nlbas; i++) { + const uint8_t* page_start = buf + (i * page_size); + bool is_page_empty = (empty_blocks == is_buf_zero(page_start, page_size)); + if (is_page_empty) { + if (!in_empty_region) { + current_range_start = i; + current_range_length = 1; + in_empty_region = true; + } else { + current_range_length++; + } + } else { + if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } + in_empty_region = false; + } + } + if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } + return intervals; +} + +static std::string print_ranges(lba_t start_lba, const std::vector< std::pair< int, int > >& intervals) { + auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string { + std::vector< std::string > result_strings; + std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings), + [start_lba](const std::pair< int, int >& p) -> std::string { + // Use a static buffer to hold the formatted string + static char buffer[32]; + std::snprintf(buffer, sizeof(buffer), "<%ld,%d>", p.first + start_lba, p.second); + return buffer; + }); + return std::accumulate(result_strings.begin(), result_strings.end(), std::string("")); + }; + return intervals_to_string(intervals); +} +#endif + std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { + std::error_condition ret{no_error}; + if (!HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning)) { + return write_internal(iface_req); + } else { + bool zeroRequest = is_zero_request(iface_req, get_page_size()); + if (zeroRequest) { + 
THIS_VOL_LOG(TRACE, volume, iface_req, "zero request <{}, {}>", iface_req->lba, iface_req->nlbas); + iface_req->op_type = Op_type::UNMAP; + ret = unmap(iface_req); + } else { + ret = write_internal(iface_req); + } + } + iface_req->op_type = Op_type::WRITE; + return ret; +} + +std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { static thread_local std::vector< BlkId > bid{}; std::error_condition ret{no_error}; @@ -924,11 +1031,11 @@ sisl::status_response Volume::get_status(const sisl::status_request& request) { auto active_indx_json = get_active_indx()->sobject()->run_callback(request).json; if (!active_indx_json.empty()) { response.json["index"] = active_indx_json; } - response.json["name"] = sobject()->name(); + response.json["name"] = sobject()->name(); response.json["type"] = sobject()->type(); response.json["uuid"] = boost::lexical_cast< std::string >(get_uuid()); response.json["state"] = is_offline() ? "Offline" : "Online"; - response.json["size"]= get_size(); + response.json["size"] = get_size(); return response; } diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp index 10c2fbbf5..13c6cb99a 100644 --- a/src/homeblks/volume/volume.hpp +++ b/src/homeblks/volume/volume.hpp @@ -464,6 +464,12 @@ class Volume : public std::enable_shared_from_this< Volume > { * @return :- no_error if there is no error. It doesn't throw any exception */ std::error_condition write(const vol_interface_req_ptr& hb_req); + + /* Write to lba + * @param hb_req :- it expects this request to be created + * @return :- no_error if there is no error. 
It doesn't throw any exception + */ + std::error_condition write_internal(const vol_interface_req_ptr& hb_req); /* Read from lba * @param hb_req :- it expects this request to be created @@ -729,7 +736,7 @@ struct volume_req : indx_req { csum_t* j_csum = (csum_t*)mem; if (!is_unmap() && active_nlbas_written != nlbas()) { - VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write{}", + VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write: {}", active_nlbas_written, nlbas()); } for (lba_count_t i{0}; !is_unmap() && i < active_nlbas_written; ++i) {