-
Notifications
You must be signed in to change notification settings - Fork 21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ThinProvision: skip all zero buffers #318
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -142,6 +142,8 @@ table Generic { | |
// percentage of cache used to create indx mempool. It should be more than 100 to | ||
// take into account some floating buffers in writeback cache. | ||
indx_mempool_percent : uint32 = 110; | ||
|
||
boot_thin_provisioning: bool = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the deployment plan to enable this feature in production if this is defaulted to false? |
||
} | ||
|
||
table ResourceLimits { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -174,6 +174,9 @@ struct TestCfg { | |
uint32_t p_vol_files_space; | ||
std::string flip_name; | ||
std::string vol_copy_file_path; | ||
uint32_t p_zero_buffer; | ||
uint32_t zero_buffer_period; | ||
bool thin_provision_enable{false}; | ||
|
||
bool verify_csum() { return verify_type == verify_type_t::csum; } | ||
bool verify_data() { return verify_type == verify_type_t::data; } | ||
|
@@ -620,12 +623,20 @@ class VolTest : public ::testing::Test { | |
// vol_create_del_test = false; | ||
// move_verify_to_done = false; | ||
print_startTime = Clock::now(); | ||
if (tcfg.thin_provision_enable) { | ||
HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; }); | ||
HS_SETTINGS_FACTORY().save(); | ||
} | ||
|
||
// outstanding_ios = 0; | ||
} | ||
|
||
virtual ~VolTest() override { | ||
if (init_buf) { iomanager.iobuf_free(static_cast< uint8_t* >(init_buf)); } | ||
if (tcfg.thin_provision_enable) { | ||
HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; }); | ||
HS_SETTINGS_FACTORY().save(); | ||
} | ||
} | ||
|
||
VolTest(const VolTest&) = delete; | ||
|
@@ -1815,13 +1826,18 @@ class IOTestJob : public TestJob { | |
|
||
const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)}; | ||
const uint64_t size{nlbas * page_size}; | ||
static std::atomic< uint32_t > remaining_period{tcfg.zero_buffer_period}; | ||
uint32_t zero_counts_per_period = tcfg.p_zero_buffer * tcfg.zero_buffer_period / 100; | ||
boost::intrusive_ptr< io_req_t > vreq{}; | ||
if (tcfg.write_cache) { | ||
uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; | ||
HS_REL_ASSERT_NOTNULL(wbuf); | ||
|
||
populate_buf(wbuf, size, lba, vinfo.get()); | ||
|
||
if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && | ||
remaining_period.fetch_sub(1) < zero_counts_per_period) { | ||
populate_zero_buf(wbuf, size); | ||
} | ||
vreq = boost::intrusive_ptr< io_req_t >( | ||
new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); | ||
} else { | ||
|
@@ -1833,20 +1849,32 @@ class IOTestJob : public TestJob { | |
HS_REL_ASSERT_NOTNULL(wbuf); | ||
iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; | ||
iovecs.emplace_back(std::move(iov)); | ||
|
||
populate_buf(wbuf, page_size, lba + lba_num, vinfo.get()); | ||
} | ||
if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && | ||
remaining_period.fetch_sub(1) < zero_counts_per_period) { | ||
for (const auto& iovec : iovecs) { | ||
auto data = static_cast< uint8_t* >(iovec.iov_base); | ||
const size_t size = iovec.iov_len; | ||
populate_zero_buf(data, size); | ||
} | ||
} | ||
|
||
vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, | ||
nlbas, tcfg.verify_csum(), tcfg.write_cache)); | ||
} else { | ||
uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; | ||
populate_buf(wbuf, size, lba, vinfo.get()); | ||
if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && | ||
remaining_period.fetch_sub(1) < zero_counts_per_period) { | ||
populate_zero_buf(wbuf, size); | ||
} | ||
HS_REL_ASSERT_NOTNULL(wbuf); | ||
|
||
vreq = boost::intrusive_ptr< io_req_t >{ | ||
new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; | ||
} | ||
if (remaining_period.load() == 0) { remaining_period.store(tcfg.zero_buffer_period); } | ||
send_iovec = !send_iovec; | ||
} | ||
vreq->cookie = static_cast< void* >(this); | ||
|
@@ -1880,6 +1908,8 @@ class IOTestJob : public TestJob { | |
} | ||
} | ||
|
||
void populate_zero_buf(uint8_t* buf, const uint64_t size) { std::fill_n(buf, size, 0); } | ||
|
||
bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) { | ||
const auto vinfo{m_voltest->m_vol_info[cur]}; | ||
const auto vol{vinfo->vol}; | ||
|
@@ -2224,6 +2254,16 @@ TEST_F(VolTest, init_io_test) { | |
if (tcfg.remove_file_on_shutdown) { this->remove_files(); } | ||
} | ||
|
||
TEST_F(VolTest, thin_provisioning_test) { | ||
tcfg.precreate_volume = false; | ||
this->start_homestore(); | ||
|
||
auto cdjob{std::make_unique< VolCreateDeleteJob >(this)}; | ||
this->start_job(cdjob.get(), wait_type::for_completion); | ||
this->shutdown(); | ||
if (tcfg.remove_file_on_shutdown) { this->remove_files(); } | ||
} | ||
|
||
/*! | ||
@test recovery_io_test | ||
@brief Tests which does recovery. End up with a clean shutdown | ||
|
@@ -2682,6 +2722,13 @@ SISL_OPTION_GROUP( | |
(io_size, "", "io_size", "io size in KB", ::cxxopts::value< uint32_t >()->default_value("4"), "io_size"), | ||
(vol_copy_file_path, "", "vol_copy_file_path", "file path for copied volume", | ||
::cxxopts::value< std::string >()->default_value(""), "path [...]"), | ||
(p_zero_buffer, "", "p_zero_buffer", | ||
"percentage of zero buffer occurrence for testing thin provisioning within period", | ||
::cxxopts::value< uint32_t >()->default_value("70"), "0 to 100"), | ||
(zero_buffer_period, "", "zero_buffer_period", " the period of consecutive zero buffer occurrence", | ||
::cxxopts::value< uint32_t >()->default_value("100"), "0 to 100"), | ||
(thin_provision_enable, "", "thin_provision_enable", " enable thin provisioning", | ||
::cxxopts::value< uint32_t >()->default_value("0"), "flag"), | ||
(unmap_frequency, "", "unmap_frequency", "do unmap for every N", | ||
::cxxopts::value< uint64_t >()->default_value("100"), "unmap_frequency")) | ||
|
||
|
@@ -2758,6 +2805,9 @@ int main(int argc, char* argv[]) { | |
gcfg.app_mem_size_in_gb = SISL_OPTIONS["app_mem_size_in_gb"].as< uint32_t >(); | ||
gcfg.vol_copy_file_path = SISL_OPTIONS["vol_copy_file_path"].as< std::string >(); | ||
const auto io_size_in_kb = SISL_OPTIONS["io_size"].as< uint32_t >(); | ||
gcfg.p_zero_buffer = SISL_OPTIONS["p_zero_buffer"].as< uint32_t >(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as we discussed, would suggest to add a basic zero write/read test case:
Another basic test: This is to verify that new writes and/or overwrite with all zeros both works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good to me. I am adding it to the PR |
||
gcfg.zero_buffer_period = SISL_OPTIONS["zero_buffer_period"].as< uint32_t >(); | ||
gcfg.thin_provision_enable = SISL_OPTIONS["thin_provision_enable"].as< uint32_t >() != 0 ? true : false; | ||
gcfg.io_size = io_size_in_kb * 1024; | ||
|
||
HS_REL_ASSERT(io_size_in_kb && (io_size_in_kb % 4 == 0), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -171,7 +171,7 @@ Volume::Volume(const vol_params& params) : | |
throw std::runtime_error("shutdown in progress"); | ||
} | ||
m_sobject = m_hb->sobject_mgr()->create_object("volume", params.vol_name, | ||
std::bind(&Volume::get_status, this, std::placeholders::_1)); | ||
std::bind(&Volume::get_status, this, std::placeholders::_1)); | ||
|
||
m_state = vol_state::UNINITED; | ||
} | ||
|
@@ -190,7 +190,7 @@ Volume::Volume(meta_blk* mblk_cookie, sisl::byte_view sb_buf) : | |
HS_REL_ASSERT_EQ(sb->magic, vol_sb_magic, "magic mismatch"); | ||
m_hb = HomeBlks::safe_instance(); | ||
m_sobject = m_hb->sobject_mgr()->create_object("volume", sb->vol_name, | ||
std::bind(&Volume::get_status, this, std::placeholders::_1)); | ||
std::bind(&Volume::get_status, this, std::placeholders::_1)); | ||
} | ||
|
||
void Volume::init() { | ||
|
@@ -335,7 +335,114 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info) | |
return static_cast< indx_tbl* >(tbl); | ||
} | ||
|
||
static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see any place using this api as well as copmute_range_intervals, can we remove them unless there is a plan to use them in the near future? |
||
std::vector< std::pair< int, int > > result; | ||
|
||
int start = -1; | ||
for (std::size_t i = 0; i < empty_blocks.size(); ++i) { | ||
if (empty_blocks[i]) { | ||
if (start == -1) { start = i; } | ||
} else { | ||
if (start != -1) { | ||
result.emplace_back(start, i - start); | ||
start = -1; | ||
} | ||
} | ||
} | ||
if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); } | ||
return result; | ||
} | ||
|
||
static bool is_buf_zero(const uint8_t* buf, size_t size) { | ||
// TODO: subsample the buffer to detect zero request instead of working on the whole buffer to achieve constant | ||
// processing time for large buffer size requests. Needs to investigate the performance impact of this change | ||
// in end2end testing. | ||
static std::vector< uint8_t > read_buf(size, 0); | ||
static const auto zero_crc = crc16_t10dif(init_crc_16, read_buf.data(), size); | ||
const auto crc = crc16_t10dif(init_crc_16, buf, size); | ||
return (crc == zero_crc) ? (buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1)) : false; | ||
} | ||
|
||
static bool is_iovec_zero(const std::vector< iovec >& iovecs) { | ||
for (const auto& iovec : iovecs) { | ||
auto data = static_cast< uint8_t* >(iovec.iov_base); | ||
const size_t size = iovec.iov_len; | ||
if (!is_buf_zero(data, size)) { return false; } | ||
} | ||
return true; | ||
} | ||
|
||
static bool is_zero_request(const vol_interface_req_ptr& iface_req, uint32_t page_size) { | ||
if (iface_req->iovecs.empty()) { | ||
return is_buf_zero(static_cast< uint8_t* >(iface_req->buffer), iface_req->nlbas * page_size); | ||
} | ||
return is_iovec_zero(iface_req->iovecs); | ||
} | ||
|
||
#if 0 | ||
// TODO: use these functions for near future optimization of write path for thin provisioning volumes to enable skipping | ||
// writing empty blocks in subrange intervals for requested buffer instead of detecting the all-zero-buffer requests. | ||
static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for later extension on subranges |
||
uint32_t nlbas, bool empty_blocks = false) { | ||
std::vector< std::pair< int, int > > intervals; | ||
bool in_empty_region = false; | ||
int current_range_start = -1; | ||
int current_range_length = 1; | ||
for (uint32_t i = 0; i < nlbas; i++) { | ||
const uint8_t* page_start = buf + (i * page_size); | ||
bool is_page_empty = (empty_blocks == is_buf_zero(page_start, page_size)); | ||
if (is_page_empty) { | ||
if (!in_empty_region) { | ||
current_range_start = i; | ||
current_range_length = 1; | ||
in_empty_region = true; | ||
} else { | ||
current_range_length++; | ||
} | ||
} else { | ||
if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } | ||
in_empty_region = false; | ||
} | ||
} | ||
if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } | ||
return intervals; | ||
} | ||
|
||
static std::string print_ranges(lba_t start_lba, const std::vector< std::pair< int, int > >& intervals) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this one also can be removed if not going to be used There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you put these three APIs that are currently not used together and make an #if 0 around them, so that it would be easier for others to read and understand this thin-provisioning code? Also put comment that these apis are for future usage if we support paritial zero detection. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string { | ||
std::vector< std::string > result_strings; | ||
std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings), | ||
[start_lba](const std::pair< int, int >& p) -> std::string { | ||
// Use a static buffer to hold the formatted string | ||
static char buffer[32]; | ||
std::snprintf(buffer, sizeof(buffer), "<%ld,%d>", p.first + start_lba, p.second); | ||
return buffer; | ||
}); | ||
return std::accumulate(result_strings.begin(), result_strings.end(), std::string("")); | ||
}; | ||
return intervals_to_string(intervals); | ||
} | ||
#endif | ||
|
||
std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { | ||
std::error_condition ret{no_error}; | ||
if (!HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning)) { | ||
return write_internal(iface_req); | ||
} else { | ||
bool zeroRequest = is_zero_request(iface_req, get_page_size()); | ||
if (zeroRequest) { | ||
THIS_VOL_LOG(TRACE, volume, iface_req, "zero request <{}, {}>", iface_req->lba, iface_req->nlbas); | ||
iface_req->op_type = Op_type::UNMAP; | ||
ret = unmap(iface_req); | ||
} else { | ||
ret = write_internal(iface_req); | ||
} | ||
} | ||
iface_req->op_type = Op_type::WRITE; | ||
return ret; | ||
} | ||
|
||
std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { | ||
static thread_local std::vector< BlkId > bid{}; | ||
std::error_condition ret{no_error}; | ||
|
||
|
@@ -924,11 +1031,11 @@ sisl::status_response Volume::get_status(const sisl::status_request& request) { | |
auto active_indx_json = get_active_indx()->sobject()->run_callback(request).json; | ||
if (!active_indx_json.empty()) { response.json["index"] = active_indx_json; } | ||
|
||
response.json["name"] = sobject()->name(); | ||
response.json["name"] = sobject()->name(); | ||
response.json["type"] = sobject()->type(); | ||
response.json["uuid"] = boost::lexical_cast< std::string >(get_uuid()); | ||
response.json["state"] = is_offline() ? "Offline" : "Online"; | ||
response.json["size"]= get_size(); | ||
response.json["size"] = get_size(); | ||
return response; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
verbose (line 19)