Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ThinProvision: skip all zero buffers #318

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/.clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ AlignOperands: false
AlignTrailingComments: true
AllowShortBlocksOnASingleLine: true
AllowShortIfStatementsOnASingleLine: true
AllowShortBlocksOnASingleLine: true
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

verbose (line 19)

AllowShortCaseLabelsOnASingleLine: false
# AllowShortFunctionsOnASingleLine: InlineOnly
# AllowShortLoopsOnASingleLine: false
Expand Down
2 changes: 1 addition & 1 deletion src/engine/blkalloc/blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ struct BlkId {
bool operator==(const BlkId& other) noexcept { return (compare(*this, other) == 0); }

void invalidate() { set(blk_num_t{0}, blk_count_t{0}, s_chunk_num_mask); }

// return invalid_blk_id() { return blk_count_t{0}; }
[[nodiscard]] bool is_valid() const { return (m_chunk_num != s_chunk_num_mask); }

[[nodiscard]] BlkId get_blkid_at(const uint32_t offset, const uint32_t pagesz) const {
Expand Down
2 changes: 2 additions & 0 deletions src/engine/common/homestore_config.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ table Generic {
// percentage of cache used to create indx mempool. It should be more than 100 to
// take into account some floating buffers in writeback cache.
indx_mempool_percent : uint32 = 110;

boot_thin_provisioning: bool = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the deployment plan to enable this feature in production if this is defaulted to false?

}

table ResourceLimits {
Expand Down
3 changes: 3 additions & 0 deletions src/homeblks/homeblks_config.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ table GeneralConfig {
// These fields should only be changed by agent through workflow
boot_restricted_mode: bool = false;
boot_safe_mode: bool = false;

// This field is for enabling thin provisioning on booting
boot_thin_provisioning: bool = true;
}

table HomeBlksSettings {
Expand Down
54 changes: 52 additions & 2 deletions src/homeblks/volume/tests/vol_gtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ struct TestCfg {
uint32_t p_vol_files_space;
std::string flip_name;
std::string vol_copy_file_path;
uint32_t p_zero_buffer;
uint32_t zero_buffer_period;
bool thin_provision_enable{false};

bool verify_csum() { return verify_type == verify_type_t::csum; }
bool verify_data() { return verify_type == verify_type_t::data; }
Expand Down Expand Up @@ -620,12 +623,20 @@ class VolTest : public ::testing::Test {
// vol_create_del_test = false;
// move_verify_to_done = false;
print_startTime = Clock::now();
if (tcfg.thin_provision_enable) {
HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; });
HS_SETTINGS_FACTORY().save();
}

// outstanding_ios = 0;
}

virtual ~VolTest() override {
if (init_buf) { iomanager.iobuf_free(static_cast< uint8_t* >(init_buf)); }
if (tcfg.thin_provision_enable) {
HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; });
HS_SETTINGS_FACTORY().save();
}
}

VolTest(const VolTest&) = delete;
Expand Down Expand Up @@ -1815,13 +1826,18 @@ class IOTestJob : public TestJob {

const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)};
const uint64_t size{nlbas * page_size};
static std::atomic< uint32_t > remaining_period{tcfg.zero_buffer_period};
uint32_t zero_counts_per_period = tcfg.p_zero_buffer * tcfg.zero_buffer_period / 100;
boost::intrusive_ptr< io_req_t > vreq{};
if (tcfg.write_cache) {
uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)};
HS_REL_ASSERT_NOTNULL(wbuf);

populate_buf(wbuf, size, lba, vinfo.get());

if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) &&
remaining_period.fetch_sub(1) < zero_counts_per_period) {
populate_zero_buf(wbuf, size);
}
vreq = boost::intrusive_ptr< io_req_t >(
new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache));
} else {
Expand All @@ -1833,20 +1849,32 @@ class IOTestJob : public TestJob {
HS_REL_ASSERT_NOTNULL(wbuf);
iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)};
iovecs.emplace_back(std::move(iov));

populate_buf(wbuf, page_size, lba + lba_num, vinfo.get());
}
if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) &&
remaining_period.fetch_sub(1) < zero_counts_per_period) {
for (const auto& iovec : iovecs) {
auto data = static_cast< uint8_t* >(iovec.iov_base);
const size_t size = iovec.iov_len;
populate_zero_buf(data, size);
}
}

vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba,
nlbas, tcfg.verify_csum(), tcfg.write_cache));
} else {
uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)};
populate_buf(wbuf, size, lba, vinfo.get());
if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) &&
remaining_period.fetch_sub(1) < zero_counts_per_period) {
populate_zero_buf(wbuf, size);
}
HS_REL_ASSERT_NOTNULL(wbuf);

vreq = boost::intrusive_ptr< io_req_t >{
new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)};
}
if (remaining_period.load() == 0) { remaining_period.store(tcfg.zero_buffer_period); }
send_iovec = !send_iovec;
}
vreq->cookie = static_cast< void* >(this);
Expand Down Expand Up @@ -1880,6 +1908,8 @@ class IOTestJob : public TestJob {
}
}

// Fill the first 'size' bytes of 'buf' with zeros to emulate an all-zero write payload.
void populate_zero_buf(uint8_t* buf, const uint64_t size) {
    for (uint64_t idx{0}; idx < size; ++idx) {
        buf[idx] = 0;
    }
}

bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) {
const auto vinfo{m_voltest->m_vol_info[cur]};
const auto vol{vinfo->vol};
Expand Down Expand Up @@ -2224,6 +2254,16 @@ TEST_F(VolTest, init_io_test) {
if (tcfg.remove_file_on_shutdown) { this->remove_files(); }
}

/*!
    @test thin_provisioning_test
    @brief Runs a volume create/delete job to completion and shuts down cleanly.
    NOTE(review): this test body never sets tcfg.thin_provision_enable, and the
    boot_thin_provisioning setting is applied in the VolTest constructor, which
    runs before this body — so thin provisioning is only active here when the
    --thin_provision_enable CLI option was passed. Confirm this is intended; the
    test also issues no zero-buffer writes/reads of its own.
*/
TEST_F(VolTest, thin_provisioning_test) {
    // Let the create/delete job create volumes itself rather than pre-creating them.
    tcfg.precreate_volume = false;
    this->start_homestore();

    auto cdjob{std::make_unique< VolCreateDeleteJob >(this)};
    this->start_job(cdjob.get(), wait_type::for_completion);
    this->shutdown();
    // Remove backing files unless the caller asked to keep them for inspection.
    if (tcfg.remove_file_on_shutdown) { this->remove_files(); }
}

/*!
@test recovery_io_test
@brief Tests which does recovery. End up with a clean shutdown
Expand Down Expand Up @@ -2682,6 +2722,13 @@ SISL_OPTION_GROUP(
(io_size, "", "io_size", "io size in KB", ::cxxopts::value< uint32_t >()->default_value("4"), "io_size"),
(vol_copy_file_path, "", "vol_copy_file_path", "file path for copied volume",
::cxxopts::value< std::string >()->default_value(""), "path [...]"),
(p_zero_buffer, "", "p_zero_buffer",
"percentage of zero buffer occurrence for testing thin provisioning within period",
::cxxopts::value< uint32_t >()->default_value("70"), "0 to 100"),
(zero_buffer_period, "", "zero_buffer_period", " the period of consecutive zero buffer occurrence",
::cxxopts::value< uint32_t >()->default_value("100"), "0 to 100"),
(thin_provision_enable, "", "thin_provision_enable", " enable thin provisioning",
::cxxopts::value< uint32_t >()->default_value("0"), "flag"),
(unmap_frequency, "", "unmap_frequency", "do unmap for every N",
::cxxopts::value< uint64_t >()->default_value("100"), "unmap_frequency"))

Expand Down Expand Up @@ -2758,6 +2805,9 @@ int main(int argc, char* argv[]) {
gcfg.app_mem_size_in_gb = SISL_OPTIONS["app_mem_size_in_gb"].as< uint32_t >();
gcfg.vol_copy_file_path = SISL_OPTIONS["vol_copy_file_path"].as< std::string >();
const auto io_size_in_kb = SISL_OPTIONS["io_size"].as< uint32_t >();
gcfg.p_zero_buffer = SISL_OPTIONS["p_zero_buffer"].as< uint32_t >();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as we discussed, would suggest to add a basic zero write/read test case:

  1. populate real data from LBA [0, 2MB]
  2. write all zeros to [128K, 128K + 32K]
  3. read LBA from [128K, 128K + 32K], verify all zeros are returned.

Another basic test:
Similar as above one, only skip the 1st step, e.g. write zeros to LBAs that has never been written before.

This is to verify that both new writes and overwrites with all zeros work correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me. I am adding it to the PR

gcfg.zero_buffer_period = SISL_OPTIONS["zero_buffer_period"].as< uint32_t >();
gcfg.thin_provision_enable = SISL_OPTIONS["thin_provision_enable"].as< uint32_t >() != 0 ? true : false;
gcfg.io_size = io_size_in_kb * 1024;

HS_REL_ASSERT(io_size_in_kb && (io_size_in_kb % 4 == 0),
Expand Down
115 changes: 111 additions & 4 deletions src/homeblks/volume/volume.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ Volume::Volume(const vol_params& params) :
throw std::runtime_error("shutdown in progress");
}
m_sobject = m_hb->sobject_mgr()->create_object("volume", params.vol_name,
std::bind(&Volume::get_status, this, std::placeholders::_1));
std::bind(&Volume::get_status, this, std::placeholders::_1));

m_state = vol_state::UNINITED;
}
Expand All @@ -190,7 +190,7 @@ Volume::Volume(meta_blk* mblk_cookie, sisl::byte_view sb_buf) :
HS_REL_ASSERT_EQ(sb->magic, vol_sb_magic, "magic mismatch");
m_hb = HomeBlks::safe_instance();
m_sobject = m_hb->sobject_mgr()->create_object("volume", sb->vol_name,
std::bind(&Volume::get_status, this, std::placeholders::_1));
std::bind(&Volume::get_status, this, std::placeholders::_1));
}

void Volume::init() {
Expand Down Expand Up @@ -335,7 +335,114 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info)
return static_cast< indx_tbl* >(tbl);
}

static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any place using this api as well as copmute_range_intervals, can we remove them unless there is a plan to use them in the near future?

std::vector< std::pair< int, int > > result;

int start = -1;
for (std::size_t i = 0; i < empty_blocks.size(); ++i) {
if (empty_blocks[i]) {
if (start == -1) { start = i; }
} else {
if (start != -1) {
result.emplace_back(start, i - start);
start = -1;
}
}
}
if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); }
return result;
}

/* Returns true iff the first 'size' bytes of 'buf' are all zero.
 *
 * Fix: the previous implementation cached a zero-filled buffer and its CRC in function-local
 * statics sized by the FIRST call's 'size'; any later call with a different size compared
 * against a stale zero_crc and mis-classified genuinely all-zero buffers as non-zero. It also
 * scanned the buffer twice (one CRC pass plus one memcmp pass). A single overlapping-memcmp
 * sweep is correct for every size and touches each byte once.
 *
 * TODO: subsample the buffer to detect zero request instead of working on the whole buffer
 * to achieve constant processing time for large buffer size requests. Needs to investigate
 * the performance impact of this change in end2end testing.
 */
static bool is_buf_zero(const uint8_t* buf, size_t size) {
    if (size == 0) { return true; } // an empty range is vacuously all-zero
    // buf[0] == 0 combined with "every byte equals its successor" implies all bytes are zero.
    return (buf[0] == 0) && (std::memcmp(buf, buf + 1, size - 1) == 0);
}

/* Returns true iff every byte covered by every iovec entry is zero.
 * An empty iovec list is treated as all-zero (loop body never runs). */
static bool is_iovec_zero(const std::vector< iovec >& iovecs) {
    bool all_zero{true};
    for (const auto& iov : iovecs) {
        const auto* bytes = static_cast< const uint8_t* >(iov.iov_base);
        if (!is_buf_zero(bytes, iov.iov_len)) {
            all_zero = false;
            break; // first non-zero region decides the answer
        }
    }
    return all_zero;
}

/* Returns true iff the request payload is entirely zero bytes.
 * Requests carry data either as a single flat buffer (iovecs empty) or as an iovec list;
 * dispatch to the matching zero-detector. */
static bool is_zero_request(const vol_interface_req_ptr& iface_req, uint32_t page_size) {
    return iface_req->iovecs.empty()
        ? is_buf_zero(static_cast< uint8_t* >(iface_req->buffer), iface_req->nlbas * page_size)
        : is_iovec_zero(iface_req->iovecs);
}

#if 0
// TODO: use these functions for near future optimization of write path for thin provisioning volumes to enable skipping
// writing empty blocks in subrange intervals for requested buffer instead of detecting the all-zero-buffer requests.
static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for later extension on subranges

uint32_t nlbas, bool empty_blocks = false) {
std::vector< std::pair< int, int > > intervals;
bool in_empty_region = false;
int current_range_start = -1;
int current_range_length = 1;
for (uint32_t i = 0; i < nlbas; i++) {
const uint8_t* page_start = buf + (i * page_size);
bool is_page_empty = (empty_blocks == is_buf_zero(page_start, page_size));
if (is_page_empty) {
if (!in_empty_region) {
current_range_start = i;
current_range_length = 1;
in_empty_region = true;
} else {
current_range_length++;
}
} else {
if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); }
in_empty_region = false;
}
}
if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); }
return intervals;
}

static std::string print_ranges(lba_t start_lba, const std::vector< std::pair< int, int > >& intervals) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one also can be removed if not going to be used

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put these three APIs that are currently not used together and make an #if 0 around them, so that it would be easier for others to read and understand this thin-provisioning code? Also put comment that these apis are for future usage if we support paritial zero detection.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string {
std::vector< std::string > result_strings;
std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings),
[start_lba](const std::pair< int, int >& p) -> std::string {
// Use a static buffer to hold the formatted string
static char buffer[32];
std::snprintf(buffer, sizeof(buffer), "<%ld,%d>", p.first + start_lba, p.second);
return buffer;
});
return std::accumulate(result_strings.begin(), result_strings.end(), std::string(""));
};
return intervals_to_string(intervals);
}
#endif

std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) {
std::error_condition ret{no_error};
if (!HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning)) {
return write_internal(iface_req);
} else {
bool zeroRequest = is_zero_request(iface_req, get_page_size());
if (zeroRequest) {
THIS_VOL_LOG(TRACE, volume, iface_req, "zero request <{}, {}>", iface_req->lba, iface_req->nlbas);
iface_req->op_type = Op_type::UNMAP;
ret = unmap(iface_req);
} else {
ret = write_internal(iface_req);
}
}
iface_req->op_type = Op_type::WRITE;
return ret;
}

std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) {
static thread_local std::vector< BlkId > bid{};
std::error_condition ret{no_error};

Expand Down Expand Up @@ -924,11 +1031,11 @@ sisl::status_response Volume::get_status(const sisl::status_request& request) {
auto active_indx_json = get_active_indx()->sobject()->run_callback(request).json;
if (!active_indx_json.empty()) { response.json["index"] = active_indx_json; }

response.json["name"] = sobject()->name();
response.json["name"] = sobject()->name();
response.json["type"] = sobject()->type();
response.json["uuid"] = boost::lexical_cast< std::string >(get_uuid());
response.json["state"] = is_offline() ? "Offline" : "Online";
response.json["size"]= get_size();
response.json["size"] = get_size();
return response;
}

Expand Down
9 changes: 8 additions & 1 deletion src/homeblks/volume/volume.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,13 @@ class Volume : public std::enable_shared_from_this< Volume > {
* @return :- no_error if there is no error. It doesn't throw any exception
*/
std::error_condition write(const vol_interface_req_ptr& hb_req);
std::error_condition write_internal(const vol_interface_req_ptr& hb_req);

/* Write to lba
* @param hb_req :- it expects this request to be created
* @return :- no_error if there is no error. It doesn't throw any exception
*/
std::error_condition write_internal(const vol_interface_req_ptr& hb_req);

/* Read from lba
* @param hb_req :- it expects this request to be created
Expand Down Expand Up @@ -729,7 +736,7 @@ struct volume_req : indx_req {
csum_t* j_csum = (csum_t*)mem;

if (!is_unmap() && active_nlbas_written != nlbas()) {
VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write{}",
VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write: {}",
active_nlbas_written, nlbas());
}
for (lba_count_t i{0}; !is_unmap() && i < active_nlbas_written; ++i) {
Expand Down
Loading