Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adopt device type #167

Merged
merged 11 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomeObjectConan(ConanFile):
name = "homeobject"
version = "1.0.16"
version = "1.1.1"
homepage = "https://github.com/eBay/HomeObject"
description = "Blob Store built on HomeReplication"
topics = ("ebay")
Expand Down Expand Up @@ -40,8 +40,8 @@ def build_requirements(self):
self.build_requires("gtest/1.14.0")

def requirements(self):
self.requires("homestore/[>=6.2, include_prerelease=True]@oss/master")
self.requires("sisl/[>=12.1, include_prerelease=True]@oss/master")
self.requires("homestore/[^6.2, include_prerelease=True]@oss/master")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: if HS made some api change that will break HO build, previously we only bump minor version, this stays true still, right? e.g. we don't need to bump major, just bump minor for HS.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure...previously we bumped the major version. We discussed in HS meeting but I am not sure a decision has been made.

If that is the case, we need to pin to [^6.2.0]

self.requires("sisl/[^12.1, include_prerelease=True]@oss/master")
self.requires("lz4/1.9.4", override=True)

def validate(self):
Expand Down
10 changes: 9 additions & 1 deletion src/include/homeobject/homeobject.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,28 @@
#include <string>

#include "common.hpp"
#include <sisl/utility/enum.hpp>

namespace homeobject {

class BlobManager;
class PGManager;
class ShardManager;
// Logical classes a backing device may be declared as. AUTO_DETECT defers
// classification to the backend's drive-type probe at init time.
ENUM(DevType, uint8_t, AUTO_DETECT = 1, HDD, NVME, UNSUPPORTED);
// Describes one backing device the application hands to HomeObject.
// NOTE: the path is canonicalized at construction, so the file/block device
// must already exist — std::filesystem::canonical throws otherwise.
struct device_info_t {
    // dtype defaults to AUTO_DETECT, letting the backend decide HDD vs NVME.
    explicit device_info_t(std::string name, DevType dtype = DevType::AUTO_DETECT) : path{std::filesystem::canonical(name)}, type{dtype} {}
    // Equal when both the canonical path and the declared type match.
    bool operator ==(device_info_t const& rhs) const { return path == rhs.path && type == rhs.type; }
    std::filesystem::path path; // canonical (symlink-resolved, absolute) device path
    DevType type;               // declared type; AUTO_DETECT means "probe at init"
};

class HomeObjectApplication {
public:
virtual ~HomeObjectApplication() = default;

virtual bool spdk_mode() const = 0;
virtual uint32_t threads() const = 0;
virtual std::list< std::filesystem::path > devices() const = 0;
virtual std::list< device_info_t > devices() const = 0;

// Callback made after determining if a SvcId exists or not during initialization, will consume response
virtual peer_id_t discover_svcid(std::optional< peer_id_t > const& found) const = 0;
Expand Down
73 changes: 60 additions & 13 deletions src/lib/homestore_backend/hs_homeobject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ class HSReplApplication : public homestore::ReplApplication {
//
// This should assert if we can not initialize HomeStore.
//
// Map the iomgr-detected drive type of `devname` onto the HomeObject DevType
// enum. Physical block devices and file-backed devices collapse into the same
// logical class (HDD vs NVME); anything else is reported as UNSUPPORTED.
DevType HSHomeObject::get_device_type(string const& devname) {
    switch (iomgr::DriveInterface::get_drive_type(devname)) {
    case iomgr::drive_type::block_hdd:
    case iomgr::drive_type::file_on_hdd:
        return DevType::HDD;
    case iomgr::drive_type::block_nvme:
    case iomgr::drive_type::file_on_nvme:
        return DevType::NVME;
    default:
        return DevType::UNSUPPORTED;
    }
}

void HSHomeObject::init_homestore() {
auto app = _application.lock();
RELEASE_ASSERT(app, "HomeObjectApplication lifetime unexpected!");
Expand All @@ -115,9 +122,27 @@ void HSHomeObject::init_homestore() {
LOGI("Initialize and start HomeStore with app_mem_size = {}", homestore::in_bytes(app_mem_size));

std::vector< homestore::dev_info > device_info;
for (auto const& path : app->devices()) {
device_info.emplace_back(std::filesystem::canonical(path).string(), homestore::HSDevType::Data);
bool has_data_dev = false;
bool has_fast_dev = false;
for (auto const& dev : app->devices()) {
auto input_dev_type = dev.type;
yamingk marked this conversation as resolved.
Show resolved Hide resolved
auto detected_type = get_device_type(dev.path.string());
LOGD("Device {} detected as {}", dev.path.string(), detected_type);
auto final_type = (dev.type == DevType::AUTO_DETECT) ? detected_type : input_dev_type;
if (final_type == DevType::UNSUPPORTED) {
LOGW("Device {} is not supported, skipping", dev.path.string());
continue;
}
if (input_dev_type != DevType::AUTO_DETECT && detected_type != final_type) {
LOGW("Device {} detected as {}, but input type is {}, using input type", dev.path.string(), detected_type,
input_dev_type);
}
auto hs_type = (final_type == DevType::HDD) ? homestore::HSDevType::Data : homestore::HSDevType::Fast;
if (hs_type == homestore::HSDevType::Data) { has_data_dev = true; }
if (hs_type == homestore::HSDevType::Fast) { has_fast_dev = true; }
device_info.emplace_back(std::filesystem::canonical(dev.path).string(), hs_type);
}
RELEASE_ASSERT(device_info.size() != 0, "No supported devices found!");

xiaoxichen marked this conversation as resolved.
Show resolved Hide resolved
chunk_selector_ = std::make_shared< HeapChunkSelector >();
using namespace homestore;
Expand All @@ -134,17 +159,39 @@ void HSHomeObject::init_homestore() {
RELEASE_ASSERT(!_our_id.is_nil(), "Received no SvcId and need FORMAT!");
LOGW("We are starting for the first time on [{}], Formatting!!", to_string(_our_id));

HomeStore::instance()->format_and_start({
{HS_SERVICE::META, hs_format_params{.size_pct = 5.0}},
{HS_SERVICE::LOG, hs_format_params{.size_pct = 10.0, .chunk_size = 32 * Mi}},
{HS_SERVICE::REPLICATION,
hs_format_params{.size_pct = 79.0,
.num_chunks = 65000,
.block_size = _data_block_size,
.alloc_type = blk_allocator_type_t::append,
.chunk_sel_type = chunk_selector_type_t::CUSTOM}},
{HS_SERVICE::INDEX, hs_format_params{.size_pct = 5.0}},
});
if (has_data_dev && has_fast_dev) {
// Hybrid mode
LOGD("Has both Data and Fast, running with Hybrid mode");
HomeStore::instance()->format_and_start({
{HS_SERVICE::META, hs_format_params{.dev_type = HSDevType::Fast, .size_pct = 9.0, .num_chunks = 64}},
{HS_SERVICE::LOG,
hs_format_params{.dev_type = HSDevType::Fast, .size_pct = 45.0, .chunk_size = 32 * Mi}},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that if chunk_size is specified, HS starts from 0 num_chunks, but how does it know how many chunks have been created in total, and what is the number of chunks available to this LOG service? Is there a place where we assert that the total number of chunks created is less than 64K (this is that FIXME you've put in create_vdev, right?).

I did some calculation, say one NVME drive is 500GB, 45% is around 200GB and it will require 6400 number of chunks with 32MB chunk size. 6400 + 64 + 128 + 65000 will exceed 64K, right? Please correct me if I missed something here.

Copy link
Collaborator Author

@xiaoxichen xiaoxichen Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, this is a very tricky part. The log device goes with chunk_size and creates chunks dynamically, so in the worst case it could create FAST_SIZE * PCT / CHUNK_SIZE chunks. But we don't yet have the intelligence to distribute #chunks across services; that is part of the FIXME.

For the current configuration I tend to believe the behavior will be that only 344 chunks are available for log, bounding the max logstore size to 344*32MB = 11G.

I will reduce the 65000 to 60000 for now. Other enhancement can be taken care later.

Copy link
Contributor

@yamingk yamingk Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the ideal solution (with your change, in homestore) seems to be, log device should use that chunk_size, and num_chunks should not exceeding the total 64K, regardless of the pct set for this log service. This will result in losing some space not being used, better than creating chunk numbers that will overflow the chunk number and cause correctness issues.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I think we need to change the log vdev: when creating it, a max_num_chunks parameter can be provided, and the logstore needs to stay within that limit to avoid create_chunk failure, while also ensuring the logdev is flushed/truncated properly based on max_num_chunks. The logstore can take this chance to decide whether it wants to adjust the chunk_size.

There will not be correctness issue as in pdev::create_chunk , it will throw std::out_of_range but that probably crash the process. https://github.com/eBay/HomeStore/blob/2c500c573fa2c5733218e0cbdd44226fb3e6504f/src/lib/device/physical_dev.cpp#L275

{HS_SERVICE::INDEX, hs_format_params{.dev_type = HSDevType::Fast, .size_pct = 45.0, .num_chunks = 128}},
{HS_SERVICE::REPLICATION,
hs_format_params{.dev_type = HSDevType::Data,
.size_pct = 99.0,
.num_chunks = 65000,
Copy link
Contributor

@yamingk yamingk Apr 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't num_chunks from NVME drives + num_chunks from HDD equal to 65000?

Is it because maximum is 64K which is 65536, and we leave the 536 for NVME chunks?

Copy link
Collaborator Author

@xiaoxichen xiaoxichen Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that is the idea, as said, lower it to 60000 to give more chunks to nvme

.block_size = _data_block_size,
.alloc_type = blk_allocator_type_t::append,
.chunk_sel_type = chunk_selector_type_t::CUSTOM}},
});
} else {
auto run_on_type = has_fast_dev ? homestore::HSDevType::Fast : homestore::HSDevType::Data;
LOGD("Running with Single mode, all service on {}", run_on_type);
HomeStore::instance()->format_and_start({
// FIXME: this is to work around the issue in HS that the varsize allocator doesn't work with small chunk sizes.
{HS_SERVICE::META, hs_format_params{.dev_type = run_on_type, .size_pct = 5.0, .num_chunks = 1}},
{HS_SERVICE::LOG, hs_format_params{.dev_type = run_on_type, .size_pct = 10.0, .chunk_size = 32 * Mi}},
{HS_SERVICE::INDEX, hs_format_params{.dev_type = run_on_type, .size_pct = 5.0, .num_chunks = 1}},
{HS_SERVICE::REPLICATION,
hs_format_params{.dev_type = run_on_type,
.size_pct = 79.0,
.num_chunks = 65000,
Copy link
Contributor

@yamingk yamingk Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should also be adjusted, right? 10 pct of total device would be still very large and exceed 64K in total?

Also can we be more conservative say setting this to 40000, before the fixme part is done? I am not sure what will be the total disk size in production, we probably need some careful calculation.

For mixed mode, I remember hearing something from John D saying we would have around 900GB of nvme per SM, and it would result in around 12800 num_chunks for logstore with 32MB chunk_size.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, lets postpone this calculation and will get it from testing on real environment.

.block_size = _data_block_size,
.alloc_type = blk_allocator_type_t::append,
.chunk_sel_type = chunk_selector_type_t::CUSTOM}},
});
}

// Create a superblock that contains our SvcId
auto svc_sb = homestore::superblk< svc_info_superblk_t >(_svc_meta_name);
Expand Down
3 changes: 3 additions & 0 deletions src/lib/homestore_backend/hs_homeobject.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ class HSHomeObject : public HomeObjectImpl {

void persist_pg_sb();

// helpers
DevType get_device_type(string const& devname);

public:
using HomeObjectImpl::HomeObjectImpl;
HSHomeObject();
Expand Down
2 changes: 1 addition & 1 deletion src/lib/homestore_backend/tests/homeobj_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class HomeObjectFixture : public ::testing::Test {
std::default_random_engine rnd_engine{rnd()};

void SetUp() override {
app = std::make_shared< FixtureApp >();
app = std::make_shared< FixtureApp >(true /* is_hybrid */);
_obj_inst = homeobject::init_homeobject(std::weak_ptr< homeobject::HomeObjectApplication >(app));
}

Expand Down
14 changes: 10 additions & 4 deletions src/lib/tests/fixture_app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,17 @@ SISL_LOGGING_INIT(HOMEOBJECT_LOG_MODS)

SISL_OPTIONS_ENABLE(logging, homeobject, test_home_object)

// Builds the fixture's backing store: always a 10 GiB HDD-typed file, plus a
// 10 GiB SSD-typed file when running in hybrid mode. Leftovers from a
// previous run are removed first. The duplicated create/resize sequence is
// factored into one helper so both devices are provisioned identically.
FixtureApp::FixtureApp(bool is_hybrid) : is_hybrid_(is_hybrid) {
    clean();
    // Create (truncate) a backing file, then grow it to 10 GiB. The ofstream
    // only exists to create the file; resize_file does the actual sizing.
    auto make_device_file = [](std::string const& path) {
        std::ofstream ofs{path, std::ios::binary | std::ios::out | std::ios::trunc};
        std::filesystem::resize_file(path, 10 * Gi);
    };
    LOGWARN("creating HDD device {} file with size {} ", path_hdd_, 10 * Gi);
    make_device_file(path_hdd_);
    if (is_hybrid_) {
        LOGWARN("creating SSD device {} file with size {} ", path_ssd_, 10 * Gi);
        make_device_file(path_ssd_);
    }
}

homeobject::peer_id_t FixtureApp::discover_svcid(std::optional< homeobject::peer_id_t > const& p) const {
Expand Down
20 changes: 14 additions & 6 deletions src/lib/tests/fixture_app.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,30 @@ using homeobject::blob_id_t;
using homeobject::peer_id_t;

class FixtureApp : public homeobject::HomeObjectApplication {
std::string path_{"/tmp/homeobject_test.data"};
std::string path_hdd_{"/tmp/homeobject_test.hdd"};
std::string path_ssd_{"/tmp/homeobject_test.ssd"};
bool is_hybrid_{false};

public:
FixtureApp();
FixtureApp(bool is_hybrid=false);
~FixtureApp() = default;

bool spdk_mode() const override { return false; }
uint32_t threads() const override { return 2; }

// Removes any backing files left over from a previous run.
// std::filesystem::remove is a no-throw no-op (returns false) when the file
// is absent, so the exists() pre-check is unnecessary — dropping it also
// removes the TOCTOU window between the check and the removal.
void clean() {
    std::filesystem::remove(path_hdd_);
    std::filesystem::remove(path_ssd_);
}

std::list< std::filesystem::path > devices() const override {
auto device_info = std::list< std::filesystem::path >();
device_info.emplace_back(std::filesystem::canonical(path_));
std::list< homeobject::device_info_t > devices() const override {
auto device_info = std::list< homeobject::device_info_t >();
// add HDD
device_info.emplace_back(path_hdd_, homeobject::DevType::HDD);
if (is_hybrid_) {
// add SSD
device_info.emplace_back(path_ssd_, homeobject::DevType::NVME);
}
return device_info;
}

Expand Down
4 changes: 3 additions & 1 deletion test_package/test_package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ class TestApp : public homeobject::HomeObjectApplication {
public:
bool spdk_mode() const override { return false; }
uint32_t threads() const override { return 1; }
std::list< std::filesystem::path > devices() const override { return std::list< std::filesystem::path >(); }
// The package smoke test exposes no backing devices.
std::list< homeobject::device_info_t > devices() const override { return {}; }
// Ignores any previously-discovered service id (`p`) and always answers with
// a freshly generated random UUID — sufficient for the package smoke test.
homeobject::peer_id_t discover_svcid(std::optional< homeobject::peer_id_t > const& p) const override {
    return boost::uuids::random_generator()();
}
Expand Down
Loading