From 276a2b97a695fa56b4764e12dacc1bced2e26341 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Tue, 26 Sep 2023 11:10:49 -0600 Subject: [PATCH 01/19] Start File based implementation. --- src/lib/CMakeLists.txt | 1 + src/lib/file/CMakeLists.txt | 38 ++++++++++++++++++++++++ src/lib/file/blob_manager.cpp | 50 +++++++++++++++++++++++++++++++ src/lib/file/homeobject.cpp | 30 +++++++++++++++++++ src/lib/file/homeobject.hpp | 54 ++++++++++++++++++++++++++++++++++ src/lib/file/shard_manager.cpp | 38 ++++++++++++++++++++++++ 6 files changed, 211 insertions(+) create mode 100644 src/lib/file/CMakeLists.txt create mode 100644 src/lib/file/blob_manager.cpp create mode 100644 src/lib/file/homeobject.cpp create mode 100644 src/lib/file/homeobject.hpp create mode 100644 src/lib/file/shard_manager.cpp diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 3c0ed3e0..3ff6697c 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -18,3 +18,4 @@ endif() add_subdirectory(homestore) add_subdirectory(memory) +add_subdirectory(file) diff --git a/src/lib/file/CMakeLists.txt b/src/lib/file/CMakeLists.txt new file mode 100644 index 00000000..57dcfef9 --- /dev/null +++ b/src/lib/file/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required (VERSION 3.11) + +add_library ("${PROJECT_NAME}_file") +target_sources("${PROJECT_NAME}_file" PRIVATE + homeobject.cpp + blob_manager.cpp + shard_manager.cpp + $ + ) +target_link_libraries("${PROJECT_NAME}_file" + ${COMMON_DEPS} + ) + +if(BUILD_TESTING) +add_executable (pg_file_test) +target_sources(pg_file_test PRIVATE $) +target_link_libraries(pg_file_test + homeobject_file + ${COMMON_TEST_DEPS} + ) +add_test(NAME PGFileTest COMMAND pg_file_test -csv error) + +add_executable (shard_file_test) +target_sources(shard_file_test PRIVATE $) +target_link_libraries(shard_file_test + homeobject_file + ${COMMON_TEST_DEPS} + ) +add_test(NAME ShardFileTest COMMAND shard_file_test -csv error) + +add_executable (blob_file_test) +target_sources(blob_file_test PRIVATE $) +target_link_libraries(blob_file_test + homeobject_file + ${COMMON_TEST_DEPS} + ) +add_test(NAME BlobFileTest COMMAND blob_file_test -csv error) +endif() diff --git a/src/lib/file/blob_manager.cpp b/src/lib/file/blob_manager.cpp new file mode 100644 index 00000000..bd85c4d7 --- /dev/null +++ b/src/lib/file/blob_manager.cpp @@ -0,0 +1,50 @@ +#include "homeobject.hpp" + +namespace homeobject { + +#define WITH_SHARD \ + auto index_it = index_.find(_shard.id); \ + RELEASE_ASSERT(index_.end() != index_it, "Missing BTree!!"); \ + auto& shard = *index_it->second; + +#define WITH_ROUTE(blob) \ + auto const route = BlobRoute{_shard.id, (blob)}; \ + LOGTRACEMOD(homeobject, "[route={}]", route); + +#define IF_BLOB_ALIVE \ + if (auto blob_it = shard.btree_.find(route); shard.btree_.end() == blob_it || !blob_it->second) { \ + LOGWARNMOD(homeobject, "[route={}] missing", route); \ + return folly::makeUnexpected(BlobError::UNKNOWN_BLOB); \ + } else + +// Write (move) Blob to new BlobExt on heap and Insert BlobExt to Index +BlobManager::Result< blob_id > FileHomeObject::_put_blob(ShardInfo const& _shard, Blob&& _blob) { + WITH_SHARD + WITH_ROUTE(shard.shard_seq_num_++) + + auto [_, happened] = + shard.btree_.try_emplace(route, BlobExt{.state_ = BlobState::ALIVE, .blob_ = new Blob(std::move(_blob))}); + RELEASE_ASSERT(happened, "Generated duplicate BlobRoute!"); + return route.blob; +} + +// Lookup BlobExt and duplicate underyling Blob for user; only *safe* because we defer GC. +BlobManager::Result< Blob > FileHomeObject::_get_blob(ShardInfo const& _shard, blob_id _blob) const { + WITH_SHARD + WITH_ROUTE(_blob) + IF_BLOB_ALIVE { return blob_it->second.blob_->clone(); } +} + +// Tombstone BlobExt entry +BlobManager::NullResult FileHomeObject::_del_blob(ShardInfo const& _shard, blob_id _blob) { + WITH_SHARD + WITH_ROUTE(_blob) + IF_BLOB_ALIVE { + auto del_blob = BlobExt(); + del_blob.blob_ = blob_it->second.blob_; + shard.btree_.assign_if_equal(route, blob_it->second, std::move(del_blob)); + return folly::Unit(); + } +} + +} // namespace homeobject diff --git a/src/lib/file/homeobject.cpp b/src/lib/file/homeobject.cpp new file mode 100644 index 00000000..e5da77ab --- /dev/null +++ b/src/lib/file/homeobject.cpp @@ -0,0 +1,30 @@ +#include "homeobject.hpp" + +#include + +namespace homeobject { + +/// NOTE: We give ourselves the option to provide a different HR instance here than libhomeobject.a +extern std::shared_ptr< HomeObject > init_homeobject(std::weak_ptr< HomeObjectApplication >&& application) { + auto instance = std::make_shared< FileHomeObject >(std::move(application)); + instance->init_repl_svc(); + return instance; +} + +void HomeObjectImpl::init_repl_svc() { + auto lg = std::scoped_lock(_repl_lock); + if (!_repl_svc) { + _our_id = boost::uuids::random_generator()(); + LOGINFOMOD(homeobject, "SvcId faked: {}", to_string(_our_id)); + _our_id = _application.lock()->discover_svcid(_our_id); + _repl_svc = home_replication::create_repl_service([](auto) { return nullptr; }); + } +} + +ShardIndex::~ShardIndex() { + for (auto it = btree_.begin(); it != btree_.end(); ++it) { + delete it->second.blob_; + } +} + +} // namespace homeobject diff --git a/src/lib/file/homeobject.hpp b/src/lib/file/homeobject.hpp new file mode 100644 index 00000000..905fcf61 --- /dev/null +++ b/src/lib/file/homeobject.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include + +#include +#include "mocks/repl_service.h" + +#include "lib/homeobject_impl.hpp" +#include "lib/blob_route.hpp" + +namespace homeobject { + +/// +// Used to TombStone Blob's in the Index to defer for GC. +ENUM(BlobState, uint8_t, ALIVE = 0, DELETED); + +struct BlobExt { + BlobState state_{BlobState::DELETED}; + Blob* blob_; + + explicit operator bool() const { return state_ == BlobState::ALIVE; } + bool operator==(const BlobExt& rhs) const { return blob_ == rhs.blob_; } +}; + +struct ShardIndex { + folly::ConcurrentHashMap< BlobRoute, BlobExt > btree_; + std::atomic< blob_id > shard_seq_num_{0ull}; + ~ShardIndex(); +}; + +class FileHomeObject : public HomeObjectImpl { + /// Simulates the Shard=>Chunk mapping in IndexSvc + using index_svc = folly::ConcurrentHashMap< shard_id, std::unique_ptr< ShardIndex > >; + index_svc index_; + /// + + /// Helpers + // ShardManager + ShardManager::Result< ShardInfo > _create_shard(pg_id, uint64_t size_bytes) override; + ShardManager::Result< ShardInfo > _seal_shard(shard_id) override; + + // BlobManager + BlobManager::Result< blob_id > _put_blob(ShardInfo const&, Blob&&) override; + BlobManager::Result< Blob > _get_blob(ShardInfo const&, blob_id) const override; + BlobManager::NullResult _del_blob(ShardInfo const&, blob_id) override; + /// + +public: + using HomeObjectImpl::HomeObjectImpl; + ~FileHomeObject() override = default; +}; + +} // namespace homeobject diff --git a/src/lib/file/shard_manager.cpp b/src/lib/file/shard_manager.cpp new file mode 100644 index 00000000..df59334c --- /dev/null +++ b/src/lib/file/shard_manager.cpp @@ -0,0 +1,38 @@ +#include + +#include "homeobject.hpp" + +namespace homeobject { + +uint64_t ShardManager::max_shard_size() { return Gi; } + +ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id pg_owner, uint64_t size_bytes) { + auto const now = get_current_timestamp(); + auto info = ShardInfo(0ull, pg_owner, ShardInfo::State::OPEN, now, now, size_bytes, size_bytes, 0); + { + auto lg = std::scoped_lock(_pg_lock, _shard_lock); + auto pg_it = _pg_map.find(pg_owner); + if (_pg_map.end() == pg_it) return folly::makeUnexpected(ShardError::UNKNOWN_PG); + + auto& s_list = pg_it->second.shards; + info.id = make_new_shard_id(pg_owner, s_list.size()); + auto iter = s_list.emplace(s_list.end(), Shard(info)); + LOGDEBUG("Creating Shard [{}]: in Pg [{}] of Size [{}b]", info.id & shard_mask, pg_owner, size_bytes); + auto [_, s_happened] = _shard_map.emplace(info.id, iter); + RELEASE_ASSERT(s_happened, "Duplicate Shard insertion!"); + } + auto [it, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); + RELEASE_ASSERT(happened, "Could not create BTree!"); + return info; +} + +ShardManager::Result< ShardInfo > FileHomeObject::_seal_shard(shard_id id) { + auto lg = std::scoped_lock(_shard_lock); + auto shard_it = _shard_map.find(id); + RELEASE_ASSERT(_shard_map.end() != shard_it, "Missing ShardIterator!"); + auto& shard_info = (*shard_it->second).info; + shard_info.state = ShardInfo::State::SEALED; + return shard_info; +} + +} // namespace homeobject From 3fb4869cd75e00c6afb8d8c24a6cabb0a7ccc864 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Tue, 26 Sep 2023 11:34:16 -0600 Subject: [PATCH 02/19] Create shard file --- src/lib/file/homeobject.hpp | 3 +++ src/lib/file/shard_manager.cpp | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/lib/file/homeobject.hpp b/src/lib/file/homeobject.hpp index 905fcf61..c8b1c763 100644 --- a/src/lib/file/homeobject.hpp +++ b/src/lib/file/homeobject.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -30,6 +31,8 @@ struct ShardIndex { }; class FileHomeObject : public HomeObjectImpl { + std::filesystem::path const file_store_ = "file_store"; + /// Simulates the Shard=>Chunk mapping in IndexSvc using index_svc = folly::ConcurrentHashMap< shard_id, std::unique_ptr< ShardIndex > >; index_svc index_; diff --git a/src/lib/file/shard_manager.cpp b/src/lib/file/shard_manager.cpp index df59334c..451b7557 100644 --- a/src/lib/file/shard_manager.cpp +++ b/src/lib/file/shard_manager.cpp @@ -1,11 +1,17 @@ #include +#include #include "homeobject.hpp" namespace homeobject { +using std::filesystem::path; + uint64_t ShardManager::max_shard_size() { return Gi; } +/// +// Each Shard is stored as a FILE on the system. We defer creating the "PG" (directory) until +// the first Shard is created ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id pg_owner, uint64_t size_bytes) { auto const now = get_current_timestamp(); auto info = ShardInfo(0ull, pg_owner, ShardInfo::State::OPEN, now, now, size_bytes, size_bytes, 0); @@ -21,11 +27,24 @@ ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id pg_owner, auto [_, s_happened] = _shard_map.emplace(info.id, iter); RELEASE_ASSERT(s_happened, "Duplicate Shard insertion!"); } - auto [it, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); + + auto const shard_path = file_store_ / path(fmt::format("{:04x}", (info.id >> homeobject::shard_width))); + RELEASE_ASSERT(std::filesystem::create_directories(shard_path), "Could not create directory: {}", + shard_path.string()); + + auto const shard_file = shard_path / path(fmt::format("{:012x}", (info.id & homeobject::shard_mask))); + RELEASE_ASSERT(!std::filesystem::exists(shard_file), "Shard Path Exists! [path={}]", shard_file.string()); + std::ofstream ofs{shard_path, std::ios::binary | std::ios::out | std::ios::trunc}; + std::filesystem::resize_file(shard_path, max_shard_size()); + RELEASE_ASSERT(std::filesystem::exists(shard_file), "Shard Path Failed Creation! [path={}]", shard_file.string()); + + auto [_, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); RELEASE_ASSERT(happened, "Could not create BTree!"); return info; } +/// +// Shard STATE is managed through the FILE stat (rw/ro) ShardManager::Result< ShardInfo > FileHomeObject::_seal_shard(shard_id id) { auto lg = std::scoped_lock(_shard_lock); auto shard_it = _shard_map.find(id); From dbe76b0dbb2446442e73b5269a1bd0afaec7d8ff Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Tue, 26 Sep 2023 14:02:03 -0600 Subject: [PATCH 03/19] Persistent HomeObject. --- src/lib/file/blob_manager.cpp | 62 ++++++++++++++++++++++++++++------ src/lib/file/homeobject.cpp | 9 ++--- src/lib/file/homeobject.hpp | 22 +++--------- src/lib/file/shard_manager.cpp | 26 +++++++++++--- src/lib/tests/fixture_app.hpp | 12 +++---- 5 files changed, 85 insertions(+), 46 deletions(-) diff --git a/src/lib/file/blob_manager.cpp b/src/lib/file/blob_manager.cpp index bd85c4d7..8a1ce5a3 100644 --- a/src/lib/file/blob_manager.cpp +++ b/src/lib/file/blob_manager.cpp @@ -2,6 +2,8 @@ namespace homeobject { +using std::filesystem::path; + #define WITH_SHARD \ auto index_it = index_.find(_shard.id); \ RELEASE_ASSERT(index_.end() != index_it, "Missing BTree!!"); \ @@ -17,32 +19,72 @@ namespace homeobject { return folly::makeUnexpected(BlobError::UNKNOWN_BLOB); \ } else -// Write (move) Blob to new BlobExt on heap and Insert BlobExt to Index +#define WITH_SHARD_FILE(mode) \ + auto const shard_file = file_store_ / path(fmt::format("{:04x}", (_shard.id >> homeobject::shard_width))) / \ + path(fmt::format("{:012x}", (_shard.id & homeobject::shard_mask))); \ + auto shard_fd = open(shard_file.string().c_str(), (mode)); \ + RELEASE_ASSERT(shard_fd >= 0, "Failed to open Shard {}", shard_file.string()); + +// Write (move) Blob to FILE BlobManager::Result< blob_id > FileHomeObject::_put_blob(ShardInfo const& _shard, Blob&& _blob) { WITH_SHARD - WITH_ROUTE(shard.shard_seq_num_++) - auto [_, happened] = - shard.btree_.try_emplace(route, BlobExt{.state_ = BlobState::ALIVE, .blob_ = new Blob(std::move(_blob))}); + nlohmann::json j; + j["user_key"] = _blob.user_key; + j["object_off"] = _blob.object_off; + j["body_size"] = _blob.body.size; + auto serialize = j.dump(); + auto const h_size = serialize.size(); + auto const t_size = sizeof(size_t) + h_size + _blob.body.size; + + WITH_ROUTE(shard.shard_offset_.fetch_add(t_size, std::memory_order_relaxed)) + WITH_SHARD_FILE(O_WRONLY) + + auto err = pwrite(shard_fd, &h_size, sizeof(h_size), route.blob); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); + err = err || pwrite(shard_fd, serialize.c_str(), h_size, sizeof(h_size) + route.blob); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); + err = err || pwrite(shard_fd, _blob.body.bytes, _blob.body.size, sizeof(h_size) + h_size + route.blob); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); + auto [_, happened] = shard.btree_.try_emplace(route, true); RELEASE_ASSERT(happened, "Generated duplicate BlobRoute!"); + close(shard_fd); return route.blob; } -// Lookup BlobExt and duplicate underyling Blob for user; only *safe* because we defer GC. +// Lookup and duplicate underyling Blob for user; only *safe* because we defer GC. BlobManager::Result< Blob > FileHomeObject::_get_blob(ShardInfo const& _shard, blob_id _blob) const { WITH_SHARD WITH_ROUTE(_blob) - IF_BLOB_ALIVE { return blob_it->second.blob_->clone(); } + IF_BLOB_ALIVE { + WITH_SHARD_FILE(O_RDONLY) + size_t h_size = 0ull; + auto err = pread(shard_fd, &h_size, sizeof(h_size), route.blob); + RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); + + auto j_str = std::string(h_size, '\0'); + err = pread(shard_fd, const_cast< char* >(j_str.c_str()), h_size, sizeof(h_size) + route.blob); + RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); + auto shard_json = nlohmann::json::parse(j_str); + + auto const body_size = shard_json["body_size"].get< uint64_t >(); + auto b = Blob{sisl::io_blob_safe(body_size), "", 0}; + err = pread(shard_fd, b.body.bytes, body_size, sizeof(h_size) + h_size + route.blob); + RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); + + b.user_key = shard_json["user_key"].get< std::string >(); + b.object_off = shard_json["object_off"].get< uint64_t >(); + close(shard_fd); + return b; + } } -// Tombstone BlobExt entry +// Tombstone entry BlobManager::NullResult FileHomeObject::_del_blob(ShardInfo const& _shard, blob_id _blob) { WITH_SHARD WITH_ROUTE(_blob) IF_BLOB_ALIVE { - auto del_blob = BlobExt(); - del_blob.blob_ = blob_it->second.blob_; - shard.btree_.assign_if_equal(route, blob_it->second, std::move(del_blob)); + shard.btree_.assign_if_equal(route, blob_it->second, false); return folly::Unit(); } } diff --git a/src/lib/file/homeobject.cpp b/src/lib/file/homeobject.cpp index e5da77ab..75ad682c 100644 --- a/src/lib/file/homeobject.cpp +++ b/src/lib/file/homeobject.cpp @@ -6,7 +6,8 @@ namespace homeobject { /// NOTE: We give ourselves the option to provide a different HR instance here than libhomeobject.a extern std::shared_ptr< HomeObject > init_homeobject(std::weak_ptr< HomeObjectApplication >&& application) { - auto instance = std::make_shared< FileHomeObject >(std::move(application)); + auto devices = application.lock()->devices(); + auto instance = std::make_shared< FileHomeObject >(std::move(application), *devices.begin()); instance->init_repl_svc(); return instance; } @@ -21,10 +22,4 @@ void HomeObjectImpl::init_repl_svc() { } } -ShardIndex::~ShardIndex() { - for (auto it = btree_.begin(); it != btree_.end(); ++it) { - delete it->second.blob_; - } -} - } // namespace homeobject diff --git a/src/lib/file/homeobject.hpp b/src/lib/file/homeobject.hpp index c8b1c763..05a404b9 100644 --- a/src/lib/file/homeobject.hpp +++ b/src/lib/file/homeobject.hpp @@ -12,26 +12,13 @@ namespace homeobject { -/// -// Used to TombStone Blob's in the Index to defer for GC. -ENUM(BlobState, uint8_t, ALIVE = 0, DELETED); - -struct BlobExt { - BlobState state_{BlobState::DELETED}; - Blob* blob_; - - explicit operator bool() const { return state_ == BlobState::ALIVE; } - bool operator==(const BlobExt& rhs) const { return blob_ == rhs.blob_; } -}; - struct ShardIndex { - folly::ConcurrentHashMap< BlobRoute, BlobExt > btree_; - std::atomic< blob_id > shard_seq_num_{0ull}; - ~ShardIndex(); + folly::ConcurrentHashMap< BlobRoute, bool > btree_; + std::atomic< blob_id > shard_offset_{0ull}; }; class FileHomeObject : public HomeObjectImpl { - std::filesystem::path const file_store_ = "file_store"; + std::filesystem::path const file_store_; /// Simulates the Shard=>Chunk mapping in IndexSvc using index_svc = folly::ConcurrentHashMap< shard_id, std::unique_ptr< ShardIndex > >; @@ -50,7 +37,8 @@ class FileHomeObject : public HomeObjectImpl { /// public: - using HomeObjectImpl::HomeObjectImpl; + FileHomeObject(std::weak_ptr< HomeObjectApplication >&& application, std::filesystem::path const& root) : + HomeObjectImpl::HomeObjectImpl(std::move(application)), file_store_(root) {} ~FileHomeObject() override = default; }; diff --git a/src/lib/file/shard_manager.cpp b/src/lib/file/shard_manager.cpp index 451b7557..efac351b 100644 --- a/src/lib/file/shard_manager.cpp +++ b/src/lib/file/shard_manager.cpp @@ -29,17 +29,33 @@ ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id pg_owner, } auto const shard_path = file_store_ / path(fmt::format("{:04x}", (info.id >> homeobject::shard_width))); - RELEASE_ASSERT(std::filesystem::create_directories(shard_path), "Could not create directory: {}", - shard_path.string()); + std::filesystem::create_directories(shard_path); auto const shard_file = shard_path / path(fmt::format("{:012x}", (info.id & homeobject::shard_mask))); RELEASE_ASSERT(!std::filesystem::exists(shard_file), "Shard Path Exists! [path={}]", shard_file.string()); - std::ofstream ofs{shard_path, std::ios::binary | std::ios::out | std::ios::trunc}; - std::filesystem::resize_file(shard_path, max_shard_size()); + std::ofstream ofs{shard_file, std::ios::binary | std::ios::out | std::ios::trunc}; + std::filesystem::resize_file(shard_file, max_shard_size()); RELEASE_ASSERT(std::filesystem::exists(shard_file), "Shard Path Failed Creation! [path={}]", shard_file.string()); - auto [_, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); + auto shard_fd = open(shard_file.string().c_str(), O_WRONLY); + RELEASE_ASSERT(shard_fd >= 0, "Failed to open Shard {}", shard_file.string()); + + nlohmann::json j; + j["shard_id"] = info.id; + j["pg_id"] = info.placement_group; + j["state"] = info.state; + j["created_time"] = info.created_time; + j["modified_time"] = info.last_modified_time; + j["total_capacity"] = info.total_capacity_bytes; + j["available_capacity"] = info.available_capacity_bytes; + j["deleted_capacity"] = info.deleted_capacity_bytes; + auto serialize = j.dump(); + auto err = pwrite(shard_fd, serialize.c_str(), serialize.size(), 0ull); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); + + auto [it, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); RELEASE_ASSERT(happened, "Could not create BTree!"); + it->second->shard_offset_.store(serialize.size()); return info; } diff --git a/src/lib/tests/fixture_app.hpp b/src/lib/tests/fixture_app.hpp index be10bd36..eceb91b1 100644 --- a/src/lib/tests/fixture_app.hpp +++ b/src/lib/tests/fixture_app.hpp @@ -30,8 +30,6 @@ class FixtureApp : public homeobject::HomeObjectApplication { FixtureApp() { clean(); LOGINFO("creating device {} file with size {} ", path_, homestore::in_bytes(2 * Gi)); - std::ofstream ofs{path_, std::ios::binary | std::ios::out | std::ios::trunc}; - std::filesystem::resize_file(path_, 2 * Gi); } ~FixtureApp() override { clean(); } @@ -40,12 +38,12 @@ class FixtureApp : public homeobject::HomeObjectApplication { uint32_t threads() const override { return 2; } void clean() { - if (std::filesystem::exists(path_)) { std::filesystem::remove(path_); } + if (std::filesystem::exists(path_)) { std::filesystem::remove_all(path_); } } std::list< std::filesystem::path > devices() const override { auto device_info = std::list< std::filesystem::path >(); - device_info.emplace_back(std::filesystem::canonical(path_)); + device_info.emplace_back(std::filesystem::weakly_canonical(path_)); return device_info; } @@ -94,9 +92,9 @@ class TestFixture : public ::testing::Test { EXPECT_EQ(BlobError::UNKNOWN_BLOB, g_e.error()); LOGDEBUG("Insert Blob to: {}", _shard_1.id); - auto o_e = homeobj_->blob_manager() - ->put(_shard_1.id, Blob{sisl::io_blob_safe(4 * Ki, 512u), "test_blob", 4 * Mi}) - .get(); + auto blob_data = sisl::io_blob_safe(4 * Ki, 512u); + sprintf((char*)blob_data.bytes, "HELLO, WORLD!"); + auto o_e = homeobj_->blob_manager()->put(_shard_1.id, Blob{std::move(blob_data), "test_blob", 4 * Mi}).get(); EXPECT_TRUE(!!o_e); o_e.then([this](auto&& b) mutable { _blob_id = std::move(b); }); } From 507ef22190a90c510024e8bb16d1754c4d84de80 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Tue, 26 Sep 2023 14:56:46 -0600 Subject: [PATCH 04/19] Remove extra logic and test BLOB body. --- src/lib/file/blob_manager.cpp | 4 ++-- src/lib/tests/BlobManagerTest.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib/file/blob_manager.cpp b/src/lib/file/blob_manager.cpp index 8a1ce5a3..e4449187 100644 --- a/src/lib/file/blob_manager.cpp +++ b/src/lib/file/blob_manager.cpp @@ -42,9 +42,9 @@ BlobManager::Result< blob_id > FileHomeObject::_put_blob(ShardInfo const& _shard auto err = pwrite(shard_fd, &h_size, sizeof(h_size), route.blob); RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); - err = err || pwrite(shard_fd, serialize.c_str(), h_size, sizeof(h_size) + route.blob); + err = pwrite(shard_fd, serialize.c_str(), h_size, sizeof(h_size) + route.blob); RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); - err = err || pwrite(shard_fd, _blob.body.bytes, _blob.body.size, sizeof(h_size) + h_size + route.blob); + err = pwrite(shard_fd, _blob.body.bytes, _blob.body.size, sizeof(h_size) + h_size + route.blob); RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); auto [_, happened] = shard.btree_.try_emplace(route, true); RELEASE_ASSERT(happened, "Generated duplicate BlobRoute!"); diff --git a/src/lib/tests/BlobManagerTest.cpp b/src/lib/tests/BlobManagerTest.cpp index 1da3bc78..bb1dcc5e 100644 --- a/src/lib/tests/BlobManagerTest.cpp +++ b/src/lib/tests/BlobManagerTest.cpp @@ -21,6 +21,7 @@ TEST_F(TestFixture, BasicTests) { e.then([](auto const& blob) { EXPECT_STREQ(blob.user_key.c_str(), "test_blob"); EXPECT_EQ(blob.object_off, 4 * Mi); + EXPECT_STREQ((char*)blob.body.bytes, "HELLO, WORLD!"); }); })); our_calls.push_back(homeobj_->blob_manager()->get(i, _blob_id).deferValue([](auto const& e) {})); From 2cdd4d73d2fb6eecd915dbe940dd2eb61d0ec244 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Tue, 26 Sep 2023 15:02:39 -0600 Subject: [PATCH 05/19] Missing header includes. --- conanfile.py | 3 +++ src/lib/file/blob_manager.cpp | 2 ++ src/lib/file/shard_manager.cpp | 1 + 3 files changed, 6 insertions(+) diff --git a/conanfile.py b/conanfile.py index 629a1d46..c3dd8a1c 100644 --- a/conanfile.py +++ b/conanfile.py @@ -102,10 +102,13 @@ def package_info(self): self.cpp_info.components["homestore"].requires = ["homestore::homestore"] self.cpp_info.components["memory"].libs = ["homeobject_memory", "home_replication_mock"] self.cpp_info.components["memory"].requires = ["homestore::homestore"] + self.cpp_info.components["file"].libs = ["homeobject_file", "home_replication_mock"] + self.cpp_info.components["file"].requires = ["homestore::homestore"] if self.settings.os == "Linux": self.cpp_info.components["homestore"].system_libs.append("pthread") self.cpp_info.components["memory"].system_libs.append("pthread") + self.cpp_info.components["file"].system_libs.append("pthread") if self.options.sanitize: self.cpp_info.components["memory"].sharedlinkflags.append("-fsanitize=address") self.cpp_info.components["memory"].exelinkflags.append("-fsanitize=address") diff --git a/src/lib/file/blob_manager.cpp b/src/lib/file/blob_manager.cpp index e4449187..f8b9469a 100644 --- a/src/lib/file/blob_manager.cpp +++ b/src/lib/file/blob_manager.cpp @@ -1,3 +1,5 @@ +#include + #include "homeobject.hpp" namespace homeobject { diff --git a/src/lib/file/shard_manager.cpp b/src/lib/file/shard_manager.cpp index efac351b..173f3706 100644 --- a/src/lib/file/shard_manager.cpp +++ b/src/lib/file/shard_manager.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "homeobject.hpp" From 78f3d7240833fce111d164e4ec5e11096eb9bcc5 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Tue, 26 Sep 2023 17:02:09 -0600 Subject: [PATCH 06/19] Also need fnctl. --- src/lib/file/blob_manager.cpp | 1 + src/lib/file/shard_manager.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/lib/file/blob_manager.cpp b/src/lib/file/blob_manager.cpp index f8b9469a..fb8d87ba 100644 --- a/src/lib/file/blob_manager.cpp +++ b/src/lib/file/blob_manager.cpp @@ -1,3 +1,4 @@ +#include #include #include "homeobject.hpp" diff --git a/src/lib/file/shard_manager.cpp b/src/lib/file/shard_manager.cpp index 173f3706..f26550e1 100644 --- a/src/lib/file/shard_manager.cpp +++ b/src/lib/file/shard_manager.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include "homeobject.hpp" From 7812fc09f50d51c5ab59ed6d6ba4ce2a3cd9c2b2 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 20:12:40 -0700 Subject: [PATCH 07/19] Directory existence for pg creation. --- src/lib/file_backend/file_pg_manager.cpp | 4 ++++ src/lib/file_backend/file_shard_manager.cpp | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib/file_backend/file_pg_manager.cpp b/src/lib/file_backend/file_pg_manager.cpp index b6d7c59c..693ae9ad 100644 --- a/src/lib/file_backend/file_pg_manager.cpp +++ b/src/lib/file_backend/file_pg_manager.cpp @@ -3,6 +3,10 @@ namespace homeobject { PGManager::NullAsyncResult FileHomeObject::_create_pg(PGInfo&& pg_info, std::set< std::string, std::less<> >) { auto lg = std::scoped_lock(_pg_lock); + + auto const pg_path = file_store_ / std::filesystem::path(fmt::format("{:04x}", pg_info.id)); + std::filesystem::create_directories(pg_path); + auto [it1, _] = _pg_map.try_emplace(pg_info.id, std::make_unique< PG >(pg_info)); RELEASE_ASSERT(_pg_map.end() != it1, "Unknown map insert error!"); return folly::makeSemiFuture< PGManager::NullResult >(folly::Unit()); diff --git a/src/lib/file_backend/file_shard_manager.cpp b/src/lib/file_backend/file_shard_manager.cpp index eec2817c..065aea24 100644 --- a/src/lib/file_backend/file_shard_manager.cpp +++ b/src/lib/file_backend/file_shard_manager.cpp @@ -30,10 +30,8 @@ ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id_t pg_owner RELEASE_ASSERT(s_happened, "Duplicate Shard insertion!"); } - auto const shard_path = file_store_ / path(fmt::format("{:04x}", (info.id >> homeobject::shard_width))); - std::filesystem::create_directories(shard_path); - - auto const shard_file = shard_path / path(fmt::format("{:012x}", (info.id & homeobject::shard_mask))); + auto const pg_path = file_store_ / path(fmt::format("{:04x}", (info.id >> homeobject::shard_width))); + auto const shard_file = pg_path / path(fmt::format("{:012x}", (info.id & homeobject::shard_mask))); RELEASE_ASSERT(!std::filesystem::exists(shard_file), "Shard Path Exists! [path={}]", shard_file.string()); std::ofstream ofs{shard_file, std::ios::binary | std::ios::out | std::ios::trunc}; std::filesystem::resize_file(shard_file, max_shard_size()); From 65188545891e42e225bfdbbf8e27f72023e7eef0 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 20:24:30 -0700 Subject: [PATCH 08/19] Dynamic shard size. --- src/lib/file_backend/file_homeobject.cpp | 4 ++++ src/lib/file_backend/file_shard_manager.cpp | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 8c361529..299c56c9 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -2,6 +2,10 @@ #include +SISL_OPTION_GROUP(homeobject_file, + (max_filesize, "", "max_filesize", "Maximum File (Shard) size", + cxxopts::value< uint32_t >()->default_value("1024"), "mb")) + namespace homeobject { /// NOTE: We give ourselves the option to provide a different HR instance here than libhomeobject.a diff --git a/src/lib/file_backend/file_shard_manager.cpp b/src/lib/file_backend/file_shard_manager.cpp index 065aea24..041adb90 100644 --- a/src/lib/file_backend/file_shard_manager.cpp +++ b/src/lib/file_backend/file_shard_manager.cpp @@ -9,7 +9,9 @@ namespace homeobject { using std::filesystem::path; -uint64_t ShardManager::max_shard_size() { return Gi; } +uint64_t ShardManager::max_shard_size() { + return (SISL_OPTIONS.count("max_filesize") > 0) ? SISL_OPTIONS["max_filesize"].as< uint32_t >() * Mi : 1 * Gi; +} /// // Each Shard is stored as a FILE on the system. We defer creating the "PG" (directory) until From dccaf5ea4fe4f51fde4030c87a3d5f5fb968fca6 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 21:06:32 -0700 Subject: [PATCH 09/19] Recovery. --- src/lib/file_backend/file_blob_manager.cpp | 8 ++-- src/lib/file_backend/file_homeobject.cpp | 43 +++++++++++++++++++++ src/lib/file_backend/file_homeobject.hpp | 2 + src/lib/file_backend/file_shard_manager.cpp | 8 +++- 4 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/lib/file_backend/file_blob_manager.cpp b/src/lib/file_backend/file_blob_manager.cpp index e6f8a000..7c3643fc 100644 --- a/src/lib/file_backend/file_blob_manager.cpp +++ b/src/lib/file_backend/file_blob_manager.cpp @@ -68,15 +68,15 @@ BlobManager::Result< Blob > FileHomeObject::_get_blob(ShardInfo const& _shard, b auto j_str = std::string(h_size, '\0'); err = pread(shard_fd, const_cast< char* >(j_str.c_str()), h_size, sizeof(h_size) + route.blob); RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); - auto shard_json = nlohmann::json::parse(j_str); + auto blob_json = nlohmann::json::parse(j_str); - auto const body_size = shard_json["body_size"].get< uint64_t >(); + auto const body_size = blob_json["body_size"].get< uint64_t >(); auto b = Blob{sisl::io_blob_safe(body_size), "", 0}; err = pread(shard_fd, b.body.bytes, body_size, sizeof(h_size) + h_size + route.blob); RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); - b.user_key = shard_json["user_key"].get< std::string >(); - b.object_off = shard_json["object_off"].get< uint64_t >(); + b.user_key = blob_json["user_key"].get< std::string >(); + b.object_off = blob_json["object_off"].get< uint64_t >(); close(shard_fd); return b; } diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 299c56c9..ce40ee56 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -1,6 +1,8 @@ #include "file_homeobject.hpp" #include +#include +#include SISL_OPTION_GROUP(homeobject_file, (max_filesize, "", "max_filesize", "Maximum File (Shard) size", @@ -14,10 +16,51 @@ extern std::shared_ptr< HomeObject > init_homeobject(std::weak_ptr< HomeObjectAp auto instance = std::make_shared< FileHomeObject >(std::move(application), *devices.begin()); return instance; } +void FileHomeObject::_recover() { + for (auto const& pg_dir_e : std::filesystem::directory_iterator{file_store_}) { + auto pg_dir = pg_dir_e.path(); + auto pgid = std::stoul(pg_dir.filename().string()); + LOGI("discovered [pg_dir={}] [pg_id={}]", pg_dir.string(), pgid); + auto [it, happened] = _pg_map.try_emplace(pgid, std::make_unique< PG >(PGInfo(pgid))); + auto& s_list = it->second->shards_; + RELEASE_ASSERT(happened, "Unknown map insert error!"); + for (auto const& shard_file_e : std::filesystem::directory_iterator{pg_dir_e}) { + auto shard_file = shard_file_e.path().string(); + LOGI("discovered [shard_file={}]", shard_file); + auto shard_fd = open(shard_file.c_str(), O_RDONLY); + RELEASE_ASSERT(shard_fd >= 0, "Failed to open Shard {}", shard_file); + + size_t h_size = 0ull; + auto err = pread(shard_fd, &h_size, sizeof(h_size), 0ull); + RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file); + + auto j_str = std::string(h_size, '\0'); + err = pread(shard_fd, const_cast< char* >(j_str.c_str()), h_size, sizeof(h_size)); + RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file); + auto shard_json = nlohmann::json::parse(j_str); + + auto info = ShardInfo(); + info.id = shard_json["id"].get< shard_id_t >(); + info.placement_group = shard_json["pg_id"].get< pg_id_t >(); + info.state = shard_json["state"].get< ShardInfo::State >(); + info.created_time = shard_json["created_time"].get< uint64_t >(); + info.last_modified_time = shard_json["last_modified_time"].get< uint64_t >(); + info.available_capacity_bytes = shard_json["available_capacity_bytes"].get< uint64_t >(); + info.total_capacity_bytes = shard_json["total_capacity_bytes"].get< uint64_t >(); + info.deleted_capacity_bytes = shard_json["deleted_capacity_bytes"].get< uint64_t >(); + + auto iter = s_list.emplace(s_list.end(), Shard(info)); + auto [_, s_happened] = _shard_map.emplace(info.id, iter); + RELEASE_ASSERT(s_happened, "Duplicate Shard insertion!"); + close(shard_fd); + } + } +} FileHomeObject::FileHomeObject(std::weak_ptr< HomeObjectApplication >&& application, std::filesystem::path const& root) : HomeObjectImpl::HomeObjectImpl(std::move(application)), file_store_(root) { + if (std::filesystem::exists(file_store_)) _recover(); _our_id = _application.lock()->discover_svcid(_our_id); } diff --git a/src/lib/file_backend/file_homeobject.hpp b/src/lib/file_backend/file_homeobject.hpp index 65b71665..a6045e42 100644 --- a/src/lib/file_backend/file_homeobject.hpp +++ b/src/lib/file_backend/file_homeobject.hpp @@ -41,6 +41,8 @@ class FileHomeObject : public HomeObjectImpl { PGMember const& new_member) override; /// + void _recover(); + public: FileHomeObject(std::weak_ptr< HomeObjectApplication >&& application, std::filesystem::path const& root); ~FileHomeObject() override = default; diff --git a/src/lib/file_backend/file_shard_manager.cpp b/src/lib/file_backend/file_shard_manager.cpp index 041adb90..f3332906 100644 --- a/src/lib/file_backend/file_shard_manager.cpp +++ b/src/lib/file_backend/file_shard_manager.cpp @@ -52,12 +52,16 @@ ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id_t pg_owner j["available_capacity"] = info.available_capacity_bytes; j["deleted_capacity"] = info.deleted_capacity_bytes; auto serialize = j.dump(); - auto err = pwrite(shard_fd, serialize.c_str(), serialize.size(), 0ull); + auto hdr_len = serialize.size(); + auto err = pwrite(shard_fd, &hdr_len, sizeof(hdr_len), 0ull); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); + err = pwrite(shard_fd, serialize.c_str(), serialize.size(), sizeof(hdr_len)); RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); auto [it, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); RELEASE_ASSERT(happened, "Could not create BTree!"); - it->second->shard_offset_.store(serialize.size()); + it->second->shard_offset_.store(sizeof(hdr_len) + serialize.size()); + close(shard_fd); return info; } From bcc923f8a9c9c5596dbe622b04bdb00c6206163f Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 21:17:11 -0700 Subject: [PATCH 10/19] Fix shard info recovery. --- src/lib/file_backend/file_homeobject.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index ce40ee56..3ba745da 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -40,14 +40,14 @@ void FileHomeObject::_recover() { auto shard_json = nlohmann::json::parse(j_str); auto info = ShardInfo(); - info.id = shard_json["id"].get< shard_id_t >(); + info.id = shard_json["shard_id"].get< shard_id_t >(); info.placement_group = shard_json["pg_id"].get< pg_id_t >(); info.state = shard_json["state"].get< ShardInfo::State >(); info.created_time = shard_json["created_time"].get< uint64_t >(); - info.last_modified_time = shard_json["last_modified_time"].get< uint64_t >(); - info.available_capacity_bytes = shard_json["available_capacity_bytes"].get< uint64_t >(); - info.total_capacity_bytes = shard_json["total_capacity_bytes"].get< uint64_t >(); - info.deleted_capacity_bytes = shard_json["deleted_capacity_bytes"].get< uint64_t >(); + info.last_modified_time = shard_json["modified_time"].get< uint64_t >(); + info.available_capacity_bytes = shard_json["available_capacity"].get< uint64_t >(); + info.total_capacity_bytes = shard_json["total_capacity"].get< uint64_t >(); + info.deleted_capacity_bytes = shard_json["deleted_capacity"].get< uint64_t >(); auto iter = s_list.emplace(s_list.end(), Shard(info)); auto [_, s_happened] = _shard_map.emplace(info.id, iter); From df8b0436f177df535e6a918a1df36574dda1294f Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 21:23:31 -0700 Subject: [PATCH 11/19] Recover svcid --- src/lib/file_backend/file_homeobject.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 3ba745da..9b48daad 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -19,6 +19,8 @@ extern std::shared_ptr< HomeObject > init_homeobject(std::weak_ptr< HomeObjectAp void FileHomeObject::_recover() { for (auto const& pg_dir_e : std::filesystem::directory_iterator{file_store_}) { auto pg_dir = pg_dir_e.path(); + if (pg_dir.filename().string() == "svc_id") continue; + LOGI("discovered [pg_dir={}]", pg_dir.string()); auto pgid = std::stoul(pg_dir.filename().string()); LOGI("discovered [pg_dir={}] [pg_id={}]", pg_dir.string(), pgid); auto [it, happened] = _pg_map.try_emplace(pgid, std::make_unique< PG >(PGInfo(pgid))); @@ -60,8 +62,22 @@ void FileHomeObject::_recover() { FileHomeObject::FileHomeObject(std::weak_ptr< HomeObjectApplication >&& application, std::filesystem::path const& root) : HomeObjectImpl::HomeObjectImpl(std::move(application)), file_store_(root) { - if (std::filesystem::exists(file_store_)) _recover(); + auto const id_file = file_store_ / "svc_id"; + if (std::filesystem::exists(file_store_)) { + auto id_fd = open(id_file.string().c_str(), O_RDONLY); + auto err = pread(id_fd, &_our_id, sizeof(_our_id), 0ull); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", id_file.string()); + LOGI("recovering: {}", to_string(_our_id)); + _recover(); + } _our_id = _application.lock()->discover_svcid(_our_id); + std::filesystem::create_directories(file_store_); + std::ofstream ofs{id_file, std::ios::binary | std::ios::out | std::ios::trunc}; + std::filesystem::resize_file(id_file, 4 * Ki); + auto id_fd = open(id_file.string().c_str(), O_WRONLY); + RELEASE_ASSERT(0 < id_fd, "Failed to open: {}", id_file.string()); + auto err = pwrite(id_fd, &_our_id, sizeof(_our_id), 0ull); + RELEASE_ASSERT(0 < err, "Failed to write to: {}", id_file.string()); } } // namespace homeobject From 911e0122bdd01cd4428038931e3210473b446f0f Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 21:54:39 -0700 Subject: [PATCH 12/19] Missing includes --- src/lib/file_backend/file_homeobject.cpp | 3 ++- src/lib/tests/BlobManagerTest.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 9b48daad..36274921 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -1,6 +1,7 @@ #include "file_homeobject.hpp" -#include +#include +#include #include #include diff --git a/src/lib/tests/BlobManagerTest.cpp b/src/lib/tests/BlobManagerTest.cpp index bb1dcc5e..17ff0f14 100644 --- a/src/lib/tests/BlobManagerTest.cpp +++ b/src/lib/tests/BlobManagerTest.cpp @@ -15,7 +15,7 @@ TEST_F(TestFixture, BasicTests) { for (auto k = 0; batch_sz > k; ++k) { t_v.push_back(std::thread([this, &call_lock, &calls, batch_sz]() mutable { auto our_calls = std::list< folly::SemiFuture< folly::Unit > >(); - for (auto i = _blob_id + _shard_2.id + 1; (_blob_id + _shard_1.id + 1) + ((100 * Ki) / batch_sz) > i; ++i) { + for (auto i = _blob_id + _shard_2.id + 1; (_blob_id + _shard_1.id + 1) + ((20 * Ki) / batch_sz) > i; ++i) { our_calls.push_back(homeobj_->blob_manager()->get(_shard_1.id, _blob_id).deferValue([](auto const& e) { EXPECT_TRUE(!!e); e.then([](auto const& blob) { From ad125702fa6982cf3cbef9b04bbad079a7c9c603 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Thu, 28 Sep 2023 22:08:31 -0700 Subject: [PATCH 13/19] Fix unused --- src/lib/file_backend/file_homeobject.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 36274921..0401b18d 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -5,6 +5,8 @@ #include #include +#include + SISL_OPTION_GROUP(homeobject_file, (max_filesize, "", "max_filesize", "Maximum File (Shard) size", cxxopts::value< uint32_t >()->default_value("1024"), "mb")) @@ -24,6 +26,8 @@ void FileHomeObject::_recover() { LOGI("discovered [pg_dir={}]", pg_dir.string()); auto pgid = std::stoul(pg_dir.filename().string()); LOGI("discovered [pg_dir={}] [pg_id={}]", pg_dir.string(), pgid); + auto pg = PGInfo(pgid); + pg.replica_set_uuid = boost::uuids::random_generator()(); auto [it, happened] = _pg_map.try_emplace(pgid, std::make_unique< PG >(PGInfo(pgid))); auto& s_list = it->second->shards_; RELEASE_ASSERT(happened, "Unknown map insert error!"); From da8686fea57448547fa095931c1f78e45aacf751 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 29 Sep 2023 09:18:15 -0700 Subject: [PATCH 14/19] Reconstitute shards. --- src/lib/file_backend/file_blob_manager.cpp | 29 ++++++++++++++-------- src/lib/file_backend/file_homeobject.cpp | 23 +++++++++++++++-- src/lib/file_backend/file_homeobject.hpp | 1 + 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/lib/file_backend/file_blob_manager.cpp b/src/lib/file_backend/file_blob_manager.cpp index 7c3643fc..2fbd1c5c 100644 --- a/src/lib/file_backend/file_blob_manager.cpp +++ b/src/lib/file_backend/file_blob_manager.cpp @@ -13,7 +13,7 @@ using std::filesystem::path; auto& shard = *index_it->second; #define WITH_ROUTE(blob) \ - auto const route = BlobRoute{_shard.id, (blob)}; \ + auto route = BlobRoute{_shard.id, (blob)}; \ LOGTRACEMOD(homeobject, "[route={}]", route); #define IF_BLOB_ALIVE \ @@ -55,24 +55,33 @@ BlobManager::Result< blob_id_t > FileHomeObject::_put_blob(ShardInfo const& _sha return route.blob; } +nlohmann::json FileHomeObject::_read_blob_header(int shard_fd, blob_id_t& blob_id) { + size_t h_size = 0ull; + auto err = pread(shard_fd, &h_size, sizeof(h_size), blob_id); + RELEASE_ASSERT(0 < err, "failed to read from shard"); + blob_id += sizeof(h_size); + + auto j_str = std::string(h_size, '\0'); + err = pread(shard_fd, const_cast< char* >(j_str.c_str()), h_size, blob_id); + blob_id += h_size; + try { + if (0 <= err) return nlohmann::json::parse(j_str); + LOGE("failed to read: {}", strerror(errno)); + } catch (nlohmann::exception const&) { LOGT("no blob @ [blob_id={}]", blob_id); } + return nlohmann::json{}; +} + // Lookup and duplicate underyling Blob for user; only *safe* because we defer GC. BlobManager::Result< Blob > FileHomeObject::_get_blob(ShardInfo const& _shard, blob_id_t _blob) const { WITH_SHARD WITH_ROUTE(_blob) IF_BLOB_ALIVE { WITH_SHARD_FILE(O_RDONLY) - size_t h_size = 0ull; - auto err = pread(shard_fd, &h_size, sizeof(h_size), route.blob); - RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); - - auto j_str = std::string(h_size, '\0'); - err = pread(shard_fd, const_cast< char* >(j_str.c_str()), h_size, sizeof(h_size) + route.blob); - RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); - auto blob_json = nlohmann::json::parse(j_str); + auto blob_json = _read_blob_header(shard_fd, route.blob); auto const body_size = blob_json["body_size"].get< uint64_t >(); auto b = Blob{sisl::io_blob_safe(body_size), "", 0}; - err = pread(shard_fd, b.body.bytes, body_size, sizeof(h_size) + h_size + route.blob); + auto err = pread(shard_fd, b.body.bytes, body_size, route.blob); RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); b.user_key = blob_json["user_key"].get< std::string >(); diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 0401b18d..b57d2466 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -46,6 +46,7 @@ void FileHomeObject::_recover() { RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file); auto shard_json = nlohmann::json::parse(j_str); + // Reconsitute the ShardInfo auto info = ShardInfo(); info.id = shard_json["shard_id"].get< shard_id_t >(); info.placement_group = shard_json["pg_id"].get< pg_id_t >(); @@ -56,10 +57,27 @@ void FileHomeObject::_recover() { info.total_capacity_bytes = shard_json["total_capacity"].get< uint64_t >(); info.deleted_capacity_bytes = shard_json["deleted_capacity"].get< uint64_t >(); + auto [it, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); + RELEASE_ASSERT(happened, "duplicate Shard recovery!"); + + // Scan Shard for shard_offset_ (next blob_id) + auto blob_id = sizeof(h_size) + h_size; + while (max_shard_size() > blob_id) { + auto route = BlobRoute{info.id, blob_id}; + auto blob_hdr = _read_blob_header(shard_fd, blob_id); + if (blob_hdr.is_null()) break; + auto blob_size = blob_hdr["body_size"].get< uint64_t >(); + LOGT("found [blob={}], [user_key={}], [size={}]", route, blob_hdr["user_key"].get< std::string >(), + blob_size); + blob_id += blob_size; + } + LOGI("[shard={}] reconstituted to: [offset={}]", info.id, blob_id); + it->second->shard_offset_.store(blob_id); + close(shard_fd); + auto iter = s_list.emplace(s_list.end(), Shard(info)); auto [_, s_happened] = _shard_map.emplace(info.id, iter); - RELEASE_ASSERT(s_happened, "Duplicate Shard insertion!"); - close(shard_fd); + RELEASE_ASSERT(s_happened, "duplicate Shard recovery!"); } } } @@ -71,6 +89,7 @@ FileHomeObject::FileHomeObject(std::weak_ptr< HomeObjectApplication >&& applicat if (std::filesystem::exists(file_store_)) { auto id_fd = open(id_file.string().c_str(), O_RDONLY); auto err = pread(id_fd, &_our_id, sizeof(_our_id), 0ull); + close(id_fd); RELEASE_ASSERT(0 < err, "Failed to write to: {}", id_file.string()); LOGI("recovering: {}", to_string(_our_id)); _recover(); diff --git a/src/lib/file_backend/file_homeobject.hpp b/src/lib/file_backend/file_homeobject.hpp index a6045e42..4b0f5398 100644 --- a/src/lib/file_backend/file_homeobject.hpp +++ b/src/lib/file_backend/file_homeobject.hpp @@ -41,6 +41,7 @@ class FileHomeObject : public HomeObjectImpl { PGMember const& new_member) override; /// + static nlohmann::json _read_blob_header(int shard_fd, blob_id_t& blob_id); void _recover(); public: From 647007eaaa4b4cd68def9544d94732a5703541b5 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 29 Sep 2023 09:38:07 -0700 Subject: [PATCH 15/19] Missing namespace. --- src/lib/file_backend/file_blob_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/file_backend/file_blob_manager.cpp b/src/lib/file_backend/file_blob_manager.cpp index 2fbd1c5c..cda8f065 100644 --- a/src/lib/file_backend/file_blob_manager.cpp +++ b/src/lib/file_backend/file_blob_manager.cpp @@ -67,7 +67,7 @@ nlohmann::json FileHomeObject::_read_blob_header(int shard_fd, blob_id_t& blob_i try { if (0 <= err) return nlohmann::json::parse(j_str); LOGE("failed to read: {}", strerror(errno)); - } catch (nlohmann::exception const&) { LOGT("no blob @ [blob_id={}]", blob_id); } + } catch (nlohmann::json::exception const&) { LOGT("no blob @ [blob_id={}]", blob_id); } return nlohmann::json{}; } From 8d1ecf7db4abae3e7000591ab822e890ca7a3d96 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 29 Sep 2023 10:00:09 -0700 Subject: [PATCH 16/19] Remove unused. --- src/lib/file_backend/file_homeobject.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index b57d2466..3f480726 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -28,7 +28,7 @@ void FileHomeObject::_recover() { LOGI("discovered [pg_dir={}] [pg_id={}]", pg_dir.string(), pgid); auto pg = PGInfo(pgid); pg.replica_set_uuid = boost::uuids::random_generator()(); - auto [it, happened] = _pg_map.try_emplace(pgid, std::make_unique< PG >(PGInfo(pgid))); + auto [it, happened] = _pg_map.try_emplace(pgid, std::make_unique< PG >(std::move(pg))); auto& s_list = it->second->shards_; RELEASE_ASSERT(happened, "Unknown map insert error!"); for (auto const& shard_file_e : std::filesystem::directory_iterator{pg_dir_e}) { From 882335cd64d7cfd4466583f79988b4ead6bda008 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 29 Sep 2023 10:41:40 -0700 Subject: [PATCH 17/19] Reuse fds --- src/lib/file_backend/file_blob_manager.cpp | 31 ++++++++------------- src/lib/file_backend/file_homeobject.cpp | 2 +- src/lib/file_backend/file_homeobject.hpp | 2 ++ src/lib/file_backend/file_shard_manager.cpp | 8 ++++-- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/lib/file_backend/file_blob_manager.cpp b/src/lib/file_backend/file_blob_manager.cpp index cda8f065..ac6fa120 100644 --- a/src/lib/file_backend/file_blob_manager.cpp +++ b/src/lib/file_backend/file_blob_manager.cpp @@ -22,12 +22,6 @@ using std::filesystem::path; return folly::makeUnexpected(BlobError::UNKNOWN_BLOB); \ } else -#define WITH_SHARD_FILE(mode) \ - auto const shard_file = file_store_ / path(fmt::format("{:04x}", (_shard.id >> homeobject::shard_width))) / \ - path(fmt::format("{:012x}", (_shard.id & homeobject::shard_mask))); \ - auto shard_fd = open(shard_file.string().c_str(), (mode)); \ - RELEASE_ASSERT(shard_fd >= 0, "Failed to open Shard {}", shard_file.string()); - // Write (move) Blob to FILE BlobManager::Result< blob_id_t > FileHomeObject::_put_blob(ShardInfo const& _shard, Blob&& _blob) { WITH_SHARD @@ -41,17 +35,16 @@ BlobManager::Result< blob_id_t > FileHomeObject::_put_blob(ShardInfo const& _sha auto const t_size = sizeof(size_t) + h_size + _blob.body.size; WITH_ROUTE(shard.shard_offset_.fetch_add(t_size, std::memory_order_relaxed)) - WITH_SHARD_FILE(O_WRONLY) - - auto err = pwrite(shard_fd, &h_size, sizeof(h_size), route.blob); - RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); - err = pwrite(shard_fd, serialize.c_str(), h_size, sizeof(h_size) + route.blob); - RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); - err = pwrite(shard_fd, _blob.body.bytes, _blob.body.size, sizeof(h_size) + h_size + route.blob); - RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); + if (route.blob + t_size > max_shard_size()) return folly::makeUnexpected(BlobError::INVALID_ARG); + + auto err = pwrite(shard.fd_, &h_size, sizeof(h_size), route.blob); + RELEASE_ASSERT(0 < err, "failed to write to: {route=}", route); + err = pwrite(shard.fd_, serialize.c_str(), h_size, sizeof(h_size) + route.blob); + RELEASE_ASSERT(0 < err, "failed to write to: {route=}", route); + err = pwrite(shard.fd_, _blob.body.bytes, _blob.body.size, sizeof(h_size) + h_size + route.blob); + RELEASE_ASSERT(0 < err, "failed to write to: {route=}", route); auto [_, happened] = shard.btree_.try_emplace(route, true); RELEASE_ASSERT(happened, "Generated duplicate BlobRoute!"); - close(shard_fd); return route.blob; } @@ -76,17 +69,15 @@ BlobManager::Result< Blob > FileHomeObject::_get_blob(ShardInfo const& _shard, b WITH_SHARD WITH_ROUTE(_blob) IF_BLOB_ALIVE { - WITH_SHARD_FILE(O_RDONLY) - auto blob_json = _read_blob_header(shard_fd, route.blob); + auto blob_json = _read_blob_header(shard.fd_, route.blob); auto const body_size = blob_json["body_size"].get< uint64_t >(); auto b = Blob{sisl::io_blob_safe(body_size), "", 0}; - auto err = pread(shard_fd, b.body.bytes, body_size, route.blob); - RELEASE_ASSERT(0 < err, "Failed to read from: {}", shard_file.string()); + auto err = pread(shard.fd_, b.body.bytes, body_size, route.blob); + RELEASE_ASSERT(0 < err, "Failed to read from: [route={}]", route); b.user_key = blob_json["user_key"].get< std::string >(); b.object_off = blob_json["object_off"].get< uint64_t >(); - close(shard_fd); return b; } } diff --git a/src/lib/file_backend/file_homeobject.cpp b/src/lib/file_backend/file_homeobject.cpp index 3f480726..51f3f596 100644 --- a/src/lib/file_backend/file_homeobject.cpp +++ b/src/lib/file_backend/file_homeobject.cpp @@ -72,8 +72,8 @@ void FileHomeObject::_recover() { blob_id += blob_size; } LOGI("[shard={}] reconstituted to: [offset={}]", info.id, blob_id); + it->second->fd_ = shard_fd; it->second->shard_offset_.store(blob_id); - close(shard_fd); auto iter = s_list.emplace(s_list.end(), Shard(info)); auto [_, s_happened] = _shard_map.emplace(info.id, iter); diff --git a/src/lib/file_backend/file_homeobject.hpp b/src/lib/file_backend/file_homeobject.hpp index 4b0f5398..efc7c2ba 100644 --- a/src/lib/file_backend/file_homeobject.hpp +++ b/src/lib/file_backend/file_homeobject.hpp @@ -13,7 +13,9 @@ namespace homeobject { struct ShardIndex { folly::ConcurrentHashMap< BlobRoute, bool > btree_; + int fd_{-1}; std::atomic< blob_id_t > shard_offset_{0ull}; + ~ShardIndex(); }; class FileHomeObject : public HomeObjectImpl { diff --git a/src/lib/file_backend/file_shard_manager.cpp b/src/lib/file_backend/file_shard_manager.cpp index f3332906..a4161934 100644 --- a/src/lib/file_backend/file_shard_manager.cpp +++ b/src/lib/file_backend/file_shard_manager.cpp @@ -39,7 +39,7 @@ ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id_t pg_owner std::filesystem::resize_file(shard_file, max_shard_size()); RELEASE_ASSERT(std::filesystem::exists(shard_file), "Shard Path Failed Creation! [path={}]", shard_file.string()); - auto shard_fd = open(shard_file.string().c_str(), O_WRONLY); + auto shard_fd = open(shard_file.string().c_str(), O_RDWR); RELEASE_ASSERT(shard_fd >= 0, "Failed to open Shard {}", shard_file.string()); nlohmann::json j; @@ -59,9 +59,9 @@ ShardManager::Result< ShardInfo > FileHomeObject::_create_shard(pg_id_t pg_owner RELEASE_ASSERT(0 < err, "Failed to write to: {}", shard_file.string()); auto [it, happened] = index_.try_emplace(info.id, std::make_unique< ShardIndex >()); + it->second->fd_ = shard_fd; RELEASE_ASSERT(happened, "Could not create BTree!"); it->second->shard_offset_.store(sizeof(hdr_len) + serialize.size()); - close(shard_fd); return info; } @@ -76,4 +76,8 @@ ShardManager::Result< ShardInfo > FileHomeObject::_seal_shard(shard_id_t id) { return shard_info; } +ShardIndex::~ShardIndex() { + if (fd_ > 0) close(fd_); +} + } // namespace homeobject From 4d21a426761d2b4e21e4f10360dc6798ffb4f2d9 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Fri, 29 Sep 2023 11:13:50 -0700 Subject: [PATCH 18/19] Increase test back to original size. --- src/lib/tests/BlobManagerTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/tests/BlobManagerTest.cpp b/src/lib/tests/BlobManagerTest.cpp index 17ff0f14..bb1dcc5e 100644 --- a/src/lib/tests/BlobManagerTest.cpp +++ b/src/lib/tests/BlobManagerTest.cpp @@ -15,7 +15,7 @@ TEST_F(TestFixture, BasicTests) { for (auto k = 0; batch_sz > k; ++k) { t_v.push_back(std::thread([this, &call_lock, &calls, batch_sz]() mutable { auto our_calls = std::list< folly::SemiFuture< folly::Unit > >(); - for (auto i = _blob_id + _shard_2.id + 1; (_blob_id + _shard_1.id + 1) + ((20 * Ki) / batch_sz) > i; ++i) { + for (auto i = _blob_id + _shard_2.id + 1; (_blob_id + _shard_1.id + 1) + ((100 * Ki) / batch_sz) > i; ++i) { our_calls.push_back(homeobj_->blob_manager()->get(_shard_1.id, _blob_id).deferValue([](auto const& e) { EXPECT_TRUE(!!e); e.then([](auto const& blob) { From eee5c590e5b98e38a9ef37c4b6259d6921c883a8 Mon Sep 17 00:00:00 2001 From: Brian Szmyd Date: Mon, 6 Nov 2023 08:51:02 -0700 Subject: [PATCH 19/19] Fix homestore test. --- src/lib/homestore_backend/CMakeLists.txt | 2 +- src/lib/tests/fixture_app.cpp | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/lib/homestore_backend/CMakeLists.txt b/src/lib/homestore_backend/CMakeLists.txt index 1ae6b838..22c3bbf3 100644 --- a/src/lib/homestore_backend/CMakeLists.txt +++ b/src/lib/homestore_backend/CMakeLists.txt @@ -27,6 +27,6 @@ target_link_libraries(homestore_test homeobject_homestore ${COMMON_TEST_DEPS} ) -add_test(NAME HomestoreTest COMMAND homestore_test -csv error --executor immediate) +add_test(NAME HomestoreTest COMMAND homestore_test -csv error --executor immediate --init_file 2) set_property(TEST HomestoreTest PROPERTY RUN_SERIAL 1) endif() diff --git a/src/lib/tests/fixture_app.cpp b/src/lib/tests/fixture_app.cpp index 67ca4de2..0a4abc24 100644 --- a/src/lib/tests/fixture_app.cpp +++ b/src/lib/tests/fixture_app.cpp @@ -1,11 +1,13 @@ -#include -#include +#include "fixture_app.hpp" + #include -#include "fixture_app.hpp" +#include +#include SISL_OPTION_GROUP( test_home_object, + (init_file, "", "init_file", "Initialize the path", ::cxxopts::value< uint32_t >()->default_value("0"), "Gb"), (num_iters, "", "num_iters", "number of iterations per loop", ::cxxopts::value< uint64_t >()->default_value("1000"), "number"), (num_pgs, "", "num_pgs", "number of pgs", ::cxxopts::value< uint64_t >()->default_value("10"), "number"), @@ -18,7 +20,11 @@ SISL_OPTIONS_ENABLE(logging, iomgr, homeobject, test_home_object) FixtureApp::FixtureApp() { clean(); - LOGWARN("creating device {} file with size {} ", path_, homestore::in_bytes(2 * Gi)); + if (auto init_size = SISL_OPTIONS["init_file"].as< uint32_t >(); 0u < init_size) { + LOGWARN("creating device {} file with size {}Gb", path_, homestore::in_bytes(init_size * Gi)); + std::ofstream ofs{path_, std::ios::binary | std::ios::out | std::ios::trunc}; + std::filesystem::resize_file(path_, init_size * Gi); + } } homeobject::peer_id_t FixtureApp::discover_svcid(std::optional< homeobject::peer_id_t > const& p) const {