Add flexible journal vdev. (#250)
Ability to add chunks to a journal vdev dynamically. Create a
pool of chunks and maintain the vdev's in-use chunks as a
sliding-window list. Add an API to create a chunk. Chunks that
are part of a journal vdev carry private data and are connected
like a singly linked list via the next_chunk field in that
private data. The journal vdev can hand out independent
descriptors to the user for alloc, write, and truncate.
Modified tests to support this.
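A minimal sketch of the chunk chaining described above, assuming a hypothetical private-data layout (journal_chunk_private, its fields, and the zero-as-end sentinel are illustrative, not the actual Homestore definitions):

#include <cstdint>
#include <cstring>

// Hypothetical per-chunk private data: the commit stores a next_chunk link in each
// chunk's private data to thread the journal vdev's chunks into a singly linked list.
struct journal_chunk_private {
    uint64_t created_offset{0}; // illustrative: logical offset at which this chunk was appended
    uint32_t next_chunk{0};     // chunk_id of the next chunk in the chain (0 == end of list here)
};

// Conceptual traversal of the chain. lookup_private(chunk_id) stands in for reading
// Chunk::user_private() of the chunk with that id; visit(chunk_id, priv) processes it.
template < typename LookupFn, typename VisitFn >
void walk_journal_chunks(uint32_t head_chunk_id, LookupFn&& lookup_private, VisitFn&& visit) {
    for (uint32_t cid = head_chunk_id; cid != 0;) {
        journal_chunk_private priv;
        std::memcpy(&priv, lookup_private(cid), sizeof(priv)); // private data lives in chunk_info::user_private
        visit(cid, priv);
        cid = priv.next_chunk;
    }
}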
sanebay authored Jan 25, 2024
1 parent a0291e4 commit a7a22c2
Showing 25 changed files with 1,390 additions and 619 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
@@ -5,7 +5,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "5.0.4"
version = "5.0.5"
homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
topics = ("ebay", "nublox")
1 change: 1 addition & 0 deletions src/include/homestore/chunk_selector.h
@@ -22,6 +22,7 @@ class ChunkSelector {
public:
ChunkSelector() = default;
virtual void add_chunk(cshared< Chunk >&) = 0;
virtual void remove_chunk(cshared< Chunk >&){};
virtual void foreach_chunks(std::function< void(cshared< Chunk >&) >&& cb) = 0;
virtual cshared< Chunk > select_chunk(blk_count_t nblks, const blk_alloc_hints& hints) = 0;

7 changes: 5 additions & 2 deletions src/include/homestore/homestore_decl.hpp
@@ -106,6 +106,8 @@ ENUM(chunk_selector_type_t, uint8_t, // What are the options to select chunk to
ALWAYS_CALLER_CONTROLLED // Expect the caller to always provide the specific chunkid
);

ENUM(vdev_size_type_t, uint8_t, VDEV_SIZE_STATIC, VDEV_SIZE_DYNAMIC);

////////////// All structs ///////////////////
struct dev_info {
explicit dev_info(std::string name, HSDevType type = HSDevType::Data) : dev_name{std::move(name)}, dev_type{type} {}
@@ -148,7 +150,9 @@ static std::string in_bytes(uint64_t sz) {
struct hs_format_params {
float size_pct;
uint32_t num_chunks{1};
uint64_t chunk_size{0};
uint32_t block_size{0};
vdev_size_type_t vdev_size_type{vdev_size_type_t::VDEV_SIZE_STATIC};
blk_allocator_type_t alloc_type{blk_allocator_type_t::varsize};
chunk_selector_type_t chunk_sel_type{chunk_selector_type_t::ROUND_ROBIN};
};
@@ -199,5 +203,4 @@ struct cap_attrs {
} // namespace homestore

////////////// Misc ///////////////////
#define HOMESTORE_LOG_MODS \
btree, device, blkalloc, cp, metablk, wbcache, logstore, transient, replication
#define HOMESTORE_LOG_MODS btree, device, blkalloc, cp, metablk, wbcache, logstore, transient, replication, journalvdev
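With the new hs_format_params fields above, a dynamically sized journal vdev would be described roughly as below; the size percentage and chunk size are illustrative, and how the struct is handed to the format call is elided:

homestore::hs_format_params journal_params;
journal_params.size_pct = 2.0f;                 // share of device capacity given to this service
journal_params.chunk_size = 32 * 1024 * 1024;   // unit by which the dynamic vdev grows/shrinks
journal_params.vdev_size_type = homestore::vdev_size_type_t::VDEV_SIZE_DYNAMIC;
// num_chunks keeps its default: for a VDEV_SIZE_DYNAMIC vdev, chunks are added on
// demand (from the journal chunk pool) instead of being carved out up front.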
2 changes: 1 addition & 1 deletion src/include/homestore/logstore_service.hpp
@@ -135,7 +135,7 @@ class LogStoreService {
void device_truncate(const device_truncate_cb_t& cb = nullptr, const bool wait_till_done = false,
const bool dry_run = false);

folly::Future< std::error_code > create_vdev(uint64_t size, logstore_family_id_t family, uint32_t num_chunks);
folly::Future< std::error_code > create_vdev(uint64_t size, logstore_family_id_t family, uint32_t chunk_size);
shared< VirtualDev > open_vdev(const vdev_info& vinfo, logstore_family_id_t family, bool load_existing);
shared< JournalVirtualDev > get_vdev(logstore_family_id_t family) const {
return (family == DATA_LOG_FAMILY_IDX) ? m_data_logdev_vdev : m_ctrl_logdev_vdev;
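With the signature change above, callers now pass a per-chunk size instead of a chunk count. A hedged calling sketch; the free logstore_service() accessor and the concrete sizes are assumptions:

uint64_t journal_size = 2ull * 1024 * 1024 * 1024;   // illustrative total size of the journal vdev
uint32_t chunk_size = 32 * 1024 * 1024;              // illustrative growth/truncation unit
logstore_service()
    .create_vdev(journal_size, DATA_LOG_FAMILY_IDX, chunk_size)
    .thenValue([](std::error_code ec) {
        if (ec) { /* handle vdev creation failure */ }
    });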
3 changes: 3 additions & 0 deletions src/lib/common/homestore_config.fbs
@@ -140,6 +140,9 @@ table Generic {
// percentage of cache used to create indx mempool. It should be more than 100 to
// take into account some floating buffers in writeback cache.
indx_mempool_percent : uint32 = 110;

// Number of chunks in journal chunk pool.
journal_chunk_pool_capacity: uint32 = 5;
}

table ResourceLimits {
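The new knob is read like the other Generic settings; a one-line sketch assuming the usual HS_DYNAMIC_CONFIG accessor pattern:

// Number of pre-created chunks the journal chunk pool keeps ready (default 5, per the table above).
uint32_t pool_capacity = HS_DYNAMIC_CONFIG(generic.journal_chunk_pool_capacity);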
14 changes: 3 additions & 11 deletions src/lib/device/chunk.cpp
@@ -23,10 +23,10 @@ Chunk::Chunk(PhysicalDev* pdev, const chunk_info& cinfo, uint32_t chunk_slot) :
m_chunk_info{cinfo}, m_pdev{pdev}, m_chunk_slot{chunk_slot}, m_stream_id{pdev->chunk_to_stream_id(cinfo)} {}

std::string Chunk::to_string() const {
return fmt::format("chunk_id={}, vdev_id={}, start_offset={}, size={}, end_of_chunk={}, slot_num_in_pdev={} "
return fmt::format("chunk_id={}, vdev_id={}, start_offset={}, size={}, slot_num_in_pdev={} "
"pdev_ordinal={} vdev_ordinal={} stream_id={}",
chunk_id(), vdev_id(), start_offset(), in_bytes(size()), end_of_chunk(), slot_number(),
pdev_ordinal(), vdev_ordinal(), stream_id());
chunk_id(), vdev_id(), start_offset(), in_bytes(size()), slot_number(), pdev_ordinal(),
vdev_ordinal(), stream_id());
}

void Chunk::set_user_private(const sisl::blob& data) {
@@ -36,13 +36,6 @@ void Chunk::set_user_private(const sisl::blob& data) {
write_chunk_info();
}

void Chunk::update_end_of_chunk(uint64_t end_offset) {
std::unique_lock lg{m_mgmt_mutex};
m_chunk_info.end_of_chunk_size = end_offset;
m_chunk_info.compute_checksum();
write_chunk_info();
}

void Chunk::write_chunk_info() {
auto buf = hs_utils::iobuf_alloc(chunk_info::size, sisl::buftag::superblk, physical_dev()->align_size());
auto cinfo = new (buf) chunk_info();
@@ -59,7 +52,6 @@ nlohmann::json Chunk::get_status([[maybe_unused]] int log_level) const {
j["vdev_id"] = vdev_id();
j["start_offset"] = start_offset();
j["size"] = size();
j["end_of_chunk_size"] = end_of_chunk();
j["slot_alloced?"] = is_busy();
return j;
}
2 changes: 0 additions & 2 deletions src/lib/device/chunk.h
@@ -49,7 +49,6 @@ class Chunk {
bool is_busy() const { return m_chunk_info.is_allocated(); }
uint32_t vdev_id() const { return m_chunk_info.vdev_id; }
uint16_t chunk_id() const { return static_cast< uint16_t >(m_chunk_info.chunk_id); }
uint64_t end_of_chunk() const { return m_chunk_info.end_of_chunk_size; }
uint32_t pdev_ordinal() const { return m_chunk_info.chunk_ordinal; }
const uint8_t* user_private() { return &m_chunk_info.user_private[0]; }
uint32_t stream_id() const { return m_stream_id; }
@@ -62,7 +61,6 @@
BlkAllocator* blk_allocator_mutable() { return m_blk_allocator.get(); }

////////////// Setters /////////////////////
void update_end_of_chunk(uint64_t end_offset);
void set_user_private(const sisl::blob& data);
void set_block_allocator(cshared< BlkAllocator >& blkalloc) { m_blk_allocator = blkalloc; }
void set_vdev_ordinal(uint32_t vdev_ordinal) { m_vdev_ordinal = vdev_ordinal; }
79 changes: 69 additions & 10 deletions src/lib/device/device.h
@@ -42,15 +42,17 @@ struct vdev_info {
uint32_t num_mirrors{0}; // 12: Total number of mirrors
uint32_t blk_size{0}; // 16: IO block size for this vdev
uint32_t num_primary_chunks{0}; // 20: number of primary chunks
uint8_t slot_allocated{0}; // 24: Is this current slot allocated
uint8_t failed{0}; // 25: set to true if disk is replaced
uint8_t hs_dev_type{0}; // 26: PDev dev type (as in fast or data)
uint8_t multi_pdev_choice{0}; // 27: Choice when multiple pdevs are present (vdev_multi_pdev_opts_t)
char name[64]; // 28: Name of the vdev
uint16_t checksum{0}; // 92: Checksum of this entire Block
uint8_t alloc_type; // 94: Allocator type of this vdev
uint8_t chunk_sel_type; // 95: Chunk Selector type of this vdev_id
uint8_t padding[160]{}; // 96: Pad to make it 256 bytes total
uint32_t chunk_size{0}; // 24: chunk size used in vdev.
vdev_size_type_t size_type{}; // 28: Whether the vdev size is static or dynamic.
uint8_t slot_allocated{0}; // 29: Is this current slot allocated
uint8_t failed{0}; // 30: set to true if disk is replaced
uint8_t hs_dev_type{0}; // 31: PDev dev type (as in fast or data)
uint8_t multi_pdev_choice{0}; // 32: Choice when multiple pdevs are present (vdev_multi_pdev_opts_t)
char name[64]; // 33: Name of the vdev
uint16_t checksum{0}; // 97: Checksum of this entire Block
uint8_t alloc_type; // 98: Allocator type of this vdev
uint8_t chunk_sel_type; // 99: Chunk Selector type of this vdev_id
uint8_t padding[155]{}; // 100: Pad to make it 256 bytes total
uint8_t user_private[user_private_size]{}; // 128: User specific information

uint32_t get_vdev_id() const { return vdev_id; }
@@ -94,11 +96,13 @@ ENUM(chunk_selector_t, uint8_t, // What are the options to select chunk to alloc

struct vdev_parameters {
std::string vdev_name; // Name of the vdev
vdev_size_type_t size_type{}; // Whether the size is static or dynamic.
uint64_t vdev_size; // Current Vdev size.
uint32_t num_chunks; // Total number of primary chunks.
uint32_t num_chunks{}; // Total number of primary chunks.
// NOTE: If pdev opts is ALL_PDEV_STRIPED, then num_chunks would round off
// to number of pdevs evenly
uint32_t blk_size; // Block size vdev operates on
uint32_t chunk_size{}; // Chunk size provided for dynamic vdev.
HSDevType dev_type; // Which physical device type this vdev belongs to (FAST or DATA)
blk_allocator_type_t alloc_type; // which allocator type this vdev wants to be with;
chunk_selector_type_t chunk_sel_type; // which chunk selector type this vdev wants to be with;
@@ -154,22 +158,29 @@ class DeviceManager {
shared< VirtualDev > create_vdev(vdev_parameters&& vdev_param);

const Chunk* get_chunk(uint32_t chunk_id) const {
std::unique_lock lg{m_vdev_mutex};
return (chunk_id == INVALID_CHUNK_ID) ? nullptr : m_chunks[chunk_id].get();
}

Chunk* get_chunk_mutable(uint32_t chunk_id) {
std::unique_lock lg{m_vdev_mutex};
return (chunk_id == INVALID_CHUNK_ID) ? nullptr : m_chunks[chunk_id].get();
}

uint32_t atomic_page_size(HSDevType dtype) const;
uint32_t optimal_page_size(HSDevType dtype) const;
uint32_t align_size(HSDevType dtype) const;

std::vector< PhysicalDev* > get_pdevs_by_dev_type(HSDevType dtype) const;
std::vector< shared< VirtualDev > > get_vdevs() const;

uint64_t total_capacity() const;
uint64_t total_capacity(HSDevType dtype) const;

shared< Chunk > create_chunk(HSDevType dev_type, uint32_t vdev_id, uint64_t chunk_size, const sisl::blob& data);
void remove_chunk(shared< Chunk > chunk);
void remove_chunk_locked(shared< Chunk > chunk);

private:
void load_vdevs();
int device_open_flags(const std::string& devname) const;
@@ -181,4 +192,52 @@
const std::vector< PhysicalDev* >& pdevs_by_type_internal(HSDevType dtype) const;
}; // class DeviceManager

// The chunk pool is used to get chunks when there is no space,
// and it is cheaper than creating a chunk on the fly,
// which requires a synchronous write.
class ChunkPool {
public:
struct Params {
uint64_t pool_capacity;
// Private data used when creating chunks.
std::function< sisl::blob() > init_private_data_cb;
uint8_t hs_dev_type;
uint32_t vdev_id;
uint64_t chunk_size;
};

ChunkPool(DeviceManager& dmgr, Params&& param);
~ChunkPool();

// Start the chunk pool.
void start();

// Add a chunk to the pool. If the queue is full, the
// chunk is removed from the system. Returns whether
// the chunk could be reused by adding it back to the pool.
bool enqueue(shared< Chunk >& chunk);

// Get a chunk from the pool.
shared< Chunk > dequeue();

// Returns the capacity of the chunk pool.
uint64_t capacity() { return m_params.pool_capacity; }
uint64_t size() { return m_pool.size(); }

private:
// Producer thread.
void producer();

private:
DeviceManager& m_dmgr;
Params m_params;
std::list< shared< Chunk > > m_pool;
uint32_t m_pool_capacity;
std::condition_variable m_pool_cv;
std::mutex m_pool_mutex;
std::thread m_producer_thread;
bool m_run_pool{false};
folly::Promise< folly::Unit > m_pool_halt;
};

} // namespace homestore
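A rough usage sketch of the new ChunkPool, using only the members declared above; the helper name, device type, pool capacity, and the empty private-data blob are illustrative assumptions:

using namespace homestore;

// Keep a few journal chunks pre-created in the background so that growing the vdev
// does not block on the synchronous write needed to create a chunk on the fly.
void setup_journal_chunk_pool(DeviceManager& dmgr, uint32_t vdev_id, uint64_t chunk_size) {
    ChunkPool::Params params{
        5,                                       // pool_capacity (e.g. generic.journal_chunk_pool_capacity)
        [] { return sisl::blob{}; },             // init_private_data_cb: initial per-chunk private data
                                                 // (layout is vdev specific; empty blob for illustration)
        static_cast< uint8_t >(HSDevType::Fast), // hs_dev_type: physical device type to carve chunks from
        vdev_id,                                 // vdev the pooled chunks will belong to
        chunk_size                               // size of each pre-created chunk
    };

    ChunkPool pool(dmgr, std::move(params));
    pool.start();                            // background producer keeps the pool filled to capacity

    shared< Chunk > chunk = pool.dequeue();  // take a ready chunk when the journal needs to grow
    // ... link `chunk` into the journal vdev's chunk chain via its private data ...

    pool.enqueue(chunk);                     // return (or retire) the chunk when the journal truncates
}

The producer thread refills the pool asynchronously, so a dequeue() on the journal's growth path avoids the synchronous write mentioned in the class comment above.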
