Skip to content

Commit

Permalink
handle missing pdev when reading (#405)
Browse files Browse the repository at this point in the history
In some scenarios, HomeStore might be restarted with a missing pdev (for example, during a breakfix), in which case none of the chunks on that pdev will be loaded, so we need to handle this case.

1. For reads from a client, we need to check whether the chunk referenced by the provided blkid is loaded and proceed accordingly.
2. For writes, we will almost never encounter this case, since we never allocate a blk from a missing pdev; the check is added anyway for the sake of completeness.
  • Loading branch information
JacksonYao287 authored May 8, 2024
1 parent 12390e7 commit 97cba11
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 5 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "6.4.3"
version = "6.4.4"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
6 changes: 5 additions & 1 deletion src/include/homestore/homestore_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ static constexpr uint32_t INVALID_PDEV_ID{std::numeric_limits< uint32_t >::max()
// Sentinel ids: each is set to the maximum value representable by its integer type.
static constexpr uint32_t INVALID_VDEV_ID{std::numeric_limits< uint32_t >::max()};
static constexpr uint32_t INVALID_CHUNK_ID{std::numeric_limits< uint32_t >::max()};
static constexpr uint32_t INVALID_DEV_ID{std::numeric_limits< uint32_t >::max()};
// Sentinel device offset (maximum uint64_t); VirtualDev::to_dev_offset returns it when the chunk lookup yields null.
static constexpr uint64_t INVALID_DEV_OFFSET{std::numeric_limits< uint64_t >::max()};
// Upper bound on UUID length — presumably in bytes/characters; TODO confirm against users of this constant.
constexpr uint16_t MAX_UUID_LEN{128};
// Sentinel system uuid, defined as 0.
static constexpr hs_uuid_t INVALID_SYSTEM_UUID{0};

Expand Down Expand Up @@ -177,7 +178,10 @@ struct hs_input_params {
// Presumably serializes these input params to JSON; only the declaration is visible here.
nlohmann::json to_json() const;
// Returns the result of to_json() rendered as a string via dump(4) (4 is the indent argument).
std::string to_string() const { return to_json().dump(4); }
// Returns hugepage_size when it is non-zero; otherwise falls back to app_mem_size.
uint64_t io_mem_size() const { return (hugepage_size != 0) ? hugepage_size : app_mem_size; }
/// @return true when at least one entry in `devices` has dev_type == HSDevType::Fast, false otherwise
///         (including when `devices` is empty).
// NOTE: the member must be defined exactly once; a second, reformatted copy of the same definition is a
// redefinition error.
bool has_fast_dev() const {
    return std::any_of(devices.begin(), devices.end(),
                       [](const dev_info& d) { return d.dev_type == HSDevType::Fast; });
}
};

struct hs_engine_config {
Expand Down
8 changes: 6 additions & 2 deletions src/lib/device/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,16 @@ class DeviceManager {

/// Looks up a chunk by id while holding m_vdev_mutex.
/// @param chunk_id  id of the chunk to look up.
/// @return pointer to the chunk, or nullptr when chunk_id is INVALID_CHUNK_ID or m_chunks has no entry for it.
const Chunk* get_chunk(uint32_t chunk_id) const {
    std::unique_lock lg{m_vdev_mutex};
    // If a pdev is missing on restart, its chunks are never loaded, so a chunk_id coming from a client may
    // have no entry in m_chunks (it can even be larger than m_chunks.size()). The existence check must run
    // BEFORE m_chunks is indexed; indexing first would make this guard unreachable.
    if ((chunk_id == INVALID_CHUNK_ID) || !m_chunks.index_exists(chunk_id)) { return nullptr; }
    return m_chunks[chunk_id].get();
}

/// Mutable counterpart of get_chunk(): looks up a chunk by id while holding m_vdev_mutex.
/// @param chunk_id  id of the chunk to look up.
/// @return pointer to the chunk, or nullptr when chunk_id is INVALID_CHUNK_ID or m_chunks has no entry for it.
Chunk* get_chunk_mutable(uint32_t chunk_id) {
    std::unique_lock lg{m_vdev_mutex};
    // If a pdev is missing on restart, its chunks are never loaded, so a chunk_id coming from a client may
    // have no entry in m_chunks (it can even be larger than m_chunks.size()). The existence check must run
    // BEFORE m_chunks is indexed; indexing first would make this guard unreachable.
    if ((chunk_id == INVALID_CHUNK_ID) || !m_chunks.index_exists(chunk_id)) { return nullptr; }
    return m_chunks[chunk_id].get();
}

uint32_t atomic_page_size(HSDevType dtype) const;
Expand Down
54 changes: 53 additions & 1 deletion src/lib/device/virtual_dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <sisl/logging/logging.h>
#include <sisl/utility/atomic_counter.hpp>
#include <iomgr/iomgr_flip.hpp>
#include <homestore/homestore_decl.hpp>

#include "device/chunk.h"
#include "device/physical_dev.hpp"
Expand Down Expand Up @@ -309,13 +310,19 @@ uint64_t VirtualDev::get_len(const iovec* iov, int iovcnt) {
return len;
}

// For all write functions, we do not expect to get an invalid dev_offset, since we never allocate a blkid from a
// missing chunk (i.e. a missing pdev).
////////////////////////// async write section //////////////////////////////////
folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32_t size, BlkId const& bid,
bool part_of_batch) {
HS_DBG_ASSERT_EQ(bid.is_multi(), false, "async_write needs individual pieces of blkid - not MultiBlkid");

Chunk* chunk;
uint64_t const dev_offset = to_dev_offset(bid, &chunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
// TODO: define a new error code for missing pdev case;
return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again));
}
auto* pdev = chunk->physical_dev_mutable();

HS_LOG(TRACE, device, "Writing in device: {}, offset = {}", pdev->pdev_id(), dev_offset);
Expand All @@ -328,6 +335,9 @@ folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32

folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32_t size, cshared< Chunk >& chunk,
uint64_t offset_in_chunk) {
if (sisl_unlikely(!is_chunk_available(chunk))) {
return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again));
}
auto const dev_offset = chunk->start_offset() + offset_in_chunk;
auto* pdev = chunk->physical_dev_mutable();

Expand All @@ -345,6 +355,9 @@ folly::Future< std::error_code > VirtualDev::async_writev(const iovec* iov, cons

Chunk* chunk;
uint64_t const dev_offset = to_dev_offset(bid, &chunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again));
}
auto const size = get_len(iov, iovcnt);
auto* pdev = chunk->physical_dev_mutable();

Expand All @@ -358,6 +371,9 @@ folly::Future< std::error_code > VirtualDev::async_writev(const iovec* iov, cons

folly::Future< std::error_code > VirtualDev::async_writev(const iovec* iov, const int iovcnt, cshared< Chunk >& chunk,
uint64_t offset_in_chunk) {
if (sisl_unlikely(!is_chunk_available(chunk))) {
return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again));
}
auto const dev_offset = chunk->start_offset() + offset_in_chunk;
auto const size = get_len(iov, iovcnt);
auto* pdev = chunk->physical_dev_mutable();
Expand All @@ -376,11 +392,17 @@ std::error_code VirtualDev::sync_write(const char* buf, uint32_t size, BlkId con

Chunk* chunk;
uint64_t const dev_offset = to_dev_offset(bid, &chunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
return std::make_error_code(std::errc::resource_unavailable_try_again);
}
return chunk->physical_dev_mutable()->sync_write(buf, size, dev_offset);
}

// Synchronous write of `size` bytes from `buf` to `chunk`, starting `offset_in_chunk` past the chunk's start offset.
// Returns resource_unavailable_try_again when is_chunk_available() reports the chunk as unavailable; otherwise
// returns whatever the chunk's physical device reports for the write.
std::error_code VirtualDev::sync_write(const char* buf, uint32_t size, cshared< Chunk >& chunk,
                                       uint64_t offset_in_chunk) {
    bool const chunk_usable = is_chunk_available(chunk);
    if (sisl_unlikely(!chunk_usable)) { return std::make_error_code(std::errc::resource_unavailable_try_again); }

    auto* pdev = chunk->physical_dev_mutable();
    auto const dev_offset = chunk->start_offset() + offset_in_chunk;
    return pdev->sync_write(buf, size, dev_offset);
}

Expand All @@ -389,6 +411,9 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, BlkId cons

Chunk* chunk;
uint64_t const dev_offset = to_dev_offset(bid, &chunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
return std::make_error_code(std::errc::resource_unavailable_try_again);
}
auto const size = get_len(iov, iovcnt);
auto* pdev = chunk->physical_dev_mutable();

Expand All @@ -402,6 +427,9 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, BlkId cons

std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, cshared< Chunk >& chunk,
uint64_t offset_in_chunk) {
if (sisl_unlikely(!is_chunk_available(chunk))) {
return std::make_error_code(std::errc::resource_unavailable_try_again);
}
uint64_t const dev_offset = chunk->start_offset() + offset_in_chunk;
auto const size = get_len(iov, iovcnt);
auto* pdev = chunk->physical_dev_mutable();
Expand All @@ -414,13 +442,18 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, cshared< C
return pdev->sync_writev(iov, iovcnt, size, dev_offset);
}

// For reads, the chunk might be missing if its pdev is gone (for example, during a breakfix), so we need to check
// that the chunk is loaded before proceeding with the read.
////////////////////////////////// async read section ///////////////////////////////////////////////
// Asynchronous read of `size` bytes into `buf` from the location addressed by the single (non-multi) blkid `bid`.
// Resolves `bid` to a chunk and device offset; if the offset comes back as INVALID_DEV_OFFSET the returned future
// is already fulfilled with resource_unavailable_try_again, otherwise the read is handed to the chunk's physical
// device together with `part_of_batch`.
folly::Future< std::error_code > VirtualDev::async_read(char* buf, uint64_t size, BlkId const& bid,
                                                        bool part_of_batch) {
    HS_DBG_ASSERT_EQ(bid.is_multi(), false, "async_read needs individual pieces of blkid - not MultiBlkid");

    Chunk* target_chunk{nullptr};
    auto const offset_on_dev = to_dev_offset(bid, &target_chunk);
    bool const chunk_missing = (offset_on_dev == INVALID_DEV_OFFSET);
    if (sisl_unlikely(chunk_missing)) {
        return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again));
    }

    auto* pdev = target_chunk->physical_dev_mutable();
    return pdev->async_read(buf, size, offset_on_dev, part_of_batch);
}

Expand All @@ -430,6 +463,9 @@ folly::Future< std::error_code > VirtualDev::async_readv(iovec* iovs, int iovcnt

Chunk* pchunk;
uint64_t const dev_offset = to_dev_offset(bid, &pchunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again));
}
return pchunk->physical_dev_mutable()->async_readv(iovs, iovcnt, size, dev_offset, part_of_batch);
}

Expand All @@ -439,10 +475,16 @@ std::error_code VirtualDev::sync_read(char* buf, uint32_t size, BlkId const& bid

Chunk* chunk;
uint64_t const dev_offset = to_dev_offset(bid, &chunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
return std::make_error_code(std::errc::resource_unavailable_try_again);
}
return chunk->physical_dev_mutable()->sync_read(buf, size, dev_offset);
}

// Synchronous read of `size` bytes into `buf` from `chunk`, starting `offset_in_chunk` past the chunk's start offset.
// Returns resource_unavailable_try_again when is_chunk_available() reports the chunk as unavailable; otherwise
// returns whatever the chunk's physical device reports for the read.
std::error_code VirtualDev::sync_read(char* buf, uint32_t size, cshared< Chunk >& chunk, uint64_t offset_in_chunk) {
    bool const chunk_usable = is_chunk_available(chunk);
    if (sisl_unlikely(!chunk_usable)) { return std::make_error_code(std::errc::resource_unavailable_try_again); }

    auto* pdev = chunk->physical_dev_mutable();
    auto const dev_offset = chunk->start_offset() + offset_in_chunk;
    return pdev->sync_read(buf, size, dev_offset);
}

Expand All @@ -451,6 +493,9 @@ std::error_code VirtualDev::sync_readv(iovec* iov, int iovcnt, BlkId const& bid)

Chunk* chunk;
uint64_t const dev_offset = to_dev_offset(bid, &chunk);
if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) {
return std::make_error_code(std::errc::resource_unavailable_try_again);
}
auto const size = get_len(iov, iovcnt);
auto* pdev = chunk->physical_dev_mutable();

Expand All @@ -463,6 +508,9 @@ std::error_code VirtualDev::sync_readv(iovec* iov, int iovcnt, BlkId const& bid)
}

std::error_code VirtualDev::sync_readv(iovec* iov, int iovcnt, cshared< Chunk >& chunk, uint64_t offset_in_chunk) {
if (sisl_unlikely(!is_chunk_available(chunk))) {
return std::make_error_code(std::errc::resource_unavailable_try_again);
}
uint64_t const dev_offset = chunk->start_offset() + offset_in_chunk;
auto const size = get_len(iov, iovcnt);
auto* pdev = chunk->physical_dev_mutable();
Expand Down Expand Up @@ -600,8 +648,12 @@ int VirtualDev::cp_progress_percent() { return 100; }
///////////////////////// VirtualDev Private Methods /////////////////////////////
// Translates a blkid into an absolute offset on the underlying device.
//
// @param b      blkid to translate; its chunk_num() selects the chunk and its blk_num() the block within it.
// @param chunk  out-param; receives the chunk looked up via m_dmgr (nullptr when the lookup finds nothing).
// @return blk_num * block_size() + the chunk's start offset, or INVALID_DEV_OFFSET when the chunk is not found.
//
// A missing chunk is a legitimate runtime condition (the pdev that hosts it may not have been loaded after a
// restart), so it is reported through the sentinel return value instead of asserting; callers check for
// INVALID_DEV_OFFSET and fail the IO with an error code.
uint64_t VirtualDev::to_dev_offset(BlkId const& b, Chunk** chunk) const {
    *chunk = m_dmgr.get_chunk_mutable(b.chunk_num());
    if (*chunk == nullptr) { return INVALID_DEV_OFFSET; }
    return uint64_cast(b.blk_num()) * block_size() + uint64_cast((*chunk)->start_offset());
}

// Reports whether `chunk` can be used for IO.
//
// @param chunk  shared pointer to the chunk to check; may be null.
// @return false when `chunk` is null or when m_dmgr.get_chunk() has no chunk for its id; true otherwise.
bool VirtualDev::is_chunk_available(cshared< Chunk >& chunk) const {
    // Guard the dereference below: a null chunk is simply "not available".
    if (!chunk) { return false; }
    return m_dmgr.get_chunk(chunk->chunk_id()) != nullptr;
}

} // namespace homestore
1 change: 1 addition & 0 deletions src/lib/device/virtual_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ class VirtualDev {

private:
// Looks up the chunk for blkid `b`, stores it in *chunk, and returns the blk's offset on the device
// (blk_num * block_size + chunk start offset). Callers compare the result against INVALID_DEV_OFFSET.
uint64_t to_dev_offset(BlkId const& b, Chunk** chunk) const;
// Returns true when m_dmgr.get_chunk() yields a non-null chunk for `chunk`'s chunk id.
bool is_chunk_available(cshared< Chunk >& chunk) const;
BlkAllocStatus alloc_blks_from_chunk(blk_count_t nblks, blk_alloc_hints const& hints, MultiBlkId& out_blkid,
Chunk* chunk);
};
Expand Down

0 comments on commit 97cba11

Please sign in to comment.