From ca6db1649bae5bd6da7bd686e91ab7b5283bb2cb Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Mon, 6 May 2024 07:54:22 -0700 Subject: [PATCH] handle missing pdev --- src/include/homestore/homestore_decl.hpp | 6 ++- src/lib/device/device.h | 8 +++- src/lib/device/virtual_dev.cpp | 54 +++++++++++++++++++++++- src/lib/device/virtual_dev.hpp | 1 + 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/src/include/homestore/homestore_decl.hpp b/src/include/homestore/homestore_decl.hpp index 51c043f9d..59d51f300 100644 --- a/src/include/homestore/homestore_decl.hpp +++ b/src/include/homestore/homestore_decl.hpp @@ -83,6 +83,7 @@ static constexpr uint32_t INVALID_PDEV_ID{std::numeric_limits< uint32_t >::max() static constexpr uint32_t INVALID_VDEV_ID{std::numeric_limits< uint32_t >::max()}; static constexpr uint32_t INVALID_CHUNK_ID{std::numeric_limits< uint32_t >::max()}; static constexpr uint32_t INVALID_DEV_ID{std::numeric_limits< uint32_t >::max()}; +static constexpr uint64_t INVALID_DEV_OFFSET{std::numeric_limits< uint64_t >::max()}; constexpr uint16_t MAX_UUID_LEN{128}; static constexpr hs_uuid_t INVALID_SYSTEM_UUID{0}; @@ -177,7 +178,10 @@ struct hs_input_params { nlohmann::json to_json() const; std::string to_string() const { return to_json().dump(4); } uint64_t io_mem_size() const { return (hugepage_size != 0) ? 
hugepage_size : app_mem_size; } - bool has_fast_dev() const { return std::any_of(devices.begin(), devices.end(), [](const dev_info& d) { return d.dev_type == HSDevType::Fast; }); } + bool has_fast_dev() const { + return std::any_of(devices.begin(), devices.end(), + [](const dev_info& d) { return d.dev_type == HSDevType::Fast; }); + } }; struct hs_engine_config { diff --git a/src/lib/device/device.h b/src/lib/device/device.h index a69e740dd..0dc483670 100644 --- a/src/lib/device/device.h +++ b/src/lib/device/device.h @@ -159,12 +159,16 @@ class DeviceManager { const Chunk* get_chunk(uint32_t chunk_id) const { std::unique_lock lg{m_vdev_mutex}; - return (chunk_id == INVALID_CHUNK_ID) ? nullptr : m_chunks[chunk_id].get(); + // if a pdev is missing on restart, chunk_id from client might be larger than m_chunks.size() + if (chunk_id == INVALID_CHUNK_ID || chunk_id >= m_chunks.size()) return nullptr; + return m_chunks[chunk_id].get(); } Chunk* get_chunk_mutable(uint32_t chunk_id) { std::unique_lock lg{m_vdev_mutex}; - return (chunk_id == INVALID_CHUNK_ID) ? 
nullptr : m_chunks[chunk_id].get(); + // if a pdev is missing on restart, chunk_id from client might be larger than m_chunks.size() + if (chunk_id == INVALID_CHUNK_ID || chunk_id >= m_chunks.size()) return nullptr; + return m_chunks[chunk_id].get(); } uint32_t atomic_page_size(HSDevType dtype) const; diff --git a/src/lib/device/virtual_dev.cpp b/src/lib/device/virtual_dev.cpp index 7153d31b6..61cf4487a 100644 --- a/src/lib/device/virtual_dev.cpp +++ b/src/lib/device/virtual_dev.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include "device/chunk.h" #include "device/physical_dev.hpp" @@ -309,6 +310,8 @@ uint64_t VirtualDev::get_len(const iovec* iov, int iovcnt) { return len; } +// for all write functions, we don't expect to get invalid dev_offset, since we will never allocate blkid from missing +// chunk (missing pdev); ////////////////////////// async write section ////////////////////////////////// folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32_t size, BlkId const& bid, bool part_of_batch) { @@ -316,6 +319,10 @@ folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32 Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, &chunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + // TODO: define a new error code for missing pdev case; + return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again)); + } auto* pdev = chunk->physical_dev_mutable(); HS_LOG(TRACE, device, "Writing in device: {}, offset = {}", pdev->pdev_id(), dev_offset); @@ -328,6 +335,9 @@ folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32 folly::Future< std::error_code > VirtualDev::async_write(const char* buf, uint32_t size, cshared< Chunk >& chunk, uint64_t offset_in_chunk) { + if (sisl_unlikely(!is_chunk_loaded(chunk))) { + return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again)); + 
} auto const dev_offset = chunk->start_offset() + offset_in_chunk; auto* pdev = chunk->physical_dev_mutable(); @@ -345,6 +355,9 @@ folly::Future< std::error_code > VirtualDev::async_writev(const iovec* iov, cons Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, &chunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again)); + } auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); @@ -358,6 +371,9 @@ folly::Future< std::error_code > VirtualDev::async_writev(const iovec* iov, cons folly::Future< std::error_code > VirtualDev::async_writev(const iovec* iov, const int iovcnt, cshared< Chunk >& chunk, uint64_t offset_in_chunk) { + if (sisl_unlikely(!is_chunk_loaded(chunk))) { + return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again)); + } auto const dev_offset = chunk->start_offset() + offset_in_chunk; auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); @@ -376,11 +392,17 @@ std::error_code VirtualDev::sync_write(const char* buf, uint32_t size, BlkId con Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, &chunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } return chunk->physical_dev_mutable()->sync_write(buf, size, dev_offset); } std::error_code VirtualDev::sync_write(const char* buf, uint32_t size, cshared< Chunk >& chunk, uint64_t offset_in_chunk) { + if (sisl_unlikely(!is_chunk_loaded(chunk))) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } return chunk->physical_dev_mutable()->sync_write(buf, size, chunk->start_offset() + offset_in_chunk); } @@ -389,6 +411,9 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, BlkId cons Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, 
&chunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); @@ -402,6 +427,9 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, BlkId cons std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, cshared< Chunk >& chunk, uint64_t offset_in_chunk) { + if (sisl_unlikely(!is_chunk_loaded(chunk))) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } uint64_t const dev_offset = chunk->start_offset() + offset_in_chunk; auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); @@ -414,6 +442,8 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, cshared< C return pdev->sync_writev(iov, iovcnt, size, dev_offset); } +// for read, chunk might be missing in case the pdev is gone (for example, breakfix), so we need to check if chunk is +// loaded before proceeding with read; ////////////////////////////////// async read section /////////////////////////////////////////////// folly::Future< std::error_code > VirtualDev::async_read(char* buf, uint64_t size, BlkId const& bid, bool part_of_batch) { @@ -421,6 +451,9 @@ folly::Future< std::error_code > VirtualDev::async_read(char* buf, uint64_t size Chunk* pchunk; uint64_t const dev_offset = to_dev_offset(bid, &pchunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::resource_unavailable_try_again)); + } return pchunk->physical_dev_mutable()->async_read(buf, size, dev_offset, part_of_batch); } @@ -430,6 +463,9 @@ folly::Future< std::error_code > VirtualDev::async_readv(iovec* iovs, int iovcnt Chunk* pchunk; uint64_t const dev_offset = to_dev_offset(bid, &pchunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return folly::makeFuture< std::error_code 
>(std::make_error_code(std::errc::resource_unavailable_try_again)); + } return pchunk->physical_dev_mutable()->async_readv(iovs, iovcnt, size, dev_offset, part_of_batch); } @@ -439,10 +475,16 @@ std::error_code VirtualDev::sync_read(char* buf, uint32_t size, BlkId const& bid Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, &chunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } return chunk->physical_dev_mutable()->sync_read(buf, size, dev_offset); } std::error_code VirtualDev::sync_read(char* buf, uint32_t size, cshared< Chunk >& chunk, uint64_t offset_in_chunk) { + if (sisl_unlikely(!is_chunk_loaded(chunk))) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } return chunk->physical_dev_mutable()->sync_read(buf, size, chunk->start_offset() + offset_in_chunk); } @@ -451,6 +493,9 @@ std::error_code VirtualDev::sync_readv(iovec* iov, int iovcnt, BlkId const& bid) Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, &chunk); + if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); @@ -463,6 +508,9 @@ std::error_code VirtualDev::sync_readv(iovec* iov, int iovcnt, BlkId const& bid) } std::error_code VirtualDev::sync_readv(iovec* iov, int iovcnt, cshared< Chunk >& chunk, uint64_t offset_in_chunk) { + if (sisl_unlikely(!is_chunk_loaded(chunk))) { + return std::make_error_code(std::errc::resource_unavailable_try_again); + } uint64_t const dev_offset = chunk->start_offset() + offset_in_chunk; auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); @@ -600,8 +648,12 @@ int VirtualDev::cp_progress_percent() { return 100; } ///////////////////////// VirtualDev Private Methods ///////////////////////////// uint64_t VirtualDev::to_dev_offset(BlkId const& b, 
Chunk** chunk) const { *chunk = m_dmgr.get_chunk_mutable(b.chunk_num()); - RELEASE_ASSERT(*chunk, "Chunk got null {}", b.chunk_num()); + if (!(*chunk)) return INVALID_DEV_OFFSET; return uint64_cast(b.blk_num()) * block_size() + uint64_cast((*chunk)->start_offset()); } +bool VirtualDev::is_chunk_loaded(cshared< Chunk >& chunk) const { + return m_dmgr.get_chunk(chunk->chunk_id()) != nullptr; +} + } // namespace homestore diff --git a/src/lib/device/virtual_dev.hpp b/src/lib/device/virtual_dev.hpp index 6cc629cfc..88dddeba0 100644 --- a/src/lib/device/virtual_dev.hpp +++ b/src/lib/device/virtual_dev.hpp @@ -299,6 +299,7 @@ class VirtualDev { private: uint64_t to_dev_offset(BlkId const& b, Chunk** chunk) const; + bool is_chunk_loaded(cshared< Chunk >& chunk) const; BlkAllocStatus alloc_blks_from_chunk(blk_count_t nblks, blk_alloc_hints const& hints, MultiBlkId& out_blkid, Chunk* chunk); };