Skip to content

Commit

Permalink
fix HS metablk allocate duplicated blkId after restart (#238)
Browse files Browse the repository at this point in the history
* fix HS metablk allocate duplicated blkId after restart
  • Loading branch information
zichanglai authored Dec 2, 2023
1 parent 0f460de commit 0a9df25
Show file tree
Hide file tree
Showing 12 changed files with 73 additions and 6 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "4.9.0"
version = "4.9.1"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
2 changes: 2 additions & 0 deletions src/lib/blkalloc/append_blk_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ BlkAllocStatus AppendBlkAllocator::alloc(blk_count_t nblks, const blk_alloc_hint

// No-op override: unconditionally reports success; no per-blk on-disk state is updated here.
BlkAllocStatus AppendBlkAllocator::alloc_on_disk(BlkId const&) { return BlkAllocStatus::SUCCESS; }

// No-op override: recovery-time marking needs no action for this allocator; always succeeds.
BlkAllocStatus AppendBlkAllocator::mark_blk_allocated(BlkId const&) { return BlkAllocStatus::SUCCESS; }

// No-op override: freeing on disk requires no action in this allocator.
void AppendBlkAllocator::free_on_disk(BlkId const&) {}

// Always reports false: no per-blk on-disk allocation state is tracked here.
bool AppendBlkAllocator::is_blk_alloced_on_disk(BlkId const&, bool) const { return false; }
Expand Down
3 changes: 2 additions & 1 deletion src/lib/blkalloc/append_blk_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ class AppendBlkAllocator : public BlkAllocator {
BlkAllocStatus alloc_contiguous(BlkId& bid) override;
BlkAllocStatus alloc(blk_count_t nblks, blk_alloc_hints const& hints, BlkId& out_blkid) override;
void free(BlkId const& b) override;

BlkAllocStatus alloc_on_disk(BlkId const& in_bid) override;
BlkAllocStatus mark_blk_allocated(BlkId const& b) override;

void free_on_disk(BlkId const& b) override;
bool is_blk_alloced_on_disk(BlkId const& b, bool use_lock = false) const override;

Expand Down
5 changes: 4 additions & 1 deletion src/lib/blkalloc/bitmap_blk_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ bool BitmapBlkAllocator::is_blk_alloced_on_disk(const BlkId& b, bool use_lock) c
}

BlkAllocStatus BitmapBlkAllocator::alloc_on_disk(BlkId const& bid) {
if (!is_persistent()) { return BlkAllocStatus::FAILED; }
if (!is_persistent()) {
// For a non-persistent bitmap there is nothing to do, so always return success.
return BlkAllocStatus::SUCCESS;
}

rcu_read_lock();
auto list = get_alloc_blk_list();
Expand Down
2 changes: 1 addition & 1 deletion src/lib/blkalloc/bitmap_blk_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,4 @@ class BitmapBlkAllocator : public BlkAllocator {
void* m_meta_blk_cookie{nullptr};
std::atomic< int64_t > m_alloced_blk_count{0};
};
} // namespace homestore
} // namespace homestore
1 change: 1 addition & 0 deletions src/lib/blkalloc/blk_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ class BlkAllocator {
virtual BlkAllocStatus alloc_contiguous(BlkId& bid) = 0;
virtual BlkAllocStatus alloc(blk_count_t nblks, blk_alloc_hints const& hints, BlkId& out_blkid) = 0;
virtual BlkAllocStatus alloc_on_disk(BlkId const& bid) = 0;
virtual BlkAllocStatus mark_blk_allocated(BlkId const& bid) = 0;

virtual void free(BlkId const& id) = 0;
virtual void free_on_disk(BlkId const& bid) = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/lib/blkalloc/fixed_blk_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ BlkAllocStatus FixedBlkAllocator::alloc_contiguous(BlkId& out_blkid) {
return m_blk_q.read(out_blkid) ? BlkAllocStatus::SUCCESS : BlkAllocStatus::SPACE_FULL;
}

// No-op override: unconditionally returns success without touching allocator state.
BlkAllocStatus FixedBlkAllocator::mark_blk_allocated(BlkId const& b) { return BlkAllocStatus::SUCCESS;}

void FixedBlkAllocator::free(BlkId const& b) {
HS_DBG_ASSERT_EQ(b.blk_count(), 1, "Multiple blk free for FixedBlkAllocator? allocated by different allocator?");

Expand Down
3 changes: 2 additions & 1 deletion src/lib/blkalloc/fixed_blk_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class FixedBlkAllocator : public BitmapBlkAllocator {

BlkAllocStatus alloc_contiguous(BlkId& bid) override;
BlkAllocStatus alloc(blk_count_t nblks, blk_alloc_hints const& hints, BlkId& out_blkid) override;
BlkAllocStatus mark_blk_allocated(BlkId const& b) override;
void free(BlkId const& b) override;

blk_num_t available_blks() const override;
Expand All @@ -49,4 +50,4 @@ class FixedBlkAllocator : public BitmapBlkAllocator {
private:
folly::MPMCQueue< BlkId > m_blk_q;
};
} // namespace homestore
} // namespace homestore
20 changes: 20 additions & 0 deletions src/lib/blkalloc/varsize_blk_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,26 @@ blk_count_t VarsizeBlkAllocator::alloc_blks_direct(blk_count_t nblks, blk_alloc_
return (nblks - nblks_remain);
}

// Marks the given blkid range as allocated in the in-memory cache bitmap.
// Since this function will only be called during HS recovery, it is safe to update the cache bitmap
// directly without touching the slab caches. This prevents the allocator from handing out the same
// blkId again after restart.
BlkAllocStatus VarsizeBlkAllocator::mark_blk_allocated(BlkId const& bid) {
    // Locate the portion covering this blk; the portion lock serializes bitmap updates.
    BlkAllocPortion& portion = blknum_to_portion(bid.blk_num());
    {
        auto lock{portion.portion_auto_lock()};
#ifndef NDEBUG
        // Debug-only sanity check: the whole [blk_num, blk_num + blk_count - 1] range must lie
        // inside this portion's blk range.
        auto const start_blk_id = portion.get_portion_num() * get_blks_per_portion();
        auto const end_blk_id = start_blk_id + get_blks_per_portion() - 1;
        HS_DBG_ASSERT_LE(start_blk_id, bid.blk_num(), "Expected start bit to be greater than portion start bit");
        HS_DBG_ASSERT_GE(end_blk_id, (bid.blk_num() + bid.blk_count() - 1),
                         "Expected end bit to be smaller than portion end bit");
#endif
        // Set the bits under the portion lock so concurrent allocations cannot pick these blks.
        m_cache_bm->set_bits(bid.blk_num(), bid.blk_count());
    }
    BLKALLOC_LOG(TRACE, "mark blk alloced directly to portion={} blkid={} set_bits_count={}",
                 blknum_to_portion_num(bid.blk_num()), bid.to_string(), get_alloced_blk_count());
    return BlkAllocStatus::SUCCESS;
}

void VarsizeBlkAllocator::free(BlkId const& bid) {
blk_count_t n_freed = (m_cfg.m_use_slabs && (bid.blk_count() <= m_cfg.highest_slab_blks_count()))
? free_blks_slab(r_cast< MultiBlkId const& >(bid))
Expand Down
1 change: 1 addition & 0 deletions src/lib/blkalloc/varsize_blk_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ class VarsizeBlkAllocator : public BitmapBlkAllocator {
BlkAllocStatus alloc_contiguous(blk_count_t nblks, blk_alloc_hints const& hints, BlkId& out_blkid);
BlkAllocStatus alloc(blk_count_t nblks, blk_alloc_hints const& hints, BlkId& out_blkid) override;
BlkAllocStatus alloc(blk_count_t nblks, blk_alloc_hints const& hints, std::vector< BlkId >& out_blkids);
BlkAllocStatus mark_blk_allocated(BlkId const& b) override;
void free(BlkId const& blk_id) override;

blk_num_t available_blks() const override;
Expand Down
6 changes: 6 additions & 0 deletions src/lib/device/virtual_dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@ bool VirtualDev::is_blk_alloced(BlkId const& blkid) const {
// Persists the allocation of `blkid` on its owning chunk's allocator.
//
// During recovery (homestore still initializing), a replayed blk was never allocated through this
// run's in-memory cache, so it is first marked allocated to stop the allocator from handing out the
// same blkId again after restart. Outside recovery the blk must already be allocated in cache,
// which is asserted in debug builds.
//
// Returns the status of the on-disk allocation.
// Fixes: misspelled assert message ("commiting" -> "committing"); hoists the repeated
// blk_allocator_mutable() accessor call into a local.
BlkAllocStatus VirtualDev::commit_blk(BlkId const& blkid) {
    Chunk* chunk = m_dmgr.get_chunk_mutable(blkid.chunk_num());
    HS_LOG(DEBUG, device, "commit_blk: bid {}", blkid.to_string());
    auto allocator = chunk->blk_allocator_mutable();
    if (homestore::hs()->is_initializing()) {
        // Recovery path: seed the in-memory cache so future allocs skip this blk.
        allocator->mark_blk_allocated(blkid);
    } else {
        HS_DBG_ASSERT(is_blk_alloced(blkid), "committing blkid {} is not allocated in non-recovery mode",
                      blkid.to_string());
    }
    return allocator->alloc_on_disk(blkid);
}

Expand Down
32 changes: 31 additions & 1 deletion src/tests/test_meta_blk_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ class VMetaBlkMgrTest : public ::testing::Test {
return true;
}

// Restarts the homestore instance used by this test (restart flag set) with only the META
// service enabled, exercising the recovery path.
void restart_homestore() {
    test_common::HSTestHelper::start_homestore("test_meta_blk_mgr", {{HS_SERVICE::META, {.size_pct = 85.0}}},
                                               nullptr /* before_svc_start_cb */, true /* restart */);
}

// Total number of I/O operations (updates + writes + removes) issued so far by the test.
uint64_t io_cnt() const { return m_update_cnt + m_wrt_cnt + m_rm_cnt; }

void gen_rand_buf(uint8_t* s, const uint32_t len) {
Expand Down Expand Up @@ -776,6 +781,31 @@ TEST_F(VMetaBlkMgrTest, random_dependency_test) {
this->shutdown();
}

// Writes half of the available metablks, restarts homestore (recovery), then writes the other half.
// Verifies that after restart the meta service does not allocate a blkId that is already in use.
TEST_F(VMetaBlkMgrTest, recovery_test) {
    mtype = "Test_MetaService_recovery";
    reset_counters();
    m_start_time = Clock::now();
    this->register_client();

    // Since we are using an overflow metablk with 64K metadata, each write consumes another 2 metablks
    // on top of the payload — accounted for by the extra 8K in the divisor.
    auto max_write_times = m_mbm->available_blks() * m_mbm->block_size() / (64 * Ki + 8 * Ki);
    // write 1/2 of the available blks;
    for (uint64_t i = 0; i < max_write_times / 2; i++) {
        EXPECT_GT(this->do_sb_write(true, uint64_cast(64 * Ki)), uint64_cast(0));
    }

    // restart homestore
    this->restart_homestore();
    // write another 1/2 of the available blks to make sure we can write after recovery;
    // during the write, HS metablk service will check that each allocated metablk is unique
    reset_counters();
    this->register_client();
    for (uint64_t i = 0; i < (max_write_times / 2); i++) {
        EXPECT_GT(this->do_sb_write(true, uint64_cast(64 * Ki)), uint64_cast(0));
    }
    this->shutdown();
}

// 1. random write, update, remove;
// 2. recovery test and verify callback context data matches;
TEST_F(VMetaBlkMgrTest, random_load_test) {
Expand Down Expand Up @@ -893,7 +923,7 @@ SISL_OPTION_GROUP(
(bitmap, "", "bitmap", "bitmap test", ::cxxopts::value< bool >()->default_value("false"), "true or false"));

int main(int argc, char* argv[]) {
::testing::GTEST_FLAG(filter) = "*random*";
::testing::GTEST_FLAG(filter) = "*random*:VMetaBlkMgrTest.recovery_test";
::testing::InitGoogleTest(&argc, argv);
SISL_OPTIONS_LOAD(argc, argv, logging, test_meta_blk_mgr, iomgr, test_common_setup);
sisl::logging::SetLogger("test_meta_blk_mgr");
Expand Down

0 comments on commit 0a9df25

Please sign in to comment.