PG cleanup for moved out member #242
@@ -250,6 +250,79 @@ void HSHomeObject::on_pg_replace_member(homestore::group_id_t group_id, const re
                           boost::uuids::to_string(member_in.id));
 }
 
+std::optional< pg_id_t > HSHomeObject::get_pg_id_with_group_id(homestore::group_id_t group_id) const {
+    auto lg = std::shared_lock(_pg_lock);
+    auto iter = std::find_if(_pg_map.begin(), _pg_map.end(), [group_id](const auto& entry) {
+        return pg_repl_dev(*entry.second).group_id() == group_id;
+    });
+    if (iter != _pg_map.end()) {
+        return iter->first;
+    } else {
+        return std::nullopt;
+    }
+}
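
A usage sketch may help connect the pieces: presumably the replace-member path maps the replication group back to its pg and then drives the destroy. Only get_pg_id_with_group_id() and pg_destroy() come from this diff; the surrounding function is hypothetical.

// Hypothetical call site, for illustration only. Assumes group_id_t is
// printable via boost::uuids::to_string(), as done elsewhere in this file.
void on_moved_out_of_group(homestore::group_id_t group_id, HSHomeObject& ho) {
    if (auto pg_id = ho.get_pg_id_with_group_id(group_id); pg_id.has_value()) {
        ho.pg_destroy(*pg_id);
    } else {
        LOGW("no pg found for group_id {}", boost::uuids::to_string(group_id));
    }
}
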
+
+void HSHomeObject::pg_destroy(pg_id_t pg_id) {
+    mark_pg_destroyed(pg_id);
+    destroy_shards(pg_id);
+    reset_pg_chunks(pg_id);
+    cleanup_pg_resources(pg_id);
+    LOGI("pg {} is destroyed", pg_id);
+}
+
+void HSHomeObject::mark_pg_destroyed(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("on pg destroy with unknown pg_id {}", pg_id);
+        return;
+    }
+    auto& pg = iter->second;
+    auto hs_pg = s_cast< HS_PG* >(pg.get());
+    hs_pg->pg_sb_->state = PGState::DESTROYED;
+    hs_pg->pg_sb_.write();
+}
+
+void HSHomeObject::reset_pg_chunks(pg_id_t pg_id) {
+    bool res = chunk_selector_->reset_pg_chunks(pg_id);
+    RELEASE_ASSERT(res, "Failed to reset all chunks in pg {}", pg_id);
+    auto fut = homestore::hs()->cp_mgr().trigger_cp_flush(true /* force */);

[Review] Why trigger a CP flush only after resetting the chunks? Should we trigger the flush once all of the cleanup is done?
[Reply] The other cleanups are metaService-only and do not rely on CP. I think the intent is to do the CP as early as possible, but I am fine with moving it anywhere before the pg superblk is deleted.
[Reply] Because only resetting the chunks relies on CP; the other operations are metaService destroys only. Given that, doing the CP right after resetting the chunks seemed reasonable.

+    auto on_complete = [&](auto success) {
+        RELEASE_ASSERT(success, "Failed to trigger CP flush");
+        LOGI("CP Flush completed");
+    };
+    on_complete(std::move(fut).get());
+}
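
The thread above hinges on reset_pg_chunks() being the only step whose effects are persisted through CP. A minimal sketch of what the selector-side reset might look like, assuming a per-pg chunk list inside the chunk selector; the class and member names below are assumptions, not code from this PR.

// Hypothetical chunk-selector internals, for illustration only.
bool HeapChunkSelector::reset_pg_chunks(pg_id_t pg_id) {
    std::shared_lock lock(m_chunk_selector_mtx); // assumed member
    auto it = m_per_pg_chunks.find(pg_id);       // assumed member
    if (it == m_per_pg_chunks.end()) { return false; }
    for (auto& chunk : it->second) {
        // Discard the chunk's data; the reset becomes durable at the next
        // CP flush, which is why the caller forces one immediately after.
        chunk->reset();
    }
    return true;
}
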
+
+void HSHomeObject::cleanup_pg_resources(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("on pg resource release with unknown pg_id {}", pg_id);
+        return;
+    }
+
+    // destroy index table
+    auto& pg = iter->second;
+    auto hs_pg = s_cast< HS_PG* >(pg.get());
+    if (nullptr != hs_pg->index_table_) {
+        auto uuid_str = boost::uuids::to_string(hs_pg->index_table_->uuid());
+        index_table_pg_map_.erase(uuid_str);
+        hs()->index_service().remove_index_table(hs_pg->index_table_);
+        hs_pg->index_table_->destroy();
+    }
+
+    // destroy pg super blk
+    hs_pg->pg_sb_.destroy();
+
+    // return pg chunks to dev heap

[Review] Could you also explain: if a crash happened here, what would the process of recovering these chunks to the heap be? Is it that, because we no longer have the pg superblock, these chunks are available by default during recovery?
[Reply] Yes, you are right. If a crash happens here and we then recover, the pg superblk is gone, so these chunks belong to the dev heap by default. (See the recovery sketch after this function.)

+    // which must be done after destroying the pg super blk, to avoid multiple pgs using the same chunks
+    bool res = chunk_selector_->return_pg_chunks_to_dev_heap(pg_id);
+    RELEASE_ASSERT(res, "Failed to return pg {} chunks to dev_heap", pg_id);
+
+    // erase pg in pg map
+    _pg_map.erase(iter);
+}
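
Per the thread above, recovery derives chunk ownership solely from surviving pg superblks. A hedged sketch of that recovery rule; the function and helper names are assumptions for illustration, not code from this PR.

// Hypothetical recovery-side sketch: chunks get pinned to a pg only when a
// recovered pg superblk lists them. A pg destroyed past pg_sb_.destroy()
// never reaches this call, so its chunks stay in the per-dev heap by default.
void HeapChunkSelector::recover_pg_chunks(pg_info_superblk const* pg_sb) {
    for (size_t i = 0; i < pg_sb->num_chunks; ++i) {
        claim_chunk_for_pg(pg_sb->id, pg_sb->get_chunk_ids()[i]); // assumed helper
    }
}
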
+
 void HSHomeObject::add_pg_to_map(unique< HS_PG > hs_pg) {
     RELEASE_ASSERT(hs_pg->pg_info_.replica_set_uuid == hs_pg->repl_dev_->group_id(),
                    "PGInfo replica set uuid mismatch with ReplDev instance for {}",

@@ -317,9 +390,14 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     // add entry in map, so that index recovery can update the PG.
     std::scoped_lock lg(index_lock_);
     auto it = index_table_pg_map_.find(uuid_str);
-    RELEASE_ASSERT(it != index_table_pg_map_.end(), "IndexTable should be recovered before PG");
-    hs_pg->index_table_ = it->second.index_table;
-    it->second.pg_id = pg_id;
+    if (it != index_table_pg_map_.end()) {
+        hs_pg->index_table_ = it->second.index_table;
+        it->second.pg_id = pg_id;
+    } else {
+        RELEASE_ASSERT(hs_pg->pg_sb_->state == PGState::DESTROYED, "IndexTable should be recovered before PG");
+        hs_pg->index_table_ = nullptr;
+        LOGI("Index table not found for destroyed pg_id={}, index_table_uuid={}", pg_id, uuid_str);
+    }
 
     add_pg_to_map(std::move(hs_pg));
 }
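
One implication of tolerating a missing index table: a pg recovered in PGState::DESTROYED crashed somewhere mid-cleanup. A plausible follow-up pass, sketched under the assumption that each destroy step tolerates already-removed state; such a hook is not part of this diff.

// Hypothetical post-recovery pass: re-drive pg_destroy() for any pg whose
// superblk persisted PGState::DESTROYED before a crash.
void HSHomeObject::resume_interrupted_pg_destroys() {
    std::vector< pg_id_t > pending;
    {
        auto lg = std::shared_lock(_pg_lock);
        for (auto const& [pg_id, pg] : _pg_map) {
            auto hs_pg = s_cast< HS_PG* >(pg.get());
            if (hs_pg->pg_sb_->state == PGState::DESTROYED) { pending.push_back(pg_id); }
        }
    }
    for (auto pg_id : pending) { pg_destroy(pg_id); }
}
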

@@ -349,6 +427,7 @@ HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, share
     pg_sb_.create(sizeof(pg_info_superblk) - sizeof(char) + pg_info_.members.size() * sizeof(pg_members) +
                   num_chunks * sizeof(homestore::chunk_num_t));
     pg_sb_->id = pg_info_.id;
+    pg_sb_->state = PGState::ALIVE;
     pg_sb_->num_members = pg_info_.members.size();
     pg_sb_->num_chunks = num_chunks;
     pg_sb_->pg_size = pg_info_.size;
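
For context, the new state field implies a two-state lifecycle persisted in the pg superblk. A guess at the enum's shape; its real definition lives in a header that is not part of this diff.

// Assumed definition, for illustration only.
enum class PGState : uint8_t {
    ALIVE = 0,     // set at creation (see the constructor above)
    DESTROYED = 1, // persisted by mark_pg_destroyed() before cleanup starts
};
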

@@ -44,6 +44,7 @@ ShardError toShardError(ReplServiceError const& e) {
     }
 }
+
 
 uint64_t ShardManager::max_shard_size() { return Gi; }
 
 uint64_t ShardManager::max_shard_num_in_pg() { return ((uint64_t)0x01) << shard_width; }

@@ -526,6 +527,25 @@ bool HSHomeObject::release_chunk_based_on_create_shard_message(sisl::blob const&
     }
 }
 
+void HSHomeObject::destroy_shards(pg_id_t pg_id) {
+    auto lg = std::scoped_lock(_pg_lock, _shard_lock);
+    auto iter = _pg_map.find(pg_id);
+    if (iter == _pg_map.end()) {
+        LOGW("on shards destroy with unknown pg_id {}", pg_id);
+        return;
+    }
+
+    auto& pg = iter->second;
+    for (auto& shard : pg->shards_) {
+        // release open shard v_chunk

[Review] NIT: remove this comment, we do not care about v_chunk here.

+        auto hs_shard = s_cast< HS_Shard* >(shard.get());
+        // destroy shard super blk
+        hs_shard->sb_.destroy();
+
+        // erase shard in shard map
+        _shard_map.erase(shard->info.id);
+    }
+}
 
 HSHomeObject::HS_Shard::HS_Shard(ShardInfo shard_info, homestore::chunk_num_t p_chunk_id,
                                  homestore::chunk_num_t v_chunk_id) :
         Shard(std::move(shard_info)), sb_(_shard_meta_name) {

[Review] Are we fine opening these chunks up for re-use by another PG at this point?
[Reply] Maybe fine. Another PG can pick these chunks, but the commit has to wait, since commits are sequential.
[Reply] That is a problem: at this point another create-pg request may come in and reuse chunks that still belong to the destroyed pg. If a crash then happens and HomeObject recovers, two pgs could end up using some of the same chunks. Fixed by resetting the chunks before destroying the pg super block, and only returning the chunks to the dev heap once the pg super block has been destroyed. (See the ordering sketch below.)
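
To make the resolution concrete, the resulting crash-safe ordering can be restated as an annotated walkthrough of pg_destroy() from the diff above; the comments describe the recovery outcome if a crash lands right after each step.

void HSHomeObject::pg_destroy(pg_id_t pg_id) {
    // 1. Persist PGState::DESTROYED in the pg superblk. Crash after this:
    //    recovery sees a destroyed pg and tolerates its missing index table.
    mark_pg_destroyed(pg_id);

    // 2. Destroy the shard superblks. Crash after this: the shards are gone
    //    and the pg is still marked DESTROYED.
    destroy_shards(pg_id);

    // 3. Reset the chunks and force a CP flush, so the reset is durable
    //    while the pg superblk still claims those chunks.
    reset_pg_chunks(pg_id);

    // 4. Destroy the pg superblk, then return the chunks to the dev heap.
    //    Crash between those two steps: no superblk claims the chunks, so
    //    recovery places them in the dev heap by default and no two pgs can
    //    ever share a chunk.
    cleanup_pg_resources(pg_id);
}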