Skip to content

Commit

Permalink
Reset PG after failures
Browse files Browse the repository at this point in the history
  • Loading branch information
yuwmao committed Dec 25, 2024
1 parent a64147b commit ab2184a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/lib/homestore_backend/hs_homeobject.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ class HSHomeObject : public HomeObjectImpl {

std::shared_ptr< BlobIndexTable > recover_index_table(homestore::superblk< homestore::index_table_sb >&& sb);
std::optional< pg_id_t > get_pg_id_with_group_id(homestore::group_id_t group_id) const;
// Returns true when an entry for `pg_id` is found in the PG map, false otherwise.
// The lookup is performed while holding `_pg_lock`.
bool is_pg_present(pg_id_t pg_id);

private:
std::shared_ptr< BlobIndexTable > create_index_table();
Expand Down
6 changes: 6 additions & 0 deletions src/lib/homestore_backend/hs_pg_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,12 @@ std::optional< pg_id_t > HSHomeObject::get_pg_id_with_group_id(homestore::group_
}
}

// Reports whether `pg_id` currently has an entry in `_pg_map`.
// `_pg_lock` is held for the duration of the lookup.
bool HSHomeObject::is_pg_present(pg_id_t pg_id) {
std::scoped_lock guard(_pg_lock);
const bool found = (_pg_map.find(pg_id) != _pg_map.end());
return found;
}

void HSHomeObject::pg_destroy(pg_id_t pg_id) {
mark_pg_destroyed(pg_id);
destroy_shards(pg_id);
Expand Down
8 changes: 4 additions & 4 deletions src/lib/homestore_backend/replication_state_machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,12 +363,12 @@ void ReplicationStateMachine::write_snapshot_obj(std::shared_ptr< homestore::sna

auto pg_data = GetSizePrefixedResyncPGMetaData(data_buf);

// Check if the snapshot context is same as the current snapshot context.
// If not, drop the previous context and re-init a new one
if (m_snp_rcv_handler->get_context_lsn() != context->get_lsn()) {
// Check whether the PG already exists. If it does, clean up its stale resources (possibly left over from a
// previous snapshot failure) so that we resync on a pristine base.
if (home_object_->is_pg_present(pg_data->pg_id())) {
LOGI("pg already exists, clean pg resources before snapshot, pg_id:{} {}", pg_data->pg_id(), log_suffix);
home_object_->pg_destroy(pg_data->pg_id());
LOGI("reset context from lsn:{} to lsn:{}", m_snp_rcv_handler->get_context_lsn(), context->get_lsn());
m_snp_rcv_handler->reset_context(context->get_lsn(), pg_data->pg_id());
// TODO: Reset all data of current PG - let's resync on a pristine base
}

auto ret = m_snp_rcv_handler->process_pg_snapshot_data(*pg_data);
Expand Down

0 comments on commit ab2184a

Please sign in to comment.