Skip to content

Commit

Permalink
[Enhancement] add metrics for pk table error state (backport #52590) (#…
Browse files Browse the repository at this point in the history
…52611)

Signed-off-by: Yixin Luo <[email protected]>
Co-authored-by: Yixin Luo <[email protected]>
  • Loading branch information
mergify[bot] and luohaha authored Nov 5, 2024
1 parent 91d710d commit b917805
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 0 deletions.
2 changes: 2 additions & 0 deletions be/src/storage/lake/meta_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "util/coding.h"
#include "util/defer_op.h"
#include "util/raw_container.h"
#include "util/starrocks_metrics.h"
#include "util/trace.h"

namespace starrocks {
Expand Down Expand Up @@ -159,6 +160,7 @@ Status MetaFileBuilder::update_num_del_stat(const std::map<uint32_t, size_t>& se
std::string err_msg =
fmt::format("unexpected segment id: {} tablet id: {}", each.first, _tablet_meta->id());
LOG(ERROR) << err_msg;
StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1);
if (!config::experimental_lake_ignore_pk_consistency_check) {
set_recover_flag(RecoverFlag::RECOVER_WITHOUT_PUBLISH);
return Status::InternalError(err_msg);
Expand Down
1 change: 1 addition & 0 deletions be/src/storage/lake/txn_log_applier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ class PrimaryKeyTxnLogApplier : public TxnLogApplier {
LOG(INFO) << "Primary Key recover begin, tablet_id: " << _tablet.id() << " base_ver: " << _base_version;
// release and remove index entry's reference
_tablet.update_mgr()->release_primary_index_cache(_index_entry);
_guard.reset(nullptr);
_index_entry = nullptr;
// rebuild delvec and pk index
LakePrimaryKeyRecover recover(&_builder, &_tablet, _metadata);
Expand Down
3 changes: 3 additions & 0 deletions be/src/storage/lake/update_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "storage/tablet_meta_manager.h"
#include "testutil/sync_point.h"
#include "util/pretty_printer.h"
#include "util/starrocks_metrics.h"
#include "util/trace.h"

namespace starrocks::lake {
Expand Down Expand Up @@ -84,6 +85,7 @@ StatusOr<IndexEntry*> UpdateManager::prepare_primary_index(const TabletMetadata&
_index_cache.update_object_size(index_entry, index.memory_usage());
if (!st.ok()) {
if (st.is_already_exist()) {
StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1);
builder->set_recover_flag(RecoverFlag::RECOVER_WITH_PUBLISH);
}
_index_cache.remove(index_entry);
Expand Down Expand Up @@ -223,6 +225,7 @@ Status UpdateManager::publish_primary_key_tablet(const TxnLogPB_OpWrite& op_writ
"v:$6",
tablet->id(), rssid, cur_old, cur_add, cur_new, old_del_vec->version(), metadata.version());
LOG(ERROR) << error_msg;
StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1);
if (!config::experimental_lake_ignore_pk_consistency_check) {
builder->set_recover_flag(RecoverFlag::RECOVER_WITH_PUBLISH);
return Status::InternalError(error_msg);
Expand Down
1 change: 1 addition & 0 deletions be/src/storage/tablet_updates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3393,6 +3393,7 @@ void TabletUpdates::_print_rowsets(std::vector<uint32_t>& rowsets, std::string*
}

void TabletUpdates::_set_error(const string& msg) {
StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1);
_error_msg = msg;
_error = true;
_apply_version_changed.notify_all();
Expand Down
1 change: 1 addition & 0 deletions be/src/util/starrocks_metrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ StarRocksMetrics::StarRocksMetrics() : _metrics(_s_registry_name) {
REGISTER_STARROCKS_METRIC(delta_column_group_get_hit_cache);
REGISTER_STARROCKS_METRIC(delta_column_group_get_non_pk_total);
REGISTER_STARROCKS_METRIC(delta_column_group_get_non_pk_hit_cache);
REGISTER_STARROCKS_METRIC(primary_key_table_error_state_total);

// push request
_metrics.register_metric("push_requests_total", MetricLabels().add("status", "SUCCESS"),
Expand Down
1 change: 1 addition & 0 deletions be/src/util/starrocks_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ class StarRocksMetrics {
METRIC_DEFINE_INT_COUNTER(delta_column_group_get_hit_cache, MetricUnit::REQUESTS);
METRIC_DEFINE_INT_COUNTER(delta_column_group_get_non_pk_total, MetricUnit::REQUESTS);
METRIC_DEFINE_INT_COUNTER(delta_column_group_get_non_pk_hit_cache, MetricUnit::REQUESTS);
METRIC_DEFINE_INT_COUNTER(primary_key_table_error_state_total, MetricUnit::REQUESTS);

// Gauges
METRIC_DEFINE_INT_GAUGE(memory_pool_bytes_total, MetricUnit::BYTES);
Expand Down
27 changes: 27 additions & 0 deletions be/test/storage/lake/meta_file_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "storage/lake/update_manager.h"
#include "testutil/assert.h"
#include "testutil/id_generator.h"
#include "util/starrocks_metrics.h"
#include "util/uid_util.h"

namespace starrocks::lake {
Expand Down Expand Up @@ -237,4 +238,30 @@ TEST_F(MetaFileTest, test_delvec_read_loop) {
}
}

// Verifies that MetaFileBuilder::update_num_del_stat() reports a failure and
// bumps the `primary_key_table_error_state_total` counter when it is handed
// delete stats for segment ids that the tablet metadata does not expect.
TEST_F(MetaFileTest, test_error_state) {
    // generate metadata
    const int64_t tablet_id = 10001;
    auto tablet = std::make_shared<Tablet>(_tablet_manager.get(), tablet_id);
    auto metadata = std::make_shared<TabletMetadata>();
    metadata->set_id(tablet_id);
    metadata->set_version(10);
    metadata->set_next_rowset_id(110);

    // add rowset with segment
    RowsetMetadataPB rowset_metadata;
    rowset_metadata.set_id(110);
    rowset_metadata.add_segments("aaa.dat");
    rowset_metadata.add_segments("bbb.dat");
    metadata->add_rowsets()->CopyFrom(rowset_metadata);

    // Delete stats keyed by segment ids 0..9; the only rowset registered above
    // has id 110, so these ids are presumably all unknown to the builder.
    std::map<uint32_t, size_t> segment_id_to_add_dels;
    for (uint32_t segment_id = 0; segment_id < 10; segment_id++) {
        segment_id_to_add_dels[segment_id] = 100;
    }

    // The metric lives on a process-wide singleton, so other tests in the same
    // binary may already have incremented it. Snapshot the value first and
    // check the delta; a bare "> 0" check could pass without this call ever
    // touching the counter.
    const auto errors_before = StarRocksMetrics::instance()->primary_key_table_error_state_total.value();

    // generate error state
    MetaFileBuilder builder(*tablet, metadata);
    Status st = builder.update_num_del_stat(segment_id_to_add_dels);
    EXPECT_FALSE(st.ok());
    EXPECT_GT(StarRocksMetrics::instance()->primary_key_table_error_state_total.value(), errors_before);
}

} // namespace starrocks::lake

0 comments on commit b917805

Please sign in to comment.