From a68f38122ee696c4ab89d154f98b024ba175cffe Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 14:16:14 +0800 Subject: [PATCH] [Enhancement] add metrics for pk table error state (backport #52590) (#52609) Co-authored-by: Yixin Luo <18810541851@163.com> --- be/src/storage/lake/meta_file.cpp | 2 ++ be/src/storage/lake/update_manager.cpp | 2 ++ be/src/storage/tablet_updates.cpp | 1 + be/src/util/starrocks_metrics.cpp | 1 + be/src/util/starrocks_metrics.h | 1 + be/test/storage/lake/meta_file_test.cpp | 27 +++++++++++++++++++++++++ 6 files changed, 34 insertions(+) diff --git a/be/src/storage/lake/meta_file.cpp b/be/src/storage/lake/meta_file.cpp index 833fc18ec3f3b..2de2ce2fcc053 100644 --- a/be/src/storage/lake/meta_file.cpp +++ b/be/src/storage/lake/meta_file.cpp @@ -26,6 +26,7 @@ #include "util/coding.h" #include "util/defer_op.h" #include "util/raw_container.h" +#include "util/starrocks_metrics.h" #include "util/trace.h" namespace starrocks::lake { @@ -407,6 +408,7 @@ Status MetaFileBuilder::update_num_del_stat(const std::map& se std::string err_msg = fmt::format("unexpected segment id: {} tablet id: {}", each.first, _tablet_meta->id()); LOG(ERROR) << err_msg; + StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1); if (!config::experimental_lake_ignore_pk_consistency_check) { set_recover_flag(RecoverFlag::RECOVER_WITHOUT_PUBLISH); return Status::InternalError(err_msg); diff --git a/be/src/storage/lake/update_manager.cpp b/be/src/storage/lake/update_manager.cpp index 6f3c0572c4053..fde8b51c9815f 100644 --- a/be/src/storage/lake/update_manager.cpp +++ b/be/src/storage/lake/update_manager.cpp @@ -145,6 +145,7 @@ StatusOr UpdateManager::prepare_primary_index( _index_cache.update_object_size(index_entry, index.memory_usage()); if (!st.ok()) { if (st.is_already_exist()) { + StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1); builder->set_recover_flag(RecoverFlag::RECOVER_WITH_PUBLISH); } _index_cache.remove(index_entry); @@ -305,6 +306,7 @@ Status UpdateManager::publish_primary_key_tablet(const TxnLogPB_OpWrite& op_writ "v:$6", tablet->id(), rssid, cur_old, cur_add, cur_new, old_del_vec->version(), metadata->version()); LOG(ERROR) << error_msg; + StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1); if (!config::experimental_lake_ignore_pk_consistency_check) { builder->set_recover_flag(RecoverFlag::RECOVER_WITH_PUBLISH); return Status::InternalError(error_msg); diff --git a/be/src/storage/tablet_updates.cpp b/be/src/storage/tablet_updates.cpp index e1db6990ca39b..51fe6016dde03 100644 --- a/be/src/storage/tablet_updates.cpp +++ b/be/src/storage/tablet_updates.cpp @@ -3662,6 +3662,7 @@ void TabletUpdates::_print_rowsets(std::vector& rowsets, std::string* } void TabletUpdates::_set_error(const string& msg) { + StarRocksMetrics::instance()->primary_key_table_error_state_total.increment(1); _error_msg = msg; _error = true; _apply_version_changed.notify_all(); diff --git a/be/src/util/starrocks_metrics.cpp b/be/src/util/starrocks_metrics.cpp index 6bba5ed1a5f86..41afb5c81ee5c 100644 --- a/be/src/util/starrocks_metrics.cpp +++ b/be/src/util/starrocks_metrics.cpp @@ -96,6 +96,7 @@ StarRocksMetrics::StarRocksMetrics() : _metrics(_s_registry_name) { REGISTER_STARROCKS_METRIC(delta_column_group_get_hit_cache); REGISTER_STARROCKS_METRIC(delta_column_group_get_non_pk_total); REGISTER_STARROCKS_METRIC(delta_column_group_get_non_pk_hit_cache); + REGISTER_STARROCKS_METRIC(primary_key_table_error_state_total); // push request _metrics.register_metric("push_requests_total", MetricLabels().add("status", "SUCCESS"), diff --git a/be/src/util/starrocks_metrics.h b/be/src/util/starrocks_metrics.h index 391ef967e3705..d0a2bf035b050 100644 --- a/be/src/util/starrocks_metrics.h +++ b/be/src/util/starrocks_metrics.h @@ -264,6 +264,7 @@ class StarRocksMetrics { METRIC_DEFINE_INT_COUNTER(delta_column_group_get_hit_cache, MetricUnit::REQUESTS); METRIC_DEFINE_INT_COUNTER(delta_column_group_get_non_pk_total, MetricUnit::REQUESTS); METRIC_DEFINE_INT_COUNTER(delta_column_group_get_non_pk_hit_cache, MetricUnit::REQUESTS); + METRIC_DEFINE_INT_COUNTER(primary_key_table_error_state_total, MetricUnit::REQUESTS); // Gauges METRIC_DEFINE_INT_GAUGE(memory_pool_bytes_total, MetricUnit::BYTES); diff --git a/be/test/storage/lake/meta_file_test.cpp b/be/test/storage/lake/meta_file_test.cpp index 1ae12ad31a5de..f3552815d08a9 100644 --- a/be/test/storage/lake/meta_file_test.cpp +++ b/be/test/storage/lake/meta_file_test.cpp @@ -32,6 +32,7 @@ #include "storage/lake/update_manager.h" #include "testutil/assert.h" #include "testutil/id_generator.h" +#include "util/starrocks_metrics.h" #include "util/uid_util.h" namespace starrocks::lake { @@ -572,4 +573,30 @@ TEST_F(MetaFileTest, test_trim_partial_compaction_last_input_rowset) { EXPECT_EQ(last_input_rowset_metadata.segments_size(), 2); } +TEST_F(MetaFileTest, test_error_state) { + // generate metadata + const int64_t tablet_id = 10001; + auto tablet = std::make_shared(_tablet_manager.get(), tablet_id); + auto metadata = std::make_shared(); + metadata->set_id(tablet_id); + metadata->set_version(10); + metadata->set_next_rowset_id(110); + + // add rowset with segment + RowsetMetadataPB rowset_metadata; + rowset_metadata.set_id(110); + rowset_metadata.add_segments("aaa.dat"); + rowset_metadata.add_segments("bbb.dat"); + metadata->add_rowsets()->CopyFrom(rowset_metadata); + std::map segment_id_to_add_dels; + for (int i = 0; i < 10; i++) { + segment_id_to_add_dels[i] = 100; + } + // generate error state + MetaFileBuilder builder(*tablet, metadata); + Status st = builder.update_num_del_stat(segment_id_to_add_dels); + EXPECT_FALSE(st.ok()); + EXPECT_TRUE(StarRocksMetrics::instance()->primary_key_table_error_state_total.value() > 0); +} + } // namespace starrocks::lake