From 3dbfd82f2bcacfad6a96c20904eb12388f5e0407 Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Mon, 8 Jul 2024 21:22:42 +0000 Subject: [PATCH] cleanup(sinsp/metrics): add prometheus metric and label names sanitization methods Signed-off-by: Melissa Kilby --- userspace/libsinsp/metrics_collector.cpp | 42 ++++++++++++++++++-- userspace/libsinsp/test/sinsp_metrics.ut.cpp | 17 +++++--- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/userspace/libsinsp/metrics_collector.cpp b/userspace/libsinsp/metrics_collector.cpp index 2f8f02b97e..3845ca0300 100644 --- a/userspace/libsinsp/metrics_collector.cpp +++ b/userspace/libsinsp/metrics_collector.cpp @@ -29,7 +29,6 @@ limitations under the License. static re2::RE2 s_libs_metrics_units_suffix_pre_prometheus_text_conversion("(_kb|_bytes|_mb|_perc|_percentage|_ratio|_ns|_ts|_sec|_total)", re2::RE2::POSIX); static re2::RE2 s_libs_metrics_units_memory_suffix("(_kb|_bytes)", re2::RE2::POSIX); static re2::RE2 s_libs_metrics_units_perc_suffix("(_perc)", re2::RE2::POSIX); -static re2::RE2 s_libs_metrics_banned_prometheus_naming_characters("(\\.)", re2::RE2::POSIX); // For simplicity, needs to stay in sync w/ typedef enum metrics_v2_value_unit // https://prometheus.io/docs/practices/naming/ or https://prometheus.io/docs/practices/naming/#base-units. @@ -91,6 +90,38 @@ std::string metric_value_to_text(const metrics_v2& metric) return value_text; } +std::string prometheus_sanitize_metric_name(const std::string& name) +{ + // https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels + static const RE2 invalid_chars("[^a-zA-Z0-9_:]"); + std::string sanitized_name = name; + RE2::GlobalReplace(&sanitized_name, invalid_chars, "_"); + RE2::GlobalReplace(&sanitized_name, "_+", "_"); + // Ensure it starts with a letter or underscore (if empty after sanitizing, set to "_") + if (sanitized_name.empty() || (!std::isalpha(sanitized_name.front()) && sanitized_name.front() != '_')) + { + sanitized_name = "_" + sanitized_name; + } + return sanitized_name; +} + +std::string prometheus_sanitize_label_name(const std::string& name) + { + // https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels + static const RE2 invalid_chars("[^a-zA-Z0-9_]"); + std::string sanitized_label = name; + RE2::GlobalReplace(&sanitized_label, invalid_chars, "_"); + RE2::GlobalReplace(&sanitized_label, "_+", "_"); + + // Ensure the label starts with a letter or underscore (if empty after sanitizing, set to "_") + if (sanitized_label.empty() || (!std::isalpha(sanitized_label.front()) && sanitized_label.front() != '_')) + { + sanitized_label = "_" + sanitized_label; + } + + return sanitized_label; +} + std::string prometheus_qualifier(std::string_view prometheus_namespace, std::string_view prometheus_subsystem) { std::string qualifier; @@ -108,7 +139,7 @@ std::string prometheus_qualifier(std::string_view prometheus_namespace, std::str std::string prometheus_exposition_text(std::string_view metric_qualified_name, std::string_view metric_name, std::string_view metric_type_name, std::string_view metric_value, const std::map& const_labels) { - std::string fqn(metric_qualified_name); + std::string fqn = prometheus_sanitize_metric_name(std::string(metric_qualified_name)); std::string prometheus_text = "# HELP " + fqn + " https://falco.org/docs/metrics/\n"; prometheus_text += "# TYPE " + fqn + " " + std::string(metric_type_name) + "\n"; prometheus_text += fqn; @@ -118,6 +149,10 @@ std::string prometheus_exposition_text(std::string_view metric_qualified_name, s bool first_label = true; for (const auto& [key, value] : const_labels) { + if (key.empty()) + { + continue; + } if (!first_label) { prometheus_text += ","; @@ -125,7 +160,7 @@ std::string prometheus_exposition_text(std::string_view metric_qualified_name, s { first_label = false; } - prometheus_text += key + "=\"" + value + "\""; + prometheus_text += prometheus_sanitize_label_name(key) + "=\"" + value + "\""; } prometheus_text += "} "; // white space at the end important! } else @@ -173,7 +208,6 @@ std::string prometheus_metrics_converter::convert_metric_to_text_prometheus(cons std::string prometheus_metric_name_fully_qualified = prometheus_qualifier(prometheus_namespace, prometheus_subsystem) + std::string(metric.name) + "_"; // Remove native libs unit suffixes if applicable. RE2::GlobalReplace(&prometheus_metric_name_fully_qualified, s_libs_metrics_units_suffix_pre_prometheus_text_conversion, ""); - RE2::GlobalReplace(&prometheus_metric_name_fully_qualified, s_libs_metrics_banned_prometheus_naming_characters, "_"); prometheus_metric_name_fully_qualified += std::string(metrics_unit_name_mappings_prometheus[metric.unit]); return prometheus_exposition_text(prometheus_metric_name_fully_qualified, metric.name, diff --git a/userspace/libsinsp/test/sinsp_metrics.ut.cpp b/userspace/libsinsp/test/sinsp_metrics.ut.cpp index 38b3ca1a5e..f5bea53b4c 100644 --- a/userspace/libsinsp/test/sinsp_metrics.ut.cpp +++ b/userspace/libsinsp/test/sinsp_metrics.ut.cpp @@ -115,12 +115,19 @@ testns_falco_memory_rss_bytes )"; ASSERT_EQ(metrics_names_all_str_post_unit_conversion_pre_prometheus_text_conversion, "cpu_usage_ratio memory_rss_bytes memory_vsz_bytes memory_pss_bytes container_memory_used_bytes host_cpu_usage_ratio host_memory_used_bytes host_procs_running host_open_fds n_threads n_fds n_noncached_fd_lookups n_cached_fd_lookups n_failed_fd_lookups n_added_fds n_removed_fds n_stored_evts n_store_evts_drops n_retrieved_evts n_retrieve_evts_drops n_noncached_thread_lookups n_cached_thread_lookups n_failed_thread_lookups n_added_threads n_removed_threads n_drops_full_threadtable n_missing_container_images n_containers"); - // Test global wrapper base metrics (pseudo metrics) - prometheus_text = prometheus_metrics_converter.convert_metric_to_text_prometheus("kernel_release", "testns", "falco", {{"kernel_release", "6.6.7-200.fc39.x86_64"}}); - prometheus_text_substring = R"(# HELP testns_falco_kernel_release_info https://falco.org/docs/metrics/ -# TYPE testns_falco_kernel_release_info gauge -testns_falco_kernel_release_info{kernel_release="6.6.7-200.fc39.x86_64"} 1 + // Test global wrapper base metrics plus test invalid characters sanitization for the metric and label names (pseudo metrics) + prometheus_text = prometheus_metrics_converter.convert_metric_to_text_prometheus("56kernel_release-:!", "", "", {{"0kernel_release__", "6.6.7-200.fc39.x86_64"}, {"", "empty_key_name"}}); + prometheus_text_substring = R"(# HELP _56kernel_release_:_info https://falco.org/docs/metrics/ +# TYPE _56kernel_release_:_info gauge +_56kernel_release_:_info{_0kernel_release_="6.6.7-200.fc39.x86_64"} 1 )"; + + prometheus_text = prometheus_metrics_converter.convert_metric_to_text_prometheus("", "", "", {{"0kernel_release__", "6.6.7-200.fc39.x86_64"}, {"", "empty_key_name"}}); + prometheus_text_substring = R"(# HELP _info https://falco.org/docs/metrics/ +# TYPE _info gauge +_info{_0kernel_release_="6.6.7-200.fc39.x86_64"} 1 +)"; + std::cerr << prometheus_text; ASSERT_TRUE(prometheus_text.find(prometheus_text_substring) != std::string::npos) << "Substring not found in prometheus_text got\n" << prometheus_text;