From a276661bdcfb4ee767e8083a0c067432e3234ba6 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Mon, 20 Nov 2023 15:47:19 +0800 Subject: [PATCH] feat(pprof): Supports both heap profiling and heap sampling (#1684) In https://github.com/XiaoMi/rdsn/pull/433, we changed the way the heap profile is obtained, using ``` HeapProfilerStart(...); sleep(seconds); GetHeapProfile(); HeapProfilerStop(); ``` instead of ``` MallocExtension::instance()->GetHeapSample(...); ``` This makes it possible to analyse which pieces of code allocated (and possibly freed) how much memory while the request is being processed on the server. However, when a server already consumes a large amount of memory and its usage is growing very slowly, it's hard to tell which pieces of code allocated most of the memory. This patch adds heap sampling back and keeps heap profiling as well. Both ways use the `pprof/heap` method; the difference is whether the `seconds` parameter is present. When the `seconds` parameter is present, `GetHeapProfile()` is used; otherwise, `GetHeapSample()` is used. Remember to set the environment variable TCMALLOC_SAMPLE_PARAMETER to a positive value, such as 524288, before starting the server when using heap sampling. 
--- src/http/pprof_http_service.cpp | 81 ++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 21 deletions(-) diff --git a/src/http/pprof_http_service.cpp b/src/http/pprof_http_service.cpp index f5aeb8b505..ab4767f5c6 100644 --- a/src/http/pprof_http_service.cpp +++ b/src/http/pprof_http_service.cpp @@ -52,6 +52,23 @@ namespace dsn { +bool check_TCMALLOC_SAMPLE_PARAMETER() +{ + char *str = getenv("TCMALLOC_SAMPLE_PARAMETER"); + if (str == nullptr) { + return false; + } + char *endptr; + int val = strtol(str, &endptr, 10); + return (*endptr == '\0' && val > 0); +} + +bool has_TCMALLOC_SAMPLE_PARAMETER() +{ + static bool val = check_TCMALLOC_SAMPLE_PARAMETER(); + return val; +} + // // // == ip:port/pprof/symbol == // // // @@ -345,31 +362,53 @@ void pprof_http_service::heap_handler(const http_request &req, http_response &re resp.status_code = http_status_code::internal_server_error; return; } + auto cleanup = dsn::defer([this]() { _in_pprof_action.store(false); }); + + // If "seconds" parameter is specified with a valid value, use heap profiling, + // otherwise, use heap sampling. + bool use_heap_profile = false; + uint32_t seconds = 0; + const auto &iter = req.query_args.find("seconds"); + if (iter != req.query_args.end() && buf2uint32(iter->second, seconds)) { + // This is true between calls to HeapProfilerStart() and HeapProfilerStop(), and + // also if the program has been run with HEAPPROFILER, or some other + // way to turn on whole-program profiling. 
+ if (IsHeapProfilerRunning()) { + LOG_WARNING("heap profiling is running, dump the full profile directly"); + char *profile = GetHeapProfile(); + resp.status_code = http_status_code::ok; + resp.body = profile; + free(profile); + return; + } - const std::string SECOND = "seconds"; - const uint32_t kDefaultSecond = 10; - - // get seconds from query params, default value is `kDefaultSecond` - uint32_t seconds = kDefaultSecond; - const auto iter = req.query_args.find(SECOND); - if (iter != req.query_args.end()) { - const auto seconds_str = iter->second; - dsn::internal::buf2unsigned(seconds_str, seconds); - } - - std::stringstream profile_name_prefix; - profile_name_prefix << "heap_profile." << getpid() << "." << dsn_now_ns(); + std::stringstream profile_name_prefix; + profile_name_prefix << "heap_profile." << getpid() << "." << dsn_now_ns(); - HeapProfilerStart(profile_name_prefix.str().c_str()); - sleep(seconds); - const char *profile = GetHeapProfile(); - HeapProfilerStop(); + HeapProfilerStart(profile_name_prefix.str().c_str()); + sleep(seconds); + char *profile = GetHeapProfile(); + HeapProfilerStop(); - resp.status_code = http_status_code::ok; - resp.body = profile; - delete profile; + resp.status_code = http_status_code::ok; + resp.body = profile; + free(profile); + } else { + if (!has_TCMALLOC_SAMPLE_PARAMETER()) { + static const std::string kNoEnvMsg = "The environment variable " + "TCMALLOC_SAMPLE_PARAMETER should set to a " + "positive value, such as 524288, before running."; + LOG_WARNING(kNoEnvMsg); + resp.status_code = http_status_code::internal_server_error; + resp.body = kNoEnvMsg; + return; + } - _in_pprof_action.store(false); + std::string buf; + MallocExtension::instance()->GetHeapSample(&buf); + resp.status_code = http_status_code::ok; + resp.body = std::move(buf); + } } // //