Skip to content

Commit

Permalink
[FR] Add API to provide custom profilers #1807 (#1809)
Browse files Browse the repository at this point in the history
This API is akin to the MemoryManager API and lets tools provide
their own profiler which is wrapped in the same way MemoryManager is
wrapped. Namely, the profiler provides Start/Stop methods that are called
at the start/end of running the benchmark in a separate pass.

Co-authored-by: dominic <[email protected]>
  • Loading branch information
xdje42 and dmah42 authored Jul 16, 2024
1 parent d2cd246 commit 7c8ed6b
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 16 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Dominic Hamon <[email protected]> <[email protected]>
Dominik Czarnota <[email protected]>
Dominik Korman <[email protected]>
Donald Aingworth <[email protected]>
Doug Evans <[email protected]>
Eric Backus <[email protected]>
Eric Fiselier <[email protected]>
Eugene Zhuk <[email protected]>
Expand Down
15 changes: 15 additions & 0 deletions docs/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1139,6 +1139,21 @@ a report on the number of allocations, bytes used, etc.
This data will then be reported alongside other performance data, currently
only when using JSON output.
<a name="profiling" />
## Profiling
In addition to measuring CPU performance, it is often useful to run benchmarks
under a profiler. For this reason, benchmark offers the
`RegisterProfilerManager` function that allows a custom `ProfilerManager` to be
injected.
If set, the `ProfilerManager::AfterSetupStart` and
`ProfilerManager::BeforeTeardownStop` methods will be called at the start and
end of a separate benchmark run to allow user code to collect and report
user-provided profile metrics.
Output collected from this profiling run must be reported separately.
<a name="using-register-benchmark" />
## Using RegisterBenchmark(name, fn, args...)
Expand Down
20 changes: 20 additions & 0 deletions include/benchmark/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,26 @@ class MemoryManager {
BENCHMARK_EXPORT
void RegisterMemoryManager(MemoryManager* memory_manager);

// Interface for plugging a user-provided profiler into benchmark runs.
//
// If a ProfilerManager is registered (via RegisterProfilerManager()), the
// benchmark will be run an additional time under the profiler to collect and
// report profile metrics for the run of the benchmark.
//
// Both hooks are pure virtual, so a concrete profiler must implement each of
// them.
class ProfilerManager {
public:
// Virtual so that implementations can be destroyed through a
// ProfilerManager pointer.
virtual ~ProfilerManager() {}

// This is called after `Setup()` code and right before the benchmark is run.
// Presumably the place to start profile collection.
virtual void AfterSetupStart() = 0;

// This is called before `Teardown()` code and right after the benchmark
// completes.
// Presumably the place to stop profile collection.
virtual void BeforeTeardownStop() = 0;
};

// Register a ProfilerManager instance that will be used to collect and report
// profile measurements for benchmark runs.
// Passing nullptr unregisters the current profiler, so no profiling pass runs.
// NOTE(review): the pointer appears to be stored without taking ownership, so
// the caller presumably has to keep the instance alive while it is
// registered -- confirm.
BENCHMARK_EXPORT
void RegisterProfilerManager(ProfilerManager* profiler_manager);

// Add a key-value pair to output as part of the context stanza in the report.
BENCHMARK_EXPORT
void AddCustomContext(const std::string& key, const std::string& value);
Expand Down
4 changes: 4 additions & 0 deletions src/benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}

// Stores `manager` in the global internal::profiler_manager, replacing any
// previously registered profiler. Passing nullptr clears the registration.
void RegisterProfilerManager(ProfilerManager* manager) {
internal::profiler_manager = manager;
}

void AddCustomContext(const std::string& key, const std::string& value) {
if (internal::global_context == nullptr) {
internal::global_context = new std::map<std::string, std::string>();
Expand Down
59 changes: 43 additions & 16 deletions src/benchmark_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ namespace internal {

MemoryManager* memory_manager = nullptr;

// Profiler set through RegisterProfilerManager(). While this is nullptr the
// runner skips the extra profiling pass.
ProfilerManager* profiler_manager = nullptr;

namespace {

static constexpr IterationCount kMaxIterations = 1000000000000;
Expand Down Expand Up @@ -401,6 +403,41 @@ void BenchmarkRunner::RunWarmUp() {
}
}

// Performs one extra single-threaded pass of the benchmark, bracketed by
// memory_manager->Start() and memory_manager->Stop(), and returns a pointer to
// the measurements collected during that pass.
//
// The returned Result is stored in (and owned by) `memory_results`.
MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
    IterationCount memory_iterations) {
  // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
  // optional so we don't have to own the Result here.
  // Can't do it now due to cxx03.
  memory_results.push_back(MemoryManager::Result());
  MemoryManager::Result& collected = memory_results.back();

  memory_manager->Start();
  {
    // Scoped so the thread manager is destroyed before Teardown() runs.
    std::unique_ptr<internal::ThreadManager> thread_manager(
        new internal::ThreadManager(1));
    b.Setup();
    RunInThread(&b, memory_iterations, 0, thread_manager.get(),
                perf_counters_measurement_ptr);
    thread_manager->WaitForAllThreads();
  }
  b.Teardown();
  memory_manager->Stop(collected);

  return &collected;
}

void BenchmarkRunner::RunProfilerManager() {
// TODO: Provide a way to specify the number of iterations.
IterationCount profile_iterations = 1;
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
profiler_manager->AfterSetupStart();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr);
manager->WaitForAllThreads();
profiler_manager->BeforeTeardownStop();
manager.reset();
b.Teardown();
}

void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");

Expand Down Expand Up @@ -445,28 +482,18 @@ void BenchmarkRunner::DoOneRepetition() {
"then we should have accepted the current iteration run.");
}

// Oh, one last thing, we need to also produce the 'memory measurements'..
// Produce memory measurements if requested.
MemoryManager::Result* memory_result = nullptr;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
// optional so we don't have to own the Result here.
// Can't do it now due to cxx03.
memory_results.push_back(MemoryManager::Result());
memory_result = &memory_results.back();
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
memory_manager->Stop(*memory_result);
memory_result = RunMemoryManager(memory_iterations);
}

if (profiler_manager != nullptr) {
RunProfilerManager();
}

// Ok, now actually report.
Expand Down
5 changes: 5 additions & 0 deletions src/benchmark_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
namespace internal {

extern MemoryManager* memory_manager;
// Profiler registered via RegisterProfilerManager(); nullptr when none is set.
extern ProfilerManager* profiler_manager;

struct RunResults {
std::vector<BenchmarkReporter::Run> non_aggregates;
Expand Down Expand Up @@ -113,6 +114,10 @@ class BenchmarkRunner {
};
IterationResults DoNIterations();

// Runs the benchmark for `memory_iterations` iterations between
// memory_manager->Start() and Stop(), returning the collected result.
MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);

// Runs the benchmark one more time with the registered profiler's
// AfterSetupStart()/BeforeTeardownStop() hooks called around the run.
void RunProfilerManager();

IterationCount PredictNumItersNeeded(const IterationResults& i) const;

bool ShouldReportIterationResults(const IterationResults& i) const;
Expand Down
3 changes: 3 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous
compile_output_test(memory_manager_test)
benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)

# Register profiler_manager_test (an output test) and run it with
# --benchmark_min_time=0.01s.
compile_output_test(profiler_manager_test)
benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)

# MSVC does not allow to set the language standard to C++98/03.
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
compile_benchmark_test(cxx03_test)
Expand Down
43 changes: 43 additions & 0 deletions test/profiler_manager_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// FIXME: WIP

#include <memory>

#include "benchmark/benchmark.h"
#include "output_test.h"

class TestProfilerManager : public benchmark::ProfilerManager {
void AfterSetupStart() override {}
void BeforeTeardownStop() override {}
};

void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = state.iterations();
benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);

ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
{"\"family_index\": 0,$", MR_Next},
{"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_empty\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});

int main(int argc, char* argv[]) {
std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());

benchmark::RegisterProfilerManager(pm.get());
RunOutputTests(argc, argv);
benchmark::RegisterProfilerManager(nullptr);
}

0 comments on commit 7c8ed6b

Please sign in to comment.