From c934ad50fec989096b9a129952aeb501ce74c2bd Mon Sep 17 00:00:00 2001 From: fanquake Date: Thu, 16 Nov 2023 09:45:25 +0000 Subject: [PATCH] Merge bitcoin/bitcoin#28877: bench: Update nanobench to 4.3.11 fe434a469534766f18d7560d968deed37193835f bench: Update nanobench to 4.3.11 (TheCharlatan) Pull request description: The newest version fixes the false positive `* Turbo is enabled, CPU frequency will fluctuate` warning on AMD CPUs. The file was directly taken from the release page: https://github.com/martinus/nanobench/releases/tag/v4.3.11. Other changes from the release notes: * Check for failures in parseFile(), perf events tweaks by tommi-cujo in https://github.com/martinus/nanobench/pull/84 * Workaround missing noexcept for std::string move assignment by tommi-cujo in https://github.com/martinus/nanobench/pull/87 * removed the link by martinus in https://github.com/martinus/nanobench/pull/89 * Lots of minor cleanups by martinus in https://github.com/martinus/nanobench/pull/85 * Add linter for version & clang-format. Updated version by martinus in https://github.com/martinus/nanobench/pull/90 ACKs for top commit: fanquake: ACK fe434a469534766f18d7560d968deed37193835f - have not tested. 
Tree-SHA512: a8f15e1db1d993673e4b295a3bab22e67ee3c9f3c0bcbef28974fe9ff37dbb741967a526088d5b148c8d25c9d57cd3b844238100c17b23038638787461805678 --- src/bench/nanobench.h | 98 ++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/src/bench/nanobench.h b/src/bench/nanobench.h index c518b91b6a1b07..4808b866fef4e0 100644 --- a/src/bench/nanobench.h +++ b/src/bench/nanobench.h @@ -33,7 +33,7 @@ // see https://semver.org/ #define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes #define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes -#define ANKERL_NANOBENCH_VERSION_PATCH 10 // backwards-compatible bug fixes +#define ANKERL_NANOBENCH_VERSION_PATCH 11 // backwards-compatible bug fixes /////////////////////////////////////////////////////////////////////////////////////////////////// // public facing api - as minimal as possible @@ -120,6 +120,10 @@ # define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value #endif +// noexcept may be missing for std::string. 
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58265 +#define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE() std::is_nothrow_move_assignable<std::string>::value + // declarations /////////////////////////////////////////////////////////////////////////////////// namespace ankerl { @@ -404,7 +408,7 @@ struct Config { Config(); ~Config(); Config& operator=(Config const& other); - Config& operator=(Config&& other) noexcept; + Config& operator=(Config&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)); Config(Config const& other); Config(Config&& other) noexcept; }; @@ -430,7 +434,7 @@ class Result { ~Result(); Result& operator=(Result const& other); - Result& operator=(Result&& other) noexcept; + Result& operator=(Result&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)); Result(Result const& other); Result(Result&& other) noexcept; @@ -596,7 +600,7 @@ class Rng final { * * @return Vector containing the full state: */ - std::vector<uint64_t> state() const; + ANKERL_NANOBENCH(NODISCARD) std::vector<uint64_t> state() const; private: static constexpr uint64_t rotl(uint64_t x, unsigned k) noexcept; @@ -628,7 +632,7 @@ class Bench { Bench(); Bench(Bench&& other) noexcept; - Bench& operator=(Bench&& other) noexcept; + Bench& operator=(Bench&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)); Bench(Bench const& other); Bench& operator=(Bench const& other); ~Bench() noexcept; @@ -818,7 +822,7 @@ class Bench { * Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see * that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations(). * - * @see maxEpochTim), minEpochIterations + * @see maxEpochTime, minEpochIterations * * @param t Minimum time each epoch should take. */ @@ -1030,7 +1034,7 @@ void doNotOptimizeAway(T const& val); // These assembly magic is directly from what Google Benchmark is doing. 
I have previously used what facebook's folly was doing, but // this seemed to have compilation problems in some cases. Google Benchmark seemed to be the most well tested anyways. -// see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307 +// see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L443-L446 template <typename T> void doNotOptimizeAway(T const& val) { // NOLINTNEXTLINE(hicpp-no-assembler) @@ -1781,7 +1785,7 @@ bool isEndlessRunning(std::string const& name); bool isWarningsEnabled(); template <typename T> -T parseFile(std::string const& filename); +T parseFile(std::string const& filename, bool* fail); void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations); void printStabilityInformationOnce(std::ostream* outStream); @@ -1839,7 +1843,7 @@ class Number { public: Number(int width, int precision, double value); Number(int width, int precision, int64_t value); - std::string to_s() const; + ANKERL_NANOBENCH(NODISCARD) std::string to_s() const; private: friend std::ostream& operator<<(std::ostream& os, Number const& n); @@ -1857,11 +1861,11 @@ std::ostream& operator<<(std::ostream& os, Number const& n); class MarkDownColumn { public: - MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val); - std::string title() const; - std::string separator() const; - std::string invalid() const; - std::string value() const; + MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept; + ANKERL_NANOBENCH(NODISCARD) std::string title() const; + ANKERL_NANOBENCH(NODISCARD) std::string separator() const; + ANKERL_NANOBENCH(NODISCARD) std::string invalid() const; + ANKERL_NANOBENCH(NODISCARD) std::string value() const; private: int mWidth; @@ -1976,9 +1980,9 @@ PerformanceCounters& performanceCounters() { } // Windows version of doNotOptimizeAway -// see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307 -// see 
https://github.com/facebook/folly/blob/master/folly/Benchmark.h#L280 -// see https://docs.microsoft.com/en-us/cpp/preprocessor/optimize +// see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L514 +// see https://github.com/facebook/folly/blob/v2023.01.30.00/folly/lang/Hint-inl.h#L54-L58 +// see https://learn.microsoft.com/en-us/cpp/preprocessor/optimize # if defined(_MSC_VER) # pragma optimize("", off) void doNotOptimizeAwaySink(void const*) {} @@ -1986,10 +1990,13 @@ void doNotOptimizeAwaySink(void const*) {} # endif template <typename T> -T parseFile(std::string const& filename) { +T parseFile(std::string const& filename, bool* fail) { std::ifstream fin(filename); // NOLINT(misc-const-correctness) T num{}; fin >> num; + if (fail != nullptr) { + *fail = fin.fail(); + } return num; } @@ -2032,16 +2039,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector< if (nprocs <= 0) { warnings.emplace_back("couldn't figure out number of processors - no governor, turbo check possible"); } else { - // check frequency scaling for (long id = 0; id < nprocs; ++id) { auto idStr = detail::fmt::to_s(static_cast<uint64_t>(id)); auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr; - auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq"); - auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq"); + auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq", nullptr); + auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq", nullptr); if (minFreq != maxFreq) { - auto minMHz = static_cast<double>(minFreq) / 1000.0; - auto maxMHz = static_cast<double>(maxFreq) / 1000.0; + auto minMHz = d(minFreq) / 1000.0; + auto maxMHz = d(maxFreq) / 1000.0; warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " + detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() + " MHz"); @@ -2050,13 +2056,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector< } } - auto currentGovernor = 
parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"); - if ("performance" != currentGovernor) { + auto fail = false; + auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", &fail); + if (!fail && "performance" != currentGovernor) { warnings.emplace_back("CPU governor is '" + currentGovernor + "' but should be 'performance'"); recommendPyPerf = true; } - if (0 == parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo")) { + auto noTurbo = parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo", &fail); + if (!fail && noTurbo == 0) { warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate"); recommendPyPerf = true; } @@ -2250,10 +2258,9 @@ struct IterationLogic::Impl { mNumIters = 0; } - ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, static_cast<double>(elapsed.count())) << " elapsed, " - << detail::fmt::Number(20, 3, static_cast<double>(mTargetRuntimePerEpoch.count())) - << " target. oldIters=" << oldIters << ", mNumIters=" << mNumIters - << ", mState=" << static_cast<int>(mState)); + ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, d(elapsed.count())) << " elapsed, " + << detail::fmt::Number(20, 3, d(mTargetRuntimePerEpoch.count())) << " target. 
oldIters=" + << oldIters << ", mNumIters=" << mNumIters << ", mState=" << static_cast<int>(mState)); } // NOLINTNEXTLINE(readability-function-cognitive-complexity) @@ -2357,7 +2364,7 @@ struct IterationLogic::Impl { } os << fmt::MarkDownCode(mBench.name()); if (showUnstable) { - auto avgIters = static_cast<double>(mTotalNumIters) / static_cast<double>(mBench.epochs()); + auto avgIters = d(mTotalNumIters) / d(mBench.epochs()); // NOLINTNEXTLINE(bugprone-incorrect-roundings) auto suggestedIters = static_cast<uint64_t>(avgIters * 10 + 0.5); @@ -2435,7 +2442,7 @@ class LinuxPerformanceCounters { bool monitor(perf_sw_ids swId, Target target); bool monitor(perf_hw_id hwId, Target target); - bool hasError() const noexcept { + ANKERL_NANOBENCH(NODISCARD) bool hasError() const noexcept { return mHasError; } @@ -2691,16 +2698,23 @@ PerformanceCounters::PerformanceCounters() , mVal() , mHas() { - mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false)); + // HW events mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false)); - mHas.contextSwitches = - mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false)); + if (!mHas.cpuCycles) { + // Fallback to cycles counter, reference cycles not available in many systems. 
+ mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false)); + } mHas.instructions = mPc->monitor(PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.instructions, true, true)); mHas.branchInstructions = mPc->monitor(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.branchInstructions, true, false)); mHas.branchMisses = mPc->monitor(PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target(&mVal.branchMisses, true, false)); // mHas.branchMisses = false; + // SW events + mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false)); + mHas.contextSwitches = + mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false)); + mPc->start(); mPc->calibrate([] { auto before = ankerl::nanobench::Clock::now(); @@ -2789,7 +2803,7 @@ void StreamStateRestorer::restore() { Number::Number(int width, int precision, int64_t value) : mWidth(width) , mPrecision(precision) - , mValue(static_cast<double>(value)) {} + , mValue(d(value)) {} Number::Number(int width, int precision, double value) : mWidth(width) @@ -2823,7 +2837,7 @@ std::ostream& operator<<(std::ostream& os, Number const& n) { return n.write(os); } -MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) +MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept : mWidth(w) , mPrecision(prec) , mTitle(std::move(tit)) @@ -2884,14 +2898,14 @@ std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) { Config::Config() = default; Config::~Config() = default; Config& Config::operator=(Config const&) = default; -Config& Config::operator=(Config&&) noexcept = default; +Config& Config::operator=(Config&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default; Config::Config(Config const&) = default; 
Config::Config(Config&&) noexcept = default; // provide implementation here so it's only generated once Result::~Result() = default; Result& Result::operator=(Result const&) = default; -Result& Result::operator=(Result&&) noexcept = default; +Result& Result::operator=(Result&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default; Result::Result(Result const&) = default; Result::Result(Result&&) noexcept = default; @@ -2992,7 +3006,7 @@ double Result::medianAbsolutePercentError(Measure m) const { auto data = mNameToMeasurements[detail::u(m)]; // calculates MdAPE which is the median of percentage error - // see https://www.spiderfinancial.com/support/documentation/numxl/reference-manual/forecasting-performance/mdape + // see https://support.numxl.com/hc/en-us/articles/115001223503-MdAPE-Median-Absolute-Percentage-Error auto med = calcMedian(data); // transform the data to absolute error @@ -3106,7 +3120,7 @@ Bench::Bench() { } Bench::Bench(Bench&&) noexcept = default; -Bench& Bench::operator=(Bench&&) noexcept = default; +Bench& Bench::operator=(Bench&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default; Bench::Bench(Bench const&) = default; Bench& Bench::operator=(Bench const&) = default; Bench::~Bench() noexcept = default; @@ -3423,7 +3437,7 @@ BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure) sumMeasure += rm.second; } - auto n = static_cast<double>(rangeMeasure.size()); + auto n = detail::d(rangeMeasure.size()); auto mean = sumMeasure / n; mNormalizedRootMeanSquare = std::sqrt(err / n) / mean; }