From 1576991177ba97a4b2ff6c45950f1fa6e9aa678c Mon Sep 17 00:00:00 2001
From: Dominic Hamon
Date: Tue, 20 Feb 2024 16:51:06 +0000
Subject: [PATCH 01/14] fix some warnings

---
 test/complexity_test.cc | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/complexity_test.cc b/test/complexity_test.cc
index 0c159cd27d..fb4ad1ad53 100644
--- a/test/complexity_test.cc
+++ b/test/complexity_test.cc
@@ -71,7 +71,7 @@ void BM_Complexity_O1(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    long tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -120,7 +120,7 @@ void BM_Complexity_O_N(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    long tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -129,7 +129,7 @@
     }

     // 1ns per iteration per entry
-    state.SetIterationTime(state.range(0) * 42 * 1e-9);
+    state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
@@ -178,7 +178,7 @@ static void BM_Complexity_O_N_log_N(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    long tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -186,8 +186,8 @@
       benchmark::DoNotOptimize(tmp);
     }

-    state.SetIterationTime(state.range(0) * kLog2E * std::log(state.range(0)) *
-                           42 * 1e-9);
+    state.SetIterationTime(static_cast<double>(state.range(0)) * kLog2E *
+                           std::log(state.range(0)) * 42.0 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
@@ -238,7 +238,7 @@ void BM_ComplexityCaptureArgs(benchmark::State &state, int n) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    long tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -246,7 +246,7 @@
       benchmark::DoNotOptimize(tmp);
     }

-    state.SetIterationTime(state.range(0) * 42 * 1e-9);
+    state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
   }
   state.SetComplexityN(n);
 }

From ef88520d6fc3a9a9461938cd2617305403e12362 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 6 Mar 2024 15:40:31 +0300
Subject: [PATCH 02/14] Revert "fix some warnings" (#1762)

This reverts commit 1576991177ba97a4b2ff6c45950f1fa6e9aa678c.
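Background on the complexity_test.cc lines touched above and reverted below: these benchmarks use manual timing, and `State::SetIterationTime()` takes seconds as a `double` while `state.range(0)` is an `int64_t`, so mixing them without an explicit cast is what the conversion warnings complain about. A minimal sketch of the pattern (hypothetical benchmark, not code from the test file):

    #include <benchmark/benchmark.h>

    static void BM_ManualTiming(benchmark::State& state) {
      for (auto _ : state) {
        benchmark::DoNotOptimize(state.iterations());
        // Pretend each iteration costs 1 ns per element; the explicit cast
        // keeps int64_t-to-double conversion warnings quiet.
        state.SetIterationTime(static_cast<double>(state.range(0)) * 1e-9);
      }
      state.SetComplexityN(state.range(0));
    }
    BENCHMARK(BM_ManualTiming)->Range(8, 1 << 10)->UseManualTime()->Complexity(benchmark::oN);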
---
 test/complexity_test.cc | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/complexity_test.cc b/test/complexity_test.cc
index fb4ad1ad53..0c159cd27d 100644
--- a/test/complexity_test.cc
+++ b/test/complexity_test.cc
@@ -71,7 +71,7 @@ void BM_Complexity_O1(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    long tmp = state.iterations();
+    double tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -120,7 +120,7 @@ void BM_Complexity_O_N(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    long tmp = state.iterations();
+    double tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -129,7 +129,7 @@
     }

     // 1ns per iteration per entry
-    state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
+    state.SetIterationTime(state.range(0) * 42 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
@@ -178,7 +178,7 @@ static void BM_Complexity_O_N_log_N(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    long tmp = state.iterations();
+    double tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -186,8 +186,8 @@
       benchmark::DoNotOptimize(tmp);
     }

-    state.SetIterationTime(static_cast<double>(state.range(0)) * kLog2E *
-                           std::log(state.range(0)) * 42.0 * 1e-9);
+    state.SetIterationTime(state.range(0) * kLog2E * std::log(state.range(0)) *
+                           42 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
@@ -238,7 +238,7 @@ void BM_ComplexityCaptureArgs(benchmark::State &state, int n) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    long tmp = state.iterations();
+    double tmp = state.iterations();
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
@@ -246,7 +246,7 @@
      benchmark::DoNotOptimize(tmp);
     }

-    state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
+    state.SetIterationTime(state.range(0) * 42 * 1e-9);
   }
   state.SetComplexityN(n);
 }

From 654d8d6cf368233018c3df2f84f1118603839ac5 Mon Sep 17 00:00:00 2001
From: Tiago Freire <67021355+tmiguelf@users.noreply.github.com>
Date: Wed, 6 Mar 2024 13:50:45 +0100
Subject: [PATCH 03/14] Fixed LTO issue on no discard variable (#1761)

Improve `UseCharPointer()` (and therefore `DoNotOptimize()`) under MSVC LTO:
make it actually escape the pointer so that it cannot be optimized away.
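The idiom the change below adopts, shown as a minimal standalone sketch (hypothetical code, not taken from the repository): a store through a volatile pointer counts as an observable side effect, so even under LTO the compiler cannot discard the computation whose address escapes into it.

    #include <cstdint>

    // Volatile global sink: stores to it are treated as observable.
    static void const volatile* volatile global_sink;

    static int64_t expensive(int64_t x) { return x * x + 1; }  // made-up workload

    void keep_result_alive() {
      int64_t result = expensive(42);
      // Escaping the address via a volatile store keeps `result` (and the
      // computation that produced it) from being optimized away.
      global_sink = &result;
    }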
---
 src/benchmark.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/benchmark.cc b/src/benchmark.cc
index 31f2cde8ff..563c443800 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -152,8 +152,16 @@ BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
   return global_context;
 }

-// FIXME: wouldn't LTO mess this up?
-void UseCharPointer(char const volatile*) {}
+static void const volatile* volatile global_force_escape_pointer;
+
+// FIXME: Verify if LTO still messes this up?
+void UseCharPointer(char const volatile* const v) {
+  // We want to escape the pointer `v` so that the compiler cannot eliminate
+  // the computations that produced it. To do that, we store the pointer into
+  // a volatile variable, since a volatile store is generally not something
+  // the compiler is allowed to elide.
+  global_force_escape_pointer = reinterpret_cast<void const volatile*>(v);
+}

 }  // namespace internal

From c64b144f42f7e17bfebd3d2220f8daac48e6365c Mon Sep 17 00:00:00 2001
From: dominic <510002+dmah42@users.noreply.github.com>
Date: Thu, 7 Mar 2024 12:19:56 +0000
Subject: [PATCH 04/14] mitigate clang build warnings -Wconversion (#1763)

* mitigate clang build warnings -Wconversion

* ensure we have warnings set everywhere and fix some
---
 BUILD.bazel               | 19 ++++++++++++++++++-
 CMakeLists.txt            |  1 +
 src/benchmark.cc          |  3 ++-
 src/benchmark_register.cc |  5 +++--
 src/benchmark_register.h  |  4 ++--
 src/benchmark_runner.cc   |  2 +-
 src/cycleclock.h          |  4 ++--
 src/statistics.cc         |  4 ++--
 src/string_util.cc        |  2 +-
 src/sysinfo.cc            |  9 ++++-----
 src/timers.cc             |  4 ++--
 test/BUILD                |  1 +
 test/benchmark_gtest.cc   |  2 +-
 test/complexity_test.cc   | 24 ++++++++++++------------
 14 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/BUILD.bazel b/BUILD.bazel
index d72ae86728..15d836998c 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1,5 +1,22 @@
 licenses(["notice"])

+COPTS = [
+    "-pedantic",
+    "-pedantic-errors",
+    "-std=c++11",
+    "-Wall",
+    "-Wconversion",
+    "-Wextra",
+    "-Wshadow",
+    #    "-Wshorten-64-to-32",
+    "-Wfloat-equal",
+    "-fstrict-aliasing",
+    ## assert() are used a lot in tests upstream, which may be optimised out leading to
+    ## unused-variable warning.
+    "-Wno-unused-variable",
+    "-Werror=old-style-cast",
+]
+
 config_setting(
     name = "qnx",
     constraint_values = ["@platforms//os:qnx"],
@@ -47,7 +64,7 @@ cc_library(
     ],
     copts = select({
         ":windows": [],
-        "//conditions:default": ["-Werror=old-style-cast"],
+        "//conditions:default": COPTS,
     }),
     defines = [
         "BENCHMARK_STATIC_DEFINE",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d9bcc6a493..23b519c250 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -190,6 +190,7 @@ else()
   add_cxx_compiler_flag(-Wshadow)
   add_cxx_compiler_flag(-Wfloat-equal)
   add_cxx_compiler_flag(-Wold-style-cast)
+  add_cxx_compiler_flag(-Wconversion)
   if(BENCHMARK_ENABLE_WERROR)
     add_cxx_compiler_flag(-Werror)
   endif()
diff --git a/src/benchmark.cc b/src/benchmark.cc
index 563c443800..1f2f6cc277 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -407,7 +407,8 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
     benchmarks_with_threads += (benchmark.threads() > 1);
     runners.emplace_back(benchmark, &perfcounters, reports_for_family);
     int num_repeats_of_this_instance = runners.back().GetNumRepeats();
-    num_repetitions_total += num_repeats_of_this_instance;
+    num_repetitions_total +=
+        static_cast<size_t>(num_repeats_of_this_instance);
     if (reports_for_family)
       reports_for_family->num_runs_total += num_repeats_of_this_instance;
   }
diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc
index e447c9a2d3..8ade048225 100644
--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -482,8 +482,9 @@ int Benchmark::ArgsCnt() const {

 const char* Benchmark::GetArgName(int arg) const {
   BM_CHECK_GE(arg, 0);
-  BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
-  return arg_names_[arg].c_str();
+  size_t uarg = static_cast<size_t>(arg);
+  BM_CHECK_LT(uarg, arg_names_.size());
+  return arg_names_[uarg].c_str();
 }

 TimeUnit Benchmark::GetTimeUnit() const {
diff --git a/src/benchmark_register.h b/src/benchmark_register.h
index 53367c707c..be50265f72 100644
--- a/src/benchmark_register.h
+++ b/src/benchmark_register.h
@@ -24,7 +24,7 @@ typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
   static const T kmax = std::numeric_limits<T>::max();

   // Space out the values in multiples of "mult"
-  for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) {
+  for (T i = static_cast<T>(1); i <= hi; i = static_cast<T>(i * mult)) {
     if (i >= lo) {
       dst->push_back(i);
     }
@@ -52,7 +52,7 @@ void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {

   const auto it = AddPowers(dst, hi_complement, lo_complement, mult);

-  std::for_each(it, dst->end(), [](T& t) { t *= -1; });
+  std::for_each(it, dst->end(), [](T& t) { t = static_cast<T>(t * -1); });
   std::reverse(it, dst->end());
 }

diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index dcddb437e3..a74bdadd3e 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -235,7 +235,7 @@ BenchmarkRunner::BenchmarkRunner(
       has_explicit_iteration_count(b.iterations() != 0 ||
                                    parsed_benchtime_flag.tag ==
                                        BenchTimeType::ITERS),
-      pool(b.threads() - 1),
+      pool(static_cast<size_t>(b.threads() - 1)),
       iters(has_explicit_iteration_count
                 ? ComputeIters(b_, parsed_benchtime_flag)
                 : 1),
diff --git a/src/cycleclock.h b/src/cycleclock.h
index eff563e7fa..91abcf9dba 100644
--- a/src/cycleclock.h
+++ b/src/cycleclock.h
@@ -70,7 +70,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   // frequency scaling).  Also note that when the Mac sleeps, this
   // counter pauses; it does not continue counting, nor does it
   // reset to zero.
-  return mach_absolute_time();
+  return static_cast<int64_t>(mach_absolute_time());
 #elif defined(BENCHMARK_OS_EMSCRIPTEN)
   // this goes above x86-specific code because old versions of Emscripten
   // define __x86_64__, although they have nothing to do with it.
@@ -82,7 +82,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
 #elif defined(__x86_64__) || defined(__amd64__)
   uint64_t low, high;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
-  return (high << 32) | low;
+  return static_cast<int64_t>((high << 32) | low);
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
 #if defined(__powerpc64__) || defined(__ppc64__)
diff --git a/src/statistics.cc b/src/statistics.cc
index 261dcb299a..16b60261fd 100644
--- a/src/statistics.cc
+++ b/src/statistics.cc
@@ -97,7 +97,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
   auto error_count = std::count_if(reports.begin(), reports.end(),
                                    [](Run const& run) { return run.skipped; });

-  if (reports.size() - error_count < 2) {
+  if (reports.size() - static_cast<size_t>(error_count) < 2) {
     // We don't report aggregated data if there was a single run.
     return results;
   }
@@ -179,7 +179,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
     // Similarly, if there are N repetitions with 1 iterations each,
     // an aggregate will be computed over N measurements, not 1.
     // Thus it is best to simply use the count of separate reports.
-    data.iterations = reports.size();
+    data.iterations = static_cast<int64_t>(reports.size());

     data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
     data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
diff --git a/src/string_util.cc b/src/string_util.cc
index c69e40a813..9ba63a700a 100644
--- a/src/string_util.cc
+++ b/src/string_util.cc
@@ -56,7 +56,7 @@ void ToExponentAndMantissa(double val, int precision, double one_k,
       scaled /= one_k;
       if (scaled <= big_threshold) {
         mantissa_stream << scaled;
-        *exponent = i + 1;
+        *exponent = static_cast<int64_t>(i + 1);
         *mantissa = mantissa_stream.str();
         return;
       }
diff --git a/src/sysinfo.cc b/src/sysinfo.cc
index daeb98b026..57a23e7bc0 100644
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@@ -350,7 +350,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
     CPUInfo::CacheInfo C;
     C.num_sharing = static_cast<int>(b.count());
     C.level = cache.Level;
-    C.size = cache.Size;
+    C.size = static_cast<int>(cache.Size);
     C.type = "Unknown";
     switch (cache.Type) {
       case CacheUnified:
@@ -485,9 +485,8 @@ int GetNumCPUsImpl() {
   // positives.
   std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
   GetSystemInfo(&sysinfo);
-  return sysinfo.dwNumberOfProcessors;  // number of logical
-                                        // processors in the current
-                                        // group
+  // number of logical processors in the current group
+  return static_cast<int>(sysinfo.dwNumberOfProcessors);
 #elif defined(BENCHMARK_OS_SOLARIS)
   // Returns -1 in case of a failure.
  long num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
@@ -837,7 +836,7 @@ std::vector<double> GetLoadAvg() {
     !(defined(__ANDROID__) && __ANDROID_API__ < 29)
   static constexpr int kMaxSamples = 3;
   std::vector<double> res(kMaxSamples, 0.0);
-  const int nelem = getloadavg(res.data(), kMaxSamples);
+  const size_t nelem = static_cast<size_t>(getloadavg(res.data(), kMaxSamples));
   if (nelem < 1) {
     res.clear();
   } else {
diff --git a/src/timers.cc b/src/timers.cc
index 667e7b2eef..d0821f3166 100644
--- a/src/timers.cc
+++ b/src/timers.cc
@@ -245,9 +245,9 @@ std::string LocalDateTimeString() {
       tz_offset_sign = '-';
     }

-    tz_len =
+    tz_len = static_cast<size_t>(
         ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li",
-                   tz_offset_sign, offset_minutes / 100, offset_minutes % 100);
+                   tz_offset_sign, offset_minutes / 100, offset_minutes % 100));
     BM_CHECK(tz_len == kTzOffsetLen);
     ((void)tz_len);  // Prevent unused variable warning in optimized build.
   } else {
diff --git a/test/BUILD b/test/BUILD
index e43b802350..b245fa7622 100644
--- a/test/BUILD
+++ b/test/BUILD
@@ -21,6 +21,7 @@ TEST_COPTS = [
     ## assert() are used a lot in tests upstream, which may be optimised out leading to
     ## unused-variable warning.
     "-Wno-unused-variable",
+    "-Werror=old-style-cast",
 ]

 # Some of the issues with DoNotOptimize only occur when optimization is enabled
diff --git a/test/benchmark_gtest.cc b/test/benchmark_gtest.cc
index 2c9e555d92..0aa2552c1e 100644
--- a/test/benchmark_gtest.cc
+++ b/test/benchmark_gtest.cc
@@ -38,7 +38,7 @@ TEST(AddRangeTest, Advanced64) {

 TEST(AddRangeTest, FullRange8) {
   std::vector<int8_t> dst;
-  AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), int8_t{8});
+  AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), 8);
   EXPECT_THAT(
       dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127}));
 }
diff --git a/test/complexity_test.cc b/test/complexity_test.cc
index 0c159cd27d..0729d15aa7 100644
--- a/test/complexity_test.cc
+++ b/test/complexity_test.cc
@@ -71,11 +71,11 @@ void BM_Complexity_O1(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    double tmp = static_cast<double>(state.iterations());
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
-      tmp *= state.iterations();
+      tmp *= static_cast<double>(state.iterations());
       benchmark::DoNotOptimize(tmp);
     }

@@ -120,16 +120,16 @@ void BM_Complexity_O_N(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    double tmp = static_cast<double>(state.iterations());
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
-      tmp *= state.iterations();
+      tmp *= static_cast<double>(state.iterations());
       benchmark::DoNotOptimize(tmp);
     }

     // 1ns per iteration per entry
-    state.SetIterationTime(state.range(0) * 42 * 1e-9);
+    state.SetIterationTime(static_cast<double>(state.range(0)) * 42 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
@@ -178,16 +178,16 @@ static void BM_Complexity_O_N_log_N(benchmark::State &state) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    double tmp = static_cast<double>(state.iterations());
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
      benchmark::DoNotOptimize(state.iterations());
-      tmp *= state.iterations();
+      tmp *= static_cast<double>(state.iterations());
       benchmark::DoNotOptimize(tmp);
     }

-    state.SetIterationTime(state.range(0) * kLog2E * std::log(state.range(0)) *
-                           42 * 1e-9);
+    state.SetIterationTime(static_cast<double>(state.range(0)) * kLog2E *
+                           std::log(state.range(0)) * 42 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
@@ -238,15 +238,15 @@ void BM_ComplexityCaptureArgs(benchmark::State &state, int n) {
   for (auto _ : state) {
     // This test requires a non-zero CPU time to avoid divide-by-zero
     benchmark::DoNotOptimize(state.iterations());
-    double tmp = state.iterations();
+    double tmp = static_cast<double>(state.iterations());
     benchmark::DoNotOptimize(tmp);
     for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
       benchmark::DoNotOptimize(state.iterations());
-      tmp *= state.iterations();
+      tmp *= static_cast<double>(state.iterations());
       benchmark::DoNotOptimize(tmp);
     }

-    state.SetIterationTime(state.range(0) * 42 * 1e-9);
+    state.SetIterationTime(static_cast<double>(state.range(0)) * 42 * 1e-9);
   }
   state.SetComplexityN(n);
 }

From eaafe694d27f31fe05dd9d055da1e57c8d37a004 Mon Sep 17 00:00:00 2001
From: Nicholas Junge
Date: Thu, 7 Mar 2024 13:28:55 +0100
Subject: [PATCH 05/14] Add Python bindings build using bzlmod (#1764)

* Add a bzlmod Python bindings build

Uses the newly started `@nanobind_bazel` project to build nanobind
extensions. This means that we can drop all in-tree custom build defs
and build files for nanobind and the C++ Python headers. Additionally,
the temporary WORKSPACE overwrite hack naturally goes away because the
WORKSPACE system is now obsolete.

* Bump ruff -> v0.3.1, change ruff settings

The latest minor releases incurred some formatting and configuration
changes; this commit rolls them out.
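For reference, the extension this change builds is exercised from Python along the lines of the shipped bindings/python/google_benchmark/example.py; the workload below is made up:

    import google_benchmark as benchmark


    @benchmark.register
    def sum_a_million(state):
        # Made-up workload; whatever should be timed goes inside the state loop.
        while state:
            sum(range(1_000_000))


    if __name__ == "__main__":
        benchmark.main()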
--------- Co-authored-by: dominic <510002+dmah42@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- MODULE.bazel | 22 +++- WORKSPACE | 6 - bindings/python/BUILD | 3 - bindings/python/build_defs.bzl | 29 ---- bindings/python/google_benchmark/BUILD | 18 +-- bindings/python/google_benchmark/__init__.py | 1 + bindings/python/nanobind.BUILD | 59 --------- bindings/python/python_headers.BUILD | 10 -- pyproject.toml | 3 +- setup.py | 132 +++++++++---------- tools/gbench/util.py | 6 +- 12 files changed, 92 insertions(+), 199 deletions(-) delete mode 100644 bindings/python/BUILD delete mode 100644 bindings/python/build_defs.bzl delete mode 100644 bindings/python/nanobind.BUILD delete mode 100644 bindings/python/python_headers.BUILD diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0247d1b062..93455ab60d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: types_or: [ python, pyi ] args: [ "--ignore-missing-imports", "--scripts-are-modules" ] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.3.1 hooks: - id: ruff args: [ --fix, --exit-non-zero-on-fix ] diff --git a/MODULE.bazel b/MODULE.bazel index 7e0e016123..45238d6f9d 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -4,11 +4,11 @@ module( ) bazel_dep(name = "bazel_skylib", version = "1.5.0") -bazel_dep(name = "platforms", version = "0.0.7") +bazel_dep(name = "platforms", version = "0.0.8") bazel_dep(name = "rules_foreign_cc", version = "0.10.1") bazel_dep(name = "rules_cc", version = "0.0.9") -bazel_dep(name = "rules_python", version = "0.27.1", dev_dependency = True) +bazel_dep(name = "rules_python", version = "0.31.0", dev_dependency = True) bazel_dep(name = "googletest", version = "1.12.1", dev_dependency = True, repo_name = "com_google_googletest") bazel_dep(name = "libpfm", version = "4.11.0") @@ -19,7 +19,18 @@ bazel_dep(name = "libpfm", version = "4.11.0") # of relying on the changing default version from rules_python. python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True) +python.toolchain(python_version = "3.8") python.toolchain(python_version = "3.9") +python.toolchain(python_version = "3.10") +python.toolchain(python_version = "3.11") +python.toolchain( + is_default = True, + python_version = "3.12", +) +use_repo( + python, + python = "python_versions", +) pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) pip.parse( @@ -30,3 +41,10 @@ pip.parse( use_repo(pip, "tools_pip_deps") # -- bazel_dep definitions -- # + +bazel_dep(name = "nanobind_bazel", version = "", dev_dependency = True) +git_override( + module_name = "nanobind_bazel", + commit = "97e3db2744d3f5da244a0846a0644ffb074b4880", + remote = "https://github.com/nicholasjng/nanobind-bazel", +) diff --git a/WORKSPACE b/WORKSPACE index 2562070225..503202465e 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -22,9 +22,3 @@ pip_parse( load("@tools_pip_deps//:requirements.bzl", "install_deps") install_deps() - -new_local_repository( - name = "python_headers", - build_file = "@//bindings/python:python_headers.BUILD", - path = "", # May be overwritten by setup.py. 
-) diff --git a/bindings/python/BUILD b/bindings/python/BUILD deleted file mode 100644 index d61dcb12a1..0000000000 --- a/bindings/python/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -exports_files(glob(["*.BUILD"])) - -exports_files(["build_defs.bzl"]) diff --git a/bindings/python/build_defs.bzl b/bindings/python/build_defs.bzl deleted file mode 100644 index b0c1b0f580..0000000000 --- a/bindings/python/build_defs.bzl +++ /dev/null @@ -1,29 +0,0 @@ -""" -This file contains some build definitions for C++ extensions used in the Google Benchmark Python bindings. -""" - -_SHARED_LIB_SUFFIX = { - "//conditions:default": ".so", - "//:windows": ".dll", -} - -def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []): - for shared_lib_suffix in _SHARED_LIB_SUFFIX.values(): - shared_lib_name = name + shared_lib_suffix - native.cc_binary( - name = shared_lib_name, - linkshared = True, - linkstatic = True, - srcs = srcs + hdrs, - copts = copts, - features = features, - deps = deps, - ) - - return native.py_library( - name = name, - data = select({ - platform: [name + shared_lib_suffix] - for platform, shared_lib_suffix in _SHARED_LIB_SUFFIX.items() - }), - ) diff --git a/bindings/python/google_benchmark/BUILD b/bindings/python/google_benchmark/BUILD index f516a693eb..0c8e3c103f 100644 --- a/bindings/python/google_benchmark/BUILD +++ b/bindings/python/google_benchmark/BUILD @@ -1,4 +1,4 @@ -load("//bindings/python:build_defs.bzl", "py_extension") +load("@nanobind_bazel//:build_defs.bzl", "nanobind_extension") py_library( name = "google_benchmark", @@ -9,22 +9,10 @@ py_library( ], ) -py_extension( +nanobind_extension( name = "_benchmark", srcs = ["benchmark.cc"], - copts = [ - "-fexceptions", - "-fno-strict-aliasing", - ], - features = [ - "-use_header_modules", - "-parse_headers", - ], - deps = [ - "//:benchmark", - "@nanobind", - "@python_headers", - ], + deps = ["//:benchmark"], ) py_test( diff --git a/bindings/python/google_benchmark/__init__.py b/bindings/python/google_benchmark/__init__.py index e14769f451..c1393b4e58 100644 --- a/bindings/python/google_benchmark/__init__.py +++ b/bindings/python/google_benchmark/__init__.py @@ -26,6 +26,7 @@ def my_benchmark(state): if __name__ == '__main__': benchmark.main() """ + import atexit from absl import app diff --git a/bindings/python/nanobind.BUILD b/bindings/python/nanobind.BUILD deleted file mode 100644 index 9874b80d1f..0000000000 --- a/bindings/python/nanobind.BUILD +++ /dev/null @@ -1,59 +0,0 @@ -load("@bazel_skylib//lib:selects.bzl", "selects") - -licenses(["notice"]) - -package(default_visibility = ["//visibility:public"]) - -config_setting( - name = "msvc_compiler", - flag_values = {"@bazel_tools//tools/cpp:compiler": "msvc-cl"}, -) - -selects.config_setting_group( - name = "winplusmsvc", - match_all = [ - "@platforms//os:windows", - ":msvc_compiler", - ], -) - -cc_library( - name = "nanobind", - srcs = glob([ - "src/*.cpp", - ]), - additional_linker_inputs = select({ - "@platforms//os:macos": [":cmake/darwin-ld-cpython.sym"], - "//conditions:default": [], - }), - copts = select({ - ":msvc_compiler": [ - "/EHsc", # exceptions - "/Os", # size optimizations - "/GL", # LTO / whole program optimization - ], - # these should work on both clang and gcc. - "//conditions:default": [ - "-fexceptions", - "-flto", - "-Os", - ], - }), - includes = [ - "ext/robin_map/include", - "include", - ], - linkopts = select({ - ":winplusmsvc": ["/LTGC"], # Windows + MSVC. - "@platforms//os:macos": ["-Wl,@$(location :cmake/darwin-ld-cpython.sym)"], # Apple. 
- "//conditions:default": [], - }), - textual_hdrs = glob( - [ - "include/**/*.h", - "src/*.h", - "ext/robin_map/include/tsl/*.h", - ], - ), - deps = ["@python_headers"], -) diff --git a/bindings/python/python_headers.BUILD b/bindings/python/python_headers.BUILD deleted file mode 100644 index 8f139f8621..0000000000 --- a/bindings/python/python_headers.BUILD +++ /dev/null @@ -1,10 +0,0 @@ -licenses(["notice"]) - -package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "python_headers", - hdrs = glob(["**/*.h"]), - includes = ["."], - visibility = ["//visibility:public"], -) diff --git a/pyproject.toml b/pyproject.toml index aa24ae8c3f..62507a8703 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,11 +75,12 @@ src = ["bindings/python"] line-length = 80 target-version = "py311" +[tool.ruff.lint] # Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default. select = ["E", "F", "I", "W"] ignore = [ "E501", # line too long ] -[tool.ruff.isort] +[tool.ruff.lint.isort] combine-as-imports = true diff --git a/setup.py b/setup.py index cb20042da5..910383c769 100644 --- a/setup.py +++ b/setup.py @@ -1,46 +1,27 @@ -import contextlib import os import platform import shutil -import sysconfig from pathlib import Path -from typing import Generator +from typing import Any import setuptools from setuptools.command import build_ext -PYTHON_INCLUDE_PATH_PLACEHOLDER = "" - IS_WINDOWS = platform.system() == "Windows" IS_MAC = platform.system() == "Darwin" - -@contextlib.contextmanager -def temp_fill_include_path(fp: str) -> Generator[None, None, None]: - """Temporarily set the Python include path in a file.""" - with open(fp, "r+") as f: - try: - content = f.read() - replaced = content.replace( - PYTHON_INCLUDE_PATH_PLACEHOLDER, - Path(sysconfig.get_paths()["include"]).as_posix(), - ) - f.seek(0) - f.write(replaced) - f.truncate() - yield - finally: - # revert to the original content after exit - f.seek(0) - f.write(content) - f.truncate() +# hardcoded SABI-related options. Requires that each Python interpreter +# (hermetic or not) participating is of the same major-minor version. +version_tuple = tuple(int(i) for i in platform.python_version_tuple()) +py_limited_api = version_tuple >= (3, 12) +options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {} class BazelExtension(setuptools.Extension): """A C/C++ extension that is defined as a Bazel BUILD target.""" - def __init__(self, name: str, bazel_target: str): - super().__init__(name=name, sources=[]) + def __init__(self, name: str, bazel_target: str, **kwargs: Any): + super().__init__(name=name, sources=[], **kwargs) self.bazel_target = bazel_target stripped_target = bazel_target.split("//")[-1] @@ -67,49 +48,58 @@ def copy_extensions_to_source(self): def bazel_build(self, ext: BazelExtension) -> None: """Runs the bazel build to create the package.""" - with temp_fill_include_path("WORKSPACE"): - temp_path = Path(self.build_temp) - - bazel_argv = [ - "bazel", - "build", - ext.bazel_target, - "--enable_bzlmod=false", - f"--symlink_prefix={temp_path / 'bazel-'}", - f"--compilation_mode={'dbg' if self.debug else 'opt'}", - # C++17 is required by nanobind - f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}", - ] - - if IS_WINDOWS: - # Link with python*.lib. 
- for library_dir in self.library_dirs: - bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) - elif IS_MAC: - if platform.machine() == "x86_64": - # C++17 needs macOS 10.14 at minimum - bazel_argv.append("--macos_minimum_os=10.14") - - # cross-compilation for Mac ARM64 on GitHub Mac x86 runners. - # ARCHFLAGS is set by cibuildwheel before macOS wheel builds. - archflags = os.getenv("ARCHFLAGS", "") - if "arm64" in archflags: - bazel_argv.append("--cpu=darwin_arm64") - bazel_argv.append("--macos_cpus=arm64") - - elif platform.machine() == "arm64": - bazel_argv.append("--macos_minimum_os=11.0") - - self.spawn(bazel_argv) - - shared_lib_suffix = ".dll" if IS_WINDOWS else ".so" - ext_name = ext.target_name + shared_lib_suffix - ext_bazel_bin_path = ( - temp_path / "bazel-bin" / ext.relpath / ext_name - ) - - ext_dest_path = Path(self.get_ext_fullpath(ext.name)) - shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + temp_path = Path(self.build_temp) + # omit the patch version to avoid build errors if the toolchain is not + # yet registered in the current @rules_python version. + # patch version differences should be fine. + python_version = ".".join(platform.python_version_tuple()[:2]) + + bazel_argv = [ + "bazel", + "build", + ext.bazel_target, + f"--symlink_prefix={temp_path / 'bazel-'}", + f"--compilation_mode={'dbg' if self.debug else 'opt'}", + # C++17 is required by nanobind + f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}", + f"--@rules_python//python/config_settings:python_version={python_version}", + ] + + if ext.py_limited_api: + bazel_argv += ["--@nanobind_bazel//:py-limited-api=cp312"] + + if IS_WINDOWS: + # Link with python*.lib. + for library_dir in self.library_dirs: + bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + elif IS_MAC: + if platform.machine() == "x86_64": + # C++17 needs macOS 10.14 at minimum + bazel_argv.append("--macos_minimum_os=10.14") + + # cross-compilation for Mac ARM64 on GitHub Mac x86 runners. + # ARCHFLAGS is set by cibuildwheel before macOS wheel builds. 
+ archflags = os.getenv("ARCHFLAGS", "") + if "arm64" in archflags: + bazel_argv.append("--cpu=darwin_arm64") + bazel_argv.append("--macos_cpus=arm64") + + elif platform.machine() == "arm64": + bazel_argv.append("--macos_minimum_os=11.0") + + self.spawn(bazel_argv) + + if IS_WINDOWS: + suffix = ".pyd" + else: + suffix = ".abi3.so" if ext.py_limited_api else ".so" + + ext_name = ext.target_name + suffix + ext_bazel_bin_path = temp_path / "bazel-bin" / ext.relpath / ext_name + ext_dest_path = Path(self.get_ext_fullpath(ext.name)).with_name( + ext_name + ) + shutil.copyfile(ext_bazel_bin_path, ext_dest_path) setuptools.setup( @@ -118,6 +108,8 @@ def bazel_build(self, ext: BazelExtension) -> None: BazelExtension( name="google_benchmark._benchmark", bazel_target="//bindings/python/google_benchmark:_benchmark", + py_limited_api=py_limited_api, ) ], + options=options, ) diff --git a/tools/gbench/util.py b/tools/gbench/util.py index 4d061a3a1e..1119a1a2ca 100644 --- a/tools/gbench/util.py +++ b/tools/gbench/util.py @@ -1,5 +1,5 @@ -"""util.py - General utilities for running, loading, and processing benchmarks -""" +"""util.py - General utilities for running, loading, and processing benchmarks""" + import json import os import re @@ -37,7 +37,7 @@ def is_executable_file(filename): elif sys.platform.startswith("win"): return magic_bytes == b"MZ" else: - return magic_bytes == b"\x7FELF" + return magic_bytes == b"\x7fELF" def is_json_file(filename): From ad7c3ff18b9cec0b60706089834f3831fd65a58f Mon Sep 17 00:00:00 2001 From: Afanasyev Ivan Date: Sat, 9 Mar 2024 19:35:18 +0700 Subject: [PATCH 06/14] Fix implicit conversion changes signess warning in perf_counters.cc (#1765) `read_bytes` is `ssize_t` (and we know it's non-negative), we need to explicitly cast it to `size_t`. --- src/perf_counters.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/perf_counters.cc b/src/perf_counters.cc index d466e27e86..2eb97eb46a 100644 --- a/src/perf_counters.cc +++ b/src/perf_counters.cc @@ -39,7 +39,8 @@ size_t PerfCounterValues::Read(const std::vector& leaders) { auto read_bytes = ::read(lead, ptr, size); if (read_bytes >= ssize_t(sizeof(uint64_t))) { // Actual data bytes are all bytes minus initial padding - std::size_t data_bytes = read_bytes - sizeof(uint64_t); + std::size_t data_bytes = + static_cast(read_bytes) - sizeof(uint64_t); // This should be very cheap since it's in hot cache std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); // Increment our counters From 06b4a070156a9333549468e67923a3a16c8f541b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 18 Mar 2024 14:01:25 +0300 Subject: [PATCH 07/14] clang-tidy broke the world (#1766) `AnalyzeTemporaryDtors` option is no longer recognized by clang-tidy-18, and that renders the whole config invalid and completely ignored... ??? 
---
 .clang-tidy | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.clang-tidy b/.clang-tidy
index 56938a598d..1e229e582e 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -2,6 +2,5 @@
 Checks: 'clang-analyzer-*,readability-redundant-*,performance-*'
 WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*'
 HeaderFilterRegex: '.*'
-AnalyzeTemporaryDtors: false
 FormatStyle: none
 User: user

From d5c55e8c42a8782cb24f6011d0e88449237ab842 Mon Sep 17 00:00:00 2001
From: PhilipDeegan
Date: Thu, 21 Mar 2024 12:29:38 +0000
Subject: [PATCH 08/14] allow BENCHMARK_VERSION to be undefined (#1769)

---
 src/benchmark.cc | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/benchmark.cc b/src/benchmark.cc
index 1f2f6cc277..337bb3faa7 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -757,7 +757,13 @@ int InitializeStreams() {

 }  // end namespace internal

-std::string GetBenchmarkVersion() { return {BENCHMARK_VERSION}; }
+std::string GetBenchmarkVersion() {
+#ifdef BENCHMARK_VERSION
+  return {BENCHMARK_VERSION};
+#else
+  return {""};
+#endif
+}

 void PrintDefaultHelp() {
   fprintf(stdout,

From f3ec7b8820ca8136c4e1dad4552608b51b47831a Mon Sep 17 00:00:00 2001
From: Vasyl Zubko
Date: Sun, 24 Mar 2024 20:17:34 +0100
Subject: [PATCH 09/14] Fix OpenBSD build (#1772)

---
 src/sysinfo.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sysinfo.cc b/src/sysinfo.cc
index 57a23e7bc0..73b46a5a1a 100644
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@@ -162,7 +162,7 @@ ValueUnion GetSysctlImp(std::string const& name) {
     mib[1] = HW_CPUSPEED;
   }

-  if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) {
+  if (sysctl(mib, 2, buff.data(), &buff.size, nullptr, 0) == -1) {
     return ValueUnion();
   }
   return buff;
@@ -734,7 +734,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
 #endif
   unsigned long long hz = 0;
 #if defined BENCHMARK_OS_OPENBSD
-  if (GetSysctl(freqStr, &hz)) return hz * 1000000;
+  if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz * 1000000);
 #else
   if (GetSysctl(freqStr, &hz)) return hz;
 #endif

From 70916cbf71f50b9e1e6f13559e10d6dbb92beb32 Mon Sep 17 00:00:00 2001
From: Fanbo Meng
Date: Wed, 3 Apr 2024 05:26:33 -0400
Subject: [PATCH 10/14] Remove COMPILER_IBMXL macro for z/OS (#1777)

COMPILER_IBMXL identifies the Clang based IBM XL compiler (xlclang) on z/OS.
This compiler is obsolete and replaced by the Open XL compiler, so the macro
is no longer needed and the existing code would lead to incorrect asm syntax
for Open XL.
---
 src/cycleclock.h      | 5 +++--
 src/internal_macros.h | 6 +-----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/cycleclock.h b/src/cycleclock.h
index 91abcf9dba..a25843760b 100644
--- a/src/cycleclock.h
+++ b/src/cycleclock.h
@@ -181,10 +181,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
 #elif defined(__s390__)  // Covers both s390 and s390x.
   // Return the CPU clock.
   uint64_t tsc;
-#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL)
-  // z/OS XL compiler HLASM syntax.
+#if defined(BENCHMARK_OS_ZOS)
+  // z/OS HLASM syntax.
   asm(" stck %0" : "=m"(tsc) : : "cc");
 #else
+  // Linux on Z syntax.
   asm("stck %0" : "=Q"(tsc) : : "cc");
 #endif
   return tsc;
diff --git a/src/internal_macros.h b/src/internal_macros.h
index 8dd7d0c650..f4894ba8e6 100644
--- a/src/internal_macros.h
+++ b/src/internal_macros.h
@@ -11,11 +11,7 @@
 #endif

 #if defined(__clang__)
-  #if defined(__ibmxl__)
-    #if !defined(COMPILER_IBMXL)
-      #define COMPILER_IBMXL
-    #endif
-  #elif !defined(COMPILER_CLANG)
+  #if !defined(COMPILER_CLANG)
     #define COMPILER_CLANG
   #endif
 #elif defined(_MSC_VER)

From d6ce1452872abcd7e5a772f757708a2ad0eee71c Mon Sep 17 00:00:00 2001
From: dhairya <97079960+dhairyarungta@users.noreply.github.com>
Date: Sat, 13 Apr 2024 05:22:31 +0800
Subject: [PATCH 11/14] Refactor: Return frequency as double (#1782)

Adjusted the GetSysctl call in sysinfo.cc to ensure the frequency value is
returned as a double rather than an integer. This helps maintain consistency
and clarity in the codebase.
---
 src/sysinfo.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sysinfo.cc b/src/sysinfo.cc
index 73b46a5a1a..7261e2a96b 100644
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@@ -736,7 +736,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
 #if defined BENCHMARK_OS_OPENBSD
   if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz * 1000000);
 #else
-  if (GetSysctl(freqStr, &hz)) return hz;
+  if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz);
 #endif
   fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
           freqStr, strerror(errno));

From c0105603f618fdc03bc4ae5ad2be076f039c12f8 Mon Sep 17 00:00:00 2001
From: David Seifert <16636962+SoapGentoo@users.noreply.github.com>
Date: Sun, 14 Apr 2024 09:05:36 -0700
Subject: [PATCH 12/14] Add `benchmark_main.pc` to link `main()` containing library (#1779)

This is similar to the addition in
https://github.com/google/googletest/commit/8604c4adac40573f806cfadae44e22f8dfaf212a#diff-eb8e49bdf5e9aafb996777a4f4302ad1efd281222bf3202eb9b77ce47496c345
that added pkg-config support in GTest. Without this, users need to manually
find the library containing `main()`.
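Once `benchmark_main.pc` is installed alongside `benchmark.pc`, a downstream build can resolve the `main()`-providing library through pkg-config instead of locating it by hand; a hypothetical compile line (source and output names made up, assuming the install prefix is on PKG_CONFIG_PATH):

    g++ my_benchmark.cc $(pkg-config --cflags --libs benchmark_main) -o my_benchmark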
--- cmake/benchmark_main.pc.in | 7 +++++++ src/CMakeLists.txt | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 cmake/benchmark_main.pc.in diff --git a/cmake/benchmark_main.pc.in b/cmake/benchmark_main.pc.in new file mode 100644 index 0000000000..a90f3cd060 --- /dev/null +++ b/cmake/benchmark_main.pc.in @@ -0,0 +1,7 @@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ + +Name: @PROJECT_NAME@ +Description: Google microbenchmark framework (with main() function) +Version: @VERSION@ +Requires: benchmark +Libs: -L${libdir} -lbenchmark_main diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 943594b70b..5551099b2a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -86,6 +86,7 @@ set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") +set(pkg_config_main "${generated_dir}/${PROJECT_NAME}_main.pc") set(targets_to_export benchmark benchmark_main) set(targets_export_name "${PROJECT_NAME}Targets") @@ -105,6 +106,7 @@ write_basic_package_version_file( ) configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) +configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark_main.pc.in" "${pkg_config_main}" @ONLY) export ( TARGETS ${targets_to_export} @@ -133,7 +135,7 @@ if (BENCHMARK_ENABLE_INSTALL) DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") install( - FILES "${pkg_config}" + FILES "${pkg_config}" "${pkg_config_main}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") install( From 185c55d79301f1b3a6505b8432440f7a3994e79a Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Mon, 15 Apr 2024 11:57:02 +0200 Subject: [PATCH 13/14] Switch git override to stable BCR tag for nanobind_bazel (#1778) This comes following the first BCR release of nanobind_bazel. Feature-wise, nothing substantial has changed, except that the extensions are stripped of debug info when built in release mode, which reduces clutter in the symbol tables. No stubgen yet, since nanobind v2 has not been released yet. --- MODULE.bazel | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index 45238d6f9d..ca7bff6531 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -42,9 +42,4 @@ use_repo(pip, "tools_pip_deps") # -- bazel_dep definitions -- # -bazel_dep(name = "nanobind_bazel", version = "", dev_dependency = True) -git_override( - module_name = "nanobind_bazel", - commit = "97e3db2744d3f5da244a0846a0644ffb074b4880", - remote = "https://github.com/nicholasjng/nanobind-bazel", -) +bazel_dep(name = "nanobind_bazel", version = "1.0.0", dev_dependency = True) From bc946b919cac6f25a199a526da571638cfde109f Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Mon, 15 Apr 2024 18:44:09 +0200 Subject: [PATCH 14/14] Modernize wheel building job config (#1783) It is now possible to build Mac wheels on native machines in Github Actions, so ARM64 Mac wheels are now built and tested on M1 machines. Also, the artifact up-/download was migrated to v4, which made it necessary to upload wheels to unique artifact names, and then later stitch them together again in a subsequent job. The cross-platform Mac build injection in setup.py was removed, since it is no longer necessary. I relanded a monkey-patching of Bazel build files, this time for MODULE.bazel. 
This is because `rules_python` does not allow running as the root user, which is the case in cibuildwheel+Linux (happens in a Docker container). Since I did not see a quick way of switching to rootless containers, and did not want to hardcode the config change (it can apparently cause cache misses and build failures), I inject the "ignore_root_user_error" flag into the MODULE.bazel file when running in cibuildwheel on Linux. --- .github/install_bazel.sh | 9 +++--- .github/workflows/wheels.yml | 50 +++++++++++++++++----------- MODULE.bazel | 4 --- setup.py | 63 +++++++++++++++++++++++++++--------- 4 files changed, 83 insertions(+), 43 deletions(-) diff --git a/.github/install_bazel.sh b/.github/install_bazel.sh index d07db0e758..1b0d63c98e 100644 --- a/.github/install_bazel.sh +++ b/.github/install_bazel.sh @@ -3,11 +3,10 @@ if ! bazel version; then if [ "$arch" == "aarch64" ]; then arch="arm64" fi - echo "Installing wget and downloading $arch Bazel binary from GitHub releases." - yum install -y wget - wget "https://github.com/bazelbuild/bazel/releases/download/6.4.0/bazel-6.4.0-linux-$arch" -O /usr/local/bin/bazel - chmod +x /usr/local/bin/bazel + echo "Downloading $arch Bazel binary from GitHub releases." + curl -L -o $HOME/bin/bazel --create-dirs "https://github.com/bazelbuild/bazel/releases/download/7.1.1/bazel-7.1.1-linux-$arch" + chmod +x $HOME/bin/bazel else - # bazel is installed for the correct architecture + # Bazel is installed for the correct architecture exit 0 fi diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index a36d312aa6..8b772cd8b9 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -15,16 +15,16 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Install Python 3.11 + - name: Install Python 3.12 uses: actions/setup-python@v5 with: - python-version: 3.11 + python-version: 3.12 - run: python -m pip install build - name: Build sdist run: python -m build --sdist - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: dist + name: dist-sdist path: dist/*.tar.gz build_wheels: @@ -32,7 +32,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-13, macos-14, windows-latest] steps: - name: Check out Google Benchmark @@ -47,32 +47,44 @@ jobs: platforms: all - name: Build wheels on ${{ matrix.os }} using cibuildwheel - uses: pypa/cibuildwheel@v2.16.2 + uses: pypa/cibuildwheel@v2.17 env: - CIBW_BUILD: 'cp38-* cp39-* cp310-* cp311-* cp312-*' + CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*" CIBW_SKIP: "*-musllinux_*" - CIBW_TEST_SKIP: "*-macosx_arm64" - CIBW_ARCHS_LINUX: x86_64 aarch64 - CIBW_ARCHS_MACOS: x86_64 arm64 - CIBW_ARCHS_WINDOWS: AMD64 + CIBW_TEST_SKIP: "cp38-macosx_*:arm64" + CIBW_ARCHS_LINUX: auto64 aarch64 + CIBW_ARCHS_WINDOWS: auto64 CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + # Grab the rootless Bazel installation inside the container. 
+ CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py - name: Upload Google Benchmark ${{ matrix.os }} wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: dist + name: dist-${{ matrix.os }} path: wheelhouse/*.whl + merge_wheels: + name: Merge all built wheels into one artifact + runs-on: ubuntu-latest + needs: build_wheels + steps: + - name: Merge wheels + uses: actions/upload-artifact/merge@v4 + with: + name: dist + pattern: dist-* + delete-merged: true + pypi_upload: name: Publish google-benchmark wheels to PyPI - needs: [build_sdist, build_wheels] + needs: [merge_wheels] runs-on: ubuntu-latest permissions: id-token: write steps: - - uses: actions/download-artifact@v3 - with: - name: dist - path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.11 + - uses: actions/download-artifact@v4 + with: + path: dist + - uses: pypa/gh-action-pypi-publish@v1 diff --git a/MODULE.bazel b/MODULE.bazel index ca7bff6531..95db0b1292 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -27,10 +27,6 @@ python.toolchain( is_default = True, python_version = "3.12", ) -use_repo( - python, - python = "python_versions", -) pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) pip.parse( diff --git a/setup.py b/setup.py index 910383c769..40cdc8d339 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,17 @@ +import contextlib import os import platform +import re import shutil from pathlib import Path -from typing import Any +from typing import Any, Generator import setuptools from setuptools.command import build_ext IS_WINDOWS = platform.system() == "Windows" IS_MAC = platform.system() == "Darwin" +IS_LINUX = platform.system() == "Linux" # hardcoded SABI-related options. Requires that each Python interpreter # (hermetic or not) participating is of the same major-minor version. @@ -17,6 +20,46 @@ options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {} +def is_cibuildwheel() -> bool: + return os.getenv("CIBUILDWHEEL") is not None + + +@contextlib.contextmanager +def _maybe_patch_toolchains() -> Generator[None, None, None]: + """ + Patch rules_python toolchains to ignore root user error + when run in a Docker container on Linux in cibuildwheel. + """ + + def fmt_toolchain_args(matchobj): + suffix = "ignore_root_user_error = True" + callargs = matchobj.group(1) + # toolchain def is broken over multiple lines + if callargs.endswith("\n"): + callargs = callargs + " " + suffix + ",\n" + # toolchain def is on one line. + else: + callargs = callargs + ", " + suffix + return "python.toolchain(" + callargs + ")" + + CIBW_LINUX = is_cibuildwheel() and IS_LINUX + try: + if CIBW_LINUX: + module_bazel = Path("MODULE.bazel") + content: str = module_bazel.read_text() + module_bazel.write_text( + re.sub( + r"python.toolchain\(([\w\"\s,.=]*)\)", + fmt_toolchain_args, + content, + ) + ) + yield + finally: + if CIBW_LINUX: + module_bazel.write_text(content) + + class BazelExtension(setuptools.Extension): """A C/C++ extension that is defined as a Bazel BUILD target.""" @@ -73,21 +116,11 @@ def bazel_build(self, ext: BazelExtension) -> None: for library_dir in self.library_dirs: bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) elif IS_MAC: - if platform.machine() == "x86_64": - # C++17 needs macOS 10.14 at minimum - bazel_argv.append("--macos_minimum_os=10.14") - - # cross-compilation for Mac ARM64 on GitHub Mac x86 runners. 
- # ARCHFLAGS is set by cibuildwheel before macOS wheel builds. - archflags = os.getenv("ARCHFLAGS", "") - if "arm64" in archflags: - bazel_argv.append("--cpu=darwin_arm64") - bazel_argv.append("--macos_cpus=arm64") - - elif platform.machine() == "arm64": - bazel_argv.append("--macos_minimum_os=11.0") + # C++17 needs macOS 10.14 at minimum + bazel_argv.append("--macos_minimum_os=10.14") - self.spawn(bazel_argv) + with _maybe_patch_toolchains(): + self.spawn(bazel_argv) if IS_WINDOWS: suffix = ".pyd"