diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1a00069e6..83c5bbf81 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -46,10 +46,62 @@ jobs: with: platforms: all - - name: Build wheels on ${{ matrix.os }} using cibuildwheel - uses: pypa/cibuildwheel@v2.17 + - name: Build 3.8 wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.20 env: - CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp38-*" + CIBW_SKIP: "*-musllinux_*" + CIBW_TEST_SKIP: "cp38-macosx_*:arm64" + CIBW_ARCHS_LINUX: auto64 aarch64 + CIBW_ARCHS_WINDOWS: auto64 + CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + # Grab the rootless Bazel installation inside the container. + CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + + - name: Build 3.9 wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.20 + env: + CIBW_BUILD: "cp39-*" + CIBW_SKIP: "*-musllinux_*" + CIBW_TEST_SKIP: "cp38-macosx_*:arm64" + CIBW_ARCHS_LINUX: auto64 aarch64 + CIBW_ARCHS_WINDOWS: auto64 + CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + # Grab the rootless Bazel installation inside the container. + CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + + - name: Build 3.10 wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.20 + env: + CIBW_BUILD: "cp310-*" + CIBW_SKIP: "*-musllinux_*" + CIBW_TEST_SKIP: "cp38-macosx_*:arm64" + CIBW_ARCHS_LINUX: auto64 aarch64 + CIBW_ARCHS_WINDOWS: auto64 + CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + # Grab the rootless Bazel installation inside the container. 
+ CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + + - name: Build 3.11 wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.20 + env: + CIBW_BUILD: "cp311-*" + CIBW_SKIP: "*-musllinux_*" + CIBW_TEST_SKIP: "cp38-macosx_*:arm64" + CIBW_ARCHS_LINUX: auto64 aarch64 + CIBW_ARCHS_WINDOWS: auto64 + CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + # Grab the rootless Bazel installation inside the container. + CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + + - name: Build 3.12 wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.20 + env: + CIBW_BUILD: "cp312-*" CIBW_SKIP: "*-musllinux_*" CIBW_TEST_SKIP: "cp38-macosx_*:arm64" CIBW_ARCHS_LINUX: auto64 aarch64 diff --git a/CMakeLists.txt b/CMakeLists.txt index 216c1c921..e0cd6962e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # Require CMake 3.10. If available, use the policies up to CMake 3.22. cmake_minimum_required (VERSION 3.10...3.22) -project (benchmark VERSION 1.8.5 LANGUAGES CXX) +project (benchmark VERSION 1.9.0 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." 
ON) @@ -150,6 +150,10 @@ if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") add_definitions(-D_CRT_SECURE_NO_WARNINGS) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-WX) + endif() + if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-EHs-) add_cxx_compiler_flag(-EHa-) diff --git a/MODULE.bazel b/MODULE.bazel index 4210ea0be..e4f170c83 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,6 +1,6 @@ module( name = "google_benchmark", - version = "1.8.5", + version = "1.9.0", ) bazel_dep(name = "bazel_skylib", version = "1.5.0") diff --git a/bindings/python/google_benchmark/example.py b/bindings/python/google_benchmark/example.py index b5b2f88ff..b92245ea6 100644 --- a/bindings/python/google_benchmark/example.py +++ b/bindings/python/google_benchmark/example.py @@ -61,6 +61,7 @@ def skipped(state): @benchmark.register +@benchmark.option.use_manual_time() def manual_timing(state): while state: # Manually count Python CPU time diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 4cdb4515c..53a22247f 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -290,11 +290,50 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_OVERRIDE #endif +#if defined(__GNUC__) +// Determine the cacheline size based on architecture +#if defined(__i386__) || defined(__x86_64__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#elif defined(__powerpc64__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128 +#elif defined(__aarch64__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#elif defined(__arm__) +// Cache line sizes for ARM: These values are not strictly correct since +// cache line sizes depend on implementations, not architectures. There +// are even implementations with cache line sizes configurable at boot +// time. 
+#if defined(__ARM_ARCH_5T__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32 +#elif defined(__ARM_ARCH_7A__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#endif // ARM_ARCH +#endif // arches +#endif // __GNUC__ + +#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE +// A reasonable default guess. Note that overestimates tend to waste more +// space, while underestimates tend to waste more time. +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#endif // BENCHMARK_INTERNAL_CACHELINE_SIZE + +#if defined(__GNUC__) +// Indicates that the declared object be cache aligned using +// `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above). +#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \ + __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE))) +#elif defined(_MSC_VER) +#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \ + __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE)) +#else +#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED +#endif // __GNUC__ / _MSC_VER + #if defined(_MSC_VER) #pragma warning(push) // C4251: needs to have dll-interface to be used by clients of class #pragma warning(disable : 4251) -#endif +#endif // _MSC_VER namespace benchmark { class BenchmarkReporter; @@ -757,9 +796,14 @@ enum Skipped } // namespace internal +#if defined(_MSC_VER) +#pragma warning(push) +// C4324: 'benchmark::State': structure was padded due to alignment specifier +#pragma warning(disable : 4324) +#endif // _MSC_VER // State is passed to a running Benchmark and contains state for the // benchmark to use. 
-class BENCHMARK_EXPORT State { +class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State { public: struct StateIterator; friend struct StateIterator; @@ -1024,6 +1068,9 @@ class BENCHMARK_EXPORT State { friend class internal::BenchmarkInstance; }; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif // _MSC_VER inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { return KeepRunningInternal(1, /*is_batch=*/false); diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 685f5c688..4045cd64f 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -130,14 +130,14 @@ BenchmarkReporter::Run CreateRunReport( void RunInThread(const BenchmarkInstance* b, IterationCount iters, int thread_id, ThreadManager* manager, PerfCountersMeasurement* perf_counters_measurement, - ProfilerManager* profiler_manager) { + ProfilerManager* profiler_manager_) { internal::ThreadTimer timer( b->measure_process_cpu_time() ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); State st = b->Run(iters, thread_id, &timer, manager, - perf_counters_measurement, profiler_manager); + perf_counters_measurement, profiler_manager_); BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; {