From 11f22f1a963ab3c8949cb723a63c07d7a409c8a8 Mon Sep 17 00:00:00 2001 From: Eric Date: Sat, 13 Apr 2024 03:16:11 -0400 Subject: [PATCH] [tzdb] Replace shared_mutex with mutex. (#87929) The overhead of taking a std::mutex is much lower than taking a reader lock on a shared mutex, even under heavy contention. The benefit of shared_mutex only occurs as the amount of time spent in the critical sections grows large enough. In our case all we do is read a pointer and return the lock. As a result, using a shared lock can be ~50%-100% slower Here are the results for the provided benchmark on my machine: ``` 2024-04-07T12:48:51-04:00 Running ./libcxx/benchmarks/shared_mutex_vs_mutex.libcxx.out Run on (12 X 400 MHz CPU s) CPU Caches: L1 Data 32 KiB (x6) L1 Instruction 32 KiB (x6) L2 Unified 1024 KiB (x6) L3 Unified 32768 KiB (x1) Load Average: 2.70, 2.70, 1.63 --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_shared_mutex/threads:1 13.9 ns 13.9 ns 50533700 BM_shared_mutex/threads:2 34.5 ns 68.9 ns 9957784 BM_shared_mutex/threads:4 38.4 ns 137 ns 4987772 BM_shared_mutex/threads:8 51.1 ns 358 ns 1974160 BM_shared_mutex/threads:32 57.1 ns 682 ns 1043648 BM_mutex/threads:1 5.54 ns 5.53 ns 125867422 BM_mutex/threads:2 15.5 ns 30.9 ns 21830116 BM_mutex/threads:4 15.4 ns 57.2 ns 12136920 BM_mutex/threads:8 19.3 ns 140 ns 4997080 BM_mutex/threads:32 20.8 ns 252 ns 2859808 ``` --- libcxx/benchmarks/CMakeLists.txt | 1 + .../shared_mutex_vs_mutex.bench.cpp | 41 +++++++++++++++++++ libcxx/src/include/tzdb/tzdb_list_private.h | 11 +++-- 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 928238c1ac69ba..527a2acf2d3b36 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -221,6 +221,7 @@ set(BENCHMARK_TESTS map.bench.cpp monotonic_buffer.bench.cpp ordered_set.bench.cpp + shared_mutex_vs_mutex.bench.cpp stop_token.bench.cpp std_format_spec_string_unicode.bench.cpp string.bench.cpp diff --git a/libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp b/libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp new file mode 100644 index 00000000000000..19d13b799cc137 --- /dev/null +++ b/libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This benchmark compares the performance of std::mutex and std::shared_mutex in contended scenarios. +// it's meant to establish a baseline overhead for std::shared_mutex and std::mutex, and to help inform decisions about +// which mutex to use when selecting a mutex type for a given use case. + +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +int global_value = 42; +std::mutex m; +std::shared_mutex sm; + +static void BM_shared_mutex(benchmark::State& state) { + for (auto _ : state) { + std::shared_lock lock(sm); + benchmark::DoNotOptimize(global_value); + } +} + +static void BM_mutex(benchmark::State& state) { + for (auto _ : state) { + std::lock_guard lock(m); + benchmark::DoNotOptimize(global_value); + } +} + +BENCHMARK(BM_shared_mutex)->Threads(1)->Threads(2)->Threads(4)->Threads(8)->Threads(32); +BENCHMARK(BM_mutex)->Threads(1)->Threads(2)->Threads(4)->Threads(8)->Threads(32); + +BENCHMARK_MAIN(); diff --git a/libcxx/src/include/tzdb/tzdb_list_private.h b/libcxx/src/include/tzdb/tzdb_list_private.h index 969b2b9f8a9f63..0be7b8e5a10058 100644 --- a/libcxx/src/include/tzdb/tzdb_list_private.h +++ b/libcxx/src/include/tzdb/tzdb_list_private.h @@ -15,8 +15,11 @@ #include // When threads are not available the locking is not required. +// When threads are available, we use std::mutex over std::shared_mutex +// due to the increased overhead of std::shared_mutex. +// See shared_mutex_vs_mutex.bench.cpp #ifndef _LIBCPP_HAS_NO_THREADS -# include +# include #endif #include "types_private.h" @@ -56,7 +59,7 @@ class tzdb_list::__impl { const tzdb& __front() const noexcept { #ifndef _LIBCPP_HAS_NO_THREADS - shared_lock __lock{__mutex_}; + unique_lock __lock{__mutex_}; #endif return __tzdb_.front(); } @@ -72,7 +75,7 @@ class tzdb_list::__impl { const_iterator __begin() const noexcept { #ifndef _LIBCPP_HAS_NO_THREADS - shared_lock __lock{__mutex_}; + unique_lock __lock{__mutex_}; #endif return __tzdb_.begin(); } @@ -87,7 +90,7 @@ class tzdb_list::__impl { void __load_no_lock() { chrono::__init_tzdb(__tzdb_.emplace_front(), __rules_.emplace_front()); } #ifndef _LIBCPP_HAS_NO_THREADS - mutable shared_mutex __mutex_; + mutable mutex __mutex_; #endif forward_list __tzdb_;