Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve search/find_end perf by dropping memcmp #4654

Merged
merged 24 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
73c96da
vectorize search
AlexGuteniev May 5, 2024
0c17a53
very tail fix
AlexGuteniev May 5, 2024
11c05ee
I 🧡 ADL
AlexGuteniev May 5, 2024
d4fcc96
unify ipsum
AlexGuteniev May 5, 2024
da5cf2e
-newline
AlexGuteniev May 5, 2024
da157b1
`strstr` for competition
AlexGuteniev May 5, 2024
772c513
missing progress
AlexGuteniev May 5, 2024
2c6c329
coverage
AlexGuteniev May 5, 2024
81a6000
these tests are too long
AlexGuteniev May 5, 2024
0b59b2e
missing include
AlexGuteniev May 5, 2024
f2806c5
default_searcher
AlexGuteniev May 5, 2024
15e54a9
ADL again
AlexGuteniev May 5, 2024
26646fe
avoid `memcmp` in fallback
AlexGuteniev May 5, 2024
0c473a4
partial review comment
AlexGuteniev Jun 7, 2024
3452fcc
Merge branch 'main' into search
StephanTLavavej Jun 10, 2024
629afd4
Internal static assert `sizeof(_Ty1) == sizeof(_Ty2)`.
StephanTLavavej Jun 10, 2024
a24e6eb
Use `+=` and `+` instead of `_RANGES next`.
StephanTLavavej Jun 10, 2024
9d07a40
Style: Return `_Ptr_res1` instead of `_Ptr_last1` when they're equal.
StephanTLavavej Jun 10, 2024
d57f9b6
Style: In `<algorithm>` and `<functional>`, `_Ptr_last1` doesn't need…
StephanTLavavej Jun 10, 2024
e51b98d
Restore top-level constness for `_UFirst2`.
StephanTLavavej Jun 10, 2024
d4462a5
Benchmark classic search().
StephanTLavavej Jun 10, 2024
95ba820
Simplify `last_known_good_search()`.
StephanTLavavej Jun 10, 2024
72a0d29
Revert vectorized implementation.
StephanTLavavej Jun 10, 2024
38b32d6
Drop `memcmp` paths from `_Equal_rev_pred_unchecked` and `_Equal_rev_…
StephanTLavavej Jun 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(replace src/replace.cpp)
add_benchmark(search src/search.cpp)
add_benchmark(std_copy src/std_copy.cpp)
add_benchmark(swap_ranges src/swap_ranges.cpp)

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/src/replace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const char src[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquet "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
Expand All @@ -34,7 +34,7 @@ const char src[] =
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquam malesuada est at dignissim. Pellentesque finibus "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

template <class T>
Expand Down
96 changes: 96 additions & 0 deletions benchmarks/src/search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include <vector>

// Text that every benchmark in this file searches through. Being a string literal array, it carries a
// trailing '\0' as its final element.
const char src_haystack[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
"accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci "
"elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec "
"volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam "
"iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit "
"rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin "
"vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec "
"nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec "
"metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis "
"nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor "
"venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, "
"aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis "
"nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, "
"laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. "
"Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum "
"feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum "
"lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. "
"Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient "
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

// Pattern looked up by every benchmark; its first occurrence in src_haystack lies in the last few lines of
// the text ("Quisque aliquet condimentum"). The array also holds a trailing '\0' after the seven letters.
const char src_needle[] = "aliquet";

// Baseline benchmark: times the C library's strstr() looking for src_needle inside src_haystack.
void c_strstr(benchmark::State& state) {
    // Build the strings from the C strings themselves, so their contents end before the arrays' trailing '\0'.
    // An iterator pair over the whole array would store that terminator as an extra, embedded character.
    const std::string haystack(src_haystack);
    const std::string needle(src_needle);

    for (auto _ : state) {
        // Hide the inputs and the result from the optimizer so the call is actually performed each iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::strstr(haystack.c_str(), needle.c_str());
        benchmark::DoNotOptimize(res);
    }
}

// Benchmarks std::ranges::search over vectors whose elements are the haystack/needle characters converted to T.
template <class T>
void ranges_search(benchmark::State& state) {
    // Stop one short of the array end to leave out the string literal's trailing '\0'. With the terminator
    // included, the needle would be "aliquet\0", which occurs nowhere in the haystack, so the benchmark would
    // only ever measure a failed full scan instead of the same lookup c_strstr performs.
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Hide the inputs and the result from the optimizer so the search is actually performed each iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::ranges::search(haystack, needle);
        benchmark::DoNotOptimize(res);
    }
}

// Benchmarks the searcher overload of std::search, using std::default_searcher, over vectors whose elements
// are the haystack/needle characters converted to T.
template <class T>
void search_default_searcher(benchmark::State& state) {
    // Stop one short of the array end to leave out the string literal's trailing '\0'. With the terminator
    // included, the needle would be "aliquet\0", which occurs nowhere in the haystack, so the benchmark would
    // only ever measure a failed full scan instead of the same lookup c_strstr performs.
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Hide the inputs and the result from the optimizer so the search is actually performed each iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::search(haystack.begin(), haystack.end(), std::default_searcher{needle.begin(), needle.end()});
        benchmark::DoNotOptimize(res);
    }
}

// Register the strstr baseline.
BENCHMARK(c_strstr);

// Register std::ranges::search for element widths of 1, 2, 4 and 8 bytes.
BENCHMARK(ranges_search<std::uint8_t>);
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
BENCHMARK(ranges_search<std::uint16_t>);
BENCHMARK(ranges_search<std::uint32_t>);
BENCHMARK(ranges_search<std::uint64_t>);

// Register std::search with std::default_searcher for the same element widths.
BENCHMARK(search_default_searcher<std::uint8_t>);
BENCHMARK(search_default_searcher<std::uint16_t>);
BENCHMARK(search_default_searcher<std::uint32_t>);
BENCHMARK(search_default_searcher<std::uint64_t>);


// Expands to the program's main() that runs the registered benchmarks.
BENCHMARK_MAIN();
28 changes: 24 additions & 4 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -2142,13 +2142,33 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays
const _FwdItPat _Last2, _Pr _Pred) { // find first [_First2, _Last2) satisfying _Pred
_STD _Adl_verify_range(_First1, _Last1);
_STD _Adl_verify_range(_First2, _Last2);
auto _UFirst1 = _STD _Get_unwrapped(_First1);
const auto _ULast1 = _STD _Get_unwrapped(_Last1);
const auto _UFirst2 = _STD _Get_unwrapped(_First2);
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
const auto _ULast2 = _STD _Get_unwrapped(_Last2);
auto _UFirst1 = _STD _Get_unwrapped(_First1);
const auto _ULast1 = _STD _Get_unwrapped(_Last1);
auto _UFirst2 = _STD _Get_unwrapped(_First2);
const auto _ULast2 = _STD _Get_unwrapped(_Last2);
if constexpr (_Is_ranges_random_iter_v<_FwdItHaystack> && _Is_ranges_random_iter_v<_FwdItPat>) {
const _Iter_diff_t<_FwdItPat> _Count2 = _ULast2 - _UFirst2;
if (_ULast1 - _UFirst1 >= _Count2) {
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Ptr1 = _STD _To_address(_UFirst1);
const auto _Ptr_last1 = _STD _To_address(_ULast1);

const auto _Ptr_res1 = _STD _Search_vectorized(
_Ptr1, _Ptr_last1, _STD _To_address(_UFirst2), _STD _To_address(_ULast2));
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved

if constexpr (is_pointer_v<decltype(_UFirst1)>) {
_UFirst1 = _Ptr_res1;
} else {
_UFirst1 += _Ptr_res1 - _Ptr1;
}

_STD _Seek_wrapped(_Last1, _UFirst1);
return _Last1;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
const auto _Last_possible = _ULast1 - static_cast<_Iter_diff_t<_FwdItHaystack>>(_Count2);
for (;; ++_UFirst1) {
if (_STD _Equal_rev_pred_unchecked(_UFirst1, _UFirst2, _ULast2, _STD _Pass_fn(_Pred))) {
Expand Down
24 changes: 24 additions & 0 deletions stl/inc/functional
Original file line number Diff line number Diff line change
Expand Up @@ -2456,6 +2456,30 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked(
_Iter_diff_t<_FwdItHaystack> _Count1 = _Last1 - _First1;
_Iter_diff_t<_FwdItPat> _Count2 = _Last2 - _First2;

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<_FwdItHaystack, _FwdItPat, _Pred_eq>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Ptr1 = _STD _To_address(_First1);
const auto _Ptr_last1 = _STD _To_address(_Last1);

const auto _Ptr_res1 =
_STD _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_First2), _STD _To_address(_Last2));
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved

if constexpr (is_pointer_v<_FwdItHaystack>) {
_First1 = _Ptr_res1;
} else {
_First1 += _Ptr_res1 - _Ptr1;
}

if (_First1 != _Last1) {
return {_First1, _First1 + _Count2};
} else {
return {_Last1, _Last1};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (; _Count2 <= _Count1; ++_First1, (void) --_Count1) { // room for match, try it
_FwdItHaystack _Mid1 = _First1;
for (_FwdItPat _Mid2 = _First2;; ++_Mid1, (void) ++_Mid2) {
Expand Down
66 changes: 65 additions & 1 deletion stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ const void* __stdcall __std_find_trivial_2(const void* _First, const void* _Last
const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept;
const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept;

// Search routines, one per element width in bytes (1, 2, 4, 8); _Search_vectorized picks among them by sizeof.
// Both ranges are passed as untyped [first, last) pointer pairs and the result is an untyped pointer that the
// caller casts back to the haystack element type.
const void* __stdcall __std_search_1(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_search_2(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_search_4(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_search_8(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;

const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
Expand Down Expand Up @@ -195,6 +204,21 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe
}
}

template <class _Ty1, class _Ty2>
_Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept {
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
if constexpr (sizeof(_Ty1) == 1) {
return const_cast<_Ty1*>(static_cast<const _Ty1*>(::__std_search_1(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 2) {
return const_cast<_Ty1*>(static_cast<const _Ty1*>(::__std_search_2(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 4) {
return const_cast<_Ty1*>(static_cast<const _Ty1*>(::__std_search_4(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 8) {
return const_cast<_Ty1*>(static_cast<const _Ty1*>(::__std_search_8(_First1, _Last1, _First2, _Last2)));
} else {
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
}
}

template <class _Ty>
_Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
constexpr bool _Signed = is_signed_v<_Ty>;
Expand Down Expand Up @@ -5360,7 +5384,7 @@ constexpr bool _Equal_memcmp_is_safe_helper =

template <class _Iter1, class _Iter2, class _Pr>
constexpr bool _Equal_memcmp_is_safe =
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, _Pr>;
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, remove_const_t<_Pr>>;

template <class _CtgIt1, class _CtgIt2>
_NODISCARD int _Memcmp_ranges(_CtgIt1 _First1, _CtgIt1 _Last1, _CtgIt2 _First2) {
Expand Down Expand Up @@ -6749,6 +6773,46 @@ namespace ranges {
_STL_INTERNAL_CHECK(_RANGES distance(_First1, _Last1) == _Count1);
_STL_INTERNAL_CHECK(_RANGES distance(_First2, _Last2) == _Count2);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity>
&& is_same_v<_Pj2, identity>) {
if (!_STD is_constant_evaluated()) {
const auto _Ptr1 = _STD to_address(_First1);
const auto _Ptr2 = _STD to_address(_First2);
remove_const_t<decltype(_Ptr1)> _Ptr_last1;
remove_const_t<decltype(_Ptr2)> _Ptr_last2;

if constexpr (is_same_v<_It1, _Se1>) {
_Ptr_last1 = _STD to_address(_Last1);
} else {
_Ptr_last1 = _Ptr1 + _Count1;
}

if constexpr (is_same_v<_It2, _Se2>) {
_Ptr_last2 = _STD to_address(_Last2);
} else {
_Ptr_last2 = _Ptr2 + _Count2;
}

const auto _Ptr_res1 = _STD _Search_vectorized(_Ptr1, _Ptr_last1, _Ptr2, _Ptr_last2);

if constexpr (is_pointer_v<_It1>) {
if (_Ptr_res1 != _Ptr_last1) {
return {_Ptr_res1, _Ptr_res1 + _Count2};
} else {
return {_Ptr_last1, _Ptr_last1};
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
}
} else {
_First1 = _RANGES next(_STD move(_First1), _Ptr_res1 - _Ptr1);
if (_First1 != _Last1) {
return {_First1, _RANGES next(_First1, _Count2)};
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
} else {
return {_First1, _First1};
}
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
for (; _Count1 >= _Count2; ++_First1, (void) --_Count1) {
auto _Match_and_mid1 = _RANGES _Equal_rev_pred(_First1, _First2, _Last2, _Pred, _Proj1, _Proj2);
if (_Match_and_mid1.first) {
Expand Down
Loading