Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement vectorized min_ / max_element for ints #2447

Merged
merged 66 commits into from
Jun 19, 2022
Merged
Show file tree
Hide file tree
Changes from 60 commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
2951f9e
Implement vectorized min_ / max_element for ints
AlexGuteniev Dec 26, 2021
19ba1d0
fix copypasta
AlexGuteniev Dec 26, 2021
44b19c9
Guard C++ features
AlexGuteniev Dec 26, 2021
7c233f4
typo
AlexGuteniev Dec 26, 2021
e245018
@miscco review
AlexGuteniev Dec 26, 2021
3bf81a7
contexpr in C++14 / C++17 :-(
AlexGuteniev Dec 26, 2021
f781a70
Fix algorithm
AlexGuteniev Dec 26, 2021
34b935d
x86 build
AlexGuteniev Dec 26, 2021
de0397a
minmax_element
AlexGuteniev Dec 26, 2021
f118d4c
fix corner case; +comments
AlexGuteniev Dec 26, 2021
bc6a055
misleading comments
AlexGuteniev Dec 26, 2021
6a7708e
enable x86 function fusion optimziation
AlexGuteniev Dec 26, 2021
64b9e80
Comment on __stdcall
AlexGuteniev Dec 26, 2021
8e2bf94
-stray def param
AlexGuteniev Dec 26, 2021
300b3b4
literate
AlexGuteniev Dec 26, 2021
7597d6f
Simplify _Base
AlexGuteniev Dec 26, 2021
0519a3e
strange, tests locally pass for me
AlexGuteniev Dec 26, 2021
4246874
try to undo __std_minmax_element
AlexGuteniev Dec 26, 2021
121ea2a
ptrptr
AlexGuteniev Dec 26, 2021
2a7b50b
minmax element wrapper
AlexGuteniev Dec 27, 2021
6664519
minmax
AlexGuteniev Dec 27, 2021
d0ccf20
fix minmax_element for larger value type
AlexGuteniev Dec 27, 2021
24dbd57
some test coverage
AlexGuteniev Dec 27, 2021
3ed6d99
Fix x86 64-bit handling
AlexGuteniev Dec 27, 2021
2b43c89
minor cleanup
AlexGuteniev Dec 27, 2021
137f413
more text cleanup
AlexGuteniev Dec 27, 2021
f01c3c6
-stdlib dependency
AlexGuteniev Dec 27, 2021
d5ab6ac
superfluous
AlexGuteniev Dec 28, 2021
7564248
LT is superfluous
AlexGuteniev Dec 28, 2021
04b72fa
optimize 16-bit case
AlexGuteniev Dec 29, 2021
f4220f3
Fix for optimization of 16-bit
AlexGuteniev Dec 29, 2021
28389bb
enable for any standard mode
AlexGuteniev Jan 2, 2022
92fbbda
guard __CUDACC__
AlexGuteniev Jan 2, 2022
53c6796
reduce repeated code
AlexGuteniev Jan 2, 2022
fb7679d
Element in name
AlexGuteniev Jan 3, 2022
8f91c0d
optimization tuning for release only
AlexGuteniev Jan 3, 2022
4f289c0
Merge remote-tracking branch 'upstream/main' into vector_max
AlexGuteniev Jan 20, 2022
fe29701
Merge remote-tracking branch 'upstream/main' into vector_max
AlexGuteniev Apr 2, 2022
661fd86
Missing typename
AlexGuteniev Apr 2, 2022
c451f69
pointer cast properly
AlexGuteniev Apr 2, 2022
87992c1
narrative
AlexGuteniev Apr 2, 2022
66943a8
unnamed namespace
AlexGuteniev Apr 5, 2022
962b91d
Merge remote-tracking branch 'upstream/main' into vector_max
AlexGuteniev Apr 5, 2022
d077514
extern C
AlexGuteniev Apr 5, 2022
5913478
Merge remote-tracking branch 'upstream/main' into vector_max
AlexGuteniev Apr 23, 2022
d5953f5
more obvious all FFs
AlexGuteniev May 5, 2022
881f1e2
bitwise flags, aligned loads
AlexGuteniev May 10, 2022
8912c70
format
AlexGuteniev May 10, 2022
c02cb63
brace
AlexGuteniev May 10, 2022
ef213be
unternary
AlexGuteniev May 12, 2022
40c323f
Update vector_algorithms.cpp
AlexGuteniev May 12, 2022
856da66
_Ugly
AlexGuteniev May 12, 2022
7e7683f
review comments
AlexGuteniev May 13, 2022
4f332e3
+coverage
AlexGuteniev May 14, 2022
bdf7d82
+coverage
AlexGuteniev May 14, 2022
b79b194
format
AlexGuteniev May 14, 2022
a6ffe63
enhance special cases
AlexGuteniev May 28, 2022
9c6ba04
format
AlexGuteniev May 28, 2022
17e3b25
bring back a needed include
AlexGuteniev May 28, 2022
679e65d
Merge remote-tracking branch 'upstream/main' into vector_max
AlexGuteniev Jun 11, 2022
5c42c76
Merge branch 'main' into vector_max
StephanTLavavej Jun 18, 2022
56799e8
Code review feedback.
StephanTLavavej Jun 18, 2022
f538bbc
Remove workarounds now that we have 17.3 Preview 2.
StephanTLavavej Jun 18, 2022
1172483
Rename "cor" to "correction".
StephanTLavavej Jun 18, 2022
f566426
Remove CUDA guards.
StephanTLavavej Jun 18, 2022
d178303
Restore perma-workarounds.
StephanTLavavej Jun 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ _STL_DISABLE_CLANG_WARNINGS
#undef new

#if _USE_STD_VECTOR_ALGORITHMS

_EXTERN_C
// Return type of the __std_minmax_element_N functions declared in this extern "C" section.
// Both members are type-erased; the __std_minmax_element wrapper template casts them back to the element type.
struct _Min_max_element_t {
// Pointer reported for the minimum element.
const void* _Min;
// Pointer reported for the maximum element.
const void* _Max;
};

// The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms
// won't be stored beyond the lifetime of the function, and that the function will only reference arrays denoted by
// those pointers. The optimizer also assumes in that case that a pointer parameter is not returned to the caller via
Expand All @@ -34,7 +40,92 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
const void* _First, const void* _Last, void* _Dest) noexcept;
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
const void* _First, const void* _Last, void* _Dest) noexcept;

// Type-erased entry points for min_element / max_element / minmax_element.
//
// The numeric suffix is the element size in bytes: the __std_min_element / __std_max_element /
// __std_minmax_element wrapper templates in this header select the entry point by sizeof(_Ty).
// _First and _Last delimit the range as untyped pointers. _Signed is supplied by the wrappers as
// is_signed_v<_Ty> (for pointer elements they re-enter themselves with uintptr_t, so it is false there).
// The wrappers cast the returned pointer(s) back to _Ty*.
//
// NOTE(review): only declarations are visible here; the definitions presumably live in the separately compiled
// vector algorithms source file - confirm.
// NOTE(review): unlike the __std_reverse_copy_trivially_copyable_N helpers, these are not declared
// __declspec(noalias), presumably because the returned pointer refers into the input range - confirm.
const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;

const void* __stdcall __std_max_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_max_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_max_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;

// Same contract as above, but both results are returned together in a _Min_max_element_t.
_Min_max_element_t __stdcall __std_minmax_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
_Min_max_element_t __stdcall __std_minmax_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
_Min_max_element_t __stdcall __std_minmax_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
_Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
_END_EXTERN_C

// Typed front end for the __std_min_element_N entry points.
// Selects the entry point whose suffix equals sizeof(_Ty), tells it whether _Ty is signed,
// and converts the type-erased result back to _Ty*.
template <class _Ty>
_Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept {
    if constexpr (_STD is_pointer_v<_Ty> || _STD is_null_pointer_v<_Ty>) {
        // Pointer-like elements: re-enter this template with the same range viewed as uintptr_t values.
        const auto _First_bits        = reinterpret_cast<const uintptr_t*>(_First);
        const auto _Last_bits         = reinterpret_cast<const uintptr_t*>(_Last);
        const uintptr_t* const _Found = __std_min_element(_First_bits, _Last_bits);
        return const_cast<_Ty*>(reinterpret_cast<const _Ty*>(_Found));
    } else {
        constexpr bool _Signed = _STD is_signed_v<_Ty>;

        const void* _Found = nullptr;
        if constexpr (sizeof(_Ty) == 1) {
            _Found = __std_min_element_1(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 2) {
            _Found = __std_min_element_2(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 4) {
            _Found = __std_min_element_4(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 8) {
            _Found = __std_min_element_8(_First, _Last, _Signed);
        } else {
            static_assert(_STD _Always_false<_Ty>, "Unexpected size");
        }

        // The entry points traffic in const void*; restore the caller's element type and constness.
        return const_cast<_Ty*>(static_cast<const _Ty*>(_Found));
    }
}

// Typed front end for the __std_max_element_N entry points.
// Selects the entry point whose suffix equals sizeof(_Ty), tells it whether _Ty is signed,
// and converts the type-erased result back to _Ty*.
template <class _Ty>
_Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept {
    if constexpr (_STD is_pointer_v<_Ty> || _STD is_null_pointer_v<_Ty>) {
        // Pointer-like elements: re-enter this template with the same range viewed as uintptr_t values.
        const auto _First_bits        = reinterpret_cast<const uintptr_t*>(_First);
        const auto _Last_bits         = reinterpret_cast<const uintptr_t*>(_Last);
        const uintptr_t* const _Found = __std_max_element(_First_bits, _Last_bits);
        return const_cast<_Ty*>(reinterpret_cast<const _Ty*>(_Found));
    } else {
        constexpr bool _Signed = _STD is_signed_v<_Ty>;

        const void* _Found = nullptr;
        if constexpr (sizeof(_Ty) == 1) {
            _Found = __std_max_element_1(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 2) {
            _Found = __std_max_element_2(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 4) {
            _Found = __std_max_element_4(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 8) {
            _Found = __std_max_element_8(_First, _Last, _Signed);
        } else {
            static_assert(_STD _Always_false<_Ty>, "Unexpected size");
        }

        // The entry points traffic in const void*; restore the caller's element type and constness.
        return const_cast<_Ty*>(static_cast<const _Ty*>(_Found));
    }
}

// Typed front end for the __std_minmax_element_N entry points.
// Selects the entry point whose suffix equals sizeof(_Ty), tells it whether _Ty is signed,
// and converts the type-erased {_Min, _Max} result into a pair of _Ty*.
template <class _Ty>
_STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept {
    if constexpr (_STD is_pointer_v<_Ty> || _STD is_null_pointer_v<_Ty>) {
        // Pointer-like elements: re-enter this template with the same range viewed as uintptr_t values.
        const auto _First_bits = reinterpret_cast<const uintptr_t*>(_First);
        const auto _Last_bits  = reinterpret_cast<const uintptr_t*>(_Last);
        const auto _Found      = __std_minmax_element(_First_bits, _Last_bits);
        return {const_cast<_Ty*>(reinterpret_cast<const _Ty*>(_Found.first)),
            const_cast<_Ty*>(reinterpret_cast<const _Ty*>(_Found.second))};
    } else {
        constexpr bool _Signed = _STD is_signed_v<_Ty>;

        _Min_max_element_t _Found{};
        if constexpr (sizeof(_Ty) == 1) {
            _Found = __std_minmax_element_1(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 2) {
            _Found = __std_minmax_element_2(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 4) {
            _Found = __std_minmax_element_4(_First, _Last, _Signed);
        } else if constexpr (sizeof(_Ty) == 8) {
            _Found = __std_minmax_element_8(_First, _Last, _Signed);
        } else {
            static_assert(_STD _Always_false<_Ty>, "Unexpected size");
        }

        // The entry points traffic in const void*; restore the caller's element type and constness.
        return {const_cast<_Ty*>(static_cast<const _Ty*>(_Found._Min)),
            const_cast<_Ty*>(static_cast<const _Ty*>(_Found._Max))};
    }
}
#endif // _USE_STD_VECTOR_ALGORITHMS

_STD_BEGIN
Expand Down Expand Up @@ -9057,8 +9148,33 @@ namespace ranges {
#endif // __cpp_lib_concepts
#endif // _HAS_CXX17

// Decides whether min_element / max_element / minmax_element may be routed to the vector algorithms.
// All of the following must hold:
//   * the iterator is contiguous, so raw pointers to the elements can be obtained;
//   * the iterator is not volatile;
//   * the element type is an integral or pointer type;
//   * the predicate is less<>, less<_Elem>, or (when concepts are available) ranges::less.
template <class _Iter, class _Pr, class _Elem = _Iter_value_t<_Iter>>
_INLINE_VAR constexpr bool _Is_min_max_optimization_safe =
    _Iterator_is_contiguous<_Iter> && !_Iterator_is_volatile<_Iter>
    && (is_integral_v<_Elem> || is_pointer_v<_Elem>)
    && (
#ifdef __cpp_lib_concepts
        is_same_v<_Pr, _RANGES less> ||
#endif // __cpp_lib_concepts
        is_same_v<_Pr, less<>> || is_same_v<_Pr, less<_Elem>>);
barcharcraz marked this conversation as resolved.
Show resolved Hide resolved

template <class _FwdIt, class _Pr>
constexpr _FwdIt _Max_element_unchecked(_FwdIt _First, _FwdIt _Last, _Pr _Pred) { // find largest element
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
#if _USE_STD_VECTOR_ALGORITHMS && !defined(__CUDACC__)
if constexpr (_Is_min_max_optimization_safe<_FwdIt, _Pr>) {
if (!_Is_constant_evaluated()) {
const auto _First_ptr = _To_address(_First);
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
const auto _Result = __std_max_element(_First_ptr, _To_address(_Last));
if constexpr (is_pointer_v<_FwdIt>) {
return _Result;
} else {
return _First + (_Result - _First_ptr);
}
AlexGuteniev marked this conversation as resolved.
Show resolved Hide resolved
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS && !defined(__CUDACC__)

_FwdIt _Found = _First;
if (_First != _Last) {
while (++_First != _Last) {
Expand Down Expand Up @@ -9150,6 +9266,20 @@ namespace ranges {

template <class _FwdIt, class _Pr>
constexpr _FwdIt _Min_element_unchecked(_FwdIt _First, _FwdIt _Last, _Pr _Pred) { // find smallest element
#if _USE_STD_VECTOR_ALGORITHMS && !defined(__CUDACC__)
if constexpr (_Is_min_max_optimization_safe<_FwdIt, _Pr>) {
if (!_Is_constant_evaluated()) {
const auto _First_ptr = _To_address(_First);
const auto _Result = __std_min_element(_First_ptr, _To_address(_Last));
if constexpr (is_pointer_v<_FwdIt>) {
return _Result;
} else {
return _First + (_Result - _First_ptr);
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS && !defined(__CUDACC__)

_FwdIt _Found = _First;
if (_First != _Last) {
while (++_First != _Last) {
Expand Down Expand Up @@ -9241,6 +9371,20 @@ namespace ranges {

template <class _FwdIt, class _Pr>
constexpr pair<_FwdIt, _FwdIt> _Minmax_element_unchecked(_FwdIt _First, _FwdIt _Last, _Pr _Pred) {
#if _USE_STD_VECTOR_ALGORITHMS && !defined(__CUDACC__)
if constexpr (_Is_min_max_optimization_safe<_FwdIt, _Pr>) {
if (!_Is_constant_evaluated()) {
const auto _First_ptr = _To_address(_First);
const auto _Result = __std_minmax_element(_First_ptr, _To_address(_Last));
if constexpr (is_pointer_v<_FwdIt>) {
return {_Result.first, _Result.second};
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
} else {
return {_First + (_Result.first - _First_ptr), _First + (_Result.second - _First_ptr)};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS && !defined(__CUDACC__)

// find smallest and largest elements
pair<_FwdIt, _FwdIt> _Found(_First, _First);

Expand Down
6 changes: 6 additions & 0 deletions stl/inc/xtr1common
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ template <class _Ty, class... _Types>
_INLINE_VAR constexpr bool _Is_any_of_v = // true if and only if _Ty is in _Types
disjunction_v<is_same<_Ty, _Types>...>;

#ifndef __CUDACC__
// Internal counterpart of is_constant_evaluated() that is declared outside the _HAS_CXX20 guard, so it can be
// used in any standard mode. Forwards directly to the compiler builtin.
// Not declared when __CUDACC__ is defined; callers must guard their use accordingly.
_NODISCARD constexpr bool _Is_constant_evaluated() noexcept {
return __builtin_is_constant_evaluated();
}
#endif // __CUDACC__
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved

#if _HAS_CXX20
_NODISCARD constexpr bool is_constant_evaluated() noexcept {
return __builtin_is_constant_evaluated();
Expand Down
Loading