Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
e9ad403
Enable vectorized minmax_element using Neon on ARM64
hazzlim Dec 5, 2025
2329cc4
Roll into _Is_min_max_optimization_safe
hazzlim Dec 9, 2025
58f953b
Don't const-qualify unnamed parameter
hazzlim Dec 9, 2025
9b849a3
Don't define minmax,is_sorted_until on ARM64 for now
hazzlim Dec 9, 2025
b3cd60c
Remove _Traits_8_neon and don't define *_8 functions
hazzlim Dec 9, 2025
05f1ee9
Use non-floating point types where necessary in _Traits_d_neon
hazzlim Dec 9, 2025
1f1ee32
Unify _Get_v_pos interface
hazzlim Dec 9, 2025
3a0a371
Use _tzcnt_u32/_lzcnt_u32 on avx
hazzlim Dec 9, 2025
0b93055
Don't declare _8 functions
hazzlim Dec 9, 2025
017caa1
Add missing const qualifiers
hazzlim Dec 10, 2025
f995cbc
Don't add dispatch for minmax,is_sorted_until for now
hazzlim Dec 10, 2025
d139232
Merge branch 'main' into minmax-element-pr
StephanTLavavej Jan 6, 2026
cce3375
Cleanup handling of 64-bit integers on ARM64.
StephanTLavavej Jan 6, 2026
61327c0
`_Get_first_h_pos()`, `_Get_last_h_pos()`: const params, noexcept.
StephanTLavavej Jan 6, 2026
460655f
Drop `const` for template param `bool _Sign`.
StephanTLavavej Jan 6, 2026
a889815
Uglify: `m` => `_Mn`, `M` => `_Mx`, `r` => `_Rx`
StephanTLavavej Jan 6, 2026
bd21ec9
Fix preprocessor comments.
StephanTLavavej Jan 6, 2026
981bb91
Chain together ARM64 and ARM64EC preprocessor conditionals.
StephanTLavavej Jan 6, 2026
8051595
(Pre-existing) Chain together ARM64 and ARM64EC preprocessor conditio…
StephanTLavavej Jan 6, 2026
3893d1b
Don't modify `_Minmax_impl` to inspect `_Traits::_Has_unsigned_cmp` yet.
StephanTLavavej Jan 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
_Min_max_element_t __stdcall __std_minmax_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
_Min_max_element_t __stdcall __std_minmax_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
_Min_max_element_t __stdcall __std_minmax_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
#ifndef _M_ARM64
_Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
#endif // ^^^ !defined(_M_ARM64) ^^^
_Min_max_element_t __stdcall __std_minmax_element_f(const void* _First, const void* _Last, bool _Unused) noexcept;
_Min_max_element_t __stdcall __std_minmax_element_d(const void* _First, const void* _Last, bool _Unused) noexcept;
#endif // ^^^ _VECTORIZED_MINMAX_ELEMENT ^^^
Expand Down Expand Up @@ -212,7 +214,11 @@ pair<_Ty*, _Ty*> _Minmax_element_vectorized(_Ty* const _First, _Ty* const _Last)
} else if constexpr (sizeof(_Ty) == 4) {
_Res = ::__std_minmax_element_4(_First, _Last, _Signed);
} else if constexpr (sizeof(_Ty) == 8) {
#ifdef _M_ARM64
static_assert(false, "unexpected size; 64-bit integers on ARM64 should not take this codepath");
#else // ^^^ defined(_M_ARM64) / !defined(_M_ARM64) vvv
_Res = ::__std_minmax_element_8(_First, _Last, _Signed);
#endif // ^^^ !defined(_M_ARM64) ^^^
} else {
static_assert(false, "unexpected size");
}
Expand Down
27 changes: 25 additions & 2 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ _STL_DISABLE_CLANG_WARNINGS
#define _VECTORIZED_INCLUDES _VECTORIZED_FOR_X64_X86
#define _VECTORIZED_IS_SORTED_UNTIL _VECTORIZED_FOR_X64_X86
#define _VECTORIZED_MINMAX _VECTORIZED_FOR_X64_X86
#define _VECTORIZED_MINMAX_ELEMENT _VECTORIZED_FOR_X64_X86
#define _VECTORIZED_MINMAX_ELEMENT _VECTORIZED_FOR_X64_X86_ARM64
#define _VECTORIZED_MISMATCH _VECTORIZED_FOR_X64_X86
#define _VECTORIZED_REMOVE _VECTORIZED_FOR_X64_X86
#define _VECTORIZED_REMOVE_COPY _VECTORIZED_FOR_X64_X86
Expand Down Expand Up @@ -207,14 +207,18 @@ const void* __stdcall __std_find_end_8(
const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
#ifndef _M_ARM64
const void* __stdcall __std_min_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
#endif // ^^^ !defined(_M_ARM64) ^^^
const void* __stdcall __std_min_element_f(const void* _First, const void* _Last, bool _Unused) noexcept;
const void* __stdcall __std_min_element_d(const void* _First, const void* _Last, bool _Unused) noexcept;

const void* __stdcall __std_max_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_max_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_max_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
#ifndef _M_ARM64
const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
#endif // ^^^ !defined(_M_ARM64) ^^^
const void* __stdcall __std_max_element_f(const void* _First, const void* _Last, bool _Unused) noexcept;
const void* __stdcall __std_max_element_d(const void* _First, const void* _Last, bool _Unused) noexcept;
#endif // ^^^ _VECTORIZED_MINMAX_ELEMENT ^^^
Expand Down Expand Up @@ -416,7 +420,11 @@ _Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
} else if constexpr (sizeof(_Ty) == 4) {
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_4(_First, _Last, _Signed)));
} else if constexpr (sizeof(_Ty) == 8) {
#ifdef _M_ARM64
static_assert(false, "unexpected size; 64-bit integers on ARM64 should not take this codepath");
#else // ^^^ defined(_M_ARM64) / !defined(_M_ARM64) vvv
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_8(_First, _Last, _Signed)));
#endif // ^^^ !defined(_M_ARM64) ^^^
} else {
static_assert(false, "unexpected size");
}
Expand All @@ -437,7 +445,11 @@ _Ty* _Max_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
} else if constexpr (sizeof(_Ty) == 4) {
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_4(_First, _Last, _Signed)));
} else if constexpr (sizeof(_Ty) == 8) {
#ifdef _M_ARM64
static_assert(false, "unexpected size; 64-bit integers on ARM64 should not take this codepath");
#else // ^^^ defined(_M_ARM64) / !defined(_M_ARM64) vvv
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_8(_First, _Last, _Signed)));
#endif // ^^^ !defined(_M_ARM64) ^^^
} else {
static_assert(false, "unexpected size");
}
Expand Down Expand Up @@ -7281,9 +7293,20 @@ constexpr bool _Is_predicate_greater = _Is_any_of_v<_Pr,
#endif // _HAS_CXX20
greater<>, greater<_Iter_value_t<_Iter>>>;

#ifdef _M_ARM64
// On ARM64 we deliberately keep 64-bit integers out of the vectorized
// minmax_element codepath: benchmarks showed no win over the scalar loop.
template <class _Ty>
_INLINE_VAR constexpr bool _Is_64bit_int_on_arm64_v = !is_floating_point_v<_Ty> && sizeof(_Ty) == 8;
#else // ^^^ defined(_M_ARM64) / !defined(_M_ARM64) vvv
// Everywhere else, no element type is excluded on this basis.
template <class _Ty>
_INLINE_VAR constexpr bool _Is_64bit_int_on_arm64_v = false;
#endif // ^^^ !defined(_M_ARM64) ^^^

template <class _Iter, class _Pr>
constexpr bool _Is_min_max_optimization_safe = // Activate the vector algorithms for min_/max_element?
_Is_min_max_iterators_safe<_Iter> && _Is_predicate_less<_Iter, _Pr>;
_Is_min_max_iterators_safe<_Iter> && _Is_predicate_less<_Iter, _Pr>
&& !_Is_64bit_int_on_arm64_v<_Iter_value_t<_Iter>>;

// Unlike the position-based vectorized implementation, the value-based vectorized implementation
// does not always produce the expected results for floating-point types.
Expand Down
Loading