Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
cffb1e7
unique vectorization
AlexGuteniev Nov 16, 2024
a0b714d
no point
AlexGuteniev Nov 16, 2024
cccf693
Not unique problem
AlexGuteniev Nov 16, 2024
54781db
Pointed out coverage
AlexGuteniev Nov 16, 2024
fa4ff20
Deduplicate
AlexGuteniev Nov 16, 2024
407897e
Mention unique shuffling requirement
AlexGuteniev Feb 26, 2025
4ca596b
whitespace
AlexGuteniev Feb 26, 2025
54b2938
Merge branch 'main' into unique
StephanTLavavej Mar 3, 2025
a8b1f3b
Include `<type_traits>` for `conditional_t`.
StephanTLavavej Mar 3, 2025
144163b
Direct-init `vector` instead of calling `resize()`.
StephanTLavavej Mar 3, 2025
a2bb838
Drop `std::`.
StephanTLavavej Mar 3, 2025
5ea3d5a
fix types in pointer test
AlexGuteniev Mar 4, 2025
c53a430
simplify unique pointer test
AlexGuteniev Mar 4, 2025
b789701
`<memory>` is no longer used.
StephanTLavavej Mar 4, 2025
2f51ed2
Drop repeated `TD` alias.
StephanTLavavej Mar 4, 2025
8eb10d7
Value-init `ptr_val_array`.
StephanTLavavej Mar 4, 2025
2e9f007
When `is_pointer_v<T>`, `dis(gen)` returns `int`.
StephanTLavavej Mar 4, 2025
e35ed5f
Mark `_Unique_fallback` as `noexcept`.
StephanTLavavej Mar 4, 2025
8928190
Avoid abbreviated function templates.
StephanTLavavej Mar 4, 2025
7a1de3d
Vectorize `remove_copy` and `unique_copy`
AlexGuteniev Mar 22, 2025
d37fde4
Merge remote-tracking branch 'upstream/main' into copycats
AlexGuteniev Mar 25, 2025
4fd4a4c
simplify
AlexGuteniev Mar 25, 2025
c1f5899
Fix error
AlexGuteniev Mar 25, 2025
b9fd6a7
short circuit
AlexGuteniev Mar 26, 2025
ba3403b
benchmark
AlexGuteniev Mar 26, 2025
5268f7e
merge error fix
AlexGuteniev Mar 26, 2025
182a32c
Fix typo, consistently not using quotes
AlexGuteniev Mar 30, 2025
780fd1a
Merge remote-tracking branch 'upstream/main' into copycats
AlexGuteniev Apr 11, 2025
40c5371
Drop const in declarations.
StephanTLavavej Apr 21, 2025
1d9e0bc
Comment grammar.
StephanTLavavej Apr 21, 2025
66a7511
Move iterators when returning from `_Remove_copy_unchecked`.
StephanTLavavej Apr 21, 2025
43006ad
Add const.
StephanTLavavej Apr 22, 2025
d017014
Add newline.
StephanTLavavej Apr 22, 2025
e33242e
Directly construct vectors from size.
StephanTLavavej Apr 22, 2025
443e7d4
LKG_remove_copy should take InIt, OutIt.
StephanTLavavej Apr 22, 2025
98868f5
`test_case_unique_copy` should be called with `out_actual`.
StephanTLavavej Apr 22, 2025
fff4056
Update `reserve` calls.
StephanTLavavej Apr 22, 2025
51d9388
Test the rest of the return values of ranges::remove/remove_copy/uniq…
StephanTLavavej Apr 22, 2025
640bb13
Add test coverage to find the bugs.
StephanTLavavej Apr 22, 2025
62401d4
Fix bugs.
StephanTLavavej Apr 22, 2025
fbf0e0c
Merge branch 'main' into copycats
StephanTLavavej Apr 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions benchmarks/src/remove.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ void r(benchmark::State& state) {
}
}

// Benchmark body for remove_copy: copies lorem_ipsum (converted to element type T) into a
// preallocated destination while dropping every element equal to T{'l'}.
// Type selects the classic iterator-pair overload (alg_type::std_fn) or the ranges overload.
template <alg_type Type, class T>
void rc(benchmark::State& state) {
    std::vector<T> input(lorem_ipsum.begin(), lorem_ipsum.end());
    std::vector<T> output(lorem_ipsum.size()); // same length as the source, so the copy always fits

    for (auto _ : state) {
        // Make both buffers opaque to the optimizer so the measured call is not elided.
        benchmark::DoNotOptimize(input);
        benchmark::DoNotOptimize(output);

        if constexpr (Type != alg_type::std_fn) {
            benchmark::DoNotOptimize(std::ranges::remove_copy(input, output.begin(), T{'l'}));
        } else {
            benchmark::DoNotOptimize(std::remove_copy(input.begin(), input.end(), output.begin(), T{'l'}));
        }
    }
}

BENCHMARK(r<alg_type::std_fn, std::uint8_t>);
BENCHMARK(r<alg_type::std_fn, std::uint16_t>);
BENCHMARK(r<alg_type::std_fn, std::uint32_t>);
Expand All @@ -36,4 +51,14 @@ BENCHMARK(r<alg_type::rng, std::uint16_t>);
BENCHMARK(r<alg_type::rng, std::uint32_t>);
BENCHMARK(r<alg_type::rng, std::uint64_t>);

BENCHMARK(rc<alg_type::std_fn, std::uint8_t>);
BENCHMARK(rc<alg_type::std_fn, std::uint16_t>);
BENCHMARK(rc<alg_type::std_fn, std::uint32_t>);
BENCHMARK(rc<alg_type::std_fn, std::uint64_t>);

BENCHMARK(rc<alg_type::rng, std::uint8_t>);
BENCHMARK(rc<alg_type::rng, std::uint16_t>);
BENCHMARK(rc<alg_type::rng, std::uint32_t>);
BENCHMARK(rc<alg_type::rng, std::uint64_t>);

BENCHMARK_MAIN();
31 changes: 31 additions & 0 deletions benchmarks/src/unique.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ void u(benchmark::State& state) {
}
}

// Benchmark body for unique_copy: fills a 2552-element source with binomially distributed values
// from a fixed-seed generator, then repeatedly copies it into a same-sized destination with
// consecutive duplicates collapsed.
// Type selects the classic iterator-pair overload (alg_type::std_fn) or the ranges overload.
template <alg_type Type, class T>
void uc(benchmark::State& state) {
    // One-byte T is swapped for int here: character-sized types are not valid
    // result types for the standard random number distributions.
    using dist_result = std::conditional_t<sizeof(T) == 1, int, T>;

    std::mt19937_64 engine(22033); // fixed seed, so every run sees the same data
    std::binomial_distribution<dist_result> distribution(5);

    std::vector<T, not_highly_aligned_allocator<T>> input(2552);
    for (auto& elem : input) {
        elem = static_cast<T>(distribution(engine));
    }

    std::vector<T, not_highly_aligned_allocator<T>> output(input.size());

    for (auto _ : state) {
        // Make both buffers opaque to the optimizer so the measured call is not elided.
        benchmark::DoNotOptimize(input);
        benchmark::DoNotOptimize(output);

        if constexpr (Type != alg_type::std_fn) {
            benchmark::DoNotOptimize(std::ranges::unique_copy(input, output.begin()));
        } else {
            benchmark::DoNotOptimize(std::unique_copy(input.begin(), input.end(), output.begin()));
        }
    }
}

BENCHMARK(u<alg_type::std_fn, std::uint8_t>);
BENCHMARK(u<alg_type::std_fn, std::uint16_t>);
BENCHMARK(u<alg_type::std_fn, std::uint32_t>);
Expand All @@ -47,4 +68,14 @@ BENCHMARK(u<alg_type::rng, std::uint16_t>);
BENCHMARK(u<alg_type::rng, std::uint32_t>);
BENCHMARK(u<alg_type::rng, std::uint64_t>);

BENCHMARK(uc<alg_type::std_fn, std::uint8_t>);
BENCHMARK(uc<alg_type::std_fn, std::uint16_t>);
BENCHMARK(uc<alg_type::std_fn, std::uint32_t>);
BENCHMARK(uc<alg_type::std_fn, std::uint64_t>);

BENCHMARK(uc<alg_type::rng, std::uint8_t>);
BENCHMARK(uc<alg_type::rng, std::uint16_t>);
BENCHMARK(uc<alg_type::rng, std::uint32_t>);
BENCHMARK(uc<alg_type::rng, std::uint64_t>);

BENCHMARK_MAIN();
150 changes: 150 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,20 @@ const void* __stdcall __std_search_n_2(const void* _First, const void* _Last, si
const void* __stdcall __std_search_n_4(const void* _First, const void* _Last, size_t _Count, uint32_t _Value) noexcept;
const void* __stdcall __std_search_n_8(const void* _First, const void* _Last, size_t _Count, uint64_t _Value) noexcept;

void* __stdcall __std_remove_copy_1(const void* _First, const void* _Last, void* _Out, uint8_t _Val) noexcept;
void* __stdcall __std_remove_copy_2(const void* _First, const void* _Last, void* _Out, uint16_t _Val) noexcept;
void* __stdcall __std_remove_copy_4(const void* _First, const void* _Last, void* _Out, uint32_t _Val) noexcept;
void* __stdcall __std_remove_copy_8(const void* _First, const void* _Last, void* _Out, uint64_t _Val) noexcept;

void* __stdcall __std_unique_1(void* _First, void* _Last) noexcept;
void* __stdcall __std_unique_2(void* _First, void* _Last) noexcept;
void* __stdcall __std_unique_4(void* _First, void* _Last) noexcept;
void* __stdcall __std_unique_8(void* _First, void* _Last) noexcept;

void* __stdcall __std_unique_copy_1(const void* _First, const void* _Last, void* _Dest) noexcept;
void* __stdcall __std_unique_copy_2(const void* _First, const void* _Last, void* _Dest) noexcept;
void* __stdcall __std_unique_copy_4(const void* _First, const void* _Last, void* _Dest) noexcept;
void* __stdcall __std_unique_copy_8(const void* _First, const void* _Last, void* _Dest) noexcept;
} // extern "C"

_STD_BEGIN
Expand Down Expand Up @@ -257,6 +267,43 @@ _Ty* _Unique_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
}
}

// Forwards [_First, _Last), _Dest, and _Val to the extern "C" __std_remove_copy_N routine whose
// N matches the element width, and returns that routine's result converted back to _Ty*.
// Pointer elements are passed as integers of the platform's pointer width.
template <class _Ty, class _TVal>
_Ty* _Remove_copy_vectorized(
    const _Ty* const _First, const _Ty* const _Last, _Ty* const _Dest, const _TVal _Val) noexcept {
    void* _Dest_end = nullptr;

    if constexpr (is_pointer_v<_Ty>) {
        // Must be tested before the size branches: pointers need reinterpret_cast, not static_cast.
#ifdef _WIN64
        _Dest_end = ::__std_remove_copy_8(_First, _Last, _Dest, reinterpret_cast<uint64_t>(_Val));
#else // ^^^ defined(_WIN64) / !defined(_WIN64) vvv
        _Dest_end = ::__std_remove_copy_4(_First, _Last, _Dest, reinterpret_cast<uint32_t>(_Val));
#endif // ^^^ !defined(_WIN64) ^^^
    } else if constexpr (sizeof(_Ty) == 1) {
        _Dest_end = ::__std_remove_copy_1(_First, _Last, _Dest, static_cast<uint8_t>(_Val));
    } else if constexpr (sizeof(_Ty) == 2) {
        _Dest_end = ::__std_remove_copy_2(_First, _Last, _Dest, static_cast<uint16_t>(_Val));
    } else if constexpr (sizeof(_Ty) == 4) {
        _Dest_end = ::__std_remove_copy_4(_First, _Last, _Dest, static_cast<uint32_t>(_Val));
    } else if constexpr (sizeof(_Ty) == 8) {
        _Dest_end = ::__std_remove_copy_8(_First, _Last, _Dest, static_cast<uint64_t>(_Val));
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size
    }

    return reinterpret_cast<_Ty*>(_Dest_end);
}

// Forwards [_First, _Last) and _Dest to the extern "C" __std_unique_copy_N routine whose N
// matches sizeof(_Ty), and returns that routine's result converted back to _Ty*.
template <class _Ty>
_Ty* _Unique_copy_vectorized(const _Ty* const _First, const _Ty* const _Last, _Ty* const _Dest) noexcept {
    void* _Dest_end = nullptr;

    if constexpr (sizeof(_Ty) == 1) {
        _Dest_end = ::__std_unique_copy_1(_First, _Last, _Dest);
    } else if constexpr (sizeof(_Ty) == 2) {
        _Dest_end = ::__std_unique_copy_2(_First, _Last, _Dest);
    } else if constexpr (sizeof(_Ty) == 4) {
        _Dest_end = ::__std_unique_copy_4(_First, _Last, _Dest);
    } else if constexpr (sizeof(_Ty) == 8) {
        _Dest_end = ::__std_unique_copy_8(_First, _Last, _Dest);
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size
    }

    return reinterpret_cast<_Ty*>(_Dest_end);
}

// Can we activate the vector algorithms for find_first_of?
template <class _It1, class _It2, class _Pr>
constexpr bool _Vector_alg_in_find_first_of_is_safe = _Equal_memcmp_is_safe<_It1, _It2, _Pr>;
Expand All @@ -282,6 +329,17 @@ constexpr bool _Vector_alg_in_search_n_is_safe = _Vector_alg_in_find_is_safe<_It
// Can we activate the vector algorithms for unique?
template <class _Iter, class _Pr>
constexpr bool _Vector_alg_in_unique_is_safe = _Equal_memcmp_is_safe<_Iter, _Iter, _Pr>;

// Can we use this output iterator for remove_copy or unique_copy?
template <class _Out, class _In>
constexpr bool _Output_iterator_for_vector_alg_is_safe() {
if constexpr (_Iterator_is_contiguous<_Out>) {
return is_same_v<_Iter_value_t<_Out>, remove_const_t<_Iter_value_t<_In>>>;
} else {
return false;
}
}

_STD_END
#endif // _USE_STD_VECTOR_ALGORITHMS

Expand Down Expand Up @@ -4718,6 +4776,33 @@ _CONSTEXPR20 _OutIt remove_copy(_InIt _First, _InIt _Last, _OutIt _Dest, const _
auto _UFirst = _STD _Get_unwrapped(_First);
const auto _ULast = _STD _Get_unwrapped(_Last);
auto _UDest = _STD _Get_unwrapped_unverified(_Dest);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_find_is_safe<decltype(_UFirst), _Ty>
&& _Output_iterator_for_vector_alg_is_safe<decltype(_UDest), decltype(_UFirst)>()) {
if (!_STD _Is_constant_evaluated()) {
if (!_STD _Could_compare_equal_to_value_type<decltype(_UFirst)>(_Val)) {
_UDest = _STD _Copy_unchecked(_UFirst, _ULast, _UDest);
_STD _Seek_wrapped(_Dest, _UDest);
return _Dest;
}

const auto _Dest_ptr = _STD _To_address(_UDest);
const auto _Result =
_STD _Remove_copy_vectorized(_STD _To_address(_UFirst), _STD _To_address(_ULast), _Dest_ptr, _Val);

if constexpr (is_pointer_v<decltype(_UDest)>) {
_UDest = _Result;
} else {
_UDest += _Result - _Dest_ptr;
}

_STD _Seek_wrapped(_Dest, _UDest);
return _Dest;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (; _UFirst != _ULast; ++_UFirst) {
if (!(*_UFirst == _Val)) {
*_UDest = *_UFirst;
Expand Down Expand Up @@ -4943,6 +5028,31 @@ namespace ranges {
_STL_INTERNAL_STATIC_ASSERT(indirectly_copyable<_It, _Out>);
_STL_INTERNAL_STATIC_ASSERT(indirect_binary_predicate<ranges::equal_to, projected<_It, _Pj>, const _Ty*>);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> && _Output_iterator_for_vector_alg_is_safe<_Out, _It>()
&& sized_sentinel_for<_Se, _It> && is_same_v<_Pj, identity>) {
if (!_STD is_constant_evaluated()) {
const auto _Size = _Last - _First;
auto _End = _First + _Size;

if (!_STD _Could_compare_equal_to_value_type<_It>(_Val)) {
_Output = _STD _Copy_unchecked(_First, _Last, _Output);
return {_STD move(_End), _STD move(_Output)};
}

const auto _Dest_ptr = _STD to_address(_Output);
const auto _Result =
_STD _Remove_copy_vectorized(_STD to_address(_First), _STD to_address(_End), _Dest_ptr, _Val);

if constexpr (is_pointer_v<_Out>) {
return {_STD move(_End), _Result};
} else {
return {_STD move(_End), _STD move(_Output) + (_Result - _Dest_ptr)};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (; _First != _Last; ++_First) {
if (_STD invoke(_Proj, *_First) != _Val) {
*_Output = *_First;
Expand Down Expand Up @@ -5190,6 +5300,26 @@ _CONSTEXPR20 _OutIt unique_copy(_InIt _First, _InIt _Last, _OutIt _Dest, _Pr _Pr

auto _UDest = _STD _Get_unwrapped_unverified(_Dest);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_unique_is_safe<decltype(_UFirst), _Pr>
&& _Output_iterator_for_vector_alg_is_safe<decltype(_UDest), decltype(_UFirst)>()) {
if (!_STD _Is_constant_evaluated()) {
const auto _First_ptr = _STD _To_address(_UFirst);
const auto _Dest_ptr = _STD _To_address(_UDest);
const auto _Result = _STD _Unique_copy_vectorized(_First_ptr, _STD _To_address(_ULast), _Dest_ptr);

if constexpr (is_pointer_v<decltype(_UDest)>) {
_UDest = _Result;
} else {
_UDest += _Result - _Dest_ptr;
}

_STD _Seek_wrapped(_Dest, _UDest);
return _Dest;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

if constexpr (_Is_ranges_fwd_iter_v<_InIt>) { // can reread the source for comparison
auto _Firstb = _UFirst;

Expand Down Expand Up @@ -5317,6 +5447,26 @@ namespace ranges {
return {_STD move(_First), _STD move(_Output)};
}

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && sized_sentinel_for<_Se, _It>
&& _Vector_alg_in_unique_is_safe<_It, _Pr>
&& _Output_iterator_for_vector_alg_is_safe<_Out, _It>()) {
if (!_STD is_constant_evaluated()) {
const auto _Size = _Last - _First;
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + static_cast<size_t>(_Size);
const auto _Output_ptr = _STD to_address(_Output);
const auto _Result = _STD _Unique_copy_vectorized(_First_ptr, _Last_ptr, _Output_ptr);

if constexpr (is_pointer_v<_It> && is_pointer_v<_Out>) {
return {_Last_ptr, _Result};
} else {
return {_STD move(_First) + _Size, _STD move(_Output) + (_Result - _Output_ptr)};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

if constexpr (_Is_input_with_value_type<_Out, iter_value_t<_It>>) {
// Can reread _Output
*_Output = *_First;
Expand Down
Loading