Skip to content

Commit 32e360c

Browse files
committed
Reapply "[libc] Improve qsort" (llvm#121303)
This reverts commit 0b96f1c.
1 parent 4a890c2 commit 32e360c

File tree

14 files changed

+539
-291
lines changed

14 files changed

+539
-291
lines changed

Diff for: libc/src/stdlib/heap_sort.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ namespace internal {
1818
// A simple in-place heapsort implementation.
1919
// Follow the implementation in https://en.wikipedia.org/wiki/Heapsort.
2020

21-
LIBC_INLINE void heap_sort(const Array &array) {
22-
size_t end = array.size();
21+
template <typename A, typename F>
22+
LIBC_INLINE void heap_sort(const A &array, const F &is_less) {
23+
size_t end = array.len();
2324
size_t start = end / 2;
2425

25-
auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
26+
const auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
2627

2728
while (end > 1) {
2829
if (start > 0) {
@@ -40,12 +41,11 @@ LIBC_INLINE void heap_sort(const Array &array) {
4041
while (left_child(root) < end) {
4142
size_t child = left_child(root);
4243
// If there are two children, set child to the greater.
43-
if (child + 1 < end &&
44-
array.elem_compare(child, array.get(child + 1)) < 0)
44+
if ((child + 1 < end) && is_less(array.get(child), array.get(child + 1)))
4545
++child;
4646

4747
// Stop sifting once the root is not less than the greater child.
48-
if (array.elem_compare(root, array.get(child)) >= 0)
48+
if (!is_less(array.get(root), array.get(child)))
4949
break;
5050

5151
// Swap the root with the greater child and continue sifting down.

Diff for: libc/src/stdlib/qsort.cpp

+4-6
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL {
1818
LLVM_LIBC_FUNCTION(void, qsort,
1919
(void *array, size_t array_size, size_t elem_size,
2020
int (*compare)(const void *, const void *))) {
21-
if (array == nullptr || array_size == 0 || elem_size == 0)
22-
return;
23-
internal::Comparator c(compare);
2421

25-
auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
26-
elem_size, c);
22+
const auto is_less = [compare](const void *a, const void *b) -> bool {
23+
return compare(a, b) < 0;
24+
};
2725

28-
internal::sort(arr);
26+
internal::unstable_sort(array, array_size, elem_size, is_less);
2927
}
3028

3129
} // namespace LIBC_NAMESPACE_DECL

Diff for: libc/src/stdlib/qsort_data.h

+101-70
Original file line numberDiff line numberDiff line change
@@ -17,91 +17,122 @@
1717
namespace LIBC_NAMESPACE_DECL {
1818
namespace internal {
1919

20-
using Compare = int(const void *, const void *);
21-
using CompareWithState = int(const void *, const void *, void *);
22-
23-
enum class CompType { COMPARE, COMPARE_WITH_STATE };
24-
25-
struct Comparator {
26-
union {
27-
Compare *comp_func;
28-
CompareWithState *comp_func_r;
29-
};
30-
const CompType comp_type;
31-
32-
void *arg;
33-
34-
Comparator(Compare *func)
35-
: comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {}
36-
37-
Comparator(CompareWithState *func, void *arg_val)
38-
: comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE),
39-
arg(arg_val) {}
40-
41-
#if defined(__clang__)
42-
// Recent upstream changes to -fsanitize=function find more instances of
43-
// function type mismatches. One case is with the comparator passed to this
44-
// class. Libraries will tend to pass comparators that take pointers to
45-
// varying types while this comparator expects to accept const void pointers.
46-
// Ideally those tools would pass a function that strictly accepts const
47-
// void*s to avoid UB, or would use qsort_r to pass their own comparator.
48-
[[clang::no_sanitize("function")]]
49-
#endif
50-
int comp_vals(const void *a, const void *b) const {
51-
if (comp_type == CompType::COMPARE) {
52-
return comp_func(a, b);
53-
} else {
54-
return comp_func_r(a, b, arg);
20+
class ArrayGenericSize {
21+
cpp::byte *array_base;
22+
size_t array_len;
23+
size_t elem_size;
24+
25+
LIBC_INLINE cpp::byte *get_internal(size_t i) const {
26+
return array_base + (i * elem_size);
27+
}
28+
29+
public:
30+
LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
31+
: array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
32+
elem_size(e) {}
33+
34+
static constexpr bool has_fixed_size() { return false; }
35+
36+
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
37+
38+
LIBC_INLINE void swap(size_t i, size_t j) const {
39+
// It's possible to use 8 byte blocks with `uint64_t`, but that
40+
// generates more machine code as the remainder loop gets
41+
// unrolled, plus 4 byte operations are more likely to be
42+
// efficient on a wider variety of hardware. On x86 LLVM tends
43+
// to unroll the block loop again into 2 16 byte swaps per
44+
// iteration which is another reason that 4 byte blocks yields
45+
// good performance even for big types.
46+
using block_t = uint32_t;
47+
constexpr size_t BLOCK_SIZE = sizeof(block_t);
48+
49+
alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
50+
51+
cpp::byte *elem_i = get_internal(i);
52+
cpp::byte *elem_j = get_internal(j);
53+
54+
const size_t elem_size_rem = elem_size % BLOCK_SIZE;
55+
const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
56+
57+
while (elem_i != elem_i_block_end) {
58+
__builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
59+
__builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
60+
__builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
61+
62+
elem_i += BLOCK_SIZE;
63+
elem_j += BLOCK_SIZE;
64+
}
65+
66+
for (size_t n = 0; n < elem_size_rem; ++n) {
67+
cpp::byte tmp = elem_i[n];
68+
elem_i[n] = elem_j[n];
69+
elem_j[n] = tmp;
5570
}
5671
}
72+
73+
LIBC_INLINE size_t len() const { return array_len; }
74+
75+
// Make an Array starting at index |i| and length |s|.
76+
LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
77+
return ArrayGenericSize(get_internal(i), s, elem_size);
78+
}
79+
80+
// Reset this Array to point at a different interval of the same
81+
// items starting at index |i|.
82+
LIBC_INLINE void reset_bounds(size_t i, size_t s) {
83+
array_base = get_internal(i);
84+
array_len = s;
85+
}
5786
};
5887

59-
class Array {
60-
uint8_t *array;
61-
size_t array_size;
62-
size_t elem_size;
63-
Comparator compare;
88+
// Having a specialized Array type for sorting that knows at
89+
// compile-time what the size of the element is, allows for much more
90+
// efficient swapping and for cheaper offset calculations.
91+
template <size_t ELEM_SIZE> class ArrayFixedSize {
92+
cpp::byte *array_base;
93+
size_t array_len;
6494

65-
public:
66-
Array(uint8_t *a, size_t s, size_t e, Comparator c)
67-
: array(a), array_size(s), elem_size(e), compare(c) {}
68-
69-
uint8_t *get(size_t i) const { return array + i * elem_size; }
70-
71-
void swap(size_t i, size_t j) const {
72-
uint8_t *elem_i = get(i);
73-
uint8_t *elem_j = get(j);
74-
for (size_t b = 0; b < elem_size; ++b) {
75-
uint8_t temp = elem_i[b];
76-
elem_i[b] = elem_j[b];
77-
elem_j[b] = temp;
78-
}
95+
LIBC_INLINE cpp::byte *get_internal(size_t i) const {
96+
return array_base + (i * ELEM_SIZE);
7997
}
8098

81-
int elem_compare(size_t i, const uint8_t *other) const {
82-
// An element must compare equal to itself so we don't need to consult the
83-
// user provided comparator.
84-
if (get(i) == other)
85-
return 0;
86-
return compare.comp_vals(get(i), other);
99+
public:
100+
LIBC_INLINE ArrayFixedSize(void *a, size_t s)
101+
: array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
102+
103+
// Beware: this function is used as a heuristic for cheap-to-swap types, so
104+
// instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
105+
// idea perf wise.
106+
static constexpr bool has_fixed_size() { return true; }
107+
108+
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
109+
110+
LIBC_INLINE void swap(size_t i, size_t j) const {
111+
alignas(32) cpp::byte tmp[ELEM_SIZE];
112+
113+
cpp::byte *elem_i = get_internal(i);
114+
cpp::byte *elem_j = get_internal(j);
115+
116+
__builtin_memcpy(tmp, elem_i, ELEM_SIZE);
117+
__builtin_memmove(elem_i, elem_j, ELEM_SIZE);
118+
__builtin_memcpy(elem_j, tmp, ELEM_SIZE);
87119
}
88120

89-
size_t size() const { return array_size; }
121+
LIBC_INLINE size_t len() const { return array_len; }
90122

91-
// Make an Array starting at index |i| and size |s|.
92-
LIBC_INLINE Array make_array(size_t i, size_t s) const {
93-
return Array(get(i), s, elem_size, compare);
123+
// Make an Array starting at index |i| and length |s|.
124+
LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
125+
return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
94126
}
95127

96-
// Reset this Array to point at a different interval of the same items.
97-
LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) {
98-
array = a;
99-
array_size = s;
128+
// Reset this Array to point at a different interval of the same
129+
// items starting at index |i|.
130+
LIBC_INLINE void reset_bounds(size_t i, size_t s) {
131+
array_base = get_internal(i);
132+
array_len = s;
100133
}
101134
};
102135

103-
using SortingRoutine = void(const Array &);
104-
105136
} // namespace internal
106137
} // namespace LIBC_NAMESPACE_DECL
107138

Diff for: libc/src/stdlib/qsort_pivot.h

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//===-- Implementation header for qsort utilities ---------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
10+
#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
11+
12+
#include <stddef.h> // For size_t
#include <stdint.h>
13+
14+
namespace LIBC_NAMESPACE_DECL {
15+
namespace internal {
16+
17+
// Recursively select a pseudomedian if above this threshold.
constexpr size_t PSEUDO_MEDIAN_REC_THRESHOLD = 64;

// NOTE: the helpers below are defined before their callers so that every
// call resolves through ordinary lookup. Otherwise the calls would only be
// found through argument-dependent lookup at instantiation time, which
// fails for array types whose associated namespaces do not include this one.

/// Calculates the median of 3 elements.
///
/// `a`, `b` and `c` are indices into `array`; the elements are read through
/// `array.get()` and compared with `is_less(lhs, rhs)`, which must return
/// true when `lhs` orders before `rhs`. Returns the index (one of `a`, `b`,
/// `c`) whose element is the median. `is_less` is invoked two or three times.
template <typename A, typename F>
size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) {
  const void *a_ptr = array.get(a);
  const void *b_ptr = array.get(b);
  const void *c_ptr = array.get(c);

  const bool x = is_less(a_ptr, b_ptr);
  const bool y = is_less(a_ptr, c_ptr);
  if (x == y) {
    // If x=y=0 then b, c <= a. In this case we want to return max(b, c).
    // If x=y=1 then a < b, c. In this case we want to return min(b, c).
    // By toggling the outcome of b < c using XOR x we get this behavior.
    const bool z = is_less(b_ptr, c_ptr);
    return z ^ x ? c : b;
  } else {
    // Either c <= a < b or b <= a < c, thus a is our median.
    return a;
  }
}

// Calculates an approximate median of 3 elements from sections a, b, c, or
// recursively from an approximation of each, if they're large enough. By
// dividing the size of each section by 8 when recursing we have logarithmic
// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) =
// O(n^(log(3)/log(8))) ~= O(n^0.528) elements.
//
// `a`, `b` and `c` are the start indices of three sections of `n` elements
// each; the return value is an index into `array`.
template <typename A, typename F>
size_t median3_rec(const A &array, size_t a, size_t b, size_t c, size_t n,
                   const F &is_less) {
  if (n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD) {
    // Replace each section start by the pseudomedian of that section, sampled
    // at offsets 0, 4/8 and 7/8 of the section.
    const size_t n8 = n / 8;
    a = median3_rec(array, a, a + (n8 * 4), a + (n8 * 7), n8, is_less);
    b = median3_rec(array, b, b + (n8 * 4), b + (n8 * 7), n8, is_less);
    c = median3_rec(array, c, c + (n8 * 4), c + (n8 * 7), n8, is_less);
  }
  return median3(array, a, b, c, is_less);
}

// Selects a pivot from `array`. Algorithm taken from glidesort by Orson Peters.
//
// This chooses a pivot by sampling an adaptive amount of points, approximating
// the quality of a median of sqrt(n) elements.
//
// Returns the index of the chosen pivot element. Arrays with fewer than 8
// elements always yield index 0 without calling `is_less`.
template <typename A, typename F>
size_t choose_pivot(const A &array, const F &is_less) {
  const size_t len = array.len();

  if (len < 8) {
    return 0;
  }

  const size_t len_div_8 = len / 8;

  const size_t a = 0;             // [0, floor(n/8))
  const size_t b = len_div_8 * 4; // [4*floor(n/8), 5*floor(n/8))
  const size_t c = len_div_8 * 7; // [7*floor(n/8), 8*floor(n/8))

  if (len < PSEUDO_MEDIAN_REC_THRESHOLD)
    return median3(array, a, b, c, is_less);
  else
    return median3_rec(array, a, b, c, len_div_8, is_less);
}
81+
82+
} // namespace internal
83+
} // namespace LIBC_NAMESPACE_DECL
84+
85+
#endif // LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H

Diff for: libc/src/stdlib/qsort_r.cpp

+5-6
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,12 @@ LLVM_LIBC_FUNCTION(void, qsort_r,
1919
(void *array, size_t array_size, size_t elem_size,
2020
int (*compare)(const void *, const void *, void *),
2121
void *arg)) {
22-
if (array == nullptr || array_size == 0 || elem_size == 0)
23-
return;
24-
internal::Comparator c(compare, arg);
25-
auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
26-
elem_size, c);
2722

28-
internal::sort(arr);
23+
const auto is_less = [compare, arg](const void *a, const void *b) -> bool {
24+
return compare(a, b, arg) < 0;
25+
};
26+
27+
internal::unstable_sort(array, array_size, elem_size, is_less);
2928
}
3029

3130
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)