|
17 | 17 | namespace LIBC_NAMESPACE_DECL {
|
18 | 18 | namespace internal {
|
19 | 19 |
|
20 |
| -using Compare = int(const void *, const void *); |
21 |
| -using CompareWithState = int(const void *, const void *, void *); |
22 |
| - |
23 |
| -enum class CompType { COMPARE, COMPARE_WITH_STATE }; |
24 |
| - |
25 |
| -struct Comparator { |
26 |
| - union { |
27 |
| - Compare *comp_func; |
28 |
| - CompareWithState *comp_func_r; |
29 |
| - }; |
30 |
| - const CompType comp_type; |
31 |
| - |
32 |
| - void *arg; |
33 |
| - |
34 |
| - Comparator(Compare *func) |
35 |
| - : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {} |
36 |
| - |
37 |
| - Comparator(CompareWithState *func, void *arg_val) |
38 |
| - : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE), |
39 |
| - arg(arg_val) {} |
40 |
| - |
41 |
| -#if defined(__clang__) |
42 |
| - // Recent upstream changes to -fsanitize=function find more instances of |
43 |
| - // function type mismatches. One case is with the comparator passed to this |
44 |
| - // class. Libraries will tend to pass comparators that take pointers to |
45 |
| - // varying types while this comparator expects to accept const void pointers. |
46 |
| - // Ideally those tools would pass a function that strictly accepts const |
47 |
| - // void*s to avoid UB, or would use qsort_r to pass their own comparator. |
48 |
| - [[clang::no_sanitize("function")]] |
49 |
| -#endif |
50 |
| - int comp_vals(const void *a, const void *b) const { |
51 |
| - if (comp_type == CompType::COMPARE) { |
52 |
| - return comp_func(a, b); |
53 |
| - } else { |
54 |
| - return comp_func_r(a, b, arg); |
| 20 | +class ArrayGenericSize { |
| 21 | + cpp::byte *array_base; |
| 22 | + size_t array_len; |
| 23 | + size_t elem_size; |
| 24 | + |
| 25 | + LIBC_INLINE cpp::byte *get_internal(size_t i) const { |
| 26 | + return array_base + (i * elem_size); |
| 27 | + } |
| 28 | + |
| 29 | +public: |
| 30 | + LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e) |
| 31 | + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s), |
| 32 | + elem_size(e) {} |
| 33 | + |
| 34 | + static constexpr bool has_fixed_size() { return false; } |
| 35 | + |
| 36 | + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } |
| 37 | + |
| 38 | + LIBC_INLINE void swap(size_t i, size_t j) const { |
| 39 | + // It's possible to use 8 byte blocks with `uint64_t`, but that |
| 40 | + // generates more machine code as the remainder loop gets |
| 41 | + // unrolled, plus 4 byte operations are more likely to be |
| 42 | + // efficient on a wider variety of hardware. On x86 LLVM tends |
| 43 | + // to unroll the block loop again into 2 16 byte swaps per |
| 44 | + // iteration which is another reason that 4 byte blocks yields |
| 45 | + // good performance even for big types. |
| 46 | + using block_t = uint32_t; |
| 47 | + constexpr size_t BLOCK_SIZE = sizeof(block_t); |
| 48 | + |
| 49 | + alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE]; |
| 50 | + |
| 51 | + cpp::byte *elem_i = get_internal(i); |
| 52 | + cpp::byte *elem_j = get_internal(j); |
| 53 | + |
| 54 | + const size_t elem_size_rem = elem_size % BLOCK_SIZE; |
| 55 | + const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); |
| 56 | + |
| 57 | + while (elem_i != elem_i_block_end) { |
| 58 | + __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); |
| 59 | + __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); |
| 60 | + __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); |
| 61 | + |
| 62 | + elem_i += BLOCK_SIZE; |
| 63 | + elem_j += BLOCK_SIZE; |
| 64 | + } |
| 65 | + |
| 66 | + for (size_t n = 0; n < elem_size_rem; ++n) { |
| 67 | + cpp::byte tmp = elem_i[n]; |
| 68 | + elem_i[n] = elem_j[n]; |
| 69 | + elem_j[n] = tmp; |
55 | 70 | }
|
56 | 71 | }
|
| 72 | + |
| 73 | + LIBC_INLINE size_t len() const { return array_len; } |
| 74 | + |
| 75 | + // Make an Array starting at index |i| and length |s|. |
| 76 | + LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const { |
| 77 | + return ArrayGenericSize(get_internal(i), s, elem_size); |
| 78 | + } |
| 79 | + |
| 80 | + // Reset this Array to point at a different interval of the same |
| 81 | + // items starting at index |i|. |
| 82 | + LIBC_INLINE void reset_bounds(size_t i, size_t s) { |
| 83 | + array_base = get_internal(i); |
| 84 | + array_len = s; |
| 85 | + } |
57 | 86 | };
|
58 | 87 |
|
59 |
| -class Array { |
60 |
| - uint8_t *array; |
61 |
| - size_t array_size; |
62 |
| - size_t elem_size; |
63 |
| - Comparator compare; |
| 88 | +// Having a specialized Array type for sorting that knows at |
| 89 | +// compile-time what the size of the element is, allows for much more |
| 90 | +// efficient swapping and for cheaper offset calculations. |
| 91 | +template <size_t ELEM_SIZE> class ArrayFixedSize { |
| 92 | + cpp::byte *array_base; |
| 93 | + size_t array_len; |
64 | 94 |
|
65 |
| -public: |
66 |
| - Array(uint8_t *a, size_t s, size_t e, Comparator c) |
67 |
| - : array(a), array_size(s), elem_size(e), compare(c) {} |
68 |
| - |
69 |
| - uint8_t *get(size_t i) const { return array + i * elem_size; } |
70 |
| - |
71 |
| - void swap(size_t i, size_t j) const { |
72 |
| - uint8_t *elem_i = get(i); |
73 |
| - uint8_t *elem_j = get(j); |
74 |
| - for (size_t b = 0; b < elem_size; ++b) { |
75 |
| - uint8_t temp = elem_i[b]; |
76 |
| - elem_i[b] = elem_j[b]; |
77 |
| - elem_j[b] = temp; |
78 |
| - } |
| 95 | + LIBC_INLINE cpp::byte *get_internal(size_t i) const { |
| 96 | + return array_base + (i * ELEM_SIZE); |
79 | 97 | }
|
80 | 98 |
|
81 |
| - int elem_compare(size_t i, const uint8_t *other) const { |
82 |
| - // An element must compare equal to itself so we don't need to consult the |
83 |
| - // user provided comparator. |
84 |
| - if (get(i) == other) |
85 |
| - return 0; |
86 |
| - return compare.comp_vals(get(i), other); |
| 99 | +public: |
| 100 | + LIBC_INLINE ArrayFixedSize(void *a, size_t s) |
| 101 | + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {} |
| 102 | + |
| 103 | + // Beware this function is used a heuristic for cheap to swap types, so |
| 104 | + // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad |
| 105 | + // idea perf wise. |
| 106 | + static constexpr bool has_fixed_size() { return true; } |
| 107 | + |
| 108 | + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } |
| 109 | + |
| 110 | + LIBC_INLINE void swap(size_t i, size_t j) const { |
| 111 | + alignas(32) cpp::byte tmp[ELEM_SIZE]; |
| 112 | + |
| 113 | + cpp::byte *elem_i = get_internal(i); |
| 114 | + cpp::byte *elem_j = get_internal(j); |
| 115 | + |
| 116 | + __builtin_memcpy(tmp, elem_i, ELEM_SIZE); |
| 117 | + __builtin_memmove(elem_i, elem_j, ELEM_SIZE); |
| 118 | + __builtin_memcpy(elem_j, tmp, ELEM_SIZE); |
87 | 119 | }
|
88 | 120 |
|
89 |
| - size_t size() const { return array_size; } |
| 121 | + LIBC_INLINE size_t len() const { return array_len; } |
90 | 122 |
|
91 |
| - // Make an Array starting at index |i| and size |s|. |
92 |
| - LIBC_INLINE Array make_array(size_t i, size_t s) const { |
93 |
| - return Array(get(i), s, elem_size, compare); |
| 123 | + // Make an Array starting at index |i| and length |s|. |
| 124 | + LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const { |
| 125 | + return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s); |
94 | 126 | }
|
95 | 127 |
|
96 |
| - // Reset this Array to point at a different interval of the same items. |
97 |
| - LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) { |
98 |
| - array = a; |
99 |
| - array_size = s; |
| 128 | + // Reset this Array to point at a different interval of the same |
| 129 | + // items starting at index |i|. |
| 130 | + LIBC_INLINE void reset_bounds(size_t i, size_t s) { |
| 131 | + array_base = get_internal(i); |
| 132 | + array_len = s; |
100 | 133 | }
|
101 | 134 | };
|
102 | 135 |
|
103 |
| -using SortingRoutine = void(const Array &); |
104 |
| - |
105 | 136 | } // namespace internal
|
106 | 137 | } // namespace LIBC_NAMESPACE_DECL
|
107 | 138 |
|
|
0 commit comments