-
Notifications
You must be signed in to change notification settings - Fork 1
/
simd_split.h
172 lines (161 loc) · 6.79 KB
/
simd_split.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/* SPDX-License-Identifier: GPL-3.0-or-later WITH GCC-exception-3.1 */
/* Copyright © 2023-2024 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
* Matthias Kretz <m.kretz@gsi.de>
*/
#ifndef PROTOTYPE_SIMD_SPLIT_H_
#define PROTOTYPE_SIMD_SPLIT_H_
#include "simd.h"
#include <array>
#include <tuple>
namespace std
{
namespace __detail
{
template <typename _Tp, size_t>
using __repeat_type = _Tp;
template <typename _T0, typename... _Ts>
_GLIBCXX_SIMD_INTRINSIC constexpr typename _T0::value_type
__get_simd_element_from_pack(auto __i, const _T0& __x, const _Ts&... __pack)
{
if constexpr (__i.value < _T0::size.value)
return __x[__i.value];
else
return __get_simd_element_from_pack(__ic<__i.value - _T0::size.value>, __pack...);
}
}
template <typename _V, typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr auto
simd_split(const basic_simd<_Tp, _Abi>& __x) noexcept
{
constexpr int __in = simd_size_v<_Tp, _Abi>;
constexpr int __out = _V::size();
constexpr int __rem = __in % __out;
if constexpr (__rem == 0)
{ // -> array
using _Rp = std::array<_V, __in / __out>;
if constexpr (sizeof(_Rp) == sizeof(__x))
return __builtin_bit_cast(_Rp, __x);
else
return [&]<size_t... _Is>(std::index_sequence<_Is...>) {
return _Rp {_V([&](auto __i) { return __x[__i + __out * _Is]; })...};
}(std::make_index_sequence<__in / __out>());
}
else // -> tuple
return [&]<size_t... _Is>(std::index_sequence<_Is...>) {
using _Rem = std::resize_simd_t<__rem, _V>;
using _Rp = std::tuple<__detail::__repeat_type<_V, _Is>..., _Rem>;
return _Rp {_V([&](auto __i) {
return __x[__i + __out * _Is];
})..., _Rem([&](auto __i) {
return __x[__i + __in - __rem];
})};
}(std::make_index_sequence<__in / __out>());
}
template <typename _M, size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr auto
simd_split(const basic_simd_mask<_Bs, _Abi>& __x) noexcept
{
constexpr int __in = basic_simd_mask<_Bs, _Abi>::size();
constexpr int __out = _M::size();
constexpr int __rem = __in % __out;
if constexpr (__rem == 0)
{ // -> array
using _Rp = std::array<_M, __in / __out>;
if constexpr (sizeof(_Rp) == sizeof(__x))
return __builtin_bit_cast(_Rp, __x);
else
return [&]<size_t... _Is>(std::index_sequence<_Is...>) {
return _Rp {_M([&](auto __i) { return __x[__i + __out * _Is]; })...};
}(std::make_index_sequence<__in / __out>());
}
else // -> tuple
return [&]<size_t... _Is>(std::index_sequence<_Is...>) {
using _Rem = std::resize_simd_t<__rem, _M>;
using _Rp = std::tuple<__detail::__repeat_type<_M, _Is>..., _Rem>;
return _Rp {_M([&](auto __i) {
return __x[__i + __out * _Is];
})..., _Rem([&](auto __i) {
return __x[__i + __in - __rem];
})};
}(std::make_index_sequence<__in / __out>());
}
namespace __detail
{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC constexpr auto
__as_simd_builtin(_Tp __x) noexcept
{
if constexpr (_Tp::size.value == 1)
{
using _Vp [[__gnu__::__vector_size__(sizeof(_Tp))]] = typename _Tp::value_type;
return _Vp {__data(__x)};
}
else if constexpr (__vec_builtin<__remove_cvref_t<decltype(__data(__x))>>)
return __data(__x);
else
return __data(__x)._M_data;
}
template <typename _T0, typename _T1, typename... _Ts>
_GLIBCXX_SIMD_INTRINSIC constexpr auto
__cat_recursive(const _T0& __x0, const _T1& __x1, const _Ts&... __xs) noexcept
{
using _Tp = typename _T0::value_type;
constexpr int __size = _T0::size.value + _T1::size.value;
const std::resize_simd_t<__size, _T0>
__x01{__private_init,
[&]<_SimdSizeType... _Is, _SimdSizeType... _Js, _SimdSizeType... _Ks>
[[__gnu__::__always_inline__]]
(_SimdIndexSequence<_Is...>, _SimdIndexSequence<_Js...>,
_SimdIndexSequence<_Ks...>) {
#ifdef __clang__
constexpr int __simd_bytes = std::__bit_ceil(__size) * sizeof(_Tp);
using _Rp [[__gnu__::__vector_size__(__simd_bytes)]] = _Tp;
return _Rp {
__vec_get(__as_simd_builtin(__x0), _Is)...,
__vec_get(__as_simd_builtin(__x1), _Js)...,
((void)_Ks, 0)...
};
#else
constexpr int __x0_size = sizeof(__x0) / sizeof(_Tp);
return __builtin_shufflevector(__as_simd_builtin(__x0), __as_simd_builtin(__x1),
_Is..., __x0_size + _Js...,
((void)_Ks, -1)...);
#endif
}(_MakeSimdIndexSequence<_T0::size.value>(),
_MakeSimdIndexSequence<_T1::size.value>(),
_MakeSimdIndexSequence<std::__bit_ceil(__size) - __size>())};
if constexpr (sizeof...(_Ts) == 0)
return __x01;
else
return __cat_recursive(__x01, __xs...);
}
}
template <typename _Tp, typename... _Abis>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr
simd<_Tp, (simd_size_v<_Tp, _Abis> + ...)>
simd_cat(const basic_simd<_Tp, _Abis>&... __xs) noexcept
{
constexpr int __size = (simd_size_v<_Tp, _Abis> + ...);
if constexpr (sizeof...(_Abis) == 1)
return simd<_Tp, __size>(__xs...);
else if constexpr (__size <= simd_size_v<_Tp>)
return __detail::__cat_recursive(__xs...);
else
return simd<_Tp, (simd_size_v<_Tp, _Abis> + ...)>([&] [[__gnu__::__always_inline__]]
(auto __i) {
return __detail::__get_simd_element_from_pack(__i, __xs...);
});
}
template <size_t _Bs, typename... _Abis>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr
simd_mask<__detail::__mask_integer_from<_Bs>, (basic_simd_mask<_Bs, _Abis>::size.value + ...)>
simd_cat(const basic_simd_mask<_Bs, _Abis>&... __xs) noexcept
{
return simd_mask<__detail::__mask_integer_from<_Bs>,
(basic_simd_mask<_Bs, _Abis>::size.value + ...)>(
[&] [[__gnu__::__always_inline__]] (auto __i) {
return __detail::__get_simd_element_from_pack(__i, __xs...);
});
}
}
#endif // PROTOTYPE_SIMD_SPLIT_H_