diff --git a/godbolt/Vc b/godbolt/Vc index a8a57b733..f29b4977e 100644 --- a/godbolt/Vc +++ b/godbolt/Vc @@ -476,8 +476,8 @@ SSE2Impl } #ifndef VC_VERSION_H_ #define VC_VERSION_H_ -#define Vc_VERSION_STRING "1.4.2-dev" -#define Vc_VERSION_NUMBER 0x010405 +#define Vc_VERSION_STRING "1.4.3-dev" +#define Vc_VERSION_NUMBER 0x010407 #define Vc_VERSION_CHECK(major,minor,patch) ((major << 16) | (minor << 8) | (patch << 1)) #define Vc_LIBRARY_ABI_VERSION 5 #define Vc_IS_VERSION_2 (Vc_VERSION_NUMBER >= Vc_VERSION_CHECK(1, 70, 0)) @@ -913,9 +913,6 @@ using new_type_ alignas(sizeof(n_)) = type_ #if defined Vc_GCC && Vc_GCC >= 0x60000 #define Vc_TEMPLATES_DROP_ATTRIBUTES 1 #endif -#if Vc_IS_VERSION_2 || (defined Vc_GCC && Vc_GCC >= 0x60000) -#define Vc_RECURSIVE_MEMORY 1 -#endif #if defined Vc_CLANG || defined Vc_APPLECLANG #define Vc_UNREACHABLE __builtin_unreachable #define Vc_NEVER_INLINE [[gnu::noinline]] @@ -1134,11 +1131,6 @@ size(macro, %, a, b, c, d) #define Vc_EXACT_TYPE(_test,_reference,_type) \ typename std::enable_if::value, _type>::type #define Vc_make_unique(name) Vc_CAT(Vc_,name,_,__LINE__) -#if defined(Vc_ICC) || defined(Vc_CLANG) || defined Vc_APPLECLANG -#define Vc_OFFSETOF(Type,member) (reinterpret_cast(&reinterpret_cast(0)->member) - reinterpret_cast(0)) -#else -#define Vc_OFFSETOF(Type,member) offsetof(Type, member) -#endif #if defined(Vc_NO_NOEXCEPT) #define Vc_NOEXCEPT throw() #else @@ -1187,6 +1179,9 @@ constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero = {}; namespace Detail { template struct MayAliasImpl { +#ifdef Vc_ICC +#pragma warning(disable:2621) +#endif #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wattributes" @@ -1195,13 +1190,12 @@ typedef T type Vc_MAY_ALIAS; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif +#ifdef Vc_ICC +#pragma warning(enable:2621) +#endif }; } -#ifdef Vc_ICC -template using MayAlias [[gnu::may_alias]] = T; -#else template using MayAlias = typename Detail::MayAliasImpl::type; -#endif template MayAlias &aliasing_cast(From &x) { return *reinterpret_cast *>(&x); @@ -2300,6 +2294,137 @@ Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation( Common::make_gather<1>(mem, Common::convertIndexVector(indexes)), mask); } +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, +const EntryType S1::*member1, +IT indexes) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1] +.gatherArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, +const EntryType S1::*member1, +IT indexes, MaskArgument mask) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1] +.gatherArguments(), +mask); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, +const S2 S1::*member1, +const EntryType S2::*member2, +IT indexes) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1][member2] +.gatherArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, +const S2 S1::*member1, +const EntryType S2::*member2, +IT indexes, MaskArgument mask) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1][member2] +.gatherArguments(), +mask); +} +template +Vc_DEPRECATED( +"use the subscript operator to Vc::array 
or Vc::vector " +"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, +const EntryType *const S1::*ptrMember1, +IT1 outerIndexes, IT2 innerIndexes) +{ +gather(Common::SubscriptOperation, true>( +array, outerIndexes)[ptrMember1][innerIndexes] +.gatherArguments()); +} +template +Vc_DEPRECATED( +"use the subscript operator to Vc::array or Vc::vector " +"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, +const EntryType *const S1::*ptrMember1, +IT1 outerIndexes, IT2 innerIndexes, +MaskArgument mask) +{ +gather(Common::SubscriptOperation, true>( +array, outerIndexes)[ptrMember1][innerIndexes] +.gatherArguments(), +mask); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void gather(const S1 *array, +const EntryType S1::*member1, IT indexes) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1] +.gatherArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void gather(const S1 *array, +const EntryType S1::*member1, +IT indexes, +MaskArgument mask) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1] +.gatherArguments(), +mask); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void gather(const S1 *array, const S2 S1::*member1, +const EntryType S2::*member2, IT indexes) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1][member2] +.gatherArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void gather(const S1 *array, const S2 S1::*member1, +const EntryType S2::*member2, IT indexes, +MaskArgument mask) +{ +gather(Common::SubscriptOperation, true>( +array, indexes)[member1][member2] +.gatherArguments(), +mask); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void gather(const S1 *array, +const EntryType *const S1::*ptrMember1, +IT1 outerIndexes, IT2 innerIndexes) +{ +gather(Common::SubscriptOperation, true>( +array, outerIndexes)[ptrMember1][innerIndexes] +.gatherArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void gather(const S1 *array, +const EntryType *const S1::*ptrMember1, +IT1 outerIndexes, IT2 innerIndexes, +MaskArgument mask) +{ +gather(Common::SubscriptOperation, true>( +array, outerIndexes)[ptrMember1][innerIndexes] +.gatherArguments(), +mask); +} template Vc_INTRINSIC void gather(const Common::GatherArguments &args) { @@ -2355,6 +2480,67 @@ Vc_INTRINSIC void scatter(MT *mem, IT &&indexes, MaskArgument mask) const Vc_ASSERT_SCATTER_PARAMETER_TYPES_; scatterImplementation(mem, std::forward(indexes), mask); } +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void scatter(S1 *array, EntryType S1::*member1, +IT indexes) const +{ +scatter(Common::SubscriptOperation, true>( +array, indexes)[member1] +.scatterArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void scatter(S1 *array, EntryType S1::*member1, +IT indexes, MaskArgument mask) const +{ +scatter(Common::SubscriptOperation, true>( +array, indexes)[member1] +.scatterArguments(), +mask); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void scatter(S1 *array, S2 S1::*member1, +EntryType S2::*member2, +IT indexes) const +{ 
+scatter(Common::SubscriptOperation, true>( +array, indexes)[member1][member2] +.scatterArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void scatter(S1 *array, S2 S1::*member1, +EntryType S2::*member2, IT indexes, +MaskArgument mask) const +{ +scatter(Common::SubscriptOperation, true>( +array, indexes)[member1][member2] +.scatterArguments(), +mask); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void scatter(S1 *array, EntryType *S1::*ptrMember1, +IT1 outerIndexes, +IT2 innerIndexes) const +{ +scatter(Common::SubscriptOperation, true>( +array, outerIndexes)[ptrMember1][innerIndexes] +.scatterArguments()); +} +template +Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " +"instead.") inline void scatter(S1 *array, EntryType *S1::*ptrMember1, +IT1 outerIndexes, IT2 innerIndexes, +MaskArgument mask) const +{ +scatter(Common::SubscriptOperation, true>( +array, outerIndexes)[ptrMember1][innerIndexes] +.scatterArguments(), +mask); +} template Vc_INTRINSIC void scatter(const Common::ScatterArguments &args) const { @@ -4978,6 +5164,74 @@ alignas(64) static const unsigned long long data[21 * Size]; } #endif #include +#ifndef VC_SSE_DEBUG_H_ +#define VC_SSE_DEBUG_H_ +#ifndef NDEBUG +#include +#include +#endif +namespace Vc_VERSIONED_NAMESPACE +{ +namespace SSE +{ +#ifdef NDEBUG +class DebugStream +{ +public: +DebugStream(const char *, const char *, int) {} +template inline DebugStream &operator<<(const T &) { return *this; } +}; +#else +class DebugStream +{ +private: +static char hexChar(char x) { return x + (x > 9 ? 87 : 48); } +template static void printVector(V _x) +{ +std::cerr << "0x"; +const auto bytes = reinterpret_cast(&_x); +for (std::size_t i = 0; i < sizeof(V); ++i) { +std::cerr << hexChar(bytes[i] >> 4) << hexChar(bytes[i] & 0xf); +if (i % 4 == 3) { +std::cerr << '\''; +} +} +enum { Size = sizeof(V) / sizeof(T) }; +union { V v; T m[Size]; } x = { _x }; +std::cerr << " = [" << std::setprecision(24) << x.m[0]; +for (int i = 1; i < Size; ++i) { +std::cerr << ", " << std::setprecision(24) << x.m[i]; +} +std::cerr << ']'; +} +public: +DebugStream(const char *func, const char *file, int line) +{ +std::cerr << "\033[1;40;33mDEBUG: " << file << ':' << line << ' ' << func << ' '; +} +template DebugStream &operator<<(const T &x) { std::cerr << x; return *this; } +DebugStream &operator<<(__m128 x) { +printVector(x); +return *this; +} +DebugStream &operator<<(__m128d x) { +printVector(x); +return *this; +} +DebugStream &operator<<(__m128i x) { +printVector(x); +return *this; +} +~DebugStream() +{ +std::cerr << "\033[0m" << std::endl; +} +}; +#endif +#define Vc_DEBUG Vc::SSE::DebugStream(__PRETTY_FUNCTION__, __FILE__, __LINE__) +} +} +#endif #if defined(Vc_GCC) && !defined(__OPTIMIZE__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wold-style-cast" @@ -6898,9 +7152,9 @@ a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)), schar()); return std::min(schar(_mm_cvtsi128_si32(a) >> 8), schar(_mm_cvtsi128_si32(a))); } Vc_INTRINSIC uchar min(__m128i a, uchar) { -a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)), schar()); -a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)), schar()); -a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)), schar()); +a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)), uchar()); +a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)), uchar()); +a = min(a, _mm_shufflelo_epi16(a, 
_MM_SHUFFLE(1, 1, 1, 1)), uchar()); return std::min((_mm_cvtsi128_si32(a) >> 8) & 0xff, _mm_cvtsi128_si32(a) & 0xff); } Vc_INTRINSIC float max(__m128 a, float) { @@ -6941,9 +7195,9 @@ a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)), schar()); return std::max(schar(_mm_cvtsi128_si32(a) >> 8), schar(_mm_cvtsi128_si32(a))); } Vc_INTRINSIC uchar max(__m128i a, uchar) { -a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)), schar()); -a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)), schar()); -a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)), schar()); +a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)), uchar()); +a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)), uchar()); +a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)), uchar()); return std::max((_mm_cvtsi128_si32(a) >> 8) & 0xff, _mm_cvtsi128_si32(a) & 0xff); } template @@ -20036,7 +20290,7 @@ namespace Detail #define Vc_FIXED_OP(op) \ template ::is_atomic>::type> \ -fixed_size_simd operator op(const fixed_size_simd &a, \ +Vc_INTRINSIC fixed_size_simd operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {private_init, internal_data(a) op internal_data(b)}; \ @@ -20044,7 +20298,7 @@ return {private_init, internal_data(a) op internal_data(b)}; \ template ::is_atomic>::type, \ class = T> \ -fixed_size_simd operator op(const fixed_size_simd &a, \ +Vc_INTRINSIC fixed_size_simd operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {internal_data0(a) op internal_data0(b), \ @@ -20057,7 +20311,7 @@ Vc_ALL_SHIFTS(Vc_FIXED_OP); #define Vc_FIXED_OP(op) \ template ::is_atomic>::type> \ -fixed_size_simd_mask operator op(const fixed_size_simd &a, \ +Vc_INTRINSIC fixed_size_simd_mask operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {private_init, internal_data(a) op internal_data(b)}; \ @@ -20065,7 +20319,7 @@ return {private_init, internal_data(a) op internal_data(b)}; \ template ::is_atomic>::type, \ class = T> \ -fixed_size_simd_mask operator op(const fixed_size_simd &a, \ +Vc_INTRINSIC fixed_size_simd_mask operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {internal_data0(a) op internal_data0(b), \ diff --git a/godbolt/algorithm b/godbolt/algorithm index 7f8e41d3e..326970fcb 100644 --- a/godbolt/algorithm +++ b/godbolt/algorithm @@ -1188,7 +1188,9 @@ enable_if< Traits::is_simd_vector::value && Traits::has_contiguous_storage::value, It> it) { +#ifndef _MSC_VER Vc_ASSERT(&*it + 1 == &*(it + 1)); +#endif return V(&*it, Vc::Unaligned); } template @@ -1299,12 +1301,11 @@ typename = typename std::iterator_traits::iterator_category> class Iterator; template class Iterator -: public std::iterator::iterator_category, V, -typename std::iterator_traits::difference_type, -IteratorDetails::Pointer, -IteratorDetails::Reference> { public: +using iterator_category = typename std::iterator_traits::iterator_category; +using difference_type = typename std::iterator_traits::difference_type; +using value_type = V; using pointer = IteratorDetails::Pointer; using reference = IteratorDetails::Reference; using const_pointer = IteratorDetails::Pointer; diff --git a/godbolt/containers b/godbolt/containers index ba255f2d1..9be19506b 100644 --- a/godbolt/containers +++ b/godbolt/containers @@ -151,9 +151,481 @@ return out << AnsiColor::blue << "ยป" << AnsiColor::normal; #endif #ifndef VC_MEMORY_ #define VC_MEMORY_ -#ifndef VC_COMMON_MAKE_UNIQUE_H_ -#define VC_COMMON_MAKE_UNIQUE_H_ -#include +#ifndef 
VC_COMMON_MEMORY_H_ +#define VC_COMMON_MEMORY_H_ +#ifndef VC_COMMON_MEMORYBASE_H_ +#define VC_COMMON_MEMORYBASE_H_ +#include +#include +#include +namespace Vc_VERSIONED_NAMESPACE +{ +namespace Common +{ +#define Vc_MEM_OPERATOR_EQ(op) \ +template \ +Vc_ALWAYS_INLINE enable_if_mutable operator op##=(const T &x) { \ +const V v = value() op x; \ +v.store(&m_data[0], Flags()); \ +return *this; \ +} +template class MemoryVector +{ +typedef typename std::remove_cv<_V>::type V; +template using enable_if_mutable = +typename std::enable_if::value && !std::is_const<_V>::value, R>::type; +using EntryType = +typename std::conditional::value, const typename V::EntryType, +typename V::EntryType>::type; +typedef typename V::Mask Mask; +EntryType m_data[V::Size]; +public: +Vc_INTRINSIC MemoryVector() = default; +MemoryVector(const MemoryVector &) = delete; +MemoryVector(MemoryVector &&) = delete; +Vc_ALWAYS_INLINE Vc_PURE V value() const { return V(&m_data[0], Flags()); } +Vc_ALWAYS_INLINE Vc_PURE operator V() const { return value(); } +template +Vc_ALWAYS_INLINE enable_if_mutable operator=(const T &x) { +V v; +v = x; +v.store(&m_data[0], Flags()); +return *this; +} +Vc_ALL_BINARY(Vc_MEM_OPERATOR_EQ); +Vc_ALL_ARITHMETICS(Vc_MEM_OPERATOR_EQ); +Vc_ALWAYS_INLINE EntryType &operator[](size_t i) { return m_data[i]; } +Vc_ALWAYS_INLINE const EntryType &operator[](size_t i) const { return m_data[i]; } +}; +template class MemoryVectorIterator +{ +typedef typename std::remove_cv<_V>::type V; +template using enable_if_mutable = +typename std::enable_if::value && !std::is_const<_V>::value, R>::type; +using iterator_traits = std::iterator_traits *>; +MemoryVector<_V, Flags> *d; +public: +typedef typename iterator_traits::difference_type difference_type; +typedef typename iterator_traits::value_type value_type; +typedef typename iterator_traits::pointer pointer; +typedef typename iterator_traits::reference reference; +typedef typename iterator_traits::iterator_category iterator_category; +constexpr MemoryVectorIterator(MemoryVector<_V, Flags> *dd) : d(dd) {} +constexpr MemoryVectorIterator(const MemoryVectorIterator &) = default; +constexpr MemoryVectorIterator(MemoryVectorIterator &&) = default; +Vc_ALWAYS_INLINE MemoryVectorIterator &operator=(const MemoryVectorIterator &) = default; +Vc_ALWAYS_INLINE void *orderBy() const { return d; } +Vc_ALWAYS_INLINE difference_type operator-(const MemoryVectorIterator &rhs) const { return d - rhs.d; } +Vc_ALWAYS_INLINE reference operator[](size_t i) const { return d[i]; } +Vc_ALWAYS_INLINE reference operator*() const { return *d; } +Vc_ALWAYS_INLINE pointer operator->() const { return d; } +Vc_ALWAYS_INLINE MemoryVectorIterator &operator++() { ++d; return *this; } +Vc_ALWAYS_INLINE MemoryVectorIterator operator++(int) { MemoryVectorIterator r(*this); ++d; return r; } +Vc_ALWAYS_INLINE MemoryVectorIterator &operator--() { --d; return *this; } +Vc_ALWAYS_INLINE MemoryVectorIterator operator--(int) { MemoryVectorIterator r(*this); --d; return r; } +Vc_ALWAYS_INLINE MemoryVectorIterator &operator+=(size_t n) { d += n; return *this; } +Vc_ALWAYS_INLINE MemoryVectorIterator &operator-=(size_t n) { d -= n; return *this; } +Vc_ALWAYS_INLINE MemoryVectorIterator operator+(size_t n) const { return MemoryVectorIterator(d + n); } +Vc_ALWAYS_INLINE MemoryVectorIterator operator-(size_t n) const { return MemoryVectorIterator(d - n); } +}; +template +Vc_ALWAYS_INLINE bool operator==(const MemoryVectorIterator &l, const MemoryVectorIterator &r) +{ +return l.orderBy() == r.orderBy(); +} 
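// Illustrative usage sketch (assumption: the regular Vc 1.4 <Vc/Memory> and
// <Vc/Array> headers; the names below are examples, not taken from this patch).
// The MemoryBase/MemoryVector helpers inlined in this region back
// Vc::Memory<V, N>, a padded, aligned buffer traversed one SIMD vector at a time:
//
//   Vc::Memory<Vc::float_v, 100> data;        // storage padded to a multiple of float_v::size()
//   data.setZero();                           // also clears the padding entries
//   for (size_t i = 0; i < data.vectorsCount(); ++i) {
//       Vc::float_v v = data.vector(i);       // aligned vector load
//       data.vector(i) = v * 2.f;             // aligned vector store
//   }
//   float first = data[0];                    // scalar access
//
// The Vc_DEPRECATED gather/scatter overloads earlier in this patch point to the
// same replacement: subscript a Vc::array / Vc::vector with an index vector
// instead of passing struct member pointers, e.g.
//
//   Vc::array<float, 100> arr;
//   Vc::float_v::IndexType idx = Vc::float_v::IndexType::IndexesFromZero();
//   Vc::float_v gathered = arr[idx];          // gather
//   arr[idx] = gathered + 1.f;                // scatter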
+template +Vc_ALWAYS_INLINE bool operator!=(const MemoryVectorIterator &l, const MemoryVectorIterator &r) +{ +return l.orderBy() != r.orderBy(); +} +template +Vc_ALWAYS_INLINE bool operator>=(const MemoryVectorIterator &l, const MemoryVectorIterator &r) +{ +return l.orderBy() >= r.orderBy(); +} +template +Vc_ALWAYS_INLINE bool operator<=(const MemoryVectorIterator &l, const MemoryVectorIterator &r) +{ +return l.orderBy() <= r.orderBy(); +} +template +Vc_ALWAYS_INLINE bool operator> (const MemoryVectorIterator &l, const MemoryVectorIterator &r) +{ +return l.orderBy() > r.orderBy(); +} +template +Vc_ALWAYS_INLINE bool operator< (const MemoryVectorIterator &l, const MemoryVectorIterator &r) +{ +return l.orderBy() < r.orderBy(); +} +#undef Vc_MEM_OPERATOR_EQ +#define Vc_VPH_OPERATOR(op) \ +template \ +decltype(std::declval() op std::declval()) operator op( \ +const MemoryVector &x, const MemoryVector &y) \ +{ \ +return x.value() op y.value(); \ +} +Vc_ALL_ARITHMETICS(Vc_VPH_OPERATOR); +Vc_ALL_BINARY (Vc_VPH_OPERATOR); +Vc_ALL_COMPARES (Vc_VPH_OPERATOR); +#undef Vc_VPH_OPERATOR +template> class MemoryRange +{ +Parent *m_parent; +size_t m_first; +size_t m_last; +public: +MemoryRange(Parent *p, size_t firstIndex, size_t lastIndex) +: m_parent(p), m_first(firstIndex), m_last(lastIndex) +{} +MemoryVectorIterator begin() const { return &m_parent->vector(m_first , Flags()); } +MemoryVectorIterator end() const { return &m_parent->vector(m_last + 1, Flags()); } +}; +template class MemoryDimensionBase; +template class MemoryDimensionBase +{ +private: +Parent *p() { return static_cast(this); } +const Parent *p() const { return static_cast(this); } +public: +typedef typename V::EntryType EntryType; +Vc_ALWAYS_INLINE Vc_PURE EntryType *entries() { return &p()->m_mem[0]; } +Vc_ALWAYS_INLINE Vc_PURE const EntryType *entries() const { return &p()->m_mem[0]; } +Vc_ALWAYS_INLINE Vc_PURE EntryType &scalar(size_t i) { return entries()[i]; } +Vc_ALWAYS_INLINE Vc_PURE const EntryType scalar(size_t i) const { return entries()[i]; } +#ifdef DOXYGEN +Vc_ALWAYS_INLINE Vc_PURE operator EntryType*() { return entries(); } +Vc_ALWAYS_INLINE Vc_PURE operator const EntryType*() const { return entries(); } +#else +template ::type, EntryType *>::value || +std::is_same::type, void *>::value, +int>::type = 0> +Vc_ALWAYS_INLINE Vc_PURE operator T() +{ +return entries(); +} +template ::value || +std::is_same::value, +int>::type = 0> +Vc_ALWAYS_INLINE Vc_PURE operator T() const +{ +return entries(); +} +#endif +template +Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex, Flags) { +return MemoryRange(p(), firstIndex, lastIndex); +} +Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex) { +return MemoryRange(p(), firstIndex, lastIndex); +} +template +Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex, Flags) const { +return MemoryRange(p(), firstIndex, lastIndex); +} +Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex) const { +return MemoryRange(p(), firstIndex, lastIndex); +} +Vc_ALWAYS_INLINE EntryType &operator[](size_t i) { return entries()[i]; } +Vc_ALWAYS_INLINE const EntryType &operator[](size_t i) const { return entries()[i]; } +template Vc_ALWAYS_INLINE Vc_PURE V operator[](Vector i) const +{ +return V(entries(), i); +} +}; +template class MemoryDimensionBase +{ +private: +Parent *p() { return static_cast(this); } +const Parent *p() const { return static_cast(this); } +public: +typedef typename V::EntryType EntryType; +static constexpr size_t 
rowCount() { return Parent::RowCount; } +Vc_ALWAYS_INLINE Vc_PURE EntryType *entries(size_t x = 0) { return &p()->m_mem[x][0]; } +Vc_ALWAYS_INLINE Vc_PURE const EntryType *entries(size_t x = 0) const { return &p()->m_mem[x][0]; } +Vc_ALWAYS_INLINE Vc_PURE EntryType &scalar(size_t i, size_t j) { return entries(i)[j]; } +Vc_ALWAYS_INLINE Vc_PURE const EntryType scalar(size_t i, size_t j) const { return entries(i)[j]; } +Vc_ALWAYS_INLINE Vc_PURE RowMemory &operator[](size_t i) { +return p()->m_mem[i]; +} +Vc_ALWAYS_INLINE Vc_PURE const RowMemory &operator[](size_t i) const { +return p()->m_mem[i]; +} +Vc_ALWAYS_INLINE Vc_PURE size_t rowsCount() const { return p()->rowsCount(); } +}; +template class MemoryBase : public MemoryDimensionBase +{ +static_assert((V::size() * sizeof(typename V::EntryType)) % V::MemoryAlignment == 0, +"Vc::Memory can only be used for data-parallel types storing a number " +"of values that's a multiple of the memory alignment."); +private: +Parent *p() { return static_cast(this); } +const Parent *p() const { return static_cast(this); } +template +using vector_reference = MayAlias> &; +template +using const_vector_reference = const MayAlias> &; +public: +typedef typename V::EntryType EntryType; +Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return p()->entriesCount(); } +Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return p()->vectorsCount(); } +using MemoryDimensionBase::entries; +using MemoryDimensionBase::scalar; +template +Vc_ALWAYS_INLINE MemoryVectorIterator< V, Flags> begin(Flags flags = Flags()) { return &firstVector(flags); } +template +Vc_ALWAYS_INLINE MemoryVectorIterator begin(Flags flags = Flags()) const { return &firstVector(flags); } +template +Vc_ALWAYS_INLINE MemoryVectorIterator< V, Flags> end(Flags flags = Flags()) { return &lastVector(flags) + 1; } +template +Vc_ALWAYS_INLINE MemoryVectorIterator end(Flags flags = Flags()) const { return &lastVector(flags) + 1; } +template +Vc_ALWAYS_INLINE Vc_PURE +typename std::enable_if::value, +vector_reference>::type +vector(size_t i, Flags = Flags()) +{ +return *aliasing_cast>(&entries()[i * V::Size]); +} +template +Vc_ALWAYS_INLINE Vc_PURE +typename std::enable_if::value, +const_vector_reference>::type +vector(size_t i, Flags = Flags()) const +{ +return *aliasing_cast>(&entries()[i * V::Size]); +} +template +Vc_ALWAYS_INLINE Vc_PURE vector_reference vectorAt(size_t i, +Flags flags = Flags()) +{ +return *aliasing_cast>(&entries()[i]); +} +template +Vc_ALWAYS_INLINE Vc_PURE const_vector_reference vectorAt( +size_t i, Flags flags = Flags()) const +{ +return *aliasing_cast>(&entries()[i]); +} +template +Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if< +std::is_convertible::value, +vector_reference() | Unaligned)>>::type +vector(size_t i, ShiftT shift, Flags = Flags()) +{ +return *aliasing_cast< +MemoryVector() | Unaligned)>>( +&entries()[i * V::Size + shift]); +} +template +Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if< +std::is_convertible::value, +const_vector_reference() | Unaligned)>>::type +vector(size_t i, ShiftT shift, Flags = Flags()) const +{ +return *aliasing_cast< +MemoryVector() | Unaligned)>>( +&entries()[i * V::Size + shift]); +} +template +Vc_ALWAYS_INLINE Vc_PURE vector_reference firstVector(Flags f = Flags()) +{ +return vector(0, f); +} +template +Vc_ALWAYS_INLINE Vc_PURE const_vector_reference firstVector( +Flags f = Flags()) const +{ +return vector(0, f); +} +template +Vc_ALWAYS_INLINE Vc_PURE vector_reference lastVector(Flags f = Flags()) +{ +return vector(vectorsCount() 
- 1, f); +} +template +Vc_ALWAYS_INLINE Vc_PURE const_vector_reference lastVector( +Flags f = Flags()) const +{ +return vector(vectorsCount() - 1, f); +} +Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned char *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } +Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned short *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } +Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned int *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } +Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned long *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } +Vc_ALWAYS_INLINE void setZero() { +V zero(Vc::Zero); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) = zero; +} +} +template +Vc_ALWAYS_INLINE Parent &operator=(U &&x) { +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) = std::forward(x); +} +} +template +inline Parent &operator+=(const MemoryBase &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) += rhs.vector(i); +} +return static_cast(*this); +} +template +inline Parent &operator-=(const MemoryBase &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) -= rhs.vector(i); +} +return static_cast(*this); +} +template +inline Parent &operator*=(const MemoryBase &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) *= rhs.vector(i); +} +return static_cast(*this); +} +template +inline Parent &operator/=(const MemoryBase &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) /= rhs.vector(i); +} +return static_cast(*this); +} +inline Parent &operator+=(EntryType rhs) { +V v(rhs); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) += v; +} +return static_cast(*this); +} +inline Parent &operator-=(EntryType rhs) { +V v(rhs); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) -= v; +} +return static_cast(*this); +} +inline Parent &operator*=(EntryType rhs) { +V v(rhs); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) *= v; +} +return static_cast(*this); +} +inline Parent &operator/=(EntryType rhs) { +V v(rhs); +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) /= v; +} +return static_cast(*this); +} +template +inline bool operator==(const MemoryBase &rhs) const { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +if (!(V(vector(i)) == V(rhs.vector(i))).isFull()) { +return false; +} +} +return true; +} +template +inline bool operator!=(const MemoryBase &rhs) const { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +if (!(V(vector(i)) == V(rhs.vector(i))).isEmpty()) { +return false; +} +} +return true; +} +template +inline bool operator<(const MemoryBase &rhs) const { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +if (!(V(vector(i)) < V(rhs.vector(i))).isFull()) { +return false; +} +} +return true; +} +template +inline bool operator<=(const MemoryBase &rhs) const { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +if (!(V(vector(i)) <= V(rhs.vector(i))).isFull()) { +return false; +} +} +return true; +} +template +inline bool operator>(const MemoryBase &rhs) const { 
+assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +if (!(V(vector(i)) > V(rhs.vector(i))).isFull()) { +return false; +} +} +return true; +} +template +inline bool operator>=(const MemoryBase &rhs) const { +assert(vectorsCount() == rhs.vectorsCount()); +for (size_t i = 0; i < vectorsCount(); ++i) { +if (!(V(vector(i)) >= V(rhs.vector(i))).isFull()) { +return false; +} +} +return true; +} +}; +namespace Detail +{ +template +inline void copyVectors(MemoryBase &dst, +const MemoryBase &src) +{ +const size_t vectorsCount = dst.vectorsCount(); +size_t i = 3; +for (; i < vectorsCount; i += 4) { +const V tmp3 = src.vector(i - 3); +const V tmp2 = src.vector(i - 2); +const V tmp1 = src.vector(i - 1); +const V tmp0 = src.vector(i - 0); +dst.vector(i - 3) = tmp3; +dst.vector(i - 2) = tmp2; +dst.vector(i - 1) = tmp1; +dst.vector(i - 0) = tmp0; +} +for (i -= 3; i < vectorsCount; ++i) { +dst.vector(i) = src.vector(i); +} +} +} +} +} +#endif +#include +#include +#include +#include +#include #ifndef VC_COMMON_MALLOC_H_ #define VC_COMMON_MALLOC_H_ #ifndef Vc_VECTOR_DECLARED_ @@ -234,6 +706,249 @@ namespace Vc_VERSIONED_NAMESPACE { namespace Common { +template struct _MemorySizeCalculation +{ +enum AlignmentCalculations { +Alignment = V::Size, +AlignmentMask = Alignment - 1, +MaskedSize = Size & AlignmentMask, +Padding = Alignment - MaskedSize, +PaddedSize = MaskedSize == 0 ? Size : Size + Padding +}; +}; +template +class Memory : public MemoryBase, 2, +Memory> +{ +public: +typedef typename V::EntryType EntryType; +private: +using RowMemory = Memory; +typedef MemoryBase, 2, RowMemory> Base; +friend class MemoryBase, 2, RowMemory>; +friend class MemoryDimensionBase, 2, +RowMemory>; +enum : size_t { +Alignment = V::MemoryAlignment, +PaddedSize2 = _MemorySizeCalculation::PaddedSize +}; +alignas(static_cast(Alignment)) +RowMemory m_mem[Size1]; +public: +using Base::vector; +enum Constants { +RowCount = Size1, +VectorsCount = PaddedSize2 / V::Size +}; +Memory() = default; +static constexpr size_t rowsCount() { return RowCount; } +static constexpr size_t entriesCount() { return Size1 * Size2; } +static constexpr size_t vectorsCount() { return VectorsCount * Size1; } +template +Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +Detail::copyVectors(*this, rhs); +return *this; +} +Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) { +Detail::copyVectors(*this, rhs); +return *this; +} +inline Memory &operator=(const V &v) { +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) = v; +} +return *this; +} +}; +template +class Memory : +public MemoryBase, 1, void> +{ +public: +typedef typename V::EntryType EntryType; +private: +typedef MemoryBase, 1, void> Base; +friend class MemoryBase, 1, void>; +friend class MemoryDimensionBase, 1, void>; +enum : size_t { +Alignment = V::MemoryAlignment, +MaskedSize = Size & (V::Size - 1), +Padding = V::Size - MaskedSize, +PaddedSize = MaskedSize == 0 ? 
Size : Size + Padding +}; +alignas(static_cast(Alignment)) +EntryType m_mem[PaddedSize]; +public: +using Base::vector; +enum Constants { +EntriesCount = Size, +VectorsCount = PaddedSize / V::Size +}; +Memory() +{ +if (InitPadding) { +Base::lastVector() = V::Zero(); +} +} +Memory(std::initializer_list init) +{ +Vc_ASSERT(init.size() <= Size); +Base::lastVector() = V::Zero(); +std::copy(init.begin(), init.end(), &m_mem[0]); +} +static Vc_ALWAYS_INLINE Vc_CONST Memory &fromRawData(EntryType *ptr) +{ +char *addr = reinterpret_cast(ptr); +typedef Memory MM; +addr -= offsetof(MM, m_mem); +return *new(addr) MM; +} +static constexpr size_t entriesCount() { return EntriesCount; } +static constexpr size_t vectorsCount() { return VectorsCount; } +inline Memory(const Memory &rhs) +{ +Detail::copyVectors(*this, rhs); +} +template inline Memory(const Memory &rhs) +{ +assert(vectorsCount() == rhs.vectorsCount()); +Detail::copyVectors(*this, rhs); +} +inline Memory &operator=(const Memory &rhs) +{ +Detail::copyVectors(*this, rhs); +return *this; +} +template inline Memory &operator=(const Memory &rhs) +{ +assert(vectorsCount() == rhs.vectorsCount()); +Detail::copyVectors(*this, rhs); +return *this; +} +Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) { +std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType)); +return *this; +} +inline Memory &operator=(const V &v) { +for (size_t i = 0; i < vectorsCount(); ++i) { +vector(i) = v; +} +return *this; +} +}; +template class Memory : public MemoryBase, 1, void> +{ +public: +typedef typename V::EntryType EntryType; +private: +typedef MemoryBase, 1, void> Base; +friend class MemoryBase, 1, void>; +friend class MemoryDimensionBase, 1, void>; +enum InternalConstants { +Alignment = V::Size, +AlignmentMask = Alignment - 1 +}; +size_t m_entriesCount; +size_t m_vectorsCount; +EntryType *m_mem; +size_t calcPaddedEntriesCount(size_t x) +{ +size_t masked = x & AlignmentMask; +return (masked == 0 ? 
x : x + (Alignment - masked)); +} +public: +using Base::vector; +Vc_ALWAYS_INLINE Memory(size_t size) +: m_entriesCount(size), +m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)), +m_mem(Vc::malloc(m_vectorsCount)) +{ +m_vectorsCount /= V::Size; +Base::lastVector() = V::Zero(); +} +template +Vc_ALWAYS_INLINE Memory(const MemoryBase &rhs) +: m_entriesCount(rhs.entriesCount()), +m_vectorsCount(rhs.vectorsCount()), +m_mem(Vc::malloc(m_vectorsCount * V::Size)) +{ +Detail::copyVectors(*this, rhs); +} +Vc_ALWAYS_INLINE Memory(const Memory &rhs) +: m_entriesCount(rhs.entriesCount()), +m_vectorsCount(rhs.vectorsCount()), +m_mem(Vc::malloc(m_vectorsCount * V::Size)) +{ +Detail::copyVectors(*this, rhs); +} +Vc_ALWAYS_INLINE ~Memory() +{ +Vc::free(m_mem); +} +inline void swap(Memory &rhs) { +std::swap(m_mem, rhs.m_mem); +std::swap(m_entriesCount, rhs.m_entriesCount); +std::swap(m_vectorsCount, rhs.m_vectorsCount); +} +Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; } +Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; } +template +Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +Detail::copyVectors(*this, rhs); +return *this; +} +Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) { +assert(vectorsCount() == rhs.vectorsCount()); +Detail::copyVectors(*this, rhs); +return *this; +} +Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) { +std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType)); +return *this; +} +}; +Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr) +{ +Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best()); +} +Vc_ALWAYS_INLINE void prefetchForModify(const void *addr) +{ +Vc::Detail::prefetchForModify(addr, VectorAbi::Best()); +} +Vc_ALWAYS_INLINE void prefetchClose(const void *addr) +{ +Vc::Detail::prefetchClose(addr, VectorAbi::Best()); +} +Vc_ALWAYS_INLINE void prefetchMid(const void *addr) +{ +Vc::Detail::prefetchMid(addr, VectorAbi::Best()); +} +Vc_ALWAYS_INLINE void prefetchFar(const void *addr) +{ +Vc::Detail::prefetchFar(addr, VectorAbi::Best()); +} +} +using Common::Memory; +using Common::prefetchForOneRead; +using Common::prefetchForModify; +using Common::prefetchClose; +using Common::prefetchMid; +using Common::prefetchFar; +} +namespace std +{ +template Vc_ALWAYS_INLINE void swap(Vc::Memory &a, Vc::Memory &b) { a.swap(b); } +} +#endif +#ifndef VC_COMMON_MAKE_UNIQUE_H_ +#define VC_COMMON_MAKE_UNIQUE_H_ +#include +namespace Vc_VERSIONED_NAMESPACE +{ +namespace Common +{ template struct Deleter { Vc_ALWAYS_INLINE void operator()(T *ptr) {