diff --git a/Inc/DirectXMath.h b/Inc/DirectXMath.h
index 9f9d791..124a101 100644
--- a/Inc/DirectXMath.h
+++ b/Inc/DirectXMath.h
@@ -135,7 +135,7 @@
 #endif
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64))
+#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64))
 #include <arm64_neon.h>
 #else
 #include <arm_neon.h>
diff --git a/Inc/DirectXMathConvert.inl b/Inc/DirectXMathConvert.inl
index 8097f20..193fae1 100644
--- a/Inc/DirectXMathConvert.inl
+++ b/Inc/DirectXMathConvert.inl
@@ -38,7 +38,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat
     } while (++ElementIndex < 4);
     return Result;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-    float fScale = 1.0f / (float)(1U << DivExponent);
+    float fScale = 1.0f / static_cast<float>(1U << DivExponent);
     float32x4_t vResult = vcvtq_f32_s32(vreinterpretq_s32_f32(VInt));
     return vmulq_n_f32(vResult, fScale);
 #else // _XM_SSE_INTRINSICS_
@@ -85,7 +85,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt
     } while (++ElementIndex < 4);
     return Result;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-    float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
+    float32x4_t vResult = vmulq_n_f32(VFloat, static_cast<float>(1U << MulExponent));
     // In case of positive overflow, detect it
     uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt);
     // Float to int conversion
@@ -128,7 +128,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat
     } while (++ElementIndex < 4);
     return Result;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-    float fScale = 1.0f / (float)(1U << DivExponent);
+    float fScale = 1.0f / static_cast<float>(1U << DivExponent);
     float32x4_t vResult = vcvtq_f32_u32(vreinterpretq_u32_f32(VUInt));
     return vmulq_n_f32(vResult, fScale);
 #else // _XM_SSE_INTRINSICS_
@@ -185,7 +185,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt
     } while (++ElementIndex < 4);
     return Result;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-    float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
+    float32x4_t vResult = vmulq_n_f32(VFloat, static_cast<float>(1U << MulExponent));
     // In case of overflow, detect it
     uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt);
     // Float to int conversion
@@ -301,7 +301,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2A(const uint32_t* pSource) noexcept
     V.vector4_u32[3] = 0;
     return V;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     uint32x2_t x = vld1_u32_ex(pSource, 64);
 #else
     uint32x2_t x = vld1_u32(pSource);
@@ -348,7 +348,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept
     V.vector4_f32[3] = 0.f;
     return V;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     float32x2_t x = vld1_f32_ex(reinterpret_cast<const float*>(pSource), 64);
 #else
     float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));
@@ -461,7 +461,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3A(const uint32_t* pSource) noexcept
     return V;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     // Reads an extra integer which is zero'd
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     uint32x4_t V = vld1q_u32_ex(pSource, 128);
 #else
     uint32x4_t V = vld1q_u32(pSource);
@@ -521,7 +521,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept
     return V;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     // Reads an extra float which is zero'd
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     float32x4_t V = vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
 #else
     float32x4_t V = vld1q_f32(reinterpret_cast<const float*>(pSource));
@@ -635,7 +635,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4A(const uint32_t* pSource) noexcept
     V.vector4_u32[3] = pSource[3];
     return V;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     return vld1q_u32_ex(pSource, 128);
 #else
     return vreinterpretq_f32_u32(vld1q_u32(pSource));
@@ -679,7 +679,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept
     V.vector4_f32[3] = pSource->w;
     return V;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     return vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
 #else
     return vld1q_f32(reinterpret_cast<const float*>(pSource));
@@ -915,7 +915,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
     return M;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128);
     float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128);
     float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128);
@@ -1077,7 +1077,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept
     return M;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128);
     float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128);
 #else
@@ -1208,7 +1208,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     XMMATRIX M;
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     M.r[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);
     M.r[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_21), 128);
     M.r[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_31), 128);
@@ -1305,7 +1305,7 @@ inline void XM_CALLCONV XMStoreInt2A
     pDestination[1] = V.vector4_u32[1];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1_u32_ex(pDestination, VL, 64);
 #else
     vst1_u32(pDestination, VL);
@@ -1350,7 +1350,7 @@ inline void XM_CALLCONV XMStoreFloat2A
     pDestination->y = V.vector4_f32[1];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     float32x2_t VL = vget_low_f32(V);
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
 #else
     vst1_f32(reinterpret_cast<float*>(pDestination), VL);
@@ -1469,7 +1469,7 @@ inline void XM_CALLCONV XMStoreInt3A
     pDestination[2] = V.vector4_u32[2];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1_u32_ex(pDestination, VL, 64);
 #else
     vst1_u32(pDestination, VL);
@@ -1526,7 +1526,7 @@ inline void XM_CALLCONV XMStoreFloat3A
     pDestination->z = V.vector4_f32[2];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     float32x2_t VL = vget_low_f32(V);
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
 #else
     vst1_f32(reinterpret_cast<float*>(pDestination), VL);
@@ -1656,7 +1656,7 @@ inline void XM_CALLCONV XMStoreInt4A
     pDestination[2] = V.vector4_u32[2];
     pDestination[3] = V.vector4_u32[3];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1q_u32_ex(pDestination, V, 128);
 #else
     vst1q_u32(pDestination, vreinterpretq_u32_f32(V));
@@ -1703,7 +1703,7 @@ inline void XM_CALLCONV XMStoreFloat4A
     pDestination->z = V.vector4_f32[2];
     pDestination->w = V.vector4_f32[3];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);
 #else
     vst1q_f32(reinterpret_cast<float*>(pDestination), V);
@@ -1913,7 +1913,7 @@ inline void XM_CALLCONV XMStoreFloat4x3A
     pDestination->m[3][2] = M.r[3].vector4_f32[2];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1);
     float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1);
     vst1q_f32_ex(&pDestination->m[0][0], T2, 128);
@@ -2057,7 +2057,7 @@ inline void XM_CALLCONV XMStoreFloat3x4A
     float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
     float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128);
     vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128);
     vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128);
@@ -2166,7 +2166,7 @@ inline void XM_CALLCONV XMStoreFloat4x4A
     pDestination->m[3][3] = M.r[3].vector4_f32[3];
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.r[0], 128);
     vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_21), M.r[1], 128);
     vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_31), M.r[2], 128);
diff --git a/Inc/DirectXMathVector.inl b/Inc/DirectXMathVector.inl
index 35d2e2a..1b76bf2 100644
--- a/Inc/DirectXMathVector.inl
+++ b/Inc/DirectXMathVector.inl
@@ -1734,7 +1734,7 @@ inline XMVECTOR XM_CALLCONV XMVectorNearEqual
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     float32x4_t vDelta = vsubq_f32(V1, V2);
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     return vacleq_f32(vDelta, Epsilon);
 #else
     return vreinterpretq_f32_u32(vcleq_f32(vabsq_f32(vDelta), Epsilon));
@@ -6328,7 +6328,7 @@ inline bool XM_CALLCONV XMVector2NearEqual
         (dy <= Epsilon.vector4_f32[1]));
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     float32x2_t vDelta = vsub_f32(vget_low_f32(V1), vget_low_f32(V2));
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     uint32x2_t vTemp = vacle_f32(vDelta, vget_low_u32(Epsilon));
 #else
     uint32x2_t vTemp = vcle_f32(vabs_f32(vDelta), vget_low_f32(Epsilon));
@@ -9057,7 +9057,7 @@ inline bool XM_CALLCONV XMVector3NearEqual
         (dz <= Epsilon.vector4_f32[2])) != 0);
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     float32x4_t vDelta = vsubq_f32(V1, V2);
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
 #else
     uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);
@@ -12924,7 +12924,7 @@ inline bool XM_CALLCONV XMVector4NearEqual
         (dw <= Epsilon.vector4_f32[3])) != 0);
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
     float32x4_t vDelta = vsubq_f32(V1, V2);
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
     uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
 #else
     uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);