diff --git a/headers/cpubenchmark.h b/headers/cpubenchmark.h
index 0352ebd..7640447 100644
--- a/headers/cpubenchmark.h
+++ b/headers/cpubenchmark.h
@@ -89,7 +89,7 @@ inline unsigned long long rdtsc() {
 static __inline__ unsigned long long startRDTSC(void) { return rdtsc(); }

 static __inline__ unsigned long long stopRDTSCP(void) { return rdtsc(); }
-#elif (defined(__GNUC__) && (defined(__arch64__)))
+#elif (defined(__GNUC__) && (defined(__aarch64__)))
 inline uint64_t rdtsc() {
   uint64_t cycles;
   asm volatile("mrs %0, cntvct_el0"
diff --git a/headers/simdgroupsimple.h b/headers/simdgroupsimple.h
index cdedf6b..148f151 100644
--- a/headers/simdgroupsimple.h
+++ b/headers/simdgroupsimple.h
@@ -166,7 +166,7 @@ namespace FastPForLib {
       _mm_storeu_si128(out++, comprBlock);
     }

-#elif (defined(__GNUC__) && (defined(__arch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
+#elif (defined(__GNUC__) && (defined(__aarch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
     inline static void comprIncompleteBlock(const uint8_t &n, const __m128i *&in,
                                             __m128i *&out) {
       // Since we have to produce exactly one compressed vector anyway, we can
@@ -641,7 +641,7 @@ namespace FastPForLib {
           _mm_and_si128(_mm_srli_epi32(comprBlock, k * b), mask));
     }

-#elif (defined(__GNUC__) && (defined(__arch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
+#elif (defined(__GNUC__) && (defined(__aarch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
     inline static void decomprIncompleteBlock(const uint8_t &n,
                                               const __m128i *&in,
                                               __m128i *&out) {
diff --git a/src/simdbitpacking.cpp b/src/simdbitpacking.cpp
index 4c90c76..e552c9b 100644
--- a/src/simdbitpacking.cpp
+++ b/src/simdbitpacking.cpp
@@ -8943,7 +8943,7 @@ static void __SIMD_fastunpack1_32(const __m128i *__restrict__ in,
     _mm_storeu_si128(out++, OutReg3);
     _mm_storeu_si128(out++, OutReg4);
   }
-#elif (defined(__GNUC__) && (defined(__arch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
+#elif (defined(__GNUC__) && (defined(__aarch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
   OutReg1 = _mm_and_si128(_mm_srli_epi32(InReg1, 0), mask);
   OutReg2 = _mm_and_si128(_mm_srli_epi32(InReg2, 1), mask);
   OutReg3 = _mm_and_si128(_mm_srli_epi32(InReg1, 2), mask);
diff --git a/src/simdunalignedbitpacking.cpp b/src/simdunalignedbitpacking.cpp
index b391b9a..21627f3 100644
--- a/src/simdunalignedbitpacking.cpp
+++ b/src/simdunalignedbitpacking.cpp
@@ -8943,7 +8943,7 @@ static void __SIMD_fastunpack1_32(const __m128i *__restrict__ in,
     _mm_storeu_si128(out++, OutReg3);
     _mm_storeu_si128(out++, OutReg4);
   }
-#elif (defined(__GNUC__) && (defined(__arch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
+#elif (defined(__GNUC__) && (defined(__aarch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
   OutReg1 = _mm_and_si128(_mm_srli_epi32(InReg1, 0), mask);
   OutReg2 = _mm_and_si128(_mm_srli_epi32(InReg2, 1), mask);
   OutReg3 = _mm_and_si128(_mm_srli_epi32(InReg1, 2), mask);
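
For reference, the corrected spelling matters because GCC and Clang predefine __aarch64__ (not __arch64__) when targeting 64-bit ARM, so with the old guards the ARM64 branches above were never compiled. Below is a minimal standalone sketch (not part of the patched sources) that prints which branch a given compiler would take; on an AArch64 host the predefined macros can also be inspected with `gcc -dM -E - < /dev/null | grep aarch64`.

    #include <stdio.h>

    int main(void) {
    #if (defined(__GNUC__) && (defined(__aarch64__))) || (defined(_MSC_VER) && defined(_M_ARM64))
      /* Branch taken by GCC/Clang on 64-bit ARM, or MSVC targeting ARM64. */
      puts("ARM64 path selected");
    #elif (defined(__GNUC__) && (defined(__arch64__)))
      /* Never taken: neither GCC nor Clang defines __arch64__. */
      puts("unreachable: __arch64__ is not a predefined macro");
    #else
      puts("non-ARM64 path");
    #endif
      return 0;
    }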