diff --git a/benchmarks/bench-argsort.hpp b/benchmarks/bench-argsort.hpp index 24ffea76..905fb581 100644 --- a/benchmarks/bench-argsort.hpp +++ b/benchmarks/bench-argsort.hpp @@ -52,7 +52,7 @@ template static void avx512argsort(benchmark::State &state, Args &&...args) { auto args_tuple = std::make_tuple(std::move(args)...); - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { state.SkipWithMessage("Requires AVX512 BW ISA"); } // Perform setup here @@ -84,9 +84,9 @@ static void avx512argsort(benchmark::State &state, Args &&...args) } } -#define BENCH_BOTH(type)\ - BENCH(avx512argsort, type)\ - BENCH(stdargsort, type)\ +#define BENCH_BOTH(type) \ + BENCH(avx512argsort, type) \ + BENCH(stdargsort, type) BENCH_BOTH(int64_t) BENCH_BOTH(uint64_t) diff --git a/benchmarks/bench-partial-qsort.hpp b/benchmarks/bench-partial-qsort.hpp index 2560107d..c5091392 100644 --- a/benchmarks/bench-partial-qsort.hpp +++ b/benchmarks/bench-partial-qsort.hpp @@ -3,10 +3,10 @@ template static void avx512_partial_qsort(benchmark::State &state) { - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { state.SkipWithMessage("Requires AVX512 BW ISA"); } - if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { + if ((sizeof(T) == 2) && (!__builtin_cpu_supports("avx512vbmi2"))) { state.SkipWithMessage("Requires AVX512 VBMI2 ISA"); } // Perform setup here diff --git a/benchmarks/bench-qselect.hpp b/benchmarks/bench-qselect.hpp index 0f6ad8c7..af3c401a 100644 --- a/benchmarks/bench-qselect.hpp +++ b/benchmarks/bench-qselect.hpp @@ -3,10 +3,10 @@ template static void avx512_qselect(benchmark::State &state) { - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { state.SkipWithMessage("Requires AVX512 BW ISA"); } - if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { + if ((sizeof(T) == 2) && (!__builtin_cpu_supports("avx512vbmi2"))) { state.SkipWithMessage("Requires AVX512 VBMI2 ISA"); } // Perform setup here diff --git a/benchmarks/bench-qsort-common.h b/benchmarks/bench-qsort-common.h index 208b16c9..0f873641 100644 --- a/benchmarks/bench-qsort-common.h +++ b/benchmarks/bench-qsort-common.h @@ -5,7 +5,7 @@ #include "avx512-32bit-qsort.hpp" #include "avx512-64bit-argsort.hpp" #include "avx512-64bit-qsort.hpp" -#include "cpuinfo.h" + #include "rand_array.h" #include @@ -19,8 +19,7 @@ }))) #define BENCH(func, type) \ - MY_BENCHMARK_CAPTURE( \ - func, type, random_5k, 5000, std::string("random")); \ + MY_BENCHMARK_CAPTURE(func, type, random_5k, 5000, std::string("random")); \ MY_BENCHMARK_CAPTURE( \ func, type, random_100k, 100000, std::string("random")); \ MY_BENCHMARK_CAPTURE( \ @@ -34,5 +33,4 @@ MY_BENCHMARK_CAPTURE( \ func, type, reverse_10k, 10000, std::string("reverse")); - #endif diff --git a/benchmarks/bench-qsort.hpp b/benchmarks/bench-qsort.hpp index ae02ac9d..3b03b1da 100644 --- a/benchmarks/bench-qsort.hpp +++ b/benchmarks/bench-qsort.hpp @@ -41,10 +41,10 @@ template static void avx512qsort(benchmark::State &state, Args &&...args) { auto args_tuple = std::make_tuple(std::move(args)...); - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { state.SkipWithMessage("Requires AVX512 BW ISA"); } - if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { + if ((sizeof(T) == 2) && (!__builtin_cpu_supports("avx512vbmi2"))) { state.SkipWithMessage("Requires AVX512 VBMI2"); } // Perform setup here @@ -80,8 +80,8 @@ static void avx512qsort(benchmark::State &state, Args &&...args) } } -#define BENCH_BOTH_QSORT(type)\ - BENCH(avx512qsort, type)\ +#define BENCH_BOTH_QSORT(type) \ + BENCH(avx512qsort, type) \ BENCH(stdsort, type) BENCH_BOTH_QSORT(uint64_t) diff --git a/benchmarks/bench-qsortfp16.cpp b/benchmarks/bench-qsortfp16.cpp index 9a90d9d6..769c2c2f 100644 --- a/benchmarks/bench-qsortfp16.cpp +++ b/benchmarks/bench-qsortfp16.cpp @@ -1,12 +1,12 @@ #include "avx512fp16-16bit-qsort.hpp" -#include "cpuinfo.h" + #include "rand_array.h" #include template static void avx512_qsort(benchmark::State &state) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { // Perform setup here size_t ARRSIZE = state.range(0); std::vector arr; @@ -35,7 +35,7 @@ static void avx512_qsort(benchmark::State &state) template static void stdsort(benchmark::State &state) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { // Perform setup here size_t ARRSIZE = state.range(0); std::vector arr; @@ -67,7 +67,7 @@ BENCHMARK(stdsort<_Float16>)->Arg(10000)->Arg(1000000); template static void avx512_qselect(benchmark::State &state) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { // Perform setup here int64_t K = state.range(0); size_t ARRSIZE = 10000; @@ -98,7 +98,7 @@ static void avx512_qselect(benchmark::State &state) template static void stdnthelement(benchmark::State &state) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { // Perform setup here int64_t K = state.range(0); size_t ARRSIZE = 10000; @@ -133,7 +133,7 @@ BENCHMARK(stdnthelement<_Float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000); template static void avx512_partial_qsort(benchmark::State &state) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { // Perform setup here int64_t K = state.range(0); size_t ARRSIZE = 10000; @@ -164,7 +164,7 @@ static void avx512_partial_qsort(benchmark::State &state) template static void stdpartialsort(benchmark::State &state) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { // Perform setup here int64_t K = state.range(0); size_t ARRSIZE = 10000; diff --git a/meson.build b/meson.build index b5a40cbc..ca28c0a1 100644 --- a/meson.build +++ b/meson.build @@ -10,7 +10,7 @@ gtest_dep = dependency('gtest_main', required : true, static: true) gbench_dep = dependency('benchmark', required : true, static: true) fp16code = '''#include -int main() { +int main() { __m512h temp = _mm512_set1_ph(1.0f); __m512h var2 = _mm512_min_ph(temp, temp); return 0; @@ -18,21 +18,20 @@ int main() { ''' cancompilefp16 = cpp.compiles(fp16code, args:'-march=sapphirerapids') -subdir('utils') subdir('tests') subdir('benchmarks') testexe = executable('testexe', include_directories : [src, utils], dependencies : gtest_dep, - link_whole : [libtests, libcpuinfo] + link_whole : [libtests] ) benchexe = executable('benchexe', include_directories : [src, utils, bench], dependencies : [gbench_dep], link_args: ['-lbenchmark_main'], - link_whole : [libbench, libcpuinfo], + link_whole : [libbench], ) summary({ diff --git a/tests/test-argselect.hpp b/tests/test-argselect.hpp index 298000d4..b5a98c1d 100644 --- a/tests/test-argselect.hpp +++ b/tests/test-argselect.hpp @@ -11,7 +11,7 @@ TYPED_TEST_SUITE_P(avx512argselect); TYPED_TEST_P(avx512argselect, test_random) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { const int arrsize = 1024; auto arr = get_uniform_rand_array(arrsize); std::vector sorted_inx; diff --git a/tests/test-argsort-common.h b/tests/test-argsort-common.h index 2e293620..543bfaec 100644 --- a/tests/test-argsort-common.h +++ b/tests/test-argsort-common.h @@ -1,5 +1,5 @@ #include "avx512-64bit-argsort.hpp" -#include "cpuinfo.h" + #include "rand_array.h" #include #include diff --git a/tests/test-argsort.hpp b/tests/test-argsort.hpp index f7a4a23f..d2c403c2 100644 --- a/tests/test-argsort.hpp +++ b/tests/test-argsort.hpp @@ -10,7 +10,7 @@ TYPED_TEST_SUITE_P(avx512argsort); TYPED_TEST_P(avx512argsort, test_random) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { std::vector arrsizes; for (int64_t ii = 0; ii <= 1024; ++ii) { arrsizes.push_back(ii); @@ -39,7 +39,7 @@ TYPED_TEST_P(avx512argsort, test_random) TYPED_TEST_P(avx512argsort, test_constant) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { std::vector arrsizes; for (int64_t ii = 0; ii <= 1024; ++ii) { arrsizes.push_back(ii); @@ -71,7 +71,7 @@ TYPED_TEST_P(avx512argsort, test_constant) TYPED_TEST_P(avx512argsort, test_small_range) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { std::vector arrsizes; for (int64_t ii = 0; ii <= 1024; ++ii) { arrsizes.push_back(ii); @@ -100,7 +100,7 @@ TYPED_TEST_P(avx512argsort, test_small_range) TYPED_TEST_P(avx512argsort, test_sorted) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { std::vector arrsizes; for (int64_t ii = 0; ii <= 1024; ++ii) { arrsizes.push_back(ii); @@ -129,7 +129,7 @@ TYPED_TEST_P(avx512argsort, test_sorted) TYPED_TEST_P(avx512argsort, test_reverse) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { std::vector arrsizes; for (int64_t ii = 0; ii <= 1024; ++ii) { arrsizes.push_back(ii); @@ -159,7 +159,7 @@ TYPED_TEST_P(avx512argsort, test_reverse) TYPED_TEST_P(avx512argsort, test_array_with_nan) { - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; } if (!std::is_floating_point::value) { @@ -193,7 +193,7 @@ TYPED_TEST_P(avx512argsort, test_array_with_nan) TYPED_TEST_P(avx512argsort, test_max_value_at_end_of_array) { - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; } std::vector arrsizes; @@ -224,7 +224,7 @@ TYPED_TEST_P(avx512argsort, test_max_value_at_end_of_array) TYPED_TEST_P(avx512argsort, test_all_inf_array) { - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; } std::vector arrsizes; diff --git a/tests/test-keyvalue.cpp b/tests/test-keyvalue.cpp index 2099ba5d..a05e9528 100644 --- a/tests/test-keyvalue.cpp +++ b/tests/test-keyvalue.cpp @@ -4,7 +4,7 @@ * *******************************************/ #include "avx512-64bit-keyvaluesort.hpp" -#include "cpuinfo.h" + #include "rand_array.h" #include #include @@ -30,7 +30,7 @@ TYPED_TEST_SUITE_P(KeyValueSort); TYPED_TEST_P(KeyValueSort, test_64bit_random_data) { - if (cpu_has_avx512bw()) { + if (__builtin_cpu_supports("avx512bw")) { std::vector keysizes; for (int64_t ii = 0; ii < 1024; ++ii) { keysizes.push_back((TypeParam)ii); diff --git a/tests/test-partial-qsort.hpp b/tests/test-partial-qsort.hpp index 4ba5caa8..6050b4c2 100644 --- a/tests/test-partial-qsort.hpp +++ b/tests/test-partial-qsort.hpp @@ -10,8 +10,9 @@ TYPED_TEST_P(avx512_partial_sort, test_ranges) int64_t arrsize = 1024; int64_t nranges = 500; - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arr; diff --git a/tests/test-qselect.hpp b/tests/test-qselect.hpp index f0c0c242..6d062076 100644 --- a/tests/test-qselect.hpp +++ b/tests/test-qselect.hpp @@ -7,8 +7,9 @@ TYPED_TEST_SUITE_P(avx512_select); TYPED_TEST_P(avx512_select, test_random) { - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; @@ -51,8 +52,9 @@ TYPED_TEST_P(avx512_select, test_random) TYPED_TEST_P(avx512_select, test_small_range) { - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; diff --git a/tests/test-qsort-common.h b/tests/test-qsort-common.h index a41b2c63..9690265a 100644 --- a/tests/test-qsort-common.h +++ b/tests/test-qsort-common.h @@ -4,7 +4,7 @@ #include "avx512-16bit-qsort.hpp" #include "avx512-32bit-qsort.hpp" #include "avx512-64bit-qsort.hpp" -#include "cpuinfo.h" + #include "rand_array.h" #include diff --git a/tests/test-qsort-fp.hpp b/tests/test-qsort-fp.hpp index 438305b1..8309d509 100644 --- a/tests/test-qsort-fp.hpp +++ b/tests/test-qsort-fp.hpp @@ -13,7 +13,7 @@ TYPED_TEST_SUITE_P(avx512_sort_fp); TYPED_TEST_P(avx512_sort_fp, test_random_nan) { const int num_nans = 3; - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { GTEST_SKIP() << "Skipping this test, it requires avx512bw"; } std::vector arrsizes; diff --git a/tests/test-qsort.hpp b/tests/test-qsort.hpp index 91f0e9b9..d6c1d85a 100644 --- a/tests/test-qsort.hpp +++ b/tests/test-qsort.hpp @@ -12,8 +12,9 @@ TYPED_TEST_SUITE_P(avx512_sort); TYPED_TEST_P(avx512_sort, test_random) { - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; @@ -41,8 +42,9 @@ TYPED_TEST_P(avx512_sort, test_random) TYPED_TEST_P(avx512_sort, test_reverse) { - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; @@ -72,8 +74,9 @@ TYPED_TEST_P(avx512_sort, test_reverse) TYPED_TEST_P(avx512_sort, test_constant) { - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; @@ -103,8 +106,9 @@ TYPED_TEST_P(avx512_sort, test_constant) TYPED_TEST_P(avx512_sort, test_small_range) { - if (cpu_has_avx512bw()) { - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if (__builtin_cpu_supports("avx512bw")) { + if ((sizeof(TypeParam) == 2) + && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; @@ -131,10 +135,10 @@ TYPED_TEST_P(avx512_sort, test_small_range) TYPED_TEST_P(avx512_sort, test_max_value_at_end_of_array) { - if (!cpu_has_avx512bw()) { + if (!__builtin_cpu_supports("avx512bw")) { GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; } - if ((sizeof(TypeParam) == 2) && (!cpu_has_avx512_vbmi2())) { + if ((sizeof(TypeParam) == 2) && (!__builtin_cpu_supports("avx512vbmi2"))) { GTEST_SKIP() << "Skipping this test, it requires avx512_vbmi2"; } std::vector arrsizes; diff --git a/tests/test-qsortfp16.cpp b/tests/test-qsortfp16.cpp index d6a45f7b..786af4e6 100644 --- a/tests/test-qsortfp16.cpp +++ b/tests/test-qsortfp16.cpp @@ -4,14 +4,14 @@ * *******************************************/ #include "avx512fp16-16bit-qsort.hpp" -#include "cpuinfo.h" + #include "rand_array.h" #include #include TEST(avx512_qsort_float16, test_arrsizes) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { std::vector arrsizes; for (int64_t ii = 0; ii < 1024; ++ii) { arrsizes.push_back(ii); @@ -41,7 +41,7 @@ TEST(avx512_qsort_float16, test_arrsizes) TEST(avx512_qsort_float16, test_special_floats) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { const int arrsize = 1111; std::vector<_Float16> arr; std::vector<_Float16> sortedarr; @@ -75,7 +75,7 @@ TEST(avx512_qsort_float16, test_special_floats) TEST(avx512_qselect_float16, test_arrsizes) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { std::vector arrsizes; for (int64_t ii = 0; ii < 1024; ++ii) { arrsizes.push_back(ii); @@ -120,7 +120,7 @@ TEST(avx512_qselect_float16, test_arrsizes) TEST(avx512_partial_qsort_float16, test_ranges) { - if (cpu_has_avx512fp16()) { + if (__builtin_cpu_supports("avx512fp16")) { int64_t arrsize = 1024; int64_t nranges = 500; diff --git a/utils/cpuinfo.cpp b/utils/cpuinfo.cpp deleted file mode 100644 index c05acf34..00000000 --- a/utils/cpuinfo.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/******************************************* - * * Copyright (C) 2022 Intel Corporation - * * SPDX-License-Identifier: BSD-3-Clause - * *******************************************/ - -#include "cpuinfo.h" - -static void -cpuid(uint32_t feature, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) -{ - __asm__ volatile( - "cpuid" - "\n\t" - : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) - : "a"(feature), "c"(0)); -} - -int cpu_has_avx512_vbmi2() -{ - uint32_t eax(0), ebx(0), ecx(0), edx(0); - cpuid(0x07, &eax, &ebx, &ecx, &edx); - return (ecx >> 6) & 0x1; -} - -int cpu_has_avx512bw() -{ - uint32_t eax(0), ebx(0), ecx(0), edx(0); - cpuid(0x07, &eax, &ebx, &ecx, &edx); - return (ebx >> 30) & 0x1; -} - -int cpu_has_avx512fp16() -{ - uint32_t eax(0), ebx(0), ecx(0), edx(0); - cpuid(0x07, &eax, &ebx, &ecx, &edx); - return (edx >> 23) & 0x1; -} - -// TODO: -//int check_os_supports_avx512() -//{ -// uint32_t eax(0), ebx(0), ecx(0), edx(0); -// cpuid(0x01, &eax, &ebx, &ecx, &edx); -// // XSAVE: -// if ((ecx >> 27) & 0x1) { -// uint32_t xget_eax, xget_edx, index(0); -// __asm__ ("xgetbv" : "=a"(xget_eax), "=d"(xget_edx) : "c" (index)) -// } -// -//} diff --git a/utils/cpuinfo.h b/utils/cpuinfo.h deleted file mode 100644 index 96f167e5..00000000 --- a/utils/cpuinfo.h +++ /dev/null @@ -1,13 +0,0 @@ -/******************************************* - * * Copyright (C) 2022 Intel Corporation - * * SPDX-License-Identifier: BSD-3-Clause - * *******************************************/ - -#include -#include - -int cpu_has_avx512_vbmi2(); - -int cpu_has_avx512bw(); - -int cpu_has_avx512fp16(); diff --git a/utils/meson.build b/utils/meson.build deleted file mode 100644 index 5f344536..00000000 --- a/utils/meson.build +++ /dev/null @@ -1,6 +0,0 @@ -libcpuinfo = [] - -libcpuinfo += static_library('cpuinfo', - files('cpuinfo.cpp'), - cpp_args : ['-O3'], -)