forked from gordonl0811/LowLatencyProgramming
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.cpp
59 lines (43 loc) · 1.71 KB
/
benchmark.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <benchmark/benchmark.h>
#include "simd_instructions.h"
#include "../utils/utils.h"
/// Benchmarks SimdInstructions::MultiplyAddScalar on fixed 8-float buffers.
///
/// The three source buffers are filled once, outside the timed region, from
/// GenerateRandomFloats(8, 100). Every timed iteration then invokes the kernel
/// state.range(0) times.
///
/// @param state Google Benchmark state; range(0) is the number of kernel calls
///              performed per timed iteration.
static void BenchmarkMultiplyAddScalar(benchmark::State& state) {
  float buf_a[8];
  float buf_b[8];
  float buf_c[8];
  float buf_d[8];

  // Copies one freshly generated batch of floats into an 8-element buffer.
  // NOTE(review): assumes GenerateRandomFloats(8, 100) yields at most 8
  // values -- confirm against utils.h; a longer result would overrun `dst`.
  const auto fill = [](float (&dst)[8]) {
    const std::vector<float> src = GenerateRandomFloats(8, 100);
    std::copy(src.cbegin(), src.cend(), dst);
  };
  fill(buf_a);
  fill(buf_b);
  fill(buf_c);

  for (auto _ : state) {
    for (auto remaining = state.range(0); remaining > 0; --remaining) {
      SimdInstructions::MultiplyAddScalar(buf_a, buf_b, buf_c, buf_d);
      // Keep the fourth buffer observable so the call is not optimized away.
      benchmark::DoNotOptimize(buf_d);
    }
  }
}
/// Benchmarks SimdInstructions::MultiplyAddVectorized on three __m256 operands.
///
/// The operands are prepared once, outside the timed region: random floats from
/// GenerateRandomFloats(8, 100) are staged in stack arrays and loaded into AVX
/// registers. Every timed iteration then invokes the kernel state.range(0)
/// times.
///
/// @param state Google Benchmark state; range(0) is the number of kernel calls
///              performed per timed iteration.
static void BenchmarkMultiplyAddVectorized(benchmark::State& state) {
  std::vector<float> floats_a = GenerateRandomFloats(8, 100);
  std::vector<float> floats_b = GenerateRandomFloats(8, 100);
  std::vector<float> floats_c = GenerateRandomFloats(8, 100);

  // _mm256_load_ps requires a 32-byte-aligned address. A plain float[8] is only
  // guaranteed alignof(float), so the alignment must be requested explicitly;
  // otherwise the aligned loads below may fault.
  alignas(32) float a[8];
  alignas(32) float b[8];
  alignas(32) float c[8];

  // NOTE(review): assumes each generated vector holds at most 8 values --
  // confirm against utils.h; a longer vector would overrun the arrays.
  std::copy(floats_a.begin(), floats_a.end(), a);
  std::copy(floats_b.begin(), floats_b.end(), b);
  std::copy(floats_c.begin(), floats_c.end(), c);

  __m256 a_simd = _mm256_load_ps(a);
  __m256 b_simd = _mm256_load_ps(b);
  __m256 c_simd = _mm256_load_ps(c);

  // The repetition count is fixed for the whole run; read it once.
  const auto repetitions = state.range(0);

  for (auto _ : state) {
    for (auto i = decltype(repetitions){0}; i < repetitions; ++i) {
      // Consume the returned value so the call is not optimized away.
      benchmark::DoNotOptimize(SimdInstructions::MultiplyAddVectorized(a_simd, b_simd, c_simd));
    }
  }
}
// Register both benchmarks over the same argument sweep so their results are
// directly comparable: with a range multiplier of 10, state.range(0) takes the
// values 1, 10, 100, ... up to 10000000.
BENCHMARK(BenchmarkMultiplyAddScalar)->RangeMultiplier(10)->Range(1, 10000000);
BENCHMARK(BenchmarkMultiplyAddVectorized)->RangeMultiplier(10)->Range(1, 10000000);

// Provides the program's main(), which runs the registered benchmarks.
BENCHMARK_MAIN();