@@ -76,22 +76,21 @@ static void * align_with_offset(void * ptr, int offset) {
76
76
return (char *) std::align (MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
77
77
}
78
78
79
- static void benchmark_function (size_t size, size_t q_size, int64_t iterations, const std::function<size_t (void )> & function ) {
79
+ static void benchmark_function (size_t size, size_t q_size, int64_t iterations, const std::function<float (void )> & func ) {
80
80
int64_t min_time_us = INT64_MAX;
81
81
int64_t total_time_us = 0 ;
82
82
int64_t min_time_cycles = INT64_MAX;
83
83
int64_t total_time_cycles = 0 ;
84
84
85
85
for (int i = 0 ; i < WARMUP; i++) {
86
- function ();
86
+ func ();
87
87
}
88
88
89
-
90
89
for (int i = 0 ; i < iterations; i++) {
91
90
const int64_t start_time = ggml_time_us ();
92
91
const int64_t start_cycles = cpu_cycles ();
93
92
94
- function ();
93
+ func ();
95
94
96
95
const int64_t end_cycles = cpu_cycles ();
97
96
const int64_t end_time = ggml_time_us ();
@@ -245,15 +244,15 @@ int main(int argc, char * argv[]) {
245
244
246
245
std::vector<uint8_t > test_data1_v (largest*4 + MAX_ALIGNMENT*2 );
247
246
std::vector<uint8_t > test_data2_v (largest*4 + MAX_ALIGNMENT*2 );
248
- std::vector<uint8_t > test_q1_v (largest*4 + MAX_ALIGNMENT*2 );
249
- std::vector<uint8_t > test_q2_v (largest*4 + MAX_ALIGNMENT*2 );
250
- std::vector<uint8_t > test_out_v (largest*4 + MAX_ALIGNMENT*2 );
247
+ std::vector<uint8_t > test_q1_v (largest*4 + MAX_ALIGNMENT*2 );
248
+ std::vector<uint8_t > test_q2_v (largest*4 + MAX_ALIGNMENT*2 );
249
+ std::vector<uint8_t > test_out_v (largest*4 + MAX_ALIGNMENT*2 );
251
250
252
251
float * test_data1 = (float *) align_with_offset (test_data1_v.data (), params.alignment_offset );
253
252
float * test_data2 = (float *) align_with_offset (test_data2_v.data (), params.alignment_offset );
254
- float * test_q1 = (float *) align_with_offset (test_q1_v.data (), params.alignment_offset );
255
- float * test_q2 = (float *) align_with_offset (test_q2_v.data (), params.alignment_offset );
256
- float * test_out = (float *) align_with_offset (test_out_v.data (), params.alignment_offset );
253
+ float * test_q1 = (float *) align_with_offset (test_q1_v.data (), params.alignment_offset );
254
+ float * test_q2 = (float *) align_with_offset (test_q2_v.data (), params.alignment_offset );
255
+ float * test_out = (float *) align_with_offset (test_out_v.data (), params.alignment_offset );
257
256
258
257
generate_data (0 , largest, test_data1);
259
258
generate_data (1 , largest, test_data2);
@@ -283,7 +282,7 @@ int main(int argc, char * argv[]) {
283
282
printf (" quantize_row_q_reference\n " );
284
283
for (size_t size : params.test_sizes ) {
285
284
printf (" %zu values (%.2f MB)\n " , size, 4 *size/(float )(1024 *1024 ));
286
- auto quantize_fn = [&](void ) {
285
+ auto quantize_fn = [&](void ) -> float {
287
286
qfns.from_float_reference (test_data1, test_q1, size);
288
287
return test_q1[0 ];
289
288
};
@@ -297,7 +296,7 @@ int main(int argc, char * argv[]) {
297
296
printf (" quantize_row_q\n " );
298
297
for (size_t size : params.test_sizes ) {
299
298
printf (" %zu values (%.2f MB)\n " , size, 4 *size/(float )(1024 *1024 ));
300
- auto quantize_fn = [&](void ) {
299
+ auto quantize_fn = [&](void ) -> float {
301
300
qfns.from_float (test_data1, test_q1, size);
302
301
return test_q1[0 ];
303
302
};
@@ -312,7 +311,7 @@ int main(int argc, char * argv[]) {
312
311
qfns.from_float (test_data1, test_q1, largest);
313
312
for (size_t size : params.test_sizes ) {
314
313
printf (" %zu values (%.2f MB)\n " , size, 4 *size/(float )(1024 *1024 ));
315
- auto quantize_fn = [&](void ) {
314
+ auto quantize_fn = [&](void ) -> float {
316
315
qfns.to_float (test_q1, test_out, size);
317
316
return test_out[0 ];
318
317
};
@@ -326,7 +325,7 @@ int main(int argc, char * argv[]) {
326
325
printf (" quantize_row_q_dot\n " );
327
326
for (size_t size : params.test_sizes ) {
328
327
printf (" %zu values (%.2f MB)\n " , size, 4 *size/(float )(1024 *1024 ));
329
- auto quantize_fn = [&](void ) {
328
+ auto quantize_fn = [&](void ) -> float {
330
329
auto vdot = ggml_internal_get_type_traits (qfns.vec_dot_type );
331
330
vdot.from_float (test_data1, test_q1, size);
332
331
return test_q1[0 ];
@@ -343,7 +342,7 @@ int main(int argc, char * argv[]) {
343
342
qfns.from_float (test_data2, test_q2, largest);
344
343
for (size_t size : params.test_sizes ) {
345
344
printf (" %zu values (%.2f MB)\n " , size, 4 *size/(float )(1024 *1024 ));
346
- auto quantize_fn = [&](void ) {
345
+ auto quantize_fn = [&](void ) -> float {
347
346
float result;
348
347
qfns.vec_dot (size, &result, test_q1, test_q2);
349
348
return result;
0 commit comments