Skip to content

Commit 6232f2d

Browse files
authored
ggml : optimize non-SIMD Q4_0 vector dot product (#703)
1 parent 6c24870 commit 6232f2d

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

ggml.c

+7 -5
Original file line number | Diff line number | Diff line change
@@ -2160,18 +2160,20 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
2160 2160   const uint8_t * restrict p0 = x[i].qs;
2161 2161   const uint8_t * restrict p1 = y[i].qs;
2162 2162
     2163 + int sumi = 0;
2163 2164   for (int j = 0; j < QK/2; j++) {
2164 2165   const uint8_t v0 = p0[j];
2165 2166   const uint8_t v1 = p1[j];
2166 2167
2167      - const float f0 = d0*((int8_t) (v0 & 0xf) - 8);
2168      - const float f1 = d0*((int8_t) (v0 >> 4) - 8);
     2168 + const int8_t i0 = (int8_t) (v0 & 0xf) - 8;
     2169 + const int8_t i1 = (int8_t) (v0 >> 4) - 8;
2169 2170
2170      - const float f2 = d1*((int8_t) (v1 & 0xf) - 8);
2171      - const float f3 = d1*((int8_t) (v1 >> 4) - 8);
     2171 + const int8_t i2 = (int8_t) (v1 & 0xf) - 8;
     2172 + const int8_t i3 = (int8_t) (v1 >> 4) - 8;
2172 2173
2173      - sumf += f0*f2 + f1*f3;
     2174 + sumi += i0*i2 + i1*i3;
2174 2175   }
     2176 + sumf += d0 * d1 * sumi;
2175 2177   }
2176 2178   #endif
2177 2179
0 commit comments

Comments
 (0)