-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathdgemm_blocked.c
55 lines (49 loc) · 1.51 KB
/
dgemm_blocked.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/* Human-readable label for this dgemm implementation. Nothing in this file
   reads it; presumably an external driver prints it -- confirm against the
   caller. */
const char* dgemm_desc = "Simple blocked dgemm.";
/* Edge length, in matrix elements, of the square tiles that do_block() and
   square_dgemm() work on. The #ifndef guard lets a definition supplied
   earlier (for example with -DBLOCK_SIZE=... on the compiler command line)
   take precedence over the default of 16. */
#ifndef BLOCK_SIZE
#define BLOCK_SIZE ((int) 16)
#endif
/*
 * basic_dgemm: accumulate the product of two tiles, C += A * B.
 *
 *   A is M-by-K
 *   B is K-by-N
 *   C is M-by-N
 *
 * All three operands are stored column-major and share the same column
 * stride lda (the leading dimension of the enclosing matrix, i.e. the M of
 * square_dgemm): element (r, c) of an operand lives at offset c*lda + r
 * from the pointer that was passed in. A, B and C may therefore point into
 * the interior of larger lda-by-lda matrices.
 *
 * If M, N or K is not positive the corresponding loop does not run; with
 * K <= 0 every addressed entry of C is rewritten with its own value.
 */
void basic_dgemm(const int lda, const int M, const int N, const int K,
                 const double *A, const double *B, double *C)
{
    int i, j, k;

    /*
     * Walk C one column at a time so that consecutive iterations of the
     * i loop touch adjacent memory. Every C entry is still summed over k
     * in ascending order, so the traversal order affects only the memory
     * access pattern, not the computed values (assuming C does not overlap
     * A or B).
     */
    for (j = 0; j < N; ++j) {
        /* Widen before multiplying: j*lda can exceed INT_MAX for large
           matrices, and signed int overflow is undefined behaviour. */
        const long long col = (long long) j * lda;

        for (i = 0; i < M; ++i) {
            double cij = C[col + i];
            for (k = 0; k < K; ++k) {
                cij += A[(long long) k * lda + i] * B[col + k];
            }
            C[col + i] = cij;
        }
    }
}
/*
 * do_block: multiply one tile of A by one tile of B and accumulate the
 * result into one tile of C.
 *
 *   lda      leading dimension (column stride) shared by A, B and C, which
 *            are indexed column-major as lda-by-lda matrices
 *   A, B, C  base pointers of the full matrices
 *   i, j, k  element offsets of the tile origin: i is the first row of the
 *            A and C tiles, j the first column of the B and C tiles, and
 *            k the first column of the A tile / first row of the B tile
 *
 * Tiles are BLOCK_SIZE on a side, except along the far edges where they are
 * clipped to however many rows/columns remain before lda.
 */
void do_block(const int lda,
              const double *A, const double *B, double *C,
              const int i, const int j, const int k)
{
    /* Compare the remaining extent with BLOCK_SIZE rather than forming
       i+BLOCK_SIZE, which could overflow int near INT_MAX. */
    const int M = (lda - i < BLOCK_SIZE ? lda - i : BLOCK_SIZE);
    const int N = (lda - j < BLOCK_SIZE ? lda - j : BLOCK_SIZE);
    const int K = (lda - k < BLOCK_SIZE ? lda - k : BLOCK_SIZE);

    /* Column offsets are widened before multiplying: k*lda and j*lda can
       exceed INT_MAX for large matrices, and signed int overflow is
       undefined behaviour. */
    const long long a_off = (long long) k * lda + i;
    const long long b_off = (long long) j * lda + k;
    const long long c_off = (long long) j * lda + i;

    basic_dgemm(lda, M, N, K, A + a_off, B + b_off, C + c_off);
}
/*
 * square_dgemm: blocked update C += A * B for M-by-M matrices.
 *
 * The matrices are split into BLOCK_SIZE-by-BLOCK_SIZE tiles (the last tile
 * in each direction may be smaller) and do_block() is called once for every
 * (row tile, column tile, inner tile) triple, with M passed as the leading
 * dimension.
 */
void square_dgemm(const int M, const double *A, const double *B, double *C)
{
    int tile_row, tile_col, tile_inner;

    /* Tiles per matrix edge; a partial tile at the end counts as one. */
    int tiles_per_edge = M / BLOCK_SIZE;
    if (M % BLOCK_SIZE) {
        tiles_per_edge += 1;
    }

    for (tile_row = 0; tile_row < tiles_per_edge; ++tile_row) {
        for (tile_col = 0; tile_col < tiles_per_edge; ++tile_col) {
            for (tile_inner = 0; tile_inner < tiles_per_edge; ++tile_inner) {
                /* Convert tile indices to element offsets of the tile origin. */
                do_block(M, A, B, C,
                         tile_row * BLOCK_SIZE,
                         tile_col * BLOCK_SIZE,
                         tile_inner * BLOCK_SIZE);
            }
        }
    }
}