Skip to content

Commit

Permalink
Merge pull request #5091 from goplanid/develop
Browse files Browse the repository at this point in the history
Small gemm kernel improvements for AArch64
  • Loading branch information
martin-frbg authored Jan 24, 2025
2 parents a54f9a9 + d1bfa97 commit 876ba58
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion kernel/arm64/dgemm_small_kernel_tn_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ CNAME(BLASLONG M,
const BLASLONG n2 = N & -2;
const BLASLONG n8 = N & -8;

- const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
+ const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
FLOAT* packed_a =
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;

Expand Down
2 changes: 1 addition & 1 deletion kernel/arm64/dgemm_small_kernel_tt_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ CNAME(BLASLONG M,
const BLASLONG n4 = N & -4;
const BLASLONG n2 = N & -2;

- const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
+ const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
FLOAT* packed_a =
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;

Expand Down
2 changes: 1 addition & 1 deletion kernel/arm64/sgemm_small_kernel_tn_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ CNAME(BLASLONG M,
const BLASLONG n8 = N & -8;
const BLASLONG n4 = N & -4;

- const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
+ const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
FLOAT* packed_a =
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;

Expand Down
2 changes: 1 addition & 1 deletion kernel/arm64/sgemm_small_kernel_tt_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ CNAME(BLASLONG M,
const BLASLONG n8 = N & -8;
const BLASLONG n4 = N & -4;

- const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
+ const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
FLOAT* packed_a =
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;

Expand Down

0 comments on commit 876ba58

Please sign in to comment.