Skip to content

Commit

Permalink
Merge pull request #5071 from annop-w/sgemm_throttling
Browse files Browse the repository at this point in the history
Add thread throttling profile for SGEMM on NEOVERSEV1
  • Loading branch information
martin-frbg authored Jan 23, 2025
2 parents 9f2319b + c8cd8da commit a54f9a9
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,6 @@ In chronological order:

* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE

* Annop Wongwathanarat <annop.wongwathanarat@arm.com>
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
57 changes: 47 additions & 10 deletions interface/gemm.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*********************************************************************/
/* Copyright 2024 The OpenBLAS Project */
/* Copyright 2024, 2025 The OpenBLAS Project */
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
Expand Down Expand Up @@ -177,6 +177,49 @@ static int init_amxtile_permission() {
}
#endif

#ifdef DYNAMIC_ARCH
extern char* gotoblas_corename(void);
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
return
MNK < 262144L ? 1
: MNK < 1124864L ? MIN(ncpu, 6)
: MNK < 7880599L ? MIN(ncpu, 12)
: MNK < 17173512L ? MIN(ncpu, 16)
: MNK < 33386248L ? MIN(ncpu, 20)
: MNK < 57066625L ? MIN(ncpu, 24)
: MNK < 91733851L ? MIN(ncpu, 32)
: MNK < 265847707L ? MIN(ncpu, 40)
: MNK < 458314011L ? MIN(ncpu, 48)
: MNK < 729000000L ? MIN(ncpu, 56)
: ncpu;
}
#endif

static inline int get_gemm_optimal_nthreads(double MNK) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
}
#endif
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
return 1;
}
else {
if (MNK/ncpu < SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD) {
return MNK/(SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD);
}
else {
return ncpu;
}
}
}

#ifndef CBLAS

void NAME(char *TRANSA, char *TRANSB,
Expand Down Expand Up @@ -310,7 +353,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
FLOAT *beta = (FLOAT*) vbeta;
FLOAT *a = (FLOAT*) va;
FLOAT *b = (FLOAT*) vb;
FLOAT *c = (FLOAT*) vc;
FLOAT *c = (FLOAT*) vc;
#endif

blas_arg_t args;
Expand Down Expand Up @@ -352,7 +395,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && defined(USE_SGEMM_KERNEL_DIRECT)
#ifdef DYNAMIC_ARCH
if (support_avx512() )
#endif
#endif
if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans && SGEMM_DIRECT_PERFORMANT(m,n,k)) {
SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc);
return;
Expand Down Expand Up @@ -604,13 +647,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
#endif

MNK = (double) args.m * (double) args.n * (double) args.k;
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) )
args.nthreads = 1;
else {
args.nthreads = num_cpu_avail(3);
if (MNK/args.nthreads < SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD)
args.nthreads = MNK/(SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD);
}
args.nthreads = get_gemm_optimal_nthreads(MNK);

args.common = NULL;

Expand Down

0 comments on commit a54f9a9

Please sign in to comment.