
Commit 81b1af6

Authored by Jan Ciesko (janciesko) and Cédric Augonnet (caugonnet)
cudastf (examples): Fix compiler errors when enabling examples for CUDA STF (#3516)
* Make index types consistent in loops
* Add implementation of missing operator

Co-authored-by: Jan Ciesko <jciesko@gmail.com>
Co-authored-by: Cédric Augonnet <158148890+caugonnet@users.noreply.github.com>
1 parent abfb7b4 commit 81b1af6
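The loop-index changes in this commit replace mismatched index types with the type of the loop bound. A minimal sketch of the kind of failure this avoids, assuming the example builds promote warnings such as -Wsign-compare to errors; the struct and function names below are illustrative, not taken from the repository:

#include <cstddef>

// Hypothetical tile descriptor: mt and nt are tile counts stored as size_t,
// mirroring the matrix<T> members iterated over in the diff below.
struct tile_counts
{
  std::size_t mt;
  std::size_t nt;
};

void visit_blocks(const tile_counts& t)
{
  // Comparing a signed int index against a size_t bound triggers -Wsign-compare;
  // with warnings treated as errors, this line would stop the build:
  //   for (int rowb = 0; rowb < t.mt; rowb++) { ... }

  // Matching the index type to the bound, as the commit does, compiles cleanly.
  for (std::size_t rowb = 0; rowb < t.mt; rowb++)
  {
    for (std::size_t colb = 0; colb < t.nt; colb++)
    {
      // per-block work would go here, e.g. filling or scaling block (rowb, colb)
    }
  }
}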

File tree

6 files changed: +98 −90 lines changed

cudax/examples/stf/linear_algebra/06-pdgemm.cu

Lines changed: 10 additions & 10 deletions
@@ -160,9 +160,9 @@ public:
   {
     nvtxRangePushA("FILL");
     // Fill blocks by blocks
-    for (int colb = 0; colb < nt; colb++)
+    for (size_t colb = 0; colb < nt; colb++)
     {
-      for (int rowb = 0; rowb < mt; rowb++)
+      for (size_t rowb = 0; rowb < mt; rowb++)
       {
         // Each task fills a block
         auto& h = get_handle(rowb, colb);
@@ -251,9 +251,9 @@ void PDGEMM(stream_ctx& ctx,
             double beta,
             matrix<double>& C)
 {
-  for (int m = 0; m < C.mt; m++)
+  for (size_t m = 0; m < C.mt; m++)
   {
-    for (int n = 0; n < C.nt; n++)
+    for (size_t n = 0; n < C.nt; n++)
     {
       //=========================================
       // alpha*A*B does not contribute; scale C
@@ -271,7 +271,7 @@ void PDGEMM(stream_ctx& ctx,
       if (transb == CUBLAS_OP_N)
       {
         assert(A.nt == B.mt);
-        for (int k = 0; k < A.nt; k++)
+        for (size_t k = 0; k < A.nt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(ctx, transa, transb, alpha, A, m, k, B, k, n, zbeta, C, m, n);
@@ -282,7 +282,7 @@ void PDGEMM(stream_ctx& ctx,
       //=====================================
       else
       {
-        for (int k = 0; k < A.nt; k++)
+        for (size_t k = 0; k < A.nt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(ctx, transa, transb, alpha, A, m, k, B, n, k, zbeta, C, m, n);
@@ -296,7 +296,7 @@ void PDGEMM(stream_ctx& ctx,
       //=====================================
       if (transb == CUBLAS_OP_N)
       {
-        for (int k = 0; k < A.mt; k++)
+        for (size_t k = 0; k < A.mt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(ctx, transa, transb, alpha, A, k, m, B, k, n, zbeta, C, m, n);
@@ -307,7 +307,7 @@ void PDGEMM(stream_ctx& ctx,
       //==========================================
       else
       {
-        for (int k = 0; k < A.mt; k++)
+        for (size_t k = 0; k < A.mt; k++)
        {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(ctx, transa, transb, alpha, A, k, m, B, n, k, zbeta, C, m, n);
@@ -328,14 +328,14 @@ void run(stream_ctx& ctx, size_t N, size_t NB)
   cuda_safe_call(cudaGetDeviceCount(&ndevs));
 
   /* Warm up allocators */
-  for (size_t d = 0; d < ndevs; d++)
+  for (int d = 0; d < ndevs; d++)
   {
     auto lX = ctx.logical_data(shape_of<slice<double>>(1));
     ctx.parallel_for(exec_place::device(d), lX.shape(), lX.write())->*[] _CCCL_DEVICE(size_t, auto) {};
   }
 
   /* Initializes CUBLAS on all devices */
-  for (size_t d = 0; d < ndevs; d++)
+  for (int d = 0; d < ndevs; d++)
   {
     cuda_safe_call(cudaSetDevice(d));
     get_cublas_handle();
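The two device loops above go the opposite way: cudaGetDeviceCount reports the device count through an int, so an int index matches the bound there. A short self-contained sketch of that pattern (the function name is illustrative; only the CUDA runtime calls are real):

#include <cuda_runtime.h>

// Enumerate devices with an int index, matching the int count that
// cudaGetDeviceCount writes; a size_t index would reintroduce the
// signed/unsigned comparison the commit removes elsewhere.
inline int for_each_device_example()
{
  int ndevs = 0;
  if (cudaGetDeviceCount(&ndevs) != cudaSuccess)
  {
    return 0;
  }
  for (int d = 0; d < ndevs; d++)
  {
    cudaSetDevice(d); // per-device setup (allocator warm-up, cuBLAS handle, ...)
  }
  return ndevs;
}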

cudax/examples/stf/linear_algebra/07-cholesky.cu

Lines changed: 21 additions & 21 deletions
@@ -91,10 +91,10 @@ public:
 
     handles.resize(mt * nt);
 
-    for (int colb = 0; colb < nt; colb++)
+    for (size_t colb = 0; colb < nt; colb++)
     {
       int low_rowb = sym_matrix ? colb : 0;
-      for (int rowb = low_rowb; rowb < mt; rowb++)
+      for (size_t rowb = low_rowb; rowb < mt; rowb++)
       {
         T* addr_h = get_block_h(rowb, colb);
         auto& h = handle(rowb, colb);
@@ -171,10 +171,10 @@ public:
   {
     nvtxRangePushA("FILL");
     // Fill blocks by blocks
-    for (int colb = 0; colb < nt; colb++)
+    for (size_t colb = 0; colb < nt; colb++)
     {
       int low_rowb = sym_matrix ? colb : 0;
-      for (int rowb = low_rowb; rowb < mt; rowb++)
+      for (size_t rowb = low_rowb; rowb < mt; rowb++)
       {
         // Each task fills a block
         auto& h = handle(rowb, colb);
@@ -363,9 +363,9 @@ void PDNRM2_HOST(matrix<double>* A, double* result)
   reserved::dot::set_current_color("red");
 #endif
 
-  for (int rowb = 0; rowb < A->mt; rowb++)
+  for (size_t rowb = 0; rowb < A->mt; rowb++)
   {
-    for (int colb = 0; colb < A->nt; colb++)
+    for (size_t colb = 0; colb < A->nt; colb++)
     {
       ctx.host_launch(A->handle(rowb, colb).read())->*[=](auto sA) {
         double res2 = 0.0;
@@ -452,17 +452,17 @@ void PDTRSM(cublasSideMode_t side,
   //===========================================
   if (trans == CUBLAS_OP_N)
   {
-    for (int k = 0; k < B.mt; k++)
+    for (size_t k = 0; k < B.mt; k++)
     {
       double lalpha = k == 0 ? alpha : 1.0;
-      for (int n = 0; n < B.nt; n++)
+      for (size_t n = 0; n < B.nt; n++)
      {
         cuda_safe_call(cudaSetDevice(A.get_preferred_devid(k, k)));
         DTRSM(side, uplo, trans, diag, lalpha, A, k, k, B, k, n);
       }
-      for (int m = k + 1; m < B.mt; m++)
+      for (size_t m = k + 1; m < B.mt; m++)
       {
-        for (int n = 0; n < B.nt; n++)
+        for (size_t n = 0; n < B.nt; n++)
         {
           cuda_safe_call(cudaSetDevice(A.get_preferred_devid(m, k)));
           DGEMM(CUBLAS_OP_N, CUBLAS_OP_N, -1.0, A, m, k, B, k, n, lalpha, B, m, n);
@@ -475,17 +475,17 @@ void PDTRSM(cublasSideMode_t side,
   //================================================
   else
   {
-    for (int k = 0; k < B.mt; k++)
+    for (size_t k = 0; k < B.mt; k++)
     {
       double lalpha = k == 0 ? alpha : 1.0;
-      for (int n = 0; n < B.nt; n++)
+      for (size_t n = 0; n < B.nt; n++)
      {
         cuda_safe_call(cudaSetDevice(A.get_preferred_devid(B.mt - k - 1, B.mt - k - 1)));
         DTRSM(side, uplo, trans, diag, lalpha, A, B.mt - k - 1, B.mt - k - 1, B, B.mt - k - 1, n);
       }
-      for (int m = k + 1; m < B.mt; m++)
+      for (size_t m = k + 1; m < B.mt; m++)
       {
-        for (int n = 0; n < B.nt; n++)
+        for (size_t n = 0; n < B.nt; n++)
         {
           cuda_safe_call(cudaSetDevice(A.get_preferred_devid(B.mt - k - 1, B.mt - 1 - m)));
           DGEMM(
@@ -543,9 +543,9 @@ void PDGEMM(cublasOperation_t transa,
   reserved::dot::set_current_color("blue");
 #endif
 
-  for (int m = 0; m < C.mt; m++)
+  for (size_t m = 0; m < C.mt; m++)
   {
-    for (int n = 0; n < C.nt; n++)
+    for (size_t n = 0; n < C.nt; n++)
     {
       //=========================================
       // alpha*A*B does not contribute; scale C
@@ -562,7 +562,7 @@ void PDGEMM(cublasOperation_t transa,
       //================================
       if (transb == CUBLAS_OP_N)
       {
-        for (int k = 0; k < A.nt; k++)
+        for (size_t k = 0; k < A.nt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(transa, transb, alpha, A, m, k, B, k, n, zbeta, C, m, n);
@@ -573,7 +573,7 @@ void PDGEMM(cublasOperation_t transa,
      //=====================================
       else
       {
-        for (int k = 0; k < A.nt; k++)
+        for (size_t k = 0; k < A.nt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(transa, transb, alpha, A, m, k, B, n, k, zbeta, C, m, n);
@@ -587,7 +587,7 @@ void PDGEMM(cublasOperation_t transa,
       //=====================================
       if (transb == CUBLAS_OP_N)
       {
-        for (int k = 0; k < A.mt; k++)
+        for (size_t k = 0; k < A.mt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(transa, transb, alpha, A, k, m, B, k, n, zbeta, C, m, n);
@@ -598,7 +598,7 @@ void PDGEMM(cublasOperation_t transa,
       //==========================================
       else
       {
-        for (int k = 0; k < A.mt; k++)
+        for (size_t k = 0; k < A.mt; k++)
         {
           double zbeta = k == 0 ? beta : 1.0;
           DGEMM(transa, transb, alpha, A, k, m, B, n, k, zbeta, C, m, n);
@@ -640,7 +640,7 @@ int main(int argc, char** argv)
   int ndevs;
   cuda_safe_call(cudaGetDeviceCount(&ndevs));
 
-  for (size_t d = 0; d < ndevs; d++)
+  for (int d = 0; d < ndevs; d++)
   {
     auto lX = ctx.logical_data(shape_of<slice<double>>(1));
     ctx.parallel_for(exec_place::device(d), lX.shape(), lX.write())->*[] _CCCL_DEVICE(size_t, auto) {};
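One consequence of the size_t indices in the PDTRSM hunks above is that expressions such as B.mt - k - 1 now use unsigned arithmetic; they stay in range because k runs over [0, B.mt), but a loop counting down with a size_t index would wrap around rather than go negative. A small illustration of that caveat, with illustrative names not taken from the repository:

#include <cstddef>

// With k in [0, nt), the reversed index nt - k - 1 is always valid for size_t.
// Counting down directly with a size_t index needs a different stop condition,
// since "i >= 0" is always true for an unsigned type.
inline void reversed_iteration_example(std::size_t nt)
{
  for (std::size_t k = 0; k < nt; k++)
  {
    std::size_t rev = nt - k - 1; // visits nt-1, nt-2, ..., 0 without underflow
    (void) rev;                   // placeholder for per-tile work
  }
}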

0 commit comments