From aa9066a26432fc8f0b1d67e107e79b885742d88e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Wed, 19 Jul 2023 12:53:54 +0200 Subject: [PATCH 01/10] Add xGEMMT and their test cases The xGEMMT subroutines are added in the Fortran API. The tests are updated as well. --- BLAS/SRC/CMakeLists.txt | 8 +- BLAS/SRC/Makefile | 8 +- BLAS/SRC/cgemmt.f | 570 ++++++++++++++++++++++++++++++++ BLAS/SRC/dgemmt.f | 432 ++++++++++++++++++++++++ BLAS/SRC/sgemmt.f | 432 ++++++++++++++++++++++++ BLAS/SRC/zgemmt.f | 570 ++++++++++++++++++++++++++++++++ BLAS/TESTING/cblat3.f | 706 ++++++++++++++++++++++++++++++++++++++- BLAS/TESTING/cblat3.in | 1 + BLAS/TESTING/dblat3.f | 518 ++++++++++++++++++++++++++++- BLAS/TESTING/dblat3.in | 1 + BLAS/TESTING/sblat3.f | 518 ++++++++++++++++++++++++++++- BLAS/TESTING/sblat3.in | 1 + BLAS/TESTING/zblat3.f | 714 +++++++++++++++++++++++++++++++++++++++- BLAS/TESTING/zblat3.in | 1 + 14 files changed, 4418 insertions(+), 62 deletions(-) create mode 100644 BLAS/SRC/cgemmt.f create mode 100644 BLAS/SRC/dgemmt.f create mode 100644 BLAS/SRC/sgemmt.f create mode 100644 BLAS/SRC/zgemmt.f diff --git a/BLAS/SRC/CMakeLists.txt b/BLAS/SRC/CMakeLists.txt index c92bd2bad9..3bdaad3a2e 100644 --- a/BLAS/SRC/CMakeLists.txt +++ b/BLAS/SRC/CMakeLists.txt @@ -82,15 +82,15 @@ set(ZBLAS2 zgemv.f zgbmv.f zhemv.f zhbmv.f zhpmv.f #--------------------------------------------------------- # Level 3 BLAS #--------------------------------------------------------- -set(SBLAS3 sgemm.f ssymm.f ssyrk.f ssyr2k.f strmm.f strsm.f) +set(SBLAS3 sgemm.f ssymm.f ssyrk.f ssyr2k.f strmm.f strsm.f sgemmt.f) set(CBLAS3 cgemm.f csymm.f csyrk.f csyr2k.f ctrmm.f ctrsm.f - chemm.f cherk.f cher2k.f) + chemm.f cherk.f cher2k.f cgemmt.f) -set(DBLAS3 dgemm.f dsymm.f dsyrk.f dsyr2k.f dtrmm.f dtrsm.f) +set(DBLAS3 dgemm.f dsymm.f dsyrk.f dsyr2k.f dtrmm.f dtrsm.f dgemmt.f) set(ZBLAS3 zgemm.f zsymm.f zsyrk.f zsyr2k.f ztrmm.f ztrsm.f - zhemm.f zherk.f zher2k.f) + zhemm.f zherk.f zher2k.f 
zgemmt.f) set(SOURCES) diff --git a/BLAS/SRC/Makefile b/BLAS/SRC/Makefile index 70534c8358..145f40ff42 100644 --- a/BLAS/SRC/Makefile +++ b/BLAS/SRC/Makefile @@ -127,18 +127,18 @@ $(ZBLAS2): $(FRC) # Comment out the next 4 definitions if you already have # the Level 3 BLAS. #--------------------------------------------------------- -SBLAS3 = sgemm.o ssymm.o ssyrk.o ssyr2k.o strmm.o strsm.o +SBLAS3 = sgemm.o ssymm.o ssyrk.o ssyr2k.o strmm.o strsm.o sgemmt.o $(SBLAS3): $(FRC) CBLAS3 = cgemm.o csymm.o csyrk.o csyr2k.o ctrmm.o ctrsm.o \ - chemm.o cherk.o cher2k.o + chemm.o cherk.o cher2k.o cgemmt.o $(CBLAS3): $(FRC) -DBLAS3 = dgemm.o dsymm.o dsyrk.o dsyr2k.o dtrmm.o dtrsm.o +DBLAS3 = dgemm.o dsymm.o dsyrk.o dsyr2k.o dtrmm.o dtrsm.o dgemmt.o $(DBLAS3): $(FRC) ZBLAS3 = zgemm.o zsymm.o zsyrk.o zsyr2k.o ztrmm.o ztrsm.o \ - zhemm.o zherk.o zher2k.o + zhemm.o zherk.o zher2k.o zgemmt.o $(ZBLAS3): $(FRC) ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ diff --git a/BLAS/SRC/cgemmt.f b/BLAS/SRC/cgemmt.f new file mode 100644 index 0000000000..e6071a345b --- /dev/null +++ b/BLAS/SRC/cgemmt.f @@ -0,0 +1,570 @@ +*> \brief \b CGEMMT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB,BETA, +* C,LDC) +* +* .. Scalar Arguments .. +* COMPLEX ALPHA,BETA +* INTEGER K,LDA,LDB,LDC,N +* CHARACTER TRANSA,TRANSB, UPLO +* .. +* .. Array Arguments .. +* COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEMMT performs one of the matrix-matrix operations +*> +*> C := alpha*op( A )*op( B ) + beta*C, +*> +*> where op( X ) is one of +*> +*> op( X ) = X or op( X ) = X**T or op( X ) = X**H, +*> +*> alpha and beta are scalars, and A, B and C are matrices, with op( A ) +*> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix.
+*> Thereby, the routine only accesses and updates the upper or lower +*> triangular part of the result matrix C. This behaviour can be used if +*> the resulting matrix C is known to be symmetric. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the lower or the upper +*> triangular part of C is accessed and updated. +*> +*> UPLO = 'L' or 'l', the lower triangular part of C is used. +*> +*> UPLO = 'U' or 'u', the upper triangular part of C is used. +*> \endverbatim +* +*> \param[in] TRANSA +*> \verbatim +*> TRANSA is CHARACTER*1 +*> On entry, TRANSA specifies the form of op( A ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSA = 'N' or 'n', op( A ) = A. +*> +*> TRANSA = 'T' or 't', op( A ) = A**T. +*> +*> TRANSA = 'C' or 'c', op( A ) = A**H. +*> \endverbatim +*> +*> \param[in] TRANSB +*> \verbatim +*> TRANSB is CHARACTER*1 +*> On entry, TRANSB specifies the form of op( B ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSB = 'N' or 'n', op( B ) = B. +*> +*> TRANSB = 'T' or 't', op( B ) = B**T. +*> +*> TRANSB = 'C' or 'c', op( B ) = B**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of rows and columns of +*> the matrix C, the number of columns of op(B) and the number +*> of rows of op(A). N must be at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry, K specifies the number of columns of the matrix +*> op( A ) and the number of rows of the matrix op( B ). K must +*> be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is +*> k when TRANSA = 'N' or 'n', and is n otherwise.
+*> Before entry with TRANSA = 'N' or 'n', the leading n by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by n part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANSA = 'N' or 'n' then +*> LDA must be at least max( 1, n ), otherwise LDA must be at +*> least max( 1, k ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension ( LDB, kb ), where kb is +*> n when TRANSB = 'N' or 'n', and is k otherwise. +*> Before entry with TRANSB = 'N' or 'n', the leading k by n +*> part of the array B must contain the matrix B, otherwise +*> the leading n by k part of the array B must contain the +*> matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. When TRANSB = 'N' or 'n' then +*> LDB must be at least max( 1, k ), otherwise LDB must be at +*> least max( 1, n ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX. +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then C need not be set on input. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension ( LDC, N ) +*> Before entry, the leading n by n part of the array C must +*> contain the matrix C, except when beta is zero, in which +*> case C need not be set on entry. +*> On exit, the upper or lower triangular part of the matrix +*> C is overwritten by the n by n matrix +*> ( alpha*op( A )*op( B ) + beta*C ). +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, n ).
+*> \endverbatim +* +* Authors: +* ======== +* +*> \author Martin Koehler +* +*> \ingroup gemm +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 19-July-2023. +*> Martin Koehler, MPI Magdeburg +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB, + + BETA,C,LDC) + IMPLICIT NONE +* +* -- Reference BLAS level3 routine -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + COMPLEX ALPHA,BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANSA,TRANSB,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,J,L,NROWA,NROWB,ISTART, ISTOP + LOGICAL CONJA,CONJB,NOTA,NOTB,UPPER +* .. +* .. Parameters .. + COMPLEX ONE + PARAMETER (ONE= (1.0E+0,0.0E+0)) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be conjugate-transposed and set +* NROWA and NROWB as the number of rows of A and B respectively. +* + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + CONJA = LSAME(TRANSA,'C') + CONJB = LSAME(TRANSB,'C') + IF (NOTA) THEN + NROWA = N + ELSE + NROWA = K + END IF + IF (NOTB) THEN + NROWB = K + ELSE + NROWB = N + END IF + UPPER = LSAME(UPLO, 'U') + +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT. UPPER) .AND. (.NOT.
LSAME(UPLO, 'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 2 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND. + + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 13 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CGEMMT',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 10 I = ISTART, ISTOP + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + DO 30 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (NOTB) THEN + IF (NOTA) THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + IF (BETA.EQ.ZERO) THEN + DO 50 I = ISTART, ISTOP + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + TEMP = ALPHA*B(L,J) + DO 70 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + ELSE IF (CONJA) THEN +* +* Form C := alpha*A**H*B + beta*C.
+* + DO 120 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 110 I = ISTART, ISTOP + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A**T*B + beta*C +* + DO 150 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 140 I = ISTART, ISTOP + TEMP = ZERO + DO 130 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 130 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF (NOTA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A*B**H + beta*C. +* + DO 200 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + IF (BETA.EQ.ZERO) THEN + DO 160 I = ISTART,ISTOP + C(I,J) = ZERO + 160 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 170 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 170 CONTINUE + END IF + DO 190 L = 1,K + TEMP = ALPHA*CONJG(B(J,L)) + DO 180 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 180 CONTINUE + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B**T + beta*C +* + DO 250 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + IF (BETA.EQ.ZERO) THEN + DO 210 I = ISTART, ISTOP + C(I,J) = ZERO + 210 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 220 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 220 CONTINUE + END IF + DO 240 L = 1,K + TEMP = ALPHA*B(J,L) + DO 230 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 230 CONTINUE + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF (CONJA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A**H*B**H + beta*C.
+* + DO 280 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 270 I = ISTART, ISTOP + TEMP = ZERO + DO 260 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L)) + 260 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*A**H*B**T + beta*C +* + DO 310 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 300 I = ISTART, ISTOP + TEMP = ZERO + DO 290 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*B(J,L) + 290 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF (CONJB) THEN +* +* Form C := alpha*A**T*B**H + beta*C +* + DO 340 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 330 I = ISTART, ISTOP + TEMP = ZERO + DO 320 L = 1,K + TEMP = TEMP + A(L,I)*CONJG(B(J,L)) + 320 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A**T*B**T + beta*C +* + DO 370 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 360 I = ISTART, ISTOP + TEMP = ZERO + DO 350 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 350 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMMT +* + END diff --git a/BLAS/SRC/dgemmt.f b/BLAS/SRC/dgemmt.f new file mode 100644 index 0000000000..718fafb17f --- /dev/null +++ b/BLAS/SRC/dgemmt.f @@ -0,0 +1,432 @@ +*> \brief \b DGEMMT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE
DGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB,BETA, +* C,LDC) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA,BETA +* INTEGER K,LDA,LDB,LDC,N +* CHARACTER TRANSA,TRANSB, UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEMMT performs one of the matrix-matrix operations +*> +*> C := alpha*op( A )*op( B ) + beta*C, +*> +*> where op( X ) is one of +*> +*> op( X ) = X or op( X ) = X**T, +*> +*> alpha and beta are scalars, and A, B and C are matrices, with op( A ) +*> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix. +*> Thereby, the routine only accesses and updates the upper or lower +*> triangular part of the result matrix C. This behaviour can be used if +*> the resulting matrix C is known to be symmetric. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the lower or the upper +*> triangular part of C is accessed and updated. +*> +*> UPLO = 'L' or 'l', the lower triangular part of C is used. +*> +*> UPLO = 'U' or 'u', the upper triangular part of C is used. +*> \endverbatim +* +*> \param[in] TRANSA +*> \verbatim +*> TRANSA is CHARACTER*1 +*> On entry, TRANSA specifies the form of op( A ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSA = 'N' or 'n', op( A ) = A. +*> +*> TRANSA = 'T' or 't', op( A ) = A**T. +*> +*> TRANSA = 'C' or 'c', op( A ) = A**T. +*> \endverbatim +*> +*> \param[in] TRANSB +*> \verbatim +*> TRANSB is CHARACTER*1 +*> On entry, TRANSB specifies the form of op( B ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSB = 'N' or 'n', op( B ) = B. +*> +*> TRANSB = 'T' or 't', op( B ) = B**T. +*> +*> TRANSB = 'C' or 'c', op( B ) = B**T.
+*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of rows and columns of +*> the matrix C, the number of columns of op(B) and the number +*> of rows of op(A). N must be at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry, K specifies the number of columns of the matrix +*> op( A ) and the number of rows of the matrix op( B ). K must +*> be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is +*> k when TRANSA = 'N' or 'n', and is n otherwise. +*> Before entry with TRANSA = 'N' or 'n', the leading n by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by n part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANSA = 'N' or 'n' then +*> LDA must be at least max( 1, n ), otherwise LDA must be at +*> least max( 1, k ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is +*> n when TRANSB = 'N' or 'n', and is k otherwise. +*> Before entry with TRANSB = 'N' or 'n', the leading k by n +*> part of the array B must contain the matrix B, otherwise +*> the leading n by k part of the array B must contain the +*> matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. When TRANSB = 'N' or 'n' then +*> LDB must be at least max( 1, k ), otherwise LDB must be at +*> least max( 1, n ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION.
+*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then C need not be set on input. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension ( LDC, N ) +*> Before entry, the leading n by n part of the array C must +*> contain the matrix C, except when beta is zero, in which +*> case C need not be set on entry. +*> On exit, the upper or lower triangular part of the matrix +*> C is overwritten by the n by n matrix +*> ( alpha*op( A )*op( B ) + beta*C ). +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Martin Koehler +* +*> \ingroup gemm +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 19-July-2023. +*> Martin Koehler, MPI Magdeburg +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB, + + BETA,C,LDC) + IMPLICIT NONE +* +* -- Reference BLAS level3 routine -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA,BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANSA,TRANSB,UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I,INFO,J,L,NROWA,NROWB, ISTART, ISTOP + LOGICAL NOTA,NOTB, UPPER +* .. +* .. Parameters ..
+ DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA and NROWB as the number of rows of A +* and B respectively. +* + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + IF (NOTA) THEN + NROWA = N + ELSE + NROWA = K + END IF + IF (NOTB) THEN + NROWB = K + ELSE + NROWB = N + END IF + UPPER = LSAME(UPLO, 'U') +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT. UPPER) .AND. (.NOT. LSAME(UPLO, 'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 2 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 13 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('DGEMMT',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And if alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 10 I = ISTART, ISTOP + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 30 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (NOTB) THEN + IF (NOTA) THEN +* +* Form C := alpha*A*B + beta*C.
+* + DO 90 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + IF (BETA.EQ.ZERO) THEN + DO 50 I = ISTART, ISTOP + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + TEMP = ALPHA*B(L,J) + DO 70 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A**T*B + beta*C +* + DO 120 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 110 I = ISTART, ISTOP + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF (NOTA) THEN +* +* Form C := alpha*A*B**T + beta*C +* + DO 170 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + IF (BETA.EQ.ZERO) THEN + DO 130 I = ISTART,ISTOP + C(I,J) = ZERO + 130 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 140 I = ISTART,ISTOP + C(I,J) = BETA*C(I,J) + 140 CONTINUE + END IF + DO 160 L = 1,K + TEMP = ALPHA*B(J,L) + DO 150 I = ISTART,ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A**T*B**T + beta*C +* + DO 200 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 190 I = ISTART, ISTOP + TEMP = ZERO + DO 180 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 180 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMMT +* + END diff --git a/BLAS/SRC/sgemmt.f b/BLAS/SRC/sgemmt.f new file mode 100644 index 0000000000..3875e63664 --- /dev/null +++ b/BLAS/SRC/sgemmt.f @@ -0,0 +1,432 @@ +*> \brief \b SGEMMT +* +* =========== DOCUMENTATION =========== +* +* Online html
documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB,BETA, +* C,LDC) +* +* .. Scalar Arguments .. +* REAL ALPHA,BETA +* INTEGER K,LDA,LDB,LDC,N +* CHARACTER TRANSA,TRANSB, UPLO +* .. +* .. Array Arguments .. +* REAL A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEMMT performs one of the matrix-matrix operations +*> +*> C := alpha*op( A )*op( B ) + beta*C, +*> +*> where op( X ) is one of +*> +*> op( X ) = X or op( X ) = X**T, +*> +*> alpha and beta are scalars, and A, B and C are matrices, with op( A ) +*> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix. +*> Thereby, the routine only accesses and updates the upper or lower +*> triangular part of the result matrix C. This behaviour can be used if +*> the resulting matrix C is known to be symmetric. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the lower or the upper +*> triangular part of C is accessed and updated. +*> +*> UPLO = 'L' or 'l', the lower triangular part of C is used. +*> +*> UPLO = 'U' or 'u', the upper triangular part of C is used. +*> \endverbatim +* +*> \param[in] TRANSA +*> \verbatim +*> TRANSA is CHARACTER*1 +*> On entry, TRANSA specifies the form of op( A ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSA = 'N' or 'n', op( A ) = A. +*> +*> TRANSA = 'T' or 't', op( A ) = A**T. +*> +*> TRANSA = 'C' or 'c', op( A ) = A**T. +*> \endverbatim +*> +*> \param[in] TRANSB +*> \verbatim +*> TRANSB is CHARACTER*1 +*> On entry, TRANSB specifies the form of op( B ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSB = 'N' or 'n', op( B ) = B. +*> +*> TRANSB = 'T' or 't', op( B ) = B**T. +*> +*> TRANSB = 'C' or 'c', op( B ) = B**T.
+*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of rows and columns of +*> the matrix C, the number of columns of op(B) and the number +*> of rows of op(A). N must be at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry, K specifies the number of columns of the matrix +*> op( A ) and the number of rows of the matrix op( B ). K must +*> be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL. +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension ( LDA, ka ), where ka is +*> k when TRANSA = 'N' or 'n', and is n otherwise. +*> Before entry with TRANSA = 'N' or 'n', the leading n by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by n part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANSA = 'N' or 'n' then +*> LDA must be at least max( 1, n ), otherwise LDA must be at +*> least max( 1, k ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension ( LDB, kb ), where kb is +*> n when TRANSB = 'N' or 'n', and is k otherwise. +*> Before entry with TRANSB = 'N' or 'n', the leading k by n +*> part of the array B must contain the matrix B, otherwise +*> the leading n by k part of the array B must contain the +*> matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. When TRANSB = 'N' or 'n' then +*> LDB must be at least max( 1, k ), otherwise LDB must be at +*> least max( 1, n ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL. +*> On entry, BETA specifies the scalar beta.
When BETA is +*> supplied as zero then C need not be set on input. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension ( LDC, N ) +*> Before entry, the leading n by n part of the array C must +*> contain the matrix C, except when beta is zero, in which +*> case C need not be set on entry. +*> On exit, the upper or lower triangular part of the matrix +*> C is overwritten by the n by n matrix +*> ( alpha*op( A )*op( B ) + beta*C ). +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Martin Koehler +* +*> \ingroup gemm +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 19-July-2023. +*> Martin Koehler, MPI Magdeburg +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB, + + BETA,C,LDC) + IMPLICIT NONE +* +* -- Reference BLAS level3 routine -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + REAL ALPHA,BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANSA,TRANSB,UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + REAL TEMP + INTEGER I,INFO,J,L,NROWA,NROWB, ISTART, ISTOP + LOGICAL NOTA,NOTB, UPPER +* .. +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* ..
+* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA and NROWB as the number of rows of A +* and B respectively. +* + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + IF (NOTA) THEN + NROWA = N + ELSE + NROWA = K + END IF + IF (NOTB) THEN + NROWB = K + ELSE + NROWB = N + END IF + UPPER = LSAME(UPLO, 'U') +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT. UPPER) .AND. (.NOT. LSAME(UPLO, 'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 2 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 13 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('SGEMMT',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And if alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 10 I = ISTART, ISTOP + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 30 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (NOTB) THEN + IF (NOTA) THEN +* +* Form C := alpha*A*B + beta*C.
+* + DO 90 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + IF (BETA.EQ.ZERO) THEN + DO 50 I = ISTART, ISTOP + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + TEMP = ALPHA*B(L,J) + DO 70 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A**T*B + beta*C +* + DO 120 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 110 I = ISTART, ISTOP + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF (NOTA) THEN +* +* Form C := alpha*A*B**T + beta*C +* + DO 170 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + IF (BETA.EQ.ZERO) THEN + DO 130 I = ISTART,ISTOP + C(I,J) = ZERO + 130 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 140 I = ISTART,ISTOP + C(I,J) = BETA*C(I,J) + 140 CONTINUE + END IF + DO 160 L = 1,K + TEMP = ALPHA*B(J,L) + DO 150 I = ISTART,ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A**T*B**T + beta*C +* + DO 200 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 190 I = ISTART, ISTOP + TEMP = ZERO + DO 180 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 180 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEMMT +* + END diff --git a/BLAS/SRC/zgemmt.f b/BLAS/SRC/zgemmt.f new file mode 100644 index 0000000000..37828abaad --- /dev/null +++ b/BLAS/SRC/zgemmt.f @@ -0,0 +1,570 @@ +*> \brief \b ZGEMMT +* +* =========== DOCUMENTATION =========== +* +* Online html
documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE ZGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB,BETA,
+*                         C,LDC)
+*
+*       .. Scalar Arguments ..
+*       COMPLEX*16 ALPHA,BETA
+*       INTEGER K,LDA,LDB,LDC,N
+*       CHARACTER TRANSA,TRANSB, UPLO
+*       ..
+*       .. Array Arguments ..
+*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> ZGEMMT performs one of the matrix-matrix operations
+*>
+*>    C := alpha*op( A )*op( B ) + beta*C,
+*>
+*> where op( X ) is one of
+*>
+*>    op( X ) = X or op( X ) = X**T or op( X ) = X**H,
+*>
+*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix.
+*> Thereby, the routine only accesses and updates the upper or lower
+*> triangular part of the result matrix C. This behaviour can be used
+*> if the resulting matrix C is known to be symmetric or Hermitian.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*>          UPLO is CHARACTER*1
+*>           On entry, UPLO specifies whether the lower or the upper
+*>           triangular part of C is accessed and updated.
+*>
+*>              UPLO = 'L' or 'l', the lower triangular part of C is used.
+*>
+*>              UPLO = 'U' or 'u', the upper triangular part of C is used.
+*> \endverbatim
+*
+*> \param[in] TRANSA
+*> \verbatim
+*>          TRANSA is CHARACTER*1
+*>           On entry, TRANSA specifies the form of op( A ) to be used in
+*>           the matrix multiplication as follows:
+*>
+*>              TRANSA = 'N' or 'n', op( A ) = A.
+*>
+*>              TRANSA = 'T' or 't', op( A ) = A**T.
+*>
+*>              TRANSA = 'C' or 'c', op( A ) = A**H.
+*> \endverbatim
+*>
+*> \param[in] TRANSB
+*> \verbatim
+*>          TRANSB is CHARACTER*1
+*>           On entry, TRANSB specifies the form of op( B ) to be used in
+*>           the matrix multiplication as follows:
+*>
+*>              TRANSB = 'N' or 'n', op( B ) = B.
+*>
+*>              TRANSB = 'T' or 't', op( B ) = B**T.
+*>
+*>              TRANSB = 'C' or 'c', op( B ) = B**H.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>           On entry, N specifies the number of rows and columns of
+*>           the matrix C, the number of columns of op(B) and the number
+*>           of rows of op(A). N must be at least zero.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>           On entry, K specifies the number of columns of the matrix
+*>           op( A ) and the number of rows of the matrix op( B ). K must
+*>           be at least zero.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*>          ALPHA is COMPLEX*16.
+*>           On entry, ALPHA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] A
+*> \verbatim
+*>          A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is
+*>           k when TRANSA = 'N' or 'n', and is n otherwise.
+*>           Before entry with TRANSA = 'N' or 'n', the leading n by k
+*>           part of the array A must contain the matrix A, otherwise
+*>           the leading k by n part of the array A must contain the
+*>           matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>           On entry, LDA specifies the first dimension of A as declared
+*>           in the calling (sub) program. When TRANSA = 'N' or 'n' then
+*>           LDA must be at least max( 1, n ), otherwise LDA must be at
+*>           least max( 1, k ).
+*> \endverbatim
+*>
+*> \param[in] B
+*> \verbatim
+*>          B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is
+*>           n when TRANSB = 'N' or 'n', and is k otherwise.
+*>           Before entry with TRANSB = 'N' or 'n', the leading k by n
+*>           part of the array B must contain the matrix B, otherwise
+*>           the leading n by k part of the array B must contain the
+*>           matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>           On entry, LDB specifies the first dimension of B as declared
+*>           in the calling (sub) program. When TRANSB = 'N' or 'n' then
+*>           LDB must be at least max( 1, k ), otherwise LDB must be at
+*>           least max( 1, n ).
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*>          BETA is COMPLEX*16.
+*>           On entry, BETA specifies the scalar beta. When BETA is
+*>           supplied as zero then C need not be set on input.
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*>          C is COMPLEX*16 array, dimension ( LDC, N )
+*>           Before entry, the leading n by n part of the array C must
+*>           contain the matrix C, except when beta is zero, in which
+*>           case C need not be set on entry.
+*>           On exit, the upper or lower triangular part of the matrix
+*>           C is overwritten by the corresponding part of the n by n
+*>           matrix ( alpha*op( A )*op( B ) + beta*C ).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*>          LDC is INTEGER
+*>           On entry, LDC specifies the first dimension of C as declared
+*>           in the calling (sub) program. LDC must be at least
+*>           max( 1, n ).
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Martin Koehler
+*
+*> \ingroup gemm
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  Level 3 Blas routine.
+*>
+*>  -- Written on 19-July-2023.
+*>     Martin Koehler, MPI Magdeburg
+*> \endverbatim
+*>
+*  =====================================================================
+      SUBROUTINE ZGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB,
+     +           BETA,C,LDC)
+      IMPLICIT NONE
+*
+*  -- Reference BLAS level3 routine --
+*  -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      COMPLEX*16 ALPHA,BETA
+      INTEGER K,LDA,LDB,LDC,N
+      CHARACTER TRANSA,TRANSB,UPLO
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
+*     ..
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL LSAME
+      EXTERNAL LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC CONJG,MAX
+*     ..
+*     .. Local Scalars ..
+      COMPLEX*16 TEMP
+      INTEGER I,INFO,J,L,NROWA,NROWB,ISTART,ISTOP
+      LOGICAL CONJA,CONJB,NOTA,NOTB,UPPER
+*     ..
+*     .. Parameters ..
+      COMPLEX*16 ONE
+      PARAMETER (ONE= (1.0D+0,0.0D+0))
+      COMPLEX*16 ZERO
+      PARAMETER (ZERO= (0.0D+0,0.0D+0))
+*     ..
+*
+*     Set NOTA and NOTB as true if A and B respectively are not
+*     conjugated or transposed, set CONJA and CONJB as true if A and
+*     B respectively are to be conjugate transposed and set NROWA and
+*     NROWB as the number of rows of A and B respectively.
+*
+      NOTA = LSAME(TRANSA,'N')
+      NOTB = LSAME(TRANSB,'N')
+      CONJA = LSAME(TRANSA,'C')
+      CONJB = LSAME(TRANSB,'C')
+      IF (NOTA) THEN
+          NROWA = N
+      ELSE
+          NROWA = K
+      END IF
+      IF (NOTB) THEN
+          NROWB = K
+      ELSE
+          NROWB = N
+      END IF
+      UPPER = LSAME(UPLO, 'U')
+
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF ((.NOT. UPPER) .AND. (.NOT. LSAME(UPLO, 'L'))) THEN
+          INFO = 1
+      ELSE IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
+     +         (.NOT.LSAME(TRANSA,'T'))) THEN
+          INFO = 2
+      ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
+     +         (.NOT.LSAME(TRANSB,'T'))) THEN
+          INFO = 3
+      ELSE IF (N.LT.0) THEN
+          INFO = 4
+      ELSE IF (K.LT.0) THEN
+          INFO = 5
+      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
+          INFO = 8
+      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
+          INFO = 10
+      ELSE IF (LDC.LT.MAX(1,N)) THEN
+          INFO = 13
+      END IF
+      IF (INFO.NE.0) THEN
+          CALL XERBLA('ZGEMMT',INFO)
+          RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF ((N.EQ.0) .OR.
+     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
+*
+*     And when alpha.eq.zero.
+*
+      IF (ALPHA.EQ.ZERO) THEN
+          IF (BETA.EQ.ZERO) THEN
+              DO 20 J = 1,N
+                  IF (UPPER) THEN
+                      ISTART = 1
+                      ISTOP = J
+                  ELSE
+                      ISTART = J
+                      ISTOP = N
+                  END IF
+
+                  DO 10 I = ISTART, ISTOP
+                      C(I,J) = ZERO
+   10             CONTINUE
+   20         CONTINUE
+          ELSE
+              DO 40 J = 1,N
+                  IF (UPPER) THEN
+                      ISTART = 1
+                      ISTOP = J
+                  ELSE
+                      ISTART = J
+                      ISTOP = N
+                  END IF
+                  DO 30 I = ISTART, ISTOP
+                      C(I,J) = BETA*C(I,J)
+   30             CONTINUE
+   40         CONTINUE
+          END IF
+          RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF (NOTB) THEN
+          IF (NOTA) THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+* + DO 90 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + IF (BETA.EQ.ZERO) THEN + DO 50 I = ISTART, ISTOP + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + TEMP = ALPHA*B(L,J) + DO 70 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + ELSE IF (CONJA) THEN +* +* Form C := alpha*A**H*B + beta*C. +* + DO 120 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 110 I = ISTART, ISTOP + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A**T*B + beta*C +* + DO 150 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 140 I = ISTART, ISTOP + TEMP = ZERO + DO 130 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 130 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF (NOTA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A*B**H + beta*C. 
+* + DO 200 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + IF (BETA.EQ.ZERO) THEN + DO 160 I = ISTART,ISTOP + C(I,J) = ZERO + 160 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 170 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 170 CONTINUE + END IF + DO 190 L = 1,K + TEMP = ALPHA*CONJG(B(J,L)) + DO 180 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 180 CONTINUE + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B**T + beta*C +* + DO 250 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + IF (BETA.EQ.ZERO) THEN + DO 210 I = ISTART, ISTOP + C(I,J) = ZERO + 210 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 220 I = ISTART, ISTOP + C(I,J) = BETA*C(I,J) + 220 CONTINUE + END IF + DO 240 L = 1,K + TEMP = ALPHA*B(J,L) + DO 230 I = ISTART, ISTOP + C(I,J) = C(I,J) + TEMP*A(I,L) + 230 CONTINUE + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF (CONJA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A**H*B**H + beta*C. 
+* + DO 280 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 270 I = ISTART, ISTOP + TEMP = ZERO + DO 260 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L)) + 260 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*A**H*B**T + beta*C +* + DO 310 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 300 I = ISTART, ISTOP + TEMP = ZERO + DO 290 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*B(J,L) + 290 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF (CONJB) THEN +* +* Form C := alpha*A**T*B**H + beta*C +* + DO 340 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 330 I = ISTART, ISTOP + TEMP = ZERO + DO 320 L = 1,K + TEMP = TEMP + A(L,I)*CONJG(B(J,L)) + 320 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A**T*B**T + beta*C +* + DO 370 J = 1,N + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 360 I = ISTART, ISTOP + TEMP = ZERO + DO 350 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 350 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMMT +* + END diff --git a/BLAS/TESTING/cblat3.f b/BLAS/TESTING/cblat3.f index 18adeba6d5..a8cd24c123 100644 --- a/BLAS/TESTING/cblat3.f +++ b/BLAS/TESTING/cblat3.f @@ -19,7 +19,7 @@ *> Test program for the COMPLEX Level 3 Blas. *> *> The program must be driven by a short data file. 
The first 14 records -*> of the file are read using list-directed input, the last 9 records +*> of the file are read using list-directed input, the last 10 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 23 lines: @@ -46,6 +46,7 @@ *> CSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> CHER2K T PUT F FOR NO TEST. SAME COLUMNS. *> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> CGEMMT T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== @@ -93,7 +94,7 @@ PROGRAM CBLAT3 INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS - PARAMETER ( NSUBS = 9 ) + PARAMETER ( NSUBS = 10 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO @@ -127,6 +128,7 @@ PROGRAM CBLAT3 EXTERNAL SDIFF, LCE * .. External Subroutines .. EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CCHKE, CMMCH + EXTERNAL CCHK6 * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. @@ -139,7 +141,7 @@ PROGRAM CBLAT3 * .. Data statements .. DATA SNAMES/'CGEMM ', 'CHEMM ', 'CSYMM ', 'CTRMM ', $ 'CTRSM ', 'CHERK ', 'CSYRK ', 'CHER2K', - $ 'CSYR2K'/ + $ 'CSYR2K', 'CGEMMT'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. @@ -317,7 +319,7 @@ PROGRAM CBLAT3 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, - $ 180, 180 )ISNUM + $ 180, 180, 185 )ISNUM * Test CGEMM, 01. 
140 CALL CCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, @@ -346,6 +348,11 @@ PROGRAM CBLAT3 $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 + 185 CALL CCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, + $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, + $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, + $ CC, CS, CT, G ) + * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 @@ -2031,7 +2038,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) RBETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, - $ 90 )ISNUM + $ 90, 100 )ISNUM 10 INFOT = 1 CALL CGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2212,7 +2219,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 13 CALL CGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 20 INFOT = 1 CALL CHEMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2279,7 +2286,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 30 INFOT = 1 CALL CSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2346,7 +2353,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 40 INFOT = 1 CALL CTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2503,7 +2510,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL CTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 50 INFOT = 1 CALL CTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) 
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2660,7 +2667,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL CTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 60 INFOT = 1 CALL CHERK( '/', 'N', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2715,7 +2722,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 10 CALL CHERK( 'L', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 70 INFOT = 1 CALL CSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2770,7 +2777,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 10 CALL CSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 80 INFOT = 1 CALL CHER2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2837,7 +2844,7 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL CHER2K( 'L', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 90 INFOT = 1 CALL CSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2904,8 +2911,186 @@ SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL CSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + GO TO 110 + 100 INFOT = 1 + CALL CGEMMT( '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'N', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'N', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'T', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, 
NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'T', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'T', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'C', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'C', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CGEMMT( '/', 'C', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 2 + CALL CGEMMT( 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGEMMT( 'U', '/', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGEMMT( 'U', '/', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGEMMT( 'L', '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGEMMT( 'L', '/', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGEMMT( 'L', '/', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 3 + CALL CGEMMT( 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGEMMT( 'U', 'C', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGEMMT( 'U', 'T', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'N', 'C', -1, 0, ALPHA, A, 1, B, 1, 
BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'N', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'C', 'C', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'C', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'T', 'C', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGEMMT( 'U', 'T', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'N', 'C', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'N', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'C', 'C', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'C', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CGEMMT( 'U', 'T', 'C', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL 
CGEMMT( 'U', 'T', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 8 + CALL CGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'N', 'C', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'C', 'C', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'C', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'T', 'C', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGEMMT( 'U', 'T', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 10 + CALL CGEMMT( 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CGEMMT( 'U', 'C', 'N', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CGEMMT( 'U', 'T', 'N', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'N', 'C', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, 
INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'C', 'C', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'C', 'T', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'T', 'C', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CGEMMT( 'U', 'T', 'T', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + GO TO 110 + * - 100 IF( OK )THEN + 110 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT @@ -3486,3 +3671,496 @@ SUBROUTINE XERBLA( SRNAME, INFO ) * End of XERBLA * END + + SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, + $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, + $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) +* +* Tests CGEMMT. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0, 0.0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) +* .. Scalar Arguments .. + REAL EPS, THRESH + INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA + LOGICAL FATAL, REWI, TRACE + CHARACTER*6 SNAME +* .. Array Arguments .. + COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), + $ AS( NMAX*NMAX ), B( NMAX, NMAX ), + $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), + $ C( NMAX, NMAX ), CC( NMAX*NMAX ), + $ CS( NMAX*NMAX ), CT( NMAX ) + REAL G( NMAX ) + INTEGER IDIM( NIDIM ) +* .. 
Local Scalars ..
+      COMPLEX            ALPHA, ALS, BETA, BLS
+      REAL               ERR, ERRMAX
+      INTEGER            I, IA, IB, ICA, ICB, IK, IN, K, KS, LAA,
+     $                   LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS,
+     $                   MA, MB, N, NA, NARGS, NB, NC, NS, IS
+      LOGICAL            NULL, RESET, SAME, TRANA, TRANB
+      CHARACTER*1        TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS
+      CHARACTER*3        ICH
+      CHARACTER*2        ISHAPE
+*     .. Local Arrays ..
+      LOGICAL            ISAME( 13 )
+*     .. External Functions ..
+      LOGICAL            LCE, LCERES
+      EXTERNAL           LCE, LCERES
+*     .. External Subroutines ..
+      EXTERNAL           CGEMMT, CMAKE, CMMTCH
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Scalars in Common ..
+      INTEGER            INFOT, NOUTC
+      LOGICAL            LERR, OK
+*     .. Common blocks ..
+      COMMON             /INFOC/INFOT, NOUTC, OK, LERR
+*     .. Data statements ..
+      DATA               ICH/'NTC'/
+      DATA               ISHAPE/'UL'/
+
+*     .. Executable Statements ..
+*
+      NARGS = 13
+      NC = 0
+      RESET = .TRUE.
+      ERRMAX = RZERO
+*
+      DO 100 IN = 1, NIDIM
+         N = IDIM( IN )
+*        Set LDC to 1 more than minimum value if room.
+         LDC = N
+         IF( LDC.LT.NMAX )
+     $      LDC = LDC + 1
+*        Skip tests if not enough room.
+         IF( LDC.GT.NMAX )
+     $      GO TO 100
+         LCC = LDC*N
+         NULL = N.LE.0
+*
+         DO 90 IK = 1, NIDIM
+            K = IDIM( IK )
+*
+            DO 80 ICA = 1, 3
+               TRANSA = ICH( ICA: ICA )
+               TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C'
+*
+               IF( TRANA )THEN
+                  MA = K
+                  NA = N
+               ELSE
+                  MA = N
+                  NA = K
+               END IF
+*              Set LDA to 1 more than minimum value if room.
+               LDA = MA
+               IF( LDA.LT.NMAX )
+     $            LDA = LDA + 1
+*              Skip tests if not enough room.
+               IF( LDA.GT.NMAX )
+     $            GO TO 80
+               LAA = LDA*NA
+*
+*              Generate the matrix A.
+*
+               CALL CMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA,
+     $                     RESET, ZERO )
+*
+               DO 70 ICB = 1, 3
+                  TRANSB = ICH( ICB: ICB )
+                  TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C'
+*
+                  IF( TRANB )THEN
+                     MB = N
+                     NB = K
+                  ELSE
+                     MB = K
+                     NB = N
+                  END IF
+*                 Set LDB to 1 more than minimum value if room.
+                  LDB = MB
+                  IF( LDB.LT.NMAX )
+     $               LDB = LDB + 1
+*                 Skip tests if not enough room.
+                  IF( LDB.GT.NMAX )
+     $               GO TO 70
+                  LBB = LDB*NB
+*
+*                 Generate the matrix B.
+*
+                  CALL CMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB,
+     $                        LDB, RESET, ZERO )
+*
+                  DO 60 IA = 1, NALF
+                     ALPHA = ALF( IA )
+*
+                     DO 50 IB = 1, NBET
+                        BETA = BET( IB )
+                        DO 45 IS = 1, 2
+                           UPLO = ISHAPE( IS: IS )
+
+*
+*                          Generate the n by n matrix C.
+*
+                           CALL CMAKE( 'GE', UPLO, ' ', N, N, C, NMAX,
+     $                                 CC, LDC, RESET, ZERO )
+*
+                           NC = NC + 1
+*
+*                          Save every datum before calling the
+*                          subroutine.
+*
+                           UPLOS = UPLO
+                           TRANAS = TRANSA
+                           TRANBS = TRANSB
+                           NS = N
+                           KS = K
+                           ALS = ALPHA
+                           DO 10 I = 1, LAA
+                              AS( I ) = AA( I )
+   10                      CONTINUE
+                           LDAS = LDA
+                           DO 20 I = 1, LBB
+                              BS( I ) = BB( I )
+   20                      CONTINUE
+                           LDBS = LDB
+                           BLS = BETA
+                           DO 30 I = 1, LCC
+                              CS( I ) = CC( I )
+   30                      CONTINUE
+                           LDCS = LDC
+*
+*                          Call the subroutine.
+*
+                           IF( TRACE )
+     $                        WRITE( NTRA, FMT = 9995 )NC, SNAME,
+     $                        UPLO, TRANSA, TRANSB, N, K, ALPHA, LDA,
+     $                        LDB, BETA, LDC
+                           IF( REWI )
+     $                        REWIND NTRA
+                           CALL CGEMMT( UPLO, TRANSA, TRANSB, N, K,
+     $                                  ALPHA, AA, LDA, BB, LDB, BETA,
+     $                                  CC, LDC )
+*
+*                          Check if error-exit was taken incorrectly.
+*
+                           IF( .NOT.OK )THEN
+                              WRITE( NOUT, FMT = 9994 )
+                              FATAL = .TRUE.
+                              GO TO 120
+                           END IF
+*
+*                          See what data changed inside subroutines.
+*
+                           ISAME( 1 ) = UPLOS.EQ.UPLO
+                           ISAME( 2 ) = TRANSA.EQ.TRANAS
+                           ISAME( 3 ) = TRANSB.EQ.TRANBS
+                           ISAME( 4 ) = NS.EQ.N
+                           ISAME( 5 ) = KS.EQ.K
+                           ISAME( 6 ) = ALS.EQ.ALPHA
+                           ISAME( 7 ) = LCE( AS, AA, LAA )
+                           ISAME( 8 ) = LDAS.EQ.LDA
+                           ISAME( 9 ) = LCE( BS, BB, LBB )
+                           ISAME( 10 ) = LDBS.EQ.LDB
+                           ISAME( 11 ) = BLS.EQ.BETA
+                           IF( NULL )THEN
+                              ISAME( 12 ) = LCE( CS, CC, LCC )
+                           ELSE
+                              ISAME( 12 ) = LCERES( 'GE', ' ', N, N,
+     $                                      CS, CC, LDC )
+                           END IF
+                           ISAME( 13 ) = LDCS.EQ.LDC
+*
+*                          If data was incorrectly changed, report
+*                          and return.
+*
+                           SAME = .TRUE.
+                           DO 40 I = 1, NARGS
+                              SAME = SAME.AND.ISAME( I )
+                              IF( .NOT.ISAME( I ) )
+     $                           WRITE( NOUT, FMT = 9998 )I
+   40                      CONTINUE
+                           IF( .NOT.SAME )THEN
+                              FATAL = .TRUE.
+                              GO TO 120
+                           END IF
+*
+                           IF( .NOT.NULL )THEN
+*
+*                             Check the result.
+*
+                              CALL CMMTCH( UPLO, TRANSA, TRANSB, N,
+     $                                  K, ALPHA, A, NMAX, B, NMAX,
+     $                                  BETA, C, NMAX, CT, G, CC, LDC,
+     $                                  EPS, ERR, FATAL, NOUT, .TRUE.)
+                              ERRMAX = MAX( ERRMAX, ERR )
+*                             If got really bad answer, report and
+*                             return.
+                              IF( FATAL )
+     $                           GO TO 120
+                           END IF
+   45                   CONTINUE
+*
+   50                CONTINUE
+*
+   60             CONTINUE
+*
+   70          CONTINUE
+*
+   80       CONTINUE
+*
+   90    CONTINUE
+*
+  100 CONTINUE
+*
+*
+*     Report result.
+*
+      IF( ERRMAX.LT.THRESH )THEN
+         WRITE( NOUT, FMT = 9999 )SNAME, NC
+      ELSE
+         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
+      END IF
+      GO TO 130
+*
+  120 CONTINUE
+      WRITE( NOUT, FMT = 9996 )SNAME
+      WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANSA, TRANSB, N, K,
+     $   ALPHA, LDA, LDB, BETA, LDC
+*
+  130 CONTINUE
+      RETURN
+*
+ 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
+     $      'S)' )
+ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
+     $      'ANGED INCORRECTLY *******' )
+ 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
+     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
+     $      ' - SUSPECT *******' )
+ 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
+ 9995 FORMAT( 1X, I6, ': ', A6, '(''',A1, ''',''',A1, ''',''', A1,''',',
+     $      2( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
+     $      ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
+ 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
+     $      '******' )
+*
+*     End of CCHK6
+*
+      END
+
+      SUBROUTINE CMMTCH( UPLO, TRANSA, TRANSB, N, KK, ALPHA, A, LDA,
+     $                 B, LDB, BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR,
+     $                 FATAL, NOUT, MV )
+      IMPLICIT NONE
+*
+*  Checks the results of the computational tests.
+*
+*  Auxiliary routine for test program for Level 3 Blas.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*     .. Parameters ..
+ COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0, 0.0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0, RONE = 1.0 ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + REAL EPS, ERR + INTEGER KK, LDA, LDB, LDC, LDCC, N, NOUT + LOGICAL FATAL, MV + CHARACTER*1 TRANSA, TRANSB, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ CC( LDCC, * ), CT( * ) + REAL G( * ) +* .. Local Scalars .. + COMPLEX CL + REAL ERRI + INTEGER I, J, K, ISTART, ISTOP + LOGICAL CTRANA, CTRANB, TRANA, TRANB, UPPER +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, REAL, SQRT +* .. Statement Functions .. + REAL ABS1 +* .. Statement Function definitions .. + ABS1( CL ) = ABS( REAL( CL ) ) + ABS( AIMAG( CL ) ) +* .. Executable Statements .. + UPPER = UPLO.EQ.'U' + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' + CTRANA = TRANSA.EQ.'C' + CTRANB = TRANSB.EQ.'C' +* +* Compute expected result, one column at a time, in CT using data +* in A, B and C. +* Compute gauges in G. 
+* + ISTART = 1 + ISTOP = 1 + + DO 220 J = 1, N +* + IF ( UPPER ) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 10 I = ISTART, ISTOP + CT( I ) = ZERO + G( I ) = RZERO + 10 CONTINUE + IF( .NOT.TRANA.AND..NOT.TRANB )THEN + DO 30 K = 1, KK + DO 20 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( K, J ) + G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) ) + 20 CONTINUE + 30 CONTINUE + ELSE IF( TRANA.AND..NOT.TRANB )THEN + IF( CTRANA )THEN + DO 50 K = 1, KK + DO 40 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( K, J ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( K, J ) ) + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, KK + DO 60 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( K, J ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( K, J ) ) + 60 CONTINUE + 70 CONTINUE + END IF + ELSE IF( .NOT.TRANA.AND.TRANB )THEN + IF( CTRANB )THEN + DO 90 K = 1, KK + DO 80 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( I, K ) )* + $ ABS1( B( J, K ) ) + 80 CONTINUE + 90 CONTINUE + ELSE + DO 110 K = 1, KK + DO 100 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( J, K ) + G( I ) = G( I ) + ABS1( A( I, K ) )* + $ ABS1( B( J, K ) ) + 100 CONTINUE + 110 CONTINUE + END IF + ELSE IF( TRANA.AND.TRANB )THEN + IF( CTRANA )THEN + IF( CTRANB )THEN + DO 130 K = 1, KK + DO 120 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )* + $ CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 120 CONTINUE + 130 CONTINUE + ELSE + DO 150 K = 1, KK + DO 140 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( J, K ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE + IF( CTRANB )THEN + DO 170 K = 1, KK + DO 160 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 190 K = 
1, KK + DO 180 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( J, K ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 180 CONTINUE + 190 CONTINUE + END IF + END IF + END IF + DO 200 I = ISTART, ISTOP + CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) + G( I ) = ABS1( ALPHA )*G( I ) + + $ ABS1( BETA )*ABS1( C( I, J ) ) + 200 CONTINUE +* +* Compute the error ratio for this result. +* + ERR = ZERO + DO 210 I = ISTART, ISTOP + ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS + IF( G( I ).NE.RZERO ) + $ ERRI = ERRI/G( I ) + ERR = MAX( ERR, ERRI ) + IF( ERR*SQRT( EPS ).GE.RONE ) + $ GO TO 230 + 210 CONTINUE +* + 220 CONTINUE +* +* If the loop completes, all results are at least half accurate. + GO TO 250 +* +* Report fatal error. +* + 230 FATAL = .TRUE. + WRITE( NOUT, FMT = 9999 ) + DO 240 I = ISTART, ISTOP + IF( MV )THEN + WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) + ELSE + WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) + END IF + 240 CONTINUE + IF( N.GT.1 ) + $ WRITE( NOUT, FMT = 9997 )J +* + 250 CONTINUE + RETURN +* + 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', + $ 'F ACCURATE *******', /' EXPECTED RE', + $ 'SULT COMPUTED RESULT' ) + 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) + 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) +* +* End of CMMTCH +* + END + diff --git a/BLAS/TESTING/cblat3.in b/BLAS/TESTING/cblat3.in index f1480557a1..686fe64084 100644 --- a/BLAS/TESTING/cblat3.in +++ b/BLAS/TESTING/cblat3.in @@ -21,3 +21,4 @@ CHERK T PUT F FOR NO TEST. SAME COLUMNS. CSYRK T PUT F FOR NO TEST. SAME COLUMNS. CHER2K T PUT F FOR NO TEST. SAME COLUMNS. CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +CGEMMT T PUT F FOR NO TEST. SAME COLUMNS. diff --git a/BLAS/TESTING/dblat3.f b/BLAS/TESTING/dblat3.f index 89087d539c..ddfbbfbd6a 100644 --- a/BLAS/TESTING/dblat3.f +++ b/BLAS/TESTING/dblat3.f @@ -19,10 +19,10 @@ *> Test program for the DOUBLE PRECISION Level 3 Blas. *> *> The program must be driven by a short data file. 
The first 14 records -*> of the file are read using list-directed input, the last 6 records +*> of the file are read using list-directed input, the last 7 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the -*> following 20 lines: +*> following 21 lines: *> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE @@ -43,6 +43,7 @@ *> DTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> DSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> DGEMMT T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== @@ -90,7 +91,7 @@ PROGRAM DBLAT3 INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS - PARAMETER ( NSUBS = 6 ) + PARAMETER ( NSUBS = 7 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX @@ -132,7 +133,7 @@ PROGRAM DBLAT3 COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'DGEMM ', 'DSYMM ', 'DTRMM ', 'DTRSM ', - $ 'DSYRK ', 'DSYR2K'/ + $ 'DSYRK ', 'DSYR2K', 'DGEMMT'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. @@ -309,7 +310,7 @@ PROGRAM DBLAT3 INFOT = 0 OK = .TRUE. FATAL = .FALSE. - GO TO ( 140, 150, 160, 160, 170, 180 )ISNUM + GO TO ( 140, 150, 160, 160, 170, 180, 185 )ISNUM * Test DGEMM, 01. 140 CALL DCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, @@ -338,6 +339,12 @@ PROGRAM DBLAT3 $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 +* Test DGEMMT, 07. 
+ 185 CALL DCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, + $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, + $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, + $ CC, CS, CT, G ) + * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 @@ -1882,7 +1889,7 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) ALPHA = ONE BETA = TWO * - GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM + GO TO ( 10, 20, 30, 40, 50, 60, 70 )ISNUM 10 INFOT = 1 CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -1967,7 +1974,7 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 13 CALL DGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 20 INFOT = 1 CALL DSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2034,7 +2041,7 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 30 INFOT = 1 CALL DTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2143,7 +2150,7 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL DTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 40 INFOT = 1 CALL DTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2252,7 +2259,7 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL DTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 50 INFOT = 1 CALL DSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2307,7 +2314,7 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 10 CALL DSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 60 INFOT = 1 CALL 
DSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2374,8 +2381,78 @@ SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL DSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + GO TO 80 + 70 INFOT = 1 + CALL DGEMMT( '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGEMMT( 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGEMMT( 'U', '/', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGEMMT( 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGEMMT( 'U', 'T', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGEMMT( 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGEMMT( 'U', 'N', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGEMMT( 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGEMMT( 'U', 'T', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DGEMMT( 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DGEMMT( 'U', 'N', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DGEMMT( 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DGEMMT( 'U', 'T', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGEMMT( 'U', 'N', 'N', 2, 
0, ALPHA, A, 1, B, 2, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGEMMT( 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGEMMT( 'U', 'T', 'N', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGEMMT( 'U', 'T', 'T', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DGEMMT( 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DGEMMT( 'U', 'T', 'T', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * - 70 IF( OK )THEN + 80 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT @@ -2867,3 +2944,420 @@ SUBROUTINE XERBLA( SRNAME, INFO ) * End of XERBLA * END + + SUBROUTINE DCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, + $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, + $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) +* +* Tests DGEMMT. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 19-July-2023. +* Martin Koehler, MPI Magdeburg +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. Scalar Arguments .. + DOUBLE PRECISION EPS, THRESH + INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA + LOGICAL FATAL, REWI, TRACE + CHARACTER*6 SNAME +* .. Array Arguments .. 
+ DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), + $ AS( NMAX*NMAX ), B( NMAX, NMAX ), + $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), + $ C( NMAX, NMAX ), CC( NMAX*NMAX ), + $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) + INTEGER IDIM( NIDIM ) +* .. Local Scalars .. + DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX + INTEGER I, IA, IB, ICA, ICB, IK, IN, K, KS, LAA, + $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, + $ MA, MB, N, NA, NARGS, NB, NC, NS, IS + LOGICAL NULL, RESET, SAME, TRANA, TRANB + CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS + CHARACTER*3 ICH + CHARACTER*2 ISHAPE +* .. Local Arrays .. + LOGICAL ISAME( 13 ) +* .. External Functions .. + LOGICAL LDE, LDERES + EXTERNAL LDE, LDERES +* .. External Subroutines .. + EXTERNAL DGEMMT, DMAKE, DMMTCH +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Scalars in Common .. + INTEGER INFOT, NOUTC + LOGICAL LERR, OK +* .. Common blocks .. + COMMON /INFOC/INFOT, NOUTC, OK, LERR +* .. Data statements .. + DATA ICH/'NTC'/ + DATA ISHAPE/'UL'/ +* .. Executable Statements .. +* + NARGS = 13 + NC = 0 + RESET = .TRUE. + ERRMAX = ZERO +* + DO 100 IN = 1, NIDIM + N = IDIM( IN ) +* Set LDC to 1 more than minimum value if room. + LDC = N + IF( LDC.LT.NMAX ) + $ LDC = LDC + 1 +* Skip tests if not enough room. + IF( LDC.GT.NMAX ) + $ GO TO 100 + LCC = LDC*N + NULL = N.LE.0 +* + DO 90 IK = 1, NIDIM + K = IDIM( IK ) +* + DO 80 ICA = 1, 3 + TRANSA = ICH( ICA: ICA ) + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' +* + IF( TRANA )THEN + MA = K + NA = N + ELSE + MA = N + NA = K + END IF +* Set LDA to 1 more than minimum value if room. + LDA = MA + IF( LDA.LT.NMAX ) + $ LDA = LDA + 1 +* Skip tests if not enough room. + IF( LDA.GT.NMAX ) + $ GO TO 80 + LAA = LDA*NA +* +* Generate the matrix A. 
+* + CALL DMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, + $ RESET, ZERO ) +* + DO 70 ICB = 1, 3 + TRANSB = ICH( ICB: ICB ) + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' +* + IF( TRANB )THEN + MB = N + NB = K + ELSE + MB = K + NB = N + END IF +* Set LDB to 1 more than minimum value if room. + LDB = MB + IF( LDB.LT.NMAX ) + $ LDB = LDB + 1 +* Skip tests if not enough room. + IF( LDB.GT.NMAX ) + $ GO TO 70 + LBB = LDB*NB +* +* Generate the matrix B. +* + CALL DMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, + $ LDB, RESET, ZERO ) +* + DO 60 IA = 1, NALF + ALPHA = ALF( IA ) +* + DO 50 IB = 1, NBET + BETA = BET( IB ) + + DO 45 IS = 1, 2 + UPLO = ISHAPE( IS: IS ) + +* +* Generate the matrix C. +* + CALL DMAKE( 'GE', UPLO, ' ', N, N, C, + $ NMAX, CC, LDC, RESET, ZERO ) +* + NC = NC + 1 +* +* Save every datum before calling the +* subroutine. +* + UPLOS = UPLO + TRANAS = TRANSA + TRANBS = TRANSB + NS = N + KS = K + ALS = ALPHA + DO 10 I = 1, LAA + AS( I ) = AA( I ) + 10 CONTINUE + LDAS = LDA + DO 20 I = 1, LBB + BS( I ) = BB( I ) + 20 CONTINUE + LDBS = LDB + BLS = BETA + DO 30 I = 1, LCC + CS( I ) = CC( I ) + 30 CONTINUE + LDCS = LDC +* +* Call the subroutine. +* + IF( TRACE ) + $ WRITE( NTRA, FMT = 9995 )NC, SNAME, + $ UPLO, TRANSA, TRANSB, N, K, ALPHA, LDA, + $ LDB, BETA, LDC + IF( REWI ) + $ REWIND NTRA + CALL DGEMMT( UPLO, TRANSA, TRANSB, N, + $ K, ALPHA, AA, LDA, BB, LDB, + $ BETA, CC, LDC ) +* +* Check if error-exit was taken incorrectly. +* + IF( .NOT.OK )THEN + WRITE( NOUT, FMT = 9994 ) + FATAL = .TRUE. + GO TO 120 + END IF +* +* See what data changed inside subroutines. 
+* + ISAME( 1 ) = UPLO.EQ.UPLOS + ISAME( 2 ) = TRANSA.EQ.TRANAS + ISAME( 3 ) = TRANSB.EQ.TRANBS + ISAME( 4 ) = NS.EQ.N + ISAME( 5 ) = KS.EQ.K + ISAME( 6 ) = ALS.EQ.ALPHA + ISAME( 7 ) = LDE( AS, AA, LAA ) + ISAME( 8 ) = LDAS.EQ.LDA + ISAME( 9 ) = LDE( BS, BB, LBB ) + ISAME( 10 ) = LDBS.EQ.LDB + ISAME( 11 ) = BLS.EQ.BETA + IF( NULL )THEN + ISAME( 12 ) = LDE( CS, CC, LCC ) + ELSE + ISAME( 12 ) = LDERES( 'GE', ' ', N, N, + $ CS, CC, LDC ) + END IF + ISAME( 13 ) = LDCS.EQ.LDC +* +* If data was incorrectly changed, report +* and return. +* + SAME = .TRUE. + DO 40 I = 1, NARGS + SAME = SAME.AND.ISAME( I ) + IF( .NOT.ISAME( I ) ) + $ WRITE( NOUT, FMT = 9998 )I + 40 CONTINUE + IF( .NOT.SAME )THEN + FATAL = .TRUE. + GO TO 120 + END IF +* + IF( .NOT.NULL )THEN +* +* Check the result. +* + CALL DMMTCH( UPLO, TRANSA, TRANSB, + $ N, K, + $ ALPHA, A, NMAX, B, NMAX, BETA, + $ C, NMAX, CT, G, CC, LDC, EPS, + $ ERR, FATAL, NOUT, .TRUE. ) + ERRMAX = MAX( ERRMAX, ERR ) +* If got really bad answer, report and +* return. + IF( FATAL ) + $ GO TO 120 + END IF +* + 45 CONTINUE +* + 50 CONTINUE +* + 60 CONTINUE +* + 70 CONTINUE +* + 80 CONTINUE +* + 90 CONTINUE +* + 100 CONTINUE +* +* +* Report result.
+* + IF( ERRMAX.LT.THRESH )THEN + WRITE( NOUT, FMT = 9999 )SNAME, NC + ELSE + WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX + END IF + GO TO 130 +* + 120 CONTINUE + WRITE( NOUT, FMT = 9996 )SNAME + WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANSA, TRANSB, N, K, + $ ALPHA, LDA, LDB, BETA, LDC +* + 130 CONTINUE + RETURN +* + 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', + $ 'S)' ) + 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', + $ 'ANGED INCORRECTLY *******' ) + 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', + $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, + $ ' - SUSPECT *******' ) + 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) + 9995 FORMAT( 1X, I6, ': ', A6, '(''',A1, ''',''',A1, ''',''', A1,''',', + $ 2( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', + $ 'C,', I3, ').' ) + 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', + $ '******' ) +* +* End of DCHK6 +* + END + + SUBROUTINE DMMTCH( UPLO, TRANSA, TRANSB, N, KK, ALPHA, A, LDA, + $ B, LDB, BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, + $ FATAL, NOUT, MV ) +* +* Checks the results of the computational tests. +* +* Auxiliary routine for test program for Level 3 Blas. (DGEMMT) +* +* -- Written on 19-July-2023. +* Martin Koehler, MPI Magdeburg +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA, EPS, ERR + INTEGER KK, LDA, LDB, LDC, LDCC, N, NOUT + LOGICAL FATAL, MV + CHARACTER*1 UPLO, TRANSA, TRANSB +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ CC( LDCC, * ), CT( * ), G( * ) +* .. Local Scalars .. + DOUBLE PRECISION ERRI + INTEGER I, J, K, ISTART, ISTOP + LOGICAL TRANA, TRANB, UPPER +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. Executable Statements .. 
+ UPPER = UPLO.EQ.'U' + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' +* +* Compute expected result, one column at a time, in CT using data +* in A, B and C. +* Compute gauges in G. +* + ISTART = 1 + ISTOP = N + + DO 120 J = 1, N +* + IF ( UPPER ) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + DO 10 I = ISTART, ISTOP + CT( I ) = ZERO + G( I ) = ZERO + 10 CONTINUE + IF( .NOT.TRANA.AND..NOT.TRANB )THEN + DO 30 K = 1, KK + DO 20 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( K, J ) + G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( K, J ) ) + 20 CONTINUE + 30 CONTINUE + ELSE IF( TRANA.AND..NOT.TRANB )THEN + DO 50 K = 1, KK + DO 40 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( K, J ) + G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( K, J ) ) + 40 CONTINUE + 50 CONTINUE + ELSE IF( .NOT.TRANA.AND.TRANB )THEN + DO 70 K = 1, KK + DO 60 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( J, K ) + G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( J, K ) ) + 60 CONTINUE + 70 CONTINUE + ELSE IF( TRANA.AND.TRANB )THEN + DO 90 K = 1, KK + DO 80 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( J, K ) + G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( J, K ) ) + 80 CONTINUE + 90 CONTINUE + END IF + DO 100 I = ISTART, ISTOP + CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) + G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( I, J ) ) + 100 CONTINUE +* +* Compute the error ratio for this result. +* + ERR = ZERO + DO 110 I = ISTART, ISTOP + ERRI = ABS( CT( I ) - CC( I, J ) )/EPS + IF( G( I ).NE.ZERO ) + $ ERRI = ERRI/G( I ) + ERR = MAX( ERR, ERRI ) + IF( ERR*SQRT( EPS ).GE.ONE ) + $ GO TO 130 + 110 CONTINUE +* + 120 CONTINUE +* +* If the loop completes, all results are at least half accurate. + GO TO 150 +* +* Report fatal error. +* + 130 FATAL = .TRUE. 
+ WRITE( NOUT, FMT = 9999 ) + DO 140 I = ISTART, ISTOP + IF( MV )THEN + WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) + ELSE + WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) + END IF + 140 CONTINUE + IF( N.GT.1 ) + $ WRITE( NOUT, FMT = 9997 )J +* + 150 CONTINUE + RETURN +* + 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', + $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', + $ 'TED RESULT' ) + 9998 FORMAT( 1X, I7, 2G18.6 ) + 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) +* +* End of DMMTCH +* + END + diff --git a/BLAS/TESTING/dblat3.in b/BLAS/TESTING/dblat3.in index 0098f3e521..82e571ee84 100644 --- a/BLAS/TESTING/dblat3.in +++ b/BLAS/TESTING/dblat3.in @@ -18,3 +18,4 @@ DTRMM T PUT F FOR NO TEST. SAME COLUMNS. DTRSM T PUT F FOR NO TEST. SAME COLUMNS. DSYRK T PUT F FOR NO TEST. SAME COLUMNS. DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +DGEMMT T PUT F FOR NO TEST. SAME COLUMNS. diff --git a/BLAS/TESTING/sblat3.f b/BLAS/TESTING/sblat3.f index c4c1fccee8..a0522d96e8 100644 --- a/BLAS/TESTING/sblat3.f +++ b/BLAS/TESTING/sblat3.f @@ -19,7 +19,7 @@ *> Test program for the REAL Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records -*> of the file are read using list-directed input, the last 6 records +*> of the file are read using list-directed input, the last 7 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 20 lines: @@ -43,6 +43,7 @@ *> STRSM T PUT F FOR NO TEST. SAME COLUMNS. *> SSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> SGEMMT T PUT F FOR NO TEST. SAME COLUMNS. 
*> *> Further Details *> =============== @@ -90,7 +91,7 @@ PROGRAM SBLAT3 INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS - PARAMETER ( NSUBS = 6 ) + PARAMETER ( NSUBS = 7 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX @@ -132,7 +133,7 @@ PROGRAM SBLAT3 COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'SGEMM ', 'SSYMM ', 'STRMM ', 'STRSM ', - $ 'SSYRK ', 'SSYR2K'/ + $ 'SSYRK ', 'SSYR2K', 'SGEMMT'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. @@ -309,7 +310,7 @@ PROGRAM SBLAT3 INFOT = 0 OK = .TRUE. FATAL = .FALSE. - GO TO ( 140, 150, 160, 160, 170, 180 )ISNUM + GO TO ( 140, 150, 160, 160, 170, 180, 185 )ISNUM * Test SGEMM, 01. 140 CALL SCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, @@ -338,6 +339,12 @@ PROGRAM SBLAT3 $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 +* Test SGEMMT, 07. + 185 CALL SCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, + $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, + $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, + $ CC, CS, CT, G ) + GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 @@ -1866,7 +1873,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) REAL A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CHKXER, SGEMM, SSYMM, SSYR2K, SSYRK, STRMM, - $ STRSM + $ STRSM, SGEMMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. 
@@ -1882,7 +1889,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) ALPHA = ONE BETA = TWO * - GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM + GO TO ( 10, 20, 30, 40, 50, 60, 70 )ISNUM 10 INFOT = 1 CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -1967,7 +1974,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 13 CALL SGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 20 INFOT = 1 CALL SSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2034,7 +2041,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 30 INFOT = 1 CALL STRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2143,7 +2150,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL STRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 40 INFOT = 1 CALL STRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2252,7 +2259,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL STRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 50 INFOT = 1 CALL SSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2307,7 +2314,7 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 10 CALL SSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 70 + GO TO 80 60 INFOT = 1 CALL SSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2374,8 +2381,78 @@ SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL SSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 
) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + GO TO 80 + 70 INFOT = 1 + CALL SGEMMT( '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGEMMT( 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGEMMT( 'U', '/', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGEMMT( 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGEMMT( 'U', 'T', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGEMMT( 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGEMMT( 'U', 'N', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGEMMT( 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGEMMT( 'U', 'T', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SGEMMT( 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SGEMMT( 'U', 'N', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SGEMMT( 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SGEMMT( 'U', 'T', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL 
SGEMMT( 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SGEMMT( 'U', 'T', 'N', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SGEMMT( 'U', 'T', 'T', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SGEMMT( 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SGEMMT( 'U', 'T', 'T', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * - 70 IF( OK )THEN + 80 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT @@ -2865,5 +2942,422 @@ SUBROUTINE XERBLA( SRNAME, INFO ) $ ' *******' ) * * End of XERBLA +* + END + + + SUBROUTINE SCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, + $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, + $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) +* +* Tests SGEMMT. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 19-July-2023. +* Martin Koehler, MPI Magdeburg +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. Scalar Arguments .. + REAL EPS, THRESH + INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA + LOGICAL FATAL, REWI, TRACE + CHARACTER*6 SNAME +* .. Array Arguments .. 
+ REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), + $ AS( NMAX*NMAX ), B( NMAX, NMAX ), + $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), + $ C( NMAX, NMAX ), CC( NMAX*NMAX ), + $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) + INTEGER IDIM( NIDIM ) +* .. Local Scalars .. + REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX + INTEGER I, IA, IB, ICA, ICB, IK, IN, K, KS, LAA, + $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, + $ MA, MB, N, NA, NARGS, NB, NC, NS, IS + LOGICAL NULL, RESET, SAME, TRANA, TRANB + CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS + CHARACTER*3 ICH + CHARACTER*2 ISHAPE +* .. Local Arrays .. + LOGICAL ISAME( 13 ) +* .. External Functions .. + LOGICAL LSE, LSERES + EXTERNAL LSE, LSERES +* .. External Subroutines .. + EXTERNAL SGEMMT, SMAKE, SMMTCH +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Scalars in Common .. + INTEGER INFOT, NOUTC + LOGICAL LERR, OK +* .. Common blocks .. + COMMON /INFOC/INFOT, NOUTC, OK, LERR +* .. Data statements .. + DATA ICH/'NTC'/ + DATA ISHAPE/'UL'/ +* .. Executable Statements .. +* + NARGS = 13 + NC = 0 + RESET = .TRUE. + ERRMAX = ZERO +* + DO 100 IN = 1, NIDIM + N = IDIM( IN ) +* Set LDC to 1 more than minimum value if room. + LDC = N + IF( LDC.LT.NMAX ) + $ LDC = LDC + 1 +* Skip tests if not enough room. + IF( LDC.GT.NMAX ) + $ GO TO 100 + LCC = LDC*N + NULL = N.LE.0 +* + DO 90 IK = 1, NIDIM + K = IDIM( IK ) +* + DO 80 ICA = 1, 3 + TRANSA = ICH( ICA: ICA ) + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' +* + IF( TRANA )THEN + MA = K + NA = N + ELSE + MA = N + NA = K + END IF +* Set LDA to 1 more than minimum value if room. + LDA = MA + IF( LDA.LT.NMAX ) + $ LDA = LDA + 1 +* Skip tests if not enough room. + IF( LDA.GT.NMAX ) + $ GO TO 80 + LAA = LDA*NA +* +* Generate the matrix A.
+* + CALL SMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, + $ RESET, ZERO ) +* + DO 70 ICB = 1, 3 + TRANSB = ICH( ICB: ICB ) + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' +* + IF( TRANB )THEN + MB = N + NB = K + ELSE + MB = K + NB = N + END IF +* Set LDB to 1 more than minimum value if room. + LDB = MB + IF( LDB.LT.NMAX ) + $ LDB = LDB + 1 +* Skip tests if not enough room. + IF( LDB.GT.NMAX ) + $ GO TO 70 + LBB = LDB*NB +* +* Generate the matrix B. +* + CALL SMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, + $ LDB, RESET, ZERO ) +* + DO 60 IA = 1, NALF + ALPHA = ALF( IA ) +* + DO 50 IB = 1, NBET + BETA = BET( IB ) + + DO 45 IS = 1, 2 + UPLO = ISHAPE( IS: IS ) + +* +* Generate the matrix C. +* + CALL SMAKE( 'GE', UPLO, ' ', N, N, C, + $ NMAX, CC, LDC, RESET, ZERO ) +* + NC = NC + 1 +* +* Save every datum before calling the +* subroutine. +* + UPLOS = UPLO + TRANAS = TRANSA + TRANBS = TRANSB + NS = N + KS = K + ALS = ALPHA + DO 10 I = 1, LAA + AS( I ) = AA( I ) + 10 CONTINUE + LDAS = LDA + DO 20 I = 1, LBB + BS( I ) = BB( I ) + 20 CONTINUE + LDBS = LDB + BLS = BETA + DO 30 I = 1, LCC + CS( I ) = CC( I ) + 30 CONTINUE + LDCS = LDC +* +* Call the subroutine. +* + IF( TRACE ) + $ WRITE( NTRA, FMT = 9995 )NC, SNAME, + $ UPLO, TRANSA, TRANSB, N, K, ALPHA, LDA, + $ LDB, BETA, LDC + IF( REWI ) + $ REWIND NTRA + CALL SGEMMT( UPLO, TRANSA, TRANSB, N, + $ K, ALPHA, AA, LDA, BB, LDB, + $ BETA, CC, LDC ) +* +* Check if error-exit was taken incorrectly. +* + IF( .NOT.OK )THEN + WRITE( NOUT, FMT = 9994 ) + FATAL = .TRUE. + GO TO 120 + END IF +* +* See what data changed inside subroutines. 
+* + ISAME( 1 ) = UPLO.EQ.UPLOS + ISAME( 2 ) = TRANSA.EQ.TRANAS + ISAME( 3 ) = TRANSB.EQ.TRANBS + ISAME( 4 ) = NS.EQ.N + ISAME( 5 ) = KS.EQ.K + ISAME( 6 ) = ALS.EQ.ALPHA + ISAME( 7 ) = LSE( AS, AA, LAA ) + ISAME( 8 ) = LDAS.EQ.LDA + ISAME( 9 ) = LSE( BS, BB, LBB ) + ISAME( 10 ) = LDBS.EQ.LDB + ISAME( 11 ) = BLS.EQ.BETA + IF( NULL )THEN + ISAME( 12 ) = LSE( CS, CC, LCC ) + ELSE + ISAME( 12 ) = LSERES( 'GE', ' ', M, N, + $ CS, CC, LDC ) + END IF + ISAME( 13 ) = LDCS.EQ.LDC +* +* If data was incorrectly changed, report +* and return. +* + SAME = .TRUE. + DO 40 I = 1, NARGS + SAME = SAME.AND.ISAME( I ) + IF( .NOT.ISAME( I ) ) + $ WRITE( NOUT, FMT = 9998 )I + 40 CONTINUE + IF( .NOT.SAME )THEN + FATAL = .TRUE. + GO TO 120 + END IF +* + IF( .NOT.NULL )THEN +* +* Check the result. +* + CALL SMMTCH( UPLO, TRANSA, TRANSB, + $ N, K, + $ ALPHA, A, NMAX, B, NMAX, BETA, + $ C, NMAX, CT, G, CC, LDC, EPS, + $ ERR, FATAL, NOUT, .TRUE. ) + ERRMAX = MAX( ERRMAX, ERR ) +* If got really bad answer, report and +* return. + IF( FATAL ) + $ GO TO 120 + END IF +* + 45 CONTINUE +* + 50 CONTINUE +* + 60 CONTINUE +* + 70 CONTINUE +* + 80 CONTINUE +* + 90 CONTINUE +* + 100 CONTINUE +* +* +* Report result. 
+* + IF( ERRMAX.LT.THRESH )THEN + WRITE( NOUT, FMT = 9999 )SNAME, NC + ELSE + WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX + END IF + GO TO 130 +* + 120 CONTINUE + WRITE( NOUT, FMT = 9996 )SNAME + WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANSA, TRANSB, N, K, + $ ALPHA, LDA, LDB, BETA, LDC +* + 130 CONTINUE + RETURN +* + 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', + $ 'S)' ) + 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', + $ 'ANGED INCORRECTLY *******' ) + 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', + $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, + $ ' - SUSPECT *******' ) + 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) + 9995 FORMAT( 1X, I6, ': ', A6, '(''',A1, ''',''',A1, ''',''', A1,''',', + $ 2( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', + $ 'C,', I3, ').' ) + 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', + $ '******' ) +* +* End of DCHK6 +* + END + + SUBROUTINE SMMTCH( UPLO, TRANSA, TRANSB, N, KK, ALPHA, A, LDA, + $ B, LDB, BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, + $ FATAL, NOUT, MV ) +* +* Checks the results of the computational tests. +* +* Auxiliary routine for test program for Level 3 Blas. (SGEMMT) +* +* -- Written on 19-July-2023. +* Martin Koehler, MPI Magdeburg +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. Scalar Arguments .. + REAL ALPHA, BETA, EPS, ERR + INTEGER KK, LDA, LDB, LDC, LDCC, N, NOUT + LOGICAL FATAL, MV + CHARACTER*1 UPLO, TRANSA, TRANSB +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ CC( LDCC, * ), CT( * ), G( * ) +* .. Local Scalars .. + REAL ERRI + INTEGER I, J, K, ISTART, ISTOP + LOGICAL TRANA, TRANB, UPPER +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. Executable Statements ..
+ UPPER = UPLO.EQ.'U' + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' +* +* Compute expected result, one column at a time, in CT using data +* in A, B and C. +* Compute gauges in G. +* + ISTART = 1 + ISTOP = N + + DO 120 J = 1, N +* + IF ( UPPER ) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + DO 10 I = ISTART, ISTOP + CT( I ) = ZERO + G( I ) = ZERO + 10 CONTINUE + IF( .NOT.TRANA.AND..NOT.TRANB )THEN + DO 30 K = 1, KK + DO 20 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( K, J ) + G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( K, J ) ) + 20 CONTINUE + 30 CONTINUE + ELSE IF( TRANA.AND..NOT.TRANB )THEN + DO 50 K = 1, KK + DO 40 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( K, J ) + G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( K, J ) ) + 40 CONTINUE + 50 CONTINUE + ELSE IF( .NOT.TRANA.AND.TRANB )THEN + DO 70 K = 1, KK + DO 60 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( J, K ) + G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( J, K ) ) + 60 CONTINUE + 70 CONTINUE + ELSE IF( TRANA.AND.TRANB )THEN + DO 90 K = 1, KK + DO 80 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( J, K ) + G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( J, K ) ) + 80 CONTINUE + 90 CONTINUE + END IF + DO 100 I = ISTART, ISTOP + CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) + G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( I, J ) ) + 100 CONTINUE +* +* Compute the error ratio for this result. +* + ERR = ZERO + DO 110 I = ISTART, ISTOP + ERRI = ABS( CT( I ) - CC( I, J ) )/EPS + IF( G( I ).NE.ZERO ) + $ ERRI = ERRI/G( I ) + ERR = MAX( ERR, ERRI ) + IF( ERR*SQRT( EPS ).GE.ONE ) + $ GO TO 130 + 110 CONTINUE +* + 120 CONTINUE +* +* If the loop completes, all results are at least half accurate. + GO TO 150 +* +* Report fatal error. +* + 130 FATAL = .TRUE.
+ WRITE( NOUT, FMT = 9999 ) + DO 140 I = ISTART, ISTOP + IF( MV )THEN + WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) + ELSE + WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) + END IF + 140 CONTINUE + IF( N.GT.1 ) + $ WRITE( NOUT, FMT = 9997 )J +* + 150 CONTINUE + RETURN +* + 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', + $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', + $ 'TED RESULT' ) + 9998 FORMAT( 1X, I7, 2G18.6 ) + 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) +* +* End of SMMTCH * END diff --git a/BLAS/TESTING/sblat3.in b/BLAS/TESTING/sblat3.in index 5c4e3b83e1..9741a5dd61 100644 --- a/BLAS/TESTING/sblat3.in +++ b/BLAS/TESTING/sblat3.in @@ -18,3 +18,4 @@ STRMM T PUT F FOR NO TEST. SAME COLUMNS. STRSM T PUT F FOR NO TEST. SAME COLUMNS. SSYRK T PUT F FOR NO TEST. SAME COLUMNS. SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +SGEMMT T PUT F FOR NO TEST. SAME COLUMNS. diff --git a/BLAS/TESTING/zblat3.f b/BLAS/TESTING/zblat3.f index fb4d8019e9..9b54f6be8f 100644 --- a/BLAS/TESTING/zblat3.f +++ b/BLAS/TESTING/zblat3.f @@ -19,7 +19,7 @@ *> Test program for the COMPLEX*16 Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records -*> of the file are read using list-directed input, the last 9 records +*> of the file are read using list-directed input, the last 10 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 23 lines: @@ -46,6 +46,7 @@ *> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGEMMT T PUT F FOR NO TEST. SAME COLUMNS.
*> *> *> Further Details @@ -94,7 +95,7 @@ PROGRAM ZBLAT3 INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS - PARAMETER ( NSUBS = 9 ) + PARAMETER ( NSUBS = 10 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) @@ -128,7 +129,8 @@ PROGRAM ZBLAT3 LOGICAL LZE EXTERNAL DDIFF, LZE * .. External Subroutines .. - EXTERNAL ZCHK1, ZCHK2, ZCHK3, ZCHK4, ZCHK5, ZCHKE, ZMMCH + EXTERNAL ZCHK1, ZCHK2, ZCHK3, ZCHK4, ZCHK5, ZCHK6 + EXTERNAL ZCHKE, ZMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. @@ -141,7 +143,7 @@ PROGRAM ZBLAT3 * .. Data statements .. DATA SNAMES/'ZGEMM ', 'ZHEMM ', 'ZSYMM ', 'ZTRMM ', $ 'ZTRSM ', 'ZHERK ', 'ZSYRK ', 'ZHER2K', - $ 'ZSYR2K'/ + $ 'ZSYR2K', 'ZGEMMT'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. @@ -319,7 +321,7 @@ PROGRAM ZBLAT3 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, - $ 180, 180 )ISNUM + $ 180, 180, 185 )ISNUM * Test ZGEMM, 01. 140 CALL ZCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, @@ -348,6 +350,13 @@ PROGRAM ZBLAT3 $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 +* Test ZGEMMT, 01. + 185 CALL ZCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, + $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, + $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, + $ CC, CS, CT, G ) + GO TO 190 + * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 @@ -2008,7 +2017,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. - REAL ONE, TWO + DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. 
COMPLEX*16 ALPHA, BETA @@ -2038,7 +2047,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) RBETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, - $ 90 )ISNUM + $ 90, 100 )ISNUM 10 INFOT = 1 CALL ZGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2219,7 +2228,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 13 CALL ZGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 20 INFOT = 1 CALL ZHEMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2286,7 +2295,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 30 INFOT = 1 CALL ZSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2353,7 +2362,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 40 INFOT = 1 CALL ZTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2510,7 +2519,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL ZTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 50 INFOT = 1 CALL ZTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2667,7 +2676,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 60 INFOT = 1 CALL ZHERK( '/', 'N', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2722,7 +2731,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 10 CALL ZHERK( 'L', 'C', 2, 0, RALPHA, A, 1, 
RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 70 INFOT = 1 CALL ZSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2777,7 +2786,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 10 CALL ZSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 80 INFOT = 1 CALL ZHER2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2844,7 +2853,7 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL ZHER2K( 'L', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) - GO TO 100 + GO TO 110 90 INFOT = 1 CALL ZSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) @@ -2911,8 +2920,186 @@ SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) INFOT = 12 CALL ZSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + GO TO 110 + 100 INFOT = 1 + CALL ZGEMMT( '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'N', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'N', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'T', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'T', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'T', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'C', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'C', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) 
+ CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZGEMMT( '/', 'C', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 2 + CALL ZGEMMT( 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGEMMT( 'U', '/', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGEMMT( 'U', '/', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGEMMT( 'L', '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGEMMT( 'L', '/', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGEMMT( 'L', '/', 'T', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 3 + CALL ZGEMMT( 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGEMMT( 'U', 'C', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGEMMT( 'U', 'T', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'N', 'C', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'N', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'C', 'C', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'C', 
'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'T', 'C', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGEMMT( 'U', 'T', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'N', 'C', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'N', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'C', 'C', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'C', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'T', 'C', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZGEMMT( 'U', 'T', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 8 + CALL ZGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'N', 'C', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, 
LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'C', 'C', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'C', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'T', 'C', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGEMMT( 'U', 'T', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + + INFOT = 10 + CALL ZGEMMT( 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZGEMMT( 'U', 'C', 'N', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZGEMMT( 'U', 'T', 'N', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'N', 'C', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'N', 'T', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'C', 'C', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'C', 'T', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1, 
BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'T', 'C', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZGEMMT( 'U', 'T', 'T', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) + CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) + GO TO 110 + * - 100 IF( OK )THEN + 110 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT @@ -3496,3 +3683,498 @@ SUBROUTINE XERBLA( SRNAME, INFO ) * End of XERBLA * END + + + + SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, + $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, + $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) +* +* Tests ZGEMMT. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0, 0.0 ) ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) +* .. Scalar Arguments .. + DOUBLE PRECISION EPS, THRESH + INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA + LOGICAL FATAL, REWI, TRACE + CHARACTER*6 SNAME +* .. Array Arguments .. + COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), + $ AS( NMAX*NMAX ), B( NMAX, NMAX ), + $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), + $ C( NMAX, NMAX ), CC( NMAX*NMAX ), + $ CS( NMAX*NMAX ), CT( NMAX ) + DOUBLE PRECISION G( NMAX ) + INTEGER IDIM( NIDIM ) +* .. Local Scalars .. + COMPLEX*16 ALPHA, ALS, BETA, BLS + DOUBLE PRECISION ERR, ERRMAX + INTEGER I, IA, IB, ICA, ICB, IK, IN, K, KS, LAA, + $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, + $ MA, MB, N, NA, NARGS, NB, NC, NS, IS + LOGICAL NULL, RESET, SAME, TRANA, TRANB + CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS + CHARACTER*3 ICH + CHARACTER*2 ISHAPE +* .. Local Arrays .. + LOGICAL ISAME( 13 ) +* ..
External Functions .. + LOGICAL LZE, LZERES + EXTERNAL LZE, LZERES +* .. External Subroutines .. + EXTERNAL ZGEMMT, ZMAKE, ZMMTCH +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Scalars in Common .. + INTEGER INFOT, NOUTC + LOGICAL LERR, OK +* .. Common blocks .. + COMMON /INFOC/INFOT, NOUTC, OK, LERR +* .. Data statements .. + DATA ICH/'NTC'/ + DATA ISHAPE/'UL'/ + +* .. Executable Statements .. +* + NARGS = 13 + NC = 0 + RESET = .TRUE. + ERRMAX = RZERO +* + DO 100 IN = 1, NIDIM + N = IDIM( IN ) +* Set LDC to 1 more than minimum value if room. + LDC = N + IF( LDC.LT.NMAX ) + $ LDC = LDC + 1 +* Skip tests if not enough room. + IF( LDC.GT.NMAX ) + $ GO TO 100 + LCC = LDC*N + NULL = N.LE.0 +* + DO 90 IK = 1, NIDIM + K = IDIM( IK ) +* + DO 80 ICA = 1, 3 + TRANSA = ICH( ICA: ICA ) + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' +* + IF( TRANA )THEN + MA = K + NA = N + ELSE + MA = N + NA = K + END IF +* Set LDA to 1 more than minimum value if room. + LDA = MA + IF( LDA.LT.NMAX ) + $ LDA = LDA + 1 +* Skip tests if not enough room. + IF( LDA.GT.NMAX ) + $ GO TO 80 + LAA = LDA*NA +* +* Generate the matrix A. +* + CALL ZMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, + $ RESET, ZERO ) +* + DO 70 ICB = 1, 3 + TRANSB = ICH( ICB: ICB ) + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' +* + IF( TRANB )THEN + MB = N + NB = K + ELSE + MB = K + NB = N + END IF +* Set LDB to 1 more than minimum value if room. + LDB = MB + IF( LDB.LT.NMAX ) + $ LDB = LDB + 1 +* Skip tests if not enough room. + IF( LDB.GT.NMAX ) + $ GO TO 70 + LBB = LDB*NB +* +* Generate the matrix B. +* + CALL ZMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, + $ LDB, RESET, ZERO ) +* + DO 60 IA = 1, NALF + ALPHA = ALF( IA ) +* + DO 50 IB = 1, NBET + BETA = BET( IB ) + DO 45 IS = 1, 2 + UPLO = ISHAPE( IS: IS ) + +* +* Generate the matrix C. +* + CALL ZMAKE( 'GE', UPLO, ' ', N, N, C, NMAX, + $ CC, LDC, RESET, ZERO ) +* + NC = NC + 1 +* +* Save every datum before calling the +* subroutine.
+* + UPLOS = UPLO + TRANAS = TRANSA + TRANBS = TRANSB + NS = N + KS = K + ALS = ALPHA + DO 10 I = 1, LAA + AS( I ) = AA( I ) + 10 CONTINUE + LDAS = LDA + DO 20 I = 1, LBB + BS( I ) = BB( I ) + 20 CONTINUE + LDBS = LDB + BLS = BETA + DO 30 I = 1, LCC + CS( I ) = CC( I ) + 30 CONTINUE + LDCS = LDC +* +* Call the subroutine. +* + IF( TRACE ) + $ WRITE( NTRA, FMT = 9995 )NC, SNAME, UPLO, + $ TRANSA, TRANSB, N, K, ALPHA, LDA, LDB, + $ BETA, LDC + IF( REWI ) + $ REWIND NTRA + CALL ZGEMMT( UPLO, TRANSA, TRANSB, N, K, + $ ALPHA, AA, LDA, BB, LDB, BETA, + $ CC, LDC ) +* +* Check if error-exit was taken incorrectly. +* + IF( .NOT.OK )THEN + WRITE( NOUT, FMT = 9994 ) + FATAL = .TRUE. + GO TO 120 + END IF +* +* See what data changed inside subroutines. +* + ISAME( 1 ) = UPLOS.EQ.UPLO + ISAME( 2 ) = TRANSA.EQ.TRANAS + ISAME( 3 ) = TRANSB.EQ.TRANBS + ISAME( 4 ) = NS.EQ.N + ISAME( 5 ) = KS.EQ.K + ISAME( 6 ) = ALS.EQ.ALPHA + ISAME( 7 ) = LZE( AS, AA, LAA ) + ISAME( 8 ) = LDAS.EQ.LDA + ISAME( 9 ) = LZE( BS, BB, LBB ) + ISAME( 10 ) = LDBS.EQ.LDB + ISAME( 11 ) = BLS.EQ.BETA + IF( NULL )THEN + ISAME( 12 ) = LZE( CS, CC, LCC ) + ELSE + ISAME( 12 ) = LZERES( 'GE', ' ', N, N, CS, + $ CC, LDC ) + END IF + ISAME( 13 ) = LDCS.EQ.LDC +* +* If data was incorrectly changed, report +* and return. +* + SAME = .TRUE. + DO 40 I = 1, NARGS + SAME = SAME.AND.ISAME( I ) + IF( .NOT.ISAME( I ) ) + $ WRITE( NOUT, FMT = 9998 )I + 40 CONTINUE + IF( .NOT.SAME )THEN + FATAL = .TRUE. + GO TO 120 + END IF +* + IF( .NOT.NULL )THEN +* +* Check the result. +* + CALL ZMMTCH( UPLO, TRANSA, TRANSB, N, + $ K, ALPHA, A, NMAX, B, NMAX, + $ BETA, C, NMAX, CT, G, CC, LDC, + $ EPS, ERR, FATAL, NOUT, .TRUE.) + ERRMAX = MAX( ERRMAX, ERR ) +* If got really bad answer, report and +* return. + IF( FATAL ) + $ GO TO 120 + END IF + 45 CONTINUE +* + 50 CONTINUE +* + 60 CONTINUE +* + 70 CONTINUE +* + 80 CONTINUE +* + 90 CONTINUE +* + 100 CONTINUE +* +* +* Report result.
+* + IF( ERRMAX.LT.THRESH )THEN + WRITE( NOUT, FMT = 9999 )SNAME, NC + ELSE + WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX + END IF + GO TO 130 +* + 120 CONTINUE + WRITE( NOUT, FMT = 9996 )SNAME + WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANSA, TRANSB, N, K, + $ ALPHA, LDA, LDB, BETA, LDC +* + 130 CONTINUE + RETURN +* + 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', + $ 'S)' ) + 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', + $ 'ANGED INCORRECTLY *******' ) + 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', + $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, + $ ' - SUSPECT *******' ) + 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) + 9995 FORMAT( 1X, I6, ': ', A6, '(''',A1, ''',''',A1, ''',''', A1,''',', + $ 2( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, + $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) + 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', + $ '******' ) +* +* End of ZCHK6 +* + END + + SUBROUTINE ZMMTCH( UPLO, TRANSA, TRANSB, N, KK, ALPHA, A, LDA, + $ B, LDB, BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, + $ FATAL, NOUT, MV ) + IMPLICIT NONE +* +* Checks the results of the computational tests. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0, 0.0 ) ) + DOUBLE PRECISION RZERO, RONE + PARAMETER ( RZERO = 0.0D0, RONE = 1.0D0 ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + DOUBLE PRECISION EPS, ERR + INTEGER KK, LDA, LDB, LDC, LDCC, N, NOUT + LOGICAL FATAL, MV + CHARACTER*1 TRANSA, TRANSB, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ CC( LDCC, * ), CT( * ) + DOUBLE PRECISION G( * ) +* ..
Local Scalars .. + COMPLEX*16 CL + DOUBLE PRECISION ERRI + INTEGER I, J, K, ISTART, ISTOP + LOGICAL CTRANA, CTRANB, TRANA, TRANB, UPPER +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, CONJG, MAX, SQRT +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. Statement Function definitions .. + ABS1( CL ) = ABS( DBLE( CL ) ) + ABS( DIMAG( CL ) ) +* .. Executable Statements .. + UPPER = UPLO.EQ.'U' + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' + CTRANA = TRANSA.EQ.'C' + CTRANB = TRANSB.EQ.'C' +* +* Compute expected result, one column at a time, in CT using data +* in A, B and C. +* Compute gauges in G. +* + ISTART = 1 + ISTOP = 1 + + DO 220 J = 1, N +* + IF ( UPPER ) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + + DO 10 I = ISTART, ISTOP + CT( I ) = ZERO + G( I ) = RZERO + 10 CONTINUE + IF( .NOT.TRANA.AND..NOT.TRANB )THEN + DO 30 K = 1, KK + DO 20 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( K, J ) + G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) ) + 20 CONTINUE + 30 CONTINUE + ELSE IF( TRANA.AND..NOT.TRANB )THEN + IF( CTRANA )THEN + DO 50 K = 1, KK + DO 40 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( K, J ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( K, J ) ) + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, KK + DO 60 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( K, J ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( K, J ) ) + 60 CONTINUE + 70 CONTINUE + END IF + ELSE IF( .NOT.TRANA.AND.TRANB )THEN + IF( CTRANB )THEN + DO 90 K = 1, KK + DO 80 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( I, K ) )* + $ ABS1( B( J, K ) ) + 80 CONTINUE + 90 CONTINUE + ELSE + DO 110 K = 1, KK + DO 100 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( J, K ) + G( I ) = G( I ) + ABS1( A( I, K ) )* + $ ABS1( B( J, K ) ) + 100 CONTINUE + 110 CONTINUE + END IF + ELSE IF( TRANA.AND.TRANB )THEN + IF( CTRANA )THEN +
IF( CTRANB )THEN + DO 130 K = 1, KK + DO 120 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )* + $ CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 120 CONTINUE + 130 CONTINUE + ELSE + DO 150 K = 1, KK + DO 140 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( J, K ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE + IF( CTRANB )THEN + DO 170 K = 1, KK + DO 160 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 190 K = 1, KK + DO 180 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( J, K ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 180 CONTINUE + 190 CONTINUE + END IF + END IF + END IF + DO 200 I = ISTART, ISTOP + CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) + G( I ) = ABS1( ALPHA )*G( I ) + + $ ABS1( BETA )*ABS1( C( I, J ) ) + 200 CONTINUE +* +* Compute the error ratio for this result. +* + ERR = RZERO + DO 210 I = ISTART, ISTOP + ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS + IF( G( I ).NE.RZERO ) + $ ERRI = ERRI/G( I ) + ERR = MAX( ERR, ERRI ) + IF( ERR*SQRT( EPS ).GE.RONE ) + $ GO TO 230 + 210 CONTINUE +* + 220 CONTINUE +* +* If the loop completes, all results are at least half accurate. + GO TO 250 +* +* Report fatal error. +* + 230 FATAL = .TRUE.
+ WRITE( NOUT, FMT = 9999 ) + DO 240 I = ISTART, ISTOP + IF( MV )THEN + WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) + ELSE + WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) + END IF + 240 CONTINUE + IF( N.GT.1 ) + $ WRITE( NOUT, FMT = 9997 )J +* + 250 CONTINUE + RETURN +* + 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', + $ 'F ACCURATE *******', /' EXPECTED RE', + $ 'SULT COMPUTED RESULT' ) + 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) + 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) +* +* End of ZMMTCH +* + END + diff --git a/BLAS/TESTING/zblat3.in b/BLAS/TESTING/zblat3.in index a3618b0f6d..ed6e9dd601 100644 --- a/BLAS/TESTING/zblat3.in +++ b/BLAS/TESTING/zblat3.in @@ -21,3 +21,4 @@ ZHERK T PUT F FOR NO TEST. SAME COLUMNS. ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +ZGEMMT T PUT F FOR NO TEST. SAME COLUMNS. From d0870915d43a2b5a75018478899f054630ef25d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Thu, 20 Jul 2023 15:31:30 +0200 Subject: [PATCH 02/10] CBLAS routines for xGEMMT added --- CBLAS/include/cblas.h | 21 +++++++ CBLAS/include/cblas_64.h | 22 +++++++ CBLAS/include/cblas_f77.h | 36 +++++++++++ CBLAS/include/cblas_test.h | 4 ++ CBLAS/src/CMakeLists.txt | 8 +-- CBLAS/src/Makefile | 8 +-- CBLAS/src/cblas_cgemm.c | 2 +- CBLAS/src/cblas_cgemmt.c | 122 ++++++++++++++++++++++++++++++++++++ CBLAS/src/cblas_dgemm.c | 2 +- CBLAS/src/cblas_dgemmt.c | 121 ++++++++++++++++++++++++++++++++++++ CBLAS/src/cblas_sgemm.c | 2 +- CBLAS/src/cblas_sgemmt.c | 123 +++++++++++++++++++++++++++++++++++++ CBLAS/src/cblas_zgemm.c | 2 +- CBLAS/src/cblas_zgemmt.c | 121 ++++++++++++++++++++++++++++++++++++ 14 files changed, 582 insertions(+), 12 deletions(-) create mode 100644 CBLAS/src/cblas_cgemmt.c create mode 100644 CBLAS/src/cblas_dgemmt.c create mode 100644 CBLAS/src/cblas_sgemmt.c create mode 100644 CBLAS/src/cblas_zgemmt.c
diff --git a/CBLAS/include/cblas.h b/CBLAS/include/cblas.h index 171ff1d609..dfab386bb6 100644 --- a/CBLAS/include/cblas.h +++ b/CBLAS/include/cblas.h @@ -472,6 +472,12 @@ void cblas_sgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const CBLAS_INT K, const float alpha, const float *A, const CBLAS_INT lda, const float *B, const CBLAS_INT ldb, const float beta, float *C, const CBLAS_INT ldc); +void cblas_sgemmt(CBLAS_LAYOUT layout,CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const float alpha, const float *A, + const CBLAS_INT lda, const float *B, const CBLAS_INT ldb, + const float beta, float *C, const CBLAS_INT ldc); + void cblas_ssymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const CBLAS_INT M, const CBLAS_INT N, const float alpha, const float *A, const CBLAS_INT lda, @@ -502,6 +508,11 @@ void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT lda, const double *B, const CBLAS_INT ldb, const double beta, double *C, const CBLAS_INT ldc); +void cblas_dgemmt(CBLAS_LAYOUT layout,CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const double alpha, const double *A, + const CBLAS_INT lda, const double *B, const CBLAS_INT ldb, + const double beta, double *C, const CBLAS_INT ldc); void cblas_dsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const CBLAS_INT M, const CBLAS_INT N, const double alpha, const double *A, const CBLAS_INT lda, @@ -532,6 +543,11 @@ void cblas_cgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const CBLAS_INT K, const void *alpha, const void *A, const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, const void *beta, void *C, const CBLAS_INT ldc); +void cblas_cgemmt(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const void *alpha,
const void *A, + const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, + const void *beta, void *C, const CBLAS_INT ldc); void cblas_csymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const CBLAS_INT M, const CBLAS_INT N, const void *alpha, const void *A, const CBLAS_INT lda, @@ -562,6 +578,11 @@ void cblas_zgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const CBLAS_INT K, const void *alpha, const void *A, const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, const void *beta, void *C, const CBLAS_INT ldc); +void cblas_zgemmt(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const void *alpha, const void *A, + const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, + const void *beta, void *C, const CBLAS_INT ldc); void cblas_zsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const CBLAS_INT M, const CBLAS_INT N, const void *alpha, const void *A, const CBLAS_INT lda, diff --git a/CBLAS/include/cblas_64.h b/CBLAS/include/cblas_64.h index 3901ecf446..aa4125b9bf 100644 --- a/CBLAS/include/cblas_64.h +++ b/CBLAS/include/cblas_64.h @@ -423,6 +423,12 @@ void cblas_sgemm_64(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const int64_t K, const float alpha, const float *A, const int64_t lda, const float *B, const int64_t ldb, const float beta, float *C, const int64_t ldc); +void cblas_sgemmt_64(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int64_t N, + const int64_t K, const float alpha, const float *A, + const int64_t lda, const float *B, const int64_t ldb, + const float beta, float *C, const int64_t ldc); + void cblas_ssymm_64(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const int64_t M, const int64_t N, const float alpha, const float *A, const int64_t lda, @@ -453,6 +459,11 @@ void cblas_dgemm_64(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const int64_t K, const double alpha, const double *A, const int64_t 
lda, const double *B, const int64_t ldb, const double beta, double *C, const int64_t ldc); +void cblas_dgemmt_64(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int64_t N, + const int64_t K, const double alpha, const double *A, + const int64_t lda, const double *B, const int64_t ldb, + const double beta, double *C, const int64_t ldc); void cblas_dsymm_64(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const int64_t M, const int64_t N, const double alpha, const double *A, const int64_t lda, @@ -483,6 +494,12 @@ void cblas_cgemm_64(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const int64_t K, const void *alpha, const void *A, const int64_t lda, const void *B, const int64_t ldb, const void *beta, void *C, const int64_t ldc); +void cblas_cgemmt_64(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int64_t N, + const int64_t K, const void *alpha, const void *A, + const int64_t lda, const void *B, const int64_t ldb, + const void *beta, void *C, const int64_t ldc); + void cblas_csymm_64(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const int64_t M, const int64_t N, const void *alpha, const void *A, const int64_t lda, @@ -513,6 +530,11 @@ void cblas_zgemm_64(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const int64_t K, const void *alpha, const void *A, const int64_t lda, const void *B, const int64_t ldb, const void *beta, void *C, const int64_t ldc); +void cblas_zgemmt_64(CBLAS_LAYOUT layout,CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int64_t N, + const int64_t K, const void *alpha, const void *A, + const int64_t lda, const void *B, const int64_t ldb, + const void *beta, void *C, const int64_t ldc); void cblas_zsymm_64(CBLAS_LAYOUT layout, CBLAS_SIDE Side, CBLAS_UPLO Uplo, const int64_t M, const int64_t N, const void *alpha, const void *A, const int64_t lda, diff --git a/CBLAS/include/cblas_f77.h b/CBLAS/include/cblas_f77.h index 
b9a1172e29..fb5486f205 100644 --- a/CBLAS/include/cblas_f77.h +++ b/CBLAS/include/cblas_f77.h @@ -193,24 +193,28 @@ #define F77_zherk_base F77_GLOBAL_SUFFIX(zherk,ZHERK) #define F77_zher2k_base F77_GLOBAL_SUFFIX(zher2k,ZHER2K) #define F77_sgemm_base F77_GLOBAL_SUFFIX(sgemm,SGEMM) +#define F77_sgemmt_base F77_GLOBAL_SUFFIX(sgemmt,SGEMMT) #define F77_ssymm_base F77_GLOBAL_SUFFIX(ssymm,SSYMM) #define F77_ssyrk_base F77_GLOBAL_SUFFIX(ssyrk,SSYRK) #define F77_ssyr2k_base F77_GLOBAL_SUFFIX(ssyr2k,SSYR2K) #define F77_strmm_base F77_GLOBAL_SUFFIX(strmm,STRMM) #define F77_strsm_base F77_GLOBAL_SUFFIX(strsm,STRSM) #define F77_dgemm_base F77_GLOBAL_SUFFIX(dgemm,DGEMM) +#define F77_dgemmt_base F77_GLOBAL_SUFFIX(dgemmt,DGEMMT) #define F77_dsymm_base F77_GLOBAL_SUFFIX(dsymm,DSYMM) #define F77_dsyrk_base F77_GLOBAL_SUFFIX(dsyrk,DSYRK) #define F77_dsyr2k_base F77_GLOBAL_SUFFIX(dsyr2k,DSYR2K) #define F77_dtrmm_base F77_GLOBAL_SUFFIX(dtrmm,DTRMM) #define F77_dtrsm_base F77_GLOBAL_SUFFIX(dtrsm,DTRSM) #define F77_cgemm_base F77_GLOBAL_SUFFIX(cgemm,CGEMM) +#define F77_cgemmt_base F77_GLOBAL_SUFFIX(cgemmt,CGEMMT) #define F77_csymm_base F77_GLOBAL_SUFFIX(csymm,CSYMM) #define F77_csyrk_base F77_GLOBAL_SUFFIX(csyrk,CSYRK) #define F77_csyr2k_base F77_GLOBAL_SUFFIX(csyr2k,CSYR2K) #define F77_ctrmm_base F77_GLOBAL_SUFFIX(ctrmm,CTRMM) #define F77_ctrsm_base F77_GLOBAL_SUFFIX(ctrsm,CTRSM) #define F77_zgemm_base F77_GLOBAL_SUFFIX(zgemm,ZGEMM) +#define F77_zgemmt_base F77_GLOBAL_SUFFIX(zgemmt,ZGEMMT) #define F77_zsymm_base F77_GLOBAL_SUFFIX(zsymm,ZSYMM) #define F77_zsyrk_base F77_GLOBAL_SUFFIX(zsyrk,ZSYRK) #define F77_zsyr2k_base F77_GLOBAL_SUFFIX(zsyr2k,ZSYR2K) @@ -385,6 +389,7 @@ /* Single Precision */ #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__, 1, 1) + #define F77_sgemmt(...) F77_sgemmt_base(__VA_ARGS__, 1, 1, 1) #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__, 1, 1) #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__, 1, 1) #define F77_ssyr2k(...) 
F77_ssyr2k_base(__VA_ARGS__, 1, 1) @@ -394,6 +399,7 @@ /* Double Precision */ #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__, 1, 1) + #define F77_dgemmt(...) F77_dgemmt_base(__VA_ARGS__, 1, 1, 1) #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__, 1, 1) #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__, 1, 1) #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__, 1, 1) @@ -403,6 +409,7 @@ /* Single Complex Precision */ #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__, 1, 1) + #define F77_cgemmt(...) F77_cgemmt_base(__VA_ARGS__, 1, 1, 1) #define F77_csymm(...) F77_csymm_base(__VA_ARGS__, 1, 1) #define F77_chemm(...) F77_chemm_base(__VA_ARGS__, 1, 1) #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__, 1, 1) @@ -415,6 +422,7 @@ /* Double Complex Precision */ #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__, 1, 1) + #define F77_zgemmt(...) F77_zgemmt_base(__VA_ARGS__, 1, 1, 1) #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__, 1, 1) #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__, 1, 1) #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__, 1, 1) @@ -509,6 +517,7 @@ /* Single Precision */ #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__) + #define F77_sgemmt(...) F77_sgemmt_base(__VA_ARGS__) #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__) #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__) #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__) @@ -518,6 +527,7 @@ /* Double Precision */ #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__) + #define F77_dgemmt(...) F77_dgemmt_base(__VA_ARGS__) #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__) #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__) #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__) @@ -527,6 +537,7 @@ /* Single Complex Precision */ #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__) + #define F77_cgemmt(...) F77_cgemmt_base(__VA_ARGS__) #define F77_csymm(...) F77_csymm_base(__VA_ARGS__) #define F77_chemm(...) F77_chemm_base(__VA_ARGS__) #define F77_csyrk(...) 
F77_csyrk_base(__VA_ARGS__) @@ -539,6 +550,7 @@ /* Double Complex Precision */ #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__) + #define F77_zgemmt(...) F77_zgemmt_base(__VA_ARGS__) #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__) #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__) #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__) @@ -977,6 +989,12 @@ void F77_sgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float * , size_t, size_t #endif ); +void F77_sgemmt_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); + void F77_ssymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT #ifdef BLAS_FORTRAN_STRLEN_END , size_t, size_t @@ -1010,6 +1028,12 @@ void F77_dgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double , size_t, size_t #endif ); +void F77_dgemmt_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); + void F77_dsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT #ifdef BLAS_FORTRAN_STRLEN_END , size_t, size_t @@ -1043,6 +1067,12 @@ void F77_cgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float * , size_t, size_t #endif ); +void F77_cgemmt_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); + void F77_csymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT #ifdef BLAS_FORTRAN_STRLEN_END , size_t, size_t @@ -1091,6 +1121,12 @@ void F77_zgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, 
const double *, const double , size_t, size_t #endif ); +void F77_zgemmt_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); + void F77_zsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT #ifdef BLAS_FORTRAN_STRLEN_END , size_t, size_t diff --git a/CBLAS/include/cblas_test.h b/CBLAS/include/cblas_test.h index f8174ba43c..b232923f31 100644 --- a/CBLAS/include/cblas_test.h +++ b/CBLAS/include/cblas_test.h @@ -158,24 +158,28 @@ typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX; #define F77_zherk F77_GLOBAL(czherk,CZHERK) #define F77_zher2k F77_GLOBAL(czher2k,CZHER2K) #define F77_sgemm F77_GLOBAL(csgemm,CSGEMM) +#define F77_sgemmt F77_GLOBAL(csgemmt,CSGEMMT) #define F77_ssymm F77_GLOBAL(cssymm,CSSYMM) #define F77_ssyrk F77_GLOBAL(cssyrk,CSSYRK) #define F77_ssyr2k F77_GLOBAL(cssyr2k,CSSYR2K) #define F77_strmm F77_GLOBAL(cstrmm,CSTRMM) #define F77_strsm F77_GLOBAL(cstrsm,CSTRSM) #define F77_dgemm F77_GLOBAL(cdgemm,CDGEMM) +#define F77_dgemmt F77_GLOBAL(cdgemmt,CDGEMMT) #define F77_dsymm F77_GLOBAL(cdsymm,CDSYMM) #define F77_dsyrk F77_GLOBAL(cdsyrk,CDSYRK) #define F77_dsyr2k F77_GLOBAL(cdsyr2k,CDSYR2K) #define F77_dtrmm F77_GLOBAL(cdtrmm,CDTRMM) #define F77_dtrsm F77_GLOBAL(cdtrsm,CDTRSM) #define F77_cgemm F77_GLOBAL(ccgemm,CCGEMM) +#define F77_cgemmt F77_GLOBAL(ccgemmt,CCGEMMT) #define F77_csymm F77_GLOBAL(ccsymm,CCSYMM) #define F77_csyrk F77_GLOBAL(ccsyrk,CCSYRK) #define F77_csyr2k F77_GLOBAL(ccsyr2k,CCSYR2K) #define F77_ctrmm F77_GLOBAL(cctrmm,CCTRMM) #define F77_ctrsm F77_GLOBAL(cctrsm,CCTRSM) #define F77_zgemm F77_GLOBAL(czgemm,CZGEMM) +#define F77_zgemmt F77_GLOBAL(czgemmt,CZGEMMT) #define F77_zsymm F77_GLOBAL(czsymm,CZSYMM) #define F77_zsyrk F77_GLOBAL(czsyrk,CZSYRK) #define F77_zsyr2k F77_GLOBAL(czsyr2k,CZSYR2K) diff --git 
a/CBLAS/src/CMakeLists.txt b/CBLAS/src/CMakeLists.txt index 0e4f740cc8..8aae9decd4 100644 --- a/CBLAS/src/CMakeLists.txt +++ b/CBLAS/src/CMakeLists.txt @@ -85,21 +85,21 @@ set(ZLEV2 cblas_zgemv.c cblas_zgbmv.c cblas_zhemv.c cblas_zhbmv.c cblas_zhpmv.c # Files for level 3 single precision real set(SLEV3 cblas_sgemm.c cblas_ssymm.c cblas_ssyrk.c cblas_ssyr2k.c cblas_strmm.c - cblas_strsm.c) + cblas_strsm.c cblas_sgemmt.c) # Files for level 3 double precision real set(DLEV3 cblas_dgemm.c cblas_dsymm.c cblas_dsyrk.c cblas_dsyr2k.c cblas_dtrmm.c - cblas_dtrsm.c) + cblas_dtrsm.c cblas_dgemmt.c) # Files for level 3 single precision complex set(CLEV3 cblas_cgemm.c cblas_csymm.c cblas_chemm.c cblas_cherk.c cblas_cher2k.c cblas_ctrmm.c cblas_ctrsm.c cblas_csyrk.c - cblas_csyr2k.c) + cblas_csyr2k.c cblas_cgemmt.c) # Files for level 3 double precision complex set(ZLEV3 cblas_zgemm.c cblas_zsymm.c cblas_zhemm.c cblas_zherk.c cblas_zher2k.c cblas_ztrmm.c cblas_ztrsm.c cblas_zsyrk.c - cblas_zsyr2k.c) + cblas_zsyr2k.c cblas_zgemmt.c) set(SOURCES) diff --git a/CBLAS/src/Makefile b/CBLAS/src/Makefile index a455cd66be..ba0b63a487 100644 --- a/CBLAS/src/Makefile +++ b/CBLAS/src/Makefile @@ -137,21 +137,21 @@ zlib2: $(zlev2) $(errhand) # Files for level 3 single precision real slev3 = cblas_sgemm.o cblas_ssymm.o cblas_ssyrk.o cblas_ssyr2k.o cblas_strmm.o \ - cblas_strsm.o + cblas_strsm.o cblas_sgemmt.o # Files for level 3 double precision real dlev3 = cblas_dgemm.o cblas_dsymm.o cblas_dsyrk.o cblas_dsyr2k.o cblas_dtrmm.o \ - cblas_dtrsm.o + cblas_dtrsm.o cblas_dgemmt.o # Files for level 3 single precision complex clev3 = cblas_cgemm.o cblas_csymm.o cblas_chemm.o cblas_cherk.o \ cblas_cher2k.o cblas_ctrmm.o cblas_ctrsm.o cblas_csyrk.o \ - cblas_csyr2k.o + cblas_csyr2k.o cblas_cgemmt.o # Files for level 3 double precision complex zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \ - cblas_zsyr2k.o + cblas_zsyr2k.o
cblas_zgemmt.o .PHONY: slib3 dlib3 clib3 zlib3 # Single precision real diff --git a/CBLAS/src/cblas_cgemm.c b/CBLAS/src/cblas_cgemm.c index fe4b599a19..5950ed1f8c 100644 --- a/CBLAS/src/cblas_cgemm.c +++ b/CBLAS/src/cblas_cgemm.c @@ -89,7 +89,7 @@ void API_SUFFIX(cblas_cgemm)(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE Tr else if ( TransB == CblasNoTrans ) TA='N'; else { - API_SUFFIX(cblas_xerbla)(2, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); + API_SUFFIX(cblas_xerbla)(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; diff --git a/CBLAS/src/cblas_cgemmt.c b/CBLAS/src/cblas_cgemmt.c new file mode 100644 index 0000000000..4d63dd284d --- /dev/null +++ b/CBLAS/src/cblas_cgemmt.c @@ -0,0 +1,122 @@ +/* + * + * cblas_cgemm.c + * This program is a C interface to cgemm. + * Written by Keita Teranishi + * 4/8/1998 + * + */ + +#include "cblas.h" +#include "cblas_f77.h" +void API_SUFFIX(cblas_cgemm_t)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, + const CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const void *alpha, const void *A, + const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, + const void *beta, void *C, const CBLAS_INT ldc) +{ + char TA, TB; + char UL; +#ifdef F77_CHAR + F77_CHAR F77_TA, F77_TB, F77_UL; +#else + #define F77_TA &TA + #define F77_TB &TB + #define F77_UL &UL +#endif + +#ifdef F77_INT + F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; + F77_INT F77_ldc=ldc; +#else + #define F77_N N + #define F77_K K + #define F77_lda lda + #define F77_ldb ldb + #define F77_ldc ldc +#endif + + extern int CBLAS_CallFromC; + extern int RowMajorStrg; + RowMajorStrg = 0; + CBLAS_CallFromC = 1; + + if ( Uplo == CblasUpper ) UL = 'U'; + else if (Uplo == CblasLower) UL= 'L'; + else { + API_SUFFIX(cblas_xerbla)(2, "cblas_cgemmt", "Illegal Uplo setting, %d\n", Uplo); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + if( layout == 
CblasColMajor ) + { + if(TransA == CblasTrans) TA='T'; + else if ( TransA == CblasConjTrans ) TA='C'; + else if ( TransA == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_cgemmt", "Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + if(TransB == CblasTrans) TB='T'; + else if ( TransB == CblasConjTrans ) TB='C'; + else if ( TransB == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_cgemmt", "Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_cgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, alpha, A, + &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + } else if (layout == CblasRowMajor) + { + RowMajorStrg = 1; + if(TransA == CblasTrans) TB='T'; + else if ( TransA == CblasConjTrans ) TB='C'; + else if ( TransA == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_cgemmt", "Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + if(TransB == CblasTrans) TA='T'; + else if ( TransB == CblasConjTrans ) TA='C'; + else if ( TransB == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_cgemmt", "Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + + #endif + + F77_cgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, alpha, B, + &F77_ldb, A, &F77_lda, beta, C, &F77_ldc); + } + else API_SUFFIX(cblas_xerbla)(1, "cblas_cgemmt", "Illegal layout setting, %d\n", layout); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; +} diff --git a/CBLAS/src/cblas_dgemm.c b/CBLAS/src/cblas_dgemm.c index bee9aa8a68..c4ae0275c2 100644 --- a/CBLAS/src/cblas_dgemm.c +++ b/CBLAS/src/cblas_dgemm.c @@ -89,7 +89,7 @@ void 
API_SUFFIX(cblas_dgemm)(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE Tr else if ( TransB == CblasNoTrans ) TA='N'; else { - API_SUFFIX(cblas_xerbla)(2, "cblas_dgemm","Illegal TransB setting, %d\n", TransB); + API_SUFFIX(cblas_xerbla)(3, "cblas_dgemm","Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; diff --git a/CBLAS/src/cblas_dgemmt.c b/CBLAS/src/cblas_dgemmt.c new file mode 100644 index 0000000000..84242f5c83 --- /dev/null +++ b/CBLAS/src/cblas_dgemmt.c @@ -0,0 +1,121 @@ +/* + * + * cblas_dgemmt.c + * This program is a C interface to dgemmt. + * Written by Keita Teranishi + * 4/8/1998 + * + */ + +#include "cblas.h" +#include "cblas_f77.h" +void API_SUFFIX(cblas_dgemmt)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, + const CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const double alpha, const double *A, + const CBLAS_INT lda, const double *B, const CBLAS_INT ldb, + const double beta, double *C, const CBLAS_INT ldc) +{ + char TA, TB, UL; +#ifdef F77_CHAR + F77_CHAR F77_TA, F77_TB,
F77_UL; +#else + #define F77_TA &TA + #define F77_TB &TB + #define F77_UL &UL +#endif + +#ifdef F77_INT + F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; + F77_INT F77_ldc=ldc; +#else + #define F77_N N + #define F77_K K + #define F77_lda lda + #define F77_ldb ldb + #define F77_ldc ldc +#endif + + extern int CBLAS_CallFromC; + extern int RowMajorStrg; + RowMajorStrg = 0; + CBLAS_CallFromC = 1; + + if ( Uplo == CblasUpper ) UL = 'U'; + else if (Uplo == CblasLower) UL= 'L'; + else { + API_SUFFIX(cblas_xerbla)(2, "cblas_dgemmt", "Illegal Uplo setting, %d\n", Uplo); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + + if( layout == CblasColMajor ) + { + if(TransA == CblasTrans) TA='T'; + else if ( TransA == CblasConjTrans ) TA='C'; + else if ( TransA == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_dgemmt","Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + if(TransB == CblasTrans) TB='T'; + else if ( TransB == CblasConjTrans ) TB='C'; + else if ( TransB == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_dgemmt","Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_dgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, &alpha, A, + &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); + } else if (layout == CblasRowMajor) + { + RowMajorStrg = 1; + if(TransA == CblasTrans) TB='T'; + else if ( TransA == CblasConjTrans ) TB='C'; + else if ( TransA == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_dgemmt","Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + if(TransB == CblasTrans) TA='T'; + else if ( TransB == CblasConjTrans ) TA='C'; + else if ( TransB == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_dgemmt","Illegal TransB 
setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_dgemmt( F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, &alpha, B, + &F77_ldb, A, &F77_lda, &beta, C, &F77_ldc); + } + else API_SUFFIX(cblas_xerbla)(1, "cblas_dgemmt", "Illegal layout setting, %d\n", layout); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; +} diff --git a/CBLAS/src/cblas_sgemm.c b/CBLAS/src/cblas_sgemm.c index a7b21fb58b..26be2a8f0a 100644 --- a/CBLAS/src/cblas_sgemm.c +++ b/CBLAS/src/cblas_sgemm.c @@ -90,7 +90,7 @@ void API_SUFFIX(cblas_sgemm)(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE Tr else if ( TransB == CblasNoTrans ) TA='N'; else { - API_SUFFIX(cblas_xerbla)(2, "cblas_sgemm", + API_SUFFIX(cblas_xerbla)(3, "cblas_sgemm", "Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; diff --git a/CBLAS/src/cblas_sgemmt.c b/CBLAS/src/cblas_sgemmt.c new file mode 100644 index 0000000000..89024c8998 --- /dev/null +++ b/CBLAS/src/cblas_sgemmt.c @@ -0,0 +1,123 @@ +/* + * + * cblas_sgemm.c + * This program is a C interface to sgemm. 
+ * Written by Keita Teranishi + * 4/8/1998 + * + */ + +#include "cblas.h" +#include "cblas_f77.h" +void API_SUFFIX(cblas_sgemmt)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, + const CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const float alpha, const float *A, + const CBLAS_INT lda, const float *B, const CBLAS_INT ldb, + const float beta, float *C, const CBLAS_INT ldc) +{ + char TA, TB, UL; +#ifdef F77_CHAR + F77_CHAR F77_TA, F77_TB, F77_UL; +#else + #define F77_TA &TA + #define F77_TB &TB + #define F77_UL &UL +#endif + +#ifdef F77_INT + F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; + F77_INT F77_ldc=ldc; +#else + #define F77_N N + #define F77_K K + #define F77_lda lda + #define F77_ldb ldb + #define F77_ldc ldc +#endif + + extern int CBLAS_CallFromC; + extern int RowMajorStrg; + RowMajorStrg = 0; + CBLAS_CallFromC = 1; + + if ( Uplo == CblasUpper ) UL = 'U'; + else if (Uplo == CblasLower) UL= 'L'; + else { + API_SUFFIX(cblas_xerbla)(2, "cblas_sgemmt", "Illegal Uplo setting, %d\n", Uplo); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + + if( layout == CblasColMajor ) + { + if(TransA == CblasTrans) TA='T'; + else if ( TransA == CblasConjTrans ) TA='C'; + else if ( TransA == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_sgemmt", + "Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + if(TransB == CblasTrans) TB='T'; + else if ( TransB == CblasConjTrans ) TB='C'; + else if ( TransB == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_sgemmt", + "Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_sgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); + } else if (layout == CblasRowMajor) + { + 
RowMajorStrg = 1; + if(TransA == CblasTrans) TB='T'; + else if ( TransA == CblasConjTrans ) TB='C'; + else if ( TransA == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_sgemmt", + "Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + if(TransB == CblasTrans) TA='T'; + else if ( TransB == CblasConjTrans ) TA='C'; + else if ( TransB == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_sgemmt", + "Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_sgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, &alpha, B, &F77_ldb, A, &F77_lda, &beta, C, &F77_ldc); + } else + API_SUFFIX(cblas_xerbla)(1, "cblas_sgemmt", + "Illegal layout setting, %d\n", layout); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; +} diff --git a/CBLAS/src/cblas_zgemm.c b/CBLAS/src/cblas_zgemm.c index 3aaf59abc7..9b3b66e568 100644 --- a/CBLAS/src/cblas_zgemm.c +++ b/CBLAS/src/cblas_zgemm.c @@ -89,7 +89,7 @@ void API_SUFFIX(cblas_zgemm)(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE Tr else if ( TransB == CblasNoTrans ) TA='N'; else { - API_SUFFIX(cblas_xerbla)(2, "cblas_zgemm","Illegal TransB setting, %d\n", TransB); + API_SUFFIX(cblas_xerbla)(3, "cblas_zgemm","Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; diff --git a/CBLAS/src/cblas_zgemmt.c b/CBLAS/src/cblas_zgemmt.c new file mode 100644 index 0000000000..1bfe59e33c --- /dev/null +++ b/CBLAS/src/cblas_zgemmt.c @@ -0,0 +1,121 @@ +/* + * + * cblas_zgemm.c + * This program is a C interface to zgemm. 
+ * Written by Keita Teranishi + * 4/8/1998 + * + */ + +#include "cblas.h" +#include "cblas_f77.h" +void API_SUFFIX(cblas_zgemmt)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, + const CBLAS_TRANSPOSE TransB, const CBLAS_INT N, + const CBLAS_INT K, const void *alpha, const void *A, + const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, + const void *beta, void *C, const CBLAS_INT ldc) +{ + char TA, TB, UL; +#ifdef F77_CHAR + F77_CHAR F77_TA, F77_TB, F77_UL; +#else + #define F77_TA &TA + #define F77_TB &TB + #define F77_UL &UL +#endif + +#ifdef F77_INT + F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; + F77_INT F77_ldc=ldc; +#else + #define F77_N N + #define F77_K K + #define F77_lda lda + #define F77_ldb ldb + #define F77_ldc ldc +#endif + + extern int CBLAS_CallFromC; + extern int RowMajorStrg; + RowMajorStrg = 0; + CBLAS_CallFromC = 1; + + if ( Uplo == CblasUpper ) UL = 'U'; + else if (Uplo == CblasLower) UL= 'L'; + else { + API_SUFFIX(cblas_xerbla)(2, "cblas_zgemmt", "Illegal Uplo setting, %d\n", Uplo); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + + if( layout == CblasColMajor ) + { + if(TransA == CblasTrans) TA='T'; + else if ( TransA == CblasConjTrans ) TA='C'; + else if ( TransA == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_zgemmt","Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + if(TransB == CblasTrans) TB='T'; + else if ( TransB == CblasConjTrans ) TB='C'; + else if ( TransB == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_zgemmt","Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_zgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, alpha, A, + &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + } else if (layout == CblasRowMajor) + { + RowMajorStrg = 
1; + if(TransA == CblasTrans) TB='T'; + else if ( TransA == CblasConjTrans ) TB='C'; + else if ( TransA == CblasNoTrans ) TB='N'; + else + { + API_SUFFIX(cblas_xerbla)(3, "cblas_zgemmt","Illegal TransA setting, %d\n", TransA); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + if(TransB == CblasTrans) TA='T'; + else if ( TransB == CblasConjTrans ) TA='C'; + else if ( TransB == CblasNoTrans ) TA='N'; + else + { + API_SUFFIX(cblas_xerbla)(4, "cblas_zgemmt","Illegal TransB setting, %d\n", TransB); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + #ifdef F77_CHAR + F77_TA = C2F_CHAR(&TA); + F77_TB = C2F_CHAR(&TB); + F77_UL = C2F_CHAR(&UL); + #endif + + F77_zgemmt(F77_UL, F77_TA, F77_TB, &F77_N, &F77_K, alpha, B, + &F77_ldb, A, &F77_lda, beta, C, &F77_ldc); + } + else API_SUFFIX(cblas_xerbla)(1, "cblas_zgemmt", "Illegal layout setting, %d\n", layout); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; +} From b0142220e7d0609f6904b9604b1e67bb4d927ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Fri, 21 Jul 2023 14:00:46 +0200 Subject: [PATCH 03/10] Tests for cblas_cgemmt --- CBLAS/include/cblas.h | 2 +- CBLAS/src/cblas_cgemm.c | 2 +- CBLAS/src/cblas_cgemmt.c | 29 +- CBLAS/testing/c_c3chke.c | 227 +++++++++++++++ CBLAS/testing/c_cblas3.c | 81 ++++++ CBLAS/testing/c_cblat3.f | 580 ++++++++++++++++++++++++++++++++++++++- CBLAS/testing/cin3 | 1 + 7 files changed, 901 insertions(+), 21 deletions(-) diff --git a/CBLAS/include/cblas.h b/CBLAS/include/cblas.h index dfab386bb6..c323e9e5aa 100644 --- a/CBLAS/include/cblas.h +++ b/CBLAS/include/cblas.h @@ -544,7 +544,7 @@ void cblas_cgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, const void *beta, void *C, const CBLAS_INT ldc); void cblas_cgemmt(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, - CBLAS_TRANSPOSE TransB, const CBLAS_INT M, const CBLAS_INT N, + CBLAS_TRANSPOSE TransB, const CBLAS_INT N, const CBLAS_INT 
K, const void *alpha, const void *A, const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, const void *beta, void *C, const CBLAS_INT ldc); diff --git a/CBLAS/src/cblas_cgemm.c b/CBLAS/src/cblas_cgemm.c index 5950ed1f8c..fe4b599a19 100644 --- a/CBLAS/src/cblas_cgemm.c +++ b/CBLAS/src/cblas_cgemm.c @@ -89,7 +89,7 @@ void API_SUFFIX(cblas_cgemm)(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE Tr else if ( TransB == CblasNoTrans ) TA='N'; else { - API_SUFFIX(cblas_xerbla)(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); + API_SUFFIX(cblas_xerbla)(2, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; diff --git a/CBLAS/src/cblas_cgemmt.c b/CBLAS/src/cblas_cgemmt.c index 4d63dd284d..2d2fae25e7 100644 --- a/CBLAS/src/cblas_cgemmt.c +++ b/CBLAS/src/cblas_cgemmt.c @@ -9,7 +9,7 @@ #include "cblas.h" #include "cblas_f77.h" -void API_SUFFIX(cblas_cgemm_t)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, +void API_SUFFIX(cblas_cgemmt)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const CBLAS_INT N, const CBLAS_INT K, const void *alpha, const void *A, const CBLAS_INT lda, const void *B, const CBLAS_INT ldb, @@ -41,17 +41,18 @@ void API_SUFFIX(cblas_cgemm_t)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, RowMajorStrg = 0; CBLAS_CallFromC = 1; - if ( Uplo == CblasUpper ) UL = 'U'; - else if (Uplo == CblasLower) UL= 'L'; - else { - API_SUFFIX(cblas_xerbla)(2, "cblas_cgemmt", "Illegal Uplo setting, %d\n", Uplo); - CBLAS_CallFromC = 0; - RowMajorStrg = 0; - return; - } if( layout == CblasColMajor ) { + if ( Uplo == CblasUpper ) UL = 'U'; + else if (Uplo == CblasLower) UL= 'L'; + else { + API_SUFFIX(cblas_xerbla)(2, "cblas_cgemmt", "Illegal Uplo setting, %d\n", Uplo); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + if(TransA == CblasTrans) TA='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == 
CblasNoTrans ) TA='N'; @@ -85,6 +86,16 @@ void API_SUFFIX(cblas_cgemm_t)(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, } else if (layout == CblasRowMajor) { RowMajorStrg = 1; + + if ( Uplo == CblasUpper ) UL = 'L'; + else if (Uplo == CblasLower) UL= 'U'; + else { + API_SUFFIX(cblas_xerbla)(2, "cblas_cgemmt", "Illegal Uplo setting, %d\n", Uplo); + CBLAS_CallFromC = 0; + RowMajorStrg = 0; + return; + } + if(TransA == CblasTrans) TB='T'; else if ( TransA == CblasConjTrans ) TB='C'; else if ( TransA == CblasNoTrans ) TB='N'; diff --git a/CBLAS/testing/c_c3chke.c b/CBLAS/testing/c_c3chke.c index b5bbc753cd..af13a82b8c 100644 --- a/CBLAS/testing/c_c3chke.c +++ b/CBLAS/testing/c_c3chke.c @@ -274,6 +274,233 @@ void F77_c3chke(char * rout) { cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ); chkxer(); + } else if (strncmp( sf,"cblas_cgemmt" ,12)==0) { + cblas_rout = "cblas_cgemmt" ; + + cblas_info = 1; + cblas_cgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 1; + cblas_cgemm( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 1; + cblas_cgemm( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 1; + cblas_cgemm( INVALID, CblasTrans, CblasTrans, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 2; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 2; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, INVALID, CblasTrans, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 3; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 3; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, INVALID, 0, 0, 0, + ALPHA, A, 1, B, 1, 
BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 9; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, 
CblasNoTrans, CblasNoTrans, 2, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 2 ); + chkxer(); + cblas_info = 9; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 2 ); + chkxer(); + cblas_info = 9; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2, + ALPHA, A, 1, B, 2, BETA, C, 1 ); + chkxer(); + cblas_info = 9; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2, + ALPHA, A, 2, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0, + ALPHA, A, 2, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0, + ALPHA, A, 2, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = FALSE; + cblas_cgemm( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, 
CblasNoTrans, CblasTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 4; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 5; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 6; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 9; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2, + ALPHA, A, 1, B, 1, BETA, C, 2 ); + chkxer(); + cblas_info = 9; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2, + ALPHA, A, 1, B, 2, BETA, C, 2 ); + chkxer(); + cblas_info = 9; RowMajorStrg = TRUE; 
+ cblas_cgemm( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0, + ALPHA, A, 1, B, 2, BETA, C, 1 ); + chkxer(); + cblas_info = 9; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0, + ALPHA, A, 2, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2, + ALPHA, A, 2, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 11; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0, + ALPHA, A, 1, B, 2, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0, + ALPHA, A, 1, B, 2, BETA, C, 1 ); + chkxer(); + cblas_info = 14; RowMajorStrg = TRUE; + cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0, + ALPHA, A, 1, B, 1, BETA, C, 1 ); + chkxer(); } else if (strncmp( sf,"cblas_chemm" ,11)==0) { cblas_rout = "cblas_chemm" ; diff --git a/CBLAS/testing/c_cblas3.c b/CBLAS/testing/c_cblas3.c index f758dc9ebc..bac202b86e 100644 --- a/CBLAS/testing/c_cblas3.c +++ b/CBLAS/testing/c_cblas3.c @@ -87,6 +87,87 @@ void F77_cgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CB cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } + +void F77_cgemmt(CBLAS_INT *layout, char *uplop, char *transpa, char *transpb, 
CBLAS_INT *n, + CBLAS_INT *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { + + CBLAS_TEST_COMPLEX *A, *B, *C; + CBLAS_INT i,j,LDA, LDB, LDC; + CBLAS_TRANSPOSE transa, transb; + CBLAS_UPLO uplo; + + get_transpose_type(transpa, &transa); + get_transpose_type(transpb, &transb); + get_uplo_type(uplop, &uplo); + + if (*layout == TEST_ROW_MJR) { + if (transa == CblasNoTrans) { + LDA = *k+1; + A=(CBLAS_TEST_COMPLEX*)malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX)); + for( i=0; i<*n; i++ ) + for( j=0; j<*k; j++ ) { + A[i*LDA+j].real=a[j*(*lda)+i].real; + A[i*LDA+j].imag=a[j*(*lda)+i].imag; + } + } + else { + LDA = *n+1; + A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX)); + for( i=0; i<*k; i++ ) + for( j=0; j<*n; j++ ) { + A[i*LDA+j].real=a[j*(*lda)+i].real; + A[i*LDA+j].imag=a[j*(*lda)+i].imag; + } + } + + if (transb == CblasNoTrans) { + LDB = *n+1; + B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) ); + for( i=0; i<*k; i++ ) + for( j=0; j<*n; j++ ) { + B[i*LDB+j].real=b[j*(*ldb)+i].real; + B[i*LDB+j].imag=b[j*(*ldb)+i].imag; + } + } + else { + LDB = *k+1; + B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX)); + for( i=0; i<*n; i++ ) + for( j=0; j<*k; j++ ) { + B[i*LDB+j].real=b[j*(*ldb)+i].real; + B[i*LDB+j].imag=b[j*(*ldb)+i].imag; + } + } + + LDC = *n+1; + C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX)); + for( j=0; j<*n; j++ ) + for( i=0; i<*n; i++ ) { + C[i*LDC+j].real=c[j*(*ldc)+i].real; + C[i*LDC+j].imag=c[j*(*ldc)+i].imag; + } + cblas_cgemmt( CblasRowMajor, uplo, transa, transb, *n, *k, alpha, A, LDA, + B, LDB, beta, C, LDC ); + for( j=0; j<*n; j++ ) + for( i=0; i<*n; i++ ) { + c[j*(*ldc)+i].real=C[i*LDC+j].real; + c[j*(*ldc)+i].imag=C[i*LDC+j].imag; + } + free(A); + free(B); + free(C); + } + else if (*layout == TEST_COL_MJR) + cblas_cgemmt( CblasColMajor, uplo, 
transa, transb, *n, *k, alpha, a, *lda, + b, *ldb, beta, c, *ldc ); + else + cblas_cgemmt( UNDEFINED, uplo, transa, transb, *n, *k, alpha, a, *lda, + b, *ldb, beta, c, *ldc ); +} + + void F77_chemm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, diff --git a/CBLAS/testing/c_cblat3.f b/CBLAS/testing/c_cblat3.f index 94144b8750..eb4e1124ba 100644 --- a/CBLAS/testing/c_cblat3.f +++ b/CBLAS/testing/c_cblat3.f @@ -3,10 +3,10 @@ PROGRAM CBLAT3 * Test program for the COMPLEX Level 3 Blas. * * The program must be driven by a short data file. The first 13 records -* of the file are read using list-directed input, the last 9 records +* of the file are read using list-directed input, the last 10 records * are read using the format ( A12, L2 ). An annotated example of a data * file can be obtained by deleting the first 3 characters from the -* following 22 lines: +* following 23 lines: * 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE * -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) * F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. @@ -29,6 +29,7 @@ PROGRAM CBLAT3 * cblas_csyrk T PUT F FOR NO TEST. SAME COLUMNS. * cblas_cher2k T PUT F FOR NO TEST. SAME COLUMNS. * cblas_csyr2k T PUT F FOR NO TEST. SAME COLUMNS. +* cblas_cgemmt T PUT F FOR NO TEST. SAME COLUMNS. * * See: * @@ -49,7 +50,7 @@ PROGRAM CBLAT3 INTEGER NIN, NOUT PARAMETER ( NIN = 5, NOUT = 6 ) INTEGER NSUBS - PARAMETER ( NSUBS = 9 ) + PARAMETER ( NSUBS = 10 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO, RHALF, RONE @@ -83,7 +84,7 @@ PROGRAM CBLAT3 LOGICAL LCE EXTERNAL SDIFF, LCE * .. External Subroutines .. - EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CMMCH + EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CCHK6, CMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. 
@@ -97,7 +98,7 @@ PROGRAM CBLAT3 DATA SNAMES/'cblas_cgemm ', 'cblas_chemm ', $ 'cblas_csymm ', 'cblas_ctrmm ', 'cblas_ctrsm ', $ 'cblas_cherk ', 'cblas_csyrk ', 'cblas_cher2k', - $ 'cblas_csyr2k'/ + $ 'cblas_csyr2k', 'cblas_cgemmt' / * .. Executable Statements .. * NOUTC = NOUT @@ -295,7 +296,7 @@ PROGRAM CBLAT3 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, - $ 180, 180 )ISNUM + $ 180, 180, 185 )ISNUM * Test CGEMM, 01. 140 IF (CORDER) THEN CALL CCHK1(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, @@ -329,13 +330,13 @@ PROGRAM CBLAT3 CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 1 ) + $ 1 ) END IF GO TO 190 * Test CHERK, 06, CSYRK, 07. @@ -357,15 +358,30 @@ PROGRAM CBLAT3 CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 1 ) + $ 1 ) END IF GO TO 190 +* Test CGEMMT, 10. + 185 IF (CORDER) THEN + CALL CCHK6(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, + $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, + $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, + $ CC, CS, CT, G, 0 ) + END IF + IF (RORDER) THEN + CALL CCHK6(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, + $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, + $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, + $ CC, CS, CT, G, 1 ) + END IF + GO TO 190 + * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 @@ -2785,3 +2801,547 @@ REAL FUNCTION SDIFF( X, Y ) * End of SDIFF. 
* END + + SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, + $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, + $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G, + $ IORDER ) + IMPLICIT NONE +* +* Tests CGEMMT. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0, 0.0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) +* .. Scalar Arguments .. + REAL EPS, THRESH + INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA, IORDER + LOGICAL FATAL, REWI, TRACE + CHARACTER*12 SNAME +* .. Array Arguments .. + COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), + $ AS( NMAX*NMAX ), B( NMAX, NMAX ), + $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), + $ C( NMAX, NMAX ), CC( NMAX*NMAX ), + $ CS( NMAX*NMAX ), CT( NMAX ) + REAL G( NMAX ) + INTEGER IDIM( NIDIM ) +* .. Local Scalars .. + COMPLEX ALPHA, ALS, BETA, BLS + REAL ERR, ERRMAX + INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, + $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, + $ MA, MB, N, NA, NARGS, NB, NC, NS, IS + LOGICAL NULL, RESET, SAME, TRANA, TRANB + CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS + CHARACTER*3 ICH + CHARACTER*2 ISHAPE +* .. Local Arrays .. + LOGICAL ISAME( 13 ) +* .. External Functions .. + LOGICAL LCE, LCERES + EXTERNAL LCE, LCERES +* .. External Subroutines .. + EXTERNAL CCGEMM, CMAKE, CMMCH +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Scalars in Common .. + INTEGER INFOT, NOUTC + LOGICAL LERR, OK +* .. Common blocks .. + COMMON /INFOC/INFOT, NOUTC, OK, LERR +* .. Data statements .. + DATA ICH/'NTC'/ + DATA ISHAPE/'UL'/ +* .. Executable Statements .. +* + NARGS = 13 + NC = 0 + RESET = .TRUE. + ERRMAX = RZERO +* + DO 100 IN = 1, NIDIM + N = IDIM( IN ) +* Set LDC to 1 more than minimum value if room. 
+ LDC = N + IF( LDC.LT.NMAX ) + $ LDC = LDC + 1 +* Skip tests if not enough room. + IF( LDC.GT.NMAX ) + $ GO TO 100 + LCC = LDC*N + NULL = N.LE.0. +* + DO 90 IK = 1, NIDIM + K = IDIM( IK ) +* + DO 80 ICA = 1, 3 + TRANSA = ICH( ICA: ICA ) + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' +* + IF( TRANA )THEN + MA = K + NA = N + ELSE + MA = N + NA = K + END IF +* Set LDA to 1 more than minimum value if room. + LDA = MA + IF( LDA.LT.NMAX ) + $ LDA = LDA + 1 +* Skip tests if not enough room. + IF( LDA.GT.NMAX ) + $ GO TO 80 + LAA = LDA*NA +* +* Generate the matrix A. +* + CALL CMAKE( 'ge', ' ', ' ', MA, NA, A, NMAX, AA, LDA, + $ RESET, ZERO ) +* + DO 70 ICB = 1, 3 + TRANSB = ICH( ICB: ICB ) + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' +* + IF( TRANB )THEN + MB = N + NB = K + ELSE + MB = K + NB = N + END IF +* Set LDB to 1 more than minimum value if room. + LDB = MB + IF( LDB.LT.NMAX ) + $ LDB = LDB + 1 +* Skip tests if not enough room. + IF( LDB.GT.NMAX ) + $ GO TO 70 + LBB = LDB*NB +* +* Generate the matrix B. +* + CALL CMAKE( 'ge', ' ', ' ', MB, NB, B, NMAX, BB, + $ LDB, RESET, ZERO ) +* + DO 60 IA = 1, NALF + ALPHA = ALF( IA ) +* + DO 50 IB = 1, NBET + BETA = BET( IB ) + DO 45 IS = 1, 2 + UPLO = ISHAPE(IS:IS) +* +* Generate the matrix C. +* + CALL CMAKE( 'ge', UPLO, ' ', N, N, C, NMAX, + $ CC, LDC, RESET, ZERO ) +* + NC = NC + 1 +* +* Save every datum before calling the +* subroutine. +* + UPLOS = UPLO + TRANAS = TRANSA + TRANBS = TRANSB + NS = N + KS = K + ALS = ALPHA + DO 10 I = 1, LAA + AS( I ) = AA( I ) + 10 CONTINUE + LDAS = LDA + DO 20 I = 1, LBB + BS( I ) = BB( I ) + 20 CONTINUE + LDBS = LDB + BLS = BETA + DO 30 I = 1, LCC + CS( I ) = CC( I ) + 30 CONTINUE + LDCS = LDC +* +* Call the subroutine. 
+* + IF( TRACE ) + $ CALL CPRCN8(NTRA, NC, SNAME, IORDER, UPLO, + $ TRANSA, TRANSB, N, K, ALPHA, LDA, + $ LDB, BETA, LDC) + IF( REWI ) + $ REWIND NTRA + CALL CCGEMMT(IORDER, UPLO, TRANSA, TRANSB, N, + $ K, ALPHA, AA, LDA, BB, LDB, + $ BETA, CC, LDC ) +* +* Check if error-exit was taken incorrectly. +* + IF( .NOT.OK )THEN + WRITE( NOUT, FMT = 9994 ) + FATAL = .TRUE. + GO TO 120 + END IF +* +* See what data changed inside subroutines. +* + ISAME( 1 ) = UPLO .EQ. UPLOS + ISAME( 2 ) = TRANSA.EQ.TRANAS + ISAME( 3 ) = TRANSB.EQ.TRANBS + ISAME( 4 ) = NS.EQ.N + ISAME( 5 ) = KS.EQ.K + ISAME( 6 ) = ALS.EQ.ALPHA + ISAME( 7 ) = LCE( AS, AA, LAA ) + ISAME( 8 ) = LDAS.EQ.LDA + ISAME( 9 ) = LCE( BS, BB, LBB ) + ISAME( 10 ) = LDBS.EQ.LDB + ISAME( 11 ) = BLS.EQ.BETA + IF( NULL )THEN + ISAME( 12 ) = LCE( CS, CC, LCC ) + ELSE + ISAME( 12 ) = LCERES( 'ge', ' ', N, N, CS, + $ CC, LDC ) + END IF + ISAME( 13 ) = LDCS.EQ.LDC +* +* If data was incorrectly changed, report +* and return. +* + SAME = .TRUE. + DO 40 I = 1, NARGS + SAME = SAME.AND.ISAME( I ) + IF( .NOT.ISAME( I ) ) + $ WRITE( NOUT, FMT = 9998 )I + 40 CONTINUE + IF( .NOT.SAME )THEN + FATAL = .TRUE. + GO TO 120 + END IF +* + IF( .NOT.NULL )THEN +* +* Check the result. +* + CALL CMMTCH( UPLO, TRANSA, TRANSB, N, K, + $ ALPHA, A, NMAX, B, NMAX, BETA, + $ C, NMAX, CT, G, CC, LDC, EPS, + $ ERR, FATAL, NOUT, .TRUE. ) + ERRMAX = MAX( ERRMAX, ERR ) +* If got really bad answer, report and +* return. + IF( FATAL ) + $ GO TO 120 + END IF +* + 45 CONTINUE +* + 50 CONTINUE +* + 60 CONTINUE +* + 70 CONTINUE +* + 80 CONTINUE +* + 90 CONTINUE +* + 100 CONTINUE +* +* +* Report result. 
+* + IF( ERRMAX.LT.THRESH )THEN + IF ( IORDER.EQ.0) WRITE( NOUT, FMT = 10000 )SNAME, NC + IF ( IORDER.EQ.1) WRITE( NOUT, FMT = 10001 )SNAME, NC + ELSE + IF ( IORDER.EQ.0) WRITE( NOUT, FMT = 10002 )SNAME, NC, ERRMAX + IF ( IORDER.EQ.1) WRITE( NOUT, FMT = 10003 )SNAME, NC, ERRMAX + END IF + GO TO 130 +* + 120 CONTINUE + WRITE( NOUT, FMT = 9996 )SNAME + CALL CPRCN8(NOUT, NC, SNAME, IORDER, UPLO, TRANSA, TRANSB, + $ N, K, ALPHA, LDA, LDB, BETA, LDC) +* + 130 CONTINUE + RETURN +* +10003 FORMAT( ' ', A12,' COMPLETED THE ROW-MAJOR COMPUTATIONAL ', + $ 'TESTS (', I6, ' CALLS)', /' ******* BUT WITH MAXIMUM TEST ', + $ 'RATIO ', F8.2, ' - SUSPECT *******' ) +10002 FORMAT( ' ', A12,' COMPLETED THE COLUMN-MAJOR COMPUTATIONAL ', + $ 'TESTS (', I6, ' CALLS)', /' ******* BUT WITH MAXIMUM TEST ', + $ 'RATIO ', F8.2, ' - SUSPECT *******' ) +10001 FORMAT( ' ', A12,' PASSED THE ROW-MAJOR COMPUTATIONAL TESTS', + $ ' (', I6, ' CALL', 'S)' ) +10000 FORMAT( ' ', A12,' PASSED THE COLUMN-MAJOR COMPUTATIONAL TESTS', + $ ' (', I6, ' CALL', 'S)' ) + 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', + $ 'ANGED INCORRECTLY *******' ) + 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) + 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', + $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, + $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) + 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', + $ '******' ) +* +* End of CCHK6. 
+* + END + + SUBROUTINE CPRCN8(NOUT, NC, SNAME, IORDER, UPLO, + $ TRANSA, TRANSB, N, + $ K, ALPHA, LDA, LDB, BETA, LDC) + INTEGER NOUT, NC, IORDER, N, K, LDA, LDB, LDC + COMPLEX ALPHA, BETA + CHARACTER*1 TRANSA, TRANSB, UPLO + CHARACTER*12 SNAME + CHARACTER*14 CRC, CTA,CTB,CUPLO + + IF (UPLO.EQ.'U') THEN + CUPLO = 'CblasUpper' + ELSE + CUPLO = 'CblasLower' + END IF + IF (TRANSA.EQ.'N')THEN + CTA = ' CblasNoTrans' + ELSE IF (TRANSA.EQ.'T')THEN + CTA = ' CblasTrans' + ELSE + CTA = 'CblasConjTrans' + END IF + IF (TRANSB.EQ.'N')THEN + CTB = ' CblasNoTrans' + ELSE IF (TRANSB.EQ.'T')THEN + CTB = ' CblasTrans' + ELSE + CTB = 'CblasConjTrans' + END IF + IF (IORDER.EQ.1)THEN + CRC = ' CblasRowMajor' + ELSE + CRC = ' CblasColMajor' + END IF + WRITE(NOUT, FMT = 9995)NC,SNAME,CRC, CUPLO, CTA,CTB + WRITE(NOUT, FMT = 9994)N, K, ALPHA, LDA, LDB, BETA, LDC + + 9995 FORMAT( 1X, I6, ': ', A12,'(', A14, ',', A14, ',', A14, ',', + $ A14, ',') + 9994 FORMAT( 10X, 2( I3, ',' ) ,' (', F4.1,',',F4.1,') , A,', + $ I3, ', B,', I3, ', (', F4.1,',',F4.1,') , C,', I3, ').' ) + END + + SUBROUTINE CMMTCH(UPLO, TRANSA, TRANSB, N, KK, ALPHA, A, LDA, + $ B, LDB, + $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, + $ NOUT, MV ) + IMPLICIT NONE +* +* Checks the results of the computational tests. +* +* Auxiliary routine for test program for Level 3 Blas. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0, 0.0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0, RONE = 1.0 ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + REAL EPS, ERR + INTEGER KK, LDA, LDB, LDC, LDCC, N, NOUT + LOGICAL FATAL, MV + CHARACTER*1 TRANSA, TRANSB, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ CC( LDCC, * ), CT( * ) + REAL G( * ) +* .. Local Scalars .. 
+ COMPLEX CL + REAL ERRI + INTEGER I, J, K, ISTART, ISTOP + LOGICAL CTRANA, CTRANB, TRANA, TRANB, UPPER +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, REAL, SQRT +* .. Statement Functions .. + REAL ABS1 +* .. Statement Function definitions .. + ABS1( CL ) = ABS( REAL( CL ) ) + ABS( AIMAG( CL ) ) +* .. Executable Statements .. + + UPPER = UPLO.EQ.'U' + TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' + TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' + CTRANA = TRANSA.EQ.'C' + CTRANB = TRANSB.EQ.'C' + + ISTART = 1 + ISTOP = N +* +* Compute expected result, one column at a time, in CT using data +* in A, B and C. +* Compute gauges in G. +* + DO 220 J = 1, N +* + IF (UPPER) THEN + ISTART = 1 + ISTOP = J + ELSE + ISTART = J + ISTOP = N + END IF + DO 10 I = ISTART, ISTOP + CT( I ) = ZERO + G( I ) = RZERO + 10 CONTINUE + IF( .NOT.TRANA.AND..NOT.TRANB )THEN + DO 30 K = 1, KK + DO 20 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( K, J ) + G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) ) + 20 CONTINUE + 30 CONTINUE + ELSE IF( TRANA.AND..NOT.TRANB )THEN + IF( CTRANA )THEN + DO 50 K = 1, KK + DO 40 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( K, J ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( K, J ) ) + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, KK + DO 60 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( K, J ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( K, J ) ) + 60 CONTINUE + 70 CONTINUE + END IF + ELSE IF( .NOT.TRANA.AND.TRANB )THEN + IF( CTRANB )THEN + DO 90 K = 1, KK + DO 80 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( I, K ) )* + $ ABS1( B( J, K ) ) + 80 CONTINUE + 90 CONTINUE + ELSE + DO 110 K = 1, KK + DO 100 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( I, K )*B( J, K ) + G( I ) = G( I ) + ABS1( A( I, K ) )* + $ ABS1( B( J, K ) ) + 100 CONTINUE + 110 CONTINUE + END IF + ELSE IF( TRANA.AND.TRANB )THEN + IF( CTRANA )THEN + IF( CTRANB )THEN + DO 130 K = 1, KK + DO 120 I 
= ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )* + $ CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 120 CONTINUE + 130 CONTINUE + ELSE + DO 150 K = 1, KK + DO 140 I = ISTART, ISTOP + CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( J, K ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE + IF( CTRANB )THEN + DO 170 K = 1, KK + DO 160 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*CONJG( B( J, K ) ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 190 K = 1, KK + DO 180 I = ISTART, ISTOP + CT( I ) = CT( I ) + A( K, I )*B( J, K ) + G( I ) = G( I ) + ABS1( A( K, I ) )* + $ ABS1( B( J, K ) ) + 180 CONTINUE + 190 CONTINUE + END IF + END IF + END IF + DO 200 I = ISTART, ISTOP + CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) + G( I ) = ABS1( ALPHA )*G( I ) + + $ ABS1( BETA )*ABS1( C( I, J ) ) + 200 CONTINUE +* +* Compute the error ratio for this result. +* + ERR = ZERO + DO 210 I = ISTART, ISTOP + ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS + IF( G( I ).NE.RZERO ) + $ ERRI = ERRI/G( I ) + ERR = MAX( ERR, ERRI ) + IF( ERR*SQRT( EPS ).GE.RONE ) + $ GO TO 230 + 210 CONTINUE +* + 220 CONTINUE +* +* If the loop completes, all results are at least half accurate. + GO TO 250 +* +* Report fatal error. +* + 230 FATAL = .TRUE. + WRITE( NOUT, FMT = 9999 ) + DO 240 I = ISTART, ISTOP + IF( MV )THEN + WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) + ELSE + WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) + END IF + 240 CONTINUE + IF( N.GT.1 ) + $ WRITE( NOUT, FMT = 9997 )J +* + 250 CONTINUE + RETURN +* + 9999 FORMAT(' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', + $ 'F ACCURATE *******', /' EXPECTED RE', + $ 'SULT COMPUTED RESULT' ) + 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) + 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) +* +* End of CMMCH. 
+* + END + diff --git a/CBLAS/testing/cin3 b/CBLAS/testing/cin3 index 7b34f267bb..3854aef885 100644 --- a/CBLAS/testing/cin3 +++ b/CBLAS/testing/cin3 @@ -20,3 +20,4 @@ cblas_cherk T PUT F FOR NO TEST. SAME COLUMNS. cblas_csyrk T PUT F FOR NO TEST. SAME COLUMNS. cblas_cher2k T PUT F FOR NO TEST. SAME COLUMNS. cblas_csyr2k T PUT F FOR NO TEST. SAME COLUMNS. +cblas_cgemmt T PUT F FOR NO TEST. SAME COLUMNS. From 191e998a4707d96809cf7f253f66ebd4c169ab51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 09:42:06 +0200 Subject: [PATCH 04/10] Update documentation of xGEMMT --- BLAS/SRC/cgemmt.f | 4 ++-- BLAS/SRC/dgemmt.f | 2 +- BLAS/SRC/sgemmt.f | 2 +- BLAS/SRC/zgemmt.f | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/BLAS/SRC/cgemmt.f b/BLAS/SRC/cgemmt.f index e6071a345b..653e9877c1 100644 --- a/BLAS/SRC/cgemmt.f +++ b/BLAS/SRC/cgemmt.f @@ -37,8 +37,8 @@ *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix. *> Thereby, the routine only accesses and updates the upper or lower -*> triangular part of the result matrix C. This behaviour can be used, -*> the resulting matrix C is known to be symmetric. +*> triangular part of the result matrix C. This behaviour can be used if +*> the resulting matrix C is known to be Hermitian or symmetric. *> \endverbatim * * Arguments: diff --git a/BLAS/SRC/dgemmt.f b/BLAS/SRC/dgemmt.f index 718fafb17f..5d767ee607 100644 --- a/BLAS/SRC/dgemmt.f +++ b/BLAS/SRC/dgemmt.f @@ -37,7 +37,7 @@ *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix. *> Thereby, the routine only accesses and updates the upper or lower -*> triangular part of the result matrix C. This behaviour can be used, +*> triangular part of the result matrix C. This behaviour can be used if *> the resulting matrix C is known to be symmetric. 
*> \endverbatim * diff --git a/BLAS/SRC/sgemmt.f b/BLAS/SRC/sgemmt.f index 3875e63664..b2ad38e275 100644 --- a/BLAS/SRC/sgemmt.f +++ b/BLAS/SRC/sgemmt.f @@ -37,7 +37,7 @@ *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix. *> Thereby, the routine only accesses and updates the upper or lower -*> triangular part of the result matrix C. This behaviour can be used, +*> triangular part of the result matrix C. This behaviour can be used if *> the resulting matrix C is known to be symmetric. *> \endverbatim * diff --git a/BLAS/SRC/zgemmt.f b/BLAS/SRC/zgemmt.f index 37828abaad..5533c780a7 100644 --- a/BLAS/SRC/zgemmt.f +++ b/BLAS/SRC/zgemmt.f @@ -37,8 +37,8 @@ *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an n by k matrix, op( B ) a k by n matrix and C an n by n matrix. *> Thereby, the routine only accesses and updates the upper or lower -*> triangular part of the result matrix C. This behaviour can be used, -*> the resulting matrix C is known to be symmetric. +*> triangular part of the result matrix C. This behaviour can be used if +*> the resulting matrix C is known to be Hermitian or symmetric. 
*> \endverbatim * * Arguments: From 0fbf6125d0388d19d44e4e0aa53e0f8d449767d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 09:48:10 +0200 Subject: [PATCH 05/10] Fix implicit variable --- BLAS/TESTING/sblat3.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BLAS/TESTING/sblat3.f b/BLAS/TESTING/sblat3.f index a0522d96e8..fb396775a0 100644 --- a/BLAS/TESTING/sblat3.f +++ b/BLAS/TESTING/sblat3.f @@ -3146,7 +3146,7 @@ SUBROUTINE SCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, IF( NULL )THEN ISAME( 12 ) = LSE( CS, CC, LCC ) ELSE - ISAME( 12 ) = LSERES( 'GE', ' ', M, N, + ISAME( 12 ) = LSERES( 'GE', ' ', N, N, $ CS, CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC From 650c432d0229f7be628fa3a537370f26be9cac75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 09:52:51 +0200 Subject: [PATCH 06/10] Fix further implicit variables --- BLAS/TESTING/cblat3.f | 2 +- BLAS/TESTING/dblat3.f | 2 +- BLAS/TESTING/zblat3.f | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/BLAS/TESTING/cblat3.f b/BLAS/TESTING/cblat3.f index a8cd24c123..efe798add7 100644 --- a/BLAS/TESTING/cblat3.f +++ b/BLAS/TESTING/cblat3.f @@ -3879,7 +3879,7 @@ SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, IF( NULL )THEN ISAME( 12 ) = LCE( CS, CC, LCC ) ELSE - ISAME( 12 ) = LCERES( 'GE', ' ', M, N, CS, + ISAME( 12 ) = LCERES( 'GE', ' ', N, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC diff --git a/BLAS/TESTING/dblat3.f b/BLAS/TESTING/dblat3.f index ddfbbfbd6a..24c5eb7782 100644 --- a/BLAS/TESTING/dblat3.f +++ b/BLAS/TESTING/dblat3.f @@ -3145,7 +3145,7 @@ SUBROUTINE DCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, IF( NULL )THEN ISAME( 12 ) = LDE( CS, CC, LCC ) ELSE - ISAME( 12 ) = LDERES( 'GE', ' ', M, N, + ISAME( 12 ) = LDERES( 'GE', ' ', N, N, $ CS, CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC diff --git a/BLAS/TESTING/zblat3.f b/BLAS/TESTING/zblat3.f index 9b54f6be8f..ca974cfb77 
100644 --- a/BLAS/TESTING/zblat3.f +++ b/BLAS/TESTING/zblat3.f @@ -3893,7 +3893,7 @@ SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, IF( NULL )THEN ISAME( 12 ) = LZE( CS, CC, LCC ) ELSE - ISAME( 12 ) = LZERES( 'GE', ' ', M, N, CS, + ISAME( 12 ) = LZERES( 'GE', ' ', N, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC From 91c7f9eb9be64b5285decf18cad0daf8a75cfe70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 09:58:58 +0200 Subject: [PATCH 07/10] Fix missing comma --- BLAS/TESTING/cblat3.f | 2 +- BLAS/TESTING/zblat3.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BLAS/TESTING/cblat3.f b/BLAS/TESTING/cblat3.f index efe798add7..1e8c40ae64 100644 --- a/BLAS/TESTING/cblat3.f +++ b/BLAS/TESTING/cblat3.f @@ -3846,7 +3846,7 @@ SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, * Call the subroutine. * IF( TRACE ) - $ WRITE( NTRA, FMT = 9995 )NC, SNAME, UPLO + $ WRITE( NTRA, FMT = 9995 )NC, SNAME, UPLO, $ TRANSA, TRANSB, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) diff --git a/BLAS/TESTING/zblat3.f b/BLAS/TESTING/zblat3.f index ca974cfb77..3e6e338ce2 100644 --- a/BLAS/TESTING/zblat3.f +++ b/BLAS/TESTING/zblat3.f @@ -3860,7 +3860,7 @@ SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, * Call the subroutine. 
* IF( TRACE ) - $ WRITE( NTRA, FMT = 9995 )NC, SNAME, UPLO + $ WRITE( NTRA, FMT = 9995 )NC, SNAME, UPLO, $ TRANSA, TRANSB, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) From 5171846bbee1addb386fcaa918f46b114591c68f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 10:24:26 +0200 Subject: [PATCH 08/10] Remove useless variable --- BLAS/TESTING/cblat3.f | 4 ++-- BLAS/TESTING/zblat3.f | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/BLAS/TESTING/cblat3.f b/BLAS/TESTING/cblat3.f index 1e8c40ae64..1d11c1554d 100644 --- a/BLAS/TESTING/cblat3.f +++ b/BLAS/TESTING/cblat3.f @@ -3708,7 +3708,7 @@ SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, COMPLEX ALPHA, ALS, BETA, BLS REAL ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IN, K, KS, LAA, - $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, + $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, $ MA, MB, N, NA, NARGS, NB, NC, NS, IS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS @@ -3815,7 +3815,7 @@ SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, * * Generate the matrix C. * - CALL CMAKE( 'GE', UPLO, ' ', M, N, C, NMAX, + CALL CMAKE( 'GE', UPLO, ' ', N, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 diff --git a/BLAS/TESTING/zblat3.f b/BLAS/TESTING/zblat3.f index 3e6e338ce2..96a6928bfb 100644 --- a/BLAS/TESTING/zblat3.f +++ b/BLAS/TESTING/zblat3.f @@ -3722,7 +3722,7 @@ SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, COMPLEX*16 ALPHA, ALS, BETA, BLS DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IN, K, KS, LAA, - $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, + $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, $ MA, MB, N, NA, NARGS, NB, NC, NS, IS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB, UPLO, UPLOS @@ -3829,7 +3829,7 @@ SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, * * Generate the matrix C. 
* - CALL ZMAKE( 'GE', UPLO, ' ', M, N, C, NMAX, + CALL ZMAKE( 'GE', UPLO, ' ', N, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 From c0f8f4a69ac144b41d19129006e7e4551d65288a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 11:04:51 +0200 Subject: [PATCH 09/10] Fix wrong write --- BLAS/TESTING/cblat3.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BLAS/TESTING/cblat3.f b/BLAS/TESTING/cblat3.f index 1d11c1554d..7d60c1d1f2 100644 --- a/BLAS/TESTING/cblat3.f +++ b/BLAS/TESTING/cblat3.f @@ -3938,7 +3938,7 @@ SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME - WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, + WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE From da1646c7eb3909c8f79879fd08bd188842d169b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 22 Aug 2023 11:12:35 +0200 Subject: [PATCH 10/10] Fix another variable --- BLAS/TESTING/zblat3.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BLAS/TESTING/zblat3.f b/BLAS/TESTING/zblat3.f index 96a6928bfb..1b7c98e96a 100644 --- a/BLAS/TESTING/zblat3.f +++ b/BLAS/TESTING/zblat3.f @@ -3952,7 +3952,7 @@ SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME - WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, + WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE