diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index efededcf36..9f8f5d0d77 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -79,6 +79,8 @@ macro(SetDefaultL1) SetFallback(CROTKERNEL zrot.S) SetFallback(ZROTKERNEL zrot.S) SetFallback(XROTKERNEL zrot.S) + SetFallback(SROTMKERNEL rotm.S) + SetFallback(DROTMKERNEL rotm.S) SetFallback(SSCALKERNEL scal.S) SetFallback(DSCALKERNEL scal.S) SetFallback(CSCALKERNEL zscal.S) diff --git a/common_d.h b/common_d.h index 6f4bb2dedc..5b9cffca85 100644 --- a/common_d.h +++ b/common_d.h @@ -22,6 +22,7 @@ #define DSUM_K dsum_k #define DSWAP_K dswap_k #define DROT_K drot_k +#define DROTM_K drotm_k #define DGEMV_N dgemv_n #define DGEMV_T dgemv_t diff --git a/common_level1.h b/common_level1.h index d2ed47e567..afc1fff3de 100644 --- a/common_level1.h +++ b/common_level1.h
@@ -213,8 +213,8 @@ int srotmg_k(float *, float *, float *, float *, float *); int drotmg_k(double *, double *, double *, double *, double *); int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *); -int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float); -int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double); +int srotm_k (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); +int drotm_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble); diff --git a/common_macro.h b/common_macro.h index a924651de2..171ccc15d5 100644 --- a/common_macro.h +++ b/common_macro.h @@ -361,6 +361,7 @@ #define SUM_K DSUM_K #define SWAP_K DSWAP_K #define ROT_K DROT_K +#define ROTM_K DROTM_K #define GEMV_N DGEMV_N #define GEMV_T DGEMV_T @@ -977,6 +978,7 @@ #define SUM_K SSUM_K #define SWAP_K SSWAP_K #define ROT_K SROT_K +#define ROTM_K SROTM_K #define GEMV_N SGEMV_N #define GEMV_T SGEMV_T diff --git a/common_param.h b/common_param.h index c082d248e8..71df4ae2e5 100644 --- a/common_param.h +++ b/common_param.h @@ -197,6 +197,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); + int (*srotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); #endif #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); @@ -330,6 +331,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); #endif #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); + int (*drotm_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *,
BLASLONG); int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); diff --git a/common_s.h b/common_s.h index fdd80b62f6..e996fbd73e 100644 --- a/common_s.h +++ b/common_s.h @@ -24,6 +24,7 @@ #define SSCAL_K sscal_k #define SSWAP_K sswap_k #define SROT_K srot_k +#define SROTM_K srotm_k #define SGEMV_N sgemv_n #define SGEMV_T sgemv_t diff --git a/interface/rotm.c b/interface/rotm.c index 9dc08354ac..9ef87da329 100644 --- a/interface/rotm.c +++ b/interface/rotm.c @@ -7,149 +7,21 @@ void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ - blasint n = *N; - blasint incx = *INCX; - blasint incy = *INCY; + blasint n = *N; + blasint incx = *INCX; + blasint incy = *INCY; + PRINT_DEBUG_NAME; #else void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){ -#endif - - blasint i__1, i__2; + PRINT_DEBUG_CNAME; - blasint i__; - FLOAT w, z__; - blasint kx, ky; - FLOAT dh11, dh12, dh22, dh21, dflag; - blasint nsteps; - -#ifndef CBLAS - PRINT_DEBUG_CNAME; -#else - PRINT_DEBUG_CNAME; #endif - --dparam; - --dy; - --dx; - - dflag = dparam[1]; - if (n <= 0 || dflag == - 2.0) goto L140; - - if (! (incx == incy && incx > 0)) goto L70; - - nsteps = n * incx; - if (dflag < 0.) { - goto L50; - } else if (dflag == 0) { - goto L10; - } else { - goto L30; - } -L10: - dh12 = dparam[4]; - dh21 = dparam[3]; - i__1 = nsteps; - i__2 = incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w + z__ * dh12; - dy[i__] = w * dh21 + z__; -/* L20: */ - } - goto L140; -L30: - dh11 = dparam[2]; - dh22 = dparam[5]; - i__2 = nsteps; - i__1 = incx; - for (i__ = 1; i__1 < 0 ?
i__ >= i__2 : i__ <= i__2; i__ += i__1) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w * dh11 + z__; - dy[i__] = -w + dh22 * z__; -/* L40: */ - } - goto L140; -L50: - dh11 = dparam[2]; - dh12 = dparam[4]; - dh21 = dparam[3]; - dh22 = dparam[5]; - i__1 = nsteps; - i__2 = incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w * dh11 + z__ * dh12; - dy[i__] = w * dh21 + z__ * dh22; -/* L60: */ - } - goto L140; -L70: - kx = 1; - ky = 1; - if (incx < 0) { - kx = (1 - n) * incx + 1; - } - if (incy < 0) { - ky = (1 - n) * incy + 1; - } + ROTM_K(n, dx, incx, dy, incy, dparam); - if (dflag < 0.) { - goto L120; - } else if (dflag == 0) { - goto L80; - } else { - goto L100; - } -L80: - dh12 = dparam[4]; - dh21 = dparam[3]; - i__2 = n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w + z__ * dh12; - dy[ky] = w * dh21 + z__; - kx += incx; - ky += incy; -/* L90: */ - } - goto L140; -L100: - dh11 = dparam[2]; - dh22 = dparam[5]; - i__2 = n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w * dh11 + z__; - dy[ky] = -w + dh22 * z__; - kx += incx; - ky += incy; -/* L110: */ - } - goto L140; -L120: - dh11 = dparam[2]; - dh12 = dparam[4]; - dh21 = dparam[3]; - dh22 = dparam[5]; - i__2 = n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w * dh11 + z__ * dh12; - dy[ky] = w * dh21 + z__ * dh22; - kx += incx; - ky += incy; -/* L130: */ - } -L140: return; } diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 74e6760c27..bc713e6033 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -125,6 +125,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" 
false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SROTMKERNEL}" "" "rotm_k" false "" "" false "SINGLE") endif () if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") @@ -148,6 +149,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") @@ -1105,6 +1107,7 @@ endif () GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 index 09337363da..e67aea7980 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 @@ -336,6 +336,14 @@ ifndef XROTKERNEL XROTKERNEL = zrot.S endif +ifndef SROTMKERNEL +SROTMKERNEL = rotm.S +endif + +ifndef DROTMKERNEL +DROTMKERNEL = rotm.S +endif + ### SCAL ### 
ifndef SSCALKERNEL @@ -504,14 +512,14 @@ SBLASOBJS += \ sasum_k$(TSUFFIX).$(SUFFIX) ssum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \ sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \ snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \ - saxpby_k$(TSUFFIX).$(SUFFIX) + saxpby_k$(TSUFFIX).$(SUFFIX) srotm_k$(TSUFFIX).$(SUFFIX) DBLASOBJS += \ damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \ idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \ dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ - daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) + daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) drotm_k$(TSUFFIX).$(SUFFIX) QBLASOBJS += \ qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \ @@ -841,6 +849,12 @@ $(KDIR)srot_k$(TSUFFIX).$(SUFFIX) $(KDIR)srot_k$(TPSUFFIX).$(PSUFFIX) : $(KERN $(KDIR)drot_k$(TSUFFIX).$(SUFFIX) $(KDIR)drot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTKERNEL) $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ +$(KDIR)srotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)srotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SROTMKERNEL) + $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@ + +$(KDIR)drotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)drotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTMKERNEL) + $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ + $(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL) $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ diff --git 
a/kernel/arm/KERNEL.ARMV5 b/kernel/arm/KERNEL.ARMV5 index e977dda3a0..655c5eb42b 100644 --- a/kernel/arm/KERNEL.ARMV5 +++ b/kernel/arm/KERNEL.ARMV5 @@ -66,6 +66,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index 344a718851..f70d688c70 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -50,6 +50,9 @@ DROTKERNEL = rot_vfp.S CROTKERNEL = rot_vfp.S ZROTKERNEL = rot_vfp.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SDOTKERNEL = sdot_vfp.S DDOTKERNEL = ddot_vfp.S CDOTKERNEL = cdot_vfp.S diff --git a/kernel/arm64/KERNEL.ARMV8 b/kernel/arm64/KERNEL.ARMV8 index c8a53c86b1..74fe1b298c 100644 --- a/kernel/arm64/KERNEL.ARMV8 +++ b/kernel/arm64/KERNEL.ARMV8 @@ -55,6 +55,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.ARMV8SVE b/kernel/arm64/KERNEL.ARMV8SVE index dc58e329fc..a64a5bc857 100644 --- a/kernel/arm64/KERNEL.ARMV8SVE +++ b/kernel/arm64/KERNEL.ARMV8SVE @@ -69,6 +69,9 @@ DROTKERNEL = rot.c CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.CORTEXA53 b/kernel/arm64/KERNEL.CORTEXA53 index e2e0067707..53f47e897e 100644 --- a/kernel/arm64/KERNEL.CORTEXA53 +++ b/kernel/arm64/KERNEL.CORTEXA53 @@ -55,6 +55,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.CORTEXA57 
b/kernel/arm64/KERNEL.CORTEXA57 index 0be3348938..b8a3cdf24f 100644 --- a/kernel/arm64/KERNEL.CORTEXA57 +++ b/kernel/arm64/KERNEL.CORTEXA57 @@ -95,6 +95,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1 index 5b31744730..4517f088eb 100644 --- a/kernel/arm64/KERNEL.NEOVERSEN1 +++ b/kernel/arm64/KERNEL.NEOVERSEN1 @@ -55,6 +55,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.NEOVERSEN2 b/kernel/arm64/KERNEL.NEOVERSEN2 index 2f7400113b..ff33a9c261 100644 --- a/kernel/arm64/KERNEL.NEOVERSEN2 +++ b/kernel/arm64/KERNEL.NEOVERSEN2 @@ -55,6 +55,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.THUNDERX b/kernel/arm64/KERNEL.THUNDERX index 669f62698a..cc35b161ef 100644 --- a/kernel/arm64/KERNEL.THUNDERX +++ b/kernel/arm64/KERNEL.THUNDERX @@ -66,6 +66,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.THUNDERX2T99 b/kernel/arm64/KERNEL.THUNDERX2T99 index 41cedc8519..e0b49ee5a9 100644 --- a/kernel/arm64/KERNEL.THUNDERX2T99 +++ b/kernel/arm64/KERNEL.THUNDERX2T99 @@ -55,6 +55,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.TSV110 
b/kernel/arm64/KERNEL.TSV110 index 54d016e17f..08169be7f4 100644 --- a/kernel/arm64/KERNEL.TSV110 +++ b/kernel/arm64/KERNEL.TSV110 @@ -91,6 +91,9 @@ DROTKERNEL = rot.S CROTKERNEL = zrot.S ZROTKERNEL = zrot.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal.S DSCALKERNEL = scal.S CSCALKERNEL = zscal.S diff --git a/kernel/arm64/KERNEL.generic b/kernel/arm64/KERNEL.generic index 838adb05ab..1e2af7accf 100644 --- a/kernel/arm64/KERNEL.generic +++ b/kernel/arm64/KERNEL.generic @@ -127,6 +127,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/csky/KERNEL b/kernel/csky/KERNEL index afa8a08817..3e6baacc57 100644 --- a/kernel/csky/KERNEL +++ b/kernel/csky/KERNEL @@ -66,6 +66,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/e2k/KERNEL b/kernel/e2k/KERNEL index afa8a08817..3e6baacc57 100644 --- a/kernel/e2k/KERNEL +++ b/kernel/e2k/KERNEL @@ -66,6 +66,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/loongarch64/KERNEL b/kernel/loongarch64/KERNEL index e5d145a718..dcd541ba2f 100644 --- a/kernel/loongarch64/KERNEL +++ b/kernel/loongarch64/KERNEL @@ -18,6 +18,14 @@ ifndef SROTKERNEL SROTKERNEL = ../arm/rot.c endif +ifndef SROTMKERNEL +SROTMKERNEL = ../riscv64/rotm.c +endif + +ifndef DROTMKERNEL +DROTMKERNEL = ../riscv64/rotm.c +endif + ifndef DROTKERNEL DROTKERNEL = ../arm/rot.c endif diff --git a/kernel/loongarch64/KERNEL.LA264 
b/kernel/loongarch64/KERNEL.LA264 index 068b3cf4c4..863525ee0f 100644 --- a/kernel/loongarch64/KERNEL.LA264 +++ b/kernel/loongarch64/KERNEL.LA264 @@ -74,6 +74,9 @@ DROTKERNEL = rot_lsx.S CROTKERNEL = crot_lsx.S ZROTKERNEL = crot_lsx.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SNRM2KERNEL = snrm2_lsx.S DNRM2KERNEL = dnrm2_lsx.S CNRM2KERNEL = cnrm2_lsx.S diff --git a/kernel/loongarch64/KERNEL.LA464 b/kernel/loongarch64/KERNEL.LA464 index ca8c4d3884..bca1434281 100644 --- a/kernel/loongarch64/KERNEL.LA464 +++ b/kernel/loongarch64/KERNEL.LA464 @@ -74,6 +74,9 @@ DROTKERNEL = rot_lasx.S CROTKERNEL = crot_lasx.S ZROTKERNEL = crot_lasx.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SNRM2KERNEL = snrm2_lasx.S DNRM2KERNEL = dnrm2_lasx.S CNRM2KERNEL = cnrm2_lasx.S diff --git a/kernel/loongarch64/KERNEL.generic b/kernel/loongarch64/KERNEL.generic index 213add9ee5..9a248de6d2 100644 --- a/kernel/loongarch64/KERNEL.generic +++ b/kernel/loongarch64/KERNEL.generic @@ -121,6 +121,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/mips/KERNEL.P5600 b/kernel/mips/KERNEL.P5600 index c37b88adbe..99bafab005 100644 --- a/kernel/mips/KERNEL.P5600 +++ b/kernel/mips/KERNEL.P5600 @@ -100,6 +100,9 @@ CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c endif +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + ifndef NO_MSA SSCALKERNEL = ../mips/sscal_msa.c DSCALKERNEL = ../mips/dscal_msa.c diff --git a/kernel/mips/KERNEL.generic b/kernel/mips/KERNEL.generic index 17f2ef976b..64182a1f86 100644 --- a/kernel/mips/KERNEL.generic +++ b/kernel/mips/KERNEL.generic @@ -120,6 +120,9 @@ DROTKERNEL = ../mips/rot.c CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c 
+ SSCALKERNEL = ../mips/scal.c DSCALKERNEL = ../mips/scal.c CSCALKERNEL = ../mips/zscal.c diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index 54939a9efe..4d7dcdebc9 100644 --- a/kernel/mips64/KERNEL +++ b/kernel/mips64/KERNEL @@ -6,8 +6,10 @@ CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c CSWAPKERNEL = ../mips/zswap.c ZSWAPKERNEL = ../mips/zswap.c - - + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + ifndef SNRM2KERNEL SNRM2KERNEL = snrm2.S endif diff --git a/kernel/mips64/KERNEL.LOONGSON3R4 b/kernel/mips64/KERNEL.LOONGSON3R4 index 1149d97f12..53e3c4d147 100644 --- a/kernel/mips64/KERNEL.LOONGSON3R4 +++ b/kernel/mips64/KERNEL.LOONGSON3R4 @@ -30,6 +30,9 @@ CROTKERNEL = ../mips/crot_msa.c ZROTKERNEL = ../mips/zrot_msa.c endif +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + ifndef NO_MSA SSCALKERNEL = ../mips/sscal_msa.c DSCALKERNEL = ../mips/dscal_msa.c diff --git a/kernel/mips64/KERNEL.MIPS64_GENERIC b/kernel/mips64/KERNEL.MIPS64_GENERIC index 33bcbeedd5..0d985842c2 100644 --- a/kernel/mips64/KERNEL.MIPS64_GENERIC +++ b/kernel/mips64/KERNEL.MIPS64_GENERIC @@ -120,6 +120,9 @@ DROTKERNEL = ../mips/rot.c CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../mips/scal.c DSCALKERNEL = ../mips/scal.c CSCALKERNEL = ../mips/zscal.c diff --git a/kernel/mips64/KERNEL.generic b/kernel/mips64/KERNEL.generic index 17f2ef976b..64182a1f86 100644 --- a/kernel/mips64/KERNEL.generic +++ b/kernel/mips64/KERNEL.generic @@ -120,6 +120,9 @@ DROTKERNEL = ../mips/rot.c CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../mips/scal.c DSCALKERNEL = ../mips/scal.c CSCALKERNEL = ../mips/zscal.c diff --git a/kernel/power/KERNEL.POWER10 b/kernel/power/KERNEL.POWER10 index c009e33cf4..6343e07274 100644 --- a/kernel/power/KERNEL.POWER10 +++ 
b/kernel/power/KERNEL.POWER10 @@ -218,6 +218,10 @@ SROTKERNEL = srot.c DROTKERNEL = drot.c CROTKERNEL = crot.c ZROTKERNEL = zrot.c + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + # SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c diff --git a/kernel/power/KERNEL.POWER5 b/kernel/power/KERNEL.POWER5 index bea7b17c8c..10d37cf5af 100644 --- a/kernel/power/KERNEL.POWER5 +++ b/kernel/power/KERNEL.POWER5 @@ -59,3 +59,6 @@ CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c SGEMVNKERNEL = ../arm/gemv_n.c SGEMVTKERNEL = ../arm/gemv_t.c + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c diff --git a/kernel/power/KERNEL.POWER6 b/kernel/power/KERNEL.POWER6 index e6d2c9a513..07cf2d201c 100644 --- a/kernel/power/KERNEL.POWER6 +++ b/kernel/power/KERNEL.POWER6 @@ -57,3 +57,6 @@ ZTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index 001401d532..32e550b924 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -239,6 +239,10 @@ SROTKERNEL = srot.c DROTKERNEL = drot.c CROTKERNEL = crot.c ZROTKERNEL = zrot.c + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + # SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c diff --git a/kernel/power/KERNEL.POWER9 b/kernel/power/KERNEL.POWER9 index a18c31a2e9..20149e22b4 100644 --- a/kernel/power/KERNEL.POWER9 +++ b/kernel/power/KERNEL.POWER9 @@ -163,6 +163,10 @@ SROTKERNEL = srot.c DROTKERNEL = drot.c CROTKERNEL = crot.c ZROTKERNEL = zrot.c + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + # SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c diff --git a/kernel/power/KERNEL.PPC440 b/kernel/power/KERNEL.PPC440 index fd9a8c7801..61d2ff2a87 100644 --- a/kernel/power/KERNEL.PPC440 +++ b/kernel/power/KERNEL.PPC440 @@ -70,6 +70,9 @@ CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c endif 
+SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal_ppc440.S DSCALKERNEL = scal_ppc440.S CSCALKERNEL = zscal_ppc440.S diff --git a/kernel/power/KERNEL.PPC440FP2 b/kernel/power/KERNEL.PPC440FP2 index 3359385b6c..a89a26211a 100644 --- a/kernel/power/KERNEL.PPC440FP2 +++ b/kernel/power/KERNEL.PPC440FP2 @@ -60,6 +60,9 @@ DROTKERNEL = rot_ppc440.S CROTKERNEL = zrot_ppc440.S ZROTKERNEL = zrot_ppc440.S +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal_hummer.S DSCALKERNEL = scal_hummer.S CSCALKERNEL = zscal_hummer.S diff --git a/kernel/power/KERNEL.PPC970 b/kernel/power/KERNEL.PPC970 index fee5fa5290..7263e70005 100644 --- a/kernel/power/KERNEL.PPC970 +++ b/kernel/power/KERNEL.PPC970 @@ -89,3 +89,6 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c endif + +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c diff --git a/kernel/power/KERNEL.PPCG4 b/kernel/power/KERNEL.PPCG4 index 0297df5973..33a59fb133 100644 --- a/kernel/power/KERNEL.PPCG4 +++ b/kernel/power/KERNEL.PPCG4 @@ -64,6 +64,9 @@ DROTKERNEL = rot_ppc440.S CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal_ppc440.S DSCALKERNEL = scal_ppc440.S diff --git a/kernel/riscv64/KERNEL.C910V b/kernel/riscv64/KERNEL.C910V index 2798a870ed..7001a151ee 100644 --- a/kernel/riscv64/KERNEL.C910V +++ b/kernel/riscv64/KERNEL.C910V @@ -71,6 +71,9 @@ DROTKERNEL = rot_vector.c CROTKERNEL = zrot_vector.c ZROTKERNEL = zrot_vector.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = scal_vector.c DSCALKERNEL = scal_vector.c CSCALKERNEL = zscal_vector.c diff --git a/kernel/riscv64/KERNEL.RISCV64_GENERIC b/kernel/riscv64/KERNEL.RISCV64_GENERIC index 67f81cacda..e27e472e0b 100644 --- a/kernel/riscv64/KERNEL.RISCV64_GENERIC +++ b/kernel/riscv64/KERNEL.RISCV64_GENERIC @@ -71,6 +71,9 @@ DROTKERNEL = 
../riscv64/rot.c CROTKERNEL = ../riscv64/zrot.c ZROTKERNEL = ../riscv64/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../riscv64/scal.c DSCALKERNEL = ../riscv64/scal.c CSCALKERNEL = ../riscv64/zscal.c diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL128B b/kernel/riscv64/KERNEL.RISCV64_ZVL128B index fec69ee094..2e954d8a42 100644 --- a/kernel/riscv64/KERNEL.RISCV64_ZVL128B +++ b/kernel/riscv64/KERNEL.RISCV64_ZVL128B @@ -71,6 +71,9 @@ DROTKERNEL = rot_rvv.c CROTKERNEL = zrot_rvv.c ZROTKERNEL = zrot_rvv.c +SROTMKERNEL = rotm_rvv.c +DROTMKERNEL = rotm_rvv.c + SSCALKERNEL = scal_rvv.c DSCALKERNEL = scal_rvv.c CSCALKERNEL = zscal_rvv.c diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL256B b/kernel/riscv64/KERNEL.RISCV64_ZVL256B index d8690682f4..5cdb6604b0 100644 --- a/kernel/riscv64/KERNEL.RISCV64_ZVL256B +++ b/kernel/riscv64/KERNEL.RISCV64_ZVL256B @@ -66,6 +66,9 @@ DROTKERNEL = rot_vector.c CROTKERNEL = zrot_vector.c ZROTKERNEL = zrot_vector.c +SROTMKERNEL = rotm_rvv.c +DROTMKERNEL = rotm_rvv.c + SSCALKERNEL = scal_vector.c DSCALKERNEL = scal_vector.c CSCALKERNEL = zscal_vector.c diff --git a/kernel/riscv64/KERNEL.x280 b/kernel/riscv64/KERNEL.x280 index 86708fe015..4dd6e12ead 100644 --- a/kernel/riscv64/KERNEL.x280 +++ b/kernel/riscv64/KERNEL.x280 @@ -98,6 +98,9 @@ DROTKERNEL = rot_rvv.c CROTKERNEL = zrot_rvv.c ZROTKERNEL = zrot_rvv.c +SROTMKERNEL = rotm_rvv.c +DROTMKERNEL = rotm_rvv.c + SSCALKERNEL = scal_rvv.c DSCALKERNEL = scal_rvv.c CSCALKERNEL = zscal_rvv.c diff --git a/kernel/riscv64/rotm.c b/kernel/riscv64/rotm.c new file mode 100644 index 0000000000..e151aa5f88 --- /dev/null +++ b/kernel/riscv64/rotm.c @@ -0,0 +1,159 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *dx, BLASLONG incx, FLOAT *dy, BLASLONG incy, FLOAT *dparam) +{ /* Updates n (dx, dy) element pairs in place, reading dx with stride incx and dy with stride incy; the first dparam element (dflag) selects which of the other four are used. Always returns 0. */ + BLASLONG i__1, i__2; + BLASLONG i__; + FLOAT w, z__; + BLASLONG kx, ky; + FLOAT dh11, dh12, dh22, dh21, dflag; + BLASLONG nsteps; + + --dparam; /* these three decrements make all later indexing 1-based, so dparam[1] is the caller's first element. NOTE(review): this forms pointers one element before each array, which ISO C does not guarantee to be valid - confirm this is acceptable here. */ + --dy; + --dx; + + dflag = dparam[1]; + if (n <= 0 || dflag == - 2.0) goto L140; /* nothing to do: dx and dy are left untouched */ + + if (! (incx == incy && incx > 0)) goto L70; /* only equal, positive strides use the shared-index loops below */ + + nsteps = n * incx; /* upper bound for the 1-based index in the shared-index loops */ + if (dflag < 0.)
{ + goto L50; + } else if (dflag == 0) { + goto L10; + } else { + goto L30; + } +L10: /* dflag == 0: only dh12 and dh21 are read from dparam */ + dh12 = dparam[4]; + dh21 = dparam[3]; + i__1 = nsteps; + i__2 = incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* incx > 0 on this path, so the i__2 < 0 arm is never selected */ + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w + z__ * dh12; + dy[i__] = w * dh21 + z__; +/* L20: */ + } + goto L140; +L30: /* dflag > 0: only dh11 and dh22 are read from dparam */ + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = nsteps; + i__1 = incx; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* same loop as at L10 with the roles of i__1 and i__2 swapped */ + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__; + dy[i__] = -w + dh22 * z__; +/* L40: */ + } + goto L140; +L50: /* dflag < 0: all four of dh11, dh12, dh21, dh22 are read */ + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__1 = nsteps; + i__2 = incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__ * dh12; + dy[i__] = w * dh21 + z__ * dh22; +/* L60: */ + } + goto L140; +L70: /* general strides: kx and ky are the 1-based positions of the current pair in dx and dy */ + kx = 1; + ky = 1; + if (incx < 0) { + kx = (1 - n) * incx + 1; /* negative stride: start at the far end and step back toward index 1 */ + } + if (incy < 0) { + ky = (1 - n) * incy + 1; + } + + if (dflag < 0.)
{ + goto L120; + } else if (dflag == 0) { + goto L80; + } else { + goto L100; + } +L80: /* dflag == 0 with independent strides: same arithmetic as L10 */ + dh12 = dparam[4]; + dh21 = dparam[3]; + i__2 = n; + for (i__ = 1; i__ <= i__2; ++i__) { /* i__ only counts the n pairs; kx and ky do the addressing */ + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w + z__ * dh12; + dy[ky] = w * dh21 + z__; + kx += incx; + ky += incy; +/* L90: */ + } + goto L140; +L100: /* dflag > 0 with independent strides: same arithmetic as L30 */ + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__; + dy[ky] = -w + dh22 * z__; + kx += incx; + ky += incy; +/* L110: */ + } + goto L140; +L120: /* dflag < 0 with independent strides: same arithmetic as L50 */ + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__2 = n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__ * dh12; + dy[ky] = w * dh21 + z__ * dh22; + kx += incx; + ky += incy; +/* L130: */ + } +L140: /* single exit point shared by every path */ + return(0); +} diff --git a/kernel/riscv64/rotm_rvv.c b/kernel/riscv64/rotm_rvv.c new file mode 100644 index 0000000000..46c678ff63 --- /dev/null +++ b/kernel/riscv64/rotm_rvv.c @@ -0,0 +1,266 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +#if !defined(DOUBLE) +#define VSETVL(n) __riscv_vsetvl_e32m8(n) +#define FLOAT_V_T vfloat32m8_t +#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 +#define VSSEV_FLOAT __riscv_vsse32_v_f32m8 +#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 +#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 +#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m8 +#else +#define VSETVL(n) __riscv_vsetvl_e64m8(n) +#define FLOAT_V_T vfloat64m8_t +#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 +#define VSSEV_FLOAT __riscv_vsse64_v_f64m8 +#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 +#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 +#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m8 +#endif + +int CNAME(BLASLONG n, FLOAT *dx, BLASLONG incx, FLOAT *dy, BLASLONG incy, FLOAT *dparam) +{ + BLASLONG i__1, i__2; + BLASLONG kx, ky; + FLOAT dh11, dh12, dh22, dh21, dflag; + BLASLONG nsteps; + + --dparam; + --dy; + --dx; + + FLOAT_V_T v_w, v_z__, v_dx, v_dy; + BLASLONG stride, stride_x, stride_y, offset; + + dflag = dparam[1]; + if (n <= 0 || dflag == - 2.0) goto L140; + + if (!(incx == incy && incx > 0)) goto L70; + + nsteps = n * incx; + if (dflag < 0.) 
{ + goto L50; + } else if (dflag == 0) { + goto L10; + } else { + goto L30; + } +L10: + dh12 = dparam[4]; + dh21 = dparam[3]; + i__1 = nsteps; + i__2 = incx; + if(i__2 < 0){ + offset = i__1 - 2; + dx += offset; + dy += offset; + i__1 = -i__1; + i__2 = -i__2; + } + stride = i__2 * sizeof(FLOAT); + n = i__1 / i__2; + // printf("L10 RVV, i__2: %d, i__1: %d, stride: %d, n: %d \n", i__2, i__1, stride, n); + for (size_t vl; n > 0; n -= vl, dx += vl*i__2, dy += vl*i__2) { + vl = VSETVL(n); + + v_w = VLSEV_FLOAT(&dx[1], stride, vl); + v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); + + v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z__, vl); + v_dy = VFMACCVF_FLOAT(v_z__, dh21, v_w, vl); + + VSSEV_FLOAT(&dx[1], stride, v_dx, vl); + VSSEV_FLOAT(&dy[1], stride, v_dy, vl); + } + goto L140; +L30: + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = nsteps; + i__1 = incx; + if(i__1 < 0){ + offset = i__2 - 2; + dx += offset; + dy += offset; + i__1 = -i__1; + i__2 = -i__2; + } + stride = i__1 * sizeof(FLOAT); + n = i__2 / i__1; + // printf("L30 RVV, i__2: %d, i__1: %d, stride: %d, n: %d \n", i__2, i__1, stride, n); + for (size_t vl; n > 0; n -= vl, dx += vl*i__1, dy += vl*i__1) { + vl = VSETVL(n); + + v_w = VLSEV_FLOAT(&dx[1], stride, vl); + v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); + + v_dx = VFMACCVF_FLOAT(v_z__, dh11, v_w, vl); + v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z__, vl); + + VSSEV_FLOAT(&dx[1], stride, v_dx, vl); + VSSEV_FLOAT(&dy[1], stride, v_dy, vl); + } + goto L140; +L50: + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__1 = nsteps; + i__2 = incx; + if(i__2 < 0){ + offset = i__1 - 2; + dx += offset; + dy += offset; + i__1 = -i__1; + i__2 = -i__2; + } + stride = i__2 * sizeof(FLOAT); + n = i__1 / i__2; + // printf("L50 RVV, i__2: %d, i__1: %d, stride: %d, n: %d \n", i__2, i__1, stride, n); + for (size_t vl; n > 0; n -= vl, dx += vl*i__2, dy += vl*i__2) { + vl = VSETVL(n); + + v_w = VLSEV_FLOAT(&dx[1], stride, vl); + v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); + + 
v_dx = VFMULVF_FLOAT(v_w, dh11, vl); + v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z__, vl); + VSSEV_FLOAT(&dx[1], stride, v_dx, vl); + + v_dy = VFMULVF_FLOAT(v_w, dh21, vl); + v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z__, vl); + VSSEV_FLOAT(&dy[1], stride, v_dy, vl); + } + goto L140; +L70: + kx = 1; + ky = 1; + if (incx < 0) { + kx = (1 - n) * incx + 1; + } + if (incy < 0) { + ky = (1 - n) * incy + 1; + } + + if (dflag < 0.) { + goto L120; + } else if (dflag == 0) { + goto L80; + } else { + goto L100; + } +L80: + dh12 = dparam[4]; + dh21 = dparam[3]; + if(incx < 0){ + incx = -incx; + dx -= n*incx; + } + if(incy < 0){ + incy = -incy; + dy -= n*incy; + } + stride_x = incx * sizeof(FLOAT); + stride_y = incy * sizeof(FLOAT); + // printf("L120 RVV, n: %d, i__1: %d, stride_x: %d, stride_y: %d, n: %d \n", n, i__1, stride_x, stride_y, n); + for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { + vl = VSETVL(n); + + v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); + v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); + + v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z__, vl); + v_dy = VFMACCVF_FLOAT(v_z__, dh21, v_w, vl); + + VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); + VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); + } + goto L140; +L100: + dh11 = dparam[2]; + dh22 = dparam[5]; + if(incx < 0){ + incx = -incx; + dx -= n*incx; + } + if(incy < 0){ + incy = -incy; + dy -= n*incy; + } + stride_x = incx * sizeof(FLOAT); + stride_y = incy * sizeof(FLOAT); + // printf("L120 RVV, n: %d, i__1: %d, stride_x: %d, stride_y: %d, n: %d \n", n, i__1, stride_x, stride_y, n); + for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { + vl = VSETVL(n); + + v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); + v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); + + v_dx = VFMACCVF_FLOAT(v_z__, dh11, v_w, vl); + v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z__, vl); + + VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); + VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); + } + goto L140; +L120: + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = 
dparam[5]; + if(incx < 0){ + incx = -incx; + dx -= n*incx; + } + if(incy < 0){ + incy = -incy; + dy -= n*incy; + } + stride_x = incx * sizeof(FLOAT); + stride_y = incy * sizeof(FLOAT); + // printf("L120 RVV, n: %d, i__1: %d, stride_x: %d, stride_y: %d, n: %d \n", n, i__1, stride_x, stride_y, n); + for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { + vl = VSETVL(n); + + v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); + v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); + + v_dx = VFMULVF_FLOAT(v_w, dh11, vl); + v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z__, vl); + VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); + + v_dy = VFMULVF_FLOAT(v_w, dh21, vl); + v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z__, vl); + VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); + } +L140: + return(0); +} \ No newline at end of file diff --git a/kernel/x86/KERNEL.generic b/kernel/x86/KERNEL.generic index 0aac0ce996..4ae98b16af 100644 --- a/kernel/x86/KERNEL.generic +++ b/kernel/x86/KERNEL.generic @@ -124,6 +124,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 2deb5a864c..c0ef72c051 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -298,6 +298,14 @@ ifndef ZROTKERNEL ZROTKERNEL = zrot_sse2.S endif +ifndef SROTMKERNEL +SROTMKERNEL = ../riscv64/rotm.c +endif + +ifndef DROTMKERNEL +DROTMKERNEL = ../riscv64/rotm.c +endif + ifndef XROTKERNEL XROTKERNEL = zrot.S endif diff --git a/kernel/x86_64/KERNEL.generic b/kernel/x86_64/KERNEL.generic index 7cb0cb836c..16aea681bd 100644 --- a/kernel/x86_64/KERNEL.generic +++ b/kernel/x86_64/KERNEL.generic @@ -124,6 +124,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = 
../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/zarch/KERNEL.Z13 b/kernel/zarch/KERNEL.Z13 index fe82d81e6a..8b7e810ab9 100644 --- a/kernel/zarch/KERNEL.Z13 +++ b/kernel/zarch/KERNEL.Z13 @@ -66,6 +66,9 @@ DROTKERNEL = drot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = dscal.c CSCALKERNEL = ../arm/zscal.c diff --git a/kernel/zarch/KERNEL.Z14 b/kernel/zarch/KERNEL.Z14 index 3510938a71..5c0f26c656 100644 --- a/kernel/zarch/KERNEL.Z14 +++ b/kernel/zarch/KERNEL.Z14 @@ -66,6 +66,9 @@ DROTKERNEL = drot.c CROTKERNEL = crot.c ZROTKERNEL = zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c CSCALKERNEL = cscal.c diff --git a/kernel/zarch/KERNEL.ZARCH_GENERIC b/kernel/zarch/KERNEL.ZARCH_GENERIC index 33850d0f7d..d53e7ae254 100644 --- a/kernel/zarch/KERNEL.ZARCH_GENERIC +++ b/kernel/zarch/KERNEL.ZARCH_GENERIC @@ -66,6 +66,9 @@ DROTKERNEL = ../arm/rot.c CROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c +SROTMKERNEL = ../riscv64/rotm.c +DROTMKERNEL = ../riscv64/rotm.c + SSCALKERNEL = ../arm/scal.c DSCALKERNEL = ../arm/scal.c CSCALKERNEL = ../arm/zscal.c diff --git a/utest/test_rot.c b/utest/test_rot.c index 0e74ecbb36..acd9ff1ce6 100644 --- a/utest/test_rot.c +++ b/utest/test_rot.c @@ -53,6 +53,24 @@ CTEST(rot,drot_inc_0) ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); } } +CTEST(rot,drotm_inc_1) +{ + blasint i = 0; + blasint N = 12, incX = 1, incY = 1; + double param[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; + double x_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + double y_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + double x_referece[] = {3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0, 33.0, 36.0}; + double y_referece[] = {4.0, 8.0, 12.0, 16.0, 20.0, 24.0, 28.0, 32.0, 36.0, 40.0, 44.0, 48.0}; + + //OpenBLAS + 
BLASFUNC(drotm)(&N, x_actual, &incX, y_actual, &incY, param); + + for(i = 0; i < N; i++){ + ASSERT_DBL_NEAR_TOL(x_referece[i], x_actual[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(y_referece[i], y_actual[i], DOUBLE_EPS); + } +} #endif #ifdef BUILD_COMPLEX16 @@ -96,6 +114,24 @@ CTEST(rot,srot_inc_0) ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS); } } +CTEST(rot,srotm_inc_1) +{ + blasint i = 0; + blasint N = 12, incX = 1, incY = 1; + float param[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; + float x_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + float y_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + float x_referece[] = {3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0, 33.0, 36.0}; + float y_referece[] = {4.0, 8.0, 12.0, 16.0, 20.0, 24.0, 28.0, 32.0, 36.0, 40.0, 44.0, 48.0}; + + //OpenBLAS + BLASFUNC(srotm)(&N, x_actual, &incX, y_actual, &incY, param); + + for(i = 0; i < N; i++){ + ASSERT_DBL_NEAR_TOL(x_referece[i], x_actual[i], SINGLE_EPS); + ASSERT_DBL_NEAR_TOL(y_referece[i], y_actual[i], SINGLE_EPS); + } +} #endif #ifdef BUILD_COMPLEX