From 3e5979400c5f5b2e00392e4e709e2774aabec3ad Mon Sep 17 00:00:00 2001 From: Alexis Montoison <35051714+amontoison@users.noreply.github.com> Date: Mon, 13 Nov 2023 01:52:05 -0600 Subject: [PATCH] [CUSPARSE] Support in-place triangular solves with CUDA v12.x (#2164) --- lib/cusparse/generic.jl | 8 +- lib/cusparse/interfaces.jl | 195 +++++++++++++------------- test/libraries/cusparse/interfaces.jl | 107 +++++++++----- 3 files changed, 178 insertions(+), 132 deletions(-) diff --git a/lib/cusparse/generic.jl b/lib/cusparse/generic.jl index 09e7ae031c..87410f6208 100644 --- a/lib/cusparse/generic.jl +++ b/lib/cusparse/generic.jl @@ -535,13 +535,17 @@ function sm!(transa::SparseChar, transb::SparseChar, uplo::SparseChar, diag::Spa transa = T <: Real && transa == 'C' ? 'T' : transa transb = T <: Real && transb == 'C' ? 'T' : transb + # Check if we solve a triangular system in-place with transb != 'N'. + # In that case we need to update the descriptor of C such that it represents Bᵀ. + is_C_transposed = (B === C) && (transb != 'N') + if isa(A, CuSparseMatrixCSC) && transa == 'C' && T <: Complex throw(ArgumentError("Backward and forward sweeps with the adjoint of a complex CSC matrix is not supported. Use a CSR or COO matrix instead.")) end mA,nA = size(A) mB,nB = size(B) - mC,nC = size(C) + mC,nC = !is_C_transposed ? size(C) : reverse(size(C)) (mA != nA) && throw(DimensionMismatch("A must be square, but has dimensions ($mA,$nA)!")) (mC != mA) && throw(DimensionMismatch("C must have $mA rows, but has $mC rows")) @@ -566,7 +570,7 @@ function sm!(transa::SparseChar, transb::SparseChar, uplo::SparseChar, diag::Spa cusparseSpMatSetAttribute(descA, 'D', cusparse_diag, Csize_t(sizeof(cusparse_diag))) descB = CuDenseMatrixDescriptor(B) - descC = CuDenseMatrixDescriptor(C) + descC = CuDenseMatrixDescriptor(C, transposed=is_C_transposed) spsm_desc = CuSparseSpSMDescriptor() function bufferSize() diff --git a/lib/cusparse/interfaces.jl b/lib/cusparse/interfaces.jl index 4dc3d767f5..3715a8d45a 100644 --- a/lib/cusparse/interfaces.jl +++ b/lib/cusparse/interfaces.jl @@ -208,12 +208,26 @@ end # triangular for SparseMatrixType in (:CuSparseMatrixBSR,) - if VERSION >= v"1.10-" + if VERSION ≥ v"1.10-" @eval begin - LinearAlgebra.generic_trimatdiv!(C::DenseCuVector{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AbstractVector{T}) where {T<:BlasFloat} = + LinearAlgebra.generic_trimatdiv!(C::DenseCuVector{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::DenseCuVector{T}) where {T<:BlasFloat} = sv2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', uploc, isunitc, one(T), A, C === B ? C : copyto!(C, B), 'O') - LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} = + LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::DenseCuMatrix{T}) where {T<:BlasFloat} = sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, C === B ? C : copyto!(C, B), 'O') + function LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AdjOrTrans{T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} + transb = LinearAlgebra.wrapper_char(B) + (transb == 'C') && (T <: Complex) && throw(ErrorException("This operation is not supported by the current CUDA version.")) + C !== parent(B) && copyto!(C, B) + sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, C, 'O') + end + function LinearAlgebra.generic_trimatdiv!(C::Transpose{T,<:DenseCuMatrix{T}}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::Transpose{T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} + B === C || throw(ErrorException("This operation is only supported if B and C are identical.")) + sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'T', uploc, isunitc, one(T), A, parent(B), 'O') + end + function LinearAlgebra.generic_trimatdiv!(C::Adjoint{T,<:DenseCuMatrix{T}}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::Adjoint{T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} + B === C || throw(ErrorException("This operation is only supported if B and C are identical.")) + sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'C', uploc, isunitc, one(T), A, parent(B), 'O') + end end else ## direct @@ -231,6 +245,14 @@ for SparseMatrixType in (:CuSparseMatrixBSR,) LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, B::DenseCuMatrix{T}) where {T<:BlasFloat} = sm2!('N', 'N', $uploc, $isunitc, one(T), parent(A), B, 'O') + + LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, + B::Transpose{T,<:DenseCuMatrix}) where {T<:BlasFloat} = + sm2!('N', 'T', $uploc, $isunitc, one(T), parent(A), parent(B), 'O') + + LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, + B::Adjoint{T,<:DenseCuMatrix}) where {T<:BlasFloat} = + sm2!('N', 'C', $uploc, $isunitc, one(T), parent(A), parent(B), 'O') end end @@ -252,6 +274,14 @@ for SparseMatrixType in (:CuSparseMatrixBSR,) LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::DenseCuMatrix{T}) where {T<:BlasFloat} = sm2!($transa, 'N', $uploc, $isunitc, one(T), parent(parent(A)), B, 'O') + + LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, + B::Transpose{T,<:DenseCuMatrix}) where {T<:BlasFloat} = + sm2!($transa, 'T', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), 'O') + + LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, + B::Adjoint{T,<:DenseCuMatrix}) where {T<:BlasFloat} = + sm2!($transa, 'C', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), 'O') end end end @@ -259,7 +289,7 @@ for SparseMatrixType in (:CuSparseMatrixBSR,) end # SparseMatrixType loop for SparseMatrixType in (:CuSparseMatrixCOO, :CuSparseMatrixCSR, :CuSparseMatrixCSC) - if VERSION >= v"1.10-" + if VERSION ≥ v"1.10-" @eval begin function LinearAlgebra.generic_trimatdiv!(C::DenseCuVector{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::DenseCuVector{T}) where {T<:BlasFloat} if CUSPARSE.version() ≥ v"12.0" @@ -272,31 +302,40 @@ for SparseMatrixType in (:CuSparseMatrixCOO, :CuSparseMatrixCSR, :CuSparseMatrix end function LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::DenseCuMatrix{T}) where {T<:BlasFloat} if CUSPARSE.version() ≥ v"12.0" - sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), parent(A), B, C, 'O') + sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, B, C, 'O') else $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) C !== B && copyto!(C, B) sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, C, 'O') end end - function LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AdjOrTrans{<:T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} - CUSPARSE.version() < v"12.0" && throw(ErrorException("This operation is not supported by the current CUDA version.")) + function LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AdjOrTrans{T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} transb = LinearAlgebra.wrapper_char(B) - transb == 'C' && throw(ErrorException("adjoint rhs is not supported")) - sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', transb, uploc, isunitc, one(T), A, parent(B), C, 'O') + (transb == 'C') && (T <: Complex) && throw(ErrorException("This operation is not supported by the current CUDA version.")) + if CUSPARSE.version() ≥ v"12.0" + sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', transb, uploc, isunitc, one(T), A, parent(B), C, 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + C !== parent(B) && copyto!(C, B) + sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, C, 'O') + end end - function LinearAlgebra.:(\)(A::LinearAlgebra.UpperOrLowerTriangular{T,<:$SparseMatrixType}, B::DenseCuVector{T}) where {T} - C = CuVector{T}(undef, length(B)) - LinearAlgebra.ldiv!(C, A, B) + function LinearAlgebra.generic_trimatdiv!(C::Transpose{T,<:DenseCuMatrix{T}}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::Transpose{T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} + (B !== C) && throw(ErrorException("This operation is only supported if B and C are identical.")) + if CUSPARSE.version() ≥ v"12.0" + sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'T', uploc, isunitc, one(T), A, parent(B), parent(C), 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'T', uploc, isunitc, one(T), A, parent(B), 'O') + end end - end - - for rhs in (:(DenseCuMatrix{T}), :(Transpose{T,<:DenseCuMatrix}), :(Adjoint{T,<:DenseCuMatrix})) - @eval begin - function LinearAlgebra.:(\)(A::LinearAlgebra.UpperOrLowerTriangular{T,<:$SparseMatrixType}, B::$rhs) where {T} - m, n = size(B) - C = CuMatrix{T}(undef, m, n) - LinearAlgebra.ldiv!(C, A, B) + function LinearAlgebra.generic_trimatdiv!(C::Adjoint{T,<:DenseCuMatrix{T}}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::Adjoint{T,<:DenseCuMatrix{T}}) where {T<:BlasFloat} + (B !== C) && throw(ErrorException("This operation is only supported if B and C are identical.")) + if CUSPARSE.version() ≥ v"12.0" + sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'C', uploc, isunitc, one(T), A, parent(B), parent(C), 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'C', uploc, isunitc, one(T), A, parent(B), 'O') end end end @@ -308,59 +347,40 @@ for SparseMatrixType in (:CuSparseMatrixCOO, :CuSparseMatrixCSR, :CuSparseMatrix (:UnitUpperTriangular, 'U', 'U')) @eval begin # Left division with vectors - function LinearAlgebra.ldiv!(C::DenseCuVector{T}, - A::$t{T,<:$SparseMatrixType}, - B::DenseCuVector{T}) where {T<:BlasFloat} + function LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, B::DenseCuVector{T}) where {T<:BlasFloat} if CUSPARSE.version() ≥ v"12.0" - sv!('N', $uploc, $isunitc, one(T), parent(A), B, C, 'O') + sv!('N', $uploc, $isunitc, one(T), parent(A), B, B, 'O') else $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) - copyto!(C, B) - sv2!('N', $uploc, $isunitc, one(T), parent(A), C, 'O') + sv2!('N', $uploc, $isunitc, one(T), parent(A), B, 'O') end end # Left division with matrices - function LinearAlgebra.ldiv!(C::DenseCuMatrix{T}, - A::$t{T,<:$SparseMatrixType}, - B::DenseCuMatrix{T}) where {T<:BlasFloat} + function LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, B::DenseCuMatrix{T}) where {T<:BlasFloat} if CUSPARSE.version() ≥ v"12.0" - sm!('N', 'N', $uploc, $isunitc, one(T), parent(A), B, C, 'O') + sm!('N', 'N', $uploc, $isunitc, one(T), parent(A), B, B, 'O') else $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) - copyto!(C, B) - sm2!('N', 'N', $uploc, $isunitc, one(T), parent(A), C, 'O') + sm2!('N', 'N', $uploc, $isunitc, one(T), parent(A), B, 'O') end end - function LinearAlgebra.ldiv!(C::DenseCuMatrix{T}, - A::$t{T,<:$SparseMatrixType}, - B::Transpose{T,<:DenseCuMatrix}) where {T<:BlasFloat} - CUSPARSE.version() < v"12.0" && throw(ErrorException("This operation is not supported by the current CUDA version.")) - sm!('N', 'T', $uploc, $isunitc, one(T), parent(A), parent(B), C, 'O') - end - - # transb = 'C' is not supported. - function LinearAlgebra.ldiv!(C::DenseCuMatrix{T}, - A::$t{T,<:$SparseMatrixType}, - B::Adjoint{T,<:DenseCuMatrix}) where {T<:BlasReal} - CUSPARSE.version() < v"12.0" && throw(ErrorException("This operation is not supported by the current CUDA version.")) - sm!('N', 'T', $uploc, $isunitc, one(T), parent(A), parent(B), C, 'O') - end - - function LinearAlgebra.:(\)(A::$t{T,<:$SparseMatrixType}, B::DenseCuVector{T}) where {T} - m = length(B) - C = CuVector{T}(undef, m) - LinearAlgebra.ldiv!(C, A, B) + function LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, B::Transpose{T,<:DenseCuMatrix}) where {T<:BlasFloat} + if CUSPARSE.version() ≥ v"12.0" + sm!('N', 'T', $uploc, $isunitc, one(T), parent(A), parent(B), parent(B), 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!('N', 'T', $uploc, $isunitc, one(T), parent(A), parent(B), 'O') + end end - end - for rhs in (:(DenseCuMatrix{T}), :(Transpose{T,<:DenseCuMatrix}), :(Adjoint{T,<:DenseCuMatrix})) - @eval begin - function LinearAlgebra.:(\)(A::$t{T,<:$SparseMatrixType}, B::$rhs) where {T} - m, n = size(B) - C = CuMatrix{T}(undef, m, n) - LinearAlgebra.ldiv!(C, A, B) + function LinearAlgebra.ldiv!(A::$t{T,<:$SparseMatrixType}, B::Adjoint{T,<:DenseCuMatrix}) where {T<:BlasFloat} + if CUSPARSE.version() ≥ v"12.0" + sm!('N', 'C', $uploc, $isunitc, one(T), parent(A), parent(B), parent(B), 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!('N', 'C', $uploc, $isunitc, one(T), parent(A), parent(B), 'O') end end end @@ -376,57 +396,40 @@ for SparseMatrixType in (:CuSparseMatrixCOO, :CuSparseMatrixCSR, :CuSparseMatrix (:Adjoint, 'C')) @eval begin # Left division with vectors - function LinearAlgebra.ldiv!(C::DenseCuVector{T}, - A::$t{T,<:$opa{T,<:$SparseMatrixType}}, - B::DenseCuVector{T}) where {T<:BlasFloat} + function LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::DenseCuVector{T}) where {T<:BlasFloat} if CUSPARSE.version() ≥ v"12.0" - sv!($transa, $uploc, $isunitc, one(T), parent(parent(A)), B, C, 'O') + sv!($transa, $uploc, $isunitc, one(T), parent(parent(A)), B, B, 'O') else - copyto!(C, B) - sv2!($transa, $uploc, $isunitc, one(T), parent(parent(A)), C, 'O') + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sv2!($transa, $uploc, $isunitc, one(T), parent(parent(A)), B, 'O') end end # Left division with matrices - function LinearAlgebra.ldiv!(C::DenseCuMatrix{T}, - A::$t{T,<:$opa{T,<:$SparseMatrixType}}, - B::DenseCuMatrix{T}) where {T<:BlasFloat} + function LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::DenseCuMatrix{T}) where {T<:BlasFloat} if CUSPARSE.version() ≥ v"12.0" - sm!($transa, 'N', $uploc, $isunitc, one(T), parent(parent(A)), B, C, 'O') + sm!($transa, 'N', $uploc, $isunitc, one(T), parent(parent(A)), B, B, 'O') else - copyto!(C, B) - sm2!($transa, 'N', $uploc, $isunitc, one(T), parent(parent(A)), C, 'O') + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!($transa, 'N', $uploc, $isunitc, one(T), parent(parent(A)), B, 'O') end end - function LinearAlgebra.ldiv!(C::DenseCuMatrix{T}, - A::$t{T,<:$opa{T,<:$SparseMatrixType}}, - B::Transpose{T,<:DenseCuMatrix}) where {T<:BlasFloat} - CUSPARSE.version() < v"12.0" && throw(ErrorException("This operation is not supported by the current CUDA version.")) - sm!($transa, 'T', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), C, 'O') - end - - # transb = 'C' is not supported. - function LinearAlgebra.ldiv!(C::DenseCuMatrix{T}, - A::$t{T,<:$opa{T,<:$SparseMatrixType}}, - B::Adjoint{T,<:DenseCuMatrix}) where {T<:BlasReal} - CUSPARSE.version() < v"12.0" && throw(ErrorException("This operation is not supported by the current CUDA version.")) - sm!($transa, 'T', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), C, 'O') - end - - function LinearAlgebra.:(\)(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::DenseCuVector{T}) where {T} - m = length(B) - C = CuVector{T}(undef, m) - LinearAlgebra.ldiv!(C, A, B) + function LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::Transpose{T,<:DenseCuMatrix}) where {T<:BlasFloat} + if CUSPARSE.version() ≥ v"12.0" + sm!($transa, 'T', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), parent(B), 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!($transa, 'T', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), 'O') + end end - end - for rhs in (:(DenseCuMatrix{T}), :(Transpose{T,<:DenseCuMatrix}), :(Adjoint{T,<:DenseCuMatrix})) - @eval begin - function LinearAlgebra.:(\)(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::$rhs) where {T} - m, n = size(B) - C = CuMatrix{T}(undef, m, n) - LinearAlgebra.ldiv!(C, A, B) + function LinearAlgebra.ldiv!(A::$t{T,<:$opa{T,<:$SparseMatrixType}}, B::Adjoint{T,<:DenseCuMatrix}) where {T<:BlasFloat} + if CUSPARSE.version() ≥ v"12.0" + sm!($transa, 'C', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), parent(B), 'O') + else + $SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version.")) + sm2!($transa, 'C', $uploc, $isunitc, one(T), parent(parent(A)), parent(B), 'O') end end end diff --git a/test/libraries/cusparse/interfaces.jl b/test/libraries/cusparse/interfaces.jl index 0cb61c8c22..1238bb96f0 100644 --- a/test/libraries/cusparse/interfaces.jl +++ b/test/libraries/cusparse/interfaces.jl @@ -358,8 +358,8 @@ using LinearAlgebra, SparseArrays for triangle in [LowerTriangular, UnitLowerTriangular, UpperTriangular, UnitUpperTriangular] @testset "ldiv!($triangle(CuSparseMatrixBSR), CuVector) -- $elty" for elty in [Float32,Float64,ComplexF32,ComplexF64] - for opa in [identity, transpose, adjoint] - A = A = rand(elty, 10, 10) + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + A = rand(elty, 10, 10) A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A A = sparse(A) @@ -373,8 +373,8 @@ using LinearAlgebra, SparseArrays end @testset "$triangle(CuSparseMatrixBSR) \\ CuVector -- $elty" for elty in [Float32,Float64,ComplexF32,ComplexF64] - for opa in [identity, transpose, adjoint] - A = A = rand(elty, 10, 10) + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + A = rand(elty, 10, 10) A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A A = sparse(A) @@ -388,32 +388,38 @@ using LinearAlgebra, SparseArrays end @testset "ldiv!($triangle(CuSparseMatrixBSR), CuMatrix) -- $elty" for elty in [Float32,Float64,ComplexF32,ComplexF64] - for opa in [identity, transpose, adjoint] - A = A = rand(elty, 10, 10) - A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) - A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A - A = sparse(A) - B = rand(elty, 10, 2) - dA = CuSparseMatrixBSR(A, 1) - dB = CuArray(B) - ldiv!(triangle(opa(A)), B) - ldiv!(triangle(opa(dA)), dB) - @test B ≈ collect(dB) + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + @testset "opb = $opb" for opb in [identity, transpose, adjoint] + elty <: Complex && opb == adjoint && continue + A = rand(elty, 10, 10) + A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) + A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A + A = sparse(A) + B = opb == identity ? rand(elty, 10, 2) : rand(elty, 2, 10) + dA = CuSparseMatrixBSR(A, 1) + dB = CuArray(B) + ldiv!(triangle(opa(A)), opb(B)) + ldiv!(triangle(opa(dA)), opb(dB)) + @test B ≈ collect(dB) + end end end @testset "$triangle(CuSparseMatrixBSR) \\ CuMatrix -- $elty" for elty in [Float32,Float64,ComplexF32,ComplexF64] - for opa in [identity, transpose, adjoint] - A = A = rand(elty, 10, 10) - A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) - A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A - A = sparse(A) - B = rand(elty, 10, 2) - dA = CuSparseMatrixBSR(A, 1) - dB = CuArray(B) - C = triangle(opa(A)) \ B - dC = triangle(opa(dA)) \ dB - @test C ≈ collect(dC) + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + @testset "opb = $opb" for opb in [identity, transpose, adjoint] + elty <: Complex && opb == adjoint && continue + A = rand(elty, 10, 10) + A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) + A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A + A = sparse(A) + B = opb == identity ? rand(elty, 10, 2) : rand(elty, 2, 10) + dA = CuSparseMatrixBSR(A, 1) + dB = CuArray(B) + C = triangle(opa(A)) \ opb(B) + dC = triangle(opa(dA)) \ opb(dB) + @test C ≈ collect(dC) + end end end end @@ -421,8 +427,24 @@ using LinearAlgebra, SparseArrays for SparseMatrixType in (CuSparseMatrixCOO, CuSparseMatrixCSC, CuSparseMatrixCSR) SparseMatrixType == CuSparseMatrixCOO && CUSPARSE.version() < v"12.0" && continue for triangle in [LowerTriangular, UnitLowerTriangular, UpperTriangular, UnitUpperTriangular] + @testset "ldiv!($triangle($SparseMatrixType), CuVector) -- $elty" for elty in [Float64,ComplexF64] + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + SparseMatrixType == CuSparseMatrixCSC && elty <: Complex && opa == adjoint && continue + A = A = rand(elty, 10, 10) + A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) + A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A + A = sparse(A) + y = rand(elty, 10) + dA = SparseMatrixType(A) + dy = CuArray(y) + ldiv!(triangle(opa(A)), y) + ldiv!(triangle(opa(dA)), dy) + @test y ≈ collect(dy) + end + end + @testset "ldiv!(CuVector, $triangle($SparseMatrixType), CuVector) -- $elty" for elty in [Float64,ComplexF64] - for opa in [identity, transpose, adjoint] + @testset "opa = $opa" for opa in [identity, transpose, adjoint] SparseMatrixType == CuSparseMatrixCSC && elty <: Complex && opa == adjoint && continue A = A = rand(elty, 10, 10) A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) @@ -440,7 +462,7 @@ using LinearAlgebra, SparseArrays end @testset "$triangle($SparseMatrixType) \\ CuVector -- $elty" for elty in [Float64,ComplexF64] - for opa in [identity, transpose, adjoint] + @testset "opa = $opa" for opa in [identity, transpose, adjoint] SparseMatrixType == CuSparseMatrixCSC && elty <: Complex && opa == adjoint && continue A = rand(elty, 10, 10) A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) @@ -455,11 +477,29 @@ using LinearAlgebra, SparseArrays end end + @testset "ldiv!($triangle($SparseMatrixType), CuMatrix) -- $elty" for elty in [Float64,ComplexF64] + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + @testset "opb = $opb" for opb in [identity, transpose, adjoint] + SparseMatrixType == CuSparseMatrixCSC && elty <: Complex && opa == adjoint && continue + elty <: Complex && opb == adjoint && continue + A = rand(elty, 10, 10) + A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) + A = triangle in (UnitLowerTriangular, UnitUpperTriangular) ? A - Diagonal(A) + I : A + A = sparse(A) + B = opb == identity ? rand(elty, 10, 2) : rand(elty, 2, 10) + dA = SparseMatrixType(A) + dB = CuArray(B) + ldiv!(triangle(opa(A)), opb(B)) + ldiv!(triangle(opa(dA)), opb(dB)) + @test B ≈ collect(dB) + end + end + end + @testset "ldiv!(CuMatrix, $triangle($SparseMatrixType), CuMatrix) -- $elty" for elty in [Float64,ComplexF64] - for opa in [identity, transpose, adjoint] - for opb in [identity, transpose, adjoint] + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + @testset "opb = $opb" for opb in [identity, transpose, adjoint] SparseMatrixType == CuSparseMatrixCSC && elty <: Complex && opa == adjoint && continue - opb != identity && CUSPARSE.version() < v"12.0" && continue elty <: Complex && opb == adjoint && continue A = rand(elty, 10, 10) A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A) @@ -478,10 +518,9 @@ using LinearAlgebra, SparseArrays end @testset "$triangle($SparseMatrixType) \\ CuMatrix -- $elty" for elty in [Float64,ComplexF64] - for opa in [identity, transpose, adjoint] - for opb in [identity, transpose, adjoint] + @testset "opa = $opa" for opa in [identity, transpose, adjoint] + @testset "opb = $opb" for opb in [identity, transpose, adjoint] SparseMatrixType == CuSparseMatrixCSC && elty <: Complex && opa == adjoint && continue - opb != identity && CUSPARSE.version() < v"12.0" && continue elty <: Complex && opb == adjoint && continue A = rand(elty, 10, 10) A = triangle in (UnitLowerTriangular, LowerTriangular) ? tril(A) : triu(A)