From 3261517b68956fefc391790b8e59e5158dbcb6ce Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 02:58:06 +0530 Subject: [PATCH 01/14] rm NNlibCUDA --- lib/NNlibCUDA/.gitignore | 1 - lib/NNlibCUDA/LICENSE.md | 22 ---- lib/NNlibCUDA/Project.toml | 23 ---- lib/NNlibCUDA/README.md | 5 - lib/NNlibCUDA/src/NNlibCUDA.jl | 16 --- lib/NNlibCUDA/src/batchedmul.jl | 7 -- lib/NNlibCUDA/src/cudnn/activations.jl | 39 ------ lib/NNlibCUDA/src/cudnn/batchnorm.jl | 124 ------------------- lib/NNlibCUDA/src/cudnn/conv.jl | 127 -------------------- lib/NNlibCUDA/src/cudnn/cudnn.jl | 13 -- lib/NNlibCUDA/src/cudnn/pooling.jl | 56 --------- lib/NNlibCUDA/src/cudnn/softmax.jl | 102 ---------------- lib/NNlibCUDA/src/upsample.jl | 157 ------------------------- lib/NNlibCUDA/test/activations.jl | 32 ----- lib/NNlibCUDA/test/batchedmul.jl | 56 --------- lib/NNlibCUDA/test/batchnorm.jl | 8 -- lib/NNlibCUDA/test/conv.jl | 60 ---------- lib/NNlibCUDA/test/pooling.jl | 25 ---- lib/NNlibCUDA/test/runtests.jl | 21 ---- lib/NNlibCUDA/test/softmax.jl | 12 -- lib/NNlibCUDA/test/test_utils.jl | 20 ---- lib/NNlibCUDA/test/upsample.jl | 27 ----- 22 files changed, 953 deletions(-) delete mode 100644 lib/NNlibCUDA/.gitignore delete mode 100644 lib/NNlibCUDA/LICENSE.md delete mode 100644 lib/NNlibCUDA/Project.toml delete mode 100644 lib/NNlibCUDA/README.md delete mode 100644 lib/NNlibCUDA/src/NNlibCUDA.jl delete mode 100644 lib/NNlibCUDA/src/batchedmul.jl delete mode 100644 lib/NNlibCUDA/src/cudnn/activations.jl delete mode 100644 lib/NNlibCUDA/src/cudnn/batchnorm.jl delete mode 100644 lib/NNlibCUDA/src/cudnn/conv.jl delete mode 100644 lib/NNlibCUDA/src/cudnn/cudnn.jl delete mode 100644 lib/NNlibCUDA/src/cudnn/pooling.jl delete mode 100644 lib/NNlibCUDA/src/cudnn/softmax.jl delete mode 100644 lib/NNlibCUDA/src/upsample.jl delete mode 100644 lib/NNlibCUDA/test/activations.jl delete mode 100644 lib/NNlibCUDA/test/batchedmul.jl delete mode 100644 lib/NNlibCUDA/test/batchnorm.jl delete mode 100644 lib/NNlibCUDA/test/conv.jl delete mode 100644 lib/NNlibCUDA/test/pooling.jl delete mode 100644 lib/NNlibCUDA/test/runtests.jl delete mode 100644 lib/NNlibCUDA/test/softmax.jl delete mode 100644 lib/NNlibCUDA/test/test_utils.jl delete mode 100644 lib/NNlibCUDA/test/upsample.jl diff --git a/lib/NNlibCUDA/.gitignore b/lib/NNlibCUDA/.gitignore deleted file mode 100644 index c177dd3c6..000000000 --- a/lib/NNlibCUDA/.gitignore +++ /dev/null @@ -1 +0,0 @@ -#Manifest.toml diff --git a/lib/NNlibCUDA/LICENSE.md b/lib/NNlibCUDA/LICENSE.md deleted file mode 100644 index 824b06f8b..000000000 --- a/lib/NNlibCUDA/LICENSE.md +++ /dev/null @@ -1,22 +0,0 @@ -The NNlib.jl package is licensed under the MIT "Expat" License: - -> Copyright (c) 2017-19: Julia Computing, Inc., Mike J Innes, and Contributors -> -> Permission is hereby granted, free of charge, to any person obtaining a copy -> of this software and associated documentation files (the "Software"), to deal -> in the Software without restriction, including without limitation the rights -> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -> copies of the Software, and to permit persons to whom the Software is -> furnished to do so, subject to the following conditions: -> -> The above copyright notice and this permission notice shall be included in all -> copies or substantial portions of the Software. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -> SOFTWARE. -> diff --git a/lib/NNlibCUDA/Project.toml b/lib/NNlibCUDA/Project.toml deleted file mode 100644 index a335e9544..000000000 --- a/lib/NNlibCUDA/Project.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "NNlibCUDA" -uuid = "a00861dc-f156-4864-bf3c-e6376f28a68d" -version = "0.1.0" - -[deps] -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[compat] -CUDA = "3.0" -NNlib = "0.7" -julia = "1.6" - -[extras] -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" - -[targets] -test = ["ForwardDiff", "Test", "Zygote"] diff --git a/lib/NNlibCUDA/README.md b/lib/NNlibCUDA/README.md deleted file mode 100644 index 17703ac97..000000000 --- a/lib/NNlibCUDA/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# NNlibCUDA - -CUDA compatibility for NNlib.jl. - -Julia gpu kernels are in `src/`, while wrappers around `cudnn` are in `src/cudnn/`. diff --git a/lib/NNlibCUDA/src/NNlibCUDA.jl b/lib/NNlibCUDA/src/NNlibCUDA.jl deleted file mode 100644 index 48c11dfa3..000000000 --- a/lib/NNlibCUDA/src/NNlibCUDA.jl +++ /dev/null @@ -1,16 +0,0 @@ -module NNlibCUDA - -using NNlib -using CUDA -using Random, Statistics - -include("upsample.jl") -include("batchedmul.jl") -include("cudnn/cudnn.jl") -include("cudnn/conv.jl") -include("cudnn/pooling.jl") -include("cudnn/softmax.jl") -include("cudnn/activations.jl") -include("cudnn/batchnorm.jl") - -end # module diff --git a/lib/NNlibCUDA/src/batchedmul.jl b/lib/NNlibCUDA/src/batchedmul.jl deleted file mode 100644 index 471cac00e..000000000 --- a/lib/NNlibCUDA/src/batchedmul.jl +++ /dev/null @@ -1,7 +0,0 @@ -# Batched matrix multiplication -# 1st argument is produced by NNlib.storage_type(A) -NNlib._batched_gemm!(::Type{<:CuArray}, transA::Char, transB::Char, α::Number, A, B, β::Number, C) = - CUBLAS.gemm_strided_batched!(transA, transB, α, A, B, β, C) - -Base.unsafe_convert(::Type{CuPtr{T}}, A::NNlib.BatchedAdjOrTrans{T}) where {T} = - Base.unsafe_convert(CuPtr{T}, parent(A)) diff --git a/lib/NNlibCUDA/src/cudnn/activations.jl b/lib/NNlibCUDA/src/cudnn/activations.jl deleted file mode 100644 index d6df03803..000000000 --- a/lib/NNlibCUDA/src/cudnn/activations.jl +++ /dev/null @@ -1,39 +0,0 @@ - -# Activation - -using Base.Broadcast -using CUDA.CUDNN: cudnnActivationForward!, cudnnOpTensor!, - CUDNN_ACTIVATION_TANH,CUDNN_ACTIVATION_SIGMOID,CUDNN_ACTIVATION_ELU, - CUDNN_ACTIVATION_RELU,CUDNN_ACTIVATION_CLIPPED_RELU,CUDNN_OP_TENSOR_MAX - -for (f, op) in [ - CUDA.tanh => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_TANH), - NNlib.σ => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_SIGMOID), - NNlib.elu => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_ELU), - NNlib.relu => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_RELU), - # NNlib.relu6 => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_CLIPPED_RELU, coef=6.0), - # NNlib.leakyrelu => (src,dst)->cudnnOpTensor!(dst, src, src; op=CUDNN_OP_TENSOR_MAX, alpha1=0.01), - ] - - @eval begin - # in-place - function Base.materialize!(dst::DenseCuArray{<:CUDNNFloat}, - bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}}) - $op(bc.args[1], dst) - return dst - end - - # out of place - function Base.materialize(bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}}) - ElType = Broadcast.combine_eltypes(bc.f, bc.args) - dst = similar(bc, ElType) - $op(bc.args[1], dst) - return dst - end - end -end - -# CUDNN_ACTIVATION_IDENTITY does not work with cudnnActivationForward -# FIXME: put this optimization in GPUArrays' `copyto!` (like Base.Broadcast's `copyto!`) -Base.broadcasted(::typeof(identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x - diff --git a/lib/NNlibCUDA/src/cudnn/batchnorm.jl b/lib/NNlibCUDA/src/cudnn/batchnorm.jl deleted file mode 100644 index eb742c2fb..000000000 --- a/lib/NNlibCUDA/src/cudnn/batchnorm.jl +++ /dev/null @@ -1,124 +0,0 @@ -using CUDA.CUDNN: CUDNN_BN_MIN_EPSILON, cudnnBatchNormalizationBackward, - cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL, - cudnnBatchNormalizationForwardTraining - - -# TODO: replace with new cudnn normalization interface -# https://github.com/JuliaGPU/CUDA.jl/blob/master/lib/cudnn/normalization.jl - -mutable struct BNCache - mean - ivar -end - -BNCache() = BNCache(nothing, nothing) - -@inline _wsize(y) = (fill(1, ndims(y)-2)..., size(y)[end-1], 1) - -# NOTE: CuDNN supports only 4D and 5D Tensors for BatchNorm Operations -# so reshape a 2D Tensor into 4D -batchnorm(g::DenseCuArray{T}, b::DenseCuArray{T}, x::DenseCuArray{T,2}, - running_mean::DenseCuArray{T}, running_var::DenseCuArray{T}, momentum; - cache = nothing, alpha = T(1), beta = T(0), - eps = T(1e-5), training = true) where T<:Union{Float32, Float64} = - dropdims(batchnorm(g, b, reshape(x, 1, 1, size(x, 1), size(x, 2)), running_mean, running_var, momentum, - cache = cache, alpha = alpha, beta = beta, eps = eps, training = training), dims = (1, 2)) - -function batchnorm(g::DenseCuArray{T}, b::DenseCuArray{T}, x::Union{DenseCuArray{T,4},DenseCuArray{T,5}}, - running_mean::DenseCuArray{T}, running_var::DenseCuArray{T}, momentum; - cache = nothing, alpha = T(1), beta = T(0), - eps = T(1e-5), training = true) where T<:Union{Float32, Float64} - cudnnBNForward!(similar(x), g, b, x, running_mean, running_var, momentum, cache = cache, - alpha = alpha, beta = beta, eps = eps, training = training) -end - -function cudnnBNForward!(y::DenseCuArray{T}, g::DenseCuArray{T}, b::DenseCuArray{T}, x::DenseCuArray{T}, - running_mean::DenseCuArray{T}, running_var::DenseCuArray{T}, - momentum; cache = nothing, - alpha = T(1), beta = T(0), - eps = T(1e-5), training = true) where T<:Union{Float32, Float64} - dims = _wsize(x) - if eps < CUDNN_BN_MIN_EPSILON - # warn("eps ",eps," is too small for CuDNN so eps has been assigned the value ", CUDNN_BN_MIN_EPSILON) - eps = CUDNN_BN_MIN_EPSILON - end - xd = cudnnTensorDescriptor(x) - yd = cudnnTensorDescriptor(y) - gd = cudnnTensorDescriptor(CUDNN_TENSOR_NCHW, cudnnDataType(T), Cint(length(dims)), dim4(dims,Val(CUDNN_TENSOR_NCHW))) - - if training - - if cache !== nothing - mean = zeros(CuArray{T}, dims...) - ivar = ones(CuArray{T}, dims...) - else - mean = CU_NULL - ivar = CU_NULL - end - - cudnnBatchNormalizationForwardTraining(handle(), CUDNN_BATCHNORM_SPATIAL, scalingParameter(T, alpha), scalingParameter(T, beta), xd, x, yd, y, gd, g, b, momentum, running_mean, running_var, eps, mean, ivar) - - if cache !== nothing - cache.mean = mean - cache.ivar = ivar - end - else - cudnnBatchNormalizationForwardInference(handle(), CUDNN_BATCHNORM_SPATIAL, scalingParameter(T, alpha), scalingParameter(T, beta), xd, x, yd, y, gd, g, b, running_mean, running_var, eps) - end - return y -end - -function ∇batchnorm(g::DenseCuArray{T}, b::DenseCuArray{T}, x::DenseCuArray{T, 2}, dy::DenseCuArray{T, 2}, - running_mean::DenseCuArray{T}, running_var::DenseCuArray{T}, momentum; - cache = nothing, eps = T(1e-5), alpha = T(1), - beta = T(0), training = true) where T<:Union{Float32, Float64} - dg, db, dx = ∇batchnorm(g, b, reshape(x, 1, 1, size(x, 1), size(x, 2)), reshape(dy, 1, 1, size(dy, 1), - size(dy, 2)), running_mean, running_var, momentum, cache = cache, eps = eps, - alpha = alpha, beta = beta, training = training) - (dg, db, dropdims(dx, dims = (1, 2))) -end - -function ∇batchnorm(g::DenseCuArray{T}, b::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArray{T}, - running_mean::DenseCuArray{T}, running_var::DenseCuArray{T}, momentum; - cache = nothing, eps = T(1e-5), alpha = T(1), - beta = T(0), training = true) where T<:Union{Float32, Float64} - dg = similar(g) - db = similar(b) - dx = similar(x) - cudnnBNBackward!(dg, g, db, dx, x, dy, running_mean, running_var, T(momentum), - training = training, cache = cache, eps = eps, alpha = alpha, beta = beta) - (dg, db, dx) -end - -function cudnnBNBackward!(dg::DenseCuArray{T}, g::DenseCuArray{T}, db::DenseCuArray{T}, - dx::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArray{T}, - running_mean::DenseCuArray{T}, running_var::DenseCuArray{T}, - momentum; cache = nothing, eps = T(1e-5), - alpha = T(1), beta = T(0), - dalpha = T(1), dbeta = T(0), training = true) where T<:Union{Float32, Float64} - if training - xd = cudnnTensorDescriptor(x) - dyd = cudnnTensorDescriptor(dy) - dxd = cudnnTensorDescriptor(dx) - gd = cudnnTensorDescriptor(CUDNN_TENSOR_NCHW, cudnnDataType(T), Cint(length(_wsize(x))), dim4(_wsize(x),Val(CUDNN_TENSOR_NCHW))) - if cache !== nothing - mean, ivar = cache.mean, cache.ivar - info("mean and ivar are fetched from the cache") - else - mean, ivar = CU_NULL, CU_NULL - end - - if eps < CUDNN_BN_MIN_EPSILON - eps = CUDNN_BN_MIN_EPSILON - end - - cudnnBatchNormalizationBackward(handle(), CUDNN_BATCHNORM_SPATIAL, scalingParameter(T, alpha), scalingParameter(T, beta), scalingParameter(T, dalpha), scalingParameter(T, dbeta), xd, x, dyd, dy, dxd, dx, gd, g, dg, db, eps, mean, ivar) - else - ivar = 1 ./ sqrt.(reshape(running_var, _wsize(x)) .+ eps) - dx .= dy .* reshape(g, _wsize(x)) .* ivar - rdims = ((1:ndims(x)-2)..., ndims(x)) - dg .= vec(sum(dy .* (x .- reshape(running_mean, _wsize(x))) .* ivar, dims=rdims)) - db .= vec(sum(dy, dims=rdims)) - end -end - \ No newline at end of file diff --git a/lib/NNlibCUDA/src/cudnn/conv.jl b/lib/NNlibCUDA/src/cudnn/conv.jl deleted file mode 100644 index bbcdde3ac..000000000 --- a/lib/NNlibCUDA/src/cudnn/conv.jl +++ /dev/null @@ -1,127 +0,0 @@ - -# Deprecated methods -using NNlib: DenseConvDims -import NNlib: stride, padding, dilation, flipkernel, spatial_dims, kernel_size, - conv!, ∇conv_filter!, ∇conv_data!, - maxpool!, meanpool!, ∇maxpool!, ∇meanpool!, PoolDims - -using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims, - cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf, - cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf, - cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter, - cudnnConvolutionBackwardBias - -const CUDNNFloat = Union{Float16,Float32,Float64} - -# Since CUDNN does not support 1D convolution, Conv in Flux will give a CUDNNError if the size is 1-dimensional. -fix1d(x) = x -fix1d(x::DenseCuArray{T, 3}) where T = reshape(x, 1, size(x, 1), size(x, 2), size(x, 3)) -fix1d(cdims::DenseConvDims{1,K,C_in,C_out,S,P,D,F}) where {K,C_in,C_out,S,P,D,F} = - DenseConvDims{2,(1,K...),C_in,C_out,(1,S...),(0,0,P...),(1,D...),F}((1,cdims.I...)) -fix1d(pdims::PoolDims{1,K,S,P,D}) where {K,S,P,D,F} = - PoolDims{2,(1,K...),(1,S...),(0,0,P...),(1,D...)}((1,pdims.I...), pdims.C_in) - -# Convolution - -function cudnnConvolutionDescriptor(cdims::DenseConvDims, x::DenseCuArray{T}) where T - cdims, x = fix1d(cdims), fix1d(x) - mode=(NNlib.flipkernel(cdims) ? CUDNN_CROSS_CORRELATION : CUDNN_CONVOLUTION) - cudnnConvolutionDescriptor(convdims(nnlibPadding(cdims),size(x)), convdims(NNlib.stride(cdims),size(x)), convdims(NNlib.dilation(cdims),size(x)), mode, cudnnDataType(T), math_mode(), CUDNN_DEFAULT_REORDER, Cint(1)) -end - -function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, cdims::DenseConvDims; - alpha=1, beta=0, algo=-1) where T<:CUDNNFloat - if cudnnversion() < v"6" - all(x -> x == 1, dilation(cdims)) || error("Only dilation = 1 is supported in cuDNN version < 6") - end - if algo != -1 - @warn "algo option has been deprecated, the fastest algo is computed automatically" maxlog=1 - end - d = cudnnConvolutionDescriptor(cdims, x) - cudnnConvolutionForward!(y, w, x, d; alpha, beta, z=y) -end - -function NNlib.conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, - cdims::DenseConvDims, bias::DenseCuArray{T}, σ=identity; - z::DenseCuArray{T}=y, alpha=1, beta=0, algo=-1) where T<:CUDNNFloat - if cudnnversion() < v"6" - all(x -> x == 1, dilation(cdims)) || error("Only dilation = 1 is supported in cuDNN version < 6") - end - if algo != -1 - @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1 - end - d = cudnnConvolutionDescriptor(cdims, x) - # only relu and identity are supported by cudnnConvolutionForward! - activation = (σ == NNlib.relu ? CUDNN_ACTIVATION_RELU : CUDNN_ACTIVATION_IDENTITY) - cudnnConvolutionForward!(y, w, x, d; z, bias, activation, alpha, beta) - if activation === CUDNN_ACTIVATION_IDENTITY && σ ∉ (nothing, identity) - y = σ.(y) - end - return y -end - -function ∇conv_data!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, w::DenseCuArray{T}, - cdims::DenseConvDims; alpha=1, beta=0, algo=-1) where T<:CUDNNFloat - if cudnnversion() < v"6" - all(x -> x == 1, dilation(cdims)) || error("Only dilation = 1 is supported in cuDNN version < 6") - end - if algo != -1 - @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1 - end - alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta); - xDesc, yDesc, wDesc = cudnnTensorDescriptor(dx), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(w) - convDesc = cudnnConvolutionDescriptor(cdims, dx) - p = cudnnConvolutionBwdDataAlgoPerf(wDesc, w, yDesc, dy, convDesc, xDesc, dx) - @workspace size=p.memory workspace->cudnnConvolutionBackwardData(handle(), alpha, wDesc, w, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, xDesc, dx) - return dx -end - -function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArray{T}, - cdims::DenseConvDims; alpha=1, beta=0, algo=-1) where T<:CUDNNFloat - if cudnnversion() < v"6" - all(x -> x == 1, dilation(cdims)) || error("Only dilation = 1 is supported in cuDNN version < 6") - end - if algo != -1 - @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1 - end - alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta); - xDesc, yDesc, wDesc = cudnnTensorDescriptor(x), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(dw) - convDesc = cudnnConvolutionDescriptor(cdims, x) - p = cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, yDesc, dy, convDesc, wDesc, dw); - @workspace size=p.memory workspace->cudnnConvolutionBackwardFilter(handle(), alpha, xDesc, x, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, wDesc, dw); - return dw -end - - -function ∇conv_bias!(db::DenseCuArray{T}, dy::DenseCuArray{T}; alpha=1, beta=0) where T<:CUDNNFloat - alpha,beta = scalingParameter(T,alpha), scalingParameter(T,beta) - bDesc, yDesc = cudnnTensorDescriptor.((db,dy)) - cudnnConvolutionBackwardBias(handle(), alpha, yDesc, dy, beta, bDesc, db) - return db -end - -# Compatibility shims until users upgrade to new NNlib format -function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}; pad=0, stride=1, flipkernel=0, dilation=1, kwargs...) where {T<:CUDNNFloat} - cdims = DenseConvDims(x, w; padding=pad, stride=stride, flipkernel=(flipkernel!=0), dilation=dilation) - return conv!(y, x, w, cdims; kwargs...) -end - -function ∇conv_filter!(dw::DenseCuArray{T}, dy::DenseCuArray{T}, x::DenseCuArray{T}; pad=0, stride=1, flipkernel=0, dilation=1, kwargs...) where {T<:CUDNNFloat} - cdims = DenseConvDims(x, dw; padding=pad, stride=stride, flipkernel=(flipkernel!=0), dilation=dilation) - # NOTE!!! This compat shim re-arranges the argument order! - return ∇conv_filter!(dw, x, dy, cdims; kwargs...) -end - - -function cudnnConvolutionForward(y::DenseCuArray{T,N}, x::DenseCuArray{T,N}, w::DenseCuArray{T,N}, - cdims::DenseConvDims; algo=0, alpha=1, beta=0) where {T,N} - # @warn "`cudnnConvolutionForward(y,x,w,c::DenseConvDims)` is deprecated, please use one of the methods in `@doc cudnnConvolutionForward!`." maxlog=1 - cudnnConvolutionForward!(y, w, x; alpha, beta, padding=nnlibPadding(cdims), stride=NNlib.stride(cdims), dilation=NNlib.dilation(cdims), mode=(NNlib.flipkernel(cdims) ? CUDNN_CROSS_CORRELATION : CUDNN_CONVOLUTION)) -end - -function cudnnConvolutionBiasActivationForward(y::DenseCuArray{T,N}, x::DenseCuArray{T,N}, w::DenseCuArray{T,N}, z::DenseCuArray{T,N}, bias::DenseCuArray{T,N}, - cdims::DenseConvDims; algo=0, alpha1=1, alpha2=1, - activationMode=CUDNN_ACTIVATION_RELU, activationCoeff=0.0, activationReluNanOpt=CUDNN_NOT_PROPAGATE_NAN) where {T,N} - # @warn "`cudnnConvolutionBiasActivationForward` is deprecated, please use one of the methods in `@doc cudnnConvolutionForward!`." maxlog=1 - cudnnConvolutionForward!(y, w, x; bias, activation=activationMode, z, alpha=alpha1, beta=alpha2, padding=nnlibPadding(cdims), stride=NNlib.stride(cdims), dilation=NNlib.dilation(cdims), mode=(NNlib.flipkernel(cdims) ? CUDNN_CROSS_CORRELATION : CUDNN_CONVOLUTION)) -end diff --git a/lib/NNlibCUDA/src/cudnn/cudnn.jl b/lib/NNlibCUDA/src/cudnn/cudnn.jl deleted file mode 100644 index 99ee1b6a9..000000000 --- a/lib/NNlibCUDA/src/cudnn/cudnn.jl +++ /dev/null @@ -1,13 +0,0 @@ -using CUDA.CUDNN: handle, @workspace, cudnnTensorDescriptor, cudnnFilterDescriptor, - cudnnDataType, math_mode, CUDNN_DEFAULT_REORDER, CUDNN_CROSS_CORRELATION, - CUDNN_NOT_PROPAGATE_NAN, CUDNN_TENSOR_NCHW, dim4 - -cudnnversion() = CUDA.CUDNN.version() - -function nnlibPadding(dims) - pd = NNlib.padding(dims) - if !all(pd[1:2:end] .== pd[2:2:end]) - @warn "cuDNN does not support asymmetric padding; defaulting to symmetric choice" maxlog=1 - end - return pd[1:2:end] -end \ No newline at end of file diff --git a/lib/NNlibCUDA/src/cudnn/pooling.jl b/lib/NNlibCUDA/src/cudnn/pooling.jl deleted file mode 100644 index be9ecc5d2..000000000 --- a/lib/NNlibCUDA/src/cudnn/pooling.jl +++ /dev/null @@ -1,56 +0,0 @@ -using CUDA.CUDNN: cudnnPoolingMode_t, CUDNN_POOLING_MAX, - CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING, - cudnnPoolingForward!, pooldims, cudnnPoolingBackward - -import CUDA.CUDNN: cudnnPoolingDescriptor - -function cudnnPoolingDescriptor(pdims::PoolDims, x::DenseCuArray{T}, mode::cudnnPoolingMode_t) where T - pdims, x = fix1d(pdims), fix1d(x) - window, padding, stride = NNlib.kernel_size(pdims), nnlibPadding(pdims), NNlib.stride(pdims) - nanOpt = CUDNN_NOT_PROPAGATE_NAN - cudnnPoolingDescriptor(mode, nanOpt, Cint(max(2,ndims(x)-2)), pooldims(window,size(x)), pooldims(padding,size(x)), pooldims(stride,size(x))) -end - -function maxpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDNNFloat - d = cudnnPoolingDescriptor(pdims, x, CUDNN_POOLING_MAX) - cudnnPoolingForward!(y, x, d) -end - -function ∇maxpool!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDNNFloat - xDesc, yDesc = cudnnTensorDescriptor.((x, y)) - d = cudnnPoolingDescriptor(pdims, x, CUDNN_POOLING_MAX) - alpha, beta = scalingParameter(T,1), scalingParameter(T,0) - cudnnPoolingBackward(handle(), d, alpha, yDesc, y, yDesc, dy, xDesc, x, beta, xDesc, dx) - return dx -end - -function meanpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDNNFloat - d = cudnnPoolingDescriptor(pdims, x, CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING) - cudnnPoolingForward!(y, x, d) -end - -function ∇meanpool!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDNNFloat - xDesc, yDesc = cudnnTensorDescriptor.((x, y)) - d = cudnnPoolingDescriptor(pdims, x, CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING) - alpha, beta = scalingParameter(T,1), scalingParameter(T,0) - cudnnPoolingBackward(handle(), d, alpha, yDesc, y, yDesc, dy, xDesc, x, beta, xDesc, dx) - return dx -end - -function maxpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, k; pad=map(_->0,k), stride=k) where {T<:CUDNNFloat} - pdims = PoolDims(x, k; padding=pad, stride=stride) - return maxpool!(y, x, pdims) -end - -function meanpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, k; pad=map(_->0,k), stride=k) where {T<:CUDNNFloat} - pdims = PoolDims(x, k; padding=pad, stride=stride) - return meanpool!(y, x, pdims) -end - -# Deprecated methods -function cudnnPoolingForward(y::DenseCuArray{T,N}, x::DenseCuArray{T,N}, pdims::NNlib.PoolDims; - alpha=1, beta=0, mode=CUDNN_POOLING_MAX) where {T,N} - # @warn "`cudnnPoolingForward(y,x,d::PoolDims)` is deprecated, please use one of the methods in `@doc cudnnPoolingForward`." maxlog=1 - cudnnPoolingForward!(y, x; window=NNlib.kernel_size(pdims), padding=nnlibPadding(pdims), stride=NNlib.stride(pdims), mode, alpha, beta) -end - diff --git a/lib/NNlibCUDA/src/cudnn/softmax.jl b/lib/NNlibCUDA/src/cudnn/softmax.jl deleted file mode 100644 index 06a373132..000000000 --- a/lib/NNlibCUDA/src/cudnn/softmax.jl +++ /dev/null @@ -1,102 +0,0 @@ -import NNlib: softmax, softmax!, ∇softmax, ∇softmax!, - logsoftmax, logsoftmax!, ∇logsoftmax, ∇logsoftmax! - -using CUDA.CUDNN: CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL, - CUDNN_SOFTMAX_FAST, CUDNN_SOFTMAX_ACCURATE, cudnnSoftmaxForward!, - cudnnSoftmaxBackward - -# Softmax - -# @denizyuret: do not do inplace operations with softmax/logsoftmax when (1) cpu version is not, (2) one can use softmax! -function softmax(x::T; dims=1) where {T<:DenseCuArray} - softmax!(similar(x), x; dims) -end - -function ∇softmax(dy::T, x::T, y::T; dims=1) where {T<:DenseCuArray} - ∇softmax!(similar(x), dy, x, y; dims) -end - -function logsoftmax(x::T; dims=1) where {T<:DenseCuArray} - logsoftmax!(similar(x), x; dims) -end - -function ∇logsoftmax(dy::T, x::T, y::T; dims=1) where {T<:DenseCuArray} - ∇logsoftmax!(similar(x), dy, x, y; dims) -end - -# @denizyuret: backup implementations for unsupported/slow size/dims combinations: -function _softmax!(y::T, x::T; dims) where {T<:DenseCuArray} - y .= exp.(x .- maximum(x; dims)) - y ./= sum(y; dims) -end - -function _∇softmax!(dx::T, dy::T, x::T, y::T; dims) where {T<:DenseCuArray} - dx .= y .* (dy .- sum(dy .* y; dims)) -end - -function _logsoftmax!(y::T, x::T; dims) where {T<:DenseCuArray} - y .= x .- maximum(x; dims) - y .-= log.(sum(exp.(y); dims)) -end - -function _∇logsoftmax!(dx::T, dy::T, x::T, y::T; dims) where {T<:DenseCuArray} - dx .= dy .- sum(dy; dims) .* exp.(y) -end - -# Trick by @norci to use cudnn for softmax dims args that are contiguous: -# If dims=(dmin:dmax) then CUDNN_SOFTMAX_MODE_CHANNEL does the trick with reshape -# (1, prod(size(x)[1:dmin-1]), prod(size(x)[dmin:dmax]), :) -# softmaxdims returns nothing when the backup implementation should be used. - -function softmaxdims(x, dims) - dims === Colon() && return (1, 1, length(x), 1) - mind,maxd = minimum(dims),maximum(dims) - all(i in dims for i in mind:maxd) || return nothing # cannot handle if not contiguous - stride = dimsize = 1 - for i in 1:(mind-1); stride *= size(x,i); end # Using size(x,i) assumes trailing dims = 1, robust to maxd > ndims(x) - for i in mind:maxd; dimsize *= size(x,i); end - batchsize = length(x)÷(stride*dimsize) - # Here is a region where cudnn is slower, so we go with the backup: - batchsize == 1 && 64 <= stride <= 4096 && 64 <= dimsize <= 4096 && return nothing - return (1, stride, dimsize, batchsize) -end - -# Determine softmax algo based on math_mode - -softmaxalgo() = (CUDA.math_mode()===CUDA.FAST_MATH ? CUDNN_SOFTMAX_FAST : CUDNN_SOFTMAX_ACCURATE) - -# Main implementations: - -function softmax!(y::T, x::T = y; dims=1) where {T<:DenseCuArray} - s = softmaxdims(x, dims) - s === nothing && return _softmax!(y, x; dims) - cudnnSoftmaxForward!(reshape(y,s), reshape(x,s); mode = CUDNN_SOFTMAX_MODE_CHANNEL, algo = softmaxalgo()) - return y -end - -function ∇softmax!(dx::T, dy::T, x::T, y::T; dims=1) where {R,T<:DenseCuArray{R}} - s = softmaxdims(x, dims) - s === nothing && return _∇softmax!(dx, dy, x, y; dims) - xDesc = cudnnTensorDescriptor(reshape(x,s)) - alpha, beta = scalingParameter(R,1), scalingParameter(R,0) - cudnnSoftmaxBackward(handle(), softmaxalgo(), CUDNN_SOFTMAX_MODE_CHANNEL, - alpha, xDesc, y, xDesc, dy, beta, xDesc, dx) - return dx -end - -function logsoftmax!(y::T, x::T = y; dims=1) where {T<:DenseCuArray} - s = softmaxdims(x, dims) - s === nothing && return _logsoftmax!(y, x; dims) - cudnnSoftmaxForward!(reshape(y,s), reshape(x,s); mode = CUDNN_SOFTMAX_MODE_CHANNEL, algo = CUDNN_SOFTMAX_LOG) - return y -end - -function ∇logsoftmax!(dx::T, dy::T, x::T, y::T; dims=1) where {R,T<:DenseCuArray{R}} - s = softmaxdims(x, dims) - s === nothing && return _∇logsoftmax!(dx, dy, x, y; dims) - xDesc = cudnnTensorDescriptor(reshape(x,s)) - alpha, beta = scalingParameter(R,1), scalingParameter(R,0) - cudnnSoftmaxBackward(handle(), CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL, - alpha, xDesc, y, xDesc, dy, beta, xDesc, dx) - return dx -end diff --git a/lib/NNlibCUDA/src/upsample.jl b/lib/NNlibCUDA/src/upsample.jl deleted file mode 100644 index 287adbd28..000000000 --- a/lib/NNlibCUDA/src/upsample.jl +++ /dev/null @@ -1,157 +0,0 @@ - -# -# Upsampling -# - -# GPU based bilinear upsampling including its gradient -# -# Based on the Caffe2 implementation at: -# The code is a translation from the following files: -# - https://github.com/pytorch/pytorch/blob/v1.8.0-rc1/caffe2/operators/upsample_op.cu -# - https://github.com/pytorch/pytorch/blob/v1.8.0-rc1/caffe2/core/common_gpu.h -# -# Copyright (c) 2016-2021 Facebook Inc. -# Copyright (c) 2015 Google Inc. -# Copyright (c) 2015 Yangqing Jia -# Copyright 2019-2020 Kakao Brain -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, are -# permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this list of -# conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, this list of -# conditions and the following disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America and -# IDIAP Research Institute nor the names of its contributors may be used to endorse or -# promote products derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Forward and backward pass have been tested to produce the same output -# as pytorch with align_corners=True - it works modulo bit noise. - -function upsample_bilinear_whcn_kernel!(n_elem, rheight, rwidth, x, y) - index = (threadIdx().x - 1) + (blockIdx().x - 1) * blockDim().x - - if index < n_elem - in_w, in_h, channels, batchsize = size(x) - out_w, out_h, _, _ = size(y) - - ow = index % out_w - oh = index ÷ out_w - - real_index = rheight*oh - ih0 = Base.floor(Int, real_index) - offset = (ih0 < in_h-1) ? 1 : 0 - ih1 = ih0 + offset + 1 - h1lambda = real_index - ih0 - h0lambda = 1 - h1lambda - ih0 += 1 - - real_index = rwidth*ow - iw0 = Base.floor(Int, real_index) - offset = (iw0 < in_w-1) ? 1 : 0 - iw1 = iw0 + offset + 1 - w1lambda = real_index - iw0 - w0lambda = 1 - w1lambda - iw0 += 1 - - @inbounds for n in 1:batchsize - for c in 1:channels - val = h0lambda * (w0lambda * x[iw0, ih0, c, n] + # h0 * w0 * i00 - w1lambda * x[iw1, ih0, c, n]) + # h0 * w1 * i01 - h1lambda * (w0lambda * x[iw0, ih1, c, n] + # h1 * w0 * i10 - w1lambda * x[iw1, ih1, c, n]) # h1 * w1 * i11 - y[ow+1, oh+1, c, n] = val - end - end - end - return nothing -end - -# Δ is the gradient backpropagated from downstream layers -function ∇upsample_bilinear_whcn_kernel!(n_elem, rheight, rwidth, Δ, dx) - index = (threadIdx().x - 1) + (blockIdx().x - 1) * blockDim().x - - if index < n_elem - in_width, in_height, channels, batchsize = size(Δ) - out_width, out_height, _, _ = size(dx) - - iw = index % in_width - ih = index ÷ in_width - - # Compute Y axis lambdas - real_index_h = rheight*ih - oh0 = Base.floor(Int, real_index_h) - offset = (oh0 < out_height-1) ? 1 : 0 - oh1 = oh0 + offset + 1 - h1lambda = real_index_h - oh0 - h0lambda = 1 - h1lambda - oh0 += 1 - - # # Compute X axis lambdas - real_index_w = rwidth * iw - ow0 = Base.floor(Int, real_index_w) - offset = (ow0 < out_width - 1) ? 1 : 0 - ow1 = ow0 + offset + 1 - w1lambda = real_index_w - ow0 - w0lambda = 1 - w1lambda - ow0 += 1 - - @inbounds for n in 1:batchsize - for c in 1:channels - val = Δ[iw+1, ih+1, c, n] - @atomic dx[ow0, oh0, c, n] += h0lambda * w0lambda * val - @atomic dx[ow1, oh0, c, n] += h0lambda * w1lambda * val - @atomic dx[ow0, oh1, c, n] += h1lambda * w0lambda * val - @atomic dx[ow1, oh1, c, n] += h1lambda * w1lambda * val - end - end - end # if - return nothing -end - -function NNlib.upsample_bilinear_whcn!(y::CuArray{T,4}, x::CuArray{T,4}) where T - w,h,c,n = size(x) - out_w, out_h = (size(y,1), size(y,2)) - - out_size = out_h*out_w - rheight = T((h-1)/(out_h-1)) - rwidth = T((w-1)/(out_w-1)) - - kernel = @cuda launch=false upsample_bilinear_whcn_kernel!(out_size, rheight, rwidth, x, y) - config = launch_configuration(kernel.fun; max_threads=256) - threads = Base.min(out_size, config.threads) - blocks = cld(out_size, threads) - kernel(out_size, rheight, rwidth, x, y; threads=threads, blocks=blocks) - return y -end - -function NNlib.∇upsample_bilinear_whcn!(dx::CuArray{T,4}, Δ::CuArray{T,4}) where T - w,h,c,n = Base.size(Δ) - out_w, out_h = (size(dx, 1), size(dx, 2)) - in_size = h*w - rheight = T((out_h-1)/(h-1)) # reversed compared to forward pass - rwidth = T((out_w-1)/(w-1)) - - kernel = @cuda launch=false ∇upsample_bilinear_whcn_kernel!(in_size, rheight, rwidth, Δ, dx) - config = launch_configuration(kernel.fun; max_threads=256) - threads = Base.min(in_size, config.threads) - blocks = cld(in_size, threads) - kernel(in_size, rheight, rwidth, Δ, dx; threads=threads, blocks=blocks) - return dx -end diff --git a/lib/NNlibCUDA/test/activations.jl b/lib/NNlibCUDA/test/activations.jl deleted file mode 100644 index 67ca7c1cc..000000000 --- a/lib/NNlibCUDA/test/activations.jl +++ /dev/null @@ -1,32 +0,0 @@ -@testset "activation broadcast" begin - for f in NNlib.ACTIVATIONS - if f ∉ [:rrelu] - @eval gputest(x -> $f.(x), rand(Float64, 5)) - end - end -end - -@testset "forward diff" begin - f(x) = logσ.(x) - ds = Dual.(rand(5),1) - @test f(ds) ≈ collect(f(CuArray(ds))) -end - -@testset "softplus" begin - # softplus does not give `Inf` for large arguments - x = CuArray([1000.]) - @test all(softplus.(x) .== x) -end - -@testset "input is preserved" begin - x = CUDA.ones(1) - @test Array(x) == [1f0] - tanh.(x) - @test Array(x) == [1f0] - y = tanh.(x) - @test Array(x) == [1f0] - @test Array(y) == [tanh(1f0)] - x .= tanh.(y) - @test Array(y) == [tanh(1f0)] - @test Array(x) == [tanh(tanh(1f0))] -end diff --git a/lib/NNlibCUDA/test/batchedmul.jl b/lib/NNlibCUDA/test/batchedmul.jl deleted file mode 100644 index d6a3b6023..000000000 --- a/lib/NNlibCUDA/test/batchedmul.jl +++ /dev/null @@ -1,56 +0,0 @@ -@testset "batched_mul" begin - using NNlib: batched_mul, batched_mul!, batched_vec, - batched_adjoint, batched_transpose - - A = randn(Float32, 3,3,2); - B = randn(Float32, 3,3,2); - - C = batched_mul(A, B) - @test CuArray(C) ≈ batched_mul(CuArray(A), CuArray(B)) - - Ct = batched_mul(batched_transpose(A), B) - @test CuArray(Ct) ≈ batched_mul(batched_transpose(CuArray(A)), CuArray(B)) - - Ca = batched_mul(A, batched_adjoint(B)) - @test CuArray(Ca) ≈ batched_mul(CuArray(A), batched_adjoint(CuArray(B))) - - # 5-arg batched_mul! - C .= pi - batched_mul!(C, A, B, 2f0, 3f0) - cuCpi = CuArray(similar(C)) .= pi - @test CuArray(C) ≈ batched_mul!(cuCpi, CuArray(A), CuArray(B), 2f0, 3f0) - - # PermutedDimsArray - @test CuArray(Ct) ≈ batched_mul(PermutedDimsArray(CuArray(A), (2,1,3)), CuArray(B)) - - D = permutedims(B, (1,3,2)) - Cp = batched_mul(batched_adjoint(A), B) - @test CuArray(Cp) ≈ batched_mul(batched_adjoint(CuArray(A)), PermutedDimsArray(CuArray(D), (1,3,2))) - - # Methods which reshape - M = randn(Float32, 3,3) - - Cm = batched_mul(A, M) - @test CuArray(Cm) ≈ batched_mul(CuArray(A), CuArray(M)) - - Cv = batched_vec(permutedims(A,(3,1,2)), M) - @test CuArray(Cv) ≈ batched_vec(PermutedDimsArray(CuArray(A),(3,1,2)), CuArray(M)) -end - -@testset "NNlib storage_type etc." begin - using LinearAlgebra - using NNlib: is_strided, are_strided, storage_type - - M = cu(ones(10,10)) - - @test is_strided(M) - @test is_strided(view(M, 1:2:5,:)) - @test is_strided(PermutedDimsArray(M, (2,1))) - - @test !is_strided(reshape(view(M, 1:2:10,:), 10,:)) - @test !is_strided((M .+ im)') - @test !is_strided(Diagonal(cu(ones(3)))) - - @test storage_type(M) == CuArray{Float32,2} - @test storage_type(reshape(view(M, 1:2:10,:), 10,:)) == CuArray{Float32,2} -end diff --git a/lib/NNlibCUDA/test/batchnorm.jl b/lib/NNlibCUDA/test/batchnorm.jl deleted file mode 100644 index 0da8a0e0f..000000000 --- a/lib/NNlibCUDA/test/batchnorm.jl +++ /dev/null @@ -1,8 +0,0 @@ -@testset "Batchnorm" begin - v = CUDA.rand(Float32, 2) - m = CUDA.rand(Float32, 2, 5) - for training in (false, true) - NNlibCUDA.batchnorm(v, v, m, v, v, 1.0; training=training) - NNlibCUDA.∇batchnorm(v, v, m, m, v, v, 1.0; training=training) - end -end diff --git a/lib/NNlibCUDA/test/conv.jl b/lib/NNlibCUDA/test/conv.jl deleted file mode 100644 index 6d7e0d6df..000000000 --- a/lib/NNlibCUDA/test/conv.jl +++ /dev/null @@ -1,60 +0,0 @@ -using NNlib: DenseConvDims - -@testset "convolution" begin - a, b, c = rand(Float64, 10, 10, 3, 1), rand(Float64, 2, 2, 3, 4), rand(Float64, 9, 9, 4, 1) - da, db, dc = CuArray(a), CuArray(b), CuArray(c) - cdims = DenseConvDims(a, b) - @test NNlib.conv(a, b, cdims) ≈ collect(NNlib.conv(da, db, cdims)) - @test ∇conv_data(c, b, cdims) ≈ collect(∇conv_data(dc, db, cdims)) - @test ∇conv_filter(a, c, cdims) ≈ collect(∇conv_filter(da, dc, cdims)) - - # Test for agreement between CPU NNlib and CuDNN versions, across a variety of kwargs - for num_spatial_dims in (1, 2, 3) - # Initialize data we'll run our tests over - C_in = 3 - C_out = 4 - batch_size = 1 - x = rand(Float64, fill(8, num_spatial_dims)..., C_in, batch_size) - w = rand(Float64, fill(2, num_spatial_dims)..., C_in, C_out) - b = rand(Float64, fill(1, num_spatial_dims)..., C_in, C_out) - options = (Dict(), Dict(:dilation => 2), Dict(:flipkernel => true), Dict(:stride => 2), Dict(:padding => 1)) - - # @denizyuret: algo option deprecated for nnlib, handling in cudnn - # algos = (1, 0, 1, 1,) - # for (opts, algo) in zip(options, algos) - - for opts in options - cdims = DenseConvDims(x, w; opts...) - y = NNlib.conv(x, w, cdims) - - # Test that basic convolution is equivalent across GPU/CPU - gputest((x, w) -> NNlib.conv(x, w, cdims), x, w) - gputest((y, w) -> NNlib.∇conv_data(y, w, cdims), y, w) - gputest((x, y) -> NNlib.∇conv_filter(x, y, cdims), x, y, checkgrad=false) # TODO fix grad - - # Scaling factors - gputest((x, w) -> NNlib.conv(x, w, cdims; alpha=2.0), x, w, checkgrad=false) # TODO - gputest((y, w) -> NNlib.∇conv_data(y, w, cdims; alpha=2.0), y, w, checkgrad=false) # TODO - gputest((x, y) -> NNlib.∇conv_filter(x, y, cdims; alpha=2.0), x, y, checkgrad=false) # TODO - - gputest((y, x, w) -> NNlib.conv!(copy(y), x, w, cdims; beta=2.0), y, x, w, checkgrad=false) # TODO - # @test_broken gputest((x, y, w) -> NNlib.∇conv_data!(copy(x), y, w, cdims; beta=2.0), x, y, w, checkgrad=false) #TODO - gputest((w, x, y) -> NNlib.∇conv_filter!(copy(w), x, y, cdims; beta=2.0), w, x, y, checkgrad=false) # TODO - - # Test the compatibility shims - cy,cx,cw = CuArray{Float32}.((y,x,w)) - opts2 = Dict((k==:padding ? :pad : k)=>v for (k,v) in opts) - @test NNlib.conv!(similar(cy),cx,cw; opts2...) ≈ NNlib.conv!(similar(cy),cx,cw,cdims) - @test NNlib.∇conv_filter!(similar(cw),cy,cx; opts2...) ≈ NNlib.∇conv_filter!(similar(cw),cx,cy,cdims) - end - - # CPU implementation of ∇conv_bias! - db = zeros(Float64, 1, 1, 3, 1) - dy = randn(Float64, 8, 8, 3, 1) - function NNlibCUDA.∇conv_bias!(db, dy) - db .= sum(dy, dims=(1:(ndims(dy)-2))) - return db - end - gputest(NNlibCUDA.∇conv_bias!, db, dy, checkgrad=false) - end -end diff --git a/lib/NNlibCUDA/test/pooling.jl b/lib/NNlibCUDA/test/pooling.jl deleted file mode 100644 index 9cd3739fe..000000000 --- a/lib/NNlibCUDA/test/pooling.jl +++ /dev/null @@ -1,25 +0,0 @@ -@testset "pooling" begin - - # Test for agreement between CPU NNlib and CuDNN versions, across a variety of kwargs - for num_spatial_dims in (1, 2, 3) - # Initialize data we'll run our tests over - C_in = 3 - batch_size = 1 - x = rand(Float64, fill(8, num_spatial_dims)..., C_in, batch_size) - - # Test that pooling is equivalent across GPU/CPU - pdims = PoolDims(x, 2) - y = maxpool(x, pdims) - dy = ones(size(y)) - gputest(x -> maxpool(x, pdims), x) - gputest((dy, y, x) -> ∇maxpool(dy, y, x, pdims), dy, y, x, checkgrad=false) - gputest(x -> maxpool(x, pdims), x) - gputest((dy, y, x) -> ∇maxpool(dy, y, x, pdims), dy, y, x, checkgrad=false) - - # Test the compatibility shims for pooling - cx,cy,cdy = CuArray{Float32}.((x,y,dy)) - win,pad=2,1 - maxpool!(similar(cy), cx, win; pad=pad, stride=win) ≈ maxpool!(similar(cy), cx, PoolDims(cx, win; padding=pad, stride=win)) - meanpool!(similar(cy), cx, win; pad=pad, stride=win) ≈ meanpool!(similar(cy), cx, PoolDims(cx, win; padding=pad, stride=win)) - end -end diff --git a/lib/NNlibCUDA/test/runtests.jl b/lib/NNlibCUDA/test/runtests.jl deleted file mode 100644 index 75e8ef779..000000000 --- a/lib/NNlibCUDA/test/runtests.jl +++ /dev/null @@ -1,21 +0,0 @@ -using Test -using NNlib -using Zygote -using NNlibCUDA -using ForwardDiff: Dual -using CUDA -CUDA.allowscalar(false) - -include("test_utils.jl") - -if CUDA.functional() - include("activations.jl") - include("batchedmul.jl") - include("upsample.jl") - include("conv.jl") - include("pooling.jl") - include("softmax.jl") - include("batchnorm.jl") -else - @warn "needs working CUDA installation to perform tests" -end diff --git a/lib/NNlibCUDA/test/softmax.jl b/lib/NNlibCUDA/test/softmax.jl deleted file mode 100644 index 2611e04f6..000000000 --- a/lib/NNlibCUDA/test/softmax.jl +++ /dev/null @@ -1,12 +0,0 @@ -@testset "softmax" begin - for dims in [(5,5), (5,)] - x = randn(Float64, dims) - y = softmax(x) - dy = randn(Float64, dims) - gputest(softmax, x) - gputest(∇softmax, dy, x, y, checkgrad=false) - y = logsoftmax(x) - gputest(logsoftmax, x) - gputest(∇logsoftmax, dy, x, y, checkgrad=false) - end -end diff --git a/lib/NNlibCUDA/test/test_utils.jl b/lib/NNlibCUDA/test/test_utils.jl deleted file mode 100644 index 46d33e0d8..000000000 --- a/lib/NNlibCUDA/test/test_utils.jl +++ /dev/null @@ -1,20 +0,0 @@ -function gputest(f, xs...; checkgrad=true, atol=1e-10) - cpu_in = xs - gpu_in = CuArray.(xs) - - cpu_out = f(cpu_in...) - gpu_out = f(gpu_in...) - @test collect(cpu_out) ≈ collect(gpu_out) - - if checkgrad - cpu_grad = gradient((x...) -> sum(f(x...)), cpu_in...) - gpu_grad = gradient((x...) -> sum(f(x...)), gpu_in...) - for (cpu_g, gpu_g) in zip(cpu_grad, gpu_grad) - if cpu_g === nothing - @test gpu_g === nothing - else - @test collect(cpu_g) ≈ collect(gpu_g) atol=atol - end - end - end -end diff --git a/lib/NNlibCUDA/test/upsample.jl b/lib/NNlibCUDA/test/upsample.jl deleted file mode 100644 index 4858eecbe..000000000 --- a/lib/NNlibCUDA/test/upsample.jl +++ /dev/null @@ -1,27 +0,0 @@ -@testset "Bilinear upsampling" begin - x = Float32[1 2; 3 4][:,:,:,:] - x = cat(x,x; dims=3) - x = cat(x,x; dims=4) - xgpu = cu(x) - - y_true = Float32[ 1//1 4//3 5//3 2//1; - 7//5 26//15 31//15 12//5; - 9//5 32//15 37//15 14//5; - 11//5 38//15 43//15 16//5; - 13//5 44//15 49//15 18//5; - 3//1 10//3 11//3 4//1] - y_true = cat(y_true,y_true; dims=3) - y_true = cat(y_true,y_true; dims=4) - y_true_gpu = cu(y_true) - - y = upsample_bilinear(xgpu, (3,2)) - @test size(y) == size(y_true_gpu) - @test eltype(y) == Float32 - @test collect(y) ≈ collect(y_true_gpu) - - o = CUDA.ones(Float32,6,4,2,1) - grad_true = 6*CUDA.ones(Float32,2,2,2,1) - @test ∇upsample_bilinear(o; size=(2,2)) ≈ grad_true - - gputest(x -> upsample_bilinear(x, (3, 2)), x, atol=1e-5) -end From 758118d95aa9526f1080ed6eb317af24366cd996 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 02:58:28 +0530 Subject: [PATCH 02/14] rm .ci --- .ci/develop.jl | 7 ------- .ci/develop_nnlibcuda.jl | 11 ----------- .ci/test.jl | 5 ----- .ci/test_nnlibcuda.jl | 5 ----- 4 files changed, 28 deletions(-) delete mode 100644 .ci/develop.jl delete mode 100644 .ci/develop_nnlibcuda.jl delete mode 100644 .ci/test.jl delete mode 100644 .ci/test_nnlibcuda.jl diff --git a/.ci/develop.jl b/.ci/develop.jl deleted file mode 100644 index ec423b45e..000000000 --- a/.ci/develop.jl +++ /dev/null @@ -1,7 +0,0 @@ -import Pkg - -root_directory = dirname(@__DIR__) - -nnlib = Pkg.PackageSpec(path = root_directory) -Pkg.develop(nnlib) -Pkg.precompile() diff --git a/.ci/develop_nnlibcuda.jl b/.ci/develop_nnlibcuda.jl deleted file mode 100644 index f309ca01c..000000000 --- a/.ci/develop_nnlibcuda.jl +++ /dev/null @@ -1,11 +0,0 @@ -import Pkg - -root_directory = dirname(@__DIR__) - -nnlib = Pkg.PackageSpec(path = root_directory) -nnlibcuda = Pkg.PackageSpec(path = joinpath(root_directory, "lib", "NNlibCUDA")) - -Pkg.develop(nnlib) -Pkg.develop(nnlibcuda) - -Pkg.precompile() diff --git a/.ci/test.jl b/.ci/test.jl deleted file mode 100644 index 51d8e28e8..000000000 --- a/.ci/test.jl +++ /dev/null @@ -1,5 +0,0 @@ -import Pkg - -pkgs = ["NNlib"] - -Pkg.test(pkgs; coverage = true) diff --git a/.ci/test_nnlibcuda.jl b/.ci/test_nnlibcuda.jl deleted file mode 100644 index e9bb40071..000000000 --- a/.ci/test_nnlibcuda.jl +++ /dev/null @@ -1,5 +0,0 @@ -import Pkg - -pkgs = ["NNlibCUDA"] - -Pkg.test(pkgs; coverage = true) From ac405f3248d884d77ac58f4b3b711459da25147d Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 03:09:37 +0530 Subject: [PATCH 03/14] use CUDA test dep --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index caf41960b..6008e45f1 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,7 @@ Requires = "0.5, 1.0" julia = "1.3" [extras] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -28,4 +29,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["ChainRulesTestUtils", "FiniteDifferences", "Logging", "Random", "StableRNGs", "Test", "Zygote"] +test = ["ChainRulesTestUtils", "CUDA", "FiniteDifferences", "Logging", "Random", "StableRNGs", "Test", "Zygote"] From 7806bf8478800bf4830536c185bd76b4c4b9909a Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 03:10:34 +0530 Subject: [PATCH 04/14] test NNlibCUDA when possible --- test/runtests.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 7b4c17f31..60d9231b7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -56,3 +56,15 @@ end include("utils.jl") end +using CUDA + +if VERSION >= v"1.6" && CUDA.functional() + import Pkg + Pkg.develop(url = "https://github.com/FluxML/NNlibCUDA.jl") + using NNlibCUDA + @testset "CUDA" begin + Pkg.test("NNlibCUDA") + end +else + @info "Insufficient version or CUDA not found: Skipping CUDA tests" +end From 996b98605bf3e775978ff8c53b6dcffd32f907eb Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 03:31:36 +0530 Subject: [PATCH 05/14] restore ci --- .buildkite/pipeline.yml | 16 +++++++++++----- .github/workflows/ci.yml | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 457442d97..624e8429b 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -3,19 +3,25 @@ steps: plugins: - JuliaCI/julia#v1: version: "1.6" + - JuliaCI/julia-test#v1: ~ - JuliaCI/julia-coverage#v1: codecov: true dirs: - src - - lib - commands: - - julia .ci/develop_nnlibcuda.jl - - julia .ci/test_nnlibcuda.jl +# commands: +# - julia --project=test -e """ +# Pkg.develop(url = \"https://github.com/FluxML/NNlibCUDA.jl\") +# Pkg.instantiate() +# Pkg.build() +# Pkg.status() +# Pkg.test() +# Pkg.test(\"NNlibCUDA\") +# """ agents: queue: "juliagpu" cuda: "*" timeout_in_minutes: 60 - + ## Add these when julia 1.7 is out # - label: "GPU julia v1" # plugins: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 245502576..4ea9f56cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,8 +65,8 @@ jobs: ${{ runner.os }}-test-${{ env.cache-name }}- ${{ runner.os }}-test- ${{ runner.os }}- - - run: julia .ci/develop.jl - - run: julia .ci/test.jl + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: From 281727de24553f8ef70ae2724206b75d1b13225c Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 04:13:50 +0530 Subject: [PATCH 06/14] set NNLIB_TEST_CUDA on bk --- .buildkite/pipeline.yml | 12 ++++-------- test/runtests.jl | 14 +++++++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 624e8429b..c6256be49 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -20,6 +20,8 @@ steps: agents: queue: "juliagpu" cuda: "*" + env: + NNLIB_TEST_CUDA: true timeout_in_minutes: 60 ## Add these when julia 1.7 is out @@ -27,14 +29,11 @@ steps: # plugins: # - JuliaCI/julia#v1: # version: "1" + # - JuliaCI/julia-test#v1: ~ # - JuliaCI/julia-coverage#v1: # codecov: true # dirs: # - src - # - lib - # commands: - # - julia .ci/develop.jl - # - julia .ci/test.jl # agents: # queue: "juliagpu" # cuda: "*" @@ -44,14 +43,11 @@ steps: # plugins: # - JuliaCI/julia#v1: # version: "nightly" + # - JuliaCI/julia-test#v1: ~ # - JuliaCI/julia-coverage#v1: # codecov: true # dirs: # - src - # - lib - # commands: - # - julia .ci/develop.jl - # - julia .ci/test.jl # agents: # queue: "juliagpu" # cuda: "*" diff --git a/test/runtests.jl b/test/runtests.jl index 60d9231b7..688da6e11 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -59,11 +59,15 @@ end using CUDA if VERSION >= v"1.6" && CUDA.functional() - import Pkg - Pkg.develop(url = "https://github.com/FluxML/NNlibCUDA.jl") - using NNlibCUDA - @testset "CUDA" begin - Pkg.test("NNlibCUDA") + if ENV["NNLIB_TEST_CUDA"] == "true" + import Pkg + Pkg.develop(url = "https://github.com/FluxML/NNlibCUDA.jl") + using NNlibCUDA + @testset "CUDA" begin + Pkg.test("NNlibCUDA") + end + else + @info "Skipping CUDA tests, set NNLIB_TEST_CUDA=true to run them" end else @info "Insufficient version or CUDA not found: Skipping CUDA tests" From d5ad6aab50423382f76f40b7752f6e7c242a6473 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 04:48:24 +0530 Subject: [PATCH 07/14] typo --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 688da6e11..87f03b902 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -70,5 +70,5 @@ if VERSION >= v"1.6" && CUDA.functional() @info "Skipping CUDA tests, set NNLIB_TEST_CUDA=true to run them" end else - @info "Insufficient version or CUDA not found: Skipping CUDA tests" + @info "Insufficient version or CUDA not found; Skipping CUDA tests" end From 11e0da593c591a42d976bc796a95418823fe7a09 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 04:53:18 +0530 Subject: [PATCH 08/14] handle if env variable not set --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 87f03b902..35ce7ede5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -59,7 +59,7 @@ end using CUDA if VERSION >= v"1.6" && CUDA.functional() - if ENV["NNLIB_TEST_CUDA"] == "true" + if get(ENV, "NNLIB_TEST_CUDA", "false") == "true" import Pkg Pkg.develop(url = "https://github.com/FluxML/NNlibCUDA.jl") using NNlibCUDA From 3a99c4028c7829b9bbafff4d16368afe135336d8 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 14:50:32 +0530 Subject: [PATCH 09/14] use has_cuda --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 35ce7ede5..c2acb429f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -58,7 +58,7 @@ end using CUDA -if VERSION >= v"1.6" && CUDA.functional() +if VERSION >= v"1.6" && CUDA.has_cuda() if get(ENV, "NNLIB_TEST_CUDA", "false") == "true" import Pkg Pkg.develop(url = "https://github.com/FluxML/NNlibCUDA.jl") From 39585c093ffd914d066864bf3e0ed703c064a1a6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 14:52:00 +0530 Subject: [PATCH 10/14] add Adapt compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6008e45f1..fe0cc4fe3 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] -Adapt = "3.2" +Adapt = "2, 3.2" ChainRulesCore = "0.9" Compat = "3.14" Requires = "0.5, 1.0" From 4b7064c0052ac37a1000a2fe60416166e6523526 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 14:53:40 +0530 Subject: [PATCH 11/14] keep using functional --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index c2acb429f..35ce7ede5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -58,7 +58,7 @@ end using CUDA -if VERSION >= v"1.6" && CUDA.has_cuda() +if VERSION >= v"1.6" && CUDA.functional() if get(ENV, "NNLIB_TEST_CUDA", "false") == "true" import Pkg Pkg.develop(url = "https://github.com/FluxML/NNlibCUDA.jl") From 16d30424ae6f2bbd41bdbcf7ce39645a0980e83e Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 15 Apr 2021 22:15:31 +0530 Subject: [PATCH 12/14] test on 1.5 and up --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ea9f56cc..01e75e912 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: version: - - '1.3' # Replace this with the minimum Julia version that your package supports. + - '1.5' # Replace this with the minimum Julia version that your package supports. - '1' # automatically expands to the latest stable 1.x release of Julia - 'nightly' os: From 7fe6618d009af1f79b68a3374811e0f2c9ba66cb Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 16 Apr 2021 14:28:55 +0530 Subject: [PATCH 13/14] lower bound nnlib to 1.5 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index fe0cc4fe3..13193df27 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ Adapt = "2, 3.2" ChainRulesCore = "0.9" Compat = "3.14" Requires = "0.5, 1.0" -julia = "1.3" +julia = "1.5" [extras] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" From 53ada557b44c07e590aeb28d00766134bcedbc8e Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 16 Apr 2021 16:53:23 +0530 Subject: [PATCH 14/14] trigger bk and bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 13193df27..f1974c9d9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "NNlib" uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.7.18" +version = "0.7.19" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"