Skip to content

Commit

Permalink
Move CUDNN and CUTENSOR into separate packages (JuliaGPU#1624)
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt authored and simonbyrne committed Nov 13, 2023
1 parent 6140d69 commit c3e25dc
Show file tree
Hide file tree
Showing 92 changed files with 1,816 additions and 1,679 deletions.
719 changes: 383 additions & 336 deletions .buildkite/pipeline.yml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
.vscode
lcov.info
build/
lib/**/Manifest.toml
213 changes: 1 addition & 212 deletions deps/bindeps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -534,126 +534,6 @@ function find_libcudadevrt(cuda::LocalToolkit)
end


#
# CUDNN
#

export libcudnn, has_cudnn

# Holds the resolved CUDNN library path (or `nothing`); filled in via `@initialize_ref`
# inside `libcudnn`.
const __libcudnn = Ref{Union{String,Nothing}}()

"""
    libcudnn(; throw_error::Bool=true)

Return the path to the CUDNN library, or `nothing` when it could not be found.

The lookup calls `libcublas()` first and then `find_cudnn` for the active toolkit,
requesting version 8.

# Keywords
- `throw_error`: when `true` (the default), raise an error instead of returning
  `nothing` if CUDNN is missing.
"""
function libcudnn(; throw_error::Bool=true)
    # NOTE(review): `@initialize_ref` is defined elsewhere; presumably it evaluates the
    # block once, stores the result in the ref and then runs the trailing hook — confirm.
    path = @initialize_ref __libcudnn begin
        # CUDNN depends on CUBLAS
        libcublas()

        find_cudnn(toolkit(), v"8")
    end CUDA.CUDNN.__runtime_init__()
    if path === nothing && throw_error
        error("This functionality is unavailable as CUDNN is missing.")
    end
    return path
end

"""
    has_cudnn()

Return `true` when `libcudnn` resolves to a library path, without throwing.
"""
has_cudnn() = libcudnn(throw_error=false) !== nothing

"""
    find_cudnn(cuda::ArtifactToolkit, version)

Look up CUDNN in the `CUDNN` artifact matching `cuda.release`, open it together with
its sublibraries, and return the path of the main library. Returns `nothing` when no
artifact directory is available.
"""
function find_cudnn(cuda::ArtifactToolkit, version)
    artifact_dir = cuda_artifact("CUDNN", cuda.release)
    artifact_dir === nothing && return nothing

    path = artifact_library(artifact_dir, "cudnn", [version])

    # HACK: eagerly open CUDNN sublibraries to avoid dlopen discoverability issues
    foreach(("ops_infer", "ops_train",
             "cnn_infer", "cnn_train",
             "adv_infer", "adv_train")) do sublibrary
        Libdl.dlopen(artifact_library(artifact_dir, "cudnn_$(sublibrary)", [version]))
    end

    @debug "Using CUDNN from an artifact at $(artifact_dir)"
    Libdl.dlopen(path)
    return path
end

"""
    find_cudnn(cuda::LocalToolkit, version)

Search `cuda.dirs` for a CUDNN library of the requested `version`. When one is found it
is opened and its path returned; otherwise `nothing` is returned.
"""
function find_cudnn(cuda::LocalToolkit, version)
    path = find_library("cudnn", [version]; locations=cuda.dirs)
    if path !== nothing
        # with a local CUDNN version, we shouldn't need to eagerly open sublibraries,
        # as they are expected to be globally discoverable next to libcudnn.so
        @debug "Using local CUDNN at $(path)"
        Libdl.dlopen(path)
    end
    return path
end


#
# CUTENSOR
#

export libcutensor, libcutensormg, has_cutensor, has_cutensormg

# Holds the resolved CUTENSOR library path (or `nothing`); filled in via
# `@initialize_ref` inside `libcutensor`.
const __libcutensor = Ref{Union{String,Nothing}}()

"""
    libcutensor(; throw_error::Bool=true)

Return the path to the CUTENSOR library, or `nothing` when it could not be found.

The lookup calls `libcublas()` first and then `find_cutensor` for the active toolkit,
requesting the library named "cutensor" at version 1.

# Keywords
- `throw_error`: when `true` (the default), raise an error instead of returning
  `nothing` if CUTENSOR is missing.
"""
function libcutensor(; throw_error::Bool=true)
    # NOTE(review): `@initialize_ref` is defined elsewhere; presumably it evaluates the
    # block once and caches the result in the ref — confirm.
    path = @initialize_ref __libcutensor begin
        # CUTENSOR depends on CUBLAS
        libcublas()

        find_cutensor(toolkit(), "cutensor", v"1")
    end
    if path === nothing && throw_error
        error("This functionality is unavailable as CUTENSOR is missing.")
    end
    return path
end

"""
    has_cutensor()

Return `true` when `libcutensor` resolves to a library path, without throwing.
"""
has_cutensor() = libcutensor(throw_error=false) !== nothing

# Holds the resolved CUTENSORMg library path (or `nothing`); filled in via
# `@initialize_ref` inside `libcutensormg`.
const __libcutensormg = Ref{Union{String,Nothing}}()

"""
    libcutensormg(; throw_error::Bool=true)

Return the path to the CUTENSORMg library, or `nothing` when it is unavailable.

The lookup calls `libcudart()` first. If `CUTENSOR.version()` is older than 1.4 the
result is `nothing`; otherwise `find_cutensor` is asked for the library named
"cutensorMg" at version 1.

# Keywords
- `throw_error`: when `true` (the default), raise an error instead of returning
  `nothing` if CUTENSORMg is missing.
"""
function libcutensormg(; throw_error::Bool=true)
    # Use the dedicated `__libcutensormg` ref: initializing through `__libcutensor`
    # would share state with `libcutensor` and leave this ref unused.
    path = @initialize_ref __libcutensormg begin
        # CUTENSORMg additionally depends on CUDARt
        libcudart()

        if CUTENSOR.version() < v"1.4"
            nothing
        else
            find_cutensor(toolkit(), "cutensorMg", v"1")
        end
    end
    if path === nothing && throw_error
        error("This functionality is unavailable as CUTENSORMg is missing.")
    end
    return path
end

"""
    has_cutensormg()

Return `true` when `libcutensormg` resolves to a library path, without throwing.
"""
has_cutensormg() = libcutensormg(throw_error=false) !== nothing

"""
    find_cutensor(cuda::ArtifactToolkit, name, version)

Look up the library `name` in the `CUTENSOR` artifact matching `cuda.release`, open it,
and return its path. Returns `nothing` when no artifact directory is available.
"""
function find_cutensor(cuda::ArtifactToolkit, name, version)
    artifact_dir = cuda_artifact("CUTENSOR", cuda.release)
    artifact_dir === nothing && return nothing

    path = artifact_library(artifact_dir, name, [version])
    @debug "Using CUTENSOR library $name from an artifact at $(artifact_dir)"
    Libdl.dlopen(path)
    return path
end

"""
    find_cutensor(cuda::LocalToolkit, name, version)

Search `cuda.dirs` for the library `name` at the requested `version`. When found, the
library is opened and its path returned; otherwise `nothing` is returned.
"""
function find_cutensor(cuda::LocalToolkit, name, version)
    path = find_library(name, [version]; locations=cuda.dirs)
    if path !== nothing
        @debug "Using local CUTENSOR library $name at $(path)"
        Libdl.dlopen(path)
    end
    return path
end


#
# NCCL
#
Expand All @@ -666,7 +546,7 @@ function libnccl(; throw_error::Bool=true)
find_nccl(toolkit(), "nccl", v"1")
end
if path === nothing && throw_error
error("This functionality is unavailabe as CUTENSOR is missing.")
error("This functionality is unavailabe as NCCL is missing.")
end
path
end
Expand Down Expand Up @@ -696,97 +576,6 @@ function find_nccl(cuda::LocalToolkit, name, version)
end


#
# CUQUANTUM
#

export libcutensornet, has_cutensornet, libcustatevec, has_custatevec

# Holds the resolved CUTENSORNET library path (or `nothing`); filled in via
# `@initialize_ref` inside `libcutensornet`.
const __libcutensornet = Ref{Union{String,Nothing}}()

"""
    libcutensornet(; throw_error::Bool=true)

Return the path to the CUTENSORNET library, or `nothing` when it is unavailable.

The lookup first calls `libcutensor`, forwarding `throw_error`. If
`CUDA.runtime_version()` is older than 11 the result is `nothing`; otherwise
`find_cutensornet` is asked for the library named "cutensornet" at version 0.1.0.

# Keywords
- `throw_error`: when `true` (the default), raise an error instead of returning
  `nothing` if CUTENSORNET is missing.
"""
function libcutensornet(; throw_error::Bool=true)
    # NOTE(review): `@initialize_ref` is defined elsewhere; presumably it evaluates the
    # block once and caches the result in the ref — confirm.
    path = @initialize_ref __libcutensornet begin
        # CUTENSORNET depends on CUTENSOR
        libcutensor(throw_error=throw_error)

        if CUDA.runtime_version() < v"11"
            # XXX: bound this using tags in the Artifact.toml?
            nothing
        else
            find_cutensornet(toolkit(), "cutensornet", v"0.1.0")
        end
    end
    if path === nothing && throw_error
        error("This functionality is unavailable as CUTENSORNET is missing.")
    end
    return path
end

"""
    has_cutensornet()

Return `true` when both `has_cutensor()` holds and `libcutensornet` resolves to a
library path, without throwing.
"""
has_cutensornet() = has_cutensor() && libcutensornet(throw_error=false) !== nothing

# Holds the resolved CUSTATEVEC library path (or `nothing`); filled in via
# `@initialize_ref` inside `libcustatevec`.
const __libcustatevec = Ref{Union{String,Nothing}}()

"""
    libcustatevec(; throw_error::Bool=true)

Return the path to the CUSTATEVEC library, or `nothing` when it is unavailable.

If `CUDA.runtime_version()` is older than 11 the result is `nothing`; otherwise
`find_custatevec` is asked for the library named "custatevec" at version 0.1.0.

# Keywords
- `throw_error`: when `true` (the default), raise an error instead of returning
  `nothing` if CUSTATEVEC is missing.
"""
function libcustatevec(; throw_error::Bool=true)
    # NOTE(review): `@initialize_ref` is defined elsewhere; presumably it evaluates the
    # block once and caches the result in the ref — confirm.
    path = @initialize_ref __libcustatevec begin
        if CUDA.runtime_version() < v"11"
            # XXX: bound this using tags in the Artifact.toml?
            nothing
        else
            find_custatevec(toolkit(), "custatevec", v"0.1.0")
        end
    end
    if path === nothing && throw_error
        error("This functionality is unavailable as CUSTATEVEC is missing.")
    end
    return path
end

"""
    has_custatevec()

Return `true` when `libcustatevec` resolves to a library path, without throwing.
"""
has_custatevec() = libcustatevec(throw_error=false) !== nothing

"""
    find_cutensornet(cuda::ArtifactToolkit, name, version)

Look up the library `name` in the `cuQuantum` artifact, open it, and return its path.
Returns `nothing` when no artifact directory is available.
"""
function find_cutensornet(cuda::ArtifactToolkit, name, version)
    # NOTE(review): this goes through `generic_artifact`, whereas `find_custatevec`
    # resolves the same "cuQuantum" artifact via `cuda_artifact` — confirm this
    # difference is intentional.
    artifact_dir = generic_artifact("cuQuantum")
    artifact_dir === nothing && return nothing

    path = artifact_library(artifact_dir, name, [version])
    @debug "Using CUTENSORNET library $name from an artifact at $(artifact_dir)"
    Libdl.dlopen(path)
    return path
end

"""
    find_cutensornet(cuda::LocalToolkit, name, version)

Search `cuda.dirs` for the library `name` at the requested `version`. When found, the
library is opened and its path returned; otherwise `nothing` is returned.
"""
function find_cutensornet(cuda::LocalToolkit, name, version)
    path = find_library(name, [version]; locations=cuda.dirs)
    if path !== nothing
        @debug "Using local CUTENSORNET library $name at $(path)"
        Libdl.dlopen(path)
    end
    return path
end

"""
    find_custatevec(cuda::ArtifactToolkit, name, version)

Look up the library `name` in the `cuQuantum` artifact (requested as 0.1.3), open it,
and return its path. Returns `nothing` when no artifact directory is available.
"""
function find_custatevec(cuda::ArtifactToolkit, name, version)
    artifact_dir = cuda_artifact("cuQuantum", v"0.1.3")
    artifact_dir === nothing && return nothing

    path = artifact_library(artifact_dir, name, [version])
    @debug "Using CUSTATEVEC library $name from an artifact at $(artifact_dir)"
    Libdl.dlopen(path)
    return path
end

"""
    find_custatevec(cuda::LocalToolkit, name, version)

Search `cuda.dirs` for the library `name` at the requested `version`. When found, the
library is opened and its path returned; otherwise `nothing` is returned.
"""
function find_custatevec(cuda::LocalToolkit, name, version)
    path = find_library(name, [version]; locations=cuda.dirs)
    if path !== nothing
        @debug "Using local CUSTATEVEC library $name at $(path)"
        Libdl.dlopen(path)
    end
    return path
end


#
# Utilities
#
Expand Down
9 changes: 9 additions & 0 deletions lib/cudnn/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name = "CUDNN"
uuid = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
authors = ["Tim Besard <tim.besard@gmail.com>"]
version = "0.1.0"

[deps]
CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
16 changes: 8 additions & 8 deletions lib/cudnn/CUDNN.jl → lib/cudnn/src/CUDNN.jl
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
"""
CUDA.CUDNN
CUDNN
High level interface to cuDNN functions. See
https://github.com/JuliaGPU/CUDA.jl/blob/master/lib/state/README.md
for a design overview.
[README.md](https://github.com/JuliaGPU/CUDA.jl/blob/master/lib/cudnn/README.md) for a
design overview.
"""
module CUDNN

using ..APIUtils

using ..CUDA
using ..CUDA: CUstream, libraryPropertyType
using ..CUDA: libcudnn, @retry_reclaim, isdebug, initialize_context
using CUDA
using CUDA.APIUtils
using CUDA: CUstream, libraryPropertyType
using CUDA: @retry_reclaim, isdebug, initialize_context

using CEnum: @cenum

include("bindeps.jl")

# core library
include("libcudnn_common.jl")
Expand Down
File renamed without changes.
File renamed without changes.
55 changes: 55 additions & 0 deletions lib/cudnn/src/bindeps.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
using CUDA.Deps: @initialize_ref, libcublas, cuda_artifact, artifact_library, find_library,
LocalToolkit, ArtifactToolkit, toolkit

import Libdl

export libcudnn, has_cudnn

# Holds the resolved CUDNN library path (or `nothing`); filled in via `@initialize_ref`
# inside `libcudnn`.
const __libcudnn = Ref{Union{String,Nothing}}()

"""
    libcudnn(; throw_error::Bool=true)

Return the path to the CUDNN library, or `nothing` when it could not be found.

The lookup calls `libcublas()` first and then `find_cudnn` for the active toolkit,
requesting version 8.

# Keywords
- `throw_error`: when `true` (the default), raise an error instead of returning
  `nothing` if CUDNN is missing.
"""
function libcudnn(; throw_error::Bool=true)
    # NOTE(review): `@initialize_ref` comes from `CUDA.Deps`; presumably it evaluates the
    # block once, stores the result in the ref and then runs the trailing hook — confirm.
    path = @initialize_ref __libcudnn begin
        # CUDNN depends on CUBLAS
        libcublas()

        find_cudnn(toolkit(), v"8")
    end __runtime_init__()
    if path === nothing && throw_error
        error("This functionality is unavailable as CUDNN is missing.")
    end
    return path
end

"""
    has_cudnn()

Return `true` when `libcudnn` resolves to a library path, without throwing.
"""
has_cudnn() = libcudnn(throw_error=false) !== nothing

"""
    find_cudnn(cuda::ArtifactToolkit, version)

Look up CUDNN in the `CUDNN` artifact matching `cuda.release`, open it together with
its sublibraries, and return the path of the main library. Returns `nothing` when no
artifact directory is available.
"""
function find_cudnn(cuda::ArtifactToolkit, version)
    artifact_dir = cuda_artifact("CUDNN", cuda.release)
    artifact_dir === nothing && return nothing

    path = artifact_library(artifact_dir, "cudnn", [version])

    # HACK: eagerly open CUDNN sublibraries to avoid dlopen discoverability issues
    foreach(("ops_infer", "ops_train",
             "cnn_infer", "cnn_train",
             "adv_infer", "adv_train")) do sublibrary
        Libdl.dlopen(artifact_library(artifact_dir, "cudnn_$(sublibrary)", [version]))
    end

    @debug "Using CUDNN from an artifact at $(artifact_dir)"
    Libdl.dlopen(path)
    return path
end

"""
    find_cudnn(cuda::LocalToolkit, version)

Search `cuda.dirs` for a CUDNN library of the requested `version`. When one is found it
is opened and its path returned; otherwise `nothing` is returned.
"""
function find_cudnn(cuda::LocalToolkit, version)
    path = find_library("cudnn", [version]; locations=cuda.dirs)
    if path !== nothing
        # with a local CUDNN version, we shouldn't need to eagerly open sublibraries,
        # as they are expected to be globally discoverable next to libcudnn.so
        @debug "Using local CUDNN at $(path)"
        Libdl.dlopen(path)
    end
    return path
end
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
7 changes: 7 additions & 0 deletions lib/cudnn/test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Loading

0 comments on commit c3e25dc

Please sign in to comment.