From fbef0d94046bcc87a665ffda21c67a7ea94ef6db Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Wed, 14 Jun 2023 22:34:31 +0200
Subject: [PATCH] make NNlibCUDA an extension (#492)

* make NNlibCUDA an extension

* cuDNN extension

* update workflows

* uncomment when 1.10 is out

* cleanup

* cleanup

* test Project.toml

* cleanup

* rethink extensions

* batchnorm

* cleanup
---
 .buildkite/pipeline.yml                        | 76 +++++++------
 .github/workflows/CompatHelper.yml             | 12 ++-
 .github/workflows/TagBot.yml                   | 22 +++++-
 .github/workflows/ci.yml                       | 14 ++--
 .gitignore                                     |  2 +-
 Project.toml                                   | 17 +++--
 README.md                                      | 11 ++-
 docs/src/index.md                              | 11 ++-
 ext/NNlibCUDA/.buildkite/pipeline.yml          | 20 -----
 .../.github/workflows/compathelper.yml         | 26 -------
 ext/NNlibCUDA/.github/workflows/tagbot.yml     | 15 ----
 ext/NNlibCUDA/.gitignore                       |  1 -
 ext/NNlibCUDA/LICENSE.md                       | 23 ------
 ext/NNlibCUDA/Project.toml                     | 29 -------
 ext/NNlibCUDA/README.md                        |  5 --
 ext/NNlibCUDA/src/NNlibCUDA.jl                 | 24 ------
 ext/NNlibCUDA/test/batchnorm.jl                | 27 -------
 .../NNlibCUDACUDNNExt.jl}                      | 15 ++++
 .../activations.jl                             |  0
 .../cudnn => NNlibCUDACUDNNExt}/batchnorm.jl   |  2 +-
 .../src/cudnn => NNlibCUDACUDNNExt}/conv.jl    |  0
 .../cudnn => NNlibCUDACUDNNExt}/pooling.jl     |  0
 .../cudnn => NNlibCUDACUDNNExt}/softmax.jl     |  0
 ext/NNlibCUDAExt/NNlibCUDAExt.jl               | 16 ++++
 .../src => NNlibCUDAExt}/activations.jl        |  0
 .../src => NNlibCUDAExt}/batchedadjtrans.jl    |  0
 .../src => NNlibCUDAExt}/batchedmul.jl         |  0
 ext/{NNlibCUDA/src => NNlibCUDAExt}/ctc.jl     |  2 +-
 ext/{NNlibCUDA/src => NNlibCUDAExt}/fold.jl    |  0
 .../src => NNlibCUDAExt}/sampling.jl           |  0
 .../src => NNlibCUDAExt}/scatter.jl            |  0
 ext/{NNlibCUDA/src => NNlibCUDAExt}/utils.jl   |  0
 src/NNlib.jl                                   |  4 +-
 src/activations.jl                             |  2 +-
 src/ctc.jl                                     |  2 +-
 src/deprecations.jl                            | 20 -----
 src/dropout.jl                                 |  2 +-
 src/normalization.jl                           |  4 +
 src/upsample.jl                                |  9 ---
 test/Project.toml                              | 12 +--
 .../test => test/ext_cuda}/activations.jl      |  4 +-
 .../test => test/ext_cuda}/batchedadjtrans.jl  |  0
 .../test => test/ext_cuda}/batchedmul.jl       |  0
 test/ext_cuda/batchnorm.jl                     | 27 +++++++
 {ext/NNlibCUDA/test => test/ext_cuda}/conv.jl  |  0
 {ext/NNlibCUDA/test => test/ext_cuda}/ctc.jl   |  6 --
 .../test => test/ext_cuda}/dropout.jl          |  3 -
 {ext/NNlibCUDA/test => test/ext_cuda}/fold.jl  |  0
 .../test => test/ext_cuda}/gather.jl           |  0
 .../test => test/ext_cuda}/pooling.jl          |  0
 .../test => test/ext_cuda}/runtests.jl         |  6 +-
 .../test => test/ext_cuda}/sampling.jl         |  0
 .../test => test/ext_cuda}/scatter.jl          |  0
 .../test => test/ext_cuda}/softmax.jl          |  0
 .../test => test/ext_cuda}/test_utils.jl       |  0
 test/runtests.jl                               |  9 ++-
 56 files changed, 180 insertions(+), 300 deletions(-)
 delete mode 100644 ext/NNlibCUDA/.buildkite/pipeline.yml
 delete mode 100644 ext/NNlibCUDA/.github/workflows/compathelper.yml
 delete mode 100644 ext/NNlibCUDA/.github/workflows/tagbot.yml
 delete mode 100644 ext/NNlibCUDA/.gitignore
 delete mode 100644 ext/NNlibCUDA/LICENSE.md
 delete mode 100644 ext/NNlibCUDA/Project.toml
 delete mode 100644 ext/NNlibCUDA/README.md
 delete mode 100644 ext/NNlibCUDA/src/NNlibCUDA.jl
 delete mode 100644 ext/NNlibCUDA/test/batchnorm.jl
 rename ext/{NNlibCUDA/src/cudnn/cudnn.jl => NNlibCUDACUDNNExt/NNlibCUDACUDNNExt.jl} (69%)
 rename ext/{NNlibCUDA/src/cudnn => NNlibCUDACUDNNExt}/activations.jl (100%)
 rename ext/{NNlibCUDA/src/cudnn => NNlibCUDACUDNNExt}/batchnorm.jl (99%)
 rename ext/{NNlibCUDA/src/cudnn => NNlibCUDACUDNNExt}/conv.jl (100%)
 rename ext/{NNlibCUDA/src/cudnn => NNlibCUDACUDNNExt}/pooling.jl (100%)
 rename ext/{NNlibCUDA/src/cudnn => NNlibCUDACUDNNExt}/softmax.jl (100%)
 create mode 100644 ext/NNlibCUDAExt/NNlibCUDAExt.jl
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/activations.jl (100%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/batchedadjtrans.jl (100%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/batchedmul.jl (100%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/ctc.jl (99%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/fold.jl (100%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/sampling.jl (100%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/scatter.jl (100%)
 rename ext/{NNlibCUDA/src => NNlibCUDAExt}/utils.jl (100%)
 create mode 100644 src/normalization.jl
 rename {ext/NNlibCUDA/test => test/ext_cuda}/activations.jl (84%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/batchedadjtrans.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/batchedmul.jl (100%)
 create mode 100644 test/ext_cuda/batchnorm.jl
 rename {ext/NNlibCUDA/test => test/ext_cuda}/conv.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/ctc.jl (93%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/dropout.jl (95%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/fold.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/gather.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/pooling.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/runtests.jl (88%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/sampling.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/scatter.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/softmax.jl (100%)
 rename {ext/NNlibCUDA/test => test/ext_cuda}/test_utils.jl (100%)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 398580897..2f7702fb7 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,44 +1,53 @@
 steps:
-  - label: "GPU julia v1.6"
+  - label: ":julia: Julia {{matrix.julia}} + CUDA GPU"
     plugins:
       - JuliaCI/julia#v1:
-          version: "1.6"
-      - JuliaCI/julia-test#v1: ~
+          version: "{{matrix.julia}}"
+      - JuliaCI/julia-test#v1:
+          test_args: "--quickfail"
       - JuliaCI/julia-coverage#v1:
           codecov: true
           dirs:
            - src
-#     commands:
-#       - julia --project=test -e """
-#           Pkg.develop(url = \"https://github.com/FluxML/NNlibCUDA.jl\")
-#           Pkg.instantiate()
-#           Pkg.build()
-#           Pkg.status()
-#           Pkg.test()
-#           Pkg.test(\"NNlibCUDA\")
-#         """
+           - ext
     agents:
       queue: "juliagpu"
       cuda: "*"
     env:
       NNLIB_TEST_CUDA: true
+    if: build.message !~ /\[skip tests\]/
     timeout_in_minutes: 60
+    matrix:
+      setup:
+        julia:
+          # - "1.9" # uncomment when 1.10 is out
+          - "1"
+          - "nightly"
+      adjustments:
+        - with:
+            julia: "nightly"
+          soft_fail: true
 
-  - label: "GPU julia v1"
+  - label: ":julia: Julia 1 + AMD GPU"
     plugins:
       - JuliaCI/julia#v1:
           version: "1"
-      - JuliaCI/julia-test#v1: ~
+      - JuliaCI/julia-test#v1:
       - JuliaCI/julia-coverage#v1:
           codecov: true
           dirs:
            - src
+           - ext
     agents:
       queue: "juliagpu"
-      cuda: "*"
-    env:
-      NNLIB_TEST_CUDA: true
+      rocm: "*"
+      rocmgpu: "*"
     timeout_in_minutes: 60
+    env:
+      JULIA_AMDGPU_CORE_MUST_LOAD: "1"
+      JULIA_AMDGPU_HIP_MUST_LOAD: "1"
+      NNLIB_TEST_AMDGPU: true
+
 
   - label: "Benchmarks"
     plugins:
@@ -55,38 +64,5 @@ steps:
     if: build.pull_request.labels includes "benchmark"
     timeout_in_minutes: 30
 
-  - label: "AMDGPU - Julia 1.9"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: 1.9-nightly
-      - JuliaCI/julia-test#v1:
-      - JuliaCI/julia-coverage#v1:
-          codecov: true
-          dirs:
-            - src
-            - ext
-    agents:
-      queue: "juliagpu"
-      rocm: "*"
-      rocmgpu: "*"
-    timeout_in_minutes: 60
-    env:
-      JULIA_AMDGPU_CORE_MUST_LOAD: "1"
-      JULIA_AMDGPU_HIP_MUST_LOAD: "1"
-      NNLIB_TEST_AMDGPU: true
-
-  # - label: "GPU julia nightly"
-  #   plugins:
-  #     - JuliaCI/julia#v1:
-  #         version: "nightly"
-  #     - JuliaCI/julia-test#v1: ~
-  #     - JuliaCI/julia-coverage#v1:
-  #         codecov: true
-  #         dirs:
-  #           - src
-  #   agents:
-  #     queue: "juliagpu"
-  #     cuda: "*"
-  #   timeout_in_minutes: 60
 env:
   SECRET_CODECOV_TOKEN: "IlEMvDI6RciJQr5eX7qBBpHYFAe8+Svf3lNJh9gZi0MeJZQvMZWzHfW/lVncA9d9K+gDBBTv/zwqF86xOaIFLuACNdcGZiGgHS+NGeXN5CEppjqLnqKuaeHmLgJ43jygxRwgF88LhwTGcHG7pmESIp1Bn3Jd23UUv4t8hJLBDF+KJLZMefzCXnEVzfwJYxhJktnKJPA4dOv59w33Vj1x5uCYZbQlLP54IJPBm8UGdXS+JrUX8Z7lhxbkJUi6c+R6cvVBw27uRjF0pUJY26mt1frx8MzTGTOweXTpi+Kc5JhzlokMlan17j6T/b7qMC13IuKopfqu1GhkSBQD3ZhQqA==;U2FsdGVkX19l7JMB48k4oJHLoaqC7/MmvQWmaiBxRN472ZC6AcQ0uCBRy6Fw8tI0YcjIxKDScaBnJ2v/deOfhg=="
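The `test_args: "--quickfail"` setting above is forwarded by the `julia-test` Buildkite plugin to the package's test run. A minimal sketch of what that amounts to; the `--quickfail` flag name is taken from the pipeline, but the handling shown is illustrative rather than NNlib's verbatim test code:

```julia
# The CI step effectively runs:
#   Pkg.test("NNlib"; test_args = ["--quickfail"])
using Pkg

# Inside test/runtests.jl the extra arguments arrive via ARGS:
quickfail = "--quickfail" in ARGS   # e.g. abort on the first failing testset
```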
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index a784fc3e1..3dfba52e8 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -10,6 +10,16 @@ jobs:
   CompatHelper:
     runs-on: ubuntu-latest
     steps:
+      - name: Check if Julia is already available in the PATH
+        id: julia_in_path
+        run: which julia
+        continue-on-error: true
+      - name: Install Julia, but only if it is not already available in the PATH
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1'
+          arch: ${{ runner.arch }}
+        if: steps.julia_in_path.outcome != 'success'
       - name: "Add the General registry via Git"
        run: |
          import Pkg
@@ -32,4 +42,4 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
-          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
+          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
\ No newline at end of file
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
index cbaeb01f4..3042569cb 100644
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@@ -1,13 +1,33 @@
 name: TagBot
 on:
-  issue_comment: # https://discourse.julialang.org/t/ann-required-updates-to-tagbot-yml/49249
+  issue_comment:
     types:
       - created
+  workflow_dispatch:
+    inputs:
+      lookback:
+        default: 3
+permissions:
+  actions: read
+  checks: read
+  contents: write
+  deployments: read
+  issues: read
+  discussions: read
+  packages: read
+  pages: read
+  pull-requests: read
+  repository-projects: read
+  security-events: read
+  statuses: read
 jobs:
   TagBot:
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
     runs-on: ubuntu-latest
     steps:
       - uses: JuliaRegistries/TagBot@v1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+          # Edit the following line to reflect the actual name of the GitHub Secret containing your private key
           ssh: ${{ secrets.DOCUMENTER_KEY }}
+          # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }}
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 73144ff1a..dac08e745 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ defaults:
 
 jobs:
   test:
-    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.julia-threads }} thread(s) - ${{ github.event_name }}
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.julia-threads }} thread(s)
     runs-on: ${{ matrix.os }}
     env:
       JULIA_NUM_THREADS: ${{ matrix.julia-threads }}
@@ -23,7 +23,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
+          # - '1.9' # uncomment when julia 1.10 is out
           - '1' # automatically expands to the latest stable 1.x release of Julia
           - 'nightly'
         os:
@@ -69,17 +69,17 @@ jobs:
 
       - name: "Run test without coverage"
         uses: julia-actions/julia-runtest@v1
-        if: ${{ !contains(fromJson('["1", "1.6"]'), matrix.version) || matrix.os != 'ubuntu-latest' }}
+        if: ${{ !contains(fromJson('["1", "1.9"]'), matrix.version) || matrix.os != 'ubuntu-latest' }}
        with:
          coverage: false

      - name: "Run test with coverage"
        uses: julia-actions/julia-runtest@v1
-        if: contains(fromJson('["1", "1.6"]'), matrix.version) && matrix.os == 'ubuntu-latest'
+        if: contains(fromJson('["1", "1.9"]'), matrix.version) && matrix.os == 'ubuntu-latest'
      - uses: julia-actions/julia-processcoverage@v1
-        if: contains(fromJson('["1", "1.6"]'), matrix.version) && matrix.os == 'ubuntu-latest'
+        if: contains(fromJson('["1", "1.9"]'), matrix.version) && matrix.os == 'ubuntu-latest'
      - uses: codecov/codecov-action@v3
-        if: contains(fromJson('["1", "1.6"]'), matrix.version) && matrix.os == 'ubuntu-latest'
+        if: contains(fromJson('["1", "1.9"]'), matrix.version) && matrix.os == 'ubuntu-latest'
        with:
          file: lcov.info

@@ -90,7 +90,7 @@ jobs:
     - uses: actions/checkout@v3
     - uses: julia-actions/setup-julia@v1
       with:
-        version: '1.6'
+        version: '1.9'
     - run: |
         julia --project=docs -e '
           using Pkg
diff --git a/.gitignore b/.gitignore
index ac7124fbc..9bcfd5280 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,7 +12,7 @@ deps.jl
 *.log
 .vscode/
 /Manifest.toml
-lib/NNlibCUDA/Manifest.toml
+test/Manifest.toml
 benchmark/Manifest.toml
 benchmark/*.json
 benchmark/report.md
diff --git a/Project.toml b/Project.toml
index e99614bec..30e546357 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "NNlib"
 uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-version = "0.8.20"
+version = "0.9.0"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -16,19 +16,22 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [weakdeps]
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
 NNlibAMDGPUExt = "AMDGPU"
+NNlibCUDAExt = "CUDA"
+NNlibCUDACUDNNExt = ["CUDA", "cuDNN"]
 
 [compat]
 AMDGPU = "0.4.8"
-Adapt = "2, 3.2"
+Adapt = "3.2"
 Atomix = "0.1"
 ChainRulesCore = "1.13"
+CUDA = "4"
+cuDNN = "1"
 GPUArraysCore = "0.1"
 KernelAbstractions = "0.9.2"
-Requires = "0.5, 1.0"
-julia = "1.6"
-
-[extras]
-AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+Requires = "1.0"
+julia = "1.9"
\ No newline at end of file
diff --git a/README.md b/README.md
index e6de1e2a6..3892a2cc1 100644
--- a/README.md
+++ b/README.md
@@ -16,4 +16,13 @@ This package provides a library of functions useful for neural networks, such as
 
 For use with automatic differentiation, this package defines gradients using [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl). These will be seen by various packages including [Zygote.jl](https://github.com/FluxML/Zygote.jl).
 
-To use these functions with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) you will need [NNlibCUDA.jl](https://github.com/FluxML/NNlibCUDA.jl) as well.
+GPU support is provided as package extensions (see the `ext/` folder). In order to load the extensions, use the imports
+```julia
+using NNlib, CUDA, cuDNN
+```
+for CUDA support, or
+```julia
+using NNlib, AMDGPU
+```
+for AMDGPU support.
+
diff --git a/docs/src/index.md b/docs/src/index.md
index 91adcee0c..46958da1b 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -4,5 +4,12 @@
 
 For use with automatic differentiation, this package defines gradients using [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl). These will be seen by various packages including [Zygote.jl](https://github.com/FluxML/Zygote.jl).
 
-To use these functions with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) you will need [NNlibCUDA.jl](https://github.com/FluxML/NNlibCUDA.jl) as well.
-For [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) you will need to load it and NNlib in the same Julia session.
+GPU support is provided as package extensions. In order to load the extensions, use the imports
+```julia
+using NNlib, CUDA, cuDNN
+```
+for CUDA support, or
+```julia
+using NNlib, AMDGPU
+```
+for AMDGPU support.
\ No newline at end of file
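With the `[weakdeps]`/`[extensions]` tables above, Julia 1.9+ loads the extensions automatically once the trigger packages are in the session. A sketch for confirming that this actually happened; `Base.get_extension` is the standard Julia 1.9 introspection call, and the warnings are illustrative:

```julia
using NNlib, CUDA, cuDNN   # loading the weak deps triggers the extensions

# Returns the extension module, or `nothing` if the triggers are missing:
ext = Base.get_extension(NNlib, :NNlibCUDAExt)
isnothing(ext) && @warn "NNlibCUDAExt did not load; is CUDA.jl installed?"

cudnn_ext = Base.get_extension(NNlib, :NNlibCUDACUDNNExt)
isnothing(cudnn_ext) && @warn "cuDNN-backed kernels unavailable; is cuDNN.jl installed?"
```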
diff --git a/ext/NNlibCUDA/.buildkite/pipeline.yml b/ext/NNlibCUDA/.buildkite/pipeline.yml
deleted file mode 100644
index 915f10e07..000000000
--- a/ext/NNlibCUDA/.buildkite/pipeline.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-steps:
-  - label: "GPU integration with julia v1.6"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: "1.6"
-      - JuliaCI/julia-test#v1: ~
-    agents:
-      queue: "juliagpu"
-      cuda: "*"
-    timeout_in_minutes: 60
-
-  - label: "GPU integration with julia v1"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: "1.8"
-      - JuliaCI/julia-test#v1: ~
-    agents:
-      queue: "juliagpu"
-      cuda: "*"
-    timeout_in_minutes: 60
diff --git a/ext/NNlibCUDA/.github/workflows/compathelper.yml b/ext/NNlibCUDA/.github/workflows/compathelper.yml
deleted file mode 100644
index 0243c7062..000000000
--- a/ext/NNlibCUDA/.github/workflows/compathelper.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-name: CompatHelper
-
-on:
-  schedule:
-    - cron: '00 * * * *'
-  issues:
-    types: [opened, reopened]
-
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        julia-version: [1.2.0]
-        julia-arch: [x86]
-        os: [ubuntu-latest]
-    steps:
-      - uses: julia-actions/setup-julia@latest
-        with:
-          version: ${{ matrix.julia-version }}
-      - name: Pkg.add("CompatHelper")
-        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
-      - name: CompatHelper.main()
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: julia -e 'using CompatHelper; CompatHelper.main()'
diff --git a/ext/NNlibCUDA/.github/workflows/tagbot.yml b/ext/NNlibCUDA/.github/workflows/tagbot.yml
deleted file mode 100644
index f49313b66..000000000
--- a/ext/NNlibCUDA/.github/workflows/tagbot.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: TagBot
-on:
-  issue_comment:
-    types:
-      - created
-  workflow_dispatch:
-jobs:
-  TagBot:
-    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: JuliaRegistries/TagBot@v1
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          ssh: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/ext/NNlibCUDA/.gitignore b/ext/NNlibCUDA/.gitignore
deleted file mode 100644
index ba39cc531..000000000
--- a/ext/NNlibCUDA/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-Manifest.toml
diff --git a/ext/NNlibCUDA/LICENSE.md b/ext/NNlibCUDA/LICENSE.md
deleted file mode 100644
index 5284ae49b..000000000
--- a/ext/NNlibCUDA/LICENSE.md
+++ /dev/null
@@ -1,23 +0,0 @@
-The NNlibCUDA.jl package is licensed under the MIT "Expat" License:
-
-> Copyright (c) 2021-22: Julia Computing, INc., Dhairya Gandhi and Contributors
->
-> Permission is hereby granted, free of charge, to any person obtaining
-> a copy of this software and associated documentation files (the
-> "Software"), to deal in the Software without restriction, including
-> without limitation the rights to use, copy, modify, merge, publish,
-> distribute, sublicense, and/or sell copies of the Software, and to
-> permit persons to whom the Software is furnished to do so, subject to
-> the following conditions:
->
-> The above copyright notice and this permission notice shall be
-> included in all copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
diff --git a/ext/NNlibCUDA/Project.toml b/ext/NNlibCUDA/Project.toml
deleted file mode 100644
index dac85ae58..000000000
--- a/ext/NNlibCUDA/Project.toml
+++ /dev/null
@@ -1,29 +0,0 @@
-name = "NNlibCUDA"
-uuid = "a00861dc-f156-4864-bf3c-e6376f28a68d"
-version = "0.2.7"
-
-[deps]
-Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
-
-[compat]
-Adapt = "3.3"
-cuDNN = "1"
-CUDA = "4"
-NNlib = "0.8.15"
-julia = "1.6"
-
-[extras]
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-
-[targets]
-test = ["CUDA", "ChainRulesCore", "ForwardDiff", "Test", "Zygote"]
diff --git a/ext/NNlibCUDA/README.md b/ext/NNlibCUDA/README.md
deleted file mode 100644
index 927bfe552..000000000
--- a/ext/NNlibCUDA/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# NNlibCUDA.jl
-
-This is a glue package which extends functions from [NNlib.jl](https://github.com/FluxML/NNlib.jl) to work with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl). It should be loaded automatically when using [Flux.jl](https://github.com/FluxML/Flux.jl), but not when using NNlib.jl by itself.
-
-Julia gpu kernels are in `src/`, while wrappers around `cudnn` are in `src/cudnn/`.
diff --git a/ext/NNlibCUDA/src/NNlibCUDA.jl b/ext/NNlibCUDA/src/NNlibCUDA.jl
deleted file mode 100644
index e1c3c225c..000000000
--- a/ext/NNlibCUDA/src/NNlibCUDA.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-module NNlibCUDA
-
-using NNlib
-using CUDA, cuDNN
-using Random, Statistics
-
-const IntOrIntTuple = Union{Integer, NTuple{N,<:Integer} where N}
-
-include("sampling.jl")
-include("activations.jl")
-include("batchedadjtrans.jl")
-include("batchedmul.jl")
-include("ctc.jl")
-include("fold.jl")
-include("scatter.jl")
-include("utils.jl")
-include("cudnn/cudnn.jl")
-include("cudnn/conv.jl")
-include("cudnn/pooling.jl")
-include("cudnn/softmax.jl")
-include("cudnn/activations.jl")
-include("cudnn/batchnorm.jl")
-
-end # module
diff --git a/ext/NNlibCUDA/test/batchnorm.jl b/ext/NNlibCUDA/test/batchnorm.jl
deleted file mode 100644
index 96c68dda4..000000000
--- a/ext/NNlibCUDA/test/batchnorm.jl
+++ /dev/null
@@ -1,27 +0,0 @@
-@testset "Batchnorm" begin
-    v = CUDA.rand(Float32, 2)
-    m = CUDA.rand(Float32, 2, 5)
-
-    @testset for training in (true, false), track_stats in (true, false)
-        kws = (training=training, track_stats=track_stats)
-
-        # Normal
-        NNlibCUDA.batchnorm(v, v, m, v, v, 1.0; kws...)
-        NNlibCUDA.∇batchnorm(v, v, m, m, v, v, 1.0; kws...)
-
-        # No affine
-        NNlibCUDA.batchnorm(nothing, nothing, m, v, v, 1.0; kws...)
-        NNlibCUDA.∇batchnorm(nothing, nothing, m, m, v, v, 1.0; kws...)
-
-        # No tracking
-        NNlibCUDA.batchnorm(v, v, m, nothing, nothing, 1.0; kws...)
-        NNlibCUDA.∇batchnorm(v, v, m, m, nothing, nothing, 1.0; kws...)
-
-        # Both or neither tracked or affine params must be set
-        for (α, β) in ((v, nothing), (nothing, v))
-            @test_throws MethodError NNlibCUDA.batchnorm(α, β, m, v, v, 1.0; kws...)
-            @test_throws MethodError NNlibCUDA.∇batchnorm(α, β, m, m, v, v, 1.0; kws...)
-            @test_throws ArgumentError NNlibCUDA.batchnorm(v, v, m, α, β, 1.0; kws...)
-        end
-    end
-end
diff --git a/ext/NNlibCUDA/src/cudnn/cudnn.jl b/ext/NNlibCUDACUDNNExt/NNlibCUDACUDNNExt.jl
similarity index 69%
rename from ext/NNlibCUDA/src/cudnn/cudnn.jl
rename to ext/NNlibCUDACUDNNExt/NNlibCUDACUDNNExt.jl
index e3fc55068..238c3cb25 100644
--- a/ext/NNlibCUDA/src/cudnn/cudnn.jl
+++ b/ext/NNlibCUDACUDNNExt/NNlibCUDACUDNNExt.jl
@@ -1,3 +1,10 @@
+module NNlibCUDACUDNNExt
+
+using NNlib
+using cuDNN
+using CUDA
+using Random, Statistics
+
 using cuDNN: handle, with_workspace, cudnnTensorDescriptor, cudnnFilterDescriptor,
              cudnnDataType, math_mode, CUDNN_DEFAULT_REORDER, CUDNN_CROSS_CORRELATION,
              CUDNN_NOT_PROPAGATE_NAN, CUDNN_TENSOR_NCHW, dim4
@@ -11,3 +18,11 @@ function nnlibPadding(dims)
   end
   return pd[1:2:end]
 end
+
+include("conv.jl")
+include("pooling.jl")
+include("softmax.jl")
+include("activations.jl")
+include("batchnorm.jl")
+
+end # module
\ No newline at end of file
diff --git a/ext/NNlibCUDA/src/cudnn/activations.jl b/ext/NNlibCUDACUDNNExt/activations.jl
similarity index 100%
rename from ext/NNlibCUDA/src/cudnn/activations.jl
rename to ext/NNlibCUDACUDNNExt/activations.jl
diff --git a/ext/NNlibCUDA/src/cudnn/batchnorm.jl b/ext/NNlibCUDACUDNNExt/batchnorm.jl
similarity index 99%
rename from ext/NNlibCUDA/src/cudnn/batchnorm.jl
rename to ext/NNlibCUDACUDNNExt/batchnorm.jl
index 1e20fbc87..2c38f009e 100644
--- a/ext/NNlibCUDA/src/cudnn/batchnorm.jl
+++ b/ext/NNlibCUDACUDNNExt/batchnorm.jl
@@ -1,7 +1,7 @@
 using cuDNN: CUDNN_BN_MIN_EPSILON, cudnnBatchNormalizationBackward,
              cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL,
              cudnnBatchNormalizationForwardTraining
-
+import NNlib: batchnorm, ∇batchnorm
 # TODO: replace with new cudnn normalization interface
 # https://github.com/JuliaGPU/CUDA.jl/blob/master/lib/cudnn/normalization.jl
 
diff --git a/ext/NNlibCUDA/src/cudnn/conv.jl b/ext/NNlibCUDACUDNNExt/conv.jl
similarity index 100%
rename from ext/NNlibCUDA/src/cudnn/conv.jl
rename to ext/NNlibCUDACUDNNExt/conv.jl
diff --git a/ext/NNlibCUDA/src/cudnn/pooling.jl b/ext/NNlibCUDACUDNNExt/pooling.jl
similarity index 100%
rename from ext/NNlibCUDA/src/cudnn/pooling.jl
rename to ext/NNlibCUDACUDNNExt/pooling.jl
diff --git a/ext/NNlibCUDA/src/cudnn/softmax.jl b/ext/NNlibCUDACUDNNExt/softmax.jl
similarity index 100%
rename from ext/NNlibCUDA/src/cudnn/softmax.jl
rename to ext/NNlibCUDACUDNNExt/softmax.jl
diff --git a/ext/NNlibCUDAExt/NNlibCUDAExt.jl b/ext/NNlibCUDAExt/NNlibCUDAExt.jl
new file mode 100644
index 000000000..876481886
--- /dev/null
+++ b/ext/NNlibCUDAExt/NNlibCUDAExt.jl
@@ -0,0 +1,16 @@
+module NNlibCUDAExt
+
+using NNlib
+using CUDA
+using Random, Statistics
+
+include("sampling.jl")
+include("activations.jl")
+include("batchedadjtrans.jl")
+include("batchedmul.jl")
+include("ctc.jl")
+include("fold.jl")
+include("scatter.jl")
+include("utils.jl")
+
+end # module
diff --git a/ext/NNlibCUDA/src/activations.jl b/ext/NNlibCUDAExt/activations.jl
similarity index 100%
rename from ext/NNlibCUDA/src/activations.jl
rename to ext/NNlibCUDAExt/activations.jl
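The two new modules above show the extension pattern that replaces the old glue package: an extension imports the parent and its weak dependency, then adds methods to functions the parent owns. A runnable sketch of just the mechanics, with hypothetical names (`Parent`, `ParentFancyExt`, `some_op` are placeholders, not real packages):

```julia
module Parent
function some_op end            # the parent owns the generic function
some_op(x::Array) = sum(x)      # a CPU fallback method
end

module ParentFancyExt           # stands in for a package extension
using ..Parent
# the "extension" attaches a method to the function Parent owns;
# in a real extension, UnitRange would be the GPU array type:
Parent.some_op(x::UnitRange) = length(x)
end

Parent.some_op([1, 2, 3])  # 6, dispatched to the parent method
Parent.some_op(1:10)       # 10, dispatched to the extension's method
```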
diff --git a/ext/NNlibCUDA/src/batchedadjtrans.jl b/ext/NNlibCUDAExt/batchedadjtrans.jl
similarity index 100%
rename from ext/NNlibCUDA/src/batchedadjtrans.jl
rename to ext/NNlibCUDAExt/batchedadjtrans.jl
diff --git a/ext/NNlibCUDA/src/batchedmul.jl b/ext/NNlibCUDAExt/batchedmul.jl
similarity index 100%
rename from ext/NNlibCUDA/src/batchedmul.jl
rename to ext/NNlibCUDAExt/batchedmul.jl
diff --git a/ext/NNlibCUDA/src/ctc.jl b/ext/NNlibCUDAExt/ctc.jl
similarity index 99%
rename from ext/NNlibCUDA/src/ctc.jl
rename to ext/NNlibCUDAExt/ctc.jl
index b59d201cb..84a319ba8 100644
--- a/ext/NNlibCUDA/src/ctc.jl
+++ b/ext/NNlibCUDAExt/ctc.jl
@@ -1,4 +1,4 @@
-# CTC loss moved from Flux.jl to NNlib + NNlibCUDA
+# CTC loss moved from Flux.jl to NNlib
 
 import NNlib: ctc_loss, ctc_alpha, ∇ctc_loss
 
diff --git a/ext/NNlibCUDA/src/fold.jl b/ext/NNlibCUDAExt/fold.jl
similarity index 100%
rename from ext/NNlibCUDA/src/fold.jl
rename to ext/NNlibCUDAExt/fold.jl
diff --git a/ext/NNlibCUDA/src/sampling.jl b/ext/NNlibCUDAExt/sampling.jl
similarity index 100%
rename from ext/NNlibCUDA/src/sampling.jl
rename to ext/NNlibCUDAExt/sampling.jl
diff --git a/ext/NNlibCUDA/src/scatter.jl b/ext/NNlibCUDAExt/scatter.jl
similarity index 100%
rename from ext/NNlibCUDA/src/scatter.jl
rename to ext/NNlibCUDAExt/scatter.jl
diff --git a/ext/NNlibCUDA/src/utils.jl b/ext/NNlibCUDAExt/utils.jl
similarity index 100%
rename from ext/NNlibCUDA/src/utils.jl
rename to ext/NNlibCUDAExt/utils.jl
diff --git a/src/NNlib.jl b/src/NNlib.jl
index 183d83cbc..8b0d3d5d5 100644
--- a/src/NNlib.jl
+++ b/src/NNlib.jl
@@ -20,7 +20,6 @@ using Statistics: mean
 
 const libblas = Base.libblas_name
 
-const IntOrIntTuple = Union{Integer, NTuple{N,<:Integer} where N}
 const Numeric = Union{AbstractArray{<:T}, T} where {T<:Number}
 
 # Include APIs
@@ -104,6 +103,9 @@ end
 include("sampling.jl")
 include("functions.jl")
 
+include("normalization.jl")
+# export batchnorm, ∇batchnorm
+
 ## Include implementations
 include("impl/padding_edges.jl")
 
diff --git a/src/activations.jl b/src/activations.jl
index c7ee13826..a034586a8 100644
--- a/src/activations.jl
+++ b/src/activations.jl
@@ -1,6 +1,6 @@
 ## Activation functions
 #
-# Some of activation functions have its wrapper function for GPU in NNlibCUDA.jl.
+# Some activation functions have GPU wrapper functions in NNlibCUDAExt.jl.
 # https://github.com/JuliaGPU/CuArrays.jl/issues/614
 
 ACTIVATIONS = [
diff --git a/src/ctc.jl b/src/ctc.jl
index 449a519ba..6202622c3 100644
--- a/src/ctc.jl
+++ b/src/ctc.jl
@@ -1,4 +1,4 @@
-# CTC loss moved from Flux.jl to NNlib + NNlibCUDA
+# CTC loss moved from Flux.jl to NNlib
 
 ## CPU implementation
 
diff --git a/src/deprecations.jl b/src/deprecations.jl
index e76cbc796..64c3045d6 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -1,23 +1,3 @@
-
-### Deprecated while v0.7 was latest
-
-function ∇softmax(Δ, x; dims = 1)
-    # This 2-arg version recomputes the forward pass, which is slow.
-    # Removed from use in 0.7, but only prints a warning during 0.8:
-    Base.depwarn("`∇softmax(Δ, x)` without `y = softmax(x)` argument is deprecated, as this is inefficient, please use `∇softmax_data(dy, y)`", :∇softmax)
-    ∇softmax(Δ, x, softmax(x; dims); dims)
-end
-∇softmax!(Δ, x; dims = 1) = Δ .= ∇softmax(Δ, x; dims)
-∇softmax!(out, Δ, x; dims = 1) = out .= ∇softmax(Δ, x; dims)
-
-function ∇logsoftmax(Δ, x; dims = 1)
-    Base.depwarn("`∇logsoftmax(Δ, x)` without `y = logsoftmax(x)` argument is deprecated, please use `∇logsoftmax_data(dy, y)`", :∇logsoftmax)
-    ∇logsoftmax(Δ, x, logsoftmax(x; dims); dims)
-end
-∇logsoftmax!(Δ, x; dims = 1) = Δ .= ∇logsoftmax(Δ, x; dims)
-∇logsoftmax!(out, Δ, x; dims = 1) = out .= ∇logsoftmax(Δ, x; dims)
-
-
 ### Deprecated while v0.8 was latest
 
 export ∇softmax,
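The deleted fallbacks recomputed `softmax(x)` inside the gradient, which is what made them slow. The supported path reuses the forward result; a sketch using `∇softmax_data(dy, y)`, the helper the deprecation message itself points to:

```julia
using NNlib

x  = randn(Float32, 5, 3)
y  = softmax(x; dims = 1)          # keep the forward result around
dy = randn(Float32, 5, 3)          # incoming gradient
dx = NNlib.∇softmax_data(dy, y)    # gradient w.r.t. x, no recomputation
```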
diff --git a/src/dropout.jl b/src/dropout.jl
index d021ee54d..86bcb6c6f 100644
--- a/src/dropout.jl
+++ b/src/dropout.jl
@@ -158,5 +158,5 @@ _rng_from_array(::AbstractArray) = Random.default_rng()
 @non_differentiable _rng_from_array(::Any)
 
 # This exists because `rand!(default_rng(), CUDA.rand(3))` ignores the RNG,
-# and Flux would prefer an error. NNlibCUDA will overload it to produce that.
+# and Flux would prefer an error. NNlibCUDAExt will overload it to produce that.
 _rng_compat_array(::AbstractRNG, ::AbstractArray) = nothing
diff --git a/src/normalization.jl b/src/normalization.jl
new file mode 100644
index 000000000..48fc53de6
--- /dev/null
+++ b/src/normalization.jl
@@ -0,0 +1,4 @@
+# TODO: add CPU implementation
+function batchnorm end
+
+function ∇batchnorm end
diff --git a/src/upsample.jl b/src/upsample.jl
index 2f58666b9..a320ca9e6 100644
--- a/src/upsample.jl
+++ b/src/upsample.jl
@@ -380,15 +380,6 @@ function ∇upsample_linear_kernel!(
     return dx
 end
 
-# Compatibility layer for old versions of NNlibCUDA.
-# TODO Can be removed from NNlib 0.9.
-upsample_linear_wcn!(y, x) = upsample_linear_kernel!(y, x)
-upsample_bilinear_whcn!(y, x) = upsample_linear_kernel!(y, x)
-upsample_trilinear_whdcn!(y, x) = upsample_linear_kernel!(y, x)
-∇upsample_linear_wcn!(y, x) = ∇upsample_linear_kernel!(y, x)
-∇upsample_bilinear_whcn!(y, x) = ∇upsample_linear_kernel!(y, x)
-∇upsample_trilinear_whdcn!(y, x) = ∇upsample_linear_kernel!(y, x)
-
 
 # Linear (CPU): parallelization along channel x batch dimensions.
 @kernel function _upsample_linear_kernel!(::CPU, y::T, x::T, rwidth, align::Val{A}) where {
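The new `src/normalization.jl` declares `batchnorm` and `∇batchnorm` with zero methods, so the cuDNN extension can own every method while callers without a GPU backend fail loudly. A runnable sketch of what that stub pattern buys, with a hypothetical name:

```julia
# `my_batchnorm` is a placeholder mirroring `function batchnorm end`:
function my_batchnorm end        # zero-method generic function

length(methods(my_batchnorm))    # 0 — nothing callable yet

try
    my_batchnorm(rand(3))
catch err
    # MethodError rather than silently wrong behaviour:
    @info "no backend has provided my_batchnorm yet" typeof(err)
end
```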
diff --git a/test/Project.toml b/test/Project.toml
index 82062f811..1549b4923 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,19 +1,19 @@
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
-KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
diff --git a/ext/NNlibCUDA/test/activations.jl b/test/ext_cuda/activations.jl
similarity index 84%
rename from ext/NNlibCUDA/test/activations.jl
rename to test/ext_cuda/activations.jl
index c2047782d..fb9d2ebfc 100644
--- a/ext/NNlibCUDA/test/activations.jl
+++ b/test/ext_cuda/activations.jl
@@ -14,8 +14,8 @@
     end
 end
 
-# Broadcasting over complex CuArray works without NNlibCUDA, this test checks that
-# NNlibCUDA does not cause such operations to take a fast path which does not support
+# Broadcasting over complex CuArray works without NNlibCUDAExt, this test checks that
+# NNlibCUDAExt does not cause such operations to take a fast path which does not support
 # complex numbers (e.g. cuDNN)
 @testset "complex" begin
     f(x) = tanh.(x)
diff --git a/ext/NNlibCUDA/test/batchedadjtrans.jl b/test/ext_cuda/batchedadjtrans.jl
similarity index 100%
rename from ext/NNlibCUDA/test/batchedadjtrans.jl
rename to test/ext_cuda/batchedadjtrans.jl
diff --git a/ext/NNlibCUDA/test/batchedmul.jl b/test/ext_cuda/batchedmul.jl
similarity index 100%
rename from ext/NNlibCUDA/test/batchedmul.jl
rename to test/ext_cuda/batchedmul.jl
diff --git a/test/ext_cuda/batchnorm.jl b/test/ext_cuda/batchnorm.jl
new file mode 100644
index 000000000..0adea7024
--- /dev/null
+++ b/test/ext_cuda/batchnorm.jl
@@ -0,0 +1,27 @@
+@testset "Batchnorm" begin
+    v = CUDA.rand(Float32, 2)
+    m = CUDA.rand(Float32, 2, 5)
+
+    @testset for training in (true, false), track_stats in (true, false)
+        kws = (training=training, track_stats=track_stats)
+
+        # Normal
+        batchnorm(v, v, m, v, v, 1.0; kws...)
+        ∇batchnorm(v, v, m, m, v, v, 1.0; kws...)
+
+        # No affine
+        batchnorm(nothing, nothing, m, v, v, 1.0; kws...)
+        ∇batchnorm(nothing, nothing, m, m, v, v, 1.0; kws...)
+
+        # No tracking
+        batchnorm(v, v, m, nothing, nothing, 1.0; kws...)
+        ∇batchnorm(v, v, m, m, nothing, nothing, 1.0; kws...)
+
+        # Both or neither tracked or affine params must be set
+        for (α, β) in ((v, nothing), (nothing, v))
+            @test_throws MethodError batchnorm(α, β, m, v, v, 1.0; kws...)
+            @test_throws MethodError ∇batchnorm(α, β, m, m, v, v, 1.0; kws...)
+            @test_throws ArgumentError batchnorm(v, v, m, α, β, 1.0; kws...)
+        end
+    end
+end
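A usage sketch pieced together from the tests above; the positional order (scale, bias, input, running mean, running variance, momentum) is inferred from the test calls, and running it requires a working CUDA device plus the cuDNN extension:

```julia
using NNlib, CUDA, cuDNN
using NNlib: batchnorm

g  = CUDA.rand(Float32, 2)      # scale (γ)
b  = CUDA.rand(Float32, 2)      # bias (β)
x  = CUDA.rand(Float32, 2, 5)   # input, features × batch
μ  = CUDA.rand(Float32, 2)      # running mean
σ² = CUDA.rand(Float32, 2)      # running variance

y = batchnorm(g, b, x, μ, σ², 0.1; training = false, track_stats = true)
```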
diff --git a/ext/NNlibCUDA/test/conv.jl b/test/ext_cuda/conv.jl
similarity index 100%
rename from ext/NNlibCUDA/test/conv.jl
rename to test/ext_cuda/conv.jl
diff --git a/ext/NNlibCUDA/test/ctc.jl b/test/ext_cuda/ctc.jl
similarity index 93%
rename from ext/NNlibCUDA/test/ctc.jl
rename to test/ext_cuda/ctc.jl
index a786c1422..5c0a1e2bf 100644
--- a/ext/NNlibCUDA/test/ctc.jl
+++ b/test/ext_cuda/ctc.jl
@@ -1,9 +1,3 @@
-using Test
-using NNlib: ctc_loss
-using Zygote: gradient
-using LinearAlgebra
-using CUDA, NNlibCUDA
-
 # Custom function to check numerical gradient of ctc loss,
 # based on `ngradient` in `Tracker.jl`
 function ctc_ngradient(x, y)
diff --git a/ext/NNlibCUDA/test/dropout.jl b/test/ext_cuda/dropout.jl
similarity index 95%
rename from ext/NNlibCUDA/test/dropout.jl
rename to test/ext_cuda/dropout.jl
index 96cc88f05..db291a481 100644
--- a/ext/NNlibCUDA/test/dropout.jl
+++ b/test/ext_cuda/dropout.jl
@@ -1,6 +1,3 @@
-using NNlib, NNlibCUDA, CUDA, Test
-using Zygote, ChainRulesCore
-
 @testset "dropout + CUDA" begin
     # Basics
     x1 = CUDA.randn(3, 4)
diff --git a/ext/NNlibCUDA/test/fold.jl b/test/ext_cuda/fold.jl
similarity index 100%
rename from ext/NNlibCUDA/test/fold.jl
rename to test/ext_cuda/fold.jl
diff --git a/ext/NNlibCUDA/test/gather.jl b/test/ext_cuda/gather.jl
similarity index 100%
rename from ext/NNlibCUDA/test/gather.jl
rename to test/ext_cuda/gather.jl
diff --git a/ext/NNlibCUDA/test/pooling.jl b/test/ext_cuda/pooling.jl
similarity index 100%
rename from ext/NNlibCUDA/test/pooling.jl
rename to test/ext_cuda/pooling.jl
diff --git a/ext/NNlibCUDA/test/runtests.jl b/test/ext_cuda/runtests.jl
similarity index 88%
rename from ext/NNlibCUDA/test/runtests.jl
rename to test/ext_cuda/runtests.jl
index 8af877bba..c0b140036 100644
--- a/ext/NNlibCUDA/test/runtests.jl
+++ b/test/ext_cuda/runtests.jl
@@ -1,13 +1,12 @@
 using Test
 using NNlib
 using Zygote
-using NNlibCUDA
 using ForwardDiff: Dual
 using Statistics: mean
-using CUDA
+using CUDA, cuDNN
+using NNlib: batchnorm, ∇batchnorm
 CUDA.allowscalar(false)
 
-@testset "NNlibCUDA" begin
 include("test_utils.jl")
 include("activations.jl")
 include("dropout.jl")
@@ -22,4 +21,3 @@ include("batchnorm.jl")
 include("scatter.jl")
 include("gather.jl")
 include("sampling.jl")
-end
diff --git a/ext/NNlibCUDA/test/sampling.jl b/test/ext_cuda/sampling.jl
similarity index 100%
rename from ext/NNlibCUDA/test/sampling.jl
rename to test/ext_cuda/sampling.jl
diff --git a/ext/NNlibCUDA/test/scatter.jl b/test/ext_cuda/scatter.jl
similarity index 100%
rename from ext/NNlibCUDA/test/scatter.jl
rename to test/ext_cuda/scatter.jl
diff --git a/ext/NNlibCUDA/test/softmax.jl b/test/ext_cuda/softmax.jl
similarity index 100%
rename from ext/NNlibCUDA/test/softmax.jl
rename to test/ext_cuda/softmax.jl
diff --git a/ext/NNlibCUDA/test/test_utils.jl b/test/ext_cuda/test_utils.jl
similarity index 100%
rename from ext/NNlibCUDA/test/test_utils.jl
rename to test/ext_cuda/test_utils.jl
diff --git a/test/runtests.jl b/test/runtests.jl
index 3d7786ccc..cc5f4f84f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,6 +11,9 @@ using Adapt
 using KernelAbstractions
 DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive=true)
 
+# ENV["NNLIB_TEST_CUDA"] = true # uncomment to run CUDA tests
+# ENV["NNLIB_TEST_AMDGPU"] = true # uncomment to run AMDGPU tests
+
 const rng = StableRNG(123)
 
 include("test_utils.jl")
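To opt in to the GPU suites on a local machine, set the environment variable before running the tests; a sketch that mirrors the gating in the hunks below (the env var propagates to the test process that `Pkg.test` spawns):

```julia
using Pkg

ENV["NNLIB_TEST_CUDA"] = "true"
# ENV["NNLIB_TEST_AMDGPU"] = "true"   # same opt-in for the AMDGPU suite
# Pkg.test("NNlib")
```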
@@ -92,10 +95,8 @@ end
 if get(ENV, "NNLIB_TEST_CUDA", "false") == "true"
     using CUDA
     if CUDA.functional()
-        import Pkg
-        using NNlibCUDA
         @testset "CUDA" begin
-            Pkg.test("NNlibCUDA")
+            include("ext_cuda/runtests.jl")
         end
     else
         @info "Insufficient version or CUDA not found; Skipping CUDA tests"
@@ -127,7 +128,7 @@ end
         @info "AMDGPU.jl package is not functional. Skipping AMDGPU tests."
     end
 else
-    @info "Skipping AMDGPU tests, set NNLIB_TEST_CUDA=true to run them."
+    @info "Skipping AMDGPU tests, set NNLIB_TEST_AMDGPU=true to run them."
 end
 
 @testset "Doctests" begin
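The Doctests testset is truncated above. The standard Documenter idiom such a testset contains, given the `setdocmeta!` call earlier in the file, looks like this (a sketch, not necessarily NNlib's verbatim body):

```julia
using Documenter, NNlib, UnicodePlots

DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive = true)
doctest(NNlib, manual = false)   # run every doctest in NNlib's docstrings
```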