Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split KernelAbstractions into frontend and backends #200

Merged
merged 7 commits into from
Feb 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
agents:
queue: "juliagpu"
cuda: "*"
Expand All @@ -15,9 +18,12 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "1.6-nightly"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for the record: it should be possible to put `JULIA_LOAD_PATH=@` back once a fix like JuliaPackaging/BinaryBuilder.jl#1007 is implemented.

(Using `JULIA_LOAD_PATH=@` was just my preference for making sure that the tests are reproducible, i.e., to avoid accidentally using the `@stdlib` and `@v#.#` load-path entries. It's not a strict requirement to make CI work.)

agents:
queue: "juliagpu"
cuda: "*"
Expand All @@ -27,9 +33,12 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "nightly"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
agents:
queue: "juliagpu"
cuda: "*"
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/ci-julia-1.6-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:
- staging
- trying
tags: '*'
defaults:
run:
shell: bash
jobs:
CI-julia-1-6-nightly:
name: CI-julia-1-6-nightly
Expand Down Expand Up @@ -37,8 +40,9 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/ci-julia-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:
- staging
- trying
tags: '*'
defaults:
run:
shell: bash
jobs:
CI-julia-nightly:
name: CI-julia-nightly
Expand Down Expand Up @@ -37,8 +40,9 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:
- staging
- trying
tags: '*'
defaults:
run:
shell: bash
jobs:
CI:
name: CI
Expand Down Expand Up @@ -40,8 +43,9 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
Expand All @@ -55,13 +59,13 @@ jobs:
with:
version: '1'
- run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")'
shell: bash
env:
JULIA_PKG_SERVER: ""
- run: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))
Pkg.instantiate()'
- run: julia --project=docs docs/make.jl
env:
Expand All @@ -76,13 +80,13 @@ jobs:
with:
version: 'nightly'
- run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")'
shell: bash
env:
JULIA_PKG_SERVER: ""
- run: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))
Pkg.instantiate()'
- run: |
julia --project=docs -e '
Expand Down
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
name = "KernelAbstractions"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
version = "0.5.3"
version = "0.6.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Expand All @@ -15,7 +14,6 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Adapt = "0.4, 1.0, 2.0, 3.0"
CUDA = "~1.0, ~1.1, ~1.2, 1.3, 2"
Cassette = "0.3.3"
MacroTools = "0.5"
SpecialFunctions = "0.10, 1.0"
Expand Down
2 changes: 1 addition & 1 deletion examples/matmul.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using KernelAbstractions, Test, CUDA
using KernelAbstractions, CUDAKernels, Test, CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
Expand Down
1 change: 1 addition & 0 deletions examples/memcopy.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using KernelAbstractions
using CUDAKernels
using CUDA
using Test

Expand Down
1 change: 1 addition & 0 deletions examples/memcopy_static.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using KernelAbstractions
using CUDAKernels
using CUDA
using Test

Expand Down
3 changes: 2 additions & 1 deletion examples/mpi.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# EXCLUDE FROM TESTING
using KernelAbstractions
using CUDA
using CUDAKernels
using CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
Expand Down
2 changes: 1 addition & 1 deletion examples/naive_transpose.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using KernelAbstractions, Test, CUDA
using KernelAbstractions, CUDAKernels, Test, CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
Expand Down
4 changes: 2 additions & 2 deletions examples/performance.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using KernelAbstractions, CUDA, Test
using KernelAbstractions, CUDAKernels, CUDA, Test
using KernelAbstractions.Extras: @unroll

has_cuda_gpu() || exit()
Expand Down Expand Up @@ -199,4 +199,4 @@ for (name, kernel) in (
end
end
end
end
end
20 changes: 20 additions & 0 deletions lib/CUDAKernels/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name = "CUDAKernels"
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
version = "0.1.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
Adapt = "0.4, 1.0, 2.0, 3.0"
CUDA = "~1.0, ~1.1, ~1.2, 1.3, 2"
Cassette = "0.3.3"
KernelAbstractions = "0.6"
SpecialFunctions = "0.10, 1.0"
StaticArrays = "0.12, 1.0"
39 changes: 34 additions & 5 deletions src/backends/cuda.jl → lib/CUDAKernels/src/CUDAKernels.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
module CUDAKernels

import CUDA
import SpecialFunctions
import StaticArrays
import StaticArrays: MArray
import Cassette
import Adapt
import KernelAbstractions

export CUDADevice

const FREE_STREAMS = CUDA.CuStream[]
const STREAMS = CUDA.CuStream[]
Expand Down Expand Up @@ -44,6 +53,10 @@ function next_stream()
end
end

import KernelAbstractions: Event, CPUEvent, NoneEvent, MultiEvent, CPU, GPU, isdone, failed

struct CUDADevice <: GPU end
jpsamaroo marked this conversation as resolved.
Show resolved Hide resolved

struct CudaEvent <: Event
event::CUDA.CuEvent
end
Expand All @@ -58,6 +71,8 @@ function Event(::CUDADevice)
CudaEvent(event)
end

import Base: wait

wait(ev::CudaEvent, progress=yield) = wait(CPU(), ev, progress)

function wait(::CPU, ev::CudaEvent, progress=yield)
Expand Down Expand Up @@ -113,7 +128,7 @@ function __pin!(a)
return nothing
end

function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
function KernelAbstractions.async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
A isa Array && __pin!(A)
B isa Array && __pin!(B)

Expand All @@ -131,7 +146,7 @@ function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
return CudaEvent(event)
end


import KernelAbstractions: Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config

###
# Kernel launch
Expand Down Expand Up @@ -186,7 +201,7 @@ function (obj::Kernel{CUDADevice})(args...; ndrange=nothing, dependencies=nothin

# If the kernel is statically sized we can tell the compiler about that
if KernelAbstractions.workgroupsize(obj) <: StaticSize
maxthreads = prod(get(KernelAbstractions.workgroupsize(obj)))
maxthreads = prod(KernelAbstractions.get(KernelAbstractions.workgroupsize(obj)))
else
maxthreads = nothing
end
Expand All @@ -211,8 +226,12 @@ end

Cassette.@context CUDACtx

import KernelAbstractions: CompilerMetadata, CompilerPass, DynamicCheck, LinearIndices
import KernelAbstractions: __index_Local_Linear, __index_Group_Linear, __index_Global_Linear, __index_Local_Cartesian, __index_Group_Cartesian, __index_Global_Cartesian, __validindex, __print
import KernelAbstractions: mkcontext, expand, __iterspace, __ndrange, __dynamic_checkbounds

function mkcontext(kernel::Kernel{CUDADevice}, _ndrange, iterspace)
metadata = CompilerMetadata{ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
metadata = CompilerMetadata{KernelAbstractions.ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
Cassette.disablehooks(CUDACtx(pass = CompilerPass, metadata=metadata))
end

Expand Down Expand Up @@ -251,7 +270,9 @@ end
end
end

generate_overdubs(CUDACtx)
import KernelAbstractions: groupsize, __groupsize, __workitems_iterspace, add_float_contract, sub_float_contract, mul_float_contract

KernelAbstractions.generate_overdubs(@__MODULE__, CUDACtx)

###
# CUDA specific method rewrites
Expand Down Expand Up @@ -311,9 +332,12 @@ else
const emit_shmem = CUDA._shmem
end

import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size

###
# GPU implementation of shared memory
###

@inline function Cassette.overdub(ctx::CUDACtx, ::typeof(SharedMemory), ::Type{T}, ::Val{Dims}, ::Val{Id}) where {T, Dims, Id}
ptr = emit_shmem(Val(Id), T, Val(prod(Dims)))
CUDA.CuDeviceArray(Dims, ptr)
Expand Down Expand Up @@ -341,3 +365,8 @@ end
###

Adapt.adapt_storage(to::ConstAdaptor, a::CUDA.CuDeviceArray) = Base.Experimental.Const(a)

# Argument conversion
KernelAbstractions.argconvert(k::Kernel{CUDADevice}, arg) = CUDA.cudaconvert(arg)

end
17 changes: 6 additions & 11 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module KernelAbstractions

export @kernel
export @Const, @localmem, @private, @uniform, @synchronize, @index, groupsize, @print
export Device, GPU, CPU, CUDADevice, Event, MultiEvent, NoneEvent
export Device, GPU, CPU, Event, MultiEvent, NoneEvent
export async_copy!


Expand Down Expand Up @@ -330,9 +330,6 @@ abstract type Device end
abstract type GPU <: Device end

struct CPU <: Device end
struct CUDADevice <: GPU end
# struct AMD <: GPU end
# struct Intel <: GPU end

include("nditeration.jl")
using .NDIteration
Expand Down Expand Up @@ -462,17 +459,10 @@ end
end
end

###
# Backends/Implementation
###

# Utils
__size(args::Tuple) = Tuple{args...}
__size(i::Int) = Tuple{i}

include("backends/cpu.jl")
include("backends/cuda.jl")

###
# Extras
# - LoopInfo
Expand All @@ -481,4 +471,9 @@ include("backends/cuda.jl")
include("extras/extras.jl")

include("reflection.jl")

# CPU backend

include("cpu.jl")

end #module
4 changes: 2 additions & 2 deletions src/compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ end
include("compiler/contract.jl")
include("compiler/pass.jl")

function generate_overdubs(Ctx)
@eval begin
function generate_overdubs(mod, Ctx)
@eval mod begin
@inline Cassette.overdub(ctx::$Ctx, ::typeof(groupsize)) = __groupsize(ctx.metadata)
@inline Cassette.overdub(ctx::$Ctx, ::typeof(__workitems_iterspace)) = workitems(__iterspace(ctx.metadata))

Expand Down
Loading