Commit 9bf7359 (Try #200)
bors[bot] authored Feb 11, 2021
2 parents a6e4965 + 86c056f
Showing 19 changed files with 120 additions and 45 deletions.
21 changes: 18 additions & 3 deletions .buildkite/pipeline.yml
@@ -3,9 +3,14 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- julia -e 'using Pkg; Pkg.add(name="Run", version="0.1")'
- julia -e 'using Run; Run.prepare_test()'
- julia -e 'using Run; Run.test()'
agents:
queue: "juliagpu"
cuda: "*"
@@ -15,9 +20,14 @@
plugins:
- JuliaCI/julia#v1:
version: "1.6-nightly"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- julia -e 'using Pkg; Pkg.add(name="Run", version="0.1")'
- julia -e 'using Run; Run.prepare_test()'
- julia -e 'using Run; Run.test()'
agents:
queue: "juliagpu"
cuda: "*"
@@ -27,9 +37,14 @@
plugins:
- JuliaCI/julia#v1:
version: "nightly"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- julia -e 'using Pkg; Pkg.add(name="Run", version="0.1")'
- julia -e 'using Run; Run.prepare_test()'
- julia -e 'using Run; Run.test()'
agents:
queue: "juliagpu"
cuda: "*"
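Across all three Buildkite jobs, the JuliaCI/julia-test plugin is replaced by explicit commands: dev the repository and the new lib/CUDAKernels subpackage into the test project, then drive the tests through Run.jl. A minimal sketch of the same flow run locally from a repository checkout (the steps are lifted from the pipeline above; running them in a single Julia session is an assumption):

```julia
# Local reproduction of the new CI test flow; assumes the current
# directory is a checkout of the KernelAbstractions.jl repository root.
using Pkg

Pkg.activate("test")
Pkg.develop(PackageSpec(path = pwd()))                                  # dev KernelAbstractions
Pkg.develop(PackageSpec(path = joinpath(pwd(), "lib", "CUDAKernels")))  # dev the new subpackage

Pkg.activate()                          # Run.jl lives in the default environment
Pkg.add(name = "Run", version = "0.1")

using Run
Run.prepare_test()                      # instantiate the test project
Run.test()                              # run the suite against the dev'd packages
```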
7 changes: 5 additions & 2 deletions .github/workflows/ci-julia-1.6-nightly.yml
@@ -37,8 +37,11 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: julia -e 'using Pkg; pkg"add Run@0.1"'
- run: julia -e 'using Run; Run.prepare_test()'
- run: julia -e 'using Run; Run.test()'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
7 changes: 5 additions & 2 deletions .github/workflows/ci-julia-nightly.yml
@@ -37,8 +37,11 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: julia -e 'using Pkg; pkg"add Run@0.1"'
- run: julia -e 'using Run; Run.prepare_test()'
- run: julia -e 'using Run; Run.test()'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
9 changes: 7 additions & 2 deletions .github/workflows/ci.yml
@@ -40,8 +40,11 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: julia -e 'using Pkg; pkg"add Run@0.1"'
- run: julia -e 'using Run; Run.prepare_test()'
- run: julia -e 'using Run; Run.test()'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
@@ -62,6 +65,7 @@ jobs:
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))
Pkg.instantiate()'
- run: julia --project=docs docs/make.jl
env:
@@ -83,6 +87,7 @@ jobs:
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))
Pkg.instantiate()'
- run: |
julia --project=docs -e '
4 changes: 1 addition & 3 deletions Project.toml
@@ -1,11 +1,10 @@
name = "KernelAbstractions"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
version = "0.5.3"
version = "0.6.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
@@ -15,7 +14,6 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Adapt = "0.4, 1.0, 2.0, 3.0"
CUDA = "~1.0, ~1.1, ~1.2, 1.3, 2"
Cassette = "0.3.3"
MacroTools = "0.5"
SpecialFunctions = "0.10, 1.0"
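With the backend split out, KernelAbstractions drops its hard CUDA dependency and bumps to 0.6, a breaking release; GPU support becomes opt-in through the new subpackage. A sketch of how a downstream project would opt back in, assuming both packages are registered at the versions shown in this commit:

```julia
# Downstream opt-in after the split (versions taken from the Project.toml diffs).
using Pkg
Pkg.add(name = "KernelAbstractions", version = "0.6")
Pkg.add(name = "CUDAKernels", version = "0.1")
```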
2 changes: 1 addition & 1 deletion examples/matmul.jl
@@ -1,4 +1,4 @@
using KernelAbstractions, Test, CUDA
using KernelAbstractions, CUDAKernels, Test, CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
1 change: 1 addition & 0 deletions examples/memcopy.jl
@@ -1,4 +1,5 @@
using KernelAbstractions
using CUDAKernels
using CUDA
using Test

1 change: 1 addition & 0 deletions examples/memcopy_static.jl
@@ -1,4 +1,5 @@
using KernelAbstractions
using CUDAKernels
using CUDA
using Test

3 changes: 2 additions & 1 deletion examples/mpi.jl
@@ -1,6 +1,7 @@
# EXCLUDE FROM TESTING
using KernelAbstractions
using CUDA
using CUDAKernels
using CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
2 changes: 1 addition & 1 deletion examples/naive_transpose.jl
@@ -1,4 +1,4 @@
using KernelAbstractions, Test, CUDA
using KernelAbstractions, CUDAKernels, Test, CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
4 changes: 2 additions & 2 deletions examples/performance.jl
@@ -1,4 +1,4 @@
using KernelAbstractions, CUDA, Test
using KernelAbstractions, CUDAKernels, CUDA, Test
using KernelAbstractions.Extras: @unroll

has_cuda_gpu() || exit()
@@ -199,4 +199,4 @@ for (name, kernel) in (
end
end
end
end
end
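Each runnable example gains a `using CUDAKernels` next to `using CUDA`, because `CUDADevice` now comes from the subpackage rather than from KernelAbstractions itself. A minimal end-to-end sketch of the new two-package usage (`copy_kernel!` is illustrative, modeled on the memcopy example, not a file in this commit):

```julia
using KernelAbstractions
using CUDAKernels   # now provides CUDADevice
using CUDA
using Test

@kernel function copy_kernel!(A, @Const(B))
    I = @index(Global)
    @inbounds A[I] = B[I]
end

if has_cuda_gpu()
    CUDA.allowscalar(false)
    B = CUDA.ones(Float32, 1024)
    A = CUDA.zeros(Float32, 1024)
    kernel! = copy_kernel!(CUDADevice(), 256)    # device + workgroup size
    event = kernel!(A, B, ndrange = length(A))   # launching returns an Event
    wait(event)                                  # synchronize before checking
    @test A == B
end
```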
20 changes: 20 additions & 0 deletions lib/CUDAKernels/Project.toml
@@ -0,0 +1,20 @@
name = "CUDAKernels"
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
version = "0.1.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
Adapt = "0.4, 1.0, 2.0, 3.0"
CUDA = "~1.0, ~1.1, ~1.2, 1.3, 2"
Cassette = "0.3.3"
KernelAbstractions = "0.6"
SpecialFunctions = "0.10, 1.0"
StaticArrays = "0.12, 1.0"
39 changes: 34 additions & 5 deletions src/backends/cuda.jl → lib/CUDAKernels/src/CUDAKernels.jl
@@ -1,5 +1,14 @@
module CUDAKernels

import CUDA
import SpecialFunctions
import StaticArrays
import StaticArrays: MArray
import Cassette
import Adapt
import KernelAbstractions

export CUDADevice

const FREE_STREAMS = CUDA.CuStream[]
const STREAMS = CUDA.CuStream[]
@@ -44,6 +53,10 @@ function next_stream()
end
end

import KernelAbstractions: Event, CPUEvent, NoneEvent, MultiEvent, CPU, GPU, isdone, failed

struct CUDADevice <: GPU end

struct CudaEvent <: Event
event::CUDA.CuEvent
end
@@ -58,6 +71,8 @@ function Event(::CUDADevice)
CudaEvent(event)
end

import Base: wait

wait(ev::CudaEvent, progress=yield) = wait(CPU(), ev, progress)

function wait(::CPU, ev::CudaEvent, progress=yield)
@@ -113,7 +128,7 @@ function __pin!(a)
return nothing
end

function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
function KernelAbstractions.async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
A isa Array && __pin!(A)
B isa Array && __pin!(B)

@@ -131,7 +146,7 @@ function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
return CudaEvent(event)
end


import KernelAbstractions: Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config

###
# Kernel launch
@@ -186,7 +201,7 @@ function (obj::Kernel{CUDADevice})(args...; ndrange=nothing, dependencies=nothin

# If the kernel is statically sized we can tell the compiler about that
if KernelAbstractions.workgroupsize(obj) <: StaticSize
maxthreads = prod(get(KernelAbstractions.workgroupsize(obj)))
maxthreads = prod(KernelAbstractions.get(KernelAbstractions.workgroupsize(obj)))
else
maxthreads = nothing
end
@@ -211,8 +226,12 @@ end

Cassette.@context CUDACtx

import KernelAbstractions: CompilerMetadata, CompilerPass, DynamicCheck, LinearIndices
import KernelAbstractions: __index_Local_Linear, __index_Group_Linear, __index_Global_Linear, __index_Local_Cartesian, __index_Group_Cartesian, __index_Global_Cartesian, __validindex, __print
import KernelAbstractions: mkcontext, expand, __iterspace, __ndrange, __dynamic_checkbounds

function mkcontext(kernel::Kernel{CUDADevice}, _ndrange, iterspace)
metadata = CompilerMetadata{ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
metadata = CompilerMetadata{KernelAbstractions.ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
Cassette.disablehooks(CUDACtx(pass = CompilerPass, metadata=metadata))
end

@@ -251,7 +270,9 @@ end
end
end

generate_overdubs(CUDACtx)
import KernelAbstractions: groupsize, __groupsize, __workitems_iterspace, add_float_contract, sub_float_contract, mul_float_contract

KernelAbstractions.generate_overdubs(@__MODULE__, CUDACtx)

###
# CUDA specific method rewrites
@@ -311,9 +332,12 @@ else
const emit_shmem = CUDA._shmem
end

import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size

###
# GPU implementation of shared memory
###

@inline function Cassette.overdub(ctx::CUDACtx, ::typeof(SharedMemory), ::Type{T}, ::Val{Dims}, ::Val{Id}) where {T, Dims, Id}
ptr = emit_shmem(Val(Id), T, Val(prod(Dims)))
CUDA.CuDeviceArray(Dims, ptr)
@@ -341,3 +365,8 @@ end
###

Adapt.adapt_storage(to::ConstAdaptor, a::CUDA.CuDeviceArray) = Base.Experimental.Const(a)

# Argument conversion
KernelAbstractions.argconvert(k::Kernel{CUDADevice}, arg) = CUDA.cudaconvert(arg)

end
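The mechanics of the move are visible throughout this file: names the backend previously inherited from its parent module now arrive through explicit `import KernelAbstractions: ...` statements, and methods that extend the parent API (`async_copy!`, `mkcontext`, `argconvert`) are qualified with the owning module so they act as extensions instead of local shadows. A hypothetical skeleton of a third-party backend following the same recipe (all names are illustrative, not part of this commit):

```julia
# Hypothetical out-of-tree backend mirroring the CUDAKernels layout.
module MyBackendKernels

import KernelAbstractions
import KernelAbstractions: GPU, Kernel

export MyDevice

struct MyDevice <: GPU end   # device tag, analogous to CUDADevice

# Extend the parent API by qualifying the function name, exactly as
# CUDAKernels does for async_copy! above.
function KernelAbstractions.async_copy!(::MyDevice, A, B;
                                        dependencies = nothing, progress = yield)
    copyto!(A, B)                            # placeholder synchronous copy
    return KernelAbstractions.NoneEvent()    # nothing left to wait on
end

# Per-backend argument conversion hook introduced in this commit.
KernelAbstractions.argconvert(k::Kernel{MyDevice}, arg) = arg

end # module
```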
17 changes: 6 additions & 11 deletions src/KernelAbstractions.jl
@@ -2,7 +2,7 @@ module KernelAbstractions

export @kernel
export @Const, @localmem, @private, @uniform, @synchronize, @index, groupsize, @print
export Device, GPU, CPU, CUDADevice, Event, MultiEvent, NoneEvent
export Device, GPU, CPU, Event, MultiEvent, NoneEvent
export async_copy!


@@ -330,9 +330,6 @@ abstract type Device end
abstract type GPU <: Device end

struct CPU <: Device end
struct CUDADevice <: GPU end
# struct AMD <: GPU end
# struct Intel <: GPU end

include("nditeration.jl")
using .NDIteration
@@ -462,17 +459,10 @@ end
end
end

###
# Backends/Implementation
###

# Utils
__size(args::Tuple) = Tuple{args...}
__size(i::Int) = Tuple{i}

include("backends/cpu.jl")
include("backends/cuda.jl")

###
# Extras
# - LoopInfo
@@ -481,4 +471,9 @@ include("extras/extras.jl")
include("extras/extras.jl")

include("reflection.jl")

# CPU backend

include("cpu.jl")

end #module
4 changes: 2 additions & 2 deletions src/compiler.jl
@@ -30,8 +30,8 @@ end
include("compiler/contract.jl")
include("compiler/pass.jl")

function generate_overdubs(Ctx)
@eval begin
function generate_overdubs(mod, Ctx)
@eval mod begin
@inline Cassette.overdub(ctx::$Ctx, ::typeof(groupsize)) = __groupsize(ctx.metadata)
@inline Cassette.overdub(ctx::$Ctx, ::typeof(__workitems_iterspace)) = workitems(__iterspace(ctx.metadata))

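`generate_overdubs` now takes the destination module and `@eval`s its `Cassette.overdub` definitions there, which is what allows a backend living in a separate package to generate the overdubs inside its own namespace. The two call sites in this commit show the pattern:

```julia
KernelAbstractions.generate_overdubs(@__MODULE__, CUDACtx)  # lib/CUDAKernels/src/CUDAKernels.jl
generate_overdubs(@__MODULE__, CPUCtx)                      # src/cpu.jl
```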
5 changes: 4 additions & 1 deletion src/backends/cpu.jl → src/cpu.jl
@@ -208,7 +208,7 @@ end
__print(items...)
end

generate_overdubs(CPUCtx)
generate_overdubs(@__MODULE__, CPUCtx)

# Don't recurse into these functions
const cpufuns = (:cos, :cospi, :sin, :sinpi, :tan,
@@ -263,3 +263,6 @@ end
@inline function Cassette.overdub(ctx::CPUCtx, ::typeof(Base.getindex), A::ScratchArray{N}, idx) where N
return @inbounds aview(A.data, ntuple(_->:, Val(N))..., idx)
end

# Argument conversion
KernelAbstractions.argconvert(k::Kernel{CPU}, arg) = arg