
DEBUG: Run GitHub Actions #1

Closed
wants to merge 14 commits
15 changes: 12 additions & 3 deletions .buildkite/pipeline.yml
@@ -3,9 +3,12 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
agents:
queue: "juliagpu"
cuda: "*"
@@ -15,9 +18,12 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "1.6-nightly"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
agents:
queue: "juliagpu"
cuda: "*"
@@ -27,9 +33,12 @@ steps:
plugins:
- JuliaCI/julia#v1:
version: "nightly"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
commands:
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
agents:
queue: "juliagpu"
cuda: "*"
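Across the Buildkite pipeline above (and the three GitHub Actions workflows below), the `JuliaCI/julia-test` plugin and the `julia-buildpkg`/`julia-runtest` actions give way to explicit commands: the test environment first `dev`s the root package and the new, unregistered `lib/CUDAKernels` subpackage, then runs the suite directly. The `JULIA_LOAD_PATH=@` prefix appears intended to limit the load path to the active project so the default environments cannot leak in. A rough local equivalent, as a sketch run from the repository root (not part of this diff):

```julia
# Sketch: approximate local reproduction of the new CI test steps.
using Pkg
Pkg.activate("test")                              # make test/Project.toml the active project
Pkg.develop(PackageSpec(path=pwd()))              # dev KernelAbstractions itself
Pkg.develop(PackageSpec(path=joinpath(pwd(), "lib", "CUDAKernels")))  # dev the subpackage
Pkg.instantiate()
include(joinpath("test", "runtests.jl"))          # the suite CI invokes directly
```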
8 changes: 6 additions & 2 deletions .github/workflows/ci-julia-1.6-nightly.yml
@@ -6,6 +6,9 @@ on:
- staging
- trying
tags: '*'
defaults:
run:
shell: bash
jobs:
CI-julia-1-6-nightly:
name: CI-julia-1-6-nightly
@@ -37,8 +40,9 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
8 changes: 6 additions & 2 deletions .github/workflows/ci-julia-nightly.yml
@@ -6,6 +6,9 @@ on:
- staging
- trying
tags: '*'
defaults:
run:
shell: bash
jobs:
CI-julia-nightly:
name: CI-julia-nightly
@@ -37,8 +40,9 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
12 changes: 8 additions & 4 deletions .github/workflows/ci.yml
@@ -6,6 +6,9 @@ on:
- staging
- trying
tags: '*'
defaults:
run:
shell: bash
jobs:
CI:
name: CI
@@ -40,8 +43,9 @@ jobs:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()))'
- run: julia --project=test -e 'using Pkg; Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))'
- run: JULIA_LOAD_PATH=@ julia --project=test --color=yes --check-bounds=yes --code-coverage=user --depwarn=yes test/runtests.jl
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
@@ -55,13 +59,13 @@ jobs:
with:
version: '1'
- run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")'
shell: bash
env:
JULIA_PKG_SERVER: ""
- run: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))
Pkg.instantiate()'
- run: julia --project=docs docs/make.jl
env:
@@ -76,13 +80,13 @@ jobs:
with:
version: 'nightly'
- run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")'
shell: bash
env:
JULIA_PKG_SERVER: ""
- run: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.develop(PackageSpec(path=joinpath(pwd(),"lib","CUDAKernels")))
Pkg.instantiate()'
- run: |
julia --project=docs -e '
4 changes: 1 addition & 3 deletions Project.toml
@@ -1,11 +1,10 @@
name = "KernelAbstractions"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
version = "0.5.3"
version = "0.6.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
@@ -15,7 +14,6 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Adapt = "0.4, 1.0, 2.0, 3.0"
CUDA = "~1.0, ~1.1, ~1.2, 1.3, 2"
Cassette = "0.3.3"
MacroTools = "0.5"
SpecialFunctions = "0.10, 1.0"
2 changes: 1 addition & 1 deletion examples/matmul.jl
@@ -1,4 +1,4 @@
using KernelAbstractions, Test, CUDA
using KernelAbstractions, CUDAKernels, Test, CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
1 change: 1 addition & 0 deletions examples/memcopy.jl
@@ -1,4 +1,5 @@
using KernelAbstractions
using CUDAKernels
using CUDA
using Test

1 change: 1 addition & 0 deletions examples/memcopy_static.jl
@@ -1,4 +1,5 @@
using KernelAbstractions
using CUDAKernels
using CUDA
using Test

3 changes: 2 additions & 1 deletion examples/mpi.jl
@@ -1,6 +1,7 @@
# EXCLUDE FROM TESTING
using KernelAbstractions
using CUDA
using CUDAKernels
using CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
2 changes: 1 addition & 1 deletion examples/naive_transpose.jl
@@ -1,4 +1,4 @@
using KernelAbstractions, Test, CUDA
using KernelAbstractions, CUDAKernels, Test, CUDA

if has_cuda_gpu()
CUDA.allowscalar(false)
4 changes: 2 additions & 2 deletions examples/performance.jl
@@ -1,4 +1,4 @@
using KernelAbstractions, CUDA, Test
using KernelAbstractions, CUDAKernels, CUDA, Test
using KernelAbstractions.Extras: @unroll

has_cuda_gpu() || exit()
@@ -199,4 +199,4 @@ for (name, kernel) in (
end
end
end
end
end
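Each example gains a `using CUDAKernels` alongside `using CUDA`, since `CUDADevice` now lives in the subpackage rather than in KernelAbstractions. A minimal sketch of the resulting usage pattern (the kernel, sizes, and names here are illustrative, not taken from the examples themselves):

```julia
using KernelAbstractions
using CUDAKernels   # CUDADevice now comes from here
using CUDA
using Test

# Illustrative kernel: scale every element of A by s.
@kernel function scale!(A, s)
    I = @index(Global)
    @inbounds A[I] *= s
end

if has_cuda_gpu()
    CUDA.allowscalar(false)
    A, device = CUDA.ones(Float32, 1024), CUDADevice()
else
    A, device = ones(Float32, 1024), CPU()
end

kernel = scale!(device, 16)                 # workgroup size of 16
event  = kernel(A, 2f0; ndrange=length(A))  # launch returns an Event
wait(event)
@test all(Array(A) .== 2f0)
```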
20 changes: 20 additions & 0 deletions lib/CUDAKernels/Project.toml
@@ -0,0 +1,20 @@
name = "CUDAKernels"
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
version = "0.1.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
Adapt = "0.4, 1.0, 2.0, 3.0"
CUDA = "~1.0, ~1.1, ~1.2, 1.3, 2"
Cassette = "0.3.3"
KernelAbstractions = "0.6"
SpecialFunctions = "0.10, 1.0"
StaticArrays = "0.12, 1.0"
39 changes: 34 additions & 5 deletions src/backends/cuda.jl → lib/CUDAKernels/src/CUDAKernels.jl
@@ -1,5 +1,14 @@
module CUDAKernels

import CUDA
import SpecialFunctions
import StaticArrays
import StaticArrays: MArray
import Cassette
import Adapt
import KernelAbstractions

export CUDADevice

const FREE_STREAMS = CUDA.CuStream[]
const STREAMS = CUDA.CuStream[]
@@ -44,6 +53,10 @@ function next_stream()
end
end

import KernelAbstractions: Event, CPUEvent, NoneEvent, MultiEvent, CPU, GPU, isdone, failed

struct CUDADevice <: GPU end

struct CudaEvent <: Event
event::CUDA.CuEvent
end
@@ -58,6 +71,8 @@ function Event(::CUDADevice)
CudaEvent(event)
end

import Base: wait

wait(ev::CudaEvent, progress=yield) = wait(CPU(), ev, progress)

function wait(::CPU, ev::CudaEvent, progress=yield)
@@ -113,7 +128,7 @@ function __pin!(a)
return nothing
end

function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
function KernelAbstractions.async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
A isa Array && __pin!(A)
B isa Array && __pin!(B)

@@ -131,7 +146,7 @@ function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
return CudaEvent(event)
end


import KernelAbstractions: Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config

###
# Kernel launch
@@ -186,7 +201,7 @@ function (obj::Kernel{CUDADevice})(args...; ndrange=nothing, dependencies=nothin

# If the kernel is statically sized we can tell the compiler about that
if KernelAbstractions.workgroupsize(obj) <: StaticSize
maxthreads = prod(get(KernelAbstractions.workgroupsize(obj)))
maxthreads = prod(KernelAbstractions.get(KernelAbstractions.workgroupsize(obj)))
else
maxthreads = nothing
end
@@ -211,8 +226,12 @@ end

Cassette.@context CUDACtx

import KernelAbstractions: CompilerMetadata, CompilerPass, DynamicCheck, LinearIndices
import KernelAbstractions: __index_Local_Linear, __index_Group_Linear, __index_Global_Linear, __index_Local_Cartesian, __index_Group_Cartesian, __index_Global_Cartesian, __validindex, __print
import KernelAbstractions: mkcontext, expand, __iterspace, __ndrange, __dynamic_checkbounds

function mkcontext(kernel::Kernel{CUDADevice}, _ndrange, iterspace)
metadata = CompilerMetadata{ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
metadata = CompilerMetadata{KernelAbstractions.ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
Cassette.disablehooks(CUDACtx(pass = CompilerPass, metadata=metadata))
end

@@ -251,7 +270,9 @@ end
end
end

generate_overdubs(CUDACtx)
import KernelAbstractions: groupsize, __groupsize, __workitems_iterspace, add_float_contract, sub_float_contract, mul_float_contract

KernelAbstractions.generate_overdubs(@__MODULE__, CUDACtx)

###
# CUDA specific method rewrites
@@ -311,9 +332,12 @@ else
const emit_shmem = CUDA._shmem
end

import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size

###
# GPU implementation of shared memory
###

@inline function Cassette.overdub(ctx::CUDACtx, ::typeof(SharedMemory), ::Type{T}, ::Val{Dims}, ::Val{Id}) where {T, Dims, Id}
ptr = emit_shmem(Val(Id), T, Val(prod(Dims)))
CUDA.CuDeviceArray(Dims, ptr)
@@ -341,3 +365,8 @@ end
###

Adapt.adapt_storage(to::ConstAdaptor, a::CUDA.CuDeviceArray) = Base.Experimental.Const(a)

# Argument conversion
KernelAbstractions.argconvert(k::Kernel{CUDADevice}, arg) = CUDA.cudaconvert(arg)

end
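The moved backend keeps extending the generic copy entry point, now spelled `KernelAbstractions.async_copy!` (see the hunk above). A usage sketch, assuming a CUDA-capable machine and destination-first argument order as with `copyto!`:

```julia
using KernelAbstractions, CUDAKernels, CUDA

src = CUDA.rand(Float32, 1 << 20)
dst = CUDA.zeros(Float32, 1 << 20)

ev = async_copy!(CUDADevice(), dst, src)  # returns a CudaEvent on a device stream
wait(ev)                                  # host-side wait, as defined above
@assert Array(dst) == Array(src)
```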
17 changes: 6 additions & 11 deletions src/KernelAbstractions.jl
@@ -2,7 +2,7 @@ module KernelAbstractions

export @kernel
export @Const, @localmem, @private, @uniform, @synchronize, @index, groupsize, @print
export Device, GPU, CPU, CUDADevice, Event, MultiEvent, NoneEvent
export Device, GPU, CPU, Event, MultiEvent, NoneEvent
export async_copy!


@@ -330,9 +330,6 @@ abstract type Device end
abstract type GPU <: Device end

struct CPU <: Device end
struct CUDADevice <: GPU end
# struct AMD <: GPU end
# struct Intel <: GPU end

include("nditeration.jl")
using .NDIteration
@@ -462,17 +459,10 @@ end
end
end

###
# Backends/Implementation
###

# Utils
__size(args::Tuple) = Tuple{args...}
__size(i::Int) = Tuple{i}

include("backends/cpu.jl")
include("backends/cuda.jl")

###
# Extras
# - LoopInfo
Expand All @@ -481,4 +471,9 @@ include("backends/cuda.jl")
include("extras/extras.jl")

include("reflection.jl")

# CPU backend

include("cpu.jl")

end #module
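For users, the visible change is that `CUDADevice` is no longer defined or exported here. Code written against 0.5.x needs one extra import; a before/after sketch:

```julia
# KernelAbstractions 0.5.x
using KernelAbstractions              # exported CUDADevice directly

# KernelAbstractions 0.6 with this PR
using KernelAbstractions              # core API and CPU backend
using CUDAKernels                     # defines and exports CUDADevice
```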
4 changes: 2 additions & 2 deletions src/compiler.jl
@@ -30,8 +30,8 @@ end
include("compiler/contract.jl")
include("compiler/pass.jl")

function generate_overdubs(Ctx)
@eval begin
function generate_overdubs(mod, Ctx)
@eval mod begin
@inline Cassette.overdub(ctx::$Ctx, ::typeof(groupsize)) = __groupsize(ctx.metadata)
@inline Cassette.overdub(ctx::$Ctx, ::typeof(__workitems_iterspace)) = workitems(__iterspace(ctx.metadata))

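`generate_overdubs` now takes the destination module explicitly, so the shared `Cassette.overdub` methods are evaluated into each backend package instead of into KernelAbstractions. A sketch of how an out-of-tree backend might call the new two-argument form (module and context names are hypothetical; the import list mirrors the CUDAKernels hunks above, since the eval'd definitions reference those names):

```julia
module MyBackendKernels

import Cassette
import KernelAbstractions
import KernelAbstractions: groupsize, __groupsize, __workitems_iterspace, __iterspace,
    workitems, add_float_contract, sub_float_contract, mul_float_contract

Cassette.@context MyCtx

# The overdub definitions now land in MyBackendKernels, not KernelAbstractions.
KernelAbstractions.generate_overdubs(@__MODULE__, MyCtx)

end
```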