Skip to content

Commit

Permalink
Update Runic to released version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrikekre committed Nov 6, 2024
1 parent 265e5b8 commit 973240c
Show file tree
Hide file tree
Showing 30 changed files with 102 additions and 70 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/Check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Runic formatting check.
# Triggers: pushes to `master` and to release branches, any pushed tag, and
# every pull request.
name: Check
on:
  push:
    branches:
      - 'master'
      # Branch filters are glob patterns; without the trailing `*` only a
      # branch literally named `release-` would match.
      - 'release-*'
    tags:
      - '*'
  pull_request:
jobs:
  runic:
    name: Runic formatting
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: julia-actions/setup-julia@v2
        with:
          version: "1"
      - uses: julia-actions/cache@v2
      - uses: fredrikekre/runic-action@v1
        with:
          version: "1"
25 changes: 0 additions & 25 deletions .github/workflows/runic.yml

This file was deleted.

1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function main()
push_preview = true,
)
end
return
end

isinteractive() || main()
2 changes: 2 additions & 0 deletions examples/histogram.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ function histogram!(histogram_output, input)
# Need static block size
kernel! = histogram_kernel!(backend, (256,))
kernel!(histogram_output, input, ndrange = size(input))
return
end

# Allocate a new array through `KernelAbstractions.allocate(backend, ...)` with
# the element type and size of `input`, copy `input` into it, and return it.
function move(backend, input)
    # TODO replace with adapt(backend, input)
    T, dims = eltype(input), size(input)
    moved = KernelAbstractions.allocate(backend, T, dims)
    KernelAbstractions.copyto!(backend, moved, input)
    return moved
end

@testset "histogram tests" begin
Expand Down
1 change: 1 addition & 0 deletions examples/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ function matmul!(output, a, b)
backend = KernelAbstractions.get_backend(a)
kernel! = matmul_kernel!(backend)
kernel!(output, a, b, ndrange = size(output))
return
end

a = rand!(allocate(backend, Float32, 256, 123))
Expand Down
1 change: 1 addition & 0 deletions examples/memcopy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ function mycopy!(A, B)

kernel = copy_kernel!(backend)
kernel(A, B, ndrange = length(A))
return
end

A = KernelAbstractions.zeros(backend, Float64, 128, 128)
Expand Down
1 change: 1 addition & 0 deletions examples/memcopy_static.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ function mycopy_static!(A, B)

kernel = copy_kernel!(backend, 32, size(A)) # if size(A) varies this will cause recompilation
kernel(A, B, ndrange = size(A))
return
end

A = KernelAbstractions.zeros(backend, Float64, 128, 128)
Expand Down
3 changes: 3 additions & 0 deletions examples/mpi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ function cooperative_test!(req)
done, _ = MPI.Test(req, MPI.Status)
yield()
end
return
end

function cooperative_wait(task::Task)
Expand All @@ -17,6 +18,7 @@ function cooperative_wait(task::Task)
yield()
end
wait(task)
return
end

function exchange!(h_send_buf, d_recv_buf, h_recv_buf, src_rank, dst_rank, comm)
Expand Down Expand Up @@ -68,6 +70,7 @@ function main(backend)
cooperative_wait(send_task)

@test all(d_recv_buf .== src_rank)
return
end

main(backend)
1 change: 1 addition & 0 deletions examples/naive_transpose.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ function naive_transpose!(a, b)
groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
kernel! = naive_transpose_kernel!(backend, groupsize)
kernel!(a, b, ndrange = size(a))
return
end

# resolution of grid will be res*res
Expand Down
4 changes: 2 additions & 2 deletions ext/EnzymeCore07Ext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, cpu_fwd)

fwd_kernel(f, args...; ndrange, workgroupsize)
return fwd_kernel(f, args...; ndrange, workgroupsize)
end

function EnzymeRules.forward(
Expand All @@ -36,7 +36,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, gpu_fwd)

fwd_kernel(f, args...; ndrange, workgroupsize)
return fwd_kernel(f, args...; ndrange, workgroupsize)
end

_enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
Expand Down
4 changes: 2 additions & 2 deletions ext/EnzymeCore08Ext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, cpu_fwd)

fwd_kernel(config, f, args...; ndrange, workgroupsize)
return fwd_kernel(config, f, args...; ndrange, workgroupsize)
end

function EnzymeRules.forward(
Expand All @@ -38,7 +38,7 @@ function EnzymeRules.forward(
f = kernel.f
fwd_kernel = similar(kernel, gpu_fwd)

fwd_kernel(config, f, args...; ndrange, workgroupsize)
return fwd_kernel(config, f, args...; ndrange, workgroupsize)
end

_enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
Expand Down
32 changes: 16 additions & 16 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ synchronize(backend)
```
"""
macro kernel(expr)
__kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
end

"""
Expand All @@ -69,7 +69,7 @@ This allows for two different configurations:
"""
macro kernel(ex...)
if length(ex) == 1
__kernel(ex[1], true, false)
return __kernel(ex[1], true, false)
else
generate_cpu = true
force_inbounds = false
Expand All @@ -89,7 +89,7 @@ macro kernel(ex...)
)
end
end
__kernel(ex[end], generate_cpu, force_inbounds)
return __kernel(ex[end], generate_cpu, force_inbounds)
end
end

Expand Down Expand Up @@ -167,7 +167,7 @@ a tuple corresponding to kernel configuration. In order to get
the total size you can use `prod(@groupsize())`.
"""
macro groupsize()
quote
return quote
$groupsize($(esc(:__ctx__)))
end
end
Expand All @@ -179,7 +179,7 @@ Query the ndrange on the backend. This function returns
a tuple corresponding to kernel configuration.
"""
macro ndrange()
quote
return quote
$size($ndrange($(esc(:__ctx__))))
end
end
Expand All @@ -193,7 +193,7 @@ macro localmem(T, dims)
# Stay in sync with CUDAnative
id = gensym("static_shmem")

quote
return quote
$SharedMemory($(esc(T)), Val($(esc(dims))), Val($(QuoteNode(id))))
end
end
Expand All @@ -214,7 +214,7 @@ macro private(T, dims)
if dims isa Integer
dims = (dims,)
end
quote
return quote
$Scratchpad($(esc(:__ctx__)), $(esc(T)), Val($(esc(dims))))
end
end
Expand All @@ -226,7 +226,7 @@ Creates a private local of `mem` per item in the workgroup. This can be safely u
across [`@synchronize`](@ref) statements.
"""
macro private(expr)
esc(expr)
return esc(expr)
end

"""
Expand All @@ -236,7 +236,7 @@ end
that span workitems, or are reused across `@synchronize` statements.
"""
macro uniform(value)
esc(value)
return esc(value)
end

"""
Expand All @@ -247,7 +247,7 @@ from each thread in the workgroup are visible from all other threads in the
workgroup.
"""
macro synchronize()
quote
return quote
$__synchronize()
end
end
Expand All @@ -264,7 +264,7 @@ workgroup. `cond` is not allowed to have any visible side effects.
- `CPU`: This synchronization will always occur.
"""
macro synchronize(cond)
quote
return quote
$(esc(cond)) && $__synchronize()
end
end
Expand All @@ -289,7 +289,7 @@ end
```
"""
macro context()
esc(:(__ctx__))
return esc(:(__ctx__))
end

"""
Expand Down Expand Up @@ -329,7 +329,7 @@ macro print(items...)
end
end

quote
return quote
$__print($(map(esc, args)...))
end
end
Expand Down Expand Up @@ -385,7 +385,7 @@ macro index(locale, args...)
end

index_function = Symbol(:__index_, locale, :_, indexkind)
Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
return Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
end

###
Expand Down Expand Up @@ -591,7 +591,7 @@ struct Kernel{Backend, WorkgroupSize <: _Size, NDRange <: _Size, Fun}
end

function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F}
Kernel{D, WS, ND, F}(kernel.backend, f)
return Kernel{D, WS, ND, F}(kernel.backend, f)
end

# Return the second type parameter (the workgroup size) of a `Kernel`.
function workgroupsize(::Kernel{D, WS}) where {D, WS}
    return WS
end
Expand Down Expand Up @@ -701,7 +701,7 @@ end
push!(args, item)
end

quote
return quote
print($(args...))
end
end
Expand Down
10 changes: 5 additions & 5 deletions src/cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing)
return nothing
end

__run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
return __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
end

const CPU_GRAINSIZE = 1024 # Vectorization, 4x unrolling, minimal grain size
Expand Down Expand Up @@ -162,15 +162,15 @@ end

@inline function __index_Global_Linear(ctx, idx::CartesianIndex)
I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx)
@inbounds LinearIndices(__ndrange(ctx))[I]
return @inbounds LinearIndices(__ndrange(ctx))[I]
end

# The local Cartesian index is the passed-in index itself; the context
# argument is ignored.
@inline __index_Local_Cartesian(_, idx::CartesianIndex) = idx

@inline function __index_Group_Cartesian(ctx, ::CartesianIndex)
__groupindex(ctx)
return __groupindex(ctx)
end

@inline function __index_Global_Cartesian(ctx, idx::CartesianIndex)
Expand All @@ -191,7 +191,7 @@ end
# CPU implementation of shared memory
###
@inline function SharedMemory(::Type{T}, ::Val{Dims}, ::Val) where {T, Dims}
MArray{__size(Dims), T}(undef)
return MArray{__size(Dims), T}(undef)
end

###
Expand All @@ -212,7 +212,7 @@ end
# https://github.com/JuliaLang/julia/issues/39308
@inline function aview(A, I::Vararg{Any, N}) where {N}
J = Base.to_indices(A, I)
Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
return Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
end

@inline function Base.getindex(A::ScratchArray{N}, idx) where {N}
Expand Down
8 changes: 5 additions & 3 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ function find_return(stmt)
result |= @capture(expr, return x_)
expr
end
result
return result
end

# XXX: Proper errors
Expand Down Expand Up @@ -103,6 +103,7 @@ function transform_gpu!(def, constargs, force_inbounds)
Expr(:block, let_constargs...),
body,
)
return
end

# The hard case, transform the function for CPU execution
Expand Down Expand Up @@ -137,6 +138,7 @@ function transform_cpu!(def, constargs, force_inbounds)
Expr(:block, let_constargs...),
Expr(:block, new_stmts...),
)
return
end

struct WorkgroupLoop
Expand All @@ -150,7 +152,7 @@ end
# Report whether `expr` matches a `@synchronize()` or `@synchronize(cond)`
# macro call, using the `@capture` pattern below.
function is_sync(expr)
    return @capture(expr, @synchronize() | @synchronize(a_))
end

function is_scope_construct(expr::Expr)
expr.head === :block # ||
return expr.head === :block # ||
# expr.head === :let
end

Expand All @@ -160,7 +162,7 @@ function find_sync(stmt)
result |= is_sync(expr)
expr
end
result
return result
end

# TODO proper handling of LineInfo
Expand Down
Loading

0 comments on commit 973240c

Please sign in to comment.