Skip to content

Commit

Permalink
Additional backend types to support Base.Threads
Browse files Browse the repository at this point in the history
  • Loading branch information
kaipartmann committed Jul 15, 2024
1 parent 772c8cf commit 7575bdf
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo
export DictionaryCellList, FullGridCellList
export ParallelUpdate, SemiParallelUpdate, SerialUpdate
export initialize!, update!, initialize_grid!, update_grid!
export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
export PeriodicBox, copy_neighborhood_search

end # module PointNeighbors
16 changes: 13 additions & 3 deletions src/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ in this case to avoid unnecessary updates.
The first flag in `points_moving` indicates if points in `x` are moving.
The second flag indicates if points in `y` are moving.
!!! warning "Experimental feature: Backend specification"
    The keyword argument `parallelization_backend` allows users to specify the
    multithreading backend. This feature is currently considered experimental!

    Possible parallelization backends are:
    - [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic`
    - [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static`
    - [`PolyesterBackend`](@ref) to use `Polyester.@batch`
    - `KernelAbstractions.Backend` to execute the update inside a GPU kernel

See also [`initialize!`](@ref).
"""
@inline function update!(search::AbstractNeighborhoodSearch, x, y;
Expand Down Expand Up @@ -130,7 +140,7 @@ Note that `system_coords` and `neighbor_coords` can be identical.
See also [`initialize!`](@ref), [`update!`](@ref).
"""
function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborhood_search;
parallel::Union{Bool, KernelAbstractions.Backend} = true,
parallel::Union{Bool, ParallelizationBackend} = true,
points = axes(system_coords, 2)) where {T}
# The type annotation above is to make Julia specialize on the type of the function.
# Otherwise, unspecialized code will cause a lot of allocations
Expand All @@ -141,7 +151,7 @@ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborho
# threaded loop with `Polyester.@batch`, or, when `system_coords` is a GPU array,
# launch the loop as a kernel on the GPU.
parallel_ = Val(parallel)
elseif parallel isa KernelAbstractions.Backend
elseif parallel isa ParallelizationBackend
# WARNING! Undocumented, experimental feature:
# When a `ParallelizationBackend` is passed, run the loop on this backend.
# A `KernelAbstractions.Backend` launches the loop as a GPU kernel
# on this backend. This is useful to test the GPU code on the CPU by passing
Expand All @@ -165,7 +175,7 @@ end
# When a `ParallelizationBackend` is passed, run the loop with `@threaded` on this backend
@inline function foreach_point_neighbor(f, system_coords, neighbor_coords,
neighborhood_search, points,
backend::KernelAbstractions.Backend)
backend::ParallelizationBackend)
@threaded backend for point in points
foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point)
end
Expand Down
56 changes: 54 additions & 2 deletions src/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,38 @@ end
return floor(Int, i)
end

# Supertype of the backend types declared in this file (`PolyesterBackend`,
# `ThreadsDynamicBackend`, `ThreadsStaticBackend`). These are empty structs that are
# used to select the threading implementation via dispatch.
abstract type AbstractUserBackend end

"""
    PolyesterBackend()

A type to specify the parallelization backend when using [`@threaded`](@ref). If this type
is specified, the `@threaded` macro will use `Polyester.@batch` for the multithreaded `for`
loop.
"""
struct PolyesterBackend <: AbstractUserBackend end

"""
    ThreadsDynamicBackend()

A type to specify the parallelization backend when using [`@threaded`](@ref). If this type
is specified, the `@threaded` macro will use `Threads.@threads :dynamic` for the
multithreaded `for` loop.
"""
struct ThreadsDynamicBackend <: AbstractUserBackend end

"""
    ThreadsStaticBackend()

A type to specify the parallelization backend when using [`@threaded`](@ref). If this type
is specified, the `@threaded` macro will use `Threads.@threads :static` for the
multithreaded `for` loop.
"""
struct ThreadsStaticBackend <: AbstractUserBackend end

# Union of all backend types: the subtypes of `AbstractUserBackend` declared in this
# file and any `KernelAbstractions.Backend`.
const ParallelizationBackend = Union{AbstractUserBackend, KernelAbstractions.Backend}

"""
@threaded x for ... end
Expand All @@ -30,15 +62,21 @@ Semantically the same as `Threads.@threads` when iterating over a `AbstractUnitR
but without guarantee that the underlying implementation uses `Threads.@threads`
or works for more general `for` loops.
The first argument must either be a `KernelAbstractions.Backend` or an array from which the
The first argument must either be a parallelization backend or an array from which the
backend can be derived to determine if the loop must be run threaded on the CPU
or launched as a kernel on the GPU. Passing `KernelAbstractions.CPU()` will run the GPU
kernel on the CPU.
Possible parallelization backends are:
- [`PolyesterBackend`](@ref) to use `Polyester.@batch`
- [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic`
- [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static`
- `KernelAbstractions.Backend` to execute the loop inside a GPU kernel
In particular, the underlying threading capabilities might be provided by other packages
such as [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl).
!!! warn
!!! warning "Not suited for general loops"
This macro does not necessarily work for general `for` loops. For example,
it does not necessarily support general iterables such as `eachline(filename)`.
"""
Expand Down Expand Up @@ -67,6 +105,20 @@ end
end
end

# Call `f(i)` for every `i` in `iterator`, with the loop threaded by
# `Threads.@threads :dynamic`. The backend argument is only used for dispatch.
@inline function parallel_foreach(f, iterator, x::ThreadsDynamicBackend)
    Threads.@threads :dynamic for index in iterator
        # Request inlining of `f` at the call site
        @inline f(index)
    end

    return nothing
end

# Call `f(i)` for every `i` in `iterator`, with the loop threaded by
# `Threads.@threads :static`. The backend argument is only used for dispatch.
# NOTE: according to the Julia documentation, `:static` scheduling is an error when used
# from inside another `@threads` loop.
@inline function parallel_foreach(f, iterator, x::ThreadsStaticBackend)
    Threads.@threads :static for index in iterator
        # Request inlining of `f` at the call site
        @inline f(index)
    end

    return nothing
end

# On GPUs, execute `f` inside a GPU kernel with KernelAbstractions.jl
@inline function parallel_foreach(f, iterator,
x::Union{AbstractGPUArray, KernelAbstractions.Backend})
Expand Down

0 comments on commit 7575bdf

Please sign in to comment.