diff --git a/src/PointNeighbors.jl b/src/PointNeighbors.jl index b7a8203c..ec32a650 100644 --- a/src/PointNeighbors.jl +++ b/src/PointNeighbors.jl @@ -24,6 +24,7 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo export DictionaryCellList, FullGridCellList export ParallelUpdate, SemiParallelUpdate, SerialUpdate export initialize!, update!, initialize_grid!, update_grid! +export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend export PeriodicBox, copy_neighborhood_search end # module PointNeighbors diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index dacbc103..ea43406d 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -36,6 +36,16 @@ in this case to avoid unnecessary updates. The first flag in `points_moving` indicates if points in `x` are moving. The second flag indicates if points in `y` are moving. +!!! warning "Experimental feature: Backend specification" + The keyword argument `parallelization_backend` allows users to specify the + multithreading backend. This feature is currently considered experimental! + + Possible parallelization backends are: + - [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic` + - [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static` + - [`PolyesterBackend`](@ref) to use `Polyester.@batch` + - Any `KernelAbstractions.Backend` to execute the update inside a GPU kernel + See also [`initialize!`](@ref). """ @inline function update!(search::AbstractNeighborhoodSearch, x, y; @@ -130,7 +140,7 @@ Note that `system_coords` and `neighbor_coords` can be identical. See also [`initialize!`](@ref), [`update!`](@ref). 
""" function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborhood_search; - parallel::Union{Bool, KernelAbstractions.Backend} = true, + parallel::Union{Bool, ParallelizationBackend} = true, points = axes(system_coords, 2)) where {T} # The type annotation above is to make Julia specialize on the type of the function. # Otherwise, unspecialized code will cause a lot of allocations @@ -141,7 +151,7 @@ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborho # threaded loop with `Polyester.@batch`, or, when `system_coords` is a GPU array, # launch the loop as a kernel on the GPU. parallel_ = Val(parallel) - elseif parallel isa KernelAbstractions.Backend + elseif parallel isa ParallelizationBackend # WARNING! Undocumented, experimental feature: # When a `KernelAbstractions.Backend` is passed, launch the loop as a GPU kernel # on this backend. This is useful to test the GPU code on the CPU by passing @@ -165,7 +175,7 @@ end # When a `KernelAbstractions.Backend` is passed, launch a GPU kernel on this backend @inline function foreach_point_neighbor(f, system_coords, neighbor_coords, neighborhood_search, points, - backend::KernelAbstractions.Backend) + backend::ParallelizationBackend) @threaded backend for point in points foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search, point) end diff --git a/src/util.jl b/src/util.jl index d979ac25..8af9de67 100644 --- a/src/util.jl +++ b/src/util.jl @@ -22,6 +22,38 @@ end return floor(Int, i) end +abstract type AbstractUserBackend end + +""" + PolyesterBackend() + +A type to specify the parallelization backend when using [`@threaded`](@ref). If this type +is specified, the @threaded macro will use `Polyester.@batch` for the multithreaded `for` +loop. +""" +struct PolyesterBackend <: AbstractUserBackend end + +""" + ThreadsDynamicBackend() + +A type to specify the parallelization backend when using [`@threaded`](@ref). 
If this type +is specified, the @threaded macro will use `Threads.@threads :dynamic` for the multithreaded +`for` loop. +""" +struct ThreadsDynamicBackend <: AbstractUserBackend end + +""" + ThreadsStaticBackend() + + +A type to specify the parallelization backend when using [`@threaded`](@ref). If this type +is specified, the @threaded macro will use `Threads.@threads :static` for the multithreaded +`for` loop. +""" +struct ThreadsStaticBackend <: AbstractUserBackend end + +const ParallelizationBackend = Union{AbstractUserBackend, KernelAbstractions.Backend} + """ @threaded x for ... end @@ -30,15 +62,21 @@ Semantically the same as `Threads.@threads` when iterating over a `AbstractUnitR but without guarantee that the underlying implementation uses `Threads.@threads` or works for more general `for` loops. -The first argument must either be a `KernelAbstractions.Backend` or an array from which the +The first argument must either be a parallelization backend or an array from which the backend can be derived to determine if the loop must be run threaded on the CPU or launched as a kernel on the GPU. Passing `KernelAbstractions.CPU()` will run the GPU kernel on the CPU. +Possible parallelization backends are: +- [`PolyesterBackend`](@ref) to use `Polyester.@batch` +- [`ThreadsDynamicBackend`](@ref) to use `Threads.@threads :dynamic` +- [`ThreadsStaticBackend`](@ref) to use `Threads.@threads :static` +- Any `KernelAbstractions.Backend` to execute the loop inside a GPU kernel + In particular, the underlying threading capabilities might be provided by other packages such as [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl). -!!! warn +!!! warning "Not suited for general loops" This macro does not necessarily work for general `for` loops. For example, it does not necessarily support general iterables such as `eachline(filename)`. 
""" @@ -67,6 +105,20 @@ end end end +# Use `Threads.@threads :dynamic` +@inline function parallel_foreach(f, iterator, x::ThreadsDynamicBackend) + Threads.@threads :dynamic for i in iterator + @inline f(i) + end +end + +# Use `Threads.@threads :static` +@inline function parallel_foreach(f, iterator, x::ThreadsStaticBackend) + Threads.@threads :static for i in iterator + @inline f(i) + end +end + # On GPUs, execute `f` inside a GPU kernel with KernelAbstractions.jl @inline function parallel_foreach(f, iterator, x::Union{AbstractGPUArray, KernelAbstractions.Backend})