From 609a8feab39a8dd8d6649f548314b00b8c612945 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 3 Sep 2024 16:38:00 +0200 Subject: [PATCH 01/23] add macro to create custom Ops also on aarch64 --- src/operators.jl | 22 ++++++++++++++++++++++ test/test_reduce.jl | 9 +++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 5ded20f22..35ef4ce13 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -107,3 +107,25 @@ function Op(f, T=Any; iscommutative=false) finalizer(free, op) return op end + +macro Op(f, T) + name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) + name_fptr = gensym(Symbol(f, :_, T, :_ptr)) + name_module = gensym(Symbol(f, :_, T, :_module)) + esc(quote + module $(name_module) + $(name_wrapper) = $OpWrapper{typeof($f),$T}($f) + $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) + function __init__() + global $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) + end + function $Op(::typeof($f), ::Type{T}; iscommutative=true) + op = $Op($OP_NULL.val, $(name_fptr)) + # int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op) + $API.MPI_Op_create($(name_fptr), iscommutative, op) + + finalizer($free, op) + end + end + end) +end diff --git a/test/test_reduce.jl b/test/test_reduce.jl index dbf0c6e79..a478c5c77 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -59,10 +59,15 @@ if isroot @test sum_mesg == sz .* mesg end +function my_reduce(x, y) + 2x+y-x +end +@Op(my_reduce, Int) + if can_do_closures - operators = [MPI.SUM, +, (x,y) -> 2x+y-x] + operators = [MPI.SUM, +, my_reduce, (x,y) -> 2x+y-x] else - operators = [MPI.SUM, +] + operators = [MPI.SUM, +, my_reduce] end for T = [Int] From fb2f54b47898bed141a0119584c146ea6b8c6d3c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 3 Sep 2024 16:40:55 +0200 Subject: [PATCH 02/23] fixup! 
add macro to create custom Ops also on aarch64 --- test/test_reduce.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_reduce.jl b/test/test_reduce.jl index a478c5c77..6ea6cd967 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -62,7 +62,7 @@ end function my_reduce(x, y) 2x+y-x end -@Op(my_reduce, Int) +MPI.@Op(my_reduce, Int) if can_do_closures operators = [MPI.SUM, +, my_reduce, (x,y) -> 2x+y-x] From 50c46ac37d59bd372eb08a83ba164f286cf7bd7e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 3 Sep 2024 16:43:56 +0200 Subject: [PATCH 03/23] fixup! fixup! add macro to create custom Ops also on aarch64 --- test/test_reduce.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/test_reduce.jl b/test/test_reduce.jl index 6ea6cd967..7e39ac17b 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -59,10 +59,12 @@ if isroot @test sum_mesg == sz .* mesg end -function my_reduce(x, y) - 2x+y-x +@eval begin + function my_reduce(x, y) + 2x+y-x + end + MPI.@Op(my_reduce, Int) end -MPI.@Op(my_reduce, Int) if can_do_closures operators = [MPI.SUM, +, my_reduce, (x,y) -> 2x+y-x] From 20d1bc9ff007d97305de786ea750f9bf5fa49633 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 3 Sep 2024 16:56:17 +0200 Subject: [PATCH 04/23] fixup! fixup! fixup! add macro to create custom Ops also on aarch64 --- src/operators.jl | 18 ++++++++++++++---- test/test_reduce.jl | 8 +++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 35ef4ce13..16e2a4737 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -108,24 +108,34 @@ function Op(f, T=Any; iscommutative=false) return op end +""" + @Op(f, T) + +Define a custom operator [`Op`](@ref) using the function `f`. 
+ +""" macro Op(f, T) name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) name_fptr = gensym(Symbol(f, :_, T, :_ptr)) name_module = gensym(Symbol(f, :_, T, :_module)) - esc(quote + expr = quote module $(name_module) + import ..$f, ..$T $(name_wrapper) = $OpWrapper{typeof($f),$T}($f) $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) function __init__() global $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) end - function $Op(::typeof($f), ::Type{T}; iscommutative=true) - op = $Op($OP_NULL.val, $(name_fptr)) + import MPI: Op + function Op(::typeof($f), ::Type{$T}; iscommutative=true) + op = Op($OP_NULL.val, $(name_fptr)) # int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op) $API.MPI_Op_create($(name_fptr), iscommutative, op) finalizer($free, op) end end - end) + end + expr.head = :toplevel + esc(expr) end diff --git a/test/test_reduce.jl b/test/test_reduce.jl index 7e39ac17b..6ea6cd967 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -59,12 +59,10 @@ if isroot @test sum_mesg == sz .* mesg end -@eval begin - function my_reduce(x, y) - 2x+y-x - end - MPI.@Op(my_reduce, Int) +function my_reduce(x, y) + 2x+y-x end +MPI.@Op(my_reduce, Int) if can_do_closures operators = [MPI.SUM, +, my_reduce, (x,y) -> 2x+y-x] From 824a006af322953a1dbe89e7a3bb6e9dd35825e9 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 4 Sep 2024 15:14:04 +0200 Subject: [PATCH 05/23] fix Julia 1.6 --- src/operators.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/operators.jl b/src/operators.jl index 16e2a4737..273c3fa05 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -120,7 +120,8 @@ macro Op(f, T) name_module = gensym(Symbol(f, :_, T, :_module)) expr = quote module $(name_module) - import ..$f, ..$T + # import ..$f, ..$T + $(Expr(:import, Expr(:., :., :., f), Expr(:., :., :., T))) # Julia 1.6 strugles with import 
..$f, ..$T $(name_wrapper) = $OpWrapper{typeof($f),$T}($f) $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) function __init__() From 03ae586629e304c3b08151b94f5791fb9b33ed35 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 4 Sep 2024 15:20:43 +0200 Subject: [PATCH 06/23] Expand docs --- docs/src/reference/advanced.md | 1 + src/operators.jl | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/src/reference/advanced.md b/docs/src/reference/advanced.md index 6440fd5ca..53f50c84e 100644 --- a/docs/src/reference/advanced.md +++ b/docs/src/reference/advanced.md @@ -26,6 +26,7 @@ MPI.Types.duplicate ```@docs MPI.Op +MPI.@Op ``` ## Info objects diff --git a/src/operators.jl b/src/operators.jl index 273c3fa05..9b1e57f37 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -112,7 +112,24 @@ end @Op(f, T) Define a custom operator [`Op`](@ref) using the function `f`. - +On platfroms like AArch53, Julia does not support runtime closures, +being passed to C. The generic version of [`Op`](@ref) uses that +to support arbitrary function being passed as MPI reduction operators. +In contrast `@Op` can be used to statically declare a function to +be passed as an MPI operator. + +```julia +function my_reduce(x, y) + 2x+y-x +end +MPI.@Op(my_reduce, Int) +# ... + MPI.Reduce!(send_arr, recv_arr, my_reduce, MPI.COMM_WORLD; root=root) +#... + +!!! warning + Note that `@Op` works be introducing a new method to `Op`, potentially invalidating other users of `Op`. 
+``` """ macro Op(f, T) name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) From 6b9cae2597e3deedf8440005d78eb72fc1db51b7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 4 Sep 2024 15:24:10 +0200 Subject: [PATCH 07/23] use constant globals --- src/operators.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 9b1e57f37..4d263b758 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -135,20 +135,23 @@ macro Op(f, T) name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) name_fptr = gensym(Symbol(f, :_, T, :_ptr)) name_module = gensym(Symbol(f, :_, T, :_module)) + # The gist is that we can use a method very similar to how we handle `min`/`max` + # but since this might be used from user code we can't use add_load_time_hook! + # this is why we introduce a new module that has a `__init__` function. expr = quote module $(name_module) # import ..$f, ..$T $(Expr(:import, Expr(:., :., :., f), Expr(:., :., :., T))) # Julia 1.6 strugles with import ..$f, ..$T - $(name_wrapper) = $OpWrapper{typeof($f),$T}($f) - $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) + const $(name_wrapper) = $OpWrapper{typeof($f),$T}($f) + const $(name_fptr) = Ref(@cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype}))) function __init__() - global $(name_fptr) = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) + $(name_fptr)[] = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) end import MPI: Op function Op(::typeof($f), ::Type{$T}; iscommutative=true) - op = Op($OP_NULL.val, $(name_fptr)) + op = Op($OP_NULL.val, $(name_fptr)[]) # int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op) - $API.MPI_Op_create($(name_fptr), iscommutative, op) + $API.MPI_Op_create($(name_fptr)[], iscommutative, op) finalizer($free, op) end From 
4e066100ae9e26ff22fa92d09e8f260c5e7f084a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 5 Sep 2024 09:04:34 +0200 Subject: [PATCH 08/23] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com> --- src/operators.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 4d263b758..63cfe178d 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -112,7 +112,7 @@ end @Op(f, T) Define a custom operator [`Op`](@ref) using the function `f`. -On platfroms like AArch53, Julia does not support runtime closures, +On platfroms like AArch64, Julia does not support runtime closures, being passed to C. The generic version of [`Op`](@ref) uses that to support arbitrary function being passed as MPI reduction operators. In contrast `@Op` can be used to statically declare a function to @@ -124,12 +124,11 @@ function my_reduce(x, y) end MPI.@Op(my_reduce, Int) # ... - MPI.Reduce!(send_arr, recv_arr, my_reduce, MPI.COMM_WORLD; root=root) +MPI.Reduce!(send_arr, recv_arr, my_reduce, MPI.COMM_WORLD; root=root) #... - +``` !!! warning Note that `@Op` works be introducing a new method to `Op`, potentially invalidating other users of `Op`. 
-``` """ macro Op(f, T) name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) From 2b1dbdbdef8e9a5cd8df3be7469c07c55c358fbc Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 5 Sep 2024 16:31:39 +0200 Subject: [PATCH 09/23] reflect on MPI.Datatype --- src/operators.jl | 15 +++++++++++++++ test/test_reduce.jl | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/operators.jl b/src/operators.jl index 63cfe178d..ddbb20c11 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -91,6 +91,21 @@ function (w::OpWrapper{F,T})(_a::Ptr{Cvoid}, _b::Ptr{Cvoid}, _len::Ptr{Cint}, t: end +function (w::OpWrapper{F,Any})(_a::Ptr{Cvoid}, _b::Ptr{Cvoid}, _len::Ptr{Cint}, t::Ptr{MPI_Datatype}) where {F} + len = unsafe_load(_len) + T = to_type(Datatype(unsafe_load(t))) # Ptr might actually point to a Julia object so we could unsafe_pointer_to_objref? + @assert isconcretetype(T) + function copy(::Type{T}) where T + a = Ptr{T}(_a) + b = Ptr{T}(_b) + for i = 1:len + unsafe_store!(b, w.f(unsafe_load(a,i), unsafe_load(b,i)), i) + end + end + copy(T) + return nothing +end + function Op(f, T=Any; iscommutative=false) @static if MPI_LIBRARY == "MicrosoftMPI" && Sys.WORD_SIZE == 32 error("User-defined reduction operators are not supported on 32-bit Windows.\nSee https://github.com/JuliaParallel/MPI.jl/issues/246 for more details.") diff --git a/test/test_reduce.jl b/test/test_reduce.jl index 6ea6cd967..1e72f58a0 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -62,7 +62,8 @@ end function my_reduce(x, y) 2x+y-x end -MPI.@Op(my_reduce, Int) +# MPI.@Op(my_reduce, Int) +MPI.@Op(my_reduce, Any) if can_do_closures operators = [MPI.SUM, +, my_reduce, (x,y) -> 2x+y-x] From cb8ee2d9f8e38265bf2e242cdf3d414997c40db5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 6 Sep 2024 08:21:54 +0200 Subject: [PATCH 10/23] fix dispatch rule for abstract types --- src/operators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operators.jl 
b/src/operators.jl index ddbb20c11..3607edae5 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -162,7 +162,7 @@ macro Op(f, T) $(name_fptr)[] = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) end import MPI: Op - function Op(::typeof($f), ::Type{$T}; iscommutative=true) + function Op(::typeof($f), ::Type{<:$T}; iscommutative=true) op = Op($OP_NULL.val, $(name_fptr)[]) # int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op) $API.MPI_Op_create($(name_fptr)[], iscommutative, op) From 1dc17402e518a19396c63673f16eb4179cdacedc Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 6 Sep 2024 10:23:05 +0200 Subject: [PATCH 11/23] Add fallbacks for existing ops --- src/operators.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/operators.jl b/src/operators.jl index 3607edae5..2022b85c7 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -174,3 +174,11 @@ macro Op(f, T) expr.head = :toplevel esc(expr) end + +@Op(min, Any) +@Op(max, Any) +@Op(+, Any) +@Op(*, Any) +@Op(&, Any) +@Op(|, Any) +@Op(⊻, Any) From a70429a92538c1e6dd6d79da38a126525489a10b Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 6 Sep 2024 10:26:48 +0200 Subject: [PATCH 12/23] don't duplicate code unecessarily --- src/operators.jl | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 2022b85c7..edb9bfa97 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -81,21 +81,11 @@ end function (w::OpWrapper{F,T})(_a::Ptr{Cvoid}, _b::Ptr{Cvoid}, _len::Ptr{Cint}, t::Ptr{MPI_Datatype}) where {F,T} len = unsafe_load(_len) - @assert isconcretetype(T) - a = Ptr{T}(_a) - b = Ptr{T}(_b) - for i = 1:len - unsafe_store!(b, w.f(unsafe_load(a,i), unsafe_load(b,i)), i) + if !isconcretetype(T) + T = to_type(Datatype(unsafe_load(t))) # Ptr might actually point to a Julia object so we could unsafe_pointer_to_objref? 
end - return nothing -end - - -function (w::OpWrapper{F,Any})(_a::Ptr{Cvoid}, _b::Ptr{Cvoid}, _len::Ptr{Cint}, t::Ptr{MPI_Datatype}) where {F} - len = unsafe_load(_len) - T = to_type(Datatype(unsafe_load(t))) # Ptr might actually point to a Julia object so we could unsafe_pointer_to_objref? - @assert isconcretetype(T) function copy(::Type{T}) where T + @assert isconcretetype(T) a = Ptr{T}(_a) b = Ptr{T}(_b) for i = 1:len From 30e6dab7616e2fe65169fe5b6499694064059a99 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 13 Sep 2024 12:24:33 +0200 Subject: [PATCH 13/23] rename --- docs/src/reference/advanced.md | 2 +- src/operators.jl | 32 +++++++++++++++++++------------- test/Project.toml | 10 ++++++---- test/test_reduce.jl | 3 +-- 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/docs/src/reference/advanced.md b/docs/src/reference/advanced.md index 53f50c84e..7d4ed0ab6 100644 --- a/docs/src/reference/advanced.md +++ b/docs/src/reference/advanced.md @@ -26,7 +26,7 @@ MPI.Types.duplicate ```@docs MPI.Op -MPI.@Op +MPI.@RegisterOp ``` ## Info objects diff --git a/src/operators.jl b/src/operators.jl index edb9bfa97..3792f4b99 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -114,34 +114,39 @@ function Op(f, T=Any; iscommutative=false) end """ - @Op(f, T) + @RegisterOp(f, T, internal=false) -Define a custom operator [`Op`](@ref) using the function `f`. +Register a custom operator [`Op`](@ref) using the function `f` statically. On platfroms like AArch64, Julia does not support runtime closures, being passed to C. The generic version of [`Op`](@ref) uses that to support arbitrary function being passed as MPI reduction operators. -In contrast `@Op` can be used to statically declare a function to +In contrast `@RegisterOp` can be used to statically declare a function to be passed as an MPI operator. ```julia function my_reduce(x, y) 2x+y-x end -MPI.@Op(my_reduce, Int) +MPI.@RegisterOp(my_reduce, Int) # ... 
MPI.Reduce!(send_arr, recv_arr, my_reduce, MPI.COMM_WORLD; root=root) #... ``` !!! warning - Note that `@Op` works be introducing a new method to `Op`, potentially invalidating other users of `Op`. + Note that `@RegisterOp` works be introducing a new method to `Op`, potentially invalidating other users of `Op`. + +!!! note + `T` can be `Any`, but that will lead to a runtime dispatch. """ -macro Op(f, T) +macro RegisterOp(f, T) name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) name_fptr = gensym(Symbol(f, :_, T, :_ptr)) name_module = gensym(Symbol(f, :_, T, :_module)) # The gist is that we can use a method very similar to how we handle `min`/`max` # but since this might be used from user code we can't use add_load_time_hook! # this is why we introduce a new module that has a `__init__` function. + # If this module approach is too costly for loading MPI.jl for internal use we could use + # `add_load_time_hook` expr = quote module $(name_module) # import ..$f, ..$T @@ -152,6 +157,7 @@ macro Op(f, T) $(name_fptr)[] = @cfunction($(name_wrapper), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{$MPI_Datatype})) end import MPI: Op + # we can't create a const Op since MPI needs to be initialized? 
function Op(::typeof($f), ::Type{<:$T}; iscommutative=true) op = Op($OP_NULL.val, $(name_fptr)[]) # int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op) @@ -165,10 +171,10 @@ macro Op(f, T) esc(expr) end -@Op(min, Any) -@Op(max, Any) -@Op(+, Any) -@Op(*, Any) -@Op(&, Any) -@Op(|, Any) -@Op(⊻, Any) +@RegisterOp(min, Any) +@RegisterOp(max, Any) +@RegisterOp(+, Any) +@RegisterOp(*, Any) +@RegisterOp(&, Any) +@RegisterOp(|, Any) +@RegisterOp(⊻, Any) diff --git a/test/Project.toml b/test/Project.toml index f83c1fb10..671d7c828 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -3,20 +3,22 @@ DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +[weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + [compat] AMDGPU = "0.6, 0.7, 0.8, 0.9, 1" CUDA = "3, 4, 5" DoubleFloats = "1.4" MPIPreferences = "0.1" +StaticArrays = "1" TOML = "< 0.0.1, 1.0" [extras] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" - -[weakdeps] -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" diff --git a/test/test_reduce.jl b/test/test_reduce.jl index 1e72f58a0..3a5191629 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -62,8 +62,7 @@ end function my_reduce(x, y) 2x+y-x end -# MPI.@Op(my_reduce, Int) -MPI.@Op(my_reduce, Any) +MPI.@RegisterOp(my_reduce, Any) if can_do_closures operators = [MPI.SUM, +, my_reduce, (x,y) -> 2x+y-x] From aea093f529140cc9d660f48fcec2e1a24f89ffeb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 13 Sep 2024 14:17:12 +0200 Subject: [PATCH 14/23] add reference from Op --- src/operators.jl | 1 + 1 
file changed, 1 insertion(+) diff --git a/src/operators.jl b/src/operators.jl index 3792f4b99..4aca305f5 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -16,6 +16,7 @@ associative, and if `iscommutative` is true, assumed to be commutative as well. - [`Allreduce!`](@ref)/[`Allreduce`](@ref) - [`Scan!`](@ref)/[`Scan`](@ref) - [`Exscan!`](@ref)/[`Exscan`](@ref) +- [`@RegisterOp`](@ref) """ mutable struct Op val::MPI_Op From 30c5ffd7032098d03b3838354d5a34b7ac4253fb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 13 Sep 2024 14:18:22 +0200 Subject: [PATCH 15/23] fix default --- src/operators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operators.jl b/src/operators.jl index 4aca305f5..7fdaa7779 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -159,7 +159,7 @@ macro RegisterOp(f, T) end import MPI: Op # we can't create a const Op since MPI needs to be initialized? - function Op(::typeof($f), ::Type{<:$T}; iscommutative=true) + function Op(::typeof($f), ::Type{<:$T}; iscommutative=false) op = Op($OP_NULL.val, $(name_fptr)[]) # int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op) $API.MPI_Op_create($(name_fptr)[], iscommutative, op) From 80a7df6244613faef13980dd2ff46a9f0c7dcf29 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 09:51:54 +0200 Subject: [PATCH 16/23] small fixes --- src/operators.jl | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 7fdaa7779..1a99b0902 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -83,7 +83,9 @@ end function (w::OpWrapper{F,T})(_a::Ptr{Cvoid}, _b::Ptr{Cvoid}, _len::Ptr{Cint}, t::Ptr{MPI_Datatype}) where {F,T} len = unsafe_load(_len) if !isconcretetype(T) - T = to_type(Datatype(unsafe_load(t))) # Ptr might actually point to a Julia object so we could unsafe_pointer_to_objref? 
+ concrete_T = to_type(Datatype(unsafe_load(t))) # Ptr might actually point to a Julia object so we could unsafe_pointer_to_objref? + else + concrete_T = T end function copy(::Type{T}) where T @assert isconcretetype(T) @@ -93,7 +95,7 @@ function (w::OpWrapper{F,T})(_a::Ptr{Cvoid}, _b::Ptr{Cvoid}, _len::Ptr{Cint}, t: unsafe_store!(b, w.f(unsafe_load(a,i), unsafe_load(b,i)), i) end end - copy(T) + copy(concrete_T) return nothing end @@ -115,14 +117,14 @@ function Op(f, T=Any; iscommutative=false) end """ - @RegisterOp(f, T, internal=false) + @RegisterOp(f, T) Register a custom operator [`Op`](@ref) using the function `f` statically. On platfroms like AArch64, Julia does not support runtime closures, -being passed to C. The generic version of [`Op`](@ref) uses that -to support arbitrary function being passed as MPI reduction operators. -In contrast `@RegisterOp` can be used to statically declare a function to -be passed as an MPI operator. +being passed to C. The generic version of [`Op`](@ref) uses runtime closures +to support arbitrary functions being passed as MPI reduction operators. +`@RegisterOp` statically adds a function to the set of functions allowed as +an MPI operator. ```julia function my_reduce(x, y) @@ -134,10 +136,11 @@ MPI.Reduce!(send_arr, recv_arr, my_reduce, MPI.COMM_WORLD; root=root) #... ``` !!! warning - Note that `@RegisterOp` works be introducing a new method to `Op`, potentially invalidating other users of `Op`. + Note that `@RegisterOp` works by introducing a new method of the generic function `Op`. + It can only be used as a top-level statement and may trigger method invalidations. !!! note - `T` can be `Any`, but that will lead to a runtime dispatch. + `T` can be `Any`, but this will lead to a runtime dispatch.
""" macro RegisterOp(f, T) name_wrapper = gensym(Symbol(f, :_, T, :_wrapper)) From 52707164ccc874513fee968b9c59473ece75bdd6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 10:04:59 +0200 Subject: [PATCH 17/23] enable additional test --- test/test_reduce.jl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/test/test_reduce.jl b/test/test_reduce.jl index 3a5191629..6ad5d24cf 100644 --- a/test/test_reduce.jl +++ b/test/test_reduce.jl @@ -122,19 +122,17 @@ end MPI.Barrier( MPI.COMM_WORLD ) -if can_do_closures - send_arr = [Double64(i)/10 for i = 1:10] - - result = MPI.Reduce(send_arr, +, MPI.COMM_WORLD; root=root) - if rank == root - @test result ≈ [Double64(sz*i)/10 for i = 1:10] rtol=sz*eps(Double64) - else - @test result === nothing - end +send_arr = [Double64(i)/10 for i = 1:10] - MPI.Barrier( MPI.COMM_WORLD ) +result = MPI.Reduce(send_arr, +, MPI.COMM_WORLD; root=root) +if rank == root + @test result ≈ [Double64(sz*i)/10 for i = 1:10] rtol=sz*eps(Double64) +else + @test result === nothing end +MPI.Barrier( MPI.COMM_WORLD ) + GC.gc() MPI.Finalize() @test MPI.Finalized() From 88ba2113d1be38ced29f6ff4cec0c30a86a59153 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 10:07:45 +0200 Subject: [PATCH 18/23] add hint to error --- src/operators.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 1a99b0902..9f3fed798 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -101,9 +101,17 @@ end function Op(f, T=Any; iscommutative=false) @static if MPI_LIBRARY == "MicrosoftMPI" && Sys.WORD_SIZE == 32 - error("User-defined reduction operators are not supported on 32-bit Windows.\nSee https://github.com/JuliaParallel/MPI.jl/issues/246 for more details.") + error(""" + User-defined reduction operators are not supported on 32-bit Windows. + See https://github.com/JuliaParallel/MPI.jl/issues/246 for more details. 
+ """) elseif Sys.ARCH ∈ (:aarch64, :ppc64le, :powerpc64le) || startswith(lowercase(String(Sys.ARCH)), "arm") - error("User-defined reduction operators are currently not supported on non-Intel architectures.\nSee https://github.com/JuliaParallel/MPI.jl/issues/404 for more details.") + error(""" + User-defined reduction operators are currently not supported on non-Intel architectures. + See https://github.com/JuliaParallel/MPI.jl/issues/404 for more details. + + You may want to use `@RegisterOp` to statically register `f`. + """) end w = OpWrapper{typeof(f),T}(f) fptr = @cfunction($w, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cint}, Ptr{MPI_Datatype})) From 79ade6c369c2db13ea93287fcceb9bfed87e7ebb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 10:08:37 +0200 Subject: [PATCH 19/23] remove unsused test dependency --- test/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 671d7c828..aba0423ff 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -3,7 +3,6 @@ DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 74edbd5dba72b619ab28bb7fa0006a1c67efee46 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 10:10:00 +0200 Subject: [PATCH 20/23] update example --- docs/examples/03-reduce.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/examples/03-reduce.jl b/docs/examples/03-reduce.jl index 86d22be11..cfd3f9c99 100644 --- a/docs/examples/03-reduce.jl +++ b/docs/examples/03-reduce.jl @@ -31,10 +31,15 @@ function pool(S1::SummaryStat, S2::SummaryStat) SummaryStat(m,v,n) end +# Register the custom reduction operator. 
This is necessary only on platforms +# where Julia doesn't support closures as cfunctions (e.g. ARM), but can be used +# on all platforms for consistency. +MPI.@RegisterOp(pool, SummaryStat) + X = randn(10,3) .* [1,3,7]' # Perform a scalar reduction -summ = MPI.Reduce(SummaryStat(X), pool, root, comm) +summ = MPI.Reduce(SummaryStat(X), pool, comm; root) if MPI.Comm_rank(comm) == root @show summ.var @@ -42,7 +47,7 @@ end # Perform a vector reduction: # the reduction operator is applied elementwise -col_summ = MPI.Reduce(mapslices(SummaryStat,X,dims=1), pool, root, comm) +col_summ = MPI.Reduce(mapslices(SummaryStat,X,dims=1), pool, comm; root) if MPI.Comm_rank(comm) == root col_var = map(summ -> summ.var, col_summ) From 61358f42d64d169f0f92792ff9f5592b0fe9284e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 10:25:19 +0200 Subject: [PATCH 21/23] fixup! remove unsused test dependency --- test/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index aba0423ff..52ccb4376 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -15,7 +15,6 @@ AMDGPU = "0.6, 0.7, 0.8, 0.9, 1" CUDA = "3, 4, 5" DoubleFloats = "1.4" MPIPreferences = "0.1" -StaticArrays = "1" TOML = "< 0.0.1, 1.0" [extras] From 865eba3dde3103df5313ef0081831bc616bdd5a4 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 13:25:03 +0200 Subject: [PATCH 22/23] add note to knownissues --- docs/src/knownissues.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/knownissues.md b/docs/src/knownissues.md index 3cd84d067..8da854205 100644 --- a/docs/src/knownissues.md +++ b/docs/src/knownissues.md @@ -210,3 +210,5 @@ However they have two limitations: * [Julia's C-compatible function pointers](https://docs.julialang.org/en/v1/manual/calling-c-and-fortran-code/index.html#Creating-C-Compatible-Julia-Function-Pointers-1) cannot be used where the `stdcall` calling convention is expected, which is the case for 32-bit 
Microsoft MPI, * closure cfunctions in Julia are based on LLVM trampolines, which are not supported on ARM architecture. + +As an alternative [`@ReduceOp`](@ref) may be used to statically register reduction operations. From 2e179940fb2e2c793e71e15faac4aa22315f26c8 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 24 Sep 2024 13:37:06 +0200 Subject: [PATCH 23/23] fixup! add note to knownissues --- docs/src/knownissues.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/knownissues.md b/docs/src/knownissues.md index 8da854205..179ccd6ed 100644 --- a/docs/src/knownissues.md +++ b/docs/src/knownissues.md @@ -211,4 +211,4 @@ However they have two limitations: * [Julia's C-compatible function pointers](https://docs.julialang.org/en/v1/manual/calling-c-and-fortran-code/index.html#Creating-C-Compatible-Julia-Function-Pointers-1) cannot be used where the `stdcall` calling convention is expected, which is the case for 32-bit Microsoft MPI, * closure cfunctions in Julia are based on LLVM trampolines, which are not supported on ARM architecture. -As an alternative [`@ReduceOp`](@ref) may be used to statically register reduction operations. +As an alternative [`MPI.@RegisterOp`](@ref) may be used to statically register reduction operations.