From f002a5078cbaeb4b35692350b9c51fc9be330c55 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Wed, 22 Jun 2022 19:32:17 -0700 Subject: [PATCH 01/16] apply operators byslab --- src/DataLayouts/DataLayouts.jl | 92 +++- src/DataLayouts/struct.jl | 65 ++- src/Fields/Fields.jl | 2 +- src/Fields/indices.jl | 51 ++ src/Operators/spectralelement.jl | 778 +++++++++++++++---------------- test/DataLayouts/data2d.jl | 6 +- 6 files changed, 563 insertions(+), 431 deletions(-) diff --git a/src/DataLayouts/DataLayouts.jl b/src/DataLayouts/DataLayouts.jl index 58e2f2ec84..dbec7a5a16 100644 --- a/src/DataLayouts/DataLayouts.jl +++ b/src/DataLayouts/DataLayouts.jl @@ -296,6 +296,19 @@ function replace_basetype(data::IJFH{S, Nij}, ::Type{T}) where {S, Nij, T} return IJFH{S′, Nij}(similar(array, T)) end +@inline function Base.getindex(data::IJFH{S}, i, j, _, _, h) where {S} + @inbounds get_struct(parent(data), S, Val(3), CartesianIndex(i, j, 1, h)) +end +@inline function Base.setindex!(data::IJFH{S}, val, i, j, _, _, h) where {S} + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(3), + CartesianIndex(i, j, 1, h), + ) +end + + Base.length(data::IJFH) = size(parent(data), 4) @generated function _property_view( @@ -455,6 +468,18 @@ end IFH{SS, Ni}(dataview) end +@inline function Base.getindex(data::IFH{S}, i, _, _, _, h) where {S} + @inbounds get_struct(parent(data), S, Val(2), CartesianIndex(i, 1, h)) +end +@inline function Base.setindex!(data::IFH{S}, val, i, _, _, _, h) where {S} + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(3), + CartesianIndex(i, 1, h), + ) +end + # ====================== # Data0D DataLayout # ====================== @@ -523,7 +548,7 @@ end end Base.@propagate_inbounds function Base.getindex(data::DataF{S}) where {S} - @inbounds get_struct(parent(data), S) + @inbounds get_struct(parent(data), S, Val(1), CartesianIndex(1)) end @propagate_inbounds function Base.getindex(col::Data0D, I::CartesianIndex{5}) @@ -531,7 +556,12 @@ end end Base.@propagate_inbounds function Base.setindex!(data::DataF{S}, val) where {S} - @inbounds set_struct!(parent(data), convert(S, val)) + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(1), + CartesianIndex(1), + ) end @propagate_inbounds function Base.setindex!( @@ -646,8 +676,7 @@ end ) where {S, Nij} @boundscheck (1 <= i <= Nij && 1 <= j <= Nij) || throw(BoundsError(data, (i, j))) - dataview = @inbounds view(parent(data), i, j, :) - @inbounds get_struct(dataview, S) + @inbounds get_struct(parent(data), S, Val(3), CartesianIndex(i, j, 1)) end @inline function Base.setindex!( @@ -658,8 +687,12 @@ end ) where {S, Nij} @boundscheck (1 <= i <= Nij && 1 <= j <= Nij) || throw(BoundsError(data, (i, j))) - dataview = @inbounds view(parent(data), i, j, :) - set_struct!(dataview, convert(S, val)) + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(3), + CartesianIndex(i, j, 1), + ) end @inline function column(data::IJF{S, Nij}, i, j) where {S, Nij} @@ -758,14 +791,17 @@ end @inline function Base.getindex(data::IF{S, Ni}, i::Integer) where {S, Ni} @boundscheck (1 <= i <= Ni) || throw(BoundsError(data, (i,))) - dataview = @inbounds view(parent(data), i, :) - @inbounds get_struct(dataview, S) + @inbounds get_struct(parent(data), S, Val(2), CartesianIndex(i, 1)) end @inline function Base.setindex!(data::IF{S, Ni}, val, i::Integer) where {S, Ni} @boundscheck (1 <= i <= Ni) || throw(BoundsError(data, (i,))) - dataview = @inbounds view(parent(data), i, :) - set_struct!(dataview, convert(S, val)) + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(2), + CartesianIndex(i, 1), + ) end @inline function column(data::IF{S, Ni}, i) where {S, Ni} @@ -851,8 +887,7 @@ end @inline function Base.getindex(data::VF{S}, v::Integer) where {S} @boundscheck 1 <= v <= size(parent(data), 1) || throw(BoundsError(data, (v,))) - dataview = @inbounds view(parent(data), v, :) - @inbounds get_struct(dataview, S) + @inbounds get_struct(parent(data), S, Val(2), CartesianIndex(v, 1)) end @propagate_inbounds function Base.getindex( @@ -873,8 +908,12 @@ end @inline function Base.setindex!(data::VF{S}, val, v::Integer) where {S} @boundscheck (1 <= v <= length(parent(data))) || throw(BoundsError(data, (v,))) - dataview = @inbounds view(parent(data), v, :) - @inbounds set_struct!(dataview, convert(S, val)) + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(2), + CartesianIndex(v, 1), + ) end @inline function column(data::VF, i, h) @@ -1039,6 +1078,19 @@ function gather( nothing end end + +@inline function Base.getindex(data::VIJFH{S}, i, j, _, v, h) where {S} + @inbounds get_struct(parent(data), S, Val(4), CartesianIndex(v, i, j, 1, h)) +end +@inline function Base.setindex!(data::VIJFH{S}, val, i, j, _, v, h) where {S} + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(4), + CartesianIndex(v, i, j, 1, h), + ) +end + # ====================== # Data1DX DataLayout # ====================== @@ -1166,6 +1218,18 @@ end IFH{S, Nij}(dataview) end +@inline function Base.getindex(data::VIFH{S}, i, _, _, v, h) where {S} + @inbounds get_struct(parent(data), S, Val(3), CartesianIndex(v, i, 1, h)) +end +@inline function Base.setindex!(data::VIFH{S}, val, i, _, _, v, h) where {S} + @inbounds set_struct!( + parent(data), + convert(S, val), + Val(3), + CartesianIndex(v, i, 1, h), + ) +end + # ========================================= # Special DataLayouts for regular gridding # ========================================= diff --git a/src/DataLayouts/struct.jl b/src/DataLayouts/struct.jl index ccd485b327..38abbf5686 100644 --- a/src/DataLayouts/struct.jl +++ b/src/DataLayouts/struct.jl @@ -164,16 +164,26 @@ typesize(::Type{T}, ::Type{S}) where {T, S} = div(sizeof(S), sizeof(T)) return out end +@inline offset_index( + start_index::CartesianIndex{N}, + ::Val{D}, + offset, +) where {N, D} = CartesianIndex( + ntuple(n -> n == D ? start_index[n] + offset : start_index[n], N), +) + """ - get_struct(array, S[, offset=0]) + get_struct(array, S, Val(D), start_index) -Construct an object of type `S` from the values of `array`, optionally offset by `offset` from the start of the array. +Construct an object of type `S` packed along the `D` dimension, from the values of `array`, +starting at `start_index`. """ Base.@propagate_inbounds @generated function get_struct( array::AbstractArray{T}, ::Type{S}, - offset, -) where {T, S} + ::Val{D}, + start_index::CartesianIndex, +) where {T, S, D} tup = :(()) for i in 1:fieldcount(S) push!( @@ -181,7 +191,8 @@ Base.@propagate_inbounds @generated function get_struct( :(get_struct( array, fieldtype(S, $i), - offset + fieldtypeoffset(T, S, $i), + Val($D), + offset_index(start_index, Val($D), fieldtypeoffset(T, S, $i)), )), ) end @@ -189,29 +200,37 @@ Base.@propagate_inbounds @generated function get_struct( Base.@_propagate_inbounds_meta @inbounds bypass_constructor(S, $tup) end + # else + # Base.@_propagate_inbounds_meta + # args = ntuple(fieldcount(S)) do i + # get_struct(array, fieldtype(S, i), Val(D), offset_index(start_index, Val(D), fieldtypeoffset(T, S, i))) + # end + # return bypass_constructor(S, args) + # end end # recursion base case: hit array type is the same as the struct leaf type Base.@propagate_inbounds function get_struct( array::AbstractArray{S}, ::Type{S}, - offset, -) where {S} - return @inbounds array[offset + 1] + ::Val{D}, + start_index::CartesianIndex, +) where {S, D} + @inbounds return array[start_index] end -Base.@propagate_inbounds function get_struct( - array::AbstractArray{T}, - ::Type{S}, -) where {T, S} - @inbounds get_struct(array, S, 0) -end +""" + set_struct!(array, val::S, Val(D), start_index) +Store an object `val` of type `S` packed along the `D` dimension, into `array`, +starting at `start_index`. +""" Base.@propagate_inbounds @generated function set_struct!( array::AbstractArray{T}, val::S, - offset, -) where {T, S} + ::Val{D}, + start_index::CartesianIndex, +) where {T, S, D} ex = quote Base.@_propagate_inbounds_meta end @@ -221,7 +240,8 @@ Base.@propagate_inbounds @generated function set_struct!( :(set_struct!( array, getfield(val, $i), - offset + fieldtypeoffset(T, S, $i), + Val($D), + offset_index(start_index, Val($D), fieldtypeoffset(T, S, $i)), )), ) end @@ -232,16 +252,13 @@ end Base.@propagate_inbounds function set_struct!( array::AbstractArray{S}, val::S, - offset, -) where {S} - @inbounds array[offset + 1] = val + ::Val{D}, + index::CartesianIndex, +) where {S, D} + @inbounds array[index] = val val end -Base.@propagate_inbounds function set_struct!(array, val) - @inbounds set_struct!(array, val, 0) -end - # For complex nested types (ex. wrapped SMatrix) we hit a recursion limit and de-optimize # We know the recursion will terminate due to the fact that bitstype fields # cannot be self referential so there are no cycles in get/set_struct (bounded tree) diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index 03d78d4413..479b0b2d7c 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -8,7 +8,7 @@ import ..Domains import ..Topologies import ..Spaces: Spaces, AbstractSpace, AbstractPointSpace import ..Geometry: Geometry, Cartesian12Vector -import ..Utilities: PlusHalf +import ..Utilities: PlusHalf, half using ..RecursiveApply using ClimaComms diff --git a/src/Fields/indices.jl b/src/Fields/indices.jl index 71d7440d61..be0ba2da85 100644 --- a/src/Fields/indices.jl +++ b/src/Fields/indices.jl @@ -128,3 +128,54 @@ end # potential TODO: # - define a ColumnIndices type, make it work with https://github.com/JuliaFolds/FLoops.jl + + + +struct SlabIndex{VIdx} + v::VIdx + h::Int +end + + +Base.getindex(field::Field, slabidx::SlabIndex) = slab(field, slabidx) + +function slab(field::SpectralElementField, slabidx::SlabIndex{Nothing}) + slab(field, slabidx.h) +end +function slab( + field::CenterExtrudedFiniteDifferenceField, + slabidx::SlabIndex{Int}, +) + slab(field, slabidx.v, slabidx.h) +end +function slab( + field::FaceExtrudedFiniteDifferenceField, + slabidx::SlabIndex{PlusHalf{Int}}, +) + slab(field, slabidx.v + half, slabidx.h) +end + +function byslab(fn, space::Spaces.AbstractSpectralElementSpace) + Nh = Topologies.nlocalelems(space.topology)::Int + for h in 1:Nh + fn(SlabIndex(nothing, h)) + end +end +function byslab(fn, space::Spaces.CenterExtrudedFiniteDifferenceSpace) + Nh = Topologies.nlocalelems(Spaces.topology(space)) + Nv = Spaces.nlevels(space) + for h in 1:Nh + for v in 1:Nv + fn(SlabIndex(v, h)) + end + end +end +function byslab(fn, space::Spaces.FaceExtrudedFiniteDifferenceSpace) + Nh = Topologies.nlocalelems(Spaces.topology(space)) + Nv = Spaces.nlevels(space) + for h in 1:Nh + for v in 1:Nv + fn(SlabIndex(v - half, h)) + end + end +end diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index ab2bd09f37..c4e99f5b0a 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -12,9 +12,9 @@ Represents an operation that is applied to each element. Subtypes `Op` of this should define the following: - [`operator_return_eltype(::Op, ElTypes...)`](@ref) - [`allocate_work(::Op, args...)`](@ref) -- [`apply_slab(::Op, work, args...)`](@ref) +- [`apply_operator(::Op, work, args...)`](@ref) -Additionally, the result type `OpResult <: OperatorSlabResult` of `apply_slab` should define `get_node(::OpResult, i, j)`. +Additionally, the result type `OpResult <: OperatorSlabResult` of `apply_operator` should define `get_node(::OpResult, ij, slabidx)`. """ abstract type SpectralElementOperator end @@ -28,9 +28,26 @@ function operator_axes end operator_axes(space::Spaces.AbstractSpace) = () operator_axes(space::Spaces.SpectralElementSpace1D) = (1,) operator_axes(space::Spaces.SpectralElementSpace2D) = (1, 2) +operator_axes(space::Spaces.SpectralElementSpaceSlab1D) = (1,) +operator_axes(space::Spaces.SpectralElementSpaceSlab2D) = (1, 2) operator_axes(space::Spaces.ExtrudedFiniteDifferenceSpace) = operator_axes(space.horizontal_space) + +function node_indices(space::Spaces.SpectralElementSpace1D) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + CartesianIndices((Nq,)) +end +function node_indices(space::Spaces.SpectralElementSpace2D) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + CartesianIndices((Nq, Nq)) +end +node_indices(space::Spaces.ExtrudedFiniteDifferenceSpace) = + node_indices(space.horizontal_space) + + """ SpectralBroadcasted{Style}(op, args[,axes[, work]]) @@ -151,206 +168,201 @@ function Base.Broadcast.materialize(sbc::SpectralBroadcasted) copy(Base.Broadcast.instantiate(sbc)) end -Base.@propagate_inbounds function _inner_copyto!( - field_out::Field, - sbc::SpectralBroadcasted, - v::Integer, - h::Integer, -) - slab_out = slab(field_out, v, h) - out_slab_space = slab(axes(sbc), v, h) - in_slab_space = slab(input_space(sbc), v, h) - _slab_args = _apply_slab_args(slab_args(sbc.args, v, h), v, h) - copy_slab!( - slab_out, - apply_slab(sbc.op, out_slab_space, in_slab_space, _slab_args...), - ) -end - -function _serial_copyto!( - field_out::Field, - sbc::SpectralBroadcasted, - Nv::Int, - Nh::Int, -) - @inbounds for h in 1:Nh, v in 1:Nv - _inner_copyto!(field_out, sbc, v, h) +function Base.copyto!(out::Field, sbc::SpectralBroadcasted) + Fields.byslab(axes(out)) do slabidx + copyto_slab!(out, sbc, slabidx) end - return field_out -end - -function _threaded_copyto!( - field_out::Field, - sbc::SpectralBroadcasted, - Nv::Int, - Nh::Int, -) - @inbounds begin - Threads.@threads for h in 1:Nh - for v in 1:Nv - _inner_copyto!(field_out, sbc, v, h) - end - end - end - return field_out -end - -function Base.copyto!(field_out::Field, sbc::SpectralBroadcasted) - space = axes(field_out) - Nv = Spaces.nlevels(space) - Nh = Topologies.nlocalelems(Spaces.topology(space)) - if enable_threading() - return _threaded_copyto!(field_out, sbc, Nv, Nh) - end - return _serial_copyto!(field_out, sbc, Nv, Nh) + return out end function Base.Broadcast.materialize!(dest, sbc::SpectralBroadcasted) copyto!(dest, Base.Broadcast.instantiate(sbc)) end -Base.@propagate_inbounds function slab( - sbc::SpectralBroadcasted{Style}, - inds..., -) where {Style <: SpectralStyle} - _args = slab_args(sbc.args, inds...) - _axes = slab(axes(sbc), inds...) - SpectralBroadcasted{Style}(sbc.op, _args, _axes, sbc.input_space) -end - -Base.@propagate_inbounds function slab( - bc::Base.Broadcast.Broadcasted{Style}, - inds..., -) where {Style <: AbstractSpectralStyle} - _args = slab_args(bc.args, inds...) - _axes = slab(axes(bc), inds...) - Base.Broadcast.Broadcasted{Style}(bc.f, _args, _axes) -end - function Base.copyto!( - field_out::Field, + out::Field, bc::Base.Broadcast.Broadcasted{Style}, ) where {Style <: AbstractSpectralStyle} - space = axes(field_out) - topology = Spaces.topology(space) - Nv = Spaces.nlevels(space)::Int - Nh = Topologies.nlocalelems(topology)::Int - @inbounds for h in 1:Nh, v in 1:Nv - slab_out = slab(field_out, v, h) - _slab_args = _apply_slab_args(slab_args(bc.args, v, h), v, h) - copy_slab!( - slab_out, - Base.Broadcast.Broadcasted{Style}(bc.f, _slab_args, axes(slab_out)), - ) + Fields.byslab(axes(out)) do slabidx + copyto_slab!(out, bc, slabidx) end - return field_out + return out end -function copy_slab!(slab_out::Fields.SlabField1D, res) - space = axes(slab_out) - Nq = Quadratures.degrees_of_freedom(space.quadrature_style)::Int - @inbounds for i in 1:Nq - set_node!(slab_out, i, get_node(res, i)) - end - return slab_out -end +""" + copyto_slab!(out, bc, slabidx) -function copy_slab!(slab_out::Fields.SlabField2D, res) - space = axes(slab_out) - Nq = Quadratures.degrees_of_freedom(space.quadrature_style)::Int - @inbounds for j in 1:Nq, i in 1:Nq - set_node!(slab_out, i, j, get_node(res, i, j)) +Copy the slab indexed by `slabidx` from `bc` to `out`. +""" +function copyto_slab!(out, bc, slabidx) + space = axes(out) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + rbc = resolve_operator(bc, slabidx) + @inbounds for ij in node_indices(axes(out)) + set_node!(out, ij, slabidx, get_node(rbc, ij, slabidx)) end - return slab_out + return nothing end -# 1D get/set node -# Recursively call get_node() on broadcast arguments in a way that is statically reducible by the optimizer -# see Base.Broadcast.preprocess_args -@inline node_args(args::Tuple, inds...) = - (get_node(args[1], inds...), node_args(Base.tail(args), inds...)...) -@inline node_args(args::Tuple{Any}, inds...) = (get_node(args[1], inds...),) -@inline node_args(args::Tuple{}, inds...) = () +""" + resolve_operator(bc, slabidx) -# 1D intermediate slab data types -Base.@propagate_inbounds function get_node(v::MVector, i) - v[i] +Recursively evaluate any operators in `bc` at `slabidx`, replacing any +`SpectralBroadcasted` objects with an `IF`/`IJF` object. +""" +function resolve_operator(bc::SpectralBroadcasted, slabidx) + args = _resolve_operator_args(slabidx, bc.args...) + apply_operator(bc.op, bc.axes, slabidx, args...) end - -Base.@propagate_inbounds function get_node(v::SVector, i) - v[i] +function resolve_operator( + bc::Base.Broadcast.Broadcasted{CompositeSpectralStyle}, + slabidx, +) + args = _resolve_operator_args(slabidx, bc.args...) + Base.Broadcast.Broadcasted{CompositeSpectralStyle}(bc.f, args, bc.axes) end +resolve_operator(x, slabidx) = x + +""" + _resolve_operator(slabidx, args...) + +Calls `resolve_operator(arg, slabidx)` for each `arg` in `args` +""" +_resolve_operator_args(slabidx) = () +@inline _resolve_operator_args(slabidx, arg, xargs...) = ( + resolve_operator(arg, slabidx), + _resolve_operator_args(slabidx, xargs...)..., +) + -Base.@propagate_inbounds function get_node(scalar, i) + +_get_node(ij, slabidx) = () +_get_node(ij, slabidx, arg, xargs...) = + (get_node(arg, ij, slabidx), _get_node(ij, slabidx, xargs...)...) + +Base.@propagate_inbounds function get_node(scalar, ij, slabidx) scalar[] end - -Base.@propagate_inbounds function get_node(field::Fields.SlabField1D, i) - getindex(Fields.field_values(field), i) +Base.@propagate_inbounds function get_node( + field::Fields.Field, + ij::CartesianIndex{1}, + slabidx, +) + i, = Tuple(ij) + if field isa Fields.FaceExtrudedFiniteDifferenceField + v = slabidx.v + half + else + v = slabidx.v + end + h = slabidx.h + Fields.field_values(field)[i, nothing, nothing, v, h] end - -Base.@propagate_inbounds function get_node(bc::Base.Broadcast.Broadcasted, i) - bc.f(node_args(bc.args, i)...) +Base.@propagate_inbounds function get_node( + field::Fields.Field, + ij::CartesianIndex{2}, + slabidx, +) + i, j = Tuple(ij) + if field isa Fields.FaceExtrudedFiniteDifferenceField + v = slabidx.v + half + else + v = slabidx.v + end + h = slabidx.h + Fields.field_values(field)[i, j, nothing, v, h] end -Base.@propagate_inbounds set_node!(field::Fields.SlabField1D, i, val) = - setindex!(Fields.field_values(field), val, i) -# 2D get/set node -Base.@propagate_inbounds function get_node(v::MMatrix, i, j) - v[i, j] -end -Base.@propagate_inbounds function get_node(v::SMatrix, i, j) - v[i, j] +Base.@propagate_inbounds function get_node( + bc::Base.Broadcast.Broadcasted, + ij, + slabidx, +) + bc.f(_get_node(ij, slabidx, bc.args...)...) end - -Base.@propagate_inbounds function get_node(scalar, i, j) - scalar[] +Base.@propagate_inbounds function get_node( + data::Union{DataLayouts.IJF, DataLayouts.IF}, + ij, + slabidx, +) + data[ij] +end +Base.@propagate_inbounds function get_node( + data::StaticArrays.SArray, + ij, + slabidx, +) + data[ij] end -Base.@propagate_inbounds function get_node(field::Fields.SlabField2D, i, j) - getindex(Fields.field_values(field), i, j) +dont_limit = (args...) -> true +for m in methods(get_node) + m.recursion_relation = dont_limit end -Base.@propagate_inbounds function get_node(bc::Base.Broadcast.Broadcasted, i, j) - bc.f(node_args(bc.args, i, j)...) +Base.@propagate_inbounds function get_local_geometry( + space::Spaces.AbstractSpectralElementSpace, + ij::CartesianIndex{1}, + slabidx, +) + i, = Tuple(ij) + h = slabidx.h + if space isa Spaces.FaceExtrudedFiniteDifferenceSpace + v = slabidx.v + half + else + v = slabidx.v + end + Spaces.local_geometry_data(space)[i, j, nothing, v, h] +end +Base.@propagate_inbounds function get_local_geometry( + space::Spaces.AbstractSpectralElementSpace, + ij::CartesianIndex{2}, + slabidx, +) + i, j = Tuple(ij) + h = slabidx.h + if space isa Spaces.FaceExtrudedFiniteDifferenceSpace + v = slabidx.v + half + else + v = slabidx.v + end + Spaces.local_geometry_data(space)[i, j, nothing, v, h] end Base.@propagate_inbounds function set_node!( - field::Fields.SlabField2D, - i, - j, + field::Fields.Field, + ij::CartesianIndex{1}, + slabidx, val, ) - setindex!(Fields.field_values(field), val, i, j) + i, = Tuple(ij) + if field isa Fields.FaceExtrudedFiniteDifferenceField + v = slabidx.v + half + else + v = slabidx.v + end + h = slabidx.h + Fields.field_values(field)[i, nothing, nothing, v, h] = val end - -# Broadcast recursive machinery -@inline _apply_slab_args(args::Tuple, inds...) = ( - _apply_slab(args[1], inds...), - _apply_slab_args(Base.tail(args), inds...)..., -) -@inline _apply_slab_args(args::Tuple{Any}, inds...) = - (_apply_slab(args[1], inds...),) -@inline _apply_slab_args(args::Tuple{}, inds...) = () - -@inline _apply_slab(x, inds...) = x -@inline _apply_slab(sbc::SpectralBroadcasted, inds...) = apply_slab( - sbc.op, - slab(axes(sbc), inds...), - slab(input_space(sbc), inds...), - _apply_slab_args(sbc.args, inds...)..., -) -@inline _apply_slab( - bc::Base.Broadcast.Broadcasted{CompositeSpectralStyle}, - inds..., -) = Base.Broadcast.Broadcasted{CompositeSpectralStyle}( - bc.f, - _apply_slab_args(bc.args, inds...), - bc.axes, +Base.@propagate_inbounds function set_node!( + field::Fields.Field, + ij::CartesianIndex{2}, + slabidx, + val, ) + i, j = Tuple(ij) + if field isa Fields.FaceExtrudedFiniteDifferenceField + v = slabidx.v + half + else + v = slabidx.v + end + h = slabidx.h + Fields.field_values(field)[i, j, nothing, v, h] = val +end + function Base.Broadcast.BroadcastStyle( ::Type{SB}, @@ -366,6 +378,10 @@ function Base.Broadcast.BroadcastStyle( end + + + + """ div = Divergence() div.(u) @@ -401,50 +417,53 @@ Divergence{()}(space) = Divergence{operator_axes(space)}() operator_return_eltype(op::Divergence{I}, ::Type{S}) where {I, S} = RecursiveApply.rmaptype(Geometry.divergence_result_type, S) -function apply_slab(op::Divergence{(1,)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::Divergence{(1,)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MVector{Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) Jv¹ = local_geometry.J ⊠ RecursiveApply.rmap( v -> Geometry.contravariant1(v, local_geometry), - get_node(slab_data, i), + v, ) for ii in 1:Nq out[ii] = out[ii] ⊞ (D[ii, i] ⊠ Jv¹) end end @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] - out[i] = out[i] ⊠ local_geometry.invJ + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + out[i] = RecursiveApply.rdiv(out[i], local_geometry.J) end - return SVector(out) + return SArray(out) end -function apply_slab(op::Divergence{(1, 2)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::Divergence{(1, 2)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) Jv¹ = local_geometry.J ⊠ RecursiveApply.rmap( v -> Geometry.contravariant1(v, local_geometry), - get_node(slab_data, i, j), + v, ) for ii in 1:Nq out[ii, j] = out[ii, j] ⊞ (D[ii, i] ⊠ Jv¹) @@ -452,17 +471,18 @@ function apply_slab(op::Divergence{(1, 2)}, slab_space, _, slab_data) Jv² = local_geometry.J ⊠ RecursiveApply.rmap( v -> Geometry.contravariant2(v, local_geometry), - get_node(slab_data, i, j), + v, ) for jj in 1:Nq out[i, jj] = out[i, jj] ⊞ (D[jj, j] ⊠ Jv²) end end @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] - out[i, j] = out[i, j] ⊠ local_geometry.invJ + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + out[i, j] = RecursiveApply.rdiv(out[i, j], local_geometry.J) end - return SMatrix(out) + return SArray(out) end @@ -510,50 +530,52 @@ WeakDivergence{()}(space) = WeakDivergence{operator_axes(space)}() operator_return_eltype(::WeakDivergence{I}, ::Type{S}) where {I, S} = RecursiveApply.rmaptype(Geometry.divergence_result_type, S) -function apply_slab(op::WeakDivergence{(1,)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::WeakDivergence{(1,)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MVector{Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) WJv¹ = local_geometry.WJ ⊠ RecursiveApply.rmap( v -> Geometry.contravariant1(v, local_geometry), - get_node(slab_data, i), + v, ) for ii in 1:Nq out[ii] = out[ii] ⊞ (D[i, ii] ⊠ WJv¹) end end @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) out[i] = RecursiveApply.rdiv(out[i], ⊟(local_geometry.WJ)) end - return SVector(out) + return SArray(out) end -function apply_slab(op::WeakDivergence{(1, 2)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) - RT = operator_return_eltype(op, eltype(slab_data)) - # allocate temp output + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) WJv¹ = local_geometry.WJ ⊠ RecursiveApply.rmap( v -> Geometry.contravariant1(v, local_geometry), - get_node(slab_data, i, j), + v, ) for ii in 1:Nq out[ii, j] = out[ii, j] ⊞ (D[i, ii] ⊠ WJv¹) @@ -561,17 +583,18 @@ function apply_slab(op::WeakDivergence{(1, 2)}, slab_space, _, slab_data) WJv² = local_geometry.WJ ⊠ RecursiveApply.rmap( v -> Geometry.contravariant2(v, local_geometry), - get_node(slab_data, i, j), + v, ) for jj in 1:Nq out[i, jj] = out[i, jj] ⊞ (D[j, jj] ⊠ WJv²) end end @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) out[i, j] = RecursiveApply.rdiv(out[i, j], ⊟(local_geometry.WJ)) end - return SMatrix(out) + return SArray(out) end """ @@ -603,36 +626,38 @@ Gradient{()}(space) = Gradient{operator_axes(space)}() operator_return_eltype(::Gradient{I}, ::Type{S}) where {I, S} = RecursiveApply.rmaptype(T -> Geometry.gradient_result_type(Val(I), T), S) -function apply_slab(op::Gradient{(1,)}, slab_space, _, slab_data) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::Gradient{(1,)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MVector{Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for i in 1:Nq - x = get_node(slab_data, i) + ij = CartesianIndex((i,)) + x = get_node(arg, ij, slabidx) for ii in 1:Nq ∂f∂ξ = Geometry.Covariant1Vector(D[ii, i]) ⊗ x out[ii] += ∂f∂ξ end end - return SVector(out) + return SArray(out) end -function apply_slab(op::Gradient{(1, 2)}, slab_space, _, slab_data) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::Gradient{(1, 2)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for j in 1:Nq, i in 1:Nq - x = get_node(slab_data, i, j) + ij = CartesianIndex((i, j)) + x = get_node(arg, ij, slabidx) for ii in 1:Nq ∂f∂ξ₁ = Geometry.Covariant12Vector(D[ii, i], zero(eltype(D))) ⊗ x out[ii, j] = out[ii, j] ⊞ ∂f∂ξ₁ @@ -642,7 +667,7 @@ function apply_slab(op::Gradient{(1, 2)}, slab_space, _, slab_data) out[i, jj] = out[i, jj] ⊞ ∂f∂ξ₂ end end - return SMatrix(out) + return SArray(out) end @@ -687,47 +712,48 @@ WeakGradient{()}(space) = WeakGradient{operator_axes(space)}() operator_return_eltype(::WeakGradient{I}, ::Type{S}) where {I, S} = RecursiveApply.rmaptype(T -> Geometry.gradient_result_type(Val(I), T), S) -function apply_slab(op::WeakGradient{(1,)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::WeakGradient{(1,)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MVector{Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] - W = local_geometry.WJ * local_geometry.invJ - Wx = W ⊠ get_node(slab_data, i) + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + Wx = W ⊠ get_node(arg, ij, slabidx) for ii in 1:Nq Dᵀ₁Wf = Geometry.Covariant1Vector(D[i, ii]) ⊗ Wx out[ii] = out[ii] ⊟ Dᵀ₁Wf end end @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] - W = local_geometry.WJ * local_geometry.invJ + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ / local_geometry.J out[i] = RecursiveApply.rdiv(out[i], W) end - return SVector(out) + return SArray(out) end -function apply_slab(op::WeakGradient{(1, 2)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) DataLayouts._mzero!(out, FT) @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] - W = local_geometry.WJ * local_geometry.invJ - Wx = W ⊠ get_node(slab_data, i, j) + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + Wx = W ⊠ get_node(arg, ij, slabidx) for ii in 1:Nq Dᵀ₁Wf = Geometry.Covariant12Vector(D[i, ii], zero(eltype(D))) ⊗ Wx out[ii, j] = out[ii, j] ⊟ Dᵀ₁Wf @@ -738,11 +764,12 @@ function apply_slab(op::WeakGradient{(1, 2)}, slab_space, _, slab_data) end end @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] - W = local_geometry.WJ * local_geometry.invJ + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ / local_geometry.J out[i, j] = RecursiveApply.rdiv(out[i, j], W) end - return SMatrix(out) + return SArray(out) end abstract type CurlSpectralElementOperator <: SpectralElementOperator end @@ -795,22 +822,21 @@ Curl{()}(space) = Curl{operator_axes(space)}() operator_return_eltype(::Curl{I}, ::Type{S}) where {I, S} = RecursiveApply.rmaptype(T -> Geometry.curl_result_type(Val(I), T), S) -function apply_slab(op::Curl{(1,)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::Curl{(1,)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MVector{Nq, RT}(undef) DataLayouts._mzero!(out, FT) if RT <: Geometry.Contravariant2Vector @inbounds for i in 1:Nq - v₃ = Geometry.covariant3( - get_node(slab_data, i), - slab_local_geometry[i], - ) + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) + v₃ = Geometry.covariant3(v, local_geometry) for ii in 1:Nq D₁v₃ = D[ii, i] ⊠ v₃ out[ii] = out[ii] ⊞ Geometry.Contravariant2Vector(⊟(D₁v₃)) @@ -820,57 +846,46 @@ function apply_slab(op::Curl{(1,)}, slab_space, _, slab_data) error("invalid return type: $RT") end @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] - out[i] = out[i] ⊠ local_geometry.invJ + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + out[i] = RecursiveApply.rdiv(out[i], local_geometry.J) end - return SVector(out) + return SArray(out) end -function apply_slab(op::Curl{(1, 2)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) DataLayouts._mzero!(out, FT) # input data is a Covariant12Vector field if RT <: Geometry.Contravariant3Vector @inbounds for j in 1:Nq, i in 1:Nq - v₁ = Geometry.covariant1( - get_node(slab_data, i, j), - slab_local_geometry[i, j], - ) + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) + v₁ = Geometry.covariant1(v, local_geometry) for jj in 1:Nq D₂v₁ = D[jj, j] ⊠ v₁ - out[i, jj] = - out[i, jj] ⊞ Geometry.Contravariant3Vector( - ⊟(D₂v₁), - slab_local_geometry[i, jj], - ) + out[i, jj] = out[i, jj] ⊞ Geometry.Contravariant3Vector(⊟(D₂v₁)) end - v₂ = Geometry.covariant2( - get_node(slab_data, i, j), - slab_local_geometry[i, j], - ) + v₂ = Geometry.covariant2(v, local_geometry) for ii in 1:Nq D₁v₂ = D[ii, i] ⊠ v₂ - out[ii, j] = - out[ii, j] ⊞ Geometry.Contravariant3Vector( - D₁v₂, - slab_local_geometry[ii, j], - ) + out[ii, j] = out[ii, j] ⊞ Geometry.Contravariant3Vector(D₁v₂) end end # input data is a Covariant3Vector field elseif RT <: Geometry.Contravariant12Vector @inbounds for j in 1:Nq, i in 1:Nq - v₃ = Geometry.covariant3( - get_node(slab_data, i, j), - slab_local_geometry[i, j], - ) + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) + v₃ = Geometry.covariant3(v, local_geometry) for ii in 1:Nq D₁v₃ = D[ii, i] ⊠ v₃ out[ii, j] = @@ -888,10 +903,11 @@ function apply_slab(op::Curl{(1, 2)}, slab_space, _, slab_data) error("invalid return type") end @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] - out[i, j] = out[i, j] ⊠ local_geometry.invJ + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + out[i, j] = RecursiveApply.rdiv(out[i, j], local_geometry.J) end - return SMatrix(out) + return SArray(out) end """ @@ -937,26 +953,23 @@ WeakCurl{()}(space) = WeakCurl{operator_axes(space)}() operator_return_eltype(::WeakCurl{I}, ::Type{S}) where {I, S} = RecursiveApply.rmaptype(T -> Geometry.curl_result_type(Val(I), T), S) -function apply_slab(op::WeakCurl{(1,)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MVector{Nq, RT}(undef) DataLayouts._mzero!(out, FT) # input data is a Covariant3Vector field if RT <: Geometry.Contravariant2Vector @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] - W = local_geometry.WJ * local_geometry.invJ - Wv₃ = - W ⊠ Geometry.covariant3( - get_node(slab_data, i), - slab_local_geometry[i], - ) + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + Wv₃ = W ⊠ Geometry.covariant3(v, local_geometry) for ii in 1:Nq Dᵀ₁Wv₃ = D[i, ii] ⊠ Wv₃ out[ii] = out[ii] ⊞ Geometry.Contravariant2Vector(⊟(Dᵀ₁Wv₃)) @@ -966,64 +979,49 @@ function apply_slab(op::WeakCurl{(1,)}, slab_space, _, slab_data) error("invalid return type: $RT") end @inbounds for i in 1:Nq - local_geometry = slab_local_geometry[i] + ij = CartesianIndex((i,)) + local_geometry = get_local_geometry(space, ij, slabidx) out[i] = RecursiveApply.rdiv(out[i], local_geometry.WJ) end - return SVector(out) + return SArray(out) end -function apply_slab(op::WeakCurl{(1, 2)}, slab_space, _, slab_data) - slab_local_geometry = Spaces.local_geometry_data(slab_space) - FT = Spaces.undertype(slab_space) - QS = Spaces.quadrature_style(slab_space) +function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - RT = operator_return_eltype(op, eltype(slab_data)) + RT = operator_return_eltype(op, eltype(arg)) out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) DataLayouts._mzero!(out, FT) # input data is a Covariant12Vector field if RT <: Geometry.Contravariant3Vector @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] - W = local_geometry.WJ * local_geometry.invJ - Wv₁ = - W ⊠ Geometry.covariant1( - get_node(slab_data, i, j), - slab_local_geometry[i, j], - ) + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + Wv₁ = W ⊠ Geometry.covariant1(v, local_geometry) for jj in 1:Nq Dᵀ₂Wv₁ = D[j, jj] ⊠ Wv₁ - out[i, jj] = - out[i, jj] ⊞ Geometry.Contravariant3Vector( - Dᵀ₂Wv₁, - slab_local_geometry[i, jj], - ) + out[i, jj] = out[i, jj] ⊞ Geometry.Contravariant3Vector(Dᵀ₂Wv₁) end - Wv₂ = - W ⊠ Geometry.covariant2( - get_node(slab_data, i, j), - slab_local_geometry[i, j], - ) + Wv₂ = W ⊠ Geometry.covariant2(v, local_geometry) for ii in 1:Nq Dᵀ₁Wv₂ = D[i, ii] ⊠ Wv₂ out[ii, j] = - out[ii, j] ⊞ Geometry.Contravariant3Vector( - ⊟(Dᵀ₁Wv₂), - slab_local_geometry[ii, j], - ) + out[ii, j] ⊞ Geometry.Contravariant3Vector(⊟(Dᵀ₁Wv₂)) end end # input data is a Covariant3Vector field elseif RT <: Geometry.Contravariant12Vector @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] - W = local_geometry.WJ * local_geometry.invJ - Wv₃ = - W ⊠ Geometry.covariant3( - get_node(slab_data, i, j), - slab_local_geometry[i, j], - ) + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) + v = get_node(arg, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + Wv₃ = W ⊠ Geometry.covariant3(v, local_geometry) for ii in 1:Nq Dᵀ₁Wv₃ = D[i, ii] ⊠ Wv₃ out[ii, j] = @@ -1040,11 +1038,12 @@ function apply_slab(op::WeakCurl{(1, 2)}, slab_space, _, slab_data) else error("invalid return type") end - @inbounds for j in 1:Nq, i in 1:Nq - local_geometry = slab_local_geometry[i, j] + for j in 1:Nq, i in 1:Nq + ij = CartesianIndex((i, j)) + local_geometry = get_local_geometry(space, ij, slabidx) out[i, j] = RecursiveApply.rdiv(out[i, j], local_geometry.WJ) end - return SMatrix(out) + return SArray(out) end # interplation / restriction @@ -1074,55 +1073,54 @@ struct Interpolate{I, S} <: TensorOperator end Interpolate(space) = Interpolate{operator_axes(space), typeof(space)}(space) -function apply_slab( - op::Interpolate{(1,)}, - slab_space_out, - slab_space_in, - slab_data, -) - FT = Spaces.undertype(slab_space_out) - QS_in = Spaces.quadrature_style(slab_space_in) - QS_out = Spaces.quadrature_style(slab_space_out) +function apply_operator(op::Interpolate{(1,)}, space_out, slabidx, arg) + FT = Spaces.undertype(space_out) + space_in = axes(arg) + QS_in = Spaces.quadrature_style(space_in) + QS_out = Spaces.quadrature_style(space_out) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) Imat = Quadratures.interpolation_matrix(FT, QS_out, QS_in) - S = eltype(slab_data) + S = eltype(arg) slab_data_out = MVector{Nq_out, S}(undef) @inbounds for i in 1:Nq_out # manually inlined rmatmul with slab_getnode - r = Imat[i, 1] ⊠ get_node(slab_data, 1) + r = Imat[i, 1] ⊠ get_node(arg, CartesianIndex((1,)), slabidx) for ii in 2:Nq_in - r = RecursiveApply.rmuladd(Imat[i, ii], get_node(slab_data, ii), r) + ij = CartesianIndex((ii,)) + r = RecursiveApply.rmuladd( + Imat[i, ii], + get_node(arg, ii, slabidx), + r, + ) end slab_data_out[i] = r end - return slab_data_out + return SArray(slab_data_out) end -function apply_slab( - op::Interpolate{(1, 2)}, - slab_space_out, - slab_space_in, - slab_data, -) - FT = Spaces.undertype(slab_space_out) - QS_in = Spaces.quadrature_style(slab_space_in) - QS_out = Spaces.quadrature_style(slab_space_out) +function apply_operator(op::Interpolate{(1, 2)}, space_out, slabidx, arg) + FT = Spaces.undertype(space_out) + space_in = axes(arg) + QS_in = Spaces.quadrature_style(space_in) + QS_out = Spaces.quadrature_style(space_out) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) Imat = Quadratures.interpolation_matrix(FT, QS_out, QS_in) - S = eltype(slab_data) + S = eltype(arg) # temporary storage temp = MArray{Tuple{Nq_out, Nq_in}, S, 2, Nq_out * Nq_in}(undef) slab_data_out = MArray{Tuple{Nq_out, Nq_out}, S, 2, Nq_out * Nq_out}(undef) @inbounds for j in 1:Nq_in, i in 1:Nq_out # manually inlined rmatmul1 with slab get_node # we do this to remove one allocated intermediate array - r = Imat[i, 1] ⊠ get_node(slab_data, 1, j) + ij = CartesianIndex((1, j)) + r = Imat[i, 1] ⊠ get_node(arg, ij, slabidx) for ii in 2:Nq_in + ij = CartesianIndex((ii, j)) r = RecursiveApply.rmuladd( Imat[i, ii], - get_node(slab_data, ii, j), + get_node(arg, ij, slabidx), r, ) end @@ -1163,69 +1161,71 @@ struct Restrict{I, S} <: TensorOperator end Restrict(space) = Restrict{operator_axes(space), typeof(space)}(space) -function apply_slab( - op::Restrict{(1,)}, - slab_space_out, - slab_space_in, - slab_data, -) - FT = Spaces.undertype(slab_space_out) - QS_in = Spaces.quadrature_style(slab_space_in) - QS_out = Spaces.quadrature_style(slab_space_out) +function apply_operator(op::Restrict{(1,)}, space_out, slabidx, arg) + FT = Spaces.undertype(space_out) + space_in = axes(arg) + QS_in = Spaces.quadrature_style(space_in) + QS_out = Spaces.quadrature_style(space_out) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) ImatT = Quadratures.interpolation_matrix(FT, QS_in, QS_out)' # transpose - S = eltype(slab_data) + S = eltype(arg) slab_data_out = MVector{Nq_out, S}(undef) - slab_local_geometry_in = Spaces.local_geometry_data(slab_space_in) - slab_local_geometry_out = Spaces.local_geometry_data(slab_space_out) - WJ_in = slab_local_geometry_in.WJ - WJ_out = slab_local_geometry_out.WJ @inbounds for i in 1:Nq_out # manually inlined rmatmul with slab get_node - r = ImatT[i, 1] ⊠ (WJ_in[1] ⊠ get_node(slab_data, 1)) + ij = CartesianIndex((1,)) + WJ = get_local_geometry(space_in, ij, slabidx).WJ + r = ImatT[i, 1] ⊠ (WJ ⊠ get_node(arg, ij, slabidx)) for ii in 2:Nq_in - WJ_node = WJ_in[ii] ⊠ get_node(slab_data, ii) - r = RecursiveApply.rmuladd(ImatT[i, ii], WJ_node, r) + ij = CartesianIndex((ii,)) + WJ = get_local_geometry(space_in, ij, slabidx).WJ + r = RecursiveApply.rmuladd( + ImatT[i, ii], + WJ ⊠ get_node(arg, ij, slabidx), + r, + ) end - slab_data_out[i] = RecursiveApply.rdiv(r, WJ_out[i]) + ij_out = CartesianIndex((i,)) + WJ_out = get_local_geometry(space_out, ij_out, slabidx).WJ + slab_data_out[i] = RecursiveApply.rdiv(r, WJ_out) end return slab_data_out end -function apply_slab( - op::Restrict{(1, 2)}, - slab_space_out, - slab_space_in, - slab_data, -) - FT = Spaces.undertype(slab_space_out) - QS_in = Spaces.quadrature_style(slab_space_in) - QS_out = Spaces.quadrature_style(slab_space_out) +function apply_operator(op::Restrict{(1, 2)}, space_out, slabidx, arg) + FT = Spaces.undertype(space_out) + space_in = axes(arg) + QS_in = Spaces.quadrature_style(space_in) + QS_out = Spaces.quadrature_style(space_out) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) ImatT = Quadratures.interpolation_matrix(FT, QS_in, QS_out)' # transpose - S = eltype(slab_data) + S = eltype(arg) # temporary storage temp = MArray{Tuple{Nq_out, Nq_in}, S, 2, Nq_out * Nq_in}(undef) slab_data_out = MArray{Tuple{Nq_out, Nq_out}, S, 2, Nq_out * Nq_out}(undef) - slab_local_geometry_in = Spaces.local_geometry_data(slab_space_in) - slab_local_geometry_out = Spaces.local_geometry_data(slab_space_out) - WJ_in = slab_local_geometry_in.WJ - WJ_out = slab_local_geometry_out.WJ @inbounds for j in 1:Nq_in, i in 1:Nq_out # manually inlined rmatmul1 with slab get_node - r = ImatT[i, 1] ⊠ (WJ_in[1, j] ⊠ get_node(slab_data, 1, j)) + ij = CartesianIndex((1, j)) + WJ = get_local_geometry(space_in, ij, slabidx).WJ + r = ImatT[i, 1] ⊠ (WJ ⊠ get_node(arg, ij, slabidx)) for ii in 2:Nq_in - WJ_node = WJ_in[ii, j] ⊠ get_node(slab_data, ii, j) - r = RecursiveApply.rmuladd(ImatT[i, ii], WJ_node, r) + ij = CartesianIndex((ii, j)) + WJ = get_local_geometry(space_in, ij, slabidx).WJ + r = RecursiveApply.rmuladd( + ImatT[i, ii], + WJ ⊠ get_node(arg, ij, slabidx), + r, + ) end temp[i, j] = r end @inbounds for j in 1:Nq_out, i in 1:Nq_out + ij_out = CartesianIndex((i, j)) + WJ_out = get_local_geometry(space_out, ij_out, slabidx).WJ slab_data_out[i, j] = RecursiveApply.rdiv( RecursiveApply.rmatmul2(ImatT, temp, i, j), - WJ_out[i, j], + WJ_out, ) end return SMatrix(slab_data_out) diff --git a/test/DataLayouts/data2d.jl b/test/DataLayouts/data2d.jl index 09ea858320..10575e29f1 100644 --- a/test/DataLayouts/data2d.jl +++ b/test/DataLayouts/data2d.jl @@ -31,10 +31,10 @@ end @testset "get_struct / set_struct!" begin array = [1.0, 2.0, 3.0] S = Tuple{Complex{Float64}, Float64} - @test get_struct(array, S) == (1.0 + 2.0im, 3.0) - set_struct!(array, (4.0 + 2.0im, 6.0)) + @test get_struct(array, S, Val(1), CartesianIndex(1)) == (1.0 + 2.0im, 3.0) + set_struct!(array, (4.0 + 2.0im, 6.0), Val(1), CartesianIndex(1)) @test array == [4.0, 2.0, 6.0] - @test get_struct(array, S) == (4.0 + 2.0im, 6.0) + @test get_struct(array, S, Val(1), CartesianIndex(1)) == (4.0 + 2.0im, 6.0) end @testset "IJFH" begin From 3a647f8a385e57e8b02676beecb7e8e0d8dcf279 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 23 Jan 2023 17:43:55 -0800 Subject: [PATCH 02/16] kill input_space --- src/Operators/spectralelement.jl | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index c4e99f5b0a..473a4e96e7 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -55,33 +55,23 @@ This is similar to a `Base.Broadcast.Broadcasted` object, except it contains spa This is returned by `Base.Broadcast.broadcasted(op::SpectralElementOperator)`. """ -struct SpectralBroadcasted{Style, Op, Args, Axes, InputSpace} <: +struct SpectralBroadcasted{Style, Op, Args, Axes} <: Base.AbstractBroadcasted op::Op args::Args axes::Axes - input_space::InputSpace end SpectralBroadcasted{Style}( op::Op, args::Args, axes::Axes = nothing, - input_space::InputSpace = nothing, -) where {Style, Op, Args, Axes, InputSpace} = - SpectralBroadcasted{Style, Op, Args, Axes, InputSpace}( +) where {Style, Op, Args, Axes} = + SpectralBroadcasted{Style, Op, Args, Axes}( op, args, axes, - input_space, ) -input_space(arg) = axes(arg) -input_space(::SpectralElementOperator, space) = space - -input_space(sbc::SpectralBroadcasted) = - isnothing(sbc.input_space) ? - input_space(sbc.op, tuplemap(axes, sbc.args)...) : sbc.input_space - return_space(::SpectralElementOperator, space) = space Base.axes(sbc::SpectralBroadcasted) = @@ -119,13 +109,8 @@ function Base.Broadcast.instantiate( axes = sbc.axes Base.Broadcast.check_broadcast_axes(axes, args...) end - if sbc.input_space isa Nothing - inspace = input_space(sbc) - else - inspace = sbc.input_space - end op = typeof(op)(axes) - return SpectralBroadcasted{Style}(op, args, axes, inspace) + return SpectralBroadcasted{Style}(op, args, axes) end function Base.Broadcast.instantiate( @@ -1049,7 +1034,6 @@ end # interplation / restriction abstract type TensorOperator <: SpectralElementOperator end -input_space(op::TensorOperator, inspace) = inspace return_space(op::TensorOperator, inspace) = op.space operator_return_eltype(::TensorOperator, ::Type{S}) where {S} = S From ea6941a889968c2b90ba6dff22ac774ff2688bfa Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Tue, 24 Jan 2023 16:04:18 -0800 Subject: [PATCH 03/16] switch to using Fields as temporaries --- src/DataLayouts/DataLayouts.jl | 30 ++++++- src/Operators/spectralelement.jl | 139 ++++++++++++++++--------------- 2 files changed, 101 insertions(+), 68 deletions(-) diff --git a/src/DataLayouts/DataLayouts.jl b/src/DataLayouts/DataLayouts.jl index dbec7a5a16..4ab7b8b528 100644 --- a/src/DataLayouts/DataLayouts.jl +++ b/src/DataLayouts/DataLayouts.jl @@ -15,7 +15,7 @@ indexes the underlying array as `[i,j,k,f,v,h]` module DataLayouts import Base: Base, @propagate_inbounds -import StaticArrays: SOneTo, MArray +import StaticArrays: SOneTo, MArray, SArray import ClimaComms import ..enable_threading, ..slab, ..slab_args, ..column, ..column_args, ..level @@ -628,6 +628,14 @@ end rebuild(data::IJF{S, Nij}, array::A) where {S, Nij, A <: AbstractArray} = IJF{S, Nij}(array) +function IJF{S, Nij}(::Type{MArray}, ::Type{T}) where {S, Nij, T} + Nf = typesize(T, S) + array = MArray{Tuple{Nij, Nij, Nf}, T, 3, Nij * Nij * Nf}(undef) + IJF{S, Nij}(array) +end +function SArray(ijf::IJF{S, Nij, <:MArray}) where {S, Nij} + IJF{S, Nij}(SArray(parent(ijf))) +end function replace_basetype(data::IJF{S, Nij}, ::Type{T}) where {S, Nij, T} array = parent(data) @@ -673,6 +681,9 @@ end data::IJF{S, Nij}, i::Integer, j::Integer, + k = nothing, + v = nothing, + h = nothing, ) where {S, Nij} @boundscheck (1 <= i <= Nij && 1 <= j <= Nij) || throw(BoundsError(data, (i, j))) @@ -752,6 +763,14 @@ function IF{S, Ni}(array::AbstractArray{T, 2}) where {S, Ni, T} check_basetype(T, S) IF{S, Ni, typeof(array)}(array) end +function IF{S, Ni}(::Type{MArray}, ::Type{T}) where {S, Ni, T} + Nf = typesize(T, S) + array = MArray{Tuple{Ni, Nf}, T, 2, Ni * Nf}(undef) + IF{S, Ni}(array) +end +function SArray(data::IF{S, Ni, <:MArray}) where {S, Ni} + IF{S, Ni}(SArray(parent(data))) +end function replace_basetype(data::IF{S, Ni}, ::Type{T}) where {S, Ni, T} array = parent(data) @@ -789,7 +808,14 @@ end IF{SS, Ni}(dataview) end -@inline function Base.getindex(data::IF{S, Ni}, i::Integer) where {S, Ni} +@inline function Base.getindex( + data::IF{S, Ni}, + i::Integer, + j = nothing, + k = nothing, + v = nothing, + h = nothing, +) where {S, Ni} @boundscheck (1 <= i <= Ni) || throw(BoundsError(data, (i,))) @inbounds get_struct(parent(data), S, Val(2), CartesianIndex(i, 1)) end diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index 473a4e96e7..80f547f8f8 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -55,8 +55,7 @@ This is similar to a `Base.Broadcast.Broadcasted` object, except it contains spa This is returned by `Base.Broadcast.broadcasted(op::SpectralElementOperator)`. """ -struct SpectralBroadcasted{Style, Op, Args, Axes} <: - Base.AbstractBroadcasted +struct SpectralBroadcasted{Style, Op, Args, Axes} <: Base.AbstractBroadcasted op::Op args::Args axes::Axes @@ -66,11 +65,7 @@ SpectralBroadcasted{Style}( args::Args, axes::Axes = nothing, ) where {Style, Op, Args, Axes} = - SpectralBroadcasted{Style, Op, Args, Axes}( - op, - args, - axes, - ) + SpectralBroadcasted{Style, Op, Args, Axes}(op, args, axes) return_space(::SpectralElementOperator, space) = space @@ -289,7 +284,10 @@ for m in methods(get_node) end Base.@propagate_inbounds function get_local_geometry( - space::Spaces.AbstractSpectralElementSpace, + space::Union{ + Spaces.AbstractSpectralElementSpace, + Spaces.ExtrudedFiniteDifferenceSpace, + }, ij::CartesianIndex{1}, slabidx, ) @@ -300,10 +298,13 @@ Base.@propagate_inbounds function get_local_geometry( else v = slabidx.v end - Spaces.local_geometry_data(space)[i, j, nothing, v, h] + Spaces.local_geometry_data(space)[i, nothing, nothing, v, h] end Base.@propagate_inbounds function get_local_geometry( - space::Spaces.AbstractSpectralElementSpace, + space::Union{ + Spaces.AbstractSpectralElementSpace, + Spaces.ExtrudedFiniteDifferenceSpace, + }, ij::CartesianIndex{2}, slabidx, ) @@ -409,8 +410,8 @@ function apply_operator(op::Divergence{(1,)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MVector{Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = IF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) @@ -429,7 +430,7 @@ function apply_operator(op::Divergence{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i] = RecursiveApply.rdiv(out[i], local_geometry.J) end - return SArray(out) + return Field(SArray(out), space) end function apply_operator(op::Divergence{(1, 2)}, space, slabidx, arg) @@ -439,8 +440,8 @@ function apply_operator(op::Divergence{(1, 2)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IJF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) @@ -467,7 +468,7 @@ function apply_operator(op::Divergence{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i, j] = RecursiveApply.rdiv(out[i, j], local_geometry.J) end - return SArray(out) + return Field(SArray(out), space) end @@ -522,8 +523,8 @@ function apply_operator(op::WeakDivergence{(1,)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MVector{Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) @@ -542,7 +543,7 @@ function apply_operator(op::WeakDivergence{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i] = RecursiveApply.rdiv(out[i], ⊟(local_geometry.WJ)) end - return SArray(out) + return Field(SArray(out), space) end function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) @@ -551,8 +552,9 @@ function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IJF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) + @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) @@ -579,7 +581,7 @@ function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i, j] = RecursiveApply.rdiv(out[i, j], ⊟(local_geometry.WJ)) end - return SArray(out) + return Field(SArray(out), space) end """ @@ -618,8 +620,8 @@ function apply_operator(op::Gradient{(1,)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MVector{Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) x = get_node(arg, ij, slabidx) @@ -628,7 +630,7 @@ function apply_operator(op::Gradient{(1,)}, space, slabidx, arg) out[ii] += ∂f∂ξ end end - return SArray(out) + return Field(SArray(out), space) end function apply_operator(op::Gradient{(1, 2)}, space, slabidx, arg) @@ -638,8 +640,9 @@ function apply_operator(op::Gradient{(1, 2)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IJF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) + @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) x = get_node(arg, ij, slabidx) @@ -652,7 +655,7 @@ function apply_operator(op::Gradient{(1, 2)}, space, slabidx, arg) out[i, jj] = out[i, jj] ⊞ ∂f∂ξ₂ end end - return SArray(out) + return Field(SArray(out), space) end @@ -704,8 +707,8 @@ function apply_operator(op::WeakGradient{(1,)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MVector{Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) @@ -722,7 +725,7 @@ function apply_operator(op::WeakGradient{(1,)}, space, slabidx, arg) W = local_geometry.WJ / local_geometry.J out[i] = RecursiveApply.rdiv(out[i], W) end - return SArray(out) + return Field(SArray(out), space) end function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) @@ -732,8 +735,9 @@ function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IJF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) + @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) @@ -754,7 +758,7 @@ function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) W = local_geometry.WJ / local_geometry.J out[i, j] = RecursiveApply.rdiv(out[i, j], W) end - return SArray(out) + return Field(SArray(out), space) end abstract type CurlSpectralElementOperator <: SpectralElementOperator end @@ -814,8 +818,8 @@ function apply_operator(op::Curl{(1,)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MVector{Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) if RT <: Geometry.Contravariant2Vector @inbounds for i in 1:Nq ij = CartesianIndex((i,)) @@ -835,7 +839,7 @@ function apply_operator(op::Curl{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i] = RecursiveApply.rdiv(out[i], local_geometry.J) end - return SArray(out) + return Field(SArray(out), space) end function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) @@ -845,8 +849,9 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IJF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) + # input data is a Covariant12Vector field if RT <: Geometry.Contravariant3Vector @inbounds for j in 1:Nq, i in 1:Nq @@ -892,7 +897,7 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i, j] = RecursiveApply.rdiv(out[i, j], local_geometry.J) end - return SArray(out) + return Field(SArray(out), space) end """ @@ -945,8 +950,8 @@ function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MVector{Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) # input data is a Covariant3Vector field if RT <: Geometry.Contravariant2Vector @inbounds for i in 1:Nq @@ -968,7 +973,7 @@ function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i] = RecursiveApply.rdiv(out[i], local_geometry.WJ) end - return SArray(out) + return Field(SArray(out), space) end function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) @@ -978,8 +983,9 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output RT = operator_return_eltype(op, eltype(arg)) - out = StaticArrays.MMatrix{Nq, Nq, RT}(undef) - DataLayouts._mzero!(out, FT) + out = DataLayouts.IJF{RT, Nq}(MArray, FT) + fill!(parent(out), zero(FT)) + # input data is a Covariant12Vector field if RT <: Geometry.Contravariant3Vector @inbounds for j in 1:Nq, i in 1:Nq @@ -1028,7 +1034,7 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) out[i, j] = RecursiveApply.rdiv(out[i, j], local_geometry.WJ) end - return SArray(out) + return Field(SArray(out), space) end # interplation / restriction @@ -1065,22 +1071,23 @@ function apply_operator(op::Interpolate{(1,)}, space_out, slabidx, arg) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) Imat = Quadratures.interpolation_matrix(FT, QS_out, QS_in) - S = eltype(arg) - slab_data_out = MVector{Nq_out, S}(undef) + RT = eltype(arg) + out = DataLayouts.IF{RT, Nq_out}(MArray, FT) @inbounds for i in 1:Nq_out # manually inlined rmatmul with slab_getnode - r = Imat[i, 1] ⊠ get_node(arg, CartesianIndex((1,)), slabidx) + ij = CartesianIndex((1,)) + r = Imat[i, 1] ⊠ get_node(arg, ij, slabidx) for ii in 2:Nq_in ij = CartesianIndex((ii,)) r = RecursiveApply.rmuladd( Imat[i, ii], - get_node(arg, ii, slabidx), + get_node(arg, ij, slabidx), r, ) end - slab_data_out[i] = r + out[i] = r end - return SArray(slab_data_out) + return Field(SArray(out), space_out) end function apply_operator(op::Interpolate{(1, 2)}, space_out, slabidx, arg) @@ -1091,10 +1098,10 @@ function apply_operator(op::Interpolate{(1, 2)}, space_out, slabidx, arg) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) Imat = Quadratures.interpolation_matrix(FT, QS_out, QS_in) - S = eltype(arg) + RT = eltype(arg) # temporary storage - temp = MArray{Tuple{Nq_out, Nq_in}, S, 2, Nq_out * Nq_in}(undef) - slab_data_out = MArray{Tuple{Nq_out, Nq_out}, S, 2, Nq_out * Nq_out}(undef) + temp = DataLayouts.IJF{RT, max(Nq_in, Nq_out)}(MArray, FT) + out = DataLayouts.IJF{RT, Nq_out}(MArray, FT) @inbounds for j in 1:Nq_in, i in 1:Nq_out # manually inlined rmatmul1 with slab get_node # we do this to remove one allocated intermediate array @@ -1111,9 +1118,9 @@ function apply_operator(op::Interpolate{(1, 2)}, space_out, slabidx, arg) temp[i, j] = r end @inbounds for j in 1:Nq_out, i in 1:Nq_out - slab_data_out[i, j] = RecursiveApply.rmatmul2(Imat, temp, i, j) + out[i, j] = RecursiveApply.rmatmul2(Imat, temp, i, j) end - return SMatrix(slab_data_out) + return Field(SArray(out), space_out) end @@ -1153,8 +1160,8 @@ function apply_operator(op::Restrict{(1,)}, space_out, slabidx, arg) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) ImatT = Quadratures.interpolation_matrix(FT, QS_in, QS_out)' # transpose - S = eltype(arg) - slab_data_out = MVector{Nq_out, S}(undef) + RT = eltype(arg) + out = IF{RT, Nq_out}(MArray, FT) @inbounds for i in 1:Nq_out # manually inlined rmatmul with slab get_node ij = CartesianIndex((1,)) @@ -1171,9 +1178,9 @@ function apply_operator(op::Restrict{(1,)}, space_out, slabidx, arg) end ij_out = CartesianIndex((i,)) WJ_out = get_local_geometry(space_out, ij_out, slabidx).WJ - slab_data_out[i] = RecursiveApply.rdiv(r, WJ_out) + out[i] = RecursiveApply.rdiv(r, WJ_out) end - return slab_data_out + return Field(SArray(out), space_out) end function apply_operator(op::Restrict{(1, 2)}, space_out, slabidx, arg) @@ -1184,10 +1191,10 @@ function apply_operator(op::Restrict{(1, 2)}, space_out, slabidx, arg) Nq_in = Quadratures.degrees_of_freedom(QS_in) Nq_out = Quadratures.degrees_of_freedom(QS_out) ImatT = Quadratures.interpolation_matrix(FT, QS_in, QS_out)' # transpose - S = eltype(arg) + RT = eltype(arg) # temporary storage - temp = MArray{Tuple{Nq_out, Nq_in}, S, 2, Nq_out * Nq_in}(undef) - slab_data_out = MArray{Tuple{Nq_out, Nq_out}, S, 2, Nq_out * Nq_out}(undef) + temp = DataLayouts.IJF{RT, max(Nq_in, Nq_out)}(MArray, FT) + out = DataLayouts.IJF{RT, Nq_out}(MArray, FT) @inbounds for j in 1:Nq_in, i in 1:Nq_out # manually inlined rmatmul1 with slab get_node ij = CartesianIndex((1, j)) @@ -1207,12 +1214,12 @@ function apply_operator(op::Restrict{(1, 2)}, space_out, slabidx, arg) @inbounds for j in 1:Nq_out, i in 1:Nq_out ij_out = CartesianIndex((i, j)) WJ_out = get_local_geometry(space_out, ij_out, slabidx).WJ - slab_data_out[i, j] = RecursiveApply.rdiv( + out[i, j] = RecursiveApply.rdiv( RecursiveApply.rmatmul2(ImatT, temp, i, j), WJ_out, ) end - return SMatrix(slab_data_out) + return Field(SArray(out), space_out) end From 05c69c1d631d3465078899f51fbe0b2366fabee5 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Wed, 25 Jan 2023 12:10:43 -0800 Subject: [PATCH 04/16] add doc --- src/Operators/spectralelement.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index 80f547f8f8..e90b6e5244 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -190,7 +190,14 @@ end resolve_operator(bc, slabidx) Recursively evaluate any operators in `bc` at `slabidx`, replacing any -`SpectralBroadcasted` objects with an `IF`/`IJF` object. +`SpectralBroadcasted` objects. + +- if `bc` is a regular `Broadcasted` object, return a new `Broadcasted` with `resolve_operator` called on each `arg` +- if `bc` is a regular `SpectralBroadcasted` object: + - call `resolve_operator` called on each `arg` + - call `apply_operator`, returning the resulting "pseudo Field": a `Field` with an + `IF`/`IJF` data object. +- if `bc` is a `Field`, return that """ function resolve_operator(bc::SpectralBroadcasted, slabidx) args = _resolve_operator_args(slabidx, bc.args...) From b2d25f9f18ea04fc0f66db26ab771573622585ae Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 26 Jan 2023 16:13:08 -0800 Subject: [PATCH 05/16] WIP: CUDA --- src/Device.jl | 5 + src/Fields/Fields.jl | 6 + src/Operators/Operators.jl | 3 +- src/Operators/spectralelement.jl | 98 ++++- src/Spaces/Spaces.jl | 2 +- src/Spaces/spectralelement.jl | 17 + src/Topologies/Topologies.jl | 1 + .../spectralelement/rectilinear_cuda.jl | 369 ++++++++++++++++++ 8 files changed, 494 insertions(+), 7 deletions(-) create mode 100644 test/Operators/spectralelement/rectilinear_cuda.jl diff --git a/src/Device.jl b/src/Device.jl index 980f8f0f1a..9d7507313f 100644 --- a/src/Device.jl +++ b/src/Device.jl @@ -8,4 +8,9 @@ device(; disablegpu = false) = device_array_type(::ClimaComms.CPU) = Array device_array_type(::ClimaComms.CUDA) = CUDA.CuArray +device(ctx::ClimaComms.SingletonCommsContext) = ctx.device + +device(::Array) = ClimaComms.CPU() +device(::CUDA.CuArray) = ClimaComms.CUDA() + end diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index 479b0b2d7c..de611d246e 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -12,6 +12,7 @@ import ..Utilities: PlusHalf, half using ..RecursiveApply using ClimaComms +import Adapt import StaticArrays, LinearAlgebra, Statistics, InteractiveUtils @@ -48,6 +49,11 @@ comm_context(topology::Topologies.Topology2D) = topology.context comm_context(topology::T) where {T <: Topologies.AbstractTopology} = ClimaComms.SingletonCommsContext() +Adapt.adapt_structure(to, field::Field) = + Field( + Adapt.adapt_structure(to, Fields.field_values(field)), + Adapt.adapt_structure(to, axes(field)), + ) # Point Field diff --git a/src/Operators/Operators.jl b/src/Operators/Operators.jl index 8ed202c589..bc6bc721b0 100644 --- a/src/Operators/Operators.jl +++ b/src/Operators/Operators.jl @@ -2,9 +2,10 @@ module Operators import LinearAlgebra -using StaticArrays +using StaticArrays, CUDA import ..enable_threading, ..slab, ..slab_args, ..column, ..column_args +import ..Device import ..DataLayouts: DataLayouts, Data2D, DataSlab2D import ..Geometry: Geometry, Covariant12Vector, Contravariant12Vector, ⊗ import ..Spaces: Spaces, Quadratures, AbstractSpace diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index e90b6e5244..649e0a3f9c 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -4,6 +4,12 @@ struct SpectralStyle <: AbstractSpectralStyle end struct CompositeSpectralStyle <: AbstractSpectralStyle end +struct CUDASpectralStyle <: AbstractSpectralStyle end + +import ClimaComms +AbstractSpectralStyle(::ClimaComms.CPU) = SpectralStyle +AbstractSpectralStyle(::ClimaComms.CUDA) = CUDASpectralStyle + """ SpectralElementOperator @@ -55,17 +61,19 @@ This is similar to a `Base.Broadcast.Broadcasted` object, except it contains spa This is returned by `Base.Broadcast.broadcasted(op::SpectralElementOperator)`. """ -struct SpectralBroadcasted{Style, Op, Args, Axes} <: Base.AbstractBroadcasted +struct SpectralBroadcasted{Style, Op, Args, Axes, Work} <: Base.AbstractBroadcasted op::Op args::Args axes::Axes + work::Work end SpectralBroadcasted{Style}( op::Op, args::Args, axes::Axes = nothing, -) where {Style, Op, Args, Axes} = - SpectralBroadcasted{Style, Op, Args, Axes}(op, args, axes) + work::Work = nothing, +) where {Style, Op, Args, Axes, Work} = + SpectralBroadcasted{Style, Op, Args, Axes, Work}(op, args, axes, work) return_space(::SpectralElementOperator, space) = space @@ -92,8 +100,8 @@ Base.eltype(sbc::SpectralBroadcasted) = @inline instantiate_args(::Tuple{}) = () function Base.Broadcast.instantiate( - sbc::SpectralBroadcasted{Style}, -) where {Style} + sbc::SpectralBroadcasted, +) op = sbc.op # recursively instantiate the arguments to allocate intermediate work arrays args = instantiate_args(sbc.args) @@ -105,9 +113,21 @@ function Base.Broadcast.instantiate( Base.Broadcast.check_broadcast_axes(axes, args...) end op = typeof(op)(axes) + Style = AbstractSpectralStyle(Device.device(axes)) return SpectralBroadcasted{Style}(op, args, axes) end +import Adapt + +Adapt.adapt_structure(to, sbc::SpectralBroadcasted{Style}) where {Style} = + SpectralBroadcasted{Style}( + sbc.op, + map(arg -> Adapt.adapt_structure(to, arg), sbc.args), + Adapt.adapt_structure(to, sbc.axes), + ) + + + function Base.Broadcast.instantiate( bc::Base.Broadcast.Broadcasted{Style}, ) where {Style <: AbstractSpectralStyle} @@ -155,6 +175,39 @@ function Base.copyto!(out::Field, sbc::SpectralBroadcasted) return out end +function Base.copyto!(out::Field, sbc::SpectralBroadcasted{CUDASpectralStyle}) + space = axes(out) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + Nh = Topologies.nlocalelems(Spaces.topology(space)) + Nv = Spaces.nlevels(space) + + @cuda threads=(Nq,Nq) blocks=(Nv,Nh) copyto_kernel!(out, sbc) + return out +end + + +function copyto_kernel!(out, sbc) + i = threadIdx().x + j = threadIdx().y + #if axes(out) isa Spaces.AbstractSpectralElementSpace + v = nothing + #elseif axes(out) isa Spaces.CenterExtrudedFiniteDifferenceSpace + # v = blockIdx().x + #elseif axes(out) isa Spaces.FaceExtrudedFiniteDifferenceSpace + # v = blockIdx().x - half + #end + h = blockIdx().y + ij = CartesianIndex((i,j)) + slabidx = Fields.SlabIndex{Nothing}(v,h) + + result = apply_operator_kernel(sbc.op, axes(out), ij, slabidx, sbc.args...) + set_node!(out, ij, slabidx, result) + return nothing +end + + + function Base.Broadcast.materialize!(dest, sbc::SpectralBroadcasted) copyto!(dest, Base.Broadcast.instantiate(sbc)) end @@ -666,6 +719,41 @@ function apply_operator(op::Gradient{(1, 2)}, space, slabidx, arg) end + +function apply_operator_kernel(op::Gradient{(1, 2)}, space, ij, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + # allocate temp output + IT = eltype(arg) + Nf = DataLayouts.typesize(FT, IT) + array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work = IJF{IT, Nq}(array) + i,j = ij.I + work[i,j] = get_node(arg, ij, slabidx) + CUDA.sync_threads() + + ∂f∂ξ₁ = D[i,1] * work[1,j] + ∂f∂ξ₂ = D[j,1] * work[i,1] + for k = 2:Nq + ∂f∂ξ₁ += D[i,k] * work[k,j] + ∂f∂ξ₂ += D[j,k] * work[i,k] + end + return Geometry.Covariant12Vector(∂f∂ξ₁,∂f∂ξ₂) +end + +function allocate_shared(op::Gradient{(1,2)}, arg) + S = eltype(arg) + space = axes(arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + Nf = DataLayouts.typesize(FT, S) + array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + return IJF{S, Nq}(array) +end + """ wgrad = WeakGradient() wgrad.(f) diff --git a/src/Spaces/Spaces.jl b/src/Spaces/Spaces.jl index 2c8e0318b4..ecd6c4f25f 100644 --- a/src/Spaces/Spaces.jl +++ b/src/Spaces/Spaces.jl @@ -22,7 +22,7 @@ import ..slab, ..column, ..level import ..Utilities: PlusHalf import ..DataLayouts, ..Geometry, ..Domains, ..Meshes, ..Topologies import ..Device -using StaticArrays, ForwardDiff, LinearAlgebra, UnPack +using StaticArrays, ForwardDiff, LinearAlgebra, UnPack, Adapt abstract type AbstractSpace end diff --git a/src/Spaces/spectralelement.jl b/src/Spaces/spectralelement.jl index e803fdc34b..afa1da45b8 100644 --- a/src/Spaces/spectralelement.jl +++ b/src/Spaces/spectralelement.jl @@ -20,6 +20,7 @@ function Base.show(io::IO, space::AbstractSpectralElementSpace) print(iio, " "^(indent + 2), space.quadrature_style) end +Device.device(space::AbstractSpectralElementSpace) = Device.device(topology(space)) topology(space::AbstractSpectralElementSpace) = space.topology quadrature_style(space::AbstractSpectralElementSpace) = space.quadrature_style @@ -175,6 +176,21 @@ struct SpectralElementSpace2D{ boundary_surface_geometries::BS end +Adapt.adapt_structure(to, space::SpectralElementSpace2D) = +SpectralElementSpace2D( + nothing, + Adapt.adapt_structure(to, space.quadrature_style), + Adapt.adapt_structure(to, space.global_geometry), + Adapt.adapt_structure(to, space.local_geometry), + Adapt.adapt_structure(to, space.ghost_geometry), + Adapt.adapt_structure(to, space.local_dss_weights), + Adapt.adapt_structure(to, space.ghost_dss_weights), + Adapt.adapt_structure(to, space.internal_surface_geometry), + Adapt.adapt_structure(to, space.boundary_surface_geometries), +) + + + """ SpectralElementSpace2D(topology, quadrature_style; enable_bubble) @@ -772,3 +788,4 @@ function Base.iterate( return ((i, j), e), ((i, j), e) end end + diff --git a/src/Topologies/Topologies.jl b/src/Topologies/Topologies.jl index 5de54430b7..6dda097e41 100644 --- a/src/Topologies/Topologies.jl +++ b/src/Topologies/Topologies.jl @@ -2,6 +2,7 @@ module Topologies using DocStringExtensions +import ..Device import ..Geometry import ..Domains: Domains, coordinate_type import ..Meshes: Meshes, domain, coordinates diff --git a/test/Operators/spectralelement/rectilinear_cuda.jl b/test/Operators/spectralelement/rectilinear_cuda.jl new file mode 100644 index 0000000000..bceec3c120 --- /dev/null +++ b/test/Operators/spectralelement/rectilinear_cuda.jl @@ -0,0 +1,369 @@ +using Test +using StaticArrays +using ClimaComms, ClimaCore +import ClimaCore: + Geometry, Fields, Domains, Topologies, Meshes, Spaces, Operators +using LinearAlgebra, IntervalSets +using CUDA + +FT = Float64 +domain = Domains.RectangleDomain( + Geometry.XPoint{FT}(-pi) .. Geometry.XPoint{FT}(pi), + Geometry.YPoint{FT}(-pi) .. Geometry.YPoint{FT}(pi); + x1periodic = true, + x2periodic = true, +) + +Nq = 5 +quad = Spaces.Quadratures.GLL{Nq}() + +grid_mesh = Meshes.RectilinearMesh(domain, 17, 16) + + +grid_topology_cpu = + Topologies.Topology2D(ClimaComms.SingletonCommsContext(ClimaCore.Device.device(;disablegpu=true)), grid_mesh) +grid_space_cpu = Spaces.SpectralElementSpace2D(grid_topology_cpu, quad) +coords_cpu = Fields.coordinate_field(grid_space_cpu) + +f_cpu = sin.(coords_cpu.x .+ 2 .* coords_cpu.y) + +grad = Operators.Gradient() +gradf_cpu = grad.(f_cpu) + + +grid_topology = + Topologies.Topology2D(ClimaComms.SingletonCommsContext(ClimaCore.Device.device()), grid_mesh) +grid_space = Spaces.SpectralElementSpace2D(grid_topology, quad) +coords = Fields.coordinate_field(grid_space) + +CUDA.allowscalar(false) +f = sin.(coords.x .+ 2 .* coords.y) + +grad = Operators.Gradient() +gradf = grad.(f) + + + + + + + +#== + +ts_mesh = Meshes.RectilinearMesh(domain, 17, 16) +ts_topology = Topologies.Topology2D(ClimaComms.SingletonCommsContext(), ts_mesh) +ts_space = Spaces.SpectralElementSpace2D(ts_topology, quad) +ts_coords = Fields.coordinate_field(ts_space) + +grid_test_setup = (grid_topology, grid_space, grid_coords) +ts_test_setup = (ts_topology, ts_space, ts_coords) + +@testset "interpolate / restrict" begin + + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + INq = 9 + Iquad = Spaces.Quadratures.GLL{INq}() + Ispace = Spaces.SpectralElementSpace2D(topology, Iquad) + + I = Operators.Interpolate(Ispace) + R = Operators.Restrict(space) + + f = sin.(coords.x .+ 2 .* coords.y) + + interpolated_field = I.(f) + Spaces.weighted_dss2!(interpolated_field) + + @test axes(interpolated_field).quadrature_style == Iquad + @test axes(interpolated_field).topology == topology + + restrict_field = R.(f) + Spaces.weighted_dss2!(restrict_field) + + @test axes(restrict_field).quadrature_style == quad + @test axes(restrict_field).topology == topology + + interp_restrict_field = R.(I.(f)) + Spaces.weighted_dss2!(interp_restrict_field) + + @test axes(interp_restrict_field).quadrature_style == quad + @test axes(interp_restrict_field).topology == topology + + @test norm(interp_restrict_field .- f) ≤ 3.0e-4 + end +end + +@testset "gradient" begin + + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + f = sin.(coords.x .+ 2 .* coords.y) + + grad = Operators.Gradient() + gradf = grad.(f) + Spaces.weighted_dss2!(gradf) + + @test gradf ≈ + Geometry.Covariant12Vector.( + Geometry.UVVector.( + cos.(coords.x .+ 2 .* coords.y), + 2 .* cos.(coords.x .+ 2 .* coords.y), + ), + ) rtol = 1e-2 + + fv = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(coords.x .+ 2 .* coords.y), + ) + gradfv = Geometry.transform.(Ref(Geometry.UVAxis()), grad.(fv)) + Spaces.weighted_dss2!(gradfv) + @test eltype(gradfv) <: Geometry.Axis2Tensor + end +end + + +@testset "weak gradient" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + f = sin.(coords.x .+ 2 .* coords.y) + + wgrad = Operators.WeakGradient() + gradf = wgrad.(f) + Spaces.weighted_dss2!(gradf) + + @test Geometry.UVVector.(gradf) ≈ + Geometry.UVVector.( + cos.(coords.x .+ 2 .* coords.y), + 2 .* cos.(coords.x .+ 2 .* coords.y), + ) rtol = 1e-2 + end +end + +@testset "curl" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(3 .* coords.x .+ 4 .* coords.y), + ) + + curl = Operators.Curl() + curlv = curl.(Geometry.Covariant12Vector.(v)) + Spaces.weighted_dss2!(curlv) + curlv_ref = + Geometry.Contravariant3Vector.( + .-3 .* sin.(3 .* coords.x .+ 4 .* coords.y) .- + 2 .* cos.(coords.x .+ 2 .* coords.y), + ) + + @test curlv ≈ curlv_ref rtol = 1e-2 + end +end + +@testset "curl-curl" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(3 .* coords.x .+ 2 .* coords.y), + ) + curlv_ref = + .-3 .* sin.(3 .* coords.x .+ 2 .* coords.y) .- + 2 .* cos.(coords.x .+ 2 .* coords.y) + curlcurlv_ref1 = + .-6 .* cos.(3 .* coords.x .+ 2 .* coords.y) .+ + 4 .* sin.(coords.x .+ 2 .* coords.y) + curlcurlv_ref2 = + 9 .* cos.(3 .* coords.x .+ 2 .* coords.y) .- + 2 .* sin.(coords.x .+ 2 .* coords.y) + + curl = Operators.Curl() + curlcurlv = + curl.( + Geometry.Covariant3Vector.( + curl.(Geometry.Covariant12Vector.(v)), + ), + ) + Spaces.weighted_dss2!(curlcurlv) + + @test Geometry.UVVector.(curlcurlv) ≈ + Geometry.UVVector.(curlcurlv_ref1, curlcurlv_ref2) rtol = 4e-2 + end +end + +@testset "weak curl-strong curl" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(3 .* coords.x .+ 2 .* coords.y), + ) + curlv_ref = + .-3 .* sin.(3 .* coords.x .+ 2 .* coords.y) .- + 2 .* cos.(coords.x .+ 2 .* coords.y) + curlcurlv_ref1 = + .-6 .* cos.(3 .* coords.x .+ 2 .* coords.y) .+ + 4 .* sin.(coords.x .+ 2 .* coords.y) + curlcurlv_ref2 = + 9 .* cos.(3 .* coords.x .+ 2 .* coords.y) .- + 2 .* sin.(coords.x .+ 2 .* coords.y) + + curl = Operators.Curl() + wcurl = Operators.WeakCurl() + curlcurlv = + Geometry.UVVector.( + wcurl.( + Geometry.Covariant3Vector.( + curl.(Geometry.Covariant12Vector.(v)), + ), + ), + ) + Spaces.weighted_dss2!(curlcurlv) + + @test curlcurlv ≈ Geometry.UVVector.(curlcurlv_ref1, curlcurlv_ref2) rtol = + 4e-2 + end +end + +@testset "weak curl" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(3 .* coords.x .+ 4 .* coords.y), + ) + + wcurl = Operators.WeakCurl() + curlv = wcurl.(Geometry.Covariant12Vector.(v)) + Spaces.weighted_dss2!(curlv) + curlv_ref = + .-3 .* sin.(3 .* coords.x .+ 4 .* coords.y) .- + 2 .* cos.(coords.x .+ 2 .* coords.y) + + @test curlv ≈ Geometry.Contravariant3Vector.(curlv_ref) rtol = 1e-2 + end +end + +@testset "div" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(3 .* coords.x .+ 2 .* coords.y), + ) + + div = Operators.Divergence() + divv = div.(v) + Spaces.weighted_dss2!(divv) + divv_ref = + cos.(coords.x .+ 2 .* coords.y) .- + 2 .* sin.(3 .* coords.x .+ 2 .* coords.y) + + @test divv ≈ divv_ref rtol = 1e-2 + end +end + + +@testset "weak div" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = + Geometry.UVVector.( + sin.(coords.x .+ 2 .* coords.y), + cos.(3 .* coords.x .+ 2 .* coords.y), + ) + + wdiv = Operators.WeakDivergence() + divv = wdiv.(v) + Spaces.weighted_dss2!(divv) + divv_ref = + cos.(coords.x .+ 2 .* coords.y) .- + 2 .* sin.(3 .* coords.x .+ 2 .* coords.y) + + @test divv ≈ divv_ref rtol = 1e-2 + end +end + + +@testset "annhilator property: curl-grad" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + f = sin.(coords.x .+ 2 .* coords.y) + + grad = Operators.Gradient() + gradf = grad.(f) + Spaces.weighted_dss2!(gradf) + + curl = Operators.Curl() + curlgradf = curl.(gradf) + Spaces.weighted_dss2!(curlgradf) + + @test norm(curlgradf) < 1e-12 + end +end + +@testset "annhilator property: div-curl" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + v = Geometry.Covariant3Vector.(sin.(coords.x .+ 2 .* coords.y)) + curl = Operators.Curl() + curlv = curl.(v) + Spaces.weighted_dss2!(curlv) + + div = Operators.Divergence() + divcurlv = div.(curlv) + Spaces.weighted_dss2!(divcurlv) + + @test norm(divcurlv) < 1e-12 + end +end + +@testset "scalar hyperdiffusion" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + k = 2 + l = 3 + y = @. sin(k * coords.x + l * coords.y) + ∇⁴y_ref = @. (k^2 + l^2)^2 * sin(k * coords.x + l * coords.y) + + wdiv = Operators.WeakDivergence() + grad = Operators.Gradient() + χ = Spaces.weighted_dss2!(@. wdiv(grad(y))) + ∇⁴y = Spaces.weighted_dss2!(@. wdiv(grad(χ))) + + @test ∇⁴y_ref ≈ ∇⁴y rtol = 2e-2 + end +end + +@testset "vector hyperdiffusion" begin + for (topology, space, coords) in (grid_test_setup, ts_test_setup) + k = 2 + l = 3 + y = @. Geometry.UVVector(sin(k * coords.x + l * coords.y), 0.0) + ∇⁴y_ref = @. Geometry.UVVector( + (k^2 + l^2)^2 * sin(k * coords.x + l * coords.y), + 0.0, + ) + curl = Operators.Curl() + wcurl = Operators.WeakCurl() + + sdiv = Operators.Divergence() + wgrad = Operators.WeakGradient() + + χ = Spaces.weighted_dss2!( + @. Geometry.UVVector(wgrad(sdiv(y))) - Geometry.UVVector( + wcurl( + Geometry.Covariant3Vector( + curl(Geometry.Covariant12Vector(y)), + ), + ), + ) + ) + ∇⁴y = Spaces.weighted_dss2!( + @. Geometry.UVVector(wgrad(sdiv(χ))) - Geometry.UVVector( + wcurl( + Geometry.Covariant3Vector( + curl(Geometry.Covariant12Vector(χ)), + ), + ), + ) + ) + + @test ∇⁴y_ref ≈ ∇⁴y rtol = 2e-2 + end +end +==# \ No newline at end of file From b22119503795a4e47361a8daa507305130f50773 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Wed, 1 Feb 2023 16:56:30 -0800 Subject: [PATCH 06/16] so close --- src/Fields/Fields.jl | 4 +- src/Fields/broadcast.jl | 8 + src/Fields/indices.jl | 4 +- src/Operators/Operators.jl | 4 +- src/Operators/spectralelement.jl | 275 +++++++++++++++++++++---------- src/Spaces/spectralelement.jl | 18 +- 6 files changed, 211 insertions(+), 102 deletions(-) diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index de611d246e..11a22e0bd8 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -51,8 +51,8 @@ comm_context(topology::T) where {T <: Topologies.AbstractTopology} = Adapt.adapt_structure(to, field::Field) = Field( - Adapt.adapt_structure(to, Fields.field_values(field)), - Adapt.adapt_structure(to, axes(field)), + Adapt.adapt(to, Fields.field_values(field)), + Adapt.adapt(to, axes(field)), ) diff --git a/src/Fields/broadcast.jl b/src/Fields/broadcast.jl index fc0d247fa2..55c830ab50 100644 --- a/src/Fields/broadcast.jl +++ b/src/Fields/broadcast.jl @@ -30,6 +30,14 @@ Base.Broadcast.BroadcastStyle( Base.Broadcast.broadcastable(field::Field) = field +function Adapt.adapt_structure(to, bc::Base.Broadcast.Broadcasted{Style}) where {Style<:AbstractFieldStyle} + Base.Broadcast.Broadcasted{Style}( + Adapt.adapt(to, bc.f), + Adapt.adapt(to, bc.args), + Adapt.adapt(to, bc.axes), + ) +end + Base.eltype(bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}) = Base.Broadcast.combine_eltypes(bc.f, bc.args) diff --git a/src/Fields/indices.jl b/src/Fields/indices.jl index be0ba2da85..a7c57ee003 100644 --- a/src/Fields/indices.jl +++ b/src/Fields/indices.jl @@ -131,9 +131,9 @@ end -struct SlabIndex{VIdx} +struct SlabIndex{VIdx,HIdx} v::VIdx - h::Int + h::HIdx end diff --git a/src/Operators/Operators.jl b/src/Operators/Operators.jl index bc6bc721b0..b25ce61806 100644 --- a/src/Operators/Operators.jl +++ b/src/Operators/Operators.jl @@ -1,9 +1,11 @@ module Operators -import LinearAlgebra +import LinearAlgebra, Adapt using StaticArrays, CUDA +import Base.Broadcast: Broadcasted + import ..enable_threading, ..slab, ..slab_args, ..column, ..column_args import ..Device import ..DataLayouts: DataLayouts, Data2D, DataSlab2D diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index 649e0a3f9c..2ab94d6847 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -1,15 +1,34 @@ abstract type AbstractSpectralStyle <: Fields.AbstractFieldStyle end +""" + SpectralStyle() + +Broadcasting requires use of spectral-element operations. +""" struct SpectralStyle <: AbstractSpectralStyle end -struct CompositeSpectralStyle <: AbstractSpectralStyle end +""" + SlabBlockSpectralStyle() + +Applies spectral-element operations using by making use of intermediate +temporaries for each operator. This is used for CPU kernels. +""" +struct SlabBlockSpectralStyle <: AbstractSpectralStyle end + +""" + CUDASpectralStyle() +Applies spectral-element operations by using threads for each node, and +synchronizing when they occur. This is used for GPU kernels. +""" struct CUDASpectralStyle <: AbstractSpectralStyle end + import ClimaComms -AbstractSpectralStyle(::ClimaComms.CPU) = SpectralStyle +AbstractSpectralStyle(::ClimaComms.CPU) = SlabBlockSpectralStyle AbstractSpectralStyle(::ClimaComms.CUDA) = CUDASpectralStyle + """ SpectralElementOperator @@ -75,6 +94,13 @@ SpectralBroadcasted{Style}( ) where {Style, Op, Args, Axes, Work} = SpectralBroadcasted{Style, Op, Args, Axes, Work}(op, args, axes, work) +Adapt.adapt_structure(to, sbc::SpectralBroadcasted{Style}) where {Style} = + SpectralBroadcasted{Style}( + sbc.op, + Adapt.adapt(to, sbc.args), + Adapt.adapt(to, sbc.axes), + ) + return_space(::SpectralElementOperator, space) = space Base.axes(sbc::SpectralBroadcasted) = @@ -117,20 +143,9 @@ function Base.Broadcast.instantiate( return SpectralBroadcasted{Style}(op, args, axes) end -import Adapt - -Adapt.adapt_structure(to, sbc::SpectralBroadcasted{Style}) where {Style} = - SpectralBroadcasted{Style}( - sbc.op, - map(arg -> Adapt.adapt_structure(to, arg), sbc.args), - Adapt.adapt_structure(to, sbc.axes), - ) - - - function Base.Broadcast.instantiate( - bc::Base.Broadcast.Broadcasted{Style}, -) where {Style <: AbstractSpectralStyle} + bc::Base.Broadcast.Broadcasted{<:AbstractSpectralStyle}, +) # recursively instantiate the arguments to allocate intermediate work arrays args = instantiate_args(bc.args) # axes: same logic as Broadcasted @@ -140,6 +155,7 @@ function Base.Broadcast.instantiate( axes = bc.axes Base.Broadcast.check_broadcast_axes(axes, args...) end + Style = AbstractSpectralStyle(Device.device(axes)) return Base.Broadcast.Broadcasted{Style}(bc.f, args, axes) end @@ -168,60 +184,19 @@ function Base.Broadcast.materialize(sbc::SpectralBroadcasted) copy(Base.Broadcast.instantiate(sbc)) end -function Base.copyto!(out::Field, sbc::SpectralBroadcasted) - Fields.byslab(axes(out)) do slabidx - copyto_slab!(out, sbc, slabidx) - end - return out -end - -function Base.copyto!(out::Field, sbc::SpectralBroadcasted{CUDASpectralStyle}) - space = axes(out) - QS = Spaces.quadrature_style(space) - Nq = Quadratures.degrees_of_freedom(QS) - Nh = Topologies.nlocalelems(Spaces.topology(space)) - Nv = Spaces.nlevels(space) - - @cuda threads=(Nq,Nq) blocks=(Nv,Nh) copyto_kernel!(out, sbc) - return out -end - - -function copyto_kernel!(out, sbc) - i = threadIdx().x - j = threadIdx().y - #if axes(out) isa Spaces.AbstractSpectralElementSpace - v = nothing - #elseif axes(out) isa Spaces.CenterExtrudedFiniteDifferenceSpace - # v = blockIdx().x - #elseif axes(out) isa Spaces.FaceExtrudedFiniteDifferenceSpace - # v = blockIdx().x - half - #end - h = blockIdx().y - ij = CartesianIndex((i,j)) - slabidx = Fields.SlabIndex{Nothing}(v,h) - - result = apply_operator_kernel(sbc.op, axes(out), ij, slabidx, sbc.args...) - set_node!(out, ij, slabidx, result) - return nothing -end - - - function Base.Broadcast.materialize!(dest, sbc::SpectralBroadcasted) copyto!(dest, Base.Broadcast.instantiate(sbc)) end -function Base.copyto!( - out::Field, - bc::Base.Broadcast.Broadcasted{Style}, -) where {Style <: AbstractSpectralStyle} +# Functions for SlabBlockSpectralStyle +function Base.copyto!(out::Field, sbc::Union{SpectralBroadcasted{SlabBlockSpectralStyle}, Broadcasted{SlabBlockSpectralStyle}}) Fields.byslab(axes(out)) do slabidx - copyto_slab!(out, bc, slabidx) + copyto_slab!(out, sbc, slabidx) end return out end + """ copyto_slab!(out, bc, slabidx) @@ -238,7 +213,6 @@ function copyto_slab!(out, bc, slabidx) return nothing end - """ resolve_operator(bc, slabidx) @@ -252,16 +226,16 @@ Recursively evaluate any operators in `bc` at `slabidx`, replacing any `IF`/`IJF` data object. - if `bc` is a `Field`, return that """ -function resolve_operator(bc::SpectralBroadcasted, slabidx) +function resolve_operator(bc::SpectralBroadcasted{SlabBlockSpectralStyle}, slabidx) args = _resolve_operator_args(slabidx, bc.args...) apply_operator(bc.op, bc.axes, slabidx, args...) end function resolve_operator( - bc::Base.Broadcast.Broadcasted{CompositeSpectralStyle}, + bc::Base.Broadcast.Broadcasted{SlabBlockSpectralStyle}, slabidx, ) args = _resolve_operator_args(slabidx, bc.args...) - Base.Broadcast.Broadcasted{CompositeSpectralStyle}(bc.f, args, bc.axes) + Base.Broadcast.Broadcasted{SlabBlockSpectralStyle}(bc.f, args, bc.axes) end resolve_operator(x, slabidx) = x @@ -277,6 +251,34 @@ _resolve_operator_args(slabidx) = () ) +# Functions for CUDASpectralStyle +function Base.copyto!(out::Field, sbc::Union{SpectralBroadcasted{CUDASpectralStyle}, Broadcasted{CUDASpectralStyle}}) + space = axes(out) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + Nh = Topologies.nlocalelems(Spaces.topology(space)) + Nv = Spaces.nlevels(space) + + @cuda threads=(Nq,Nq) blocks=(Nv,Nh) copyto_kernel!(out, sbc) + return out +end + +function copyto_kernel!(out::Fields.SpectralElementField2D, sbc) + i = threadIdx().x + j = threadIdx().y + v = nothing + h = blockIdx().y + ij = CartesianIndex((i,j)) + slabidx = Fields.SlabIndex(v,h) + result = get_node(sbc, ij, slabidx) + set_node!(out, ij, slabidx, result) + return nothing +end + +function get_node(sbc::SpectralBroadcasted{CUDASpectralStyle}, ij, slabidx) + apply_operator_kernel(sbc.op, axes(sbc), ij, slabidx, _get_node(ij, slabidx, sbc.args...)...) +end + _get_node(ij, slabidx) = () _get_node(ij, slabidx, arg, xargs...) = @@ -410,18 +412,15 @@ Base.@propagate_inbounds function set_node!( end -function Base.Broadcast.BroadcastStyle( - ::Type{SB}, -) where {SB <: SpectralBroadcasted} - CompositeSpectralStyle() -end +Base.Broadcast.BroadcastStyle( + ::Type{<:SpectralBroadcasted{Style}}, +) where {Style} = + Style() -function Base.Broadcast.BroadcastStyle( - ::CompositeSpectralStyle, +Base.Broadcast.BroadcastStyle( + style::AbstractSpectralStyle, ::Fields.AbstractFieldStyle, -) - CompositeSpectralStyle() -end +) = style @@ -532,6 +531,46 @@ function apply_operator(op::Divergence{(1, 2)}, space, slabidx, arg) end +function apply_operator_kernel(op::Divergence{(1, 2)}, space, ij, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + local_geometry = get_local_geometry(space, ij, slabidx) + + # allocate temp output + RT = operator_return_eltype(op, typeof(arg)) + Nf = DataLayouts.typesize(FT, RT) + work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + Jv¹ = IJF{RT, Nq}(work1_array) + Jv² = IJF{RT, Nq}(work1_array) + i,j = ij.I + + + Jv¹[i,j] = local_geometry.J ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant1(v, local_geometry), + arg, + ) + Jv²[i,j] = local_geometry.J ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant2(v, local_geometry), + arg, + ) + + CUDA.sync_threads() + + DJv = D[i,1] ⊠ Jv¹[1,j] + for k = 2:Nq + DJv = DJv ⊞ D[i,k] ⊠ Jv¹[k,j] + end + for k = 1:Nq + DJv = DJv ⊞ D[j,k] ⊠ Jv²[i,k] + end + return RecursiveApply.rdiv(DJv, local_geometry.J) +end + + """ wdiv = WeakDivergence() wdiv.(u) @@ -644,6 +683,47 @@ function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end + +function apply_operator_kernel(op::WeakDivergence{(1, 2)}, space, ij, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + local_geometry = get_local_geometry(space, ij, slabidx) + + # allocate temp output + RT = operator_return_eltype(op, typeof(arg)) + Nf = DataLayouts.typesize(FT, RT) + work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + WJv¹ = IJF{RT, Nq}(work1_array) + WJv² = IJF{RT, Nq}(work1_array) + i,j = ij.I + + + WJv¹[i,j] = local_geometry.WJ ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant1(v, local_geometry), + arg, + ) + WJv²[i,j] = local_geometry.WJ ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant2(v, local_geometry), + arg, + ) + + CUDA.sync_threads() + + DᵀWJv = D[1,i] ⊠ WJv¹[1,j] + for k = 2:Nq + DᵀWJv = DᵀWJv ⊞ D[k,i] ⊠ WJv¹[k,j] + end + for k = 1:Nq + DᵀWJv = DᵀWJv ⊞ D[k,j] ⊠ WJv²[i,k] + end + return ⊟(RecursiveApply.rdiv(DᵀWJv, local_geometry.WJ)) +end + + """ grad = Gradient() grad.(f) @@ -718,20 +798,19 @@ function apply_operator(op::Gradient{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end - - function apply_operator_kernel(op::Gradient{(1, 2)}, space, ij, slabidx, arg) FT = Spaces.undertype(space) QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) # allocate temp output - IT = eltype(arg) + IT = typeof(arg) Nf = DataLayouts.typesize(FT, IT) array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) work = IJF{IT, Nq}(array) i,j = ij.I - work[i,j] = get_node(arg, ij, slabidx) + work[i,j] = arg + CUDA.sync_threads() ∂f∂ξ₁ = D[i,1] * work[1,j] @@ -743,16 +822,6 @@ function apply_operator_kernel(op::Gradient{(1, 2)}, space, ij, slabidx, arg) return Geometry.Covariant12Vector(∂f∂ξ₁,∂f∂ξ₂) end -function allocate_shared(op::Gradient{(1,2)}, arg) - S = eltype(arg) - space = axes(arg) - FT = Spaces.undertype(space) - QS = Spaces.quadrature_style(space) - Nq = Quadratures.degrees_of_freedom(QS) - Nf = DataLayouts.typesize(FT, S) - array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) - return IJF{S, Nq}(array) -end """ wgrad = WeakGradient() @@ -856,6 +925,36 @@ function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end +function apply_operator_kernel(op::WeakGradient{(1, 2)}, space, ij, slabidx, arg) + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + + # allocate temp output + IT = typeof(arg) + Nf = DataLayouts.typesize(FT, IT) + work_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + Wf = IJF{IT, Nq}(work_array) + i,j = ij.I + Wf[i,j] = W ⊠ arg + + CUDA.sync_threads() + + Dᵀ₁Wf = D[1,i] * work[1,j] + Dᵀ₂Wf = D[1,j] * work[i,1] + for k = 2:Nq + Dᵀ₁Wf += D[k,i] * work[k,j] + Dᵀ₂Wf += D[k,j] * work[i,k] + end + return Geometry.Covariant12Vector(RecursiveApply.rdiv(Dᵀ₁Wf,W),RecursiveApply.rdiv(Dᵀ₂Wf,W)) +end + + + abstract type CurlSpectralElementOperator <: SpectralElementOperator end """ diff --git a/src/Spaces/spectralelement.jl b/src/Spaces/spectralelement.jl index afa1da45b8..df5160643f 100644 --- a/src/Spaces/spectralelement.jl +++ b/src/Spaces/spectralelement.jl @@ -178,15 +178,15 @@ end Adapt.adapt_structure(to, space::SpectralElementSpace2D) = SpectralElementSpace2D( - nothing, - Adapt.adapt_structure(to, space.quadrature_style), - Adapt.adapt_structure(to, space.global_geometry), - Adapt.adapt_structure(to, space.local_geometry), - Adapt.adapt_structure(to, space.ghost_geometry), - Adapt.adapt_structure(to, space.local_dss_weights), - Adapt.adapt_structure(to, space.ghost_dss_weights), - Adapt.adapt_structure(to, space.internal_surface_geometry), - Adapt.adapt_structure(to, space.boundary_surface_geometries), + nothing, # drop topology + Adapt.adapt(to, space.quadrature_style), + Adapt.adapt(to, space.global_geometry), + Adapt.adapt(to, space.local_geometry), + Adapt.adapt(to, space.ghost_geometry), + Adapt.adapt(to, space.local_dss_weights), + Adapt.adapt(to, space.ghost_dss_weights), + Adapt.adapt(to, space.internal_surface_geometry), + Adapt.adapt(to, space.boundary_surface_geometries), ) From 53c04364ac6c9575de500ee23f7bb2e68c4f21c6 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 2 Feb 2023 13:10:58 -0800 Subject: [PATCH 07/16] more operators --- src/Operators/spectralelement.jl | 145 ++++++++++++++++++++++++++----- 1 file changed, 122 insertions(+), 23 deletions(-) diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index 2ab94d6847..e2eea83f1d 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -545,9 +545,8 @@ function apply_operator_kernel(op::Divergence{(1, 2)}, space, ij, slabidx, arg) work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) Jv¹ = IJF{RT, Nq}(work1_array) - Jv² = IJF{RT, Nq}(work1_array) + Jv² = IJF{RT, Nq}(work2_array) i,j = ij.I - Jv¹[i,j] = local_geometry.J ⊠ RecursiveApply.rmap( v -> Geometry.contravariant1(v, local_geometry), @@ -560,14 +559,13 @@ function apply_operator_kernel(op::Divergence{(1, 2)}, space, ij, slabidx, arg) CUDA.sync_threads() - DJv = D[i,1] ⊠ Jv¹[1,j] + D₁Jv¹ = D[i,1] ⊠ Jv¹[1,j] + D₂Jv² = D[j,1] ⊠ Jv²[i,1] for k = 2:Nq - DJv = DJv ⊞ D[i,k] ⊠ Jv¹[k,j] - end - for k = 1:Nq - DJv = DJv ⊞ D[j,k] ⊠ Jv²[i,k] + D₁Jv¹ = D₁Jv¹ ⊞ D[i,k] ⊠ Jv¹[k,j] + D₂Jv² = D₂Jv² ⊞ D[j,k] ⊠ Jv²[i,k] end - return RecursiveApply.rdiv(DJv, local_geometry.J) + return RecursiveApply.rdiv(D₁Jv¹ ⊞ D₂Jv², local_geometry.J) end @@ -683,7 +681,6 @@ function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end - function apply_operator_kernel(op::WeakDivergence{(1, 2)}, space, ij, slabidx, arg) FT = Spaces.undertype(space) QS = Spaces.quadrature_style(space) @@ -698,9 +695,8 @@ function apply_operator_kernel(op::WeakDivergence{(1, 2)}, space, ij, slabidx, a work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) WJv¹ = IJF{RT, Nq}(work1_array) - WJv² = IJF{RT, Nq}(work1_array) + WJv² = IJF{RT, Nq}(work2_array) i,j = ij.I - WJv¹[i,j] = local_geometry.WJ ⊠ RecursiveApply.rmap( v -> Geometry.contravariant1(v, local_geometry), @@ -713,17 +709,15 @@ function apply_operator_kernel(op::WeakDivergence{(1, 2)}, space, ij, slabidx, a CUDA.sync_threads() - DᵀWJv = D[1,i] ⊠ WJv¹[1,j] + Dᵀ₁WJv¹ = D[1,i] ⊠ WJv¹[1,j] + Dᵀ₂WJv² = D[1,j] ⊠ WJv²[i,1] for k = 2:Nq - DᵀWJv = DᵀWJv ⊞ D[k,i] ⊠ WJv¹[k,j] + Dᵀ₁WJv¹ = Dᵀ₁WJv¹ ⊞ D[k,i] ⊠ WJv¹[k,j] + Dᵀ₂WJv² = Dᵀ₂WJv² ⊞ D[k,j] ⊠ WJv²[i,k] end - for k = 1:Nq - DᵀWJv = DᵀWJv ⊞ D[k,j] ⊠ WJv²[i,k] - end - return ⊟(RecursiveApply.rdiv(DᵀWJv, local_geometry.WJ)) + return ⊟(RecursiveApply.rdiv(Dᵀ₁WJv¹ ⊞ Dᵀ₂WJv², local_geometry.WJ)) end - """ grad = Gradient() grad.(f) @@ -944,13 +938,13 @@ function apply_operator_kernel(op::WeakGradient{(1, 2)}, space, ij, slabidx, arg CUDA.sync_threads() - Dᵀ₁Wf = D[1,i] * work[1,j] - Dᵀ₂Wf = D[1,j] * work[i,1] + Dᵀ₁Wf = D[1,i] ⊠ Wf[1,j] + Dᵀ₂Wf = D[1,j] ⊠ Wf[i,1] for k = 2:Nq - Dᵀ₁Wf += D[k,i] * work[k,j] - Dᵀ₂Wf += D[k,j] * work[i,k] + Dᵀ₁Wf = Dᵀ₁Wf ⊞ D[k,i] ⊠ Wf[k,j] + Dᵀ₂Wf = Dᵀ₂Wf ⊞ D[k,j] ⊠ Wf[i,k] end - return Geometry.Covariant12Vector(RecursiveApply.rdiv(Dᵀ₁Wf,W),RecursiveApply.rdiv(Dᵀ₂Wf,W)) + return Geometry.Covariant12Vector(⊟(RecursiveApply.rdiv(Dᵀ₁Wf, W)),⊟(RecursiveApply.rdiv(Dᵀ₂Wf, W))) end @@ -1094,6 +1088,58 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end +function apply_operator_kernel(op::Curl{(1, 2)}, space, ij, slabidx, arg) + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + RT = operator_return_eltype(op, typeof(arg)) + IT = eltype(arg) + Nf = DataLayouts.typesize(FT, IT) + local_geometry = get_local_geometry(space, ij, slabidx) + i,j = ij.I + + if RT <: Geometry.Contravariant3Vector + + work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + v₁ = IJF{IT, Nq}(work1_array) + v₂ = IJF{IT, Nq}(work2_array) + v₁[i,j] = Geometry.covariant1(arg, local_geometry) + v₂[i,j] = Geometry.covariant2(arg, local_geometry) + + CUDA.sync_threads() + + D₁v₂ = D[i, 1] ⊠ v₂[1,j] + D₂v₁ = D[j, 1] ⊠ v₁[i,1] + for k in 2:Nq + D₁v₂ = D₁v₂ ⊠ D[i, k] ⊠ v₂[k,j] + D₂v₁ = D₂v₁ ⊠ D[j, k] ⊠ v₁[i,k] + end + return Geometry.Contravariant3Vector(RecursiveApply.rdiv(D₁v₂ ⊟ D₂v₁, local_geometry.J)) + elseif RT <: Geometry.Contravariant12Vector + work_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + v₃ = IJF{IT, Nq}(work_array) + v₃[i,j] = Geometry.covariant3(arg, local_geometry) + + CUDA.sync_threads() + + D₁v₃ = D[i, 1] ⊠ v₃[1,j] + D₂v₃ = D[j, 1] ⊠ v₃[i,1] + for k in 2:Nq + D₁v₃ = D₁v₃ ⊠ D[i, k] ⊠ v₃[k,j] + D₂v₃ = D₂v₃ ⊠ D[j, k] ⊠ v₃[i,k] + end + return Geometry.Contravariant12Vector(RecursiveApply.rdiv(D₂v₃,local_geometry.J) ⊟(RecursiveApply.rdiv(D₁v₃,local_geometry.J))) + else + error("invalid return type") + end +end + + + """ wcurl = WeakCurl() wcurl.(u) @@ -1231,6 +1277,59 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end + +function apply_operator_kernel(op::WeakCurl{(1, 2)}, space, ij, slabidx, arg) + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + RT = operator_return_eltype(op, typeof(arg)) + IT = eltype(arg) + Nf = DataLayouts.typesize(FT, IT) + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ / local_geometry.J + i,j = ij.I + + if RT <: Geometry.Contravariant3Vector + + work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + Wv₁ = IJF{IT, Nq}(work1_array) + Wv₂ = IJF{IT, Nq}(work2_array) + Wv₁[i,j] = W ⊠ Geometry.covariant1(arg, local_geometry) + Wv₂[i,j] = W ⊠ Geometry.covariant2(arg, local_geometry) + + CUDA.sync_threads() + + Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1,j] + Dᵀ₂Wv₁ = D[1, j] ⊠ Wv₁[i,1] + for k in 2:Nq + Dᵀ₁v₂ = Dᵀ₁Wv₂ ⊠ D[k, i] ⊠ Wv₂[k,j] + Dᵀ₂v₁ = Dᵀ₂Wv₁ ⊠ D[k, j] ⊠ Wv₁[i,k] + end + return Geometry.Contravariant3Vector(RecursiveApply.rdiv(Dᵀ₂Wv₁ ⊟ Dᵀ₁Wv₂, local_geometry.WJ)) + elseif RT <: Geometry.Contravariant12Vector + work_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + Wv₃ = IJF{IT, Nq}(work_array) + Wv₃[i,j] = W ⊠ Geometry.covariant3(arg, local_geometry) + + CUDA.sync_threads() + + Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1,j] + Dᵀ₂Wv₃ = D[1, j] ⊠ Wv₃[i,1] + for k in 2:Nq + Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊠ D[k, i] ⊠ Wv₃[k,j] + Dᵀ₂Wv₃ = Dᵀ₂Wv₃ ⊠ D[k, j] ⊠ Wv₃[i,k] + end + return Geometry.Contravariant12Vector(RecursiveApply.rdiv(Dᵀ₂Wv₃,local_geometry.WJ) ⊟(RecursiveApply.rdiv(Dᵀ₁Wv₃,local_geometry.WJ))) + else + error("invalid return type") + end +end + + # interplation / restriction abstract type TensorOperator <: SpectralElementOperator end From e76771c312a853848d4c666b0b6201ff78de4e99 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 2 Feb 2023 15:44:02 -0800 Subject: [PATCH 08/16] more operator updates --- examples/sphere/shallow_water_cuda.jl | 68 +++++ src/Fields/Fields.jl | 9 +- src/Fields/broadcast.jl | 10 +- src/Fields/indices.jl | 2 +- src/Operators/spectralelement.jl | 262 +++++++++++------- src/Spaces/spectralelement.jl | 26 +- .../spectralelement/rectilinear_cuda.jl | 16 +- 7 files changed, 261 insertions(+), 132 deletions(-) diff --git a/examples/sphere/shallow_water_cuda.jl b/examples/sphere/shallow_water_cuda.jl index 16db587d0c..53e2ba7358 100644 --- a/examples/sphere/shallow_water_cuda.jl +++ b/examples/sphere/shallow_water_cuda.jl @@ -1,6 +1,10 @@ using CUDA using ClimaComms using DocStringExtensions +using LinearAlgebra +using OrdinaryDiffEq + +CUDA.allowscalar(false) import ClimaCore: Device, @@ -442,6 +446,47 @@ function set_initial_condition( return Y end +function rhs!(dYdt, y, parameters, t) + #@nvtx "rhs!" color = colorant"red" begin + (; f, h_s, ghost_buffer, params) = parameters + (; D₄, g) = params + + div = Operators.Divergence() + wdiv = Operators.WeakDivergence() + grad = Operators.Gradient() + wgrad = Operators.WeakGradient() + curl = Operators.Curl() + wcurl = Operators.WeakCurl() + + # Compute hyperviscosity first + #@nvtx "Hyperviscosity (rhs!)" color = colorant"green" begin + @. dYdt.h = wdiv(grad(y.h)) + @. dYdt.u = + wgrad(div(y.u)) - Geometry.Covariant12Vector( + wcurl(Geometry.Covariant3Vector(curl(y.u))), + ) + + Spaces.weighted_dss2!(dYdt, ghost_buffer) + + @. dYdt.h = -D₄ * wdiv(grad(dYdt.h)) + # split to avoid "device kernel image is invalid (code 200, ERROR_INVALID_IMAGE)" + @. dYdt.u = wgrad(div(dYdt.u)) - Geometry.Covariant12Vector(wcurl(Geometry.Covariant3Vector(curl(dYdt.u)))) + @. dYdt.u = -D₄ * dYdt.u + #end + #@nvtx "h and u (rhs!)" color = colorant"blue" begin + # Add in pieces + @. begin + dYdt.h += -wdiv(y.h * y.u) + dYdt.u += + -grad(g * (y.h + h_s) + norm(y.u)^2 / 2) #+ + dYdt.u += y.u × (f + curl(y.u)) + end + Spaces.weighted_dss2!(dYdt, ghost_buffer) + #end + #end + return dYdt +end + function shallow_water_driver_cuda(ARGS, ::Type{FT}) where {FT} device = Device.device() context = ClimaComms.SingletonCommsContext(device) @@ -478,11 +523,34 @@ function shallow_water_driver_cuda(ARGS, ::Type{FT}) where {FT} f = set_coriolis_parameter(space, test) h_s = surface_topography(space, test) Y = set_initial_condition(space, test) + dYdt = similar(Y) ghost_buffer = Spaces.create_dss_buffer(Y) Spaces.weighted_dss_start2!(Y, ghost_buffer) Spaces.weighted_dss_internal2!(Y, ghost_buffer) Spaces.weighted_dss_ghost2!(Y, ghost_buffer) + + parameters = + (; f = f, h_s = h_s, ghost_buffer = ghost_buffer, params = test.params) + rhs!(dYdt, Y, parameters, 0.0) + + # Solve the ODE + dt = 9 * 60 + T = 86400 * 2 + + prob = ODEProblem(rhs!, Y, (0.0, T), parameters) + integrator = OrdinaryDiffEq.init( + prob, + SSPRK33(), + dt = dt, + saveat = dt, + progress = true, + adaptive = false, + progress_message = (dt, u, p, t) -> t, + ) + + + sol = @timev OrdinaryDiffEq.solve!(integrator) return nothing end diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index 11a22e0bd8..8872e952c1 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -49,11 +49,10 @@ comm_context(topology::Topologies.Topology2D) = topology.context comm_context(topology::T) where {T <: Topologies.AbstractTopology} = ClimaComms.SingletonCommsContext() -Adapt.adapt_structure(to, field::Field) = - Field( - Adapt.adapt(to, Fields.field_values(field)), - Adapt.adapt(to, axes(field)), - ) +Adapt.adapt_structure(to, field::Field) = Field( + Adapt.adapt(to, Fields.field_values(field)), + Adapt.adapt(to, axes(field)), +) # Point Field diff --git a/src/Fields/broadcast.jl b/src/Fields/broadcast.jl index 55c830ab50..ad770d52eb 100644 --- a/src/Fields/broadcast.jl +++ b/src/Fields/broadcast.jl @@ -30,7 +30,10 @@ Base.Broadcast.BroadcastStyle( Base.Broadcast.broadcastable(field::Field) = field -function Adapt.adapt_structure(to, bc::Base.Broadcast.Broadcasted{Style}) where {Style<:AbstractFieldStyle} +function Adapt.adapt_structure( + to, + bc::Base.Broadcast.Broadcasted{Style}, +) where {Style <: AbstractFieldStyle} Base.Broadcast.Broadcasted{Style}( Adapt.adapt(to, bc.f), Adapt.adapt(to, bc.args), @@ -259,8 +262,13 @@ end return nothing end +# types aren't isbits +Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::Type{T}, args...) where {T} = + Base.Broadcast.broadcasted(fs, (x...) -> T(x...), args...) + # Specialize handling of +, *, muladd, so that we can support broadcasting over NamedTuple element types # Required for ODE solvers + Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(+), args...) = Base.Broadcast.broadcasted(fs, RecursiveApply.:⊞, args...) diff --git a/src/Fields/indices.jl b/src/Fields/indices.jl index a7c57ee003..865acae8b3 100644 --- a/src/Fields/indices.jl +++ b/src/Fields/indices.jl @@ -131,7 +131,7 @@ end -struct SlabIndex{VIdx,HIdx} +struct SlabIndex{VIdx, HIdx} v::VIdx h::HIdx end diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index e2eea83f1d..47183e52bb 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -80,7 +80,8 @@ This is similar to a `Base.Broadcast.Broadcasted` object, except it contains spa This is returned by `Base.Broadcast.broadcasted(op::SpectralElementOperator)`. """ -struct SpectralBroadcasted{Style, Op, Args, Axes, Work} <: Base.AbstractBroadcasted +struct SpectralBroadcasted{Style, Op, Args, Axes, Work} <: + Base.AbstractBroadcasted op::Op args::Args axes::Axes @@ -100,7 +101,7 @@ Adapt.adapt_structure(to, sbc::SpectralBroadcasted{Style}) where {Style} = Adapt.adapt(to, sbc.args), Adapt.adapt(to, sbc.axes), ) - + return_space(::SpectralElementOperator, space) = space Base.axes(sbc::SpectralBroadcasted) = @@ -125,9 +126,7 @@ Base.eltype(sbc::SpectralBroadcasted) = (Base.Broadcast.instantiate(args[1]),) @inline instantiate_args(::Tuple{}) = () -function Base.Broadcast.instantiate( - sbc::SpectralBroadcasted, -) +function Base.Broadcast.instantiate(sbc::SpectralBroadcasted) op = sbc.op # recursively instantiate the arguments to allocate intermediate work arrays args = instantiate_args(sbc.args) @@ -189,7 +188,13 @@ function Base.Broadcast.materialize!(dest, sbc::SpectralBroadcasted) end # Functions for SlabBlockSpectralStyle -function Base.copyto!(out::Field, sbc::Union{SpectralBroadcasted{SlabBlockSpectralStyle}, Broadcasted{SlabBlockSpectralStyle}}) +function Base.copyto!( + out::Field, + sbc::Union{ + SpectralBroadcasted{SlabBlockSpectralStyle}, + Broadcasted{SlabBlockSpectralStyle}, + }, +) Fields.byslab(axes(out)) do slabidx copyto_slab!(out, sbc, slabidx) end @@ -226,7 +231,10 @@ Recursively evaluate any operators in `bc` at `slabidx`, replacing any `IF`/`IJF` data object. - if `bc` is a `Field`, return that """ -function resolve_operator(bc::SpectralBroadcasted{SlabBlockSpectralStyle}, slabidx) +function resolve_operator( + bc::SpectralBroadcasted{SlabBlockSpectralStyle}, + slabidx, +) args = _resolve_operator_args(slabidx, bc.args...) apply_operator(bc.op, bc.axes, slabidx, args...) end @@ -252,14 +260,20 @@ _resolve_operator_args(slabidx) = () # Functions for CUDASpectralStyle -function Base.copyto!(out::Field, sbc::Union{SpectralBroadcasted{CUDASpectralStyle}, Broadcasted{CUDASpectralStyle}}) +function Base.copyto!( + out::Field, + sbc::Union{ + SpectralBroadcasted{CUDASpectralStyle}, + Broadcasted{CUDASpectralStyle}, + }, +) space = axes(out) QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) Nh = Topologies.nlocalelems(Spaces.topology(space)) Nv = Spaces.nlevels(space) - @cuda threads=(Nq,Nq) blocks=(Nv,Nh) copyto_kernel!(out, sbc) + @cuda threads = (Nq, Nq) blocks = (Nv, Nh) copyto_kernel!(out, sbc) return out end @@ -268,20 +282,26 @@ function copyto_kernel!(out::Fields.SpectralElementField2D, sbc) j = threadIdx().y v = nothing h = blockIdx().y - ij = CartesianIndex((i,j)) - slabidx = Fields.SlabIndex(v,h) + ij = CartesianIndex((i, j)) + slabidx = Fields.SlabIndex(v, h) result = get_node(sbc, ij, slabidx) set_node!(out, ij, slabidx, result) return nothing end function get_node(sbc::SpectralBroadcasted{CUDASpectralStyle}, ij, slabidx) - apply_operator_kernel(sbc.op, axes(sbc), ij, slabidx, _get_node(ij, slabidx, sbc.args...)...) + apply_operator_kernel( + sbc.op, + axes(sbc), + ij, + slabidx, + _get_node(ij, slabidx, sbc.args...)..., + ) end -_get_node(ij, slabidx) = () -_get_node(ij, slabidx, arg, xargs...) = +@inline _get_node(ij, slabidx) = () +@inline _get_node(ij, slabidx, arg, xargs...) = (get_node(arg, ij, slabidx), _get_node(ij, slabidx, xargs...)...) Base.@propagate_inbounds function get_node(scalar, ij, slabidx) @@ -414,8 +434,7 @@ end Base.Broadcast.BroadcastStyle( ::Type{<:SpectralBroadcasted{Style}}, -) where {Style} = - Style() +) where {Style} = Style() Base.Broadcast.BroadcastStyle( style::AbstractSpectralStyle, @@ -538,32 +557,34 @@ function apply_operator_kernel(op::Divergence{(1, 2)}, space, ij, slabidx, arg) D = Quadratures.differentiation_matrix(FT, QS) local_geometry = get_local_geometry(space, ij, slabidx) - + # allocate temp output RT = operator_return_eltype(op, typeof(arg)) Nf = DataLayouts.typesize(FT, RT) - work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) - work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work1_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) Jv¹ = IJF{RT, Nq}(work1_array) Jv² = IJF{RT, Nq}(work2_array) - i,j = ij.I - - Jv¹[i,j] = local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - arg, - ) - Jv²[i,j] = local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant2(v, local_geometry), - arg, - ) + i, j = ij.I + + Jv¹[i, j] = + local_geometry.J ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant1(v, local_geometry), + arg, + ) + Jv²[i, j] = + local_geometry.J ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant2(v, local_geometry), + arg, + ) CUDA.sync_threads() - D₁Jv¹ = D[i,1] ⊠ Jv¹[1,j] - D₂Jv² = D[j,1] ⊠ Jv²[i,1] - for k = 2:Nq - D₁Jv¹ = D₁Jv¹ ⊞ D[i,k] ⊠ Jv¹[k,j] - D₂Jv² = D₂Jv² ⊞ D[j,k] ⊠ Jv²[i,k] + D₁Jv¹ = D[i, 1] ⊠ Jv¹[1, j] + D₂Jv² = D[j, 1] ⊠ Jv²[i, 1] + for k in 2:Nq + D₁Jv¹ = D₁Jv¹ ⊞ D[i, k] ⊠ Jv¹[k, j] + D₂Jv² = D₂Jv² ⊞ D[j, k] ⊠ Jv²[i, k] end return RecursiveApply.rdiv(D₁Jv¹ ⊞ D₂Jv², local_geometry.J) end @@ -681,39 +702,47 @@ function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end -function apply_operator_kernel(op::WeakDivergence{(1, 2)}, space, ij, slabidx, arg) +function apply_operator_kernel( + op::WeakDivergence{(1, 2)}, + space, + ij, + slabidx, + arg, +) FT = Spaces.undertype(space) QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) local_geometry = get_local_geometry(space, ij, slabidx) - + # allocate temp output RT = operator_return_eltype(op, typeof(arg)) Nf = DataLayouts.typesize(FT, RT) - work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) - work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work1_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) WJv¹ = IJF{RT, Nq}(work1_array) WJv² = IJF{RT, Nq}(work2_array) - i,j = ij.I - - WJv¹[i,j] = local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - arg, - ) - WJv²[i,j] = local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant2(v, local_geometry), - arg, - ) + i, j = ij.I + + WJv¹[i, j] = + local_geometry.WJ ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant1(v, local_geometry), + arg, + ) + WJv²[i, j] = + local_geometry.WJ ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant2(v, local_geometry), + arg, + ) CUDA.sync_threads() - Dᵀ₁WJv¹ = D[1,i] ⊠ WJv¹[1,j] - Dᵀ₂WJv² = D[1,j] ⊠ WJv²[i,1] - for k = 2:Nq - Dᵀ₁WJv¹ = Dᵀ₁WJv¹ ⊞ D[k,i] ⊠ WJv¹[k,j] - Dᵀ₂WJv² = Dᵀ₂WJv² ⊞ D[k,j] ⊠ WJv²[i,k] + Dᵀ₁WJv¹ = D[1, i] ⊠ WJv¹[1, j] + Dᵀ₂WJv² = D[1, j] ⊠ WJv²[i, 1] + for k in 2:Nq + Dᵀ₁WJv¹ = Dᵀ₁WJv¹ ⊞ D[k, i] ⊠ WJv¹[k, j] + Dᵀ₂WJv² = Dᵀ₂WJv² ⊞ D[k, j] ⊠ WJv²[i, k] end return ⊟(RecursiveApply.rdiv(Dᵀ₁WJv¹ ⊞ Dᵀ₂WJv², local_geometry.WJ)) end @@ -800,20 +829,20 @@ function apply_operator_kernel(op::Gradient{(1, 2)}, space, ij, slabidx, arg) # allocate temp output IT = typeof(arg) Nf = DataLayouts.typesize(FT, IT) - array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) work = IJF{IT, Nq}(array) - i,j = ij.I - work[i,j] = arg + i, j = ij.I + work[i, j] = arg CUDA.sync_threads() - ∂f∂ξ₁ = D[i,1] * work[1,j] - ∂f∂ξ₂ = D[j,1] * work[i,1] - for k = 2:Nq - ∂f∂ξ₁ += D[i,k] * work[k,j] - ∂f∂ξ₂ += D[j,k] * work[i,k] + ∂f∂ξ₁ = D[i, 1] * work[1, j] + ∂f∂ξ₂ = D[j, 1] * work[i, 1] + for k in 2:Nq + ∂f∂ξ₁ += D[i, k] * work[k, j] + ∂f∂ξ₂ += D[j, k] * work[i, k] end - return Geometry.Covariant12Vector(∂f∂ξ₁,∂f∂ξ₂) + return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂) end @@ -919,7 +948,13 @@ function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) return Field(SArray(out), space) end -function apply_operator_kernel(op::WeakGradient{(1, 2)}, space, ij, slabidx, arg) +function apply_operator_kernel( + op::WeakGradient{(1, 2)}, + space, + ij, + slabidx, + arg, +) FT = Spaces.undertype(space) QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) @@ -931,20 +966,23 @@ function apply_operator_kernel(op::WeakGradient{(1, 2)}, space, ij, slabidx, arg # allocate temp output IT = typeof(arg) Nf = DataLayouts.typesize(FT, IT) - work_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) Wf = IJF{IT, Nq}(work_array) - i,j = ij.I - Wf[i,j] = W ⊠ arg + i, j = ij.I + Wf[i, j] = W ⊠ arg CUDA.sync_threads() - Dᵀ₁Wf = D[1,i] ⊠ Wf[1,j] - Dᵀ₂Wf = D[1,j] ⊠ Wf[i,1] - for k = 2:Nq - Dᵀ₁Wf = Dᵀ₁Wf ⊞ D[k,i] ⊠ Wf[k,j] - Dᵀ₂Wf = Dᵀ₂Wf ⊞ D[k,j] ⊠ Wf[i,k] + Dᵀ₁Wf = D[1, i] ⊠ Wf[1, j] + Dᵀ₂Wf = D[1, j] ⊠ Wf[i, 1] + for k in 2:Nq + Dᵀ₁Wf = Dᵀ₁Wf ⊞ D[k, i] ⊠ Wf[k, j] + Dᵀ₂Wf = Dᵀ₂Wf ⊞ D[k, j] ⊠ Wf[i, k] end - return Geometry.Covariant12Vector(⊟(RecursiveApply.rdiv(Dᵀ₁Wf, W)),⊟(RecursiveApply.rdiv(Dᵀ₂Wf, W))) + return Geometry.Covariant12Vector( + ⊟(RecursiveApply.rdiv(Dᵀ₁Wf, W)), + ⊟(RecursiveApply.rdiv(Dᵀ₂Wf, W)), + ) end @@ -1099,40 +1137,45 @@ function apply_operator_kernel(op::Curl{(1, 2)}, space, ij, slabidx, arg) IT = eltype(arg) Nf = DataLayouts.typesize(FT, IT) local_geometry = get_local_geometry(space, ij, slabidx) - i,j = ij.I + i, j = ij.I + # input data is a Covariant12Vector field if RT <: Geometry.Contravariant3Vector - - work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) - work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work1_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) v₁ = IJF{IT, Nq}(work1_array) v₂ = IJF{IT, Nq}(work2_array) - v₁[i,j] = Geometry.covariant1(arg, local_geometry) - v₂[i,j] = Geometry.covariant2(arg, local_geometry) + v₁[i, j] = Geometry.covariant1(arg, local_geometry) + v₂[i, j] = Geometry.covariant2(arg, local_geometry) CUDA.sync_threads() - D₁v₂ = D[i, 1] ⊠ v₂[1,j] - D₂v₁ = D[j, 1] ⊠ v₁[i,1] + D₁v₂ = D[i, 1] ⊠ v₂[1, j] + D₂v₁ = D[j, 1] ⊠ v₁[i, 1] for k in 2:Nq - D₁v₂ = D₁v₂ ⊠ D[i, k] ⊠ v₂[k,j] - D₂v₁ = D₂v₁ ⊠ D[j, k] ⊠ v₁[i,k] + D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, j] + D₂v₁ = D₂v₁ ⊞ D[j, k] ⊠ v₁[i, k] end - return Geometry.Contravariant3Vector(RecursiveApply.rdiv(D₁v₂ ⊟ D₂v₁, local_geometry.J)) + return Geometry.Contravariant3Vector( + RecursiveApply.rdiv(D₁v₂ ⊟ D₂v₁, local_geometry.J), + ) elseif RT <: Geometry.Contravariant12Vector - work_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) v₃ = IJF{IT, Nq}(work_array) - v₃[i,j] = Geometry.covariant3(arg, local_geometry) + v₃[i, j] = Geometry.covariant3(arg, local_geometry) CUDA.sync_threads() - D₁v₃ = D[i, 1] ⊠ v₃[1,j] - D₂v₃ = D[j, 1] ⊠ v₃[i,1] + D₁v₃ = D[i, 1] ⊠ v₃[1, j] + D₂v₃ = D[j, 1] ⊠ v₃[i, 1] for k in 2:Nq - D₁v₃ = D₁v₃ ⊠ D[i, k] ⊠ v₃[k,j] - D₂v₃ = D₂v₃ ⊠ D[j, k] ⊠ v₃[i,k] + D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, j] + D₂v₃ = D₂v₃ ⊞ D[j, k] ⊠ v₃[i, k] end - return Geometry.Contravariant12Vector(RecursiveApply.rdiv(D₂v₃,local_geometry.J) ⊟(RecursiveApply.rdiv(D₁v₃,local_geometry.J))) + return Geometry.Contravariant12Vector( + RecursiveApply.rdiv(D₂v₃, local_geometry.J), + ⊟(RecursiveApply.rdiv(D₁v₃, local_geometry.J)), + ) else error("invalid return type") end @@ -1290,40 +1333,45 @@ function apply_operator_kernel(op::WeakCurl{(1, 2)}, space, ij, slabidx, arg) Nf = DataLayouts.typesize(FT, IT) local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ / local_geometry.J - i,j = ij.I + i, j = ij.I if RT <: Geometry.Contravariant3Vector - work1_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) - work2_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work1_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) + work2_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) Wv₁ = IJF{IT, Nq}(work1_array) Wv₂ = IJF{IT, Nq}(work2_array) - Wv₁[i,j] = W ⊠ Geometry.covariant1(arg, local_geometry) - Wv₂[i,j] = W ⊠ Geometry.covariant2(arg, local_geometry) + Wv₁[i, j] = W ⊠ Geometry.covariant1(arg, local_geometry) + Wv₂[i, j] = W ⊠ Geometry.covariant2(arg, local_geometry) CUDA.sync_threads() - Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1,j] - Dᵀ₂Wv₁ = D[1, j] ⊠ Wv₁[i,1] + Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, j] + Dᵀ₂Wv₁ = D[1, j] ⊠ Wv₁[i, 1] for k in 2:Nq - Dᵀ₁v₂ = Dᵀ₁Wv₂ ⊠ D[k, i] ⊠ Wv₂[k,j] - Dᵀ₂v₁ = Dᵀ₂Wv₁ ⊠ D[k, j] ⊠ Wv₁[i,k] + Dᵀ₁v₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, j] + Dᵀ₂v₁ = Dᵀ₂Wv₁ ⊞ D[k, j] ⊠ Wv₁[i, k] end - return Geometry.Contravariant3Vector(RecursiveApply.rdiv(Dᵀ₂Wv₁ ⊟ Dᵀ₁Wv₂, local_geometry.WJ)) + return Geometry.Contravariant3Vector( + RecursiveApply.rdiv(Dᵀ₂Wv₁ ⊟ Dᵀ₁Wv₂, local_geometry.WJ), + ) elseif RT <: Geometry.Contravariant12Vector - work_array = CUDA.CuStaticSharedArray(FT, (Nq,Nq,Nf)) + work_array = CUDA.CuStaticSharedArray(FT, (Nq, Nq, Nf)) Wv₃ = IJF{IT, Nq}(work_array) - Wv₃[i,j] = W ⊠ Geometry.covariant3(arg, local_geometry) + Wv₃[i, j] = W ⊠ Geometry.covariant3(arg, local_geometry) CUDA.sync_threads() - Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1,j] - Dᵀ₂Wv₃ = D[1, j] ⊠ Wv₃[i,1] + Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, j] + Dᵀ₂Wv₃ = D[1, j] ⊠ Wv₃[i, 1] for k in 2:Nq - Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊠ D[k, i] ⊠ Wv₃[k,j] - Dᵀ₂Wv₃ = Dᵀ₂Wv₃ ⊠ D[k, j] ⊠ Wv₃[i,k] + Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, j] + Dᵀ₂Wv₃ = Dᵀ₂Wv₃ ⊞ D[k, j] ⊠ Wv₃[i, k] end - return Geometry.Contravariant12Vector(RecursiveApply.rdiv(Dᵀ₂Wv₃,local_geometry.WJ) ⊟(RecursiveApply.rdiv(Dᵀ₁Wv₃,local_geometry.WJ))) + return Geometry.Contravariant12Vector( + RecursiveApply.rdiv(Dᵀ₂Wv₃, local_geometry.WJ), + ⊟(RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ)), + ) else error("invalid return type") end diff --git a/src/Spaces/spectralelement.jl b/src/Spaces/spectralelement.jl index df5160643f..7c2f166a68 100644 --- a/src/Spaces/spectralelement.jl +++ b/src/Spaces/spectralelement.jl @@ -20,7 +20,8 @@ function Base.show(io::IO, space::AbstractSpectralElementSpace) print(iio, " "^(indent + 2), space.quadrature_style) end -Device.device(space::AbstractSpectralElementSpace) = Device.device(topology(space)) +Device.device(space::AbstractSpectralElementSpace) = + Device.device(topology(space)) topology(space::AbstractSpectralElementSpace) = space.topology quadrature_style(space::AbstractSpectralElementSpace) = space.quadrature_style @@ -177,17 +178,17 @@ struct SpectralElementSpace2D{ end Adapt.adapt_structure(to, space::SpectralElementSpace2D) = -SpectralElementSpace2D( - nothing, # drop topology - Adapt.adapt(to, space.quadrature_style), - Adapt.adapt(to, space.global_geometry), - Adapt.adapt(to, space.local_geometry), - Adapt.adapt(to, space.ghost_geometry), - Adapt.adapt(to, space.local_dss_weights), - Adapt.adapt(to, space.ghost_dss_weights), - Adapt.adapt(to, space.internal_surface_geometry), - Adapt.adapt(to, space.boundary_surface_geometries), -) + SpectralElementSpace2D( + nothing, # drop topology + Adapt.adapt(to, space.quadrature_style), + Adapt.adapt(to, space.global_geometry), + Adapt.adapt(to, space.local_geometry), + Adapt.adapt(to, space.ghost_geometry), + Adapt.adapt(to, space.local_dss_weights), + Adapt.adapt(to, space.ghost_dss_weights), + Adapt.adapt(to, space.internal_surface_geometry), + Adapt.adapt(to, space.boundary_surface_geometries), + ) @@ -788,4 +789,3 @@ function Base.iterate( return ((i, j), e), ((i, j), e) end end - diff --git a/test/Operators/spectralelement/rectilinear_cuda.jl b/test/Operators/spectralelement/rectilinear_cuda.jl index bceec3c120..ef47f54556 100644 --- a/test/Operators/spectralelement/rectilinear_cuda.jl +++ b/test/Operators/spectralelement/rectilinear_cuda.jl @@ -20,8 +20,12 @@ quad = Spaces.Quadratures.GLL{Nq}() grid_mesh = Meshes.RectilinearMesh(domain, 17, 16) -grid_topology_cpu = - Topologies.Topology2D(ClimaComms.SingletonCommsContext(ClimaCore.Device.device(;disablegpu=true)), grid_mesh) +grid_topology_cpu = Topologies.Topology2D( + ClimaComms.SingletonCommsContext( + ClimaCore.Device.device(; disablegpu = true), + ), + grid_mesh, +) grid_space_cpu = Spaces.SpectralElementSpace2D(grid_topology_cpu, quad) coords_cpu = Fields.coordinate_field(grid_space_cpu) @@ -31,8 +35,10 @@ grad = Operators.Gradient() gradf_cpu = grad.(f_cpu) -grid_topology = - Topologies.Topology2D(ClimaComms.SingletonCommsContext(ClimaCore.Device.device()), grid_mesh) +grid_topology = Topologies.Topology2D( + ClimaComms.SingletonCommsContext(ClimaCore.Device.device()), + grid_mesh, +) grid_space = Spaces.SpectralElementSpace2D(grid_topology, quad) coords = Fields.coordinate_field(grid_space) @@ -366,4 +372,4 @@ end @test ∇⁴y_ref ≈ ∇⁴y rtol = 2e-2 end end -==# \ No newline at end of file +==# From 10ab6f8f47b15cda3171f8e9a3f7a7239ec2a99c Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 2 Feb 2023 17:44:44 -0800 Subject: [PATCH 09/16] add CUDA reduce --- src/DataLayouts/cuda.jl | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/DataLayouts/cuda.jl b/src/DataLayouts/cuda.jl index 6400acfd36..6d189d5a01 100644 --- a/src/DataLayouts/cuda.jl +++ b/src/DataLayouts/cuda.jl @@ -76,3 +76,58 @@ function Base.copyto!( CUDA.@cuda threads = (Nij, Nij) blocks = (Nh,) knl_copyto!(dest, bc) return dest end + +function knl_mapreduce!(dest, fn, op, src) + + #= + nij, nh = size(dest) + + thread_idx = CUDA.threadIdx().x + block_idx = CUDA.blockIdx().x + block_dim = CUDA.blockDim().x + + # mapping to global idx to make insensitive + # to number of blocks / threads per device + global_idx = thread_idx + (block_idx - 1) * block_dim + + nx, ny = nij, nij + i = global_idx % nx == 0 ? nx : global_idx % nx + j = cld(global_idx, nx) + h = ((global_idx-1) % (nx*nx)) + 1 + =# + + i = CUDA.threadIdx().x + j = CUDA.threadIdx().y + + h = CUDA.blockIdx().x + + p_dest = slab(dest, h) + p_src = slab(src, h) + + if h == 1 + @inbounds p_dest[i, j] = fn(p_src[i, j]) + else + @inbounds p_dest[i, j] = op(p_dest[i, j], fn(p_src[i, j])) + end + + return nothing +end + +function Base.reduce( + op::Op, + bc::Union{ + IJFH{<:Any, Nij, A}, + Base.Broadcast.Broadcasted{IJFHStyle{Nij, A}}, + }, +) where {Op, Nij, A <: CUDA.CuArray} + # mapreduce across DataSlab2D + _, _, _, _, Nh = size(bc) + S = eltype(bc) + T = eltype(A) + Nf = typesize(T, S) + # dest array + dest = IJF{S,Nij}(CuArray{T}(undef,Nij, Nij, Nf)) + CUDA.@cuda threads = (Nij, Nij) blocks = (Nh,) knl_reduce!(dest, op, bc) + # copy to CPU, and final reduce + reduce(op, IJF{S,Nij}(Array(parent(dest)))) +end From 4de5c8c8b590e5eb5d6a0f5e8bf0691e9998c036 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 6 Feb 2023 16:02:07 -0800 Subject: [PATCH 10/16] apply formatter --- examples/sphere/shallow_water_cuda.jl | 73 ++++++++++++++------------- src/DataLayouts/cuda.jl | 6 +-- src/Fields/broadcast.jl | 7 ++- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/examples/sphere/shallow_water_cuda.jl b/examples/sphere/shallow_water_cuda.jl index 53e2ba7358..12f4d2767b 100644 --- a/examples/sphere/shallow_water_cuda.jl +++ b/examples/sphere/shallow_water_cuda.jl @@ -448,41 +448,42 @@ end function rhs!(dYdt, y, parameters, t) #@nvtx "rhs!" color = colorant"red" begin - (; f, h_s, ghost_buffer, params) = parameters - (; D₄, g) = params - - div = Operators.Divergence() - wdiv = Operators.WeakDivergence() - grad = Operators.Gradient() - wgrad = Operators.WeakGradient() - curl = Operators.Curl() - wcurl = Operators.WeakCurl() - - # Compute hyperviscosity first - #@nvtx "Hyperviscosity (rhs!)" color = colorant"green" begin - @. dYdt.h = wdiv(grad(y.h)) - @. dYdt.u = - wgrad(div(y.u)) - Geometry.Covariant12Vector( - wcurl(Geometry.Covariant3Vector(curl(y.u))), - ) - - Spaces.weighted_dss2!(dYdt, ghost_buffer) - - @. dYdt.h = -D₄ * wdiv(grad(dYdt.h)) - # split to avoid "device kernel image is invalid (code 200, ERROR_INVALID_IMAGE)" - @. dYdt.u = wgrad(div(dYdt.u)) - Geometry.Covariant12Vector(wcurl(Geometry.Covariant3Vector(curl(dYdt.u)))) - @. dYdt.u = -D₄ * dYdt.u - #end - #@nvtx "h and u (rhs!)" color = colorant"blue" begin - # Add in pieces - @. begin - dYdt.h += -wdiv(y.h * y.u) - dYdt.u += - -grad(g * (y.h + h_s) + norm(y.u)^2 / 2) #+ - dYdt.u += y.u × (f + curl(y.u)) - end - Spaces.weighted_dss2!(dYdt, ghost_buffer) - #end + (; f, h_s, ghost_buffer, params) = parameters + (; D₄, g) = params + + div = Operators.Divergence() + wdiv = Operators.WeakDivergence() + grad = Operators.Gradient() + wgrad = Operators.WeakGradient() + curl = Operators.Curl() + wcurl = Operators.WeakCurl() + + # Compute hyperviscosity first + #@nvtx "Hyperviscosity (rhs!)" color = colorant"green" begin + @. dYdt.h = wdiv(grad(y.h)) + @. dYdt.u = + wgrad(div(y.u)) - + Geometry.Covariant12Vector(wcurl(Geometry.Covariant3Vector(curl(y.u)))) + + Spaces.weighted_dss2!(dYdt, ghost_buffer) + + @. dYdt.h = -D₄ * wdiv(grad(dYdt.h)) + # split to avoid "device kernel image is invalid (code 200, ERROR_INVALID_IMAGE)" + @. dYdt.u = + wgrad(div(dYdt.u)) - Geometry.Covariant12Vector( + wcurl(Geometry.Covariant3Vector(curl(dYdt.u))), + ) + @. dYdt.u = -D₄ * dYdt.u + #end + #@nvtx "h and u (rhs!)" color = colorant"blue" begin + # Add in pieces + @. begin + dYdt.h += -wdiv(y.h * y.u) + dYdt.u += -grad(g * (y.h + h_s) + norm(y.u)^2 / 2) #+ + dYdt.u += y.u × (f + curl(y.u)) + end + Spaces.weighted_dss2!(dYdt, ghost_buffer) + #end #end return dYdt end @@ -531,7 +532,7 @@ function shallow_water_driver_cuda(ARGS, ::Type{FT}) where {FT} Spaces.weighted_dss_ghost2!(Y, ghost_buffer) parameters = - (; f = f, h_s = h_s, ghost_buffer = ghost_buffer, params = test.params) + (; f = f, h_s = h_s, ghost_buffer = ghost_buffer, params = test.params) rhs!(dYdt, Y, parameters, 0.0) # Solve the ODE diff --git a/src/DataLayouts/cuda.jl b/src/DataLayouts/cuda.jl index 6d189d5a01..4dd729f1fa 100644 --- a/src/DataLayouts/cuda.jl +++ b/src/DataLayouts/cuda.jl @@ -109,7 +109,7 @@ function knl_mapreduce!(dest, fn, op, src) else @inbounds p_dest[i, j] = op(p_dest[i, j], fn(p_src[i, j])) end - + return nothing end @@ -126,8 +126,8 @@ function Base.reduce( T = eltype(A) Nf = typesize(T, S) # dest array - dest = IJF{S,Nij}(CuArray{T}(undef,Nij, Nij, Nf)) + dest = IJF{S, Nij}(CuArray{T}(undef, Nij, Nij, Nf)) CUDA.@cuda threads = (Nij, Nij) blocks = (Nh,) knl_reduce!(dest, op, bc) # copy to CPU, and final reduce - reduce(op, IJF{S,Nij}(Array(parent(dest)))) + reduce(op, IJF{S, Nij}(Array(parent(dest)))) end diff --git a/src/Fields/broadcast.jl b/src/Fields/broadcast.jl index ad770d52eb..e4bc9dd7b0 100644 --- a/src/Fields/broadcast.jl +++ b/src/Fields/broadcast.jl @@ -263,8 +263,11 @@ end end # types aren't isbits -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::Type{T}, args...) where {T} = - Base.Broadcast.broadcasted(fs, (x...) -> T(x...), args...) +Base.Broadcast.broadcasted( + fs::AbstractFieldStyle, + ::Type{T}, + args..., +) where {T} = Base.Broadcast.broadcasted(fs, (x...) -> T(x...), args...) # Specialize handling of +, *, muladd, so that we can support broadcasting over NamedTuple element types # Required for ODE solvers From 45d5a9297e02736ea6c1aabd6cb0d6da3ec8ee8a Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Wed, 8 Feb 2023 16:15:01 -0800 Subject: [PATCH 11/16] define device for extruded spaces --- src/Spaces/extruded.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Spaces/extruded.jl b/src/Spaces/extruded.jl index ba37367287..3f4f39aee9 100644 --- a/src/Spaces/extruded.jl +++ b/src/Spaces/extruded.jl @@ -140,6 +140,8 @@ quadrature_style(space::ExtrudedFiniteDifferenceSpace) = space.horizontal_space.quadrature_style topology(space::ExtrudedFiniteDifferenceSpace) = space.horizontal_space.topology +Device.device(space::ExtrudedFiniteDifferenceSpace) = + Device.device(topology(space)) vertical_topology(space::ExtrudedFiniteDifferenceSpace) = space.vertical_topology From e0277f5a4e602b94eca72f907d59b8f1591002c7 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 9 Feb 2023 13:21:54 -0800 Subject: [PATCH 12/16] define device for IntervalTopology --- src/Topologies/Topologies.jl | 2 ++ src/Topologies/interval.jl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/Topologies/Topologies.jl b/src/Topologies/Topologies.jl index 6dda097e41..319306df09 100644 --- a/src/Topologies/Topologies.jl +++ b/src/Topologies/Topologies.jl @@ -2,6 +2,8 @@ module Topologies using DocStringExtensions +import ClimaComms + import ..Device import ..Geometry import ..Domains: Domains, coordinate_type diff --git a/src/Topologies/interval.jl b/src/Topologies/interval.jl index c89ef76191..5d0ce30ed1 100644 --- a/src/Topologies/interval.jl +++ b/src/Topologies/interval.jl @@ -10,6 +10,8 @@ struct IntervalTopology{M <: Meshes.IntervalMesh, B} <: AbstractIntervalTopology boundaries::B end +Device.device(::IntervalTopology) = ClimaComms.CPU() + function IntervalTopology(mesh::Meshes.IntervalMesh) if Domains.isperiodic(mesh.domain) boundaries = NamedTuple() From 1910ed9350f176c21811ee4f1223078811ad4ab2 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 9 Feb 2023 17:30:49 -0800 Subject: [PATCH 13/16] add operator unit tests --- .../spectralelement/rectilinear_cuda.jl | 29 +++++++++++++++---- test/runtests.jl | 1 + 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/test/Operators/spectralelement/rectilinear_cuda.jl b/test/Operators/spectralelement/rectilinear_cuda.jl index ef47f54556..52c4e6a158 100644 --- a/test/Operators/spectralelement/rectilinear_cuda.jl +++ b/test/Operators/spectralelement/rectilinear_cuda.jl @@ -30,10 +30,7 @@ grid_space_cpu = Spaces.SpectralElementSpace2D(grid_topology_cpu, quad) coords_cpu = Fields.coordinate_field(grid_space_cpu) f_cpu = sin.(coords_cpu.x .+ 2 .* coords_cpu.y) - -grad = Operators.Gradient() -gradf_cpu = grad.(f_cpu) - +g_cpu = Geometry.UVVector.(sin.(coords_cpu.x), 2 .* cos.(coords_cpu.y)) grid_topology = Topologies.Topology2D( ClimaComms.SingletonCommsContext(ClimaCore.Device.device()), @@ -44,10 +41,32 @@ coords = Fields.coordinate_field(grid_space) CUDA.allowscalar(false) f = sin.(coords.x .+ 2 .* coords.y) +g = Geometry.UVVector.(sin.(coords.x), 2 .* cos.(coords.y)) grad = Operators.Gradient() -gradf = grad.(f) +wgrad = Operators.WeakGradient() + +@test Array(parent(grad.(f))) ≈ parent(grad.(f_cpu)) +@test Array(parent(wgrad.(f))) ≈ parent(wgrad.(f_cpu)) + +div = Operators.Divergence() +wdiv = Operators.WeakDivergence() + +@test Array(parent(div.(g))) ≈ parent(div.(g_cpu)) +@test Array(parent(wdiv.(g))) ≈ parent(wdiv.(g_cpu)) +@test Array(parent(div.(grad.(f)))) ≈ parent(div.(grad.(f_cpu))) # composite + +curl = Operators.Curl() +wcurl = Operators.WeakCurl() +@test Array(parent(curl.(Geometry.Covariant12Vector.(g)))) ≈ + parent(curl.(Geometry.Covariant12Vector.(g_cpu))) +@test Array(parent(curl.(Geometry.Covariant3Vector.(f)))) ≈ + parent(curl.(Geometry.Covariant3Vector.(f_cpu))) +@test Array(parent(wcurl.(Geometry.Covariant12Vector.(g)))) ≈ + parent(wcurl.(Geometry.Covariant12Vector.(g))) +@test Array(parent(wcurl.(Geometry.Covariant3Vector.(f)))) ≈ + parent(wcurl.(Geometry.Covariant3Vector.(f_cpu))) diff --git a/test/runtests.jl b/test/runtests.jl index 9348bcd03c..0faf444497 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -99,6 +99,7 @@ if "CUDA" in ARGS @safetestset "GPU - data" begin @time include("gpu/data.jl") end @safetestset "GPU - device" begin @time include("gpu/device.jl") end @safetestset "Spaces - serial CUDA DSS" begin @time include("Spaces/ddss1.jl") end + @safetestset "Operators - CUDA" begin @time include("Operators/spectralelement/rectilinear_cuda.jl") end end #! format: on From 943d4cd6a2905ba8b649d55d40c01723bc1e7ebf Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Fri, 10 Feb 2023 10:37:22 -0800 Subject: [PATCH 14/16] fix WeakCurl, update tests --- src/Operators/spectralelement.jl | 8 +- .../spectralelement/rectilinear_cuda.jl | 335 +----------------- 2 files changed, 11 insertions(+), 332 deletions(-) diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index 47183e52bb..64f6a744ec 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -1349,8 +1349,8 @@ function apply_operator_kernel(op::WeakCurl{(1, 2)}, space, ij, slabidx, arg) Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, j] Dᵀ₂Wv₁ = D[1, j] ⊠ Wv₁[i, 1] for k in 2:Nq - Dᵀ₁v₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, j] - Dᵀ₂v₁ = Dᵀ₂Wv₁ ⊞ D[k, j] ⊠ Wv₁[i, k] + Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, j] + Dᵀ₂Wv₁ = Dᵀ₂Wv₁ ⊞ D[k, j] ⊠ Wv₁[i, k] end return Geometry.Contravariant3Vector( RecursiveApply.rdiv(Dᵀ₂Wv₁ ⊟ Dᵀ₁Wv₂, local_geometry.WJ), @@ -1369,8 +1369,8 @@ function apply_operator_kernel(op::WeakCurl{(1, 2)}, space, ij, slabidx, arg) Dᵀ₂Wv₃ = Dᵀ₂Wv₃ ⊞ D[k, j] ⊠ Wv₃[i, k] end return Geometry.Contravariant12Vector( - RecursiveApply.rdiv(Dᵀ₂Wv₃, local_geometry.WJ), - ⊟(RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ)), + ⊟(RecursiveApply.rdiv(Dᵀ₂Wv₃, local_geometry.WJ)), + RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), ) else error("invalid return type") diff --git a/test/Operators/spectralelement/rectilinear_cuda.jl b/test/Operators/spectralelement/rectilinear_cuda.jl index 52c4e6a158..18cafae580 100644 --- a/test/Operators/spectralelement/rectilinear_cuda.jl +++ b/test/Operators/spectralelement/rectilinear_cuda.jl @@ -30,7 +30,11 @@ grid_space_cpu = Spaces.SpectralElementSpace2D(grid_topology_cpu, quad) coords_cpu = Fields.coordinate_field(grid_space_cpu) f_cpu = sin.(coords_cpu.x .+ 2 .* coords_cpu.y) -g_cpu = Geometry.UVVector.(sin.(coords_cpu.x), 2 .* cos.(coords_cpu.y)) +g_cpu = + Geometry.UVVector.( + sin.(coords_cpu.x), + 2 .* cos.(coords_cpu.y .+ coords_cpu.x), + ) grid_topology = Topologies.Topology2D( ClimaComms.SingletonCommsContext(ClimaCore.Device.device()), @@ -41,7 +45,7 @@ coords = Fields.coordinate_field(grid_space) CUDA.allowscalar(false) f = sin.(coords.x .+ 2 .* coords.y) -g = Geometry.UVVector.(sin.(coords.x), 2 .* cos.(coords.y)) +g = Geometry.UVVector.(sin.(coords.x), 2 .* cos.(coords.y .+ coords.x)) grad = Operators.Gradient() wgrad = Operators.WeakGradient() @@ -64,331 +68,6 @@ wcurl = Operators.WeakCurl() @test Array(parent(curl.(Geometry.Covariant3Vector.(f)))) ≈ parent(curl.(Geometry.Covariant3Vector.(f_cpu))) @test Array(parent(wcurl.(Geometry.Covariant12Vector.(g)))) ≈ - parent(wcurl.(Geometry.Covariant12Vector.(g))) + parent(wcurl.(Geometry.Covariant12Vector.(g_cpu))) @test Array(parent(wcurl.(Geometry.Covariant3Vector.(f)))) ≈ parent(wcurl.(Geometry.Covariant3Vector.(f_cpu))) - - - - - - -#== - -ts_mesh = Meshes.RectilinearMesh(domain, 17, 16) -ts_topology = Topologies.Topology2D(ClimaComms.SingletonCommsContext(), ts_mesh) -ts_space = Spaces.SpectralElementSpace2D(ts_topology, quad) -ts_coords = Fields.coordinate_field(ts_space) - -grid_test_setup = (grid_topology, grid_space, grid_coords) -ts_test_setup = (ts_topology, ts_space, ts_coords) - -@testset "interpolate / restrict" begin - - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - INq = 9 - Iquad = Spaces.Quadratures.GLL{INq}() - Ispace = Spaces.SpectralElementSpace2D(topology, Iquad) - - I = Operators.Interpolate(Ispace) - R = Operators.Restrict(space) - - f = sin.(coords.x .+ 2 .* coords.y) - - interpolated_field = I.(f) - Spaces.weighted_dss2!(interpolated_field) - - @test axes(interpolated_field).quadrature_style == Iquad - @test axes(interpolated_field).topology == topology - - restrict_field = R.(f) - Spaces.weighted_dss2!(restrict_field) - - @test axes(restrict_field).quadrature_style == quad - @test axes(restrict_field).topology == topology - - interp_restrict_field = R.(I.(f)) - Spaces.weighted_dss2!(interp_restrict_field) - - @test axes(interp_restrict_field).quadrature_style == quad - @test axes(interp_restrict_field).topology == topology - - @test norm(interp_restrict_field .- f) ≤ 3.0e-4 - end -end - -@testset "gradient" begin - - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - f = sin.(coords.x .+ 2 .* coords.y) - - grad = Operators.Gradient() - gradf = grad.(f) - Spaces.weighted_dss2!(gradf) - - @test gradf ≈ - Geometry.Covariant12Vector.( - Geometry.UVVector.( - cos.(coords.x .+ 2 .* coords.y), - 2 .* cos.(coords.x .+ 2 .* coords.y), - ), - ) rtol = 1e-2 - - fv = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(coords.x .+ 2 .* coords.y), - ) - gradfv = Geometry.transform.(Ref(Geometry.UVAxis()), grad.(fv)) - Spaces.weighted_dss2!(gradfv) - @test eltype(gradfv) <: Geometry.Axis2Tensor - end -end - - -@testset "weak gradient" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - f = sin.(coords.x .+ 2 .* coords.y) - - wgrad = Operators.WeakGradient() - gradf = wgrad.(f) - Spaces.weighted_dss2!(gradf) - - @test Geometry.UVVector.(gradf) ≈ - Geometry.UVVector.( - cos.(coords.x .+ 2 .* coords.y), - 2 .* cos.(coords.x .+ 2 .* coords.y), - ) rtol = 1e-2 - end -end - -@testset "curl" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(3 .* coords.x .+ 4 .* coords.y), - ) - - curl = Operators.Curl() - curlv = curl.(Geometry.Covariant12Vector.(v)) - Spaces.weighted_dss2!(curlv) - curlv_ref = - Geometry.Contravariant3Vector.( - .-3 .* sin.(3 .* coords.x .+ 4 .* coords.y) .- - 2 .* cos.(coords.x .+ 2 .* coords.y), - ) - - @test curlv ≈ curlv_ref rtol = 1e-2 - end -end - -@testset "curl-curl" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(3 .* coords.x .+ 2 .* coords.y), - ) - curlv_ref = - .-3 .* sin.(3 .* coords.x .+ 2 .* coords.y) .- - 2 .* cos.(coords.x .+ 2 .* coords.y) - curlcurlv_ref1 = - .-6 .* cos.(3 .* coords.x .+ 2 .* coords.y) .+ - 4 .* sin.(coords.x .+ 2 .* coords.y) - curlcurlv_ref2 = - 9 .* cos.(3 .* coords.x .+ 2 .* coords.y) .- - 2 .* sin.(coords.x .+ 2 .* coords.y) - - curl = Operators.Curl() - curlcurlv = - curl.( - Geometry.Covariant3Vector.( - curl.(Geometry.Covariant12Vector.(v)), - ), - ) - Spaces.weighted_dss2!(curlcurlv) - - @test Geometry.UVVector.(curlcurlv) ≈ - Geometry.UVVector.(curlcurlv_ref1, curlcurlv_ref2) rtol = 4e-2 - end -end - -@testset "weak curl-strong curl" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(3 .* coords.x .+ 2 .* coords.y), - ) - curlv_ref = - .-3 .* sin.(3 .* coords.x .+ 2 .* coords.y) .- - 2 .* cos.(coords.x .+ 2 .* coords.y) - curlcurlv_ref1 = - .-6 .* cos.(3 .* coords.x .+ 2 .* coords.y) .+ - 4 .* sin.(coords.x .+ 2 .* coords.y) - curlcurlv_ref2 = - 9 .* cos.(3 .* coords.x .+ 2 .* coords.y) .- - 2 .* sin.(coords.x .+ 2 .* coords.y) - - curl = Operators.Curl() - wcurl = Operators.WeakCurl() - curlcurlv = - Geometry.UVVector.( - wcurl.( - Geometry.Covariant3Vector.( - curl.(Geometry.Covariant12Vector.(v)), - ), - ), - ) - Spaces.weighted_dss2!(curlcurlv) - - @test curlcurlv ≈ Geometry.UVVector.(curlcurlv_ref1, curlcurlv_ref2) rtol = - 4e-2 - end -end - -@testset "weak curl" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(3 .* coords.x .+ 4 .* coords.y), - ) - - wcurl = Operators.WeakCurl() - curlv = wcurl.(Geometry.Covariant12Vector.(v)) - Spaces.weighted_dss2!(curlv) - curlv_ref = - .-3 .* sin.(3 .* coords.x .+ 4 .* coords.y) .- - 2 .* cos.(coords.x .+ 2 .* coords.y) - - @test curlv ≈ Geometry.Contravariant3Vector.(curlv_ref) rtol = 1e-2 - end -end - -@testset "div" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(3 .* coords.x .+ 2 .* coords.y), - ) - - div = Operators.Divergence() - divv = div.(v) - Spaces.weighted_dss2!(divv) - divv_ref = - cos.(coords.x .+ 2 .* coords.y) .- - 2 .* sin.(3 .* coords.x .+ 2 .* coords.y) - - @test divv ≈ divv_ref rtol = 1e-2 - end -end - - -@testset "weak div" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = - Geometry.UVVector.( - sin.(coords.x .+ 2 .* coords.y), - cos.(3 .* coords.x .+ 2 .* coords.y), - ) - - wdiv = Operators.WeakDivergence() - divv = wdiv.(v) - Spaces.weighted_dss2!(divv) - divv_ref = - cos.(coords.x .+ 2 .* coords.y) .- - 2 .* sin.(3 .* coords.x .+ 2 .* coords.y) - - @test divv ≈ divv_ref rtol = 1e-2 - end -end - - -@testset "annhilator property: curl-grad" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - f = sin.(coords.x .+ 2 .* coords.y) - - grad = Operators.Gradient() - gradf = grad.(f) - Spaces.weighted_dss2!(gradf) - - curl = Operators.Curl() - curlgradf = curl.(gradf) - Spaces.weighted_dss2!(curlgradf) - - @test norm(curlgradf) < 1e-12 - end -end - -@testset "annhilator property: div-curl" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - v = Geometry.Covariant3Vector.(sin.(coords.x .+ 2 .* coords.y)) - curl = Operators.Curl() - curlv = curl.(v) - Spaces.weighted_dss2!(curlv) - - div = Operators.Divergence() - divcurlv = div.(curlv) - Spaces.weighted_dss2!(divcurlv) - - @test norm(divcurlv) < 1e-12 - end -end - -@testset "scalar hyperdiffusion" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - k = 2 - l = 3 - y = @. sin(k * coords.x + l * coords.y) - ∇⁴y_ref = @. (k^2 + l^2)^2 * sin(k * coords.x + l * coords.y) - - wdiv = Operators.WeakDivergence() - grad = Operators.Gradient() - χ = Spaces.weighted_dss2!(@. wdiv(grad(y))) - ∇⁴y = Spaces.weighted_dss2!(@. wdiv(grad(χ))) - - @test ∇⁴y_ref ≈ ∇⁴y rtol = 2e-2 - end -end - -@testset "vector hyperdiffusion" begin - for (topology, space, coords) in (grid_test_setup, ts_test_setup) - k = 2 - l = 3 - y = @. Geometry.UVVector(sin(k * coords.x + l * coords.y), 0.0) - ∇⁴y_ref = @. Geometry.UVVector( - (k^2 + l^2)^2 * sin(k * coords.x + l * coords.y), - 0.0, - ) - curl = Operators.Curl() - wcurl = Operators.WeakCurl() - - sdiv = Operators.Divergence() - wgrad = Operators.WeakGradient() - - χ = Spaces.weighted_dss2!( - @. Geometry.UVVector(wgrad(sdiv(y))) - Geometry.UVVector( - wcurl( - Geometry.Covariant3Vector( - curl(Geometry.Covariant12Vector(y)), - ), - ), - ) - ) - ∇⁴y = Spaces.weighted_dss2!( - @. Geometry.UVVector(wgrad(sdiv(χ))) - Geometry.UVVector( - wcurl( - Geometry.Covariant3Vector( - curl(Geometry.Covariant12Vector(χ)), - ), - ), - ) - ) - - @test ∇⁴y_ref ≈ ∇⁴y rtol = 2e-2 - end -end -==# From 5a964387e45c0eacee5cd215a6af88671019a2b9 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 23 Feb 2023 14:23:39 -0800 Subject: [PATCH 15/16] add CUDA shallowwater example to buildkite --- .buildkite/pipeline.yml | 9 +++++++++ examples/sphere/shallow_water_cuda.jl | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index dcc7522643..713a4b8822 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -544,6 +544,15 @@ steps: artifact_paths: - "examples/sphere/output/cg_sphere_shallowwater_rossby_haurwitz_alpha0/*" + - label: ":flower_playing_cards: CUDA Shallow-water 2D sphere" + key: "cuda_shallowwater_2d_cg_sphere" + command: + - "julia --color=yes --project=examples examples/sphere/shallow_water_cuda.jl" + artifact_paths: + - "examples/sphere/output/cuda_shallowwater_2d_cg_sphere/*" + agent: + slurm_gpu: 1 + - group: "Examples hybrid sphere" steps: diff --git a/examples/sphere/shallow_water_cuda.jl b/examples/sphere/shallow_water_cuda.jl index 0263547d1e..77bfe90338 100644 --- a/examples/sphere/shallow_water_cuda.jl +++ b/examples/sphere/shallow_water_cuda.jl @@ -550,9 +550,9 @@ function shallow_water_driver_cuda(ARGS, ::Type{FT}) where {FT} adaptive = false, progress_message = (dt, u, p, t) -> t, ) - =# sol = @timev OrdinaryDiffEq.solve!(integrator) + =# return nothing end From f9ab346a0e6a3b1db5fcbfb6231806d82aaacbf1 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 23 Feb 2023 14:25:26 -0800 Subject: [PATCH 16/16] remove broken mapreduce --- .buildkite/pipeline.yml | 4 +-- src/DataLayouts/cuda.jl | 55 ----------------------------------------- 2 files changed, 2 insertions(+), 57 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 713a4b8822..7e1fc963da 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -550,8 +550,8 @@ steps: - "julia --color=yes --project=examples examples/sphere/shallow_water_cuda.jl" artifact_paths: - "examples/sphere/output/cuda_shallowwater_2d_cg_sphere/*" - agent: - slurm_gpu: 1 + agents: + slurm_gpus: 1 - group: "Examples hybrid sphere" steps: diff --git a/src/DataLayouts/cuda.jl b/src/DataLayouts/cuda.jl index 4dd729f1fa..6400acfd36 100644 --- a/src/DataLayouts/cuda.jl +++ b/src/DataLayouts/cuda.jl @@ -76,58 +76,3 @@ function Base.copyto!( CUDA.@cuda threads = (Nij, Nij) blocks = (Nh,) knl_copyto!(dest, bc) return dest end - -function knl_mapreduce!(dest, fn, op, src) - - #= - nij, nh = size(dest) - - thread_idx = CUDA.threadIdx().x - block_idx = CUDA.blockIdx().x - block_dim = CUDA.blockDim().x - - # mapping to global idx to make insensitive - # to number of blocks / threads per device - global_idx = thread_idx + (block_idx - 1) * block_dim - - nx, ny = nij, nij - i = global_idx % nx == 0 ? nx : global_idx % nx - j = cld(global_idx, nx) - h = ((global_idx-1) % (nx*nx)) + 1 - =# - - i = CUDA.threadIdx().x - j = CUDA.threadIdx().y - - h = CUDA.blockIdx().x - - p_dest = slab(dest, h) - p_src = slab(src, h) - - if h == 1 - @inbounds p_dest[i, j] = fn(p_src[i, j]) - else - @inbounds p_dest[i, j] = op(p_dest[i, j], fn(p_src[i, j])) - end - - return nothing -end - -function Base.reduce( - op::Op, - bc::Union{ - IJFH{<:Any, Nij, A}, - Base.Broadcast.Broadcasted{IJFHStyle{Nij, A}}, - }, -) where {Op, Nij, A <: CUDA.CuArray} - # mapreduce across DataSlab2D - _, _, _, _, Nh = size(bc) - S = eltype(bc) - T = eltype(A) - Nf = typesize(T, S) - # dest array - dest = IJF{S, Nij}(CuArray{T}(undef, Nij, Nij, Nf)) - CUDA.@cuda threads = (Nij, Nij) blocks = (Nh,) knl_reduce!(dest, op, bc) - # copy to CPU, and final reduce - reduce(op, IJF{S, Nij}(Array(parent(dest)))) -end