diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9f76b4e..9a16d46 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,9 +12,8 @@ jobs: fail-fast: false matrix: version: - - '1.9' # Minimum version supporting extensions - - '1.10' # LTS version - - '1' # Latest stable 1.x release of Julia + - '1.10' # Minimum version supporting Data module creation + - '1' # Latest stable 1.x release of Julia #- 'nightly' os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index dd69a5d..3daf8b4 100644 --- a/Project.toml +++ b/Project.toml @@ -28,7 +28,7 @@ Enzyme = "0.11, 0.12, 0.13" MacroTools = "0.5" Polyester = "0.7" StaticArrays = "1" -julia = "1.9" # Minimum version supporting extensions +julia = "1.10" # Minimum version supporting Data module creation [extras] TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" diff --git a/src/FieldAllocators.jl b/src/FieldAllocators.jl new file mode 100644 index 0000000..77635d5 --- /dev/null +++ b/src/FieldAllocators.jl @@ -0,0 +1,50 @@ +""" +Module FieldAllocators + +Provides macros for the allocation of different kinds of fields on a grid of size `gridsize`. + +# Usage + using ParallelStencil.FieldAllocators + +# Macros + +###### Multiple fields at once +- [`@allocate`](@ref) + +###### Scalar fields +- [`@Field`](@ref) +- `{X|Y|Z}Field`, e.g. [`@XField`](@ref) +- `B{X|Y|Z}Field`, e.g. [`@BXField`](@ref) +- `{XX|YY|ZZ|XY|XZ|YZ}Field`, e.g. [`@XXField`](@ref) + +###### Vector fields +- [`@VectorField`](@ref) +- [`@BVectorField`](@ref) + +###### Tensor fields +- [`@TensorField`](@ref) + +To see a description of a macro type `?` (including the `@`). +""" +module FieldAllocators + import ..ParallelKernel + @doc replace(ParallelKernel.FieldAllocators.ALLOCATE_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro allocate(args...) 
check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@allocate($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.FIELD_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro Field(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@Field($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.VECTORFIELD_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro VectorField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@VectorField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.BVECTORFIELD_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro BVectorField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@BVectorField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro TensorField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@TensorField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.VECTORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro XField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.BVECTORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro BXField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@BXField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.VECTORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro YField(args...) 
check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@YField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.BVECTORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro BYField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@BYField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.VECTORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro ZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@ZField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.BVECTORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro BZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@BZField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro XXField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XXField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro YYField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@YYField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro ZZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@ZZField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro XYField(args...) 
check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XYField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro XZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@XZField($(args...)))); end + @doc replace(ParallelKernel.FieldAllocators.TENSORFIELD_COMP_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro YZField(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.FieldAllocators.@YZField($(args...)))); end + + export @allocate, @Field, @VectorField, @BVectorField, @TensorField, @XField, @BXField, @YField, @BYField, @ZField, @BZField, @XXField, @YYField, @ZZField, @XYField, @XZField, @YZField +end \ No newline at end of file diff --git a/src/ParallelKernel/Data.jl b/src/ParallelKernel/Data.jl index c1e1833..ed8c357 100644 --- a/src/ParallelKernel/Data.jl +++ b/src/ParallelKernel/Data.jl @@ -1,7 +1,22 @@ const DATA_DOC = """ Module Data -The module Data is created in the module where `@init_parallel_kernel` is called from. It provides the following types: +The module Data is created in the module where `@init_parallel_kernel` is called from. It provides data types, some of which are organized in submodules. + +It contains the following submodules: + + Data.Fields +!!! note "Advanced" + Data.Device + Data.Fields.Device + + For each datatype in Data and Data.Fields there exists a corresponding datatype in Data.Device and Data.Fields.Device, respectively. + + !!! warning + These Device datatypes are not intended for explicit manual usage. [`@parallel`](@ref) and [`@parallel_indices`](@ref) convert datatypes from Data and Data.Fields automatically to corresponding datatypes from Data.Device and Data.Fields.Device, respectively, in kernels when required. 
+ + +The module Data provides the following types at the top level: -------------------------------------------------------------------------------- Data.Number @@ -54,27 +69,71 @@ Expands to: `NTuple{N_tuple, Data.CellArray{N, B}}` | `NamedTuple{names, NTuple{ Expands to: `NTuple{N_tuple, Data.Cell{S}}` | `NamedTuple{names, NTuple{N_tuple, Data.Cell{S}}}` | `Union{Data.CellTuple{N_tuple, S}, Data.NamedCellTuple{N_tuple, S}}` -------------------------------------------------------------------------------- -!!! note "Advanced" - Data.DeviceArray{ndims} +-------------------------------------------------------------------------------- + Submodule Data.Fields - Expands to `Data.DeviceArray{numbertype, ndims}`, where `numbertype` is the datatype selected with [`@init_parallel_kernel`](@ref) and the datatype `Data.DeviceArray` is chosen to be compatible with the package for parallelization selected with [`@init_parallel_kernel`](@ref) (Array for Threads or Polyester, CUDA.CuDeviceArray for CUDA AMDGPU.ROCDeviceArray for AMDGPU). +The submodule Data.Fields provides the types for fields allocated with macros from ParallelKernel.FieldAllocators. - !!! warning - This datatype is not intended for explicit manual usage. [`@parallel`](@ref) and [`@parallel_indices`](@ref) convert CUDA.CuArray and AMDGPU.ROCArray automatically to CUDA.CuDeviceArray and AMDGPU.ROCDeviceArray in kernels when required. +!!! note "Usage" + using .Data.Fields # Note the preceding dot! 
- -------------------------------------------------------------------------------- - Data.DeviceCellArray{ndims} +Data.Fields provides the following types: - Expands to `Data.DeviceCellArray{numbertype, ndims}`, where `numbertype` is the datatype selected with [`@init_parallel_kernel`](@ref) and the datatype `Data.DeviceCellArray` is chosen to be compatible with the package for parallelization selected with [`@init_parallel_kernel`](@ref) (CPUCellArray for Threads or Polyester, CuDeviceCellArray for CUDA and ROCDeviceCellArray for AMDGPU). +-------------------------------------------------------------------------------- + Field - !!! warning - This datatype is not intended for explicit manual usage. [`@parallel`](@ref) and [`@parallel_indices`](@ref) convert CUDA.CuArray and AMDGPU.ROCArray automatically to CUDA.CuDeviceArray and AMDGPU.ROCDeviceArray in kernels when required. +A scalar field, on a grid of size `gridsize`; allocated with `@Field`. + +-------------------------------------------------------------------------------- + {X|Y|Z}Field + +A scalar field of the same size as the {X|Y|Z}-component of a `VectorField`, on a grid of size `gridsize`; allocated with `@{X|Y|Z}Field`. + +-------------------------------------------------------------------------------- + B{X|Y|Z}Field + +A scalar field of the same size as the {X|Y|Z}-component of a `BVectorField` (a vector field including boundaries), on a grid of size `gridsize`; allocated with `@B{X|Y|Z}Field`. + +-------------------------------------------------------------------------------- + {XX|YY|ZZ|XY|XZ|YZ}Field + +A scalar field of the same size as the {XX|YY|ZZ|XY|XZ|YZ}-component of a `TensorField`, on a grid of size `gridsize`; allocated with `@{XX|YY|ZZ|XY|XZ|YZ}Field`. + +-------------------------------------------------------------------------------- + VectorField + +A vector field, on a grid of size `gridsize`; allocated with `@VectorField`. 
+ +-------------------------------------------------------------------------------- + BVectorField + +A vector field including boundaries, on a grid of size `gridsize`; allocated with `@BVectorField`. + +-------------------------------------------------------------------------------- + TensorField + +A tensor field, on a grid of size `gridsize`; allocated with `@TensorField`. """ const DATA_DOC_NUMBERTYPE_NONE = """ Module Data -The module Data is created in the module where `@init_parallel_kernel` is called from. It provides the following types: +The module Data is created in the module where `@init_parallel_kernel` is called from. It provides data types, some of which are organized in submodules. + +It contains the following submodules: + + Data.Fields +!!! note "Advanced" + Data.Device + Data.Fields.Device + + For each datatype in Data and Data.Fields there exists a corresponding datatype in Data.Device and Data.Fields.Device, respectively. + + !!! warning + These Device datatypes are not intended for explicit manual usage. [`@parallel`](@ref) and [`@parallel_indices`](@ref) convert datatypes from Data and Data.Fields automatically to corresponding datatypes from Data.Device and Data.Fields.Device, respectively, in kernels when required. + + +The module Data provides the following types at the top level: -------------------------------------------------------------------------------- Data.Index @@ -122,249 +181,483 @@ Expands to: `NTuple{N_tuple, Data.CellArray{numbertype, N, B}}` | `NamedTuple{na Expands to: `NTuple{N_tuple, Data.Cell{numbertype, S}}` | `NamedTuple{names, NTuple{N_tuple, Data.Cell{numbertype, S}}}` | `Union{Data.CellTuple{N_tuple, numbertype, S}, Data.NamedCellTuple{N_tuple, numbertype, S}}` -------------------------------------------------------------------------------- -!!! 
note "Advanced" - Data.DeviceArray{numbertype, ndims} +-------------------------------------------------------------------------------- + Submodule Data.Fields - The datatype `Data.DeviceArray` is automatically chosen to be compatible with the package for parallelization selected with [`@init_parallel_kernel`](@ref) (Array for Threads or Polyester, CUDA.CuDeviceArray for CUDA and AMDGPU.ROCDeviceArray for AMDGPU). +The submodule Data.Fields provides the types for fields allocated with macros from ParallelKernel.FieldAllocators. - !!! warning - This datatype is not intended for explicit manual usage. [`@parallel`](@ref) and [`@parallel_indices`](@ref) convert CUDA.CuArray and AMDGPU.ROCArray automatically to CUDA.CuDeviceArray and AMDGPU.ROCDeviceArray in kernels when required. +!!! note "Usage" + using .Data.Fields # Note the preceding dot! - -------------------------------------------------------------------------------- - Data.DeviceCellArray{numbertype, ndims} +Data.Fields provides the following types: - The datatype `Data.DeviceCellArray` is automatically chosen to be compatible with the package for parallelization selected with [`@init_parallel_kernel`](@ref) (CPUCellArray for Threads or Polyester, CuDeviceCellArray for CUDA and ROCDeviceCellArray for AMDGPU). - - !!! warning - This datatype is not intended for explicit manual usage. [`@parallel`](@ref) and [`@parallel_indices`](@ref) convert CUDA.CuArray and AMDGPU.ROCArray automatically to CUDA.CuDeviceArray and AMDGPU.ROCDeviceArray in kernels when required. +-------------------------------------------------------------------------------- + Field + +A scalar field, on a grid of size `gridsize`; allocated with `@Field`. + +-------------------------------------------------------------------------------- + {X|Y|Z}Field + +A scalar field of the same size as the {X|Y|Z}-component of a `VectorField`, on a grid of size `gridsize`; allocated with `@{X|Y|Z}Field`. 
+ +-------------------------------------------------------------------------------- + B{X|Y|Z}Field + +A scalar field of the same size as the {X|Y|Z}-component of a `BVectorField` (a vector field including boundaries), on a grid of size `gridsize`; allocated with `@B{X|Y|Z}Field`. + +-------------------------------------------------------------------------------- + {XX|YY|ZZ|XY|XZ|YZ}Field + +A scalar field of the same size as the {XX|YY|ZZ|XY|XZ|YZ}-component of a `TensorField`, on a grid of size `gridsize`; allocated with `@{XX|YY|ZZ|XY|XZ|YZ}Field`. + +-------------------------------------------------------------------------------- + VectorField + +A vector field, on a grid of size `gridsize`; allocated with `@VectorField`. + +-------------------------------------------------------------------------------- + BVectorField + +A vector field including boundaries, on a grid of size `gridsize`; allocated with `@BVectorField`. + +-------------------------------------------------------------------------------- + TensorField + +A tensor field, on a grid of size `gridsize`; allocated with `@TensorField`. """ -function Data_cuda(modulename::Symbol, numbertype::DataType, indextype::DataType) + +# EMPTY MODULES + +function Data_none() + :(baremodule Data + end) +end + +function TData_none() + :(baremodule TData + end) +end + + +# CUDA + +function Data_cuda(numbertype::DataType, indextype::DataType) Data_module = if (numbertype == NUMBERTYPE_NONE) - :(baremodule $modulename # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module creation will fail. + :(baremodule $MODULENAME_DATA # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module creation will fail. 
import Base, CUDA, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays # TODO: the constructors defined by CellArrays.@define_CuCellArray lead to pre-compilation issues due to a bug in Julia. We therefore only create the type alias here for now. const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N}} # CellArrays.@define_CuCellArray # export CuCellArray - const Index = $indextype - const Array{T, N} = CUDA.CuArray{T, N} - const DeviceArray{T, N} = CUDA.CuDeviceArray{T, N} - const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const DeviceCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const CellArray{T_elem, N, B} = CuCellArray{<:Cell{T_elem},N,B,T_elem} - const DeviceCellArray{T_elem, N, B} = CellArrays.CellArray{<:DeviceCell{T_elem},N,B,<:CUDA.CuDeviceArray{T_elem,CellArrays._N}} - $(create_shared_exprs(numbertype, indextype)) + const Index = $indextype + const Array{T, N} = CUDA.CuArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CuCellArray{<:Cell{T_elem},N,B,T_elem} + $(Data_xpu_exprs(numbertype)) + $(Data_Device_cuda(numbertype, indextype)) + $(Data_Fields(numbertype, indextype)) end) else - :(baremodule $modulename # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module creation will fail. + :(baremodule $MODULENAME_DATA import Base, CUDA, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays # TODO: the constructors defined by CellArrays.@define_CuCellArray lead to pre-compilation issues due to a bug in Julia. We therefore only create the type alias here for now. 
const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N}} # CellArrays.@define_CuCellArray # export CuCellArray - const Index = $indextype - const Number = $numbertype - const Array{N} = CUDA.CuArray{$numbertype, N} - const DeviceArray{N} = CUDA.CuDeviceArray{$numbertype, N} - const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} - const DeviceCell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} - const CellArray{N, B} = CuCellArray{<:Cell,N,B,$numbertype} - const DeviceCellArray{N, B} = CellArrays.CellArray{<:DeviceCell,N,B,<:CUDA.CuDeviceArray{$numbertype,CellArrays._N}} - const TArray{T, N} = CUDA.CuArray{T, N} - const DeviceTArray{T, N} = CUDA.CuDeviceArray{T, N} - const TCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const DeviceTCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const TCellArray{T_elem, N, B} = CuCellArray{<:TCell{T_elem},N,B,T_elem} - const DeviceTCellArray{T_elem, N, B} = CellArrays.CellArray{<:DeviceTCell{T_elem},N,B,<:CUDA.CuDeviceArray{T_elem,CellArrays._N}} - $(create_shared_exprs(numbertype, indextype)) + const Index = $indextype + const Number = $numbertype + const Array{N} = CUDA.CuArray{$numbertype, N} + const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} + const CellArray{N, B} = CuCellArray{<:Cell,N,B,$numbertype} + $(Data_xpu_exprs(numbertype)) + $(Data_Device_cuda(numbertype, indextype)) + $(Data_Fields(numbertype, indextype)) end) end return prewalk(rmlines, flatten(Data_module)) end -function Data_amdgpu(modulename::Symbol, numbertype::DataType, indextype::DataType) +function TData_cuda() + TData_module = :( + baremodule $MODULENAME_TDATA + import Base, CUDA, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + # TODO: the constructors defined by 
CellArrays.@define_CuCellArray lead to pre-compilation issues due to a bug in Julia. We therefore only create the type alias here for now. + const CuCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,CUDA.CuArray{T_elem,CellArrays._N}} + # CellArrays.@define_CuCellArray + # export CuCellArray + const Array{T, N} = CUDA.CuArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CuCellArray{<:Cell{T_elem},N,B,T_elem} + $(TData_xpu_exprs()) + $(TData_Device_cuda()) + $(TData_Fields()) + end + ) + return prewalk(rmlines, flatten(TData_module)) +end + +function Data_Device_cuda(numbertype::DataType, indextype::DataType) + Device_module = if (numbertype == NUMBERTYPE_NONE) + :(baremodule $MODULENAME_DEVICE + import Base, CUDA, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Index = $indextype + const Array{T, N} = CUDA.CuDeviceArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CellArray{<:Cell{T_elem},N,B,<:CUDA.CuDeviceArray{T_elem,CellArrays._N}} + $(Data_xpu_exprs(numbertype)) + end) + else + :(baremodule $MODULENAME_DEVICE + import Base, CUDA, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Index = $indextype + const Array{N} = CUDA.CuDeviceArray{$numbertype, N} + const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} + const CellArray{N, B} = CellArrays.CellArray{<:Cell,N,B,<:CUDA.CuDeviceArray{$numbertype,CellArrays._N}} + $(Data_xpu_exprs(numbertype)) + end) + end + return Device_module +end + +function TData_Device_cuda() + :(baremodule $MODULENAME_DEVICE + import Base, CUDA, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Array{T, N} = CUDA.CuDeviceArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, 
StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CellArray{<:Cell{T_elem},N,B,<:CUDA.CuDeviceArray{T_elem,CellArrays._N}} + $(TData_xpu_exprs()) + end) +end + + +# AMDGPU + +function Data_amdgpu(numbertype::DataType, indextype::DataType) Data_module = if (numbertype == NUMBERTYPE_NONE) - :(baremodule $modulename # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module creation will fail. + :(baremodule $MODULENAME_DATA import Base, AMDGPU, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays # TODO: the constructors defined by CellArrays.@define_ROCCellArray lead to pre-compilation issues due to a bug in Julia. We therefore only create the type alias here for now. const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} # CellArrays.@define_ROCCellArray # export ROCCellArray - const Index = $indextype - const Array{T, N} = AMDGPU.ROCArray{T, N} - const DeviceArray{T, N} = AMDGPU.ROCDeviceArray{T, N} - const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const DeviceCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const CellArray{T_elem, N, B} = ROCCellArray{<:Cell{T_elem},N,B,T_elem} - const DeviceCellArray{T_elem, N, B} = CellArrays.CellArray{<:DeviceCell{T_elem},N,B,<:AMDGPU.ROCDeviceArray{T_elem,CellArrays._N}} - $(create_shared_exprs(numbertype, indextype)) + const Index = $indextype + const Array{T, N} = AMDGPU.ROCArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = ROCCellArray{<:Cell{T_elem},N,B,T_elem} + $(Data_xpu_exprs(numbertype)) + $(Data_Device_amdgpu(numbertype, indextype)) + $(Data_Fields(numbertype, indextype)) end) else - :(baremodule $modulename # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module 
creation will fail. + :(baremodule $MODULENAME_DATA import Base, AMDGPU, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays # TODO: the constructors defined by CellArrays.@define_ROCCellArray lead to pre-compilation issues due to a bug in Julia. We therefore only create the type alias here for now. const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} # CellArrays.@define_ROCCellArray # export ROCCellArray - const Index = $indextype - const Number = $numbertype - const Array{N} = AMDGPU.ROCArray{$numbertype, N} - const DeviceArray{N} = AMDGPU.ROCDeviceArray{$numbertype, N} - const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} - const DeviceCell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} - const CellArray{N, B} = ROCCellArray{<:Cell,N,B,$numbertype} - const DeviceCellArray{N, B} = CellArrays.CellArray{<:DeviceCell,N,B,<:AMDGPU.ROCDeviceArray{$numbertype,CellArrays._N}} - const TArray{T, N} = AMDGPU.ROCArray{T, N} - const DeviceTArray{T, N} = AMDGPU.ROCDeviceArray{T, N} - const TCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const DeviceTCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const TCellArray{T_elem, N, B} = ROCCellArray{<:TCell{T_elem},N,B,T_elem} - const DeviceTCellArray{T_elem, N, B} = CellArrays.CellArray{<:DeviceTCell{T_elem},N,B,<:AMDGPU.ROCDeviceArray{T_elem,CellArrays._N}} - $(create_shared_exprs(numbertype, indextype)) + const Index = $indextype + const Number = $numbertype + const Array{N} = AMDGPU.ROCArray{$numbertype, N} + const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} + const CellArray{N, B} = ROCCellArray{<:Cell,N,B,$numbertype} + $(Data_xpu_exprs(numbertype)) + $(Data_Device_amdgpu(numbertype, indextype)) + $(Data_Fields(numbertype, indextype)) end) end 
return prewalk(rmlines, flatten(Data_module)) end -function Data_cpu(modulename::Symbol, numbertype::DataType, indextype::DataType) +function TData_amdgpu() + TData_module = :( + baremodule $MODULENAME_TDATA + import Base, AMDGPU, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + # TODO: the constructors defined by CellArrays.@define_ROCCellArray lead to pre-compilation issues due to a bug in Julia. We therefore only create the type alias here for now. + const ROCCellArray{T,N,B,T_elem} = CellArrays.CellArray{T,N,B,AMDGPU.ROCArray{T_elem,CellArrays._N}} + # CellArrays.@define_ROCCellArray + # export ROCCellArray + const Array{T, N} = AMDGPU.ROCArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = ROCCellArray{<:Cell{T_elem},N,B,T_elem} + $(TData_xpu_exprs()) + $(TData_Device_amdgpu()) + $(TData_Fields()) + end + ) + return prewalk(rmlines, flatten(TData_module)) +end + +function Data_Device_amdgpu(numbertype::DataType, indextype::DataType) + Device_module = if (numbertype == NUMBERTYPE_NONE) + :(baremodule $MODULENAME_DEVICE + import Base, AMDGPU, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Index = $indextype + const Array{T, N} = AMDGPU.ROCDeviceArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CellArray{<:Cell{T_elem},N,B,<:AMDGPU.ROCDeviceArray{T_elem,CellArrays._N}} + $(Data_xpu_exprs(numbertype)) + end) + else + :(baremodule $MODULENAME_DEVICE + import Base, AMDGPU, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Index = $indextype + const Array{N} = AMDGPU.ROCDeviceArray{$numbertype, N} + const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} + const CellArray{N, B} = 
CellArrays.CellArray{<:Cell,N,B,<:AMDGPU.ROCDeviceArray{$numbertype,CellArrays._N}} + $(Data_xpu_exprs(numbertype)) + end) + end + return Device_module +end + +function TData_Device_amdgpu() + :(baremodule $MODULENAME_DEVICE + import Base, AMDGPU, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Array{T, N} = AMDGPU.ROCDeviceArray{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CellArray{<:Cell{T_elem},N,B,<:AMDGPU.ROCDeviceArray{T_elem,CellArrays._N}} + $(TData_xpu_exprs()) + end) +end + + +# CPU + +function Data_cpu(numbertype::DataType, indextype::DataType) Data_module = if (numbertype == NUMBERTYPE_NONE) - :(baremodule $modulename # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module creation will fail. + :(baremodule $MODULENAME_DATA import Base, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays - const Index = $indextype - const Array{T, N} = Base.Array{T, N} - const DeviceArray{T, N} = Base.Array{T, N} - const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const DeviceCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const CellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:Cell{T_elem},N,B,T_elem} - const DeviceCellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:DeviceCell{T_elem},N,B,T_elem} - $(create_shared_exprs(numbertype, indextype)) + const Index = $indextype + const Array{T, N} = Base.Array{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:Cell{T_elem},N,B,T_elem} + $(Data_xpu_exprs(numbertype)) + $(Data_Device_cpu(numbertype, indextype)) + $(Data_Fields(numbertype, indextype)) end) else - :(baremodule $modulename # NOTE: there cannot be any newline before 'module Data' 
or it will create a begin end block and the module creation will fail. + :(baremodule $MODULENAME_DATA import Base, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays - const Index = $indextype - const Number = $numbertype - const Array{N} = Base.Array{$numbertype, N} - const DeviceArray{N} = Base.Array{$numbertype, N} - const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} - const DeviceCell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} - const CellArray{N, B} = CellArrays.CPUCellArray{<:Cell,N,B,$numbertype} - const DeviceCellArray{N, B} = CellArrays.CPUCellArray{<:DeviceCell,N,B,$numbertype} - const TArray{T, N} = Base.Array{T, N} - const DeviceTArray{T, N} = Base.Array{T, N} - const TCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const DeviceTCell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} - const TCellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:TCell{T_elem},N,B,T_elem} - const DeviceTCellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:DeviceTCell{T_elem},N,B,T_elem} - $(create_shared_exprs(numbertype, indextype)) + const Index = $indextype + const Number = $numbertype + const Array{N} = Base.Array{$numbertype, N} + const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} + const CellArray{N, B} = CellArrays.CPUCellArray{<:Cell,N,B,$numbertype} + $(Data_xpu_exprs(numbertype)) + $(Data_Device_cpu(numbertype, indextype)) + $(Data_Fields(numbertype, indextype)) end) end return prewalk(rmlines, flatten(Data_module)) end -function create_shared_exprs(numbertype::DataType, indextype::DataType) - if numbertype == NUMBERTYPE_NONE - quote - const IndexTuple{N_tuple} = NTuple{N_tuple, Index} - const NumberTuple{N_tuple, T} = NTuple{N_tuple, T} - const ArrayTuple{N_tuple, T, N} = NTuple{N_tuple, Array{T, N}} - const DeviceArrayTuple{N_tuple, 
T, N} = NTuple{N_tuple, DeviceArray{T, N}} - const CellTuple{N_tuple, T, S} = NTuple{N_tuple, Cell{T, S}} - const DeviceCellTuple{N_tuple, T, S} = NTuple{N_tuple, DeviceCell{T, S}} - const CellArrayTuple{N_tuple, T_elem, N, B} = NTuple{N_tuple, CellArray{T_elem, N, B}} - const DeviceCellArrayTuple{N_tuple, T_elem, N, B} = NTuple{N_tuple, DeviceCellArray{T_elem, N, B}} - - const NamedIndexTuple{N_tuple, names} = NamedTuple{names, <:IndexTuple{N_tuple}} - const NamedNumberTuple{N_tuple, T, names} = NamedTuple{names, <:NumberTuple{N_tuple, T}} - const NamedArrayTuple{N_tuple, T, N, names} = NamedTuple{names, <:ArrayTuple{N_tuple, T, N}} - const NamedDeviceArrayTuple{N_tuple, T, N, names} = NamedTuple{names, <:DeviceArrayTuple{N_tuple, T, N}} - const NamedCellTuple{N_tuple, T, S, names} = NamedTuple{names, <:CellTuple{N_tuple, T, S}} - const NamedDeviceCellTuple{N_tuple, T, S, names} = NamedTuple{names, <:DeviceCellTuple{N_tuple, T, S}} - const NamedCellArrayTuple{N_tuple, T_elem, N, B, names} = NamedTuple{names, <:CellArrayTuple{N_tuple, T_elem, N, B}} - const NamedDeviceCellArrayTuple{N_tuple, T_elem, N, B, names} = NamedTuple{names, <:DeviceCellArrayTuple{N_tuple, T_elem, N, B}} - - const IndexCollection{N_tuple} = Union{IndexTuple{N_tuple}, NamedIndexTuple{N_tuple}} - const NumberCollection{N_tuple, T} = Union{NumberTuple{N_tuple, T}, NamedNumberTuple{N_tuple, T}} - const ArrayCollection{N_tuple, T, N} = Union{ArrayTuple{N_tuple, T, N}, NamedArrayTuple{N_tuple, T, N}} - const DeviceArrayCollection{N_tuple, T, N} = Union{DeviceArrayTuple{N_tuple, T, N}, NamedDeviceArrayTuple{N_tuple, T, N}} - const CellCollection{N_tuple, T, S} = Union{CellTuple{N_tuple, T, S}, NamedCellTuple{N_tuple, T, S}} - const DeviceCellCollection{N_tuple, T, S} = Union{DeviceCellTuple{N_tuple, T, S}, NamedDeviceCellTuple{N_tuple, T, S}} - const CellArrayCollection{N_tuple, T_elem, N, B} = Union{CellArrayTuple{N_tuple, T_elem, N, B}, NamedCellArrayTuple{N_tuple, T_elem, N, B}} - const 
DeviceCellArrayCollection{N_tuple, T_elem, N, B} = Union{DeviceCellArrayTuple{N_tuple, T_elem, N, B}, NamedDeviceCellArrayTuple{N_tuple, T_elem, N, B}} - - # TODO: the following constructors lead to pre-compilation issues due to a bug in Julia. They are therefore commented out for now. - # NamedIndexTuple{}(t::NamedTuple) = Base.map(Data.Index, t) - # NamedNumberTuple{}(T, t::NamedTuple) = Base.map(T, t) - # NamedArrayTuple{}(T, t::NamedTuple) = Base.map(Data.Array{T}, t) - # NamedCellTuple{}(T, t::NamedTuple) = Base.map(Data.Cell{T}, t) - # NamedCellArrayTuple{}(T, t::NamedTuple) = Base.map(Data.CellArray{T}, t) +function TData_cpu() + TData_module = :( + baremodule $MODULENAME_TDATA + import Base, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Array{T, N} = Base.Array{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:Cell{T_elem},N,B,T_elem} + $(TData_xpu_exprs()) + $(TData_Device_cpu()) + $(TData_Fields()) end + ) + return prewalk(rmlines, flatten(TData_module)) +end + +function Data_Device_cpu(numbertype::DataType, indextype::DataType) + Device_module = if (numbertype == NUMBERTYPE_NONE) + :(baremodule $MODULENAME_DEVICE + import Base, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Index = $indextype + const Array{T, N} = Base.Array{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:Cell{T_elem},N,B,T_elem} + $(Data_xpu_exprs(numbertype)) + end) else - quote - const IndexTuple{N_tuple} = NTuple{N_tuple, Index} - const NumberTuple{N_tuple} = NTuple{N_tuple, Number} - const ArrayTuple{N_tuple, N} = NTuple{N_tuple, Array{N}} - const DeviceArrayTuple{N_tuple, N} = NTuple{N_tuple, DeviceArray{N}} - const CellTuple{N_tuple, S} = NTuple{N_tuple, Cell{S}} - const 
DeviceCellTuple{N_tuple, S} = NTuple{N_tuple, DeviceCell{S}} - const CellArrayTuple{N_tuple, N, B} = NTuple{N_tuple, CellArray{N, B}} - const DeviceCellArrayTuple{N_tuple, N, B} = NTuple{N_tuple, DeviceCellArray{N, B}} - const TNumberTuple{N_tuple, T} = NTuple{N_tuple, T} - const TArrayTuple{N_tuple, T, N} = NTuple{N_tuple, TArray{T, N}} - const DeviceTArrayTuple{N_tuple, T, N} = NTuple{N_tuple, DeviceTArray{T, N}} - const TCellTuple{N_tuple, T, S} = NTuple{N_tuple, TCell{T, S}} - const DeviceTCellTuple{N_tuple, T, S} = NTuple{N_tuple, DeviceTCell{T, S}} - const TCellArrayTuple{N_tuple, T_elem, N, B} = NTuple{N_tuple, TCellArray{T_elem, N, B}} - const DeviceTCellArrayTuple{N_tuple, T_elem, N, B} = NTuple{N_tuple, DeviceTCellArray{T_elem, N, B}} - - const NamedIndexTuple{N_tuple, names} = NamedTuple{names, <:IndexTuple{N_tuple}} - const NamedNumberTuple{N_tuple, names} = NamedTuple{names, <:NumberTuple{N_tuple}} - const NamedArrayTuple{N_tuple, N, names} = NamedTuple{names, <:ArrayTuple{N_tuple, N}} - const NamedDeviceArrayTuple{N_tuple, N, names} = NamedTuple{names, <:DeviceArrayTuple{N_tuple, N}} - const NamedCellTuple{N_tuple, S, names} = NamedTuple{names, <:CellTuple{N_tuple, S}} - const NamedDeviceCellTuple{N_tuple, S, names} = NamedTuple{names, <:DeviceCellTuple{N_tuple, S}} - const NamedCellArrayTuple{N_tuple, N, B, names} = NamedTuple{names, <:CellArrayTuple{N_tuple, N, B}} - const NamedDeviceCellArrayTuple{N_tuple, N, B, names} = NamedTuple{names, <:DeviceCellArrayTuple{N_tuple, N, B}} - const NamedTNumberTuple{N_tuple, T, names} = NamedTuple{names, <:TNumberTuple{N_tuple, T}} - const NamedTArrayTuple{N_tuple, T, N, names} = NamedTuple{names, <:TArrayTuple{N_tuple, T, N}} - const NamedDeviceTArrayTuple{N_tuple, T, N, names} = NamedTuple{names, <:DeviceTArrayTuple{N_tuple, T, N}} - const NamedTCellTuple{N_tuple, T, S, names} = NamedTuple{names, <:TCellTuple{N_tuple, T, S}} - const NamedDeviceTCellTuple{N_tuple, T, S, names} = NamedTuple{names, 
<:DeviceTCellTuple{N_tuple, T, S}} - const NamedTCellArrayTuple{N_tuple, T_elem, N, B, names} = NamedTuple{names, <:TCellArrayTuple{N_tuple, T_elem, N, B}} - const NamedDeviceTCellArrayTuple{N_tuple, T_elem, N, B, names} = NamedTuple{names, <:DeviceTCellArrayTuple{N_tuple, T_elem, N, B}} - - const IndexCollection{N_tuple} = Union{IndexTuple{N_tuple}, NamedIndexTuple{N_tuple}} - const NumberCollection{N_tuple} = Union{NumberTuple{N_tuple}, NamedNumberTuple{N_tuple}} - const ArrayCollection{N_tuple, N} = Union{ArrayTuple{N_tuple, N}, NamedArrayTuple{N_tuple, N}} - const DeviceArrayCollection{N_tuple, N} = Union{DeviceArrayTuple{N_tuple, N}, NamedDeviceArrayTuple{N_tuple, N}} - const CellCollection{N_tuple, S} = Union{CellTuple{N_tuple, S}, NamedCellTuple{N_tuple, S}} - const DeviceCellCollection{N_tuple, S} = Union{DeviceCellTuple{N_tuple, S}, NamedDeviceCellTuple{N_tuple, S}} - const CellArrayCollection{N_tuple, N, B} = Union{CellArrayTuple{N_tuple, N, B}, NamedCellArrayTuple{N_tuple, N, B}} - const DeviceCellArrayCollection{N_tuple, N, B} = Union{DeviceCellArrayTuple{N_tuple, N, B}, NamedDeviceCellArrayTuple{N_tuple, N, B}} - const TNumberCollection{N_tuple, T} = Union{TNumberTuple{N_tuple, T}, NamedTNumberTuple{N_tuple, T}} - const TArrayCollection{N_tuple, T, N} = Union{TArrayTuple{N_tuple, T, N}, NamedTArrayTuple{N_tuple, T, N}} - const DeviceTArrayCollection{N_tuple, T, N} = Union{DeviceTArrayTuple{N_tuple, T, N}, NamedDeviceTArrayTuple{N_tuple, T, N}} - const TCellCollection{N_tuple, T, S} = Union{TCellTuple{N_tuple, T, S}, NamedTCellTuple{N_tuple, T, S}} - const DeviceTCellCollection{N_tuple, T, S} = Union{DeviceTCellTuple{N_tuple, T, S}, NamedDeviceTCellTuple{N_tuple, T, S}} - const TCellArrayCollection{N_tuple, T_elem, N, B} = Union{TCellArrayTuple{N_tuple, T_elem, N, B}, NamedTCellArrayTuple{N_tuple, T_elem, N, B}} - const DeviceTCellArrayCollection{N_tuple, T_elem, N, B} = Union{DeviceTCellArrayTuple{N_tuple, T_elem, N, B}, 
NamedDeviceTCellArrayTuple{N_tuple, T_elem, N, B}} - - # TODO: the following constructors lead to pre-compilation issues due to a bug in Julia. They are therefore commented out for now. - # NamedIndexTuple{}(t::NamedTuple) = Base.map(Data.Index, t) - # NamedNumberTuple{}(t::NamedTuple) = Base.map(Data.Number, t) - # NamedArrayTuple{}(t::NamedTuple) = Base.map(Data.Array, t) - # NamedCellTuple{}(t::NamedTuple) = Base.map(Data.Cell, t) - # NamedCellArrayTuple{}(t::NamedTuple) = Base.map(Data.CellArray, t) - # NamedTNumberTuple{}(T, t::NamedTuple) = Base.map(T, t) - # NamedTArrayTuple{}(T, t::NamedTuple) = Base.map(Data.TArray{T}, t) - # NamedTCellTuple{}(T, t::NamedTuple) = Base.map(Data.TCell{T}, t) - # NamedTCellArrayTuple{}(T, t::NamedTuple) = Base.map(Data.TCellArray{T}, t) - end + :(baremodule $MODULENAME_DEVICE + import Base, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Index = $indextype + const Array{N} = Base.Array{$numbertype, N} + const Cell{S} = Union{StaticArrays.SArray{S, $numbertype}, StaticArrays.FieldArray{S, $numbertype}} + const CellArray{N, B} = CellArrays.CPUCellArray{<:Cell,N,B,$numbertype} + $(Data_xpu_exprs(numbertype)) + end) end + return Device_module end -function Data_none() - :(baremodule Data # NOTE: there cannot be any newline before 'module Data' or it will create a begin end block and the module creation will fail. 
+function TData_Device_cpu() + :(baremodule $MODULENAME_DEVICE + import Base, ParallelStencil.ParallelKernel.CellArrays, ParallelStencil.ParallelKernel.StaticArrays + const Array{T, N} = Base.Array{T, N} + const Cell{T, S} = Union{StaticArrays.SArray{S, T}, StaticArrays.FieldArray{S, T}} + const CellArray{T_elem, N, B} = CellArrays.CPUCellArray{<:Cell{T_elem},N,B,T_elem} + $(TData_xpu_exprs()) end) end + + +# xPU + +function Data_xpu_exprs(numbertype::DataType) + if (numbertype == NUMBERTYPE_NONE) T_xpu_exprs() + else xpu_exprs() + end +end + +TData_xpu_exprs() = T_xpu_exprs() + +function T_xpu_exprs() + quote + const NumberTuple{N_tuple, T} = NTuple{N_tuple, T} + const ArrayTuple{N_tuple, T, N} = NTuple{N_tuple, Array{T, N}} + const CellTuple{N_tuple, T, S} = NTuple{N_tuple, Cell{T, S}} + const CellArrayTuple{N_tuple, T_elem, N, B} = NTuple{N_tuple, CellArray{T_elem, N, B}} + + const NamedNumberTuple{N_tuple, T, names} = NamedTuple{names, <:NumberTuple{N_tuple, T}} + const NamedArrayTuple{N_tuple, T, N, names} = NamedTuple{names, <:ArrayTuple{N_tuple, T, N}} + const NamedCellTuple{N_tuple, T, S, names} = NamedTuple{names, <:CellTuple{N_tuple, T, S}} + const NamedCellArrayTuple{N_tuple, T_elem, N, B, names} = NamedTuple{names, <:CellArrayTuple{N_tuple, T_elem, N, B}} + + const NumberCollection{N_tuple, T} = Union{NumberTuple{N_tuple, T}, NamedNumberTuple{N_tuple, T}} + const ArrayCollection{N_tuple, T, N} = Union{ArrayTuple{N_tuple, T, N}, NamedArrayTuple{N_tuple, T, N}} + const CellCollection{N_tuple, T, S} = Union{CellTuple{N_tuple, T, S}, NamedCellTuple{N_tuple, T, S}} + const CellArrayCollection{N_tuple, T_elem, N, B} = Union{CellArrayTuple{N_tuple, T_elem, N, B}, NamedCellArrayTuple{N_tuple, T_elem, N, B}} + + # TODO: the following constructors lead to pre-compilation issues due to a bug in Julia. They are therefore commented out for now. 
+ # NamedNumberTuple{}(T, t::NamedTuple) = Base.map(T, t) + # NamedArrayTuple{}(T, t::NamedTuple) = Base.map(Data.Array{T}, t) + # NamedCellTuple{}(T, t::NamedTuple) = Base.map(Data.Cell{T}, t) + # NamedCellArrayTuple{}(T, t::NamedTuple) = Base.map(Data.CellArray{T}, t) + end +end + +function xpu_exprs() + quote + const IndexTuple{N_tuple} = NTuple{N_tuple, Index} + const NumberTuple{N_tuple} = NTuple{N_tuple, Number} + const ArrayTuple{N_tuple, N} = NTuple{N_tuple, Array{N}} + const CellTuple{N_tuple, S} = NTuple{N_tuple, Cell{S}} + const CellArrayTuple{N_tuple, N, B} = NTuple{N_tuple, CellArray{N, B}} + + const NamedIndexTuple{N_tuple, names} = NamedTuple{names, <:IndexTuple{N_tuple}} + const NamedNumberTuple{N_tuple, names} = NamedTuple{names, <:NumberTuple{N_tuple}} + const NamedArrayTuple{N_tuple, N, names} = NamedTuple{names, <:ArrayTuple{N_tuple, N}} + const NamedCellTuple{N_tuple, S, names} = NamedTuple{names, <:CellTuple{N_tuple, S}} + const NamedCellArrayTuple{N_tuple, N, B, names} = NamedTuple{names, <:CellArrayTuple{N_tuple, N, B}} + + const IndexCollection{N_tuple} = Union{IndexTuple{N_tuple}, NamedIndexTuple{N_tuple}} + const NumberCollection{N_tuple} = Union{NumberTuple{N_tuple}, NamedNumberTuple{N_tuple}} + const ArrayCollection{N_tuple, N} = Union{ArrayTuple{N_tuple, N}, NamedArrayTuple{N_tuple, N}} + const CellCollection{N_tuple, S} = Union{CellTuple{N_tuple, S}, NamedCellTuple{N_tuple, S}} + const CellArrayCollection{N_tuple, N, B} = Union{CellArrayTuple{N_tuple, N, B}, NamedCellArrayTuple{N_tuple, N, B}} + + # TODO: the following constructors lead to pre-compilation issues due to a bug in Julia. They are therefore commented out for now. 
+ # NamedIndexTuple{}(t::NamedTuple) = Base.map(Data.Index, t) + # NamedNumberTuple{}(t::NamedTuple) = Base.map(Data.Number, t) + # NamedArrayTuple{}(t::NamedTuple) = Base.map(Data.Array, t) + # NamedCellTuple{}(t::NamedTuple) = Base.map(Data.Cell, t) + # NamedCellArrayTuple{}(t::NamedTuple) = Base.map(Data.CellArray, t) + end +end + + +## (DATA SUBMODULE FIELDS - xPU) # NOTE: custom data types could be implemented for each alias. + +function Data_Fields(numbertype::DataType, indextype::DataType) + Fields_module = if (numbertype == NUMBERTYPE_NONE) + :(baremodule $MODULENAME_FIELDS + import ..$MODULENAME_DATA # NOTE: this requires Julia >=1.10 + import ..$MODULENAME_DATA: Array, NamedArrayTuple + $(generic_Fields_exprs()) + $(T_Fields_exprs()) + $(Data_Fields_Device(numbertype, indextype)) + end) + else + :(baremodule $MODULENAME_FIELDS + import ..$MODULENAME_DATA + import ..$MODULENAME_DATA: Array, NamedArrayTuple + $(generic_Fields_exprs()) + $(Fields_exprs()) + $(Data_Fields_Device(numbertype, indextype)) + end) + end + return Fields_module +end + +function TData_Fields() + :(baremodule $MODULENAME_FIELDS + import ..$MODULENAME_TDATA + import ..$MODULENAME_TDATA: Array, NamedArrayTuple + $(generic_Fields_exprs()) + $(T_Fields_exprs()) + $(TData_Fields_Device()) + end) +end + +function Data_Fields_Device(numbertype::DataType, indextype::DataType) + Device_module = if (numbertype == NUMBERTYPE_NONE) + :(baremodule $MODULENAME_DEVICE + import ..$MODULENAME_DATA.$MODULENAME_DEVICE: Array, NamedArrayTuple + $(generic_Fields_exprs()) + $(T_Fields_exprs()) + end) + else + :(baremodule $MODULENAME_DEVICE + import ..$MODULENAME_DATA.$MODULENAME_DEVICE: Array, NamedArrayTuple + $(generic_Fields_exprs()) + $(Fields_exprs()) + end) + end + return Device_module +end + +function TData_Fields_Device() + :(baremodule $MODULENAME_DEVICE + import ..$MODULENAME_TDATA.$MODULENAME_DEVICE: Array, NamedArrayTuple + $(generic_Fields_exprs()) + $(T_Fields_exprs()) + end) +end + 
+function T_Fields_exprs() + quote + export VectorField, BVectorField, TensorField + const VectorField{T, N, names} = NamedArrayTuple{N, T, N, names} + const BVectorField{T, N, names} = NamedArrayTuple{N, T, N, names} + const TensorField{T, N, names} = NamedArrayTuple{N, T, N, names} + end +end + +function Fields_exprs() + quote + export VectorField, BVectorField, TensorField + const VectorField{N, names} = NamedArrayTuple{N, N, names} + const BVectorField{N, names} = NamedArrayTuple{N, N, names} + const TensorField{N, names} = NamedArrayTuple{N, N, names} + end +end + +function generic_Fields_exprs() + quote + export Field, XField, YField, ZField, BXField, BYField, BZField, XXField, YYField, ZZField, XYField, XZField, YZField + const Field = Array + const XField = Array + const YField = Array + const ZField = Array + const BXField = Array + const BYField = Array + const BZField = Array + const XXField = Array + const YYField = Array + const ZZField = Array + const XYField = Array + const XZField = Array + const YZField = Array + end +end diff --git a/src/ParallelKernel/FieldAllocators.jl b/src/ParallelKernel/FieldAllocators.jl new file mode 100644 index 0000000..849f1c6 --- /dev/null +++ b/src/ParallelKernel/FieldAllocators.jl @@ -0,0 +1,521 @@ +""" +Module FieldAllocators + +Provides macros for the allocation of different kind of fields on a grid of size `gridsize`. + +# Usage + using ParallelKernel.FieldAllocators + +# Macros + +###### Multiple fields at once +- [`@allocate`](@ref) + +###### Scalar fields +- [`@Field`](@ref) +- `{X|Y|Z}Field`, e.g. [`@XField`](@ref) +- `B{X|Y|Z}Field`, e.g. [`@BXField`](@ref) +- `{XX|YY|ZZ|XY|XZ|YZ}Field`, e.g. [`@XXField`](@ref) + +###### Vector fields +- [`@VectorField`](@ref) +- [`@BVectorField`](@ref) + +###### Tensor fields +- [`@TensorField`](@ref) + +To see a description of a macro type `?` (including the `@`). 
+""" +module FieldAllocators + +using ..Exceptions +import ..ParallelKernel: check_initialized, get_numbertype, extract_kwargvalues, split_args, clean_args, is_same, extract_tuple, extract_kwargs +import ..ParallelKernel: NUMBERTYPE_NONE, FIELDTYPES + + +## +const ALLOCATE_DOC = """ + @allocate(<keyword arguments>) + +Allocate different kinds of fields on a grid of size `gridsize` at once (and initialize them with zeros). Besides convenience and conciseness, this macro ensures that all fields are allocated using the same `gridsize` and is therefore recommended for the allocation of multiple fields. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Mandatory keyword arguments +- `gridsize::Tuple`: the size of the grid. +- `fields::Pair|NTuple{Pair}`: a tuple of pairs (or a single pair) of a field type and a field name or a tuple of field names. + +# Keyword arguments +- `allocator::Macro=@zeros`: the macro to use for the allocation of the field arrays (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + !!! note "Advanced" + - `eltype::DataType`: the type of the elements (numbers or indices). + +# Examples + @allocate(gridsize = (nx,ny,nz), + fields = (Field => (Pt, dτPt, ∇V, Radc, Rog, Mus), + VectorField => (R, dVdτ, dτV), + TensorField => τ, + BVectorField => V + ) + ) + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" +@doc ALLOCATE_DOC +macro allocate(args...) + check_initialized(__module__) + checkargs_allocate(args...) 
+ posargs, kwargs_expr = split_args(args) + gridsize, fields, allocator, eltype = extract_kwargvalues(kwargs_expr, (:gridsize, :fields, :allocator, :eltype), "@allocate") + esc(_allocate(__module__, posargs...; gridsize=gridsize, fields=fields, allocator=allocator, eltype=eltype)) +end + + +## +const FIELD_DOC = """ + @Field(gridsize) + @Field(gridsize, allocator) + @Field(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a scalar `Field` on a grid of size `gridsize`. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field array (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" +@doc FIELD_DOC +macro Field(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@Field") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype)) +end + + +## +const VECTORFIELD_DOC = """ + @VectorField(gridsize) + @VectorField(gridsize, allocator) + @VectorField(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a `VectorField` on a grid of size `gridsize`. + +!!! 
note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field arrays (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" +@doc VECTORFIELD_DOC +macro VectorField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@VectorField") + posargs = clean_args(posargs) + esc(_vectorfield(__module__, posargs...; eltype=eltype)) +end + + +## +const BVECTORFIELD_DOC = """ + @BVectorField(gridsize) + @BVectorField(gridsize, allocator) + @BVectorField(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a `BVectorField`, a vector field including boundaries, on a grid of size `gridsize`. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! 
note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field arrays (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" +@doc BVECTORFIELD_DOC +macro BVectorField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@BVectorField") + posargs = clean_args(posargs) + esc(_vectorfield(__module__, posargs...; eltype=eltype, sizetemplate=:B)) +end + + +## +const TENSORFIELD_DOC = """ + @TensorField(gridsize) + @TensorField(gridsize, allocator) + @TensorField(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a `TensorField` on a grid of size `gridsize`. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field arrays (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" +@doc TENSORFIELD_DOC +macro TensorField(args...) 
+ check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@TensorField") + posargs = clean_args(posargs) + esc(_tensorfield(__module__, posargs...; eltype=eltype)) +end + + +## +const VECTORFIELD_COMP_DOC = """ + @{X|Y|Z}Field(gridsize) + @{X|Y|Z}Field(gridsize, allocator) + @{X|Y|Z}Field(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a `{X|Y|Z}Field`, a scalar field of the same size as the {X|Y|Z}-component of a `VectorField`, on a grid of size `gridsize`. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field array (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" + +@doc VECTORFIELD_COMP_DOC +macro XField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@XField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:X)) +end + +@doc VECTORFIELD_COMP_DOC +macro YField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) 
+ posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@YField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:Y)) +end + +@doc VECTORFIELD_COMP_DOC +macro ZField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@ZField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:Z)) +end + + +## +const BVECTORFIELD_COMP_DOC = """ + @B{X|Y|Z}Field(gridsize) + @B{X|Y|Z}Field(gridsize, allocator) + @B{X|Y|Z}Field(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a `B{X|Y|Z}Field`, a scalar field of the same size as the {X|Y|Z}-component of a `BVectorField` (a vector field including boundaries), on a grid of size `gridsize`. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field array (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). + +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" + +@doc BVECTORFIELD_COMP_DOC +macro BXField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) 
+ posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@BXField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:BX)) +end + +@doc BVECTORFIELD_COMP_DOC +macro BYField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@BYField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:BY)) +end + +@doc BVECTORFIELD_COMP_DOC +macro BZField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@BZField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:BZ)) +end + + +## +const TENSORFIELD_COMP_DOC = """ + @{XX|YY|ZZ|XY|XZ|YZ}Field(gridsize) + @{XX|YY|ZZ|XY|XZ|YZ}Field(gridsize, allocator) + @{XX|YY|ZZ|XY|XZ|YZ}Field(gridsize, allocator, <keyword arguments>) + +Using the `allocator`, allocate a `{XX|YY|ZZ|XY|XZ|YZ}Field`, a scalar field of the same size as the {XX|YY|ZZ|XY|XZ|YZ}-component of a `TensorField`, on a grid of size `gridsize`. + +!!! note "Advanced" + The `eltype` can be explicitly passed as keyword argument in order to be used instead of the default `numbertype` chosen with [`@init_parallel_kernel`](@ref). If no default `numbertype` was chosen with [`@init_parallel_kernel`](@ref), then the keyword argument `eltype` is mandatory. This needs to be used with care to ensure that no datatype conversions occur in performance critical computations. + +# Arguments +- `gridsize::Tuple`: the size of the grid. +!!! note "Optional argument" + - `allocator::Macro=@zeros`: the macro to use for the allocation of the field array (`@zeros`, `@ones`, `@rand`, `@falses` or `@trues`). 
+ +# Keyword arguments +- `eltype::DataType`: the type of the elements (numbers or indices). + +See also: [`@allocate`](@ref), [`@Field`](@ref), [`@XField`](@ref), [`@BXField`](@ref), [`@XXField`](@ref), [`@VectorField`](@ref), [`@BVectorField`](@ref), [`@TensorField`](@ref) +""" + +@doc TENSORFIELD_COMP_DOC +macro XXField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@XXField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:XX)) +end + +@doc TENSORFIELD_COMP_DOC +macro YYField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@YYField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:YY)) +end + +@doc TENSORFIELD_COMP_DOC +macro ZZField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@ZZField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:ZZ)) +end + +@doc TENSORFIELD_COMP_DOC +macro XYField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@XYField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:XY)) +end + +@doc TENSORFIELD_COMP_DOC +macro XZField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) 
+ posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@XZField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:XZ)) +end + +@doc TENSORFIELD_COMP_DOC +macro YZField(args...) + check_initialized(__module__) + checksargs_field_macros(args...) + posargs, kwargs_expr = split_args(args) + eltype, = extract_kwargvalues(kwargs_expr, (:eltype,), "@YZField") + posargs = clean_args(posargs) + esc(_field(__module__, posargs...; eltype=eltype, sizetemplate=:YZ)) +end + + +## ARGUMENT CHECKS + +function checkargs_allocate(args...) + if isempty(args) @ArgumentError("arguments missing.") end + posargs, kwargs_expr = split_args(args) + if length(posargs) > 0 @ArgumentError("no positional arguments are allowed.") end + if length(kwargs_expr) < 2 @ArgumentError("the gridsize and the fields keyword argument are mandatory.") end + if length(kwargs_expr) > 4 @ArgumentError("too many keyword arguments.") end +end + +function checksargs_field_macros(args...) + if isempty(args) @ArgumentError("arguments missing.") end + posargs, kwargs_expr = split_args(args) + posargs = clean_args(posargs) + if isempty(posargs) @ArgumentError("the gridsize positional argument is mandatory.") end + if length(posargs) > 2 @ArgumentError("too many positional arguments.") end + if (length(posargs) == 2) && !(any(is_same.((posargs[2],), (:@zeros, :@ones, :@rand, :@falses, :@trues)))) @ArgumentError("the second positional argument must be a field allocator macro.") end + if length(kwargs_expr) > 1 @ArgumentError("the only allowed keyword argument is eltype.") end +end + + +## ALLOCATOR FUNCTIONS + +function _allocate(caller::Module; gridsize=nothing, fields=nothing, allocator=nothing, eltype=nothing) + eltype = determine_eltype(caller, eltype) + allocator = isnothing(allocator) ? (:@zeros) : allocator # NOTE: this cannot be set in signature because it can receive the value `nothing`. 
+ if isnothing(gridsize) || isnothing(fields) @ModuleInternalError("gridsize and fields are mandatory.") end + fields_expr = extract_tuple(fields; nested=true) + fields_kwargs = pairs(extract_kwargs(caller, fields_expr, FIELDTYPES, "@allocate"; separator=:(=>))) + allocations = [] + for (T, As_expr) in fields_kwargs + As = extract_tuple(As_expr) + for A in As + if (T == :Field) allocation = :($A = @Field($gridsize, $allocator, eltype=$eltype)) + elseif (T == :XField) allocation = :($A = @XField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :YField) allocation = :($A = @YField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :ZField) allocation = :($A = @ZField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :BXField) allocation = :($A = @BXField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :BYField) allocation = :($A = @BYField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :BZField) allocation = :($A = @BZField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :XXField) allocation = :($A = @XXField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :YYField) allocation = :($A = @YYField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :ZZField) allocation = :($A = @ZZField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :XYField) allocation = :($A = @XYField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :XZField) allocation = :($A = @XZField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :YZField) allocation = :($A = @YZField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :VectorField) allocation = :($A = @VectorField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :BVectorField) allocation = :($A = @BVectorField($gridsize, $allocator, eltype=$eltype)) + elseif (T == :TensorField) allocation = :($A = @TensorField($gridsize, $allocator, eltype=$eltype)) + else @ModuleInternalError("unexpected field type.") + end + push!(allocations, allocation) + end + end + return quote 
$(allocations...) end +end + +function _field(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, sizetemplate=nothing) + eltype = determine_eltype(caller, eltype) + if (sizetemplate == :X) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-2) : (length($gridsize)==2) ? (-1,-2) : -1)) + elseif (sizetemplate == :Y) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-2) : (length($gridsize)==2) ? (-2,-1) : -2)) + elseif (sizetemplate == :Z) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2,-1) : (length($gridsize)==2) ? (-2,-2) : -2)) + elseif (sizetemplate == :BX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (+1, 0, 0) : (length($gridsize)==2) ? (+1, 0) : +1)) + elseif (sizetemplate == :BY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,+1, 0) : (length($gridsize)==2) ? ( 0,+1) : 0)) + elseif (sizetemplate == :BZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0, 0,+1) : (length($gridsize)==2) ? ( 0, 0) : 0)) + elseif (sizetemplate == :XX) arraysize = :($gridsize .+ ((length($gridsize)==3) ? ( 0,-2,-2) : (length($gridsize)==2) ? ( 0,-2) : 0)) + elseif (sizetemplate == :YY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2, 0,-2) : (length($gridsize)==2) ? (-2, 0) : -2)) + elseif (sizetemplate == :ZZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-2, 0) : (length($gridsize)==2) ? (-2,-2) : -2)) + elseif (sizetemplate == :XY) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-1,-2) : (length($gridsize)==2) ? (-1,-1) : -1)) + elseif (sizetemplate == :XZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-1,-2,-1) : (length($gridsize)==2) ? (-1,-2) : -1)) + elseif (sizetemplate == :YZ) arraysize = :($gridsize .+ ((length($gridsize)==3) ? (-2,-1,-1) : (length($gridsize)==2) ? 
(-2,-1) : -2)) + else arraysize = gridsize + end + if is_same(allocator, :@zeros) return :(ParallelStencil.ParallelKernel.@zeros($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@ones) return :(ParallelStencil.ParallelKernel.@ones($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@rand) return :(ParallelStencil.ParallelKernel.@rand($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@falses) return :(ParallelStencil.ParallelKernel.@falses($arraysize..., eltype=$eltype)) + elseif is_same(allocator, :@trues) return :(ParallelStencil.ParallelKernel.@trues($arraysize..., eltype=$eltype)) + else @ModuleInternalError("unexpected allocator macro.") + end +end + +function _vectorfield(caller::Module, gridsize, allocator=:@zeros; eltype=nothing, sizetemplate=nothing) + eltype = determine_eltype(caller, eltype) + if (sizetemplate == :B) + return :((length($gridsize)==3) ? (x = ParallelStencil.ParallelKernel.FieldAllocators.@BXField($gridsize, $allocator, eltype=$eltype), + y = ParallelStencil.ParallelKernel.FieldAllocators.@BYField($gridsize, $allocator, eltype=$eltype), + z = ParallelStencil.ParallelKernel.FieldAllocators.@BZField($gridsize, $allocator, eltype=$eltype)) : + length($gridsize)==2 ? (x = ParallelStencil.ParallelKernel.FieldAllocators.@BXField($gridsize, $allocator, eltype=$eltype), + y = ParallelStencil.ParallelKernel.FieldAllocators.@BYField($gridsize, $allocator, eltype=$eltype)) : + (x = ParallelStencil.ParallelKernel.FieldAllocators.@BXField($gridsize, $allocator, eltype=$eltype),)) + else + return :((length($gridsize)==3) ? (x = ParallelStencil.ParallelKernel.FieldAllocators.@XField($gridsize, $allocator, eltype=$eltype), + y = ParallelStencil.ParallelKernel.FieldAllocators.@YField($gridsize, $allocator, eltype=$eltype), + z = ParallelStencil.ParallelKernel.FieldAllocators.@ZField($gridsize, $allocator, eltype=$eltype)) : + length($gridsize)==2 ? 
(x = ParallelStencil.ParallelKernel.FieldAllocators.@XField($gridsize, $allocator, eltype=$eltype), + y = ParallelStencil.ParallelKernel.FieldAllocators.@YField($gridsize, $allocator, eltype=$eltype)) : + (x = ParallelStencil.ParallelKernel.FieldAllocators.@XField($gridsize, $allocator, eltype=$eltype),)) + end +end + +function _tensorfield(caller::Module, gridsize, allocator=:@zeros; eltype=nothing) + eltype = determine_eltype(caller, eltype) + return :((length($gridsize)==3) ? (xx = ParallelStencil.ParallelKernel.FieldAllocators.@XXField($gridsize, $allocator, eltype=$eltype), + yy = ParallelStencil.ParallelKernel.FieldAllocators.@YYField($gridsize, $allocator, eltype=$eltype), + zz = ParallelStencil.ParallelKernel.FieldAllocators.@ZZField($gridsize, $allocator, eltype=$eltype), + xy = ParallelStencil.ParallelKernel.FieldAllocators.@XYField($gridsize, $allocator, eltype=$eltype), + xz = ParallelStencil.ParallelKernel.FieldAllocators.@XZField($gridsize, $allocator, eltype=$eltype), + yz = ParallelStencil.ParallelKernel.FieldAllocators.@YZField($gridsize, $allocator, eltype=$eltype)) : + length($gridsize)==2 ? 
(xx = ParallelStencil.ParallelKernel.FieldAllocators.@XXField($gridsize, $allocator, eltype=$eltype), + yy = ParallelStencil.ParallelKernel.FieldAllocators.@YYField($gridsize, $allocator, eltype=$eltype), + xy = ParallelStencil.ParallelKernel.FieldAllocators.@XYField($gridsize, $allocator, eltype=$eltype)) : + (xx = ParallelStencil.ParallelKernel.FieldAllocators.@XXField($gridsize, $allocator, eltype=$eltype),)) +end + +function determine_eltype(caller::Module, eltype) + if isnothing(eltype) + eltype = get_numbertype(caller) + if (eltype == NUMBERTYPE_NONE) @ArgumentError("the keyword argument 'eltype' is mandatory in @allocate, @Field, @VectorField, @TensorField, @XField, @YField, @ZField, @XXField, @YYField, @ZZField, @XYField, @XZField and @YZField when no default is set.") end + end + return eltype +end + + +## Exports + +export @allocate, @Field, @VectorField, @BVectorField, @TensorField, @XField, @BXField, @YField, @BYField, @ZField, @BZField, @XXField, @YYField, @ZZField, @XYField, @XZField, @YZField + + +end # Module FieldAllocators diff --git a/src/ParallelKernel/ParallelKernel.jl b/src/ParallelKernel/ParallelKernel.jl index 51567db..e901acb 100644 --- a/src/ParallelKernel/ParallelKernel.jl +++ b/src/ParallelKernel/ParallelKernel.jl @@ -33,6 +33,10 @@ Enables writing parallel high-performance kernels and whole applications that ca - [`@sync_threads`](@ref) - [`@sharedMem`](@ref) +# Submodules +- [`ParallelKernel.AD`](@ref) +- [`ParallelKernel.FieldAllocators`](@ref) + # Modules generated in caller - [`Data`](@ref) @@ -41,12 +45,11 @@ To see a description of a macro or module type `?` (including the `@` module ParallelKernel ## Include of exception module -include("Exceptions.jl"); +include("Exceptions.jl") using .Exceptions -## Alphabetical include of submodules. 
-include(joinpath("EnzymeExt", "AD.jl")); -include("Data.jl"); +## Alphabetical include of submodules for extensions +include(joinpath("EnzymeExt", "AD.jl")) ## Alphabetical include of defaults for extensions include(joinpath("AMDGPUExt", "defaults.jl")) @@ -57,12 +60,16 @@ include("shared.jl") ## Alphabetical include of function files include("allocators.jl") +include("Data.jl") include("hide_communication.jl") include("init_parallel_kernel.jl") include("kernel_language.jl") include("parallel.jl") include("reset_parallel_kernel.jl") +## Alphabetical include of submodules (not extensions) +include("FieldAllocators.jl") + ## Exports export @init_parallel_kernel, @parallel, @hide_communication, @parallel_indices, @parallel_async, @synchronize, @zeros, @ones, @rand, @falses, @trues, @fill, @fill!, @CellType export @gridDim, @blockIdx, @blockDim, @threadIdx, @sync_threads, @sharedMem, @pk_show, @pk_println, @∀ diff --git a/src/ParallelKernel/init_parallel_kernel.jl b/src/ParallelKernel/init_parallel_kernel.jl index d70a1b2..e91ed86 100644 --- a/src/ParallelKernel/init_parallel_kernel.jl +++ b/src/ParallelKernel/init_parallel_kernel.jl @@ -26,40 +26,50 @@ macro init_parallel_kernel(args...) end function init_parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, inbounds::Bool; datadoc_call=:(), parent_module::String="ParallelKernel") - modulename = :Data if package == PKG_CUDA if (isinteractive() && !is_installed("CUDA")) @NotInstalledError("CUDA was selected as package for parallelization, but CUDA.jl is not installed. 
CUDA functionality is provided as an extension of $parent_module and CUDA.jl needs therefore to be installed independently (type `add CUDA` in the julia package manager).") end indextype = INT_CUDA - data_module = Data_cuda(modulename, numbertype, indextype) + data_module = Data_cuda(numbertype, indextype) + tdata_module = TData_cuda() elseif package == PKG_AMDGPU if (isinteractive() && !is_installed("AMDGPU")) @NotInstalledError("AMDGPU was selected as package for parallelization, but AMDGPU.jl is not installed. AMDGPU functionality is provided as an extension of $parent_module and AMDGPU.jl needs therefore to be installed independently (type `add AMDGPU` in the julia package manager).") end indextype = INT_AMDGPU - data_module = Data_amdgpu(modulename, numbertype, indextype) + data_module = Data_amdgpu(numbertype, indextype) + tdata_module = TData_amdgpu() elseif package == PKG_POLYESTER if (isinteractive() && !is_installed("Polyester")) @NotInstalledError("Polyester was selected as package for parallelization, but Polyester.jl is not installed. 
Multi-threading using Polyester is provided as an extension of $parent_module and Polyester.jl needs therefore to be installed independently (type `add Polyester` in the julia package manager).") end indextype = INT_POLYESTER - data_module = Data_cpu(modulename, numbertype, indextype) + data_module = Data_cpu(numbertype, indextype) + tdata_module = TData_cpu() elseif package == PKG_THREADS indextype = INT_THREADS - data_module = Data_cpu(modulename, numbertype, indextype) + data_module = Data_cpu(numbertype, indextype) + tdata_module = TData_cpu() end pkg_import_cmd = define_import(caller, package, parent_module) # TODO: before it was ParallelStencil.ParallelKernel.PKG_THREADS, which activated it all weight i think, which should not be ad_init_cmd = :(ParallelStencil.ParallelKernel.AD.init_AD($package)) + @eval(caller, $pkg_import_cmd) if !isdefined(caller, :Data) || (@eval(caller, isa(Data, Module)) && length(symbols(caller, :Data)) == 1) # Only if the module Data does not exist in the caller or is empty, create it. if (datadoc_call==:()) if (numbertype == NUMBERTYPE_NONE) datadoc_call = :(@doc ParallelStencil.ParallelKernel.DATA_DOC_NUMBERTYPE_NONE Data) else datadoc_call = :(@doc ParallelStencil.ParallelKernel.DATA_DOC Data) end end - @eval(caller, $pkg_import_cmd) @eval(caller, $data_module) @eval(caller, $datadoc_call) - elseif isdefined(caller, :Data) && isdefined(caller.Data, :DeviceArray) + elseif isdefined(caller, :Data) && isdefined(caller.Data, :Device) if !isinteractive() @warn "Module Data from previous module initialization found in caller module ($caller); module Data not created. Note: this warning is only shown in non-interactive mode." end else @warn "Module Data cannot be created in caller module ($caller) as there is already a user defined symbol (module/variable...) with this name. ParallelStencil is still usable but without the features of the Data module." 
end + if !isdefined(caller, :TData) || (@eval(caller, isa(TData, Module)) && length(symbols(caller, :TData)) == 1) # Only if the module TData does not exist in the caller or is empty, create it. + @eval(caller, $tdata_module) + elseif isdefined(caller, :TData) && isdefined(caller.TData, :Device) + if !isinteractive() @warn "Module TData from previous module initialization found in caller module ($caller); module TData not created. Note: this warning is only shown in non-interactive mode." end + else + @warn "Module TData cannot be created in caller module ($caller) as there is already a user defined symbol (module/variable...) with this name. ParallelStencil is still usable but without the features of the TData module." + end @eval(caller, $ad_init_cmd) set_package(caller, package) set_numbertype(caller, numbertype) diff --git a/src/ParallelKernel/parallel.jl b/src/ParallelKernel/parallel.jl index 46c991b..44dfd96 100644 --- a/src/ParallelKernel/parallel.jl +++ b/src/ParallelKernel/parallel.jl @@ -160,7 +160,7 @@ function synchronize(caller::Module, args::Union{Symbol,Expr}...; package::Symbo elseif (package == PKG_AMDGPU) synchronize_amdgpu(args...) elseif (package == PKG_THREADS) synchronize_threads(args...) elseif (package == PKG_POLYESTER) synchronize_polyester(args...) 
- else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") + else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") end end @@ -181,7 +181,7 @@ function parallel_kernel(caller::Module, package::Symbol, numbertype::DataType, body = substitute(body, indices_aliases[i], indices[i]) end end - if isgpu(package) kernel = insert_device_types(kernel) end + if isgpu(package) kernel = insert_device_types(caller, kernel) end kernel = adjust_signatures(kernel, package) body = handle_indices_and_literals(body, indices, package, numbertype) if (inbounds) body = add_inbounds(body) end diff --git a/src/ParallelKernel/reset_parallel_kernel.jl b/src/ParallelKernel/reset_parallel_kernel.jl index 53cd2f2..1f1b7be 100644 --- a/src/ParallelKernel/reset_parallel_kernel.jl +++ b/src/ParallelKernel/reset_parallel_kernel.jl @@ -8,10 +8,14 @@ See also: [`init_parallel_kernel`](@ref) macro reset_parallel_kernel() esc(reset_parallel_kernel(__module__)) end function reset_parallel_kernel(caller::Module) - if isdefined(caller, :Data) && isdefined(caller.Data, :DeviceArray) # "Clear" the Data module if it has been created by ParallelKernel (i.e. contains Data.DeviceArray). + if isdefined(caller, :Data) && isdefined(caller.Data, :Device) # "Clear" the Data module if it has been created by ParallelKernel (i.e. contains Data.Device). data_module = Data_none() @eval(caller, $data_module) end + if isdefined(caller, :TData) && isdefined(caller.TData, :Device) # "Clear" the TData module if it has been created by ParallelKernel (i.e. contains TData.Device). 
+ tdata_module = TData_none() + @eval(caller, $tdata_module) + end set_initialized(caller, false) set_package(caller, PKG_NONE) set_numbertype(caller, NUMBERTYPE_NONE) diff --git a/src/ParallelKernel/shared.jl b/src/ParallelKernel/shared.jl index 17d0a81..1d00a8f 100644 --- a/src/ParallelKernel/shared.jl +++ b/src/ParallelKernel/shared.jl @@ -1,7 +1,6 @@ using CellArrays, StaticArrays, MacroTools import MacroTools: postwalk, splitdef, combinedef, isexpr, unblock, flatten, rmlines, prewalk # NOTE: inexpr_walk used instead of MacroTools.inexpr - ## CONSTANTS AND TYPES (and the macros wrapping them) # NOTE: constants needs to be defined before including the submodules to have them accessible there. @@ -45,6 +44,15 @@ const SUPPORTED_LITERALTYPES = [Float16, Float32, Float64, Complex{Fl const SUPPORTED_NUMBERTYPES = [Float16, Float32, Float64, Complex{Float16}, Complex{Float32}, Complex{Float64}] const PKNumber = Union{Float16, Float32, Float64, Complex{Float16}, Complex{Float32}, Complex{Float64}} # NOTE: this always needs to correspond to SUPPORTED_NUMBERTYPES! 
const NUMBERTYPE_NONE = DataType +const MODULENAME_DATA = :Data +const MODULENAME_TDATA = :TData +const MODULENAME_DEVICE = :Device +const MODULENAME_FIELDS = :Fields +const SCALARTYPES = (:Index, :Number, :IndexTuple, :NumberTuple, :IndexCollection, :NumberCollection, :NamedIndexTuple, :NamedNumberTuple) +const ARRAYTYPES = (:Array, :Cell, :CellArray, :ArrayTuple, :CellTuple, :CellArrayTuple, :NamedArrayTuple, :NamedCellTuple, :NamedCellArrayTuple, :ArrayCollection, :CellCollection, :CellArrayCollection) +const FIELDTYPES = (:Field, :XField, :YField, :ZField, :BXField, :BYField, :BZField, :XXField, :YYField, :ZZField, :XYField, :XZField, :YZField, :VectorField, :BVectorField, :TensorField) +const VECTORNAMES = (:x, :y, :z) +const TENSORNAMES = (:xx, :yy, :zz, :xy, :xz, :yz) const AD_MODE_DEFAULT = :(Enzyme.Reverse) const AD_DUPLICATE_DEFAULT = :(Enzyme.DuplicatedNoNeed) const AD_ANNOTATION_DEFAULT = :(Enzyme.Const) @@ -193,42 +201,49 @@ function add_inbounds(body::Expr) end end -function insert_device_types(kernel::Expr) - kernel = substitute(kernel, :(Data.Array), :(Data.DeviceArray)) - kernel = substitute(kernel, :(Data.Cell), :(Data.DeviceCell)) - kernel = substitute(kernel, :(Data.CellArray), :(Data.DeviceCellArray)) - kernel = substitute(kernel, :(Data.ArrayTuple), :(Data.DeviceArrayTuple)) - kernel = substitute(kernel, :(Data.CellTuple), :(Data.DeviceCellTuple)) - kernel = substitute(kernel, :(Data.CellArrayTuple), :(Data.DeviceCellArrayTuple)) - kernel = substitute(kernel, :(Data.NamedArrayTuple), :(Data.NamedDeviceArrayTuple)) - kernel = substitute(kernel, :(Data.NamedCellTuple), :(Data.NamedDeviceCellTuple)) - kernel = substitute(kernel, :(Data.NamedCellArrayTuple), :(Data.NamedDeviceCellArrayTuple)) - kernel = substitute(kernel, :(Data.ArrayCollection), :(Data.DeviceArrayCollection)) - kernel = substitute(kernel, :(Data.CellCollection), :(Data.DeviceCellCollection)) - kernel = substitute(kernel, :(Data.CellArrayCollection), 
:(Data.DeviceCellArrayCollection)) - kernel = substitute(kernel, :(Data.TArray), :(Data.DeviceTArray)) - kernel = substitute(kernel, :(Data.TCell), :(Data.DeviceTCell)) - kernel = substitute(kernel, :(Data.TCellArray), :(Data.DeviceTCellArray)) - kernel = substitute(kernel, :(Data.TArrayTuple), :(Data.DeviceTArrayTuple)) - kernel = substitute(kernel, :(Data.TCellTuple), :(Data.DeviceTCellTuple)) - kernel = substitute(kernel, :(Data.TCellArrayTuple), :(Data.DeviceTCellArrayTuple)) - kernel = substitute(kernel, :(Data.NamedTArrayTuple), :(Data.NamedDeviceTArrayTuple)) - kernel = substitute(kernel, :(Data.NamedTCellTuple), :(Data.NamedDeviceTCellTuple)) - kernel = substitute(kernel, :(Data.NamedTCellArrayTuple), :(Data.NamedDeviceTCellArrayTuple)) - kernel = substitute(kernel, :(Data.TArrayCollection), :(Data.DeviceTArrayCollection)) - kernel = substitute(kernel, :(Data.TCellCollection), :(Data.DeviceTCellCollection)) - kernel = substitute(kernel, :(Data.TCellArrayCollection), :(Data.DeviceTCellArrayCollection)) +function insert_device_types(caller::Module, kernel::Expr) + for T in ARRAYTYPES + if !isnothing(eval_try(caller, :(Data.Device))) + kernel = substitute(kernel, :(Data.$T), :(Data.Device.$T)) + end + if !isnothing(eval_try(caller, :(TData.Device))) + kernel = substitute(kernel, :(TData.$T), :(TData.Device.$T)) + end + end + for T in FIELDTYPES + if !isnothing(eval_try(caller, :(Data.Fields.Device))) + kernel = substitute(kernel, :(Data.Fields.$T), :(Data.Fields.Device.$T)) + end + if !isnothing(eval_try(caller, :(TData.Fields.Device))) + kernel = substitute(kernel, :(TData.Fields.$T), :(TData.Fields.Device.$T)) + end + Device_val = eval_try(caller, :(Fields.Device)) + if !isnothing(Device_val) && Device_val in (eval_try(caller, :(Data.Fields.Device)), eval_try(caller, :(TData.Fields.Device))) + kernel = substitute(kernel, :(Fields.$T), :(Fields.Device.$T)) + end + end + for T in FIELDTYPES + T_val = eval_try(caller, T) + T_d = nothing + if 
!isnothing(eval_try(caller, :(Data.Fields.Device))) + T_d = (!isnothing(T_val) && T_val == eval_try(caller, :(Data.Fields.$T))) ? :(Data.Fields.Device.$T) : T_d + end + if !isnothing(eval_try(caller, :(TData.Fields.Device))) + T_d = (!isnothing(T_val) && T_val == eval_try(caller, :(TData.Fields.$T))) ? :(TData.Fields.Device.$T) : T_d + end + if !isnothing(T_d) kernel = substitute_in_kernel(kernel, T, T_d, signature_only=true) end + end return kernel end - ## FUNCTIONS TO DEAL WITH KERNEL/MACRO CALLS: CHECK IF DEFINITION/CALL, EXTRACT, SPLIT AND EVALUATE ARGUMENTS is_kernel(arg) = isdef(arg) # NOTE: to be replaced with MacroTools.isdef(arg): isdef is to be merged fixed in MacroTools (see temporary functions at the end of this file) is_call(arg) = ( isa(arg, Expr) && (arg.head == :call) ) is_block(arg) = ( isa(arg, Expr) && (arg.head == :block) ) is_parallel_call(x) = isexpr(x, :macrocall) && (x.args[1] == Symbol("@parallel") || x.args[1] == :(@parallel)) +is_same(x, y) = rmlines(x) == rmlines(y) # NOTE: this serves to compare two macros function extract_args(call::Expr, macroname::Symbol) if (call.head != :macrocall) @ModuleInternalError("argument is not a macro call.") end @@ -241,7 +256,8 @@ extract_kernelcall_name(call::Expr) = call.args[1] function is_kwarg(arg; in_kernelcall=false, separator=:(=), keyword_type=Symbol) if in_kernelcall return ( isa(arg, Expr) && inexpr_walk(arg, :kw; match_only_head=true) ) - else return ( isa(arg, Expr) && (arg.head == separator) && isa(arg.args[1], keyword_type)) + else return ( isa(arg, Expr) && (arg.head == separator) && isa(arg.args[1], keyword_type) ) || + ( isa(arg, Expr) && (arg.head == :call) && (arg.args[1] == separator) && isa(arg.args[2], keyword_type) ) end end @@ -252,6 +268,8 @@ function Base.haskey(kwargs_expr::Array{Expr}, key::Symbol) return key in keys(kwargs) end +clean_args(args) = rmlines.(args) + function split_args(args; in_kernelcall=false) posargs = [x for x in args if !is_kwarg(x; 
in_kernelcall=in_kernelcall)] kwargs = [x for x in args if is_kwarg(x; in_kernelcall=in_kernelcall)] @@ -260,7 +278,7 @@ end function split_kwargs(kwargs; separator=:(=), keyword_type=Symbol) if !all(is_kwarg.(kwargs; separator=separator, keyword_type=keyword_type)) @ModuleInternalError("not all of kwargs are keyword arguments.") end - return Dict{keyword_type,Any}(x.args[1] => x.args[2] for x in kwargs) + return Dict{keyword_type,Any}((x.head==:call) ? (x.args[2] => x.args[3]) : (x.args[1] => x.args[2]) for x in kwargs) end function validate_kwargkeys(kwargs::Dict, valid_kwargs::Tuple, macroname::String) @@ -292,8 +310,8 @@ function extract_kwargs(caller::Module, kwargs_expr, valid_kwargs, macroname, ha return kwargs_known, kwargs_unknown_expr, kwargs_unknown, kwargs_unknown_dict end -function extract_kwargs(caller::Module, kwargs_expr, valid_kwargs, macroname; eval_args=()) - kwargs_known, = extract_kwargs(caller, kwargs_expr, valid_kwargs, macroname, false; eval_args=eval_args) +function extract_kwargs(caller::Module, kwargs_expr, valid_kwargs, macroname; eval_args=(), separator=:(=), keyword_type=Symbol) + kwargs_known, = extract_kwargs(caller, kwargs_expr, valid_kwargs, macroname, false; eval_args=eval_args, separator=separator, keyword_type=keyword_type) return kwargs_known end @@ -313,6 +331,14 @@ function eval_arg(caller::Module, arg) end end +function eval_try(caller::Module, expr) + try + return @eval(caller, $expr) + catch e + return nothing + end +end + ## FUNCTIONS FOR COMMON MANIPULATIONS ON EXPRESSIONS @@ -400,12 +426,12 @@ check_inbounds(inbounds) = ( if !isa(inbounds, Bool) @ArgumentError("$ERR ## FUNCTIONS AND MACROS FOR UNIT TESTS -symbols(eval_mod::Union{Symbol,Module}, mod::Union{Symbol,Module}) = @eval(eval_mod, names($mod, all=true, imported=true)) -prettystring(expr::Expr) = string(remove_linenumbernodes!(expr)) -gorgeousstring(expr::Expr) = string(simplify_varnames!(remove_linenumbernodes!(expr))) -longnameof(f) = 
"$(parentmodule(f)).$(nameof(f))" +prettystring(expr::Expr) = string(remove_linenumbernodes!(expr)) +gorgeousstring(expr::Expr) = string(simplify_varnames!(remove_linenumbernodes!(expr))) +longnameof(f) = "$(parentmodule(f)).$(nameof(f))" +symbols(eval_mod::Union{Symbol,Module}, mod::Union{Symbol,Expr,Module}; imported=false, all=true) = @eval(eval_mod, names($mod, all=$all, imported=$imported)) +macro symbols(eval_mod, mod, imported=false, all=true) symbols(eval_mod, mod; all=all, imported=imported) end macro require(condition) condition_str = string(condition); esc(:( if !($condition) error("pre-test requirement not met: $($condition_str).") end )) end # Verify a condition required for a unit test (in the unit test results, this should not be treated as a unit test). -macro symbols(eval_mod, mod) symbols(eval_mod, mod) end macro isgpu(package) isgpu(package) end macro iscpu(package) iscpu(package) end macro macroexpandn(n::Integer, expr) return QuoteNode(macroexpandn(__module__, expr, n)) end diff --git a/src/ParallelStencil.jl b/src/ParallelStencil.jl index 25f173d..2734020 100644 --- a/src/ParallelStencil.jl +++ b/src/ParallelStencil.jl @@ -34,6 +34,8 @@ https://github.com/omlins/ParallelStencil.jl - [`@sharedMem`](@ref) # Submodules +- [`ParallelStencil.AD`](@ref) +- [`ParallelStencil.FieldAllocators`](@ref) - [`ParallelStencil.FiniteDifferences1D`](@ref) - [`ParallelStencil.FiniteDifferences2D`](@ref) - [`ParallelStencil.FiniteDifferences3D`](@ref) @@ -63,8 +65,9 @@ include("kernel_language.jl") include("parallel.jl") include("reset_parallel_stencil.jl") -## Alphabetical include of computation-submodules (must be at end as needs to import from ParallelStencil, .e.g. INDICES). +## Alphabetical include of allocation/computation-submodules (must be at end as needs to import from ParallelStencil, e.g. INDICES). 
include("AD.jl") +include("FieldAllocators.jl") include("FiniteDifferences.jl") ## Exports (need to be after include of submodules as re-exports from them) diff --git a/src/init_parallel_stencil.jl b/src/init_parallel_stencil.jl index d1272d8..0cd790a 100644 --- a/src/init_parallel_stencil.jl +++ b/src/init_parallel_stencil.jl @@ -51,8 +51,8 @@ macro init_parallel_stencil(args...) end function init_parallel_stencil(caller::Module, package::Symbol, numbertype::DataType, ndims::Integer, inbounds::Bool, memopt::Bool) - if (numbertype == NUMBERTYPE_NONE) datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC_NUMBERTYPE_NONE, "@init_parallel_kernel" => "@init_parallel_stencil") Data) - else datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") Data) + if (numbertype == NUMBERTYPE_NONE) datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC_NUMBERTYPE_NONE, "ParallelKernel" => "ParallelStencil", "@init_parallel_kernel" => "@init_parallel_stencil") Data) + else datadoc_call = :(@doc replace(ParallelStencil.ParallelKernel.DATA_DOC, "ParallelKernel" => "ParallelStencil", "@init_parallel_kernel" => "@init_parallel_stencil") Data) end return_expr = ParallelKernel.init_parallel_kernel(caller, package, numbertype, inbounds; datadoc_call=datadoc_call, parent_module="ParallelStencil") set_package(caller, package) diff --git a/src/parallel.jl b/src/parallel.jl index fd52b1c..d29baa1 100644 --- a/src/parallel.jl +++ b/src/parallel.jl @@ -279,7 +279,7 @@ function parallel_kernel(metadata_module::Module, metadata_function::Expr, calle onthefly_exprs = insert_onthefly!.(onthefly_exprs, (onthefly_vars,), (onthefly_syms,), (indices,)) create_onthefly_macro.((caller,), onthefly_syms, onthefly_exprs, onthefly_vars, (indices,)) end - if isgpu(package) kernel = insert_device_types(kernel) end + if isgpu(package) kernel = insert_device_types(caller, kernel) end if !memopt kernel = 
adjust_signatures(kernel, package) body = handle_indices_and_literals(body, indices, package, numbertype) diff --git a/test/ParallelKernel/test_allocators.jl b/test/ParallelKernel/test_allocators.jl index 6ae628c..07a701d 100644 --- a/test/ParallelKernel/test_allocators.jl +++ b/test/ParallelKernel/test_allocators.jl @@ -5,6 +5,8 @@ using ParallelStencil.ParallelKernel import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, @get_numbertype, NUMBERTYPE_NONE, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU import ParallelStencil.ParallelKernel: @require, @prettystring, @gorgeousstring import ParallelStencil.ParallelKernel: checkargs_CellType, _CellType +using ParallelStencil.ParallelKernel.FieldAllocators +import ParallelStencil.ParallelKernel.FieldAllocators: checksargs_field_macros, checkargs_allocate using ParallelStencil.ParallelKernel.Exceptions TEST_PACKAGES = SUPPORTED_PACKAGES @static if PKG_CUDA in TEST_PACKAGES @@ -456,7 +458,169 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not end @reset_parallel_kernel() end; - @testset "6. Exceptions" begin + @testset "6. 
Fields" begin + @require !@is_initialized() + @init_parallel_kernel($package, Float16) + @require @is_initialized() + (nx, ny, nz) = (3, 4, 5) + @testset "mapping to array allocators" begin + @testset "Field" begin + @test occursin("@zeros", @prettystring(1, @Field((nx, ny, nz)))) + @test occursin("@zeros", @prettystring(1, @Field((nx, ny, nz), @zeros))) + @test occursin("@ones", @prettystring(1, @Field((nx, ny, nz), @ones))) + @test occursin("@rand", @prettystring(1, @Field((nx, ny, nz), @rand))) + @test occursin("@falses",@prettystring(1, @Field((nx, ny, nz), @falses))) + @test occursin("@trues", @prettystring(1, @Field((nx, ny, nz), @trues))) + end; + @testset "[B]{X|Y|Z}Field" begin + @test occursin("@zeros", @prettystring(1, @XField((nx, ny, nz)))) + @test occursin("@zeros", @prettystring(1, @YField((nx, ny, nz), @zeros))) + @test occursin("@ones", @prettystring(1, @ZField((nx, ny, nz), @ones))) + @test occursin("@rand", @prettystring(1, @BXField((nx, ny, nz), @rand))) + @test occursin("@falses",@prettystring(1, @BYField((nx, ny, nz), @falses))) + @test occursin("@trues", @prettystring(1, @BZField((nx, ny, nz), @trues))) + end; + @testset "{XX|YY|ZZ|XY|XZ|YZ}Field" begin + @test occursin("@zeros", @prettystring(1, @XXField((nx, ny, nz), eltype=Float32))) + @test occursin("@zeros", @prettystring(1, @YYField((nx, ny, nz), @zeros, eltype=Float32))) + @test occursin("@ones", @prettystring(1, @ZZField((nx, ny, nz), @ones, eltype=Float32))) + @test occursin("@rand", @prettystring(1, @XYField((nx, ny, nz), @rand, eltype=Float32))) + @test occursin("@falses",@prettystring(1, @XZField((nx, ny, nz), @falses, eltype=Float32))) + @test occursin("@trues", @prettystring(1, @YZField((nx, ny, nz), @trues, eltype=Float32))) + end; + end; + @testset "gridsize (3D)" begin + @test size( @Field((nx, ny, nz))) == (nx, ny, nz ) + @test size( @XField((nx, ny, nz))) == (nx-1, ny-2, nz-2) + @test size( @YField((nx, ny, nz))) == (nx-2, ny-1, nz-2) + @test size( @ZField((nx, ny, nz))) == 
(nx-2, ny-2, nz-1) + @test size(@BXField((nx, ny, nz))) == (nx+1, ny, nz ) + @test size(@BYField((nx, ny, nz))) == (nx, ny+1, nz ) + @test size(@BZField((nx, ny, nz))) == (nx, ny, nz+1) + @test size(@XXField((nx, ny, nz))) == (nx, ny-2, nz-2) + @test size(@YYField((nx, ny, nz))) == (nx-2, ny, nz-2) + @test size(@ZZField((nx, ny, nz))) == (nx-2, ny-2, nz ) + @test size(@XYField((nx, ny, nz))) == (nx-1, ny-1, nz-2) + @test size(@XZField((nx, ny, nz))) == (nx-1, ny-2, nz-1) + @test size(@YZField((nx, ny, nz))) == (nx-2, ny-1, nz-1) + @test size.(Tuple( @VectorField((nx, ny, nz)))) == (size( @XField((nx, ny, nz))), size( @YField((nx, ny, nz))), size( @ZField((nx, ny, nz)))) + @test size.(Tuple(@BVectorField((nx, ny, nz)))) == (size(@BXField((nx, ny, nz))), size(@BYField((nx, ny, nz))), size(@BZField((nx, ny, nz)))) + @test size.(Tuple( @TensorField((nx, ny, nz)))) == (size(@XXField((nx, ny, nz))), size(@YYField((nx, ny, nz))), size(@ZZField((nx, ny, nz))), + size(@XYField((nx, ny, nz))), size(@XZField((nx, ny, nz))), size(@YZField((nx, ny, nz)))) + end; + @testset "gridsize (2D)" begin + @test size( @Field((nx, ny))) == (nx, ny, ) + @test size( @XField((nx, ny))) == (nx-1, ny-2) + @test size( @YField((nx, ny))) == (nx-2, ny-1) + @test size( @ZField((nx, ny))) == (nx-2, ny-2) + @test size(@BXField((nx, ny))) == (nx+1, ny, ) + @test size(@BYField((nx, ny))) == (nx, ny+1) + @test size(@BZField((nx, ny))) == (nx, ny, ) + @test size(@XXField((nx, ny))) == (nx, ny-2) + @test size(@YYField((nx, ny))) == (nx-2, ny, ) + @test size(@ZZField((nx, ny))) == (nx-2, ny-2) + @test size(@XYField((nx, ny))) == (nx-1, ny-1) + @test size(@XZField((nx, ny))) == (nx-1, ny-2) + @test size(@YZField((nx, ny))) == (nx-2, ny-1) + @test size.(Tuple( @VectorField((nx, ny)))) == (size( @XField((nx, ny))), size( @YField((nx, ny)))) + @test size.(Tuple(@BVectorField((nx, ny)))) == (size(@BXField((nx, ny))), size(@BYField((nx, ny)))) + @test size.(Tuple( @TensorField((nx, ny)))) == (size(@XXField((nx, 
ny))), size(@YYField((nx, ny))), + size(@XYField((nx, ny)))) + end; + @testset "gridsize (1D)" begin + @test size( @Field((nx,))) == (nx, ) + @test size( @XField((nx,))) == (nx-1,) + @test size( @YField((nx,))) == (nx-2,) + @test size( @ZField((nx,))) == (nx-2,) + @test size(@BXField((nx,))) == (nx+1,) + @test size(@BYField((nx,))) == (nx, ) + @test size(@BZField((nx,))) == (nx, ) + @test size(@XXField((nx,))) == (nx, ) + @test size(@YYField((nx,))) == (nx-2,) + @test size(@ZZField((nx,))) == (nx-2,) + @test size(@XYField((nx,))) == (nx-1,) + @test size(@XZField((nx,))) == (nx-1,) + @test size(@YZField((nx,))) == (nx-2,) + @test size.(Tuple( @VectorField((nx,)))) == (size( @XField((nx,))),) + @test size.(Tuple(@BVectorField((nx,)))) == (size(@BXField((nx,))),) + @test size.(Tuple( @TensorField((nx,)))) == (size(@XXField((nx,))),) + end; + @testset "eltype" begin + @test eltype(@Field((nx, ny, nz))) == Float16 + @test eltype(@Field((nx, ny, nz), eltype=Float32)) == Float32 + @test eltype.(Tuple(@VectorField((nx, ny, nz)))) == (Float16, Float16, Float16) + @test eltype.(Tuple(@VectorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32) + @test eltype.(Tuple(@BVectorField((nx, ny, nz)))) == (Float16, Float16, Float16) + @test eltype.(Tuple(@BVectorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32) + @test eltype.(Tuple(@TensorField((nx, ny, nz)))) == (Float16, Float16, Float16, Float16, Float16, Float16) + @test eltype.(Tuple(@TensorField((nx, ny, nz), eltype=Float32))) == (Float32, Float32, Float32, Float32, Float32, Float32) + end; + @testset "@allocate" begin + @testset "single field" begin + @test occursin("F = @Field((nx, ny, nz), @zeros(), eltype = Float16)", @prettystring(1, @allocate(gridsize = (nx,ny,nz), fields = (Field=>F)))) + @test occursin("F = @Field(nxyz, @zeros(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F))) + @test occursin("F = @Field(nxyz, @ones(), eltype = Float16)", 
@prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@ones))) + @test occursin("F = @Field(nxyz, @rand(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@rand))) + @test occursin("F = @Field(nxyz, @falses(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@falses))) + @test occursin("F = @Field(nxyz, @trues(), eltype = Float16)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, allocator=@trues))) + @test occursin("F = @Field(nxyz, @zeros(), eltype = Float32)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, eltype=Float32))) + @test occursin("F = @Field(nxyz, @rand(), eltype = Float32)", @prettystring(1, @allocate(gridsize = nxyz, fields = Field=>F, eltype=Float32, allocator=@rand))) + end; + @testset "multiple fields - one per type (default allocator and eltype)" begin + call = @prettystring(1, @allocate(gridsize = nxyz, + fields = (Field => F, + XField => X, + YField => Y, + ZField => Z, + BXField => BX, + BYField => BY, + BZField => BZ, + XXField => XX, + YYField => YY, + ZZField => ZZ, + XYField => XY, + XZField => XZ, + YZField => YZ, + VectorField => V, + BVectorField => BV, + TensorField => T) )) + @test occursin("F = @Field(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("X = @XField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("Y = @YField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("Z = @ZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BX = @BXField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BY = @BYField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BZ = @BZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("XX = @XXField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("YY = @YYField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("ZZ = @ZZField(nxyz, @zeros(), eltype = Float16)", call) + 
@test occursin("XY = @XYField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("XZ = @XZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("YZ = @YZField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("V = @VectorField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("BV = @BVectorField(nxyz, @zeros(), eltype = Float16)", call) + @test occursin("T = @TensorField(nxyz, @zeros(), eltype = Float16)", call) + end; + @testset "multiple fields - multiple per type (custom allocator and eltype)" begin + call = @prettystring(1, @allocate(gridsize = nxyz, + fields = (Field => (F1, F2), + XField => X, + VectorField => (V1, V2, V3), + TensorField => T), + allocator = @rand, + eltype = Float32) ) + @test occursin("F1 = @Field(nxyz, @rand(), eltype = Float32)", call) + @test occursin("F2 = @Field(nxyz, @rand(), eltype = Float32)", call) + @test occursin("X = @XField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("V1 = @VectorField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("V2 = @VectorField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("V3 = @VectorField(nxyz, @rand(), eltype = Float32)", call) + @test occursin("T = @TensorField(nxyz, @rand(), eltype = Float32)", call) + end; + end; + @reset_parallel_kernel() + end; + @testset "7. 
Exceptions" begin @require !@is_initialized() @init_parallel_kernel(package = $package) @require @is_initialized @@ -469,7 +633,48 @@ const DATA_INDEX = ParallelStencil.INT_THREADS # TODO: using Data.Index does not @test_throws ArgumentError _CellType(@__MODULE__, :SymmetricTensor2D, fieldnames=:((xx, zz, xz)), dims=:((2,3))) # Error: isnothing(eltype) && (!parametric && eltype == NUMBERTYPE_NONE) @test_throws ArgumentError _CellType(@__MODULE__, :SymmetricTensor2D, fieldnames=:((xx, zz, xz)), eltype=Float32, parametric=true) # Error: !isnothing(fieldnames) && parametric end; + @testset "arguments field macros" begin + @test_throws ArgumentError checksargs_field_macros(); # Error: isempty(args) + @test_throws ArgumentError checksargs_field_macros(:(eltype=Float32)); # Error: isempty(posargs) + @test_throws ArgumentError checksargs_field_macros(:nxyz, :@rand, :Float32); # Error: length(posargs) > 2 + @test_throws ArgumentError checksargs_field_macros(:nxyz, :@fill); # Error: unsupported allocator + @test_throws ArgumentError checksargs_field_macros(:nxyz, :(eltype=Float32), :(something=x)) # Error: length(kwargs) > 1 + end; + @testset "arguments @allocate" begin + @test_throws ArgumentError checkargs_allocate(); # Error: isempty(args) + @test_throws ArgumentError checkargs_allocate(:nxyz); # Error: !isempty(posargs) + @test_throws ArgumentError checkargs_allocate(:(gridsize=(3,4))); # Error: length(kwargs) < 2 + @test_throws ArgumentError checkargs_allocate(:(fields=(Field=>A))); # Error: length(kwargs) < 2 + @test_throws ArgumentError checkargs_allocate(:(gridsize=(3,4)), :(fields=(Field=>A)), :(allocator=:@rand), :(eltype=Float32), :(something=x)) # Error: length(kwargs) > 4 + end; @reset_parallel_kernel() end; end; )) end == nothing || true; + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/ParallelKernel/test_init_parallel_kernel.jl b/test/ParallelKernel/test_init_parallel_kernel.jl index 39e62f7..d259959 100644 --- 
a/test/ParallelKernel/test_init_parallel_kernel.jl +++ b/test/ParallelKernel/test_init_parallel_kernel.jl @@ -1,7 +1,7 @@ using Test import ParallelStencil using ParallelStencil.ParallelKernel -import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, @get_package, @get_numbertype, @get_inbounds, NUMBERTYPE_NONE, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU +import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, @get_package, @get_numbertype, @get_inbounds, NUMBERTYPE_NONE, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, SCALARTYPES, ARRAYTYPES, FIELDTYPES import ParallelStencil.ParallelKernel: @require, @symbols import ParallelStencil.ParallelKernel: extract_posargs_init, extract_kwargs_init, check_already_initialized, set_initialized, is_initialized, check_initialized using ParallelStencil.ParallelKernel.Exceptions @@ -29,21 +29,55 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t end; @testset "Data" begin @test @isdefined(Data) - @test length(@symbols($(@__MODULE__), Data)) > 1 - @test Symbol("Index") in @symbols($(@__MODULE__), Data) - @test Symbol("Number") in @symbols($(@__MODULE__), Data) - @test Symbol("Array") in @symbols($(@__MODULE__), Data) - @test Symbol("Cell") in @symbols($(@__MODULE__), Data) - @test Symbol("CellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCell") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("TArray") in @symbols($(@__MODULE__), Data) - @test Symbol("TCell") in @symbols($(@__MODULE__), Data) - @test Symbol("TCellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceTArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceTCell") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceTCellArray") in @symbols($(@__MODULE__), Data) + mods = (:Data, :Device, :Fields) + syms = 
@symbols($(@__MODULE__), Data) + @test length(syms) > 1 + @test length(syms) >= length(mods) + length(SCALARTYPES) + length(ARRAYTYPES) # +1|2 for metadata symbols + @test all(T ∈ syms for T in mods) + @test all(T ∈ syms for T in SCALARTYPES) + @test all(T ∈ syms for T in ARRAYTYPES) + @testset "Data.Device" begin + syms = @symbols($(@__MODULE__), Data.Device) + @test length(syms) > 0 + @test all(T ∈ syms for T in ARRAYTYPES) + end; + @testset "Data.Fields" begin + mods = (:Fields, :Device) + syms = @symbols($(@__MODULE__), Data.Fields) + @test length(syms) > 0 + @test all(T ∈ syms for T in mods) + @test all(T ∈ syms for T in FIELDTYPES) + end; + @testset "Data.Fields.Device" begin + syms = @symbols($(@__MODULE__), Data.Fields.Device) + @test length(syms) > 0 + @test all(T ∈ syms for T in FIELDTYPES) + end; + end; + @testset "TData" begin # NOTE: no scalar types + @test @isdefined(TData) + mods = (:TData, :Device, :Fields) + syms = @symbols($(@__MODULE__), TData) + @test length(syms) > 1 + @test all(T ∈ syms for T in mods) + @test all(T ∈ syms for T in ARRAYTYPES) + @testset "TData.Device" begin + syms = @symbols($(@__MODULE__), TData.Device) + @test length(syms) > 0 + @test all(T ∈ syms for T in ARRAYTYPES) + end; + @testset "TData.Fields" begin + mods = (:Fields, :Device) + syms = @symbols($(@__MODULE__), TData.Fields) + @test length(syms) > 0 + @test all(T ∈ syms for T in mods) + @test all(T ∈ syms for T in FIELDTYPES) + end; + @testset "TData.Fields.Device" begin + syms = @symbols($(@__MODULE__), TData.Fields.Device) + @test length(syms) > 0 + @test all(T ∈ syms for T in FIELDTYPES) + end; end; @reset_parallel_kernel() end; @@ -56,17 +90,31 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @test @get_numbertype() == NUMBERTYPE_NONE @test @get_inbounds() == true end; - @testset "Data" begin + @testset "Data" begin # NOTE: no scalar types @test @isdefined(Data) - @test length(@symbols($(@__MODULE__), Data)) > 1 - @test 
Symbol("Index") in @symbols($(@__MODULE__), Data) - @test !(Symbol("Number") in @symbols($(@__MODULE__), Data)) - @test Symbol("Array") in @symbols($(@__MODULE__), Data) - @test Symbol("Cell") in @symbols($(@__MODULE__), Data) - @test Symbol("CellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCell") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCellArray") in @symbols($(@__MODULE__), Data) + mods = (:Data, :Device, :Fields) + syms = @symbols($(@__MODULE__), Data) + @test length(syms) > 1 + @test all(T ∈ syms for T in mods) + @test !(Symbol("Number") in syms) + @test all(T ∈ syms for T in ARRAYTYPES) + @testset "Data.Device" begin + syms = @symbols($(@__MODULE__), Data.Device) + @test length(syms) > 0 + @test all(T ∈ syms for T in ARRAYTYPES) + end; + @testset "Data.Fields" begin + mods = (:Fields, :Device) + syms = @symbols($(@__MODULE__), Data.Fields) + @test length(syms) > 0 + @test all(T ∈ syms for T in mods) + @test all(T ∈ syms for T in FIELDTYPES) + end; + @testset "Data.Fields.Device" begin + syms = @symbols($(@__MODULE__), Data.Fields.Device) + @test length(syms) > 0 + @test all(T ∈ syms for T in FIELDTYPES) + end; end; @reset_parallel_kernel() end; diff --git a/test/ParallelKernel/test_parallel.jl b/test/ParallelKernel/test_parallel.jl index 1c85285..e69d64c 100644 --- a/test/ParallelKernel/test_parallel.jl +++ b/test/ParallelKernel/test_parallel.jl @@ -3,7 +3,7 @@ import ParallelStencil using Enzyme using ParallelStencil.ParallelKernel import ParallelStencil.ParallelKernel.AD -import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_THREADS, PKG_POLYESTER, INDICES +import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_THREADS, PKG_POLYESTER, INDICES, ARRAYTYPES, FIELDTYPES import ParallelStencil.ParallelKernel: @require, 
@prettystring, @gorgeousstring, @isgpu, @iscpu import ParallelStencil.ParallelKernel: checkargs_parallel, checkargs_parallel_indices, parallel_indices, maxsize using ParallelStencil.ParallelKernel.Exceptions @@ -131,155 +131,233 @@ import Enzyme @testset "addition of range arguments" begin expansion = @gorgeousstring(1, @parallel_indices (ix,iy) f(a::T, b::T) where T <: Union{Array{Float32}, Array{Float64}} = (println("a=$a, b=$b)"); return)) @test occursin("f(a::T, b::T, ranges::Tuple{UnitRange, UnitRange, UnitRange}, rangelength_x::Int64, rangelength_y::Int64, rangelength_z::Int64", expansion) - end - @testset "Data.Array to Data.DeviceArray" begin + end + @testset "Data.Array to Data.Device.Array" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Array, B::Data.Array, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceArray, B::Data.DeviceArray,", expansion) + @test occursin("f(A::Data.Device.Array, B::Data.Device.Array,", expansion) end end - @testset "Data.Cell to Data.DeviceCell" begin + @testset "Data.Cell to Data.Device.Cell" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Cell, B::Data.Cell, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceCell, B::Data.DeviceCell,", expansion) + @test occursin("f(A::Data.Device.Cell, B::Data.Device.Cell,", expansion) end end - @testset "Data.CellArray to Data.DeviceCellArray" begin + @testset "Data.CellArray to Data.Device.CellArray" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellArray, B::Data.CellArray, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceCellArray, B::Data.DeviceCellArray,", expansion) + @test occursin("f(A::Data.Device.CellArray, B::Data.Device.CellArray,", expansion) end end - @testset "Data.ArrayTuple to 
Data.DeviceArrayTuple" begin + @testset "Data.ArrayTuple to Data.Device.ArrayTuple" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.ArrayTuple, B::Data.ArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.DeviceArrayTuple, B::Data.DeviceArrayTuple,", expansion) + @test occursin("f(A::Data.Device.ArrayTuple, B::Data.Device.ArrayTuple,", expansion) end end - @testset "Data.CellTuple to Data.DeviceCellTuple" begin + @testset "Data.CellTuple to Data.Device.CellTuple" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellTuple, B::Data.CellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.DeviceCellTuple, B::Data.DeviceCellTuple,", expansion) + @test occursin("f(A::Data.Device.CellTuple, B::Data.Device.CellTuple,", expansion) end end - @testset "Data.CellArrayTuple to Data.DeviceCellArrayTuple" begin + @testset "Data.CellArrayTuple to Data.Device.CellArrayTuple" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellArrayTuple, B::Data.CellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.DeviceCellArrayTuple, B::Data.DeviceCellArrayTuple,", expansion) + @test occursin("f(A::Data.Device.CellArrayTuple, B::Data.Device.CellArrayTuple,", expansion) end end - @testset "Data.NamedArrayTuple to Data.NamedDeviceArrayTuple" begin + @testset "Data.NamedArrayTuple to Data.Device.NamedArrayTuple" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedArrayTuple, B::Data.NamedArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.NamedDeviceArrayTuple, B::Data.NamedDeviceArrayTuple,", expansion) + @test occursin("f(A::Data.Device.NamedArrayTuple, B::Data.Device.NamedArrayTuple,", expansion) end end - @testset "Data.NamedCellTuple to Data.NamedDeviceCellTuple" begin + @testset "Data.NamedCellTuple to 
Data.Device.NamedCellTuple" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedCellTuple, B::Data.NamedCellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.NamedDeviceCellTuple, B::Data.NamedDeviceCellTuple,", expansion) + @test occursin("f(A::Data.Device.NamedCellTuple, B::Data.Device.NamedCellTuple,", expansion) end end - @testset "Data.NamedCellArrayTuple to Data.NamedDeviceCellArrayTuple" begin + @testset "Data.NamedCellArrayTuple to Data.Device.NamedCellArrayTuple" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedCellArrayTuple, B::Data.NamedCellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.NamedDeviceCellArrayTuple, B::Data.NamedDeviceCellArrayTuple,", expansion) + @test occursin("f(A::Data.Device.NamedCellArrayTuple, B::Data.Device.NamedCellArrayTuple,", expansion) end end - @testset "Data.ArrayCollection to Data.DeviceArrayCollection" begin + @testset "Data.ArrayCollection to Data.Device.ArrayCollection" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.ArrayCollection, B::Data.ArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.DeviceArrayCollection, B::Data.DeviceArrayCollection,", expansion) + @test occursin("f!(A::Data.Device.ArrayCollection, B::Data.Device.ArrayCollection,", expansion) end end - @testset "Data.CellCollection to Data.DeviceCellCollection" begin + @testset "Data.CellCollection to Data.Device.CellCollection" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.CellCollection, B::Data.CellCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.DeviceCellCollection, B::Data.DeviceCellCollection,", expansion) + @test occursin("f!(A::Data.Device.CellCollection, B::Data.Device.CellCollection,", 
expansion) end end - @testset "Data.CellArrayCollection to Data.DeviceCellArrayCollection" begin + @testset "Data.CellArrayCollection to Data.Device.CellArrayCollection" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.CellArrayCollection, B::Data.CellArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.DeviceCellArrayCollection, B::Data.DeviceCellArrayCollection,", expansion) + @test occursin("f!(A::Data.Device.CellArrayCollection, B::Data.Device.CellArrayCollection,", expansion) + end + end + @testset "Data.Fields.Field to Data.Fields.Device.Field" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.Field, B::Data.Fields.Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + end + end + # NOTE: the following GPU tests fail, because the Fields module cannot be imported. 
+ # @testset "Fields.Field to Data.Fields.Device.Field" begin + # @static if @isgpu($package) + # import .Data.Fields + # expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Fields.Field, B::Fields.Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + # @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + # end + # end + # @testset "Field to Data.Fields.Device.Field" begin + # @static if @isgpu($package) + # using .Data.Fields + # expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Field, B::Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + # @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + # end + # end + @testset "Data.Fields.VectorField to Data.Fields.Device.VectorField" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.VectorField, B::Data.Fields.VectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.VectorField, B::Data.Fields.Device.VectorField,", expansion) end end - @testset "Data.TArray to Data.DeviceTArray" begin + @testset "Data.Fields.BVectorField to Data.Fields.Device.BVectorField" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.TArray, B::Data.TArray, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceTArray, B::Data.DeviceTArray,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.BVectorField, B::Data.Fields.BVectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.BVectorField, B::Data.Fields.Device.BVectorField,", expansion) end end - @testset "Data.TCell to Data.DeviceTCell" begin + @testset "Data.Fields.TensorField to Data.Fields.Device.TensorField" begin @static if @isgpu($package) - expansion = @prettystring(1, 
@parallel_indices (ix,iy) f(A::Data.TCell, B::Data.TCell, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceTCell, B::Data.DeviceTCell,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.TensorField, B::Data.Fields.TensorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.TensorField, B::Data.Fields.Device.TensorField,", expansion) end end - @testset "Data.TCellArray to Data.DeviceTCellArray" begin + @testset "TData.Array to TData.Device.Array" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.TCellArray, B::Data.TCellArray, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceTCellArray, B::Data.DeviceTCellArray,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Array, B::TData.Array, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Device.Array, B::TData.Device.Array,", expansion) end end - @testset "Data.TArrayTuple to Data.DeviceTArrayTuple" begin + @testset "TData.Cell to TData.Device.Cell" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.TArrayTuple, B::Data.TArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.DeviceTArrayTuple, B::Data.DeviceTArrayTuple,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Cell, B::TData.Cell, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Device.Cell, B::TData.Device.Cell,", expansion) end end - @testset "Data.TCellTuple to Data.DeviceTCellTuple" begin + @testset "TData.CellArray to TData.Device.CellArray" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.TCellTuple, B::Data.TCellTuple, c::T) where T <: Integer = return) - @test 
occursin("f(A::Data.DeviceTCellTuple, B::Data.DeviceTCellTuple,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.CellArray, B::TData.CellArray, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Device.CellArray, B::TData.Device.CellArray,", expansion) end end - @testset "Data.TCellArrayTuple to Data.DeviceTCellArrayTuple" begin + @testset "TData.ArrayTuple to TData.Device.ArrayTuple" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.TCellArrayTuple, B::Data.TCellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.DeviceTCellArrayTuple, B::Data.DeviceTCellArrayTuple,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.ArrayTuple, B::TData.ArrayTuple, c::T) where T <: Integer = return) + @test occursin("f(A::TData.Device.ArrayTuple, B::TData.Device.ArrayTuple,", expansion) end end - @testset "Data.NamedTArrayTuple to Data.NamedDeviceTArrayTuple" begin + @testset "TData.CellTuple to TData.Device.CellTuple" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedTArrayTuple, B::Data.NamedTArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.NamedDeviceTArrayTuple, B::Data.NamedDeviceTArrayTuple,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.CellTuple, B::TData.CellTuple, c::T) where T <: Integer = return) + @test occursin("f(A::TData.Device.CellTuple, B::TData.Device.CellTuple,", expansion) end end - @testset "Data.NamedTCellTuple to Data.NamedDeviceTCellTuple" begin + @testset "TData.CellArrayTuple to TData.Device.CellArrayTuple" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedTCellTuple, B::Data.NamedTCellTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.NamedDeviceTCellTuple, B::Data.NamedDeviceTCellTuple,", 
expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.CellArrayTuple, B::TData.CellArrayTuple, c::T) where T <: Integer = return) + @test occursin("f(A::TData.Device.CellArrayTuple, B::TData.Device.CellArrayTuple,", expansion) end end - @testset "Data.NamedTCellArrayTuple to Data.NamedDeviceTCellArrayTuple" begin + @testset "TData.NamedArrayTuple to TData.Device.NamedArrayTuple" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.NamedTCellArrayTuple, B::Data.NamedTCellArrayTuple, c::T) where T <: Integer = return) - @test occursin("f(A::Data.NamedDeviceTCellArrayTuple, B::Data.NamedDeviceTCellArrayTuple,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.NamedArrayTuple, B::TData.NamedArrayTuple, c::T) where T <: Integer = return) + @test occursin("f(A::TData.Device.NamedArrayTuple, B::TData.Device.NamedArrayTuple,", expansion) end end - @testset "Data.TArrayCollection to Data.DeviceTArrayCollection" begin + @testset "TData.NamedCellTuple to TData.Device.NamedCellTuple" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.TArrayCollection, B::Data.TArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.DeviceTArrayCollection, B::Data.DeviceTArrayCollection,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.NamedCellTuple, B::TData.NamedCellTuple, c::T) where T <: Integer = return) + @test occursin("f(A::TData.Device.NamedCellTuple, B::TData.Device.NamedCellTuple,", expansion) end end - @testset "Data.TCellCollection to Data.DeviceTCellCollection" begin + @testset "TData.NamedCellArrayTuple to TData.Device.NamedCellArrayTuple" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.TCellCollection, B::Data.TCellCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test 
occursin("f!(A::Data.DeviceTCellCollection, B::Data.DeviceTCellCollection,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.NamedCellArrayTuple, B::TData.NamedCellArrayTuple, c::T) where T <: Integer = return) + @test occursin("f(A::TData.Device.NamedCellArrayTuple, B::TData.Device.NamedCellArrayTuple,", expansion) end end - @testset "Data.TCellArrayCollection to Data.DeviceTCellArrayCollection" begin + @testset "TData.ArrayCollection to TData.Device.ArrayCollection" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::Data.TCellArrayCollection, B::Data.TCellArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f!(A::Data.DeviceTCellArrayCollection, B::Data.DeviceTCellArrayCollection,", expansion) + expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::TData.ArrayCollection, B::TData.ArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f!(A::TData.Device.ArrayCollection, B::TData.Device.ArrayCollection,", expansion) end end - @testset "Nested Data.Array to Data.DeviceArray" begin + @testset "TData.CellCollection to TData.Device.CellCollection" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::TData.CellCollection, B::TData.CellCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f!(A::TData.Device.CellCollection, B::TData.Device.CellCollection,", expansion) + end + end + @testset "TData.CellArrayCollection to TData.Device.CellArrayCollection" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f!(A::TData.CellArrayCollection, B::TData.CellArrayCollection, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f!(A::TData.Device.CellArrayCollection, B::TData.Device.CellArrayCollection,", expansion) + end + end + @testset "TData.Fields.Field to 
TData.Fields.Device.Field" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.Field, B::TData.Fields.Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + end + end + # NOTE: the following GPU tests fail, because the Fields module cannot be imported. + # @testset "Fields.Field to TData.Fields.Device.Field" begin + # @static if @isgpu($package) + # import .TData.Fields + # expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Fields.Field, B::Fields.Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + # @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + # end + # end + # @testset "Field to TData.Fields.Device.Field" begin + # @static if @isgpu($package) + # using .TData.Fields + # expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Field, B::Field, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + # @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + # end + # end + @testset "TData.Fields.VectorField to TData.Fields.Device.VectorField" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.VectorField, B::TData.Fields.VectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Fields.Device.VectorField, B::TData.Fields.Device.VectorField,", expansion) + end + end + @testset "TData.Fields.BVectorField to TData.Fields.Device.BVectorField" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.BVectorField, B::TData.Fields.BVectorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Fields.Device.BVectorField, B::TData.Fields.Device.BVectorField,", expansion) + end + end + @testset 
"TData.Fields.TensorField to TData.Fields.Device.TensorField" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::TData.Fields.TensorField, B::TData.Fields.TensorField, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::TData.Fields.Device.TensorField, B::TData.Fields.Device.TensorField,", expansion) + end + end + @testset "Nested Data.Array to Data.Device.Array" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::NamedTuple{T1, NTuple{T2,T3}} where {T1,T2} where T3 <: Data.Array, c::T) where T <: Integer = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::((NamedTuple{T1, NTuple{T2, T3}} where {T1, T2}) where T3 <: Data.DeviceArray),", expansion) + @test occursin("f(A::((NamedTuple{T1, NTuple{T2, T3}} where {T1, T2}) where T3 <: Data.Device.Array),", expansion) end end @testset "@parallel_indices (1D)" begin @@ -478,22 +556,46 @@ import Enzyme @require !@is_initialized() @init_parallel_kernel(package = $package) @require @is_initialized - @testset "Data.Array{T} to Data.DeviceArray{T}" begin + @testset "Data.Array{T} to Data.Device.Array{T}" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Array{T}, B::Data.Array{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceArray{T}, B::Data.DeviceArray{T},", expansion) + @test occursin("f(A::Data.Device.Array{T}, B::Data.Device.Array{T},", expansion) end end; - @testset "Data.Cell{T} to Data.DeviceCell{T}" begin + @testset "Data.Cell{T} to Data.Device.Cell{T}" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Cell{T}, B::Data.Cell{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceCell{T}, B::Data.DeviceCell{T},", expansion) + @test occursin("f(A::Data.Device.Cell{T}, 
B::Data.Device.Cell{T},", expansion) end end; - @testset "Data.CellArray{T} to Data.DeviceCellArray{T}" begin + @testset "Data.CellArray{T} to Data.Device.CellArray{T}" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.CellArray{T}, B::Data.CellArray{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) - @test occursin("f(A::Data.DeviceCellArray{T}, B::Data.DeviceCellArray{T},", expansion) + @test occursin("f(A::Data.Device.CellArray{T}, B::Data.Device.CellArray{T},", expansion) + end + end; + @testset "Data.Fields.Field{T} to Data.Fields.Device.Field{T}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.Field{T}, B::Data.Fields.Field{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.Field{T}, B::Data.Fields.Device.Field{T},", expansion) + end + end; + @testset "Data.Fields.VectorField{T} to Data.Fields.Device.VectorField{T}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.VectorField{T}, B::Data.Fields.VectorField{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.VectorField{T}, B::Data.Fields.Device.VectorField{T},", expansion) + end + end; + @testset "Data.Fields.BVectorField{T} to Data.Fields.Device.BVectorField{T}" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.BVectorField{T}, B::Data.Fields.BVectorField{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.BVectorField{T}, B::Data.Fields.Device.BVectorField{T},", expansion) + end + end; + @testset "Data.Fields.TensorField{T} to Data.Fields.Device.TensorField{T}" begin + @static if @isgpu($package) + expansion = 
@prettystring(1, @parallel_indices (ix,iy) f(A::Data.Fields.TensorField{T}, B::Data.Fields.TensorField{T}, c<:Integer) where T <: Union{Float32, Float64} = (A[ix,iy] = B[ix,iy]^c; return)) + @test occursin("f(A::Data.Fields.Device.TensorField{T}, B::Data.Fields.Device.TensorField{T},", expansion) end end; @reset_parallel_kernel() diff --git a/test/ParallelKernel/test_reset_parallel_kernel.jl b/test/ParallelKernel/test_reset_parallel_kernel.jl index 4bbde1d..b3e3ae0 100644 --- a/test/ParallelKernel/test_reset_parallel_kernel.jl +++ b/test/ParallelKernel/test_reset_parallel_kernel.jl @@ -29,7 +29,8 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @init_parallel_kernel($package, Float64) @require @is_initialized() && @get_package() == $package @reset_parallel_kernel() - @test length(@symbols($(@__MODULE__), Data)) == 1 + @test length(@symbols($(@__MODULE__), Data)) <= 1 + @test length(@symbols($(@__MODULE__), TData)) <= 1 @test !@is_initialized() @test @get_package() == $PKG_NONE @test @get_numbertype() == $NUMBERTYPE_NONE diff --git a/test/test_init_parallel_stencil.jl b/test/test_init_parallel_stencil.jl index b77a8ff..c4d5170 100644 --- a/test/test_init_parallel_stencil.jl +++ b/test/test_init_parallel_stencil.jl @@ -31,20 +31,28 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @testset "Data" begin @test @isdefined(Data) @test length(@symbols($(@__MODULE__), Data)) > 1 - @test Symbol("Index") in @symbols($(@__MODULE__), Data) - @test Symbol("Number") in @symbols($(@__MODULE__), Data) - @test Symbol("Array") in @symbols($(@__MODULE__), Data) - @test Symbol("Cell") in @symbols($(@__MODULE__), Data) - @test Symbol("CellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCell") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("TArray") in 
@symbols($(@__MODULE__), Data) - @test Symbol("TCell") in @symbols($(@__MODULE__), Data) - @test Symbol("TCellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceTArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceTCell") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceTCellArray") in @symbols($(@__MODULE__), Data) + @testset "Data.Device" begin + @test length(@symbols($(@__MODULE__), Data.Device)) > 1 + end; + @testset "Data.Fields" begin + @test length(@symbols($(@__MODULE__), Data.Fields)) > 1 + end; + @testset "Data.Fields.Device" begin + @test length(@symbols($(@__MODULE__), Data.Fields.Device)) > 1 + end; + end; + @testset "TData" begin + @test @isdefined(TData) + @test length(@symbols($(@__MODULE__), TData)) > 1 + @testset "TData.Device" begin + @test length(@symbols($(@__MODULE__), TData.Device)) > 1 + end; + @testset "TData.Fields" begin + @test length(@symbols($(@__MODULE__), TData.Fields)) > 1 + end; + @testset "TData.Fields.Device" begin + @test length(@symbols($(@__MODULE__), TData.Fields.Device)) > 1 + end; end; @reset_parallel_stencil() end; @@ -62,14 +70,15 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t @testset "Data" begin @test @isdefined(Data) @test length(@symbols($(@__MODULE__), Data)) > 1 - @test Symbol("Index") in @symbols($(@__MODULE__), Data) - @test !(Symbol("Number") in @symbols($(@__MODULE__), Data)) - @test Symbol("Array") in @symbols($(@__MODULE__), Data) - @test Symbol("Cell") in @symbols($(@__MODULE__), Data) - @test Symbol("CellArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceArray") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCell") in @symbols($(@__MODULE__), Data) - @test Symbol("DeviceCellArray") in @symbols($(@__MODULE__), Data) + @testset "Data.Device" begin + @test length(@symbols($(@__MODULE__), Data.Device)) > 1 + end; + @testset "Data.Fields" begin + @test length(@symbols($(@__MODULE__), Data.Fields)) > 1 + end; + @testset 
"Data.Fields.Device" begin + @test length(@symbols($(@__MODULE__), Data.Fields.Device)) > 1 + end; end; @reset_parallel_stencil() end; diff --git a/test/test_parallel.jl b/test/test_parallel.jl index b4d6e2f..5809cc1 100644 --- a/test/test_parallel.jl +++ b/test/test_parallel.jl @@ -123,42 +123,84 @@ import ParallelStencil.@gorgeousexpand expansion = @gorgeousstring(1, @parallel f(A, B, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) @test occursin("f(A, B, c::T, ranges::Tuple{UnitRange, UnitRange, UnitRange}, rangelength_x::Int64, rangelength_y::Int64, rangelength_z::Int64", expansion) end - @testset "Data.Array to Data.DeviceArray" begin + @testset "Data.Array to Data.Device.Array" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel f(A::Data.Array, B::Data.Array, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceArray, B::Data.DeviceArray,", expansion) + @test occursin("f(A::Data.Device.Array, B::Data.Device.Array,", expansion) end end - @testset "Data.Cell to Data.DeviceCell" begin + @testset "Data.Cell to Data.Device.Cell" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel f(A::Data.Cell, B::Data.Cell, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceCell, B::Data.DeviceCell,", expansion) + @test occursin("f(A::Data.Device.Cell, B::Data.Device.Cell,", expansion) end end - @testset "Data.CellArray to Data.DeviceCellArray" begin + @testset "Data.CellArray to Data.Device.CellArray" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel f(A::Data.CellArray, B::Data.CellArray, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceCellArray, B::Data.DeviceCellArray,", expansion) + @test occursin("f(A::Data.Device.CellArray, B::Data.Device.CellArray,", expansion) end end - @testset "Data.TArray to Data.DeviceTArray" begin + @testset "Data.Fields.Field to 
Data.Fields.Device.Field" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.TArray, B::Data.TArray, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceTArray, B::Data.DeviceTArray,", expansion) + expansion = @prettystring(1, @parallel f(A::Data.Fields.Field, B::Data.Fields.Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) end end - @testset "Data.TCell to Data.DeviceTCell" begin + # NOTE: the following GPU tests fail, because the Fields module cannot be imported. + # @testset "Fields.Field to Data.Fields.Device.Field" begin + # @static if @isgpu($package) + # import .Data.Fields + # expansion = @prettystring(1, @parallel f(A::Fields.Field, B::Fields.Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + # @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + # end + # end + # @testset "Field to Data.Fields.Device.Field" begin + # @static if @isgpu($package) + # using .Data.Fields + # expansion = @prettystring(1, @parallel f(A::Field, B::Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + # @test occursin("f(A::Data.Fields.Device.Field, B::Data.Fields.Device.Field,", expansion) + # end + # end + @testset "TData.Array to TData.Device.Array" begin @static if @isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.TCell, B::Data.TCell, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceTCell, B::Data.DeviceTCell,", expansion) + expansion = @prettystring(1, @parallel f(A::TData.Array, B::TData.Array, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::TData.Device.Array, B::TData.Device.Array,", expansion) end end - @testset "Data.TCellArray to Data.DeviceTCellArray" begin + @testset "TData.Cell to TData.Device.Cell" begin @static if 
@isgpu($package) - expansion = @prettystring(1, @parallel f(A::Data.TCellArray, B::Data.TCellArray, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceTCellArray, B::Data.DeviceTCellArray,", expansion) + expansion = @prettystring(1, @parallel f(A::TData.Cell, B::TData.Cell, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::TData.Device.Cell, B::TData.Device.Cell,", expansion) end end + @testset "TData.CellArray to TData.Device.CellArray" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel f(A::TData.CellArray, B::TData.CellArray, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::TData.Device.CellArray, B::TData.Device.CellArray,", expansion) + end + end + @testset "TData.Fields.Field to TData.Fields.Device.Field" begin + @static if @isgpu($package) + expansion = @prettystring(1, @parallel f(A::TData.Fields.Field, B::TData.Fields.Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + end + end + # NOTE: the following GPU tests fail, because the Fields module cannot be imported. 
+ # @testset "Fields.Field to TData.Fields.Device.Field" begin + # @static if @isgpu($package) + # import .TData.Fields + # expansion = @prettystring(1, @parallel f(A::Fields.Field, B::Fields.Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + # @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + # end + # end + # @testset "Field to TData.Fields.Device.Field" begin + # @static if @isgpu($package) + # using .TData.Fields + # expansion = @prettystring(1, @parallel f(A::Field, B::Field, c::T) where T <: Integer = (@all(A) = @all(B)^c; return)) + # @test occursin("f(A::TData.Fields.Device.Field, B::TData.Fields.Device.Field,", expansion) + # end + # end @testset "@parallel (3D)" begin A = @zeros(4, 5, 6) @parallel function write_indices!(A) @@ -909,22 +951,22 @@ import ParallelStencil.@gorgeousexpand @require !@is_initialized() @init_parallel_stencil(package = $package) @require @is_initialized - @testset "Data.Array{T} to Data.DeviceArray{T}" begin + @testset "Data.Array{T} to Data.Device.Array{T}" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel ndims=3 f(A::Data.Array{T}, B::Data.Array{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceArray{T}, B::Data.DeviceArray{T},", expansion) + @test occursin("f(A::Data.Device.Array{T}, B::Data.Device.Array{T},", expansion) end end; - @testset "Data.Cell{T} to Data.DeviceCell{T}" begin + @testset "Data.Cell{T} to Data.Device.Cell{T}" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel ndims=2 f(A::Data.Cell{T}, B::Data.Cell{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceCell{T}, B::Data.DeviceCell{T},", expansion) + @test occursin("f(A::Data.Device.Cell{T}, B::Data.Device.Cell{T},", expansion) end end; - @testset "Data.CellArray{T} to Data.DeviceCellArray{T}" begin + @testset "Data.CellArray{T} to 
Data.Device.CellArray{T}" begin @static if @isgpu($package) expansion = @prettystring(1, @parallel ndims=1 f(A::Data.CellArray{T}, B::Data.CellArray{T}, c::Integer) where T <: PSNumber = (@all(A) = @all(B)^c; return)) - @test occursin("f(A::Data.DeviceCellArray{T}, B::Data.DeviceCellArray{T},", expansion) + @test occursin("f(A::Data.Device.CellArray{T}, B::Data.Device.CellArray{T},", expansion) end end; @testset "N substitution | ndims tuple expansion" begin