Skip to content

Commit

Permalink
Add kernel-based matrix cumsum (#416)
Browse files Browse the repository at this point in the history
* Add kernel-based matrix cumsum

* Fix some minor issues

* Add tests to increase code coverage

* Fix print devices for AMD

* Expand tests and exclude Metal and kernel functions from coverage / formatting
  • Loading branch information
rkierulf authored Jun 21, 2024
1 parent 13c382d commit d624e8f
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 17 deletions.
2 changes: 1 addition & 1 deletion KomaMRICore/ext/KomaAMDGPUExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ end

function KomaMRICore._print_devices(::ROCBackend)
devices = [
Symbol("($(i-1)$(i == 1 ? "*" : " "))") => d.name for
Symbol("($(i-1)$(i == 1 ? "*" : " "))") => AMDGPU.HIP.name(d) for
(i, d) in enumerate(AMDGPU.devices())
]
@info "$(length(AMDGPU.devices())) AMD capable device(s)." devices...
Expand Down
6 changes: 5 additions & 1 deletion KomaMRICore/ext/KomaMetalExt.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
## COV_EXCL_START

module KomaMetalExt

using Metal
Expand Down Expand Up @@ -32,4 +34,6 @@ function __init__()
@warn "Metal does not support all array operations used by KomaMRI (https://github.com/JuliaGPU/Metal.jl/issues/348). GPU performance may be slower than expected"
end

end
end

## COV_EXCL_STOP
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ function run_spin_precession!(
sig::AbstractArray{Complex{T}},
M::Mag{T},
sim_method::BlochDict,
backend::KA.Backend
) where {T<:Real}
#Simulation
#Motion
Expand All @@ -43,7 +44,7 @@ function run_spin_precession!(
Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
#Rotation
if is_ADC_on(seq)
ϕ = T(-2π * γ) .* cumtrapz(seq.Δt', Bz)
ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
else
ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
end
Expand Down
3 changes: 2 additions & 1 deletion KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function run_spin_precession!(
sig::AbstractArray{Complex{T}},
M::Mag{T},
sim_method::SimulationMethod,
backend::KA.Backend
) where {T<:Real}
#Simulation
#Motion
Expand All @@ -52,7 +53,7 @@ function run_spin_precession!(
Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
#Rotation
if is_ADC_on(seq)
ϕ = T(-2π * γ) .* cumtrapz(seq.Δt', Bz)
ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
else
ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
end
Expand Down
50 changes: 50 additions & 0 deletions KomaMRICore/src/simulation/Bloch/KernelFunctions.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
using KernelAbstractions: @index, @kernel

## COV_EXCL_START
#! format: off

"""
cumsum2_kernel
Simple kernel function, computes the cumulative sum of each row of a matrix. Operates
in-place on the input matrix without allocating additional memory.
# Arguments
- 'A': matrix to compute cumsum on
"""
@kernel function cumsum_matrix_rows_kernel!(A)
i = @index(Global)

for k 2:size(A, 2)
@inbounds A[i, k] += A[i, k-1]
end
end

#! format: on
## COV_EXCL_STOP

"""
cumtrapz
A more efficient GPU implementation of the cumtrapz method defined in TrapezoidalIntegration.jl.
Uses a kernel to compute cumsum along the second dimension.
# Arguments
- `Δt`: (`1 x NΔt ::Matrix{Float64}`, `[s]`) delta time 1-row array
- `x`: (`Ns x (NΔt+1) ::Matrix{Float64}`, `[T]`) magnitude of the field Gx * x + Gy * y +
Gz * z
# Returns
- `y`: (`Ns x NΔt ::Matrix{Float64}`, `[T*s]`) matrix where every column is the
cumulative integration over time of (Gx * x + Gy * y + Gz * z) * Δt for every spin of a
phantom
"""
function KomaMRIBase.cumtrapz(Δt::AbstractArray{T}, x::AbstractArray{T}, backend::KA.GPU) where {T<:Real}
y = (x[:, 2:end] .+ x[:, 1:end-1]) .* (Δt / 2)
cumsum_matrix_rows_kernel!(backend)(y, ndrange=size(y,1))
KA.synchronize(backend)
return y
end

#If on CPU, forward call to cumtrapz in KomaMRIBase
KomaMRIBase.cumtrapz(Δt::AbstractArray{T}, x::AbstractArray{T}, backend::KA.CPU) where {T<:Real} = KomaMRIBase.cumtrapz(Δt, x)
5 changes: 3 additions & 2 deletions KomaMRICore/src/simulation/Functors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ See also [`f32`](@ref) and [`f64`](@ref) to change element type only.
# Examples
```julia
using CUDA
x = gpu(x)
x = x |> gpu
```
"""
function gpu(x)
Expand All @@ -31,7 +31,8 @@ function gpu(x)
if (BACKEND[] isa KA.GPU)
return gpu(x, BACKEND[])
else
@error "function 'gpu' called with no functional backends available. Add 'using CUDA / Metal / AMDGPU / oneAPI' to your code and try again"
@warn "function 'gpu' called with no functional backends available. Add 'using CUDA / Metal / AMDGPU / oneAPI' to your code and try again"
return x
end
end

Expand Down
22 changes: 13 additions & 9 deletions KomaMRICore/src/simulation/SimulatorCore.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ abstract type SimulationMethod end #get all available types by using subtypes(Ko
abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation)

#Defined methods:
include("Bloch/KernelFunctions.jl")
include("Bloch/BlochSimulationMethod.jl") #Defines Bloch simulation method
include("Bloch/BlochDictSimulationMethod.jl") #Defines BlochDict simulation method

Expand Down Expand Up @@ -82,15 +83,16 @@ function run_spin_precession_parallel!(
seq::DiscreteSequence{T},
sig::AbstractArray{Complex{T}},
Xt::SpinStateRepresentation{T},
sim_method::SimulationMethod;
sim_method::SimulationMethod,
backend::KA.Backend;
Nthreads=Threads.nthreads(),
) where {T<:Real}
parts = kfoldperm(length(obj), Nthreads)
dims = [Colon() for i in 1:(ndims(sig) - 1)] # :,:,:,... Ndim times

ThreadsX.foreach(enumerate(parts)) do (i, p)
run_spin_precession!(
@view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method
@view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method, backend
)
end

Expand Down Expand Up @@ -166,7 +168,8 @@ function run_sim_time_iter!(
seq::DiscreteSequence,
sig::AbstractArray{Complex{T}},
Xt::SpinStateRepresentation{T},
sim_method::SimulationMethod;
sim_method::SimulationMethod,
backend::KA.Backend;
Nblocks=1,
Nthreads=Threads.nthreads(),
parts=[1:length(seq)],
Expand All @@ -193,7 +196,7 @@ function run_sim_time_iter!(
rfs += 1
else
run_spin_precession_parallel!(
obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method; Nthreads
obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method, backend; Nthreads
)
end
samples += Nadc
Expand Down Expand Up @@ -357,10 +360,10 @@ function simulate(
if backend isa KA.GPU
isnothing(sim_params["gpu_device"]) || set_device!(backend, sim_params["gpu_device"])
gpu_name = device_name(backend)
obj = gpu(obj, backend) #Phantom
seqd = gpu(seqd, backend) #DiscreteSequence
Xt = gpu(Xt, backend) #SpinStateRepresentation
sig = gpu(sig, backend) #Signal
obj = obj |> gpu #Phantom
seqd = seqd |> gpu #DiscreteSequence
Xt = Xt |> gpu #SpinStateRepresentation
sig = sig |> gpu #Signal
end

# Simulation
Expand All @@ -373,7 +376,8 @@ function simulate(
seqd,
sig,
Xt,
sim_params["sim_method"];
sim_params["sim_method"],
backend;
Nblocks=length(parts),
Nthreads=sim_params["Nthreads"],
parts,
Expand Down
1 change: 1 addition & 0 deletions KomaMRICore/test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[deps]
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
KomaMRIBase = "d0bc0b20-b151-4d03-b2a4-6ca51751cb9c"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
Expand Down
30 changes: 28 additions & 2 deletions KomaMRICore/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
using TestItems, TestItemRunner

### NOTE: by default, tests are run on the CPU with the number of threads set to
# Threads.nthreads(). To run on a specific GPU backend, pass the name of the
# trigger package ("AMDGPU", "CUDA", "Metal", or "oneAPI") as a test argument.
# Threads.nthreads(). To run on a specific GPU backend, add the name of the
# backend package ("AMDGPU", "CUDA", "Metal", or "oneAPI") to the test/Project.toml
# file in KomaMRICore and pass the name as a test argument.
#
# Example:
#
Expand Down Expand Up @@ -385,3 +386,28 @@ end
# For the time being, always pass the test
@test true
end

@testitem "GPU Functions" tags=[:core] begin
using Suppressor
import KernelAbstractions as KA
include("initialize.jl")

x = ones(Float32, 1000)
@suppress begin
y = x |> gpu
if USE_GPU
@test KA.get_backend(y) isa KA.GPU
y = y |> cpu
@test KA.get_backend(y) isa KA.CPU
else
# Test that gpu and cpu are no-ops
@test y == x
y = y |> cpu
@test y == x
end
end


@suppress print_devices()
@test true
end

0 comments on commit d624e8f

Please sign in to comment.