-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #443 from JuliaHealth/optimize-precession
Optimize run_spin_precession! and run_spin_excitation! for CPU
- Loading branch information
Showing
8 changed files
with
251 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
151 changes: 151 additions & 0 deletions
151
KomaMRICore/src/simulation/SimMethods/Bloch/BlochCPU.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
"""Stores preallocated structs for use in Bloch CPU run_spin_precession function.""" | ||
struct BlochCPUPrealloc{T} <: PreallocResult{T} | ||
M::Mag{T} # Mag{T} | ||
Bz_old::AbstractVector{T} # Vector{T}(Nspins x 1) | ||
Bz_new::AbstractVector{T} # Vector{T}(Nspins x 1) | ||
ϕ::AbstractVector{T} # Vector{T}(Nspins x 1) | ||
φ::AbstractVector{T} # Vector{T}(Nspins x 1) | ||
Rot::Spinor{T} # Spinor{T} | ||
end | ||
|
||
Base.view(p::BlochCPUPrealloc, i::UnitRange) = begin | ||
@views BlochCPUPrealloc( | ||
p.M[i], | ||
p.Bz_old[i], | ||
p.Bz_new[i], | ||
p.ϕ[i], | ||
p.φ[i], | ||
p.Rot[i] | ||
) | ||
end | ||
|
||
"""Preallocates arrays for use in run_spin_precession.""" | ||
function prealloc(sim_method::Bloch, backend::KA.CPU, obj::Phantom{T}, M::Mag{T}) where {T<:Real} | ||
return BlochCPUPrealloc( | ||
Mag( | ||
similar(M.xy), | ||
similar(M.z) | ||
), | ||
similar(obj.x), | ||
similar(obj.x), | ||
similar(obj.x), | ||
similar(obj.x), | ||
Spinor( | ||
similar(M.xy), | ||
similar(M.xy) | ||
) | ||
) | ||
end | ||
|
||
""" | ||
run_spin_precession(obj, seq, Xt, sig, M, sim_method, backend, prealloc) | ||
Alternate implementation of the run_spin_precession! function in BlochSimpleSimulationMethod.jl | ||
optimized for the CPU. Uses a loop to step through time instead of allocating a matrix of size | ||
NSpins x seq.t. The Bz_old, Bz_new, ϕ, and Mxy arrays are pre-allocated in run_sim_time_iter! so | ||
that they can be re-used from block to block. | ||
""" | ||
function run_spin_precession!( | ||
p::Phantom{T}, | ||
seq::DiscreteSequence{T}, | ||
sig::AbstractArray{Complex{T}}, | ||
M::Mag{T}, | ||
sim_method::Bloch, | ||
backend::KA.CPU, | ||
prealloc::BlochCPUPrealloc | ||
) where {T<:Real} | ||
#Simulation | ||
#Motion | ||
x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t[1,:]') | ||
|
||
#Initialize arrays | ||
Bz_old = prealloc.Bz_old | ||
Bz_new = prealloc.Bz_new | ||
ϕ = prealloc.ϕ | ||
Mxy = prealloc.M.xy | ||
fill!(ϕ, zero(T)) | ||
@. Bz_old = x[:,1] * seq.Gx[1] + y[:,1] * seq.Gy[1] + z[:,1] * seq.Gz[1] + p.Δw / T(2π * γ) | ||
|
||
# Fill sig[1] if needed | ||
ADC_idx = 1 | ||
if (seq.ADC[1]) | ||
sig[1] = sum(M.xy) | ||
ADC_idx += 1 | ||
end | ||
|
||
t_seq = zero(T) # Time | ||
for seq_idx=2:length(seq.t) | ||
x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t[seq_idx,:]') | ||
t_seq += seq.Δt[seq_idx-1] | ||
|
||
#Effective Field | ||
@. Bz_new = x * seq.Gx[seq_idx] + y * seq.Gy[seq_idx] + z * seq.Gz[seq_idx] + p.Δw / T(2π * γ) | ||
|
||
#Rotation | ||
@. ϕ += (Bz_old + Bz_new) * T(-π * γ) * seq.Δt[seq_idx-1] | ||
|
||
#Acquired Signal | ||
if seq_idx <= length(seq.ADC) && seq.ADC[seq_idx] | ||
@. Mxy = exp(-t_seq / p.T2) * M.xy * cis(ϕ) | ||
sig[ADC_idx] = sum(Mxy) | ||
ADC_idx += 1 | ||
end | ||
|
||
Bz_old, Bz_new = Bz_new, Bz_old | ||
end | ||
|
||
#Final Spin-State | ||
@. M.xy = M.xy * exp(-t_seq / p.T2) * cis(ϕ) | ||
@. M.z = M.z * exp(-t_seq / p.T1) + p.ρ * (T(1) - exp(-t_seq / p.T1)) | ||
|
||
return nothing | ||
end | ||
|
||
""" | ||
run_spin_excitation!(obj, seq, sig, M, sim_method, backend, prealloc) | ||
Alternate implementation of the run_spin_excitation! function in BlochSimpleSimulationMethod.jl | ||
optimized for the CPU. Uses preallocation for all arrays to reduce memory usage. | ||
""" | ||
function run_spin_excitation!( | ||
p::Phantom{T}, | ||
seq::DiscreteSequence{T}, | ||
sig::AbstractArray{Complex{T}}, | ||
M::Mag{T}, | ||
sim_method::Bloch, | ||
backend::KA.CPU, | ||
prealloc::BlochCPUPrealloc | ||
) where {T<:Real} | ||
ΔBz = prealloc.Bz_old | ||
Bz = prealloc.Bz_new | ||
B = prealloc.ϕ | ||
φ = prealloc.φ | ||
α = prealloc.Rot.α | ||
β = prealloc.Rot.β | ||
Maux_xy = prealloc.M.xy | ||
Maux_z = prealloc.M.z | ||
|
||
#Can be calculated outside of loop | ||
@. ΔBz = p.Δw / T(2π * γ) | ||
|
||
#Simulation | ||
for s in seq #This iterates over seq, "s = seq[i,:]" | ||
#Motion | ||
x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, s.t) | ||
#Effective field | ||
@. Bz = (s.Gx * x + s.Gy * y + s.Gz * z) + ΔBz - s.Δf / T(γ) # ΔB_0 = (B_0 - ω_rf/γ), Need to add a component here to model scanner's dB0(x,y,z) | ||
@. B = sqrt(abs(s.B1)^2 + abs(Bz)^2) | ||
@. B[B == 0] = eps(T) | ||
#Spinor Rotation | ||
@. φ = T(-π * γ) * (B * s.Δt) # TODO: Use trapezoidal integration here (?), this is just Forward Euler | ||
@. α = cos(φ) - Complex{T}(im) * (Bz / B) * sin(φ) | ||
@. β = -Complex{T}(im) * (s.B1 / B) * sin(φ) | ||
mul!(Spinor(α, β), M, Maux_xy, Maux_z) | ||
#Relaxation | ||
@. M.xy = M.xy * exp(-s.Δt / p.T2) | ||
@. M.z = M.z * exp(-s.Δt / p.T1) + p.ρ * (T(1) - exp(-s.Δt / p.T1)) | ||
end | ||
#Acquired signal | ||
#sig .= -1.4im #<-- This was to test if an ADC point was inside an RF block | ||
return nothing | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#Bloch is the default simulation method | ||
struct Bloch <: SimulationMethod end | ||
|
||
export Bloch | ||
|
||
include("Magnetization.jl") | ||
|
||
@functor Mag #Gives gpu acceleration capabilities, see GPUFunctions.jl | ||
|
||
function sim_output_dim( | ||
obj::Phantom{T}, seq::Sequence, sys::Scanner, sim_method::SimulationMethod | ||
) where {T<:Real} | ||
return (sum(seq.ADC.N), 1) #Nt x Ncoils, This should consider the coil info from sys | ||
end | ||
|
||
"""Magnetization initialization for Bloch simulation method.""" | ||
function initialize_spins_state( | ||
obj::Phantom{T}, sim_method::SimulationMethod | ||
) where {T<:Real} | ||
Nspins = length(obj) | ||
Mxy = zeros(T, Nspins) | ||
Mz = obj.ρ | ||
Xt = Mag{T}(Mxy, Mz) | ||
return Xt, obj | ||
end | ||
|
||
abstract type PreallocResult{T<:Real} end | ||
|
||
"""Default preallocation struct, stores nothing.""" | ||
struct DefaultPreAlloc{T} <: PreallocResult{T} end | ||
|
||
Base.view(p::DefaultPreAlloc, i::UnitRange) = p | ||
|
||
"""Default preallocation function.""" | ||
prealloc(sim_method::SimulationMethod, backend::KA.Backend, obj::Phantom{T}, M::Mag{T}) where {T<:Real} = DefaultPreAlloc{T}() | ||
|
||
include("KernelFunctions.jl") | ||
include("BlochSimple/BlochSimple.jl") | ||
include("Bloch/BlochCPU.jl") | ||
include("BlochDict/BlochDict.jl") |
Oops, something went wrong.
e09ba44
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
KomaMRI Benchmarks
MRI Lab/Bloch/CPU/2 thread(s)
244928608.5
ns1866110763
ns0.13
MRI Lab/Bloch/CPU/4 thread(s)
140758946
ns945776837
ns0.15
MRI Lab/Bloch/CPU/8 thread(s)
171370827
ns911378432
ns0.19
MRI Lab/Bloch/CPU/1 thread(s)
415982190.5
ns3387409372.5
ns0.12
MRI Lab/Bloch/GPU/CUDA
138386450.5
ns153551114
ns0.90
MRI Lab/Bloch/GPU/oneAPI
18715897786.5
ns26913270216
ns0.70
MRI Lab/Bloch/GPU/Metal
2988417291
ns3261239667
ns0.92
MRI Lab/Bloch/GPU/AMDGPU
1806396508
ns1817370709.5
ns0.99
Slice Selection 3D/Bloch/CPU/2 thread(s)
1141348868.5
ns3756525851
ns0.30
Slice Selection 3D/Bloch/CPU/4 thread(s)
622082227
ns1843011618
ns0.34
Slice Selection 3D/Bloch/CPU/8 thread(s)
462315010
ns1330616070
ns0.35
Slice Selection 3D/Bloch/CPU/1 thread(s)
2273113325
ns6715606353
ns0.34
Slice Selection 3D/Bloch/GPU/CUDA
255739261
ns265940605.5
ns0.96
Slice Selection 3D/Bloch/GPU/oneAPI
1709103436
ns2149205885
ns0.80
Slice Selection 3D/Bloch/GPU/Metal
1203673021
ns1409920896
ns0.85
Slice Selection 3D/Bloch/GPU/AMDGPU
688213296
ns712193120
ns0.97
This comment was automatically generated by workflow using github-action-benchmark.