Merge pull request #125 from JanJereczek/reduce-prealloc

Compat with Float32; reduction of preallocated; remove jld2 + ncdatasets deps
JanJereczek · Jul 31, 2024 · 1afe693 · 1afe693
2 parents 08e34a0 + 3867012
commit 1afe693
Show file tree

Hide file tree

Showing 22 changed files with 263 additions and 302 deletions.
diff --git a/Project.toml b/Project.toml
@@ -12,9 +12,7 @@ DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
-JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
 NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
 NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
@@ -41,8 +39,6 @@ EnsembleKalmanProcesses = "1.0"
 FFTW = "1.8"
 FastGaussQuadrature = "1.0"
 Interpolations = "0.14, 0.15"
-JLD2 = "0.4"
-NCDatasets = "0.13, 0.14"
 NLsolve = "4.5"
 OrdinaryDiffEq = "6.58"
 ParallelStencil = "0.13"

diff --git a/docs/src/examples/glacialcycle.jl b/docs/src/examples/glacialcycle.jl
@@ -49,7 +49,7 @@ lb = cat(Tlitho, [fill(lbval, Omega.Nx, Omega.Ny) for lbval in lb_vec]..., dims=
 
 rlb = c.r_equator .- lb
 nlb = size(rlb, 3)
-lv_3D = 10 .^ cat([logeta_itp.(Lon, Lat, rlb[:, :, k]) for k in 1:nlb]..., dims=3);
+lv_3D = 10 .^ cat([logeta_itp.(Lon, Lat, rlb[:, :, k]) for k in 1:nlb]..., dims=3)
 
 #=
 To prevent extreme values of the viscosity, we require it to be larger than a minimal value, fixed to be $$10^{16} \, \mathrm{Pa \, s} $$. We subsequently generate a [`LayeredEarth`](@ref) that embeds all the information that has been loaded so far:

diff --git a/docs/src/examples/inversion.jl b/docs/src/examples/inversion.jl
@@ -33,7 +33,7 @@ lb = [100e3, 200e3, 300e3]
 _, eta, eta_itp = load_dataset("Wiens2022")
 loglv = cat([eta_itp.(Omega.X, Omega.Y, z) for z in lb]..., dims = 3)
 lv = 10 .^ loglv
-p = LayeredEarth(Omega, layer_boundaries = lb, layer_viscosities = lv);
+p = LayeredEarth(Omega, layer_boundaries = lb, layer_viscosities = lv)
 
 #=
 To make this problem more exciting, we place the center of the ice load to $$(-1000, -1000) \: \mathrm{km}$$ where the viscosity field displays a less uniform structure. For the sake of simplicity, the data to fit is obtained from a FastIsostasy simulation with the ground-truth viscosity field.
@@ -48,7 +48,7 @@ t_Hice = copy(t_out)
 
 true_viscosity = copy(p.effective_viscosity)
 fip = FastIsoProblem(Omega, c, p, t_out, t_Hice, Hice, output = "sparse")
-solve!(fip);
+solve!(fip)
 
 #=
 Assuming the result of this simulation to be the ground truth $$\hat{y}$$, we can now build an `InversionData` object that will be passed to an `InversionProblem`. The `InversionData` object contains the time series of the input field `X = Hice` and the output field `Y = extract_fip(fip)` (here the displacement field). Furthermore, we define an `InversionConfig` that uses the unscented Kalman filter as introduced in [huang-improve-2021](@cite). 
@@ -67,7 +67,7 @@ extract_fip = extract_last_viscous_displacement
 Y = extract_fip(fip)
 
 data = InversionData(t_inv, length(t_inv), X, Y, mask, count(mask))
-config = InversionConfig(Unscented, N_iter = 5);
+config = InversionConfig(Unscented, N_iter = 5)
 
 #=
 Before finalising the inversion, we need to specify a `ParameterReduction`, which allows a multiple dispatch of `reconstruct!`, `extract_output` and (optionally) `print_inversion_evolution`. This allows the inversion procedure to update the parameters, extract the relevant output and (optionally) print out meaningful information over the iterations of the inversion procedure. In this example, we define a `ViscosityRegion` that reduces the number of parameters to the number of grid points in the mask.
@@ -82,7 +82,7 @@ function FastIsostasy.reconstruct!(fip, params, reduction::ViscosityRegion)
     fip.p.effective_viscosity[reduction.mask] .= 10 .^ params
 end
 
-function FastIsostasy.extract_output(fip, reduction::ViscosityRegion)
+function FastIsostasy.extract_output(fip, reduction::ViscosityRegion, data::InversionData)
     return extract_fip(fip)[1][reduction.mask]
 end
 
@@ -98,7 +98,7 @@ function FastIsostasy.print_inversion_evolution(paraminv, n, ϕ_n, reduction::Vi
     return nothing
 end
 
-reduction = ViscosityRegion(mask, count(mask));
+reduction = ViscosityRegion(mask, count(mask))
 
 #=
 We can now proceed to the definition of the [`InversionProblem`]:

diff --git a/ext/FastIsoInversion.jl b/ext/FastIsoInversion.jl
@@ -2,11 +2,20 @@ module FastIsoInversion
 
 using FastIsostasy
 using Distributions
-using EnsembleKalmanProcesses: EnsembleKalmanProcesses, EnsembleKalmanProcess,
-    Unscented, get_error, get_g_mean_final, get_ϕ_final, get_ϕ_mean_final
+using EnsembleKalmanProcesses:
+    EnsembleKalmanProcesses,
+    EnsembleKalmanProcess,
+    Unscented,
+    get_error,
+    get_g_mean_final,
+    get_ϕ_final,
+    get_ϕ_mean_final
 using EnsembleKalmanProcesses.Observations: Observations
-using EnsembleKalmanProcesses.ParameterDistributions: ParameterDistributions,
-    ParameterDistribution, combine_distributions, constrained_gaussian
+using EnsembleKalmanProcesses.ParameterDistributions:
+    ParameterDistributions,
+    ParameterDistribution,
+    combine_distributions,
+    constrained_gaussian
 using LinearAlgebra
 using .Threads
 
@@ -27,41 +36,62 @@ defined by `reduction<:ParameterReduction` and the behaviour of
 
 """
 function FastIsostasy.inversion_problem(
-    fip::FastIsoProblem,
+    fip::FastIsoProblem{T,L,M,C,FP,IP,O},
     config::InversionConfig,
-    data::InversionData,
-    reduction::ParameterReduction,
+    data::InversionData{T,M},
+    reduction::R,
     priors;
     save_stride_iter = 1,
-)
-    T = Float64
-    μ_y = zeros(T, data.countmask * data.nt)
-    w_y = ones(T, data.countmask * data.nt)
-    Σ_y = uncorrelated_obs_covariance(config.scale_obscov, w_y)
+) where {T<:AbstractFloat,L,M<:Matrix{T},C,FP,IP,O,R<:ParameterReduction{T}}
+
+
+    println("Generating noisy data set...")
+    Σ_y = uncorrelated_obs_covariance(
+        config.scale_obscov,
+        ones(T, data.countmask * data.nt),
+    )
     yn = zeros(T, data.countmask * data.nt, config.n_samples)
     y = vcat([yy[data.mask] for yy in data.Y]...)
-    @inbounds for j in 1:config.n_samples
-        yn[:, j] .= y .+ rand(Distributions.MvNormal(μ_y, Σ_y))
+    @inbounds for j in axes(yn, 2)
+        view(yn, :, j) .=
+            y .+ rand(Distributions.MvNormal(zeros(T, data.countmask * data.nt), Σ_y))
     end
+
+    println("Observing data...")
     ynoisy = Observations.Observation(yn, Σ_y, ["Noisy truth"])
 
-    # Init process and arrays
-    process = Unscented(mean(priors), cov(priors);  # Could also use process = Inversion()
-        α_reg = config.α_reg, update_freq = config.update_freq)
+    println("Defining process ...")
+    process = Unscented(
+        mean(priors),
+        cov(priors);  # Could also use process = Inversion()
+        α_reg = config.α_reg,
+        update_freq = config.update_freq,
+    )
     ukiobj = EnsembleKalmanProcess(ynoisy.mean, ynoisy.obs_noise_cov, process)
-    error = fill(T(Inf), config.N_iter)
-    out = [fill(Inf, reduction.nparams) for _ in 1:save_stride_iter:config.N_iter+1]
 
+    println("Initializing arrays...")
+    error = fill(T(Inf), config.N_iter)
+    out = [fill(Inf, reduction.nparams) for _ = 1:save_stride_iter:config.N_iter+1]
     ϕ_tool = get_ϕ_final(priors, ukiobj)       # Params in physical/constrained space
     G_ens = zeros(T, data.countmask * data.nt, size(ϕ_tool, 2))
 
-    return InversionProblem(fip, config, data, reduction, priors, ukiobj, error, out, G_ens)
+    return InversionProblem(
+        fip,
+        config,
+        data,
+        reduction,
+        priors,
+        ukiobj,
+        error,
+        out,
+        G_ens,
+    )
 
 end
 
 function uncorrelated_obs_covariance(scale_obscov, loadscaling_obscov)
     diagvar = scale_obscov ./ (loadscaling_obscov .+ 1)   # 10000.0
-    return convert(Array, Diagonal(diagvar) )
+    return convert(Array, Diagonal(diagvar))
 end
 
 """
@@ -74,24 +104,25 @@ function FastIsostasy.solve!(paraminv::InversionProblem; verbose::Bool = false)
 
     paraminv.out[1] .= get_ϕ_mean_final(paraminv.priors, paraminv.ukiobj)
     ϕ_n = get_ϕ_final(paraminv.priors, paraminv.ukiobj)
-    fips = [deepcopy(paraminv.fip) for _ in 1:nthreads()]
+    fips = [deepcopy(paraminv.fip) for _ = 1:nthreads()]
 
-    for n in 1:paraminv.config.N_iter
+    for n = 1:paraminv.config.N_iter
 
         # Get params in physical/constrained space
         ϕ_n .= get_ϕ_final(paraminv.priors, paraminv.ukiobj)
 
         Threads.@threads for j in axes(ϕ_n, 2)
-            if verbose && (rem(j, 10) == 0)
-                println("Populating ensemble displacement matrix at n = $n, j = $j")
-            end
             id = Threads.threadid()
+            if verbose # && (rem(j, 10) == 0)
+                println("Populating ̂Y at: thread = $id,  n = $n,  j = $j")
+            end
             FastIsostasy.reconstruct!(fips[id], ϕ_n[:, j], paraminv.reduction)
-            paraminv.G_ens[:, j] .= forward_fastiso(fips[id], paraminv.reduction)
+            paraminv.G_ens[:, j] .=
+                forward_fastiso(fips[id], paraminv.reduction, paraminv.data)
         end
 
         if verbose
-            println("Extrema of ensemble displacement matrix: $(extrema(paraminv.G_ens))")
+            println("Extrema of ̂Y: $(extrema(paraminv.G_ens))")
         end
 
         EnsembleKalmanProcesses.update_ensemble!(paraminv.ukiobj, paraminv.G_ens)
@@ -100,21 +131,27 @@ function FastIsostasy.solve!(paraminv::InversionProblem; verbose::Bool = false)
         print_inversion_evolution(paraminv, n, ϕ_n, paraminv.reduction)
     end
 
-    FastIsostasy.reconstruct!(paraminv.fip, get_ϕ_mean_final(paraminv.priors, paraminv.ukiobj),
-        paraminv.reduction)
+    FastIsostasy.reconstruct!(
+        paraminv.fip,
+        get_ϕ_mean_final(paraminv.priors, paraminv.ukiobj),
+        paraminv.reduction,
+    )
 
     return nothing
 end
 
-function FastIsostasy.forward_fastiso(fip::FastIsoProblem, r::ParameterReduction)
+function FastIsostasy.forward_fastiso(
+    fip::FastIsoProblem,
+    r::ParameterReduction,
+    d::InversionData,
+)
     remake!(fip)
     solve!(fip)
-    return FastIsostasy.extract_output(fip, r)
+    return FastIsostasy.extract_output(fip, r, d)
 end
 
 # Actually, this should not be diagonal because there is a correlation between points.
-function correlated_obs_covariance()
-end
+function correlated_obs_covariance() end
 
 """
     extract_inversion()

diff --git a/ext/pseudocode.jl b/ext/pseudocode.jl
diff --git a/src/FastIsostasy.jl b/src/FastIsostasy.jl
@@ -8,9 +8,7 @@ using DynamicalSystemsBase: CoupledODEs, trajectory
 using FastGaussQuadrature: gausslegendre
 using FFTW: fft, ifft, plan_fft!, plan_ifft!, cFFTWPlan
 using Interpolations: Flat, Gridded, Linear, OnGrid, Throw, linear_interpolation
-using JLD2: @load, jldopen, load
 using LinearAlgebra: Diagonal, det, diagm, norm
-using NCDatasets: NCDatasets, NCDataset, defDim, defVar
 using NetCDF
 using NLsolve: mcpsolve
 using OrdinaryDiffEq: ODEProblem, solve, remake, OrdinaryDiffEqAlgorithm
@@ -55,7 +53,7 @@ export years2seconds, seconds2years, m_per_sec2mm_per_yr
 export latlon2stereo, stereo2latlon, lon360tolon180
 export reinit_structs_cpu, meshgrid, kernelcollect
 
-export get_r, gauss_distr, generate_gaussian_field, samesize_conv, samesize_conv!, blur
+export get_r, gauss_distr, generate_gaussian_field, samesize_conv, blur
 export uniform_ice_cylinder, stereo_ice_cylinder, stereo_ice_cap
 export write_out!, remake!, savefip, null, not, cudainfo
 

diff --git a/src/adaptive_ocean.jl b/src/adaptive_ocean.jl
@@ -28,7 +28,7 @@ mutable struct OceanSurfaceChange{T<:AbstractFloat}
 end
 
 function OceanSurfaceChange(; T = Float64, A_ocean_pd = T(3.625e14), z0 = T(0.0))
-    z, A, itp = load_oceansurfacefunction(verbose = false)
+    z, A, itp = load_oceansurfacefunction(T = T, verbose = false)
     A_itp(z) = A_ocean_pd / itp(T(0.0)) * itp(z)
     return OceanSurfaceChange(z0, A_itp(z0), z, A, A_itp, A_ocean_pd, T(1e10))
 end

diff --git a/src/analytic_solutions.jl b/src/analytic_solutions.jl
@@ -11,7 +11,7 @@ by `t`.
 function analytic_solution(r::T, t::T, c, p,
     H0::T, R0::T; n_quad_support=5::Int) where {T<:AbstractFloat}
 
-    support = vcat(1e-14, 10 .^ (-10:0.05:-3), 1.0)     # support vector for quadrature
+    support = T.(vcat(1e-14, 10 .^ (-10:0.05:-3), 1.0))     # support vector for quadrature
     scaling = c.rho_ice * c.g * H0 * R0
     if t == T(Inf)
         equilibrium_integrand_r(kappa) = equilibrium_integrand(kappa, r, c, p, R0)

diff --git a/src/convolution.jl b/src/convolution.jl
@@ -1,44 +1,34 @@
-struct InplaceConvolution{T<:AbstractFloat, C<:ComplexMatrix{T}, FP<:ForwardPlan{T},
-    IP<:InversePlan{T}}
+struct InplaceConvolution{T<:AbstractFloat, M<:KernelMatrix{T}, C<:ComplexMatrix{T},
+    FP<:ForwardPlan{T}, IP<:InversePlan{T}}
     pfft!::FP
     pifft!::IP
-    Afft::C
-    Bfft::C
-    Cfft::C
+    kernel_fft::C
+    out_fft::C
+    out::M
     Nx::Int
     Ny::Int
 end
 
-function InplaceConvolution(A::M, use_cuda::Bool) where {T<:AbstractFloat, M<:KernelMatrix{T}}
-    Nx, Ny = size(A)
-    Afft = zeros(Complex{T}, 2*Nx-1, 2*Ny-1)
-    view(Afft, 1:Nx, 1:Ny) .= A
+function InplaceConvolution(kernel::M, use_cuda::Bool) where {T<:AbstractFloat, M<:KernelMatrix{T}}
+    Nx, Ny = size(kernel)
+    kernel_fft = zeros(Complex{T}, 2*Nx-1, 2*Ny-1)
+    view(kernel_fft, 1:Nx, 1:Ny) .= kernel
     if use_cuda
-        Afft = CuMatrix(Afft)
+        kernel_fft = CuMatrix(kernel_fft)
     end
-    pfft!, pifft! = choose_fft_plans(Afft, use_cuda)
-    pfft! * Afft
-    return InplaceConvolution(pfft!, pifft!, Afft, copy(Afft), copy(Afft), Nx, Ny)
+    pfft!, pifft! = choose_fft_plans(kernel_fft, use_cuda)
+    pfft! * kernel_fft
+    return InplaceConvolution(pfft!, pifft!, kernel_fft, copy(kernel_fft), real.(kernel_fft), Nx, Ny)
 end
 
-function (ipconv::InplaceConvolution{T, C, FP, IP})(B::M) where {T<:AbstractFloat,
+function (ipconv::InplaceConvolution{T, M, C, FP, IP})(B::M) where {T<:AbstractFloat,
     M<:KernelMatrix{T}, C<:ComplexMatrix{T}, FP<:ForwardPlan{T}, IP<:InversePlan{T}}
-    (; pfft!, pifft!, Afft, Bfft, Cfft, Nx, Ny) = ipconv
-    Bfft .= 0
-    view(Bfft, 1:Nx, 1:Ny) .= complex.(B)
-    pfft! * Bfft
-    Cfft .= Afft .* Bfft
-    pifft! * Cfft
-    return real.(Cfft)
-end
-
-function (ipconv::InplaceConvolution{T, C, FP, IP})(out, B) where {T<:AbstractFloat,
-    C<:ComplexMatrix{T}, FP<:ForwardPlan{T}, IP<:InversePlan{T}}
-    (; pfft!, pifft!, Afft, Bfft, Cfft, Nx, Ny) = ipconv
-    Bfft .= 0
-    view(Bfft, 1:Nx, 1:Ny) .= complex.(B)
-    pfft! * Bfft
-    Cfft .= Afft .* Bfft
-    pifft! * Cfft
-    @. out = real(Cfft)
+    (; pfft!, pifft!, kernel_fft, out_fft, out, Nx, Ny) = ipconv
+    out_fft .= 0
+    view(out_fft, 1:Nx, 1:Ny) .= complex.(B)
+    pfft! * out_fft
+    out_fft .*= kernel_fft
+    pifft! * out_fft
+    @. out = real(out_fft)
+    return nothing
 end