Skip to content

Commit

Permalink
add Ledoit Wolf shrinkage estimator for cross-validation covariance
Browse files Browse the repository at this point in the history
different priors

change prior again

add reg into cov-mat for optimization

example tweaks

add reg to scalar method and widen default prior

fix lorenz

fix lorenz

major refactor of RF prior cov

fix lorenz

format

refactored user interface

examples work with new interface

format

resolve merge

format

bug-fix n_hp calc
  • Loading branch information
odunbar committed Jul 28, 2023
1 parent 89724aa commit 1f1394c
Show file tree
Hide file tree
Showing 12 changed files with 777 additions and 202 deletions.
20 changes: 11 additions & 9 deletions examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ using LinearAlgebra
using CalibrateEmulateSample.Emulators
using CalibrateEmulateSample.DataContainers
using CalibrateEmulateSample.ParameterDistributions
using CalibrateEmulateSample.EnsembleKalmanProcesses


case = "scalar"
println("running case $case")

diagonalize_input = true

input_cov_structure = "diagonal"
plot_flag = true
if plot_flag
using Plots
Expand All @@ -41,7 +41,7 @@ if !isdir(output_directory)
end

#problem
n = 100 # number of training points
n = 150 # number of training points
p = 2 # input dim
d = 2 # output dim
X = 2.0 * π * rand(p, n)
Expand All @@ -54,7 +54,7 @@ gx[2, :] = g2x

# Add noise η
μ = zeros(d)
Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d
Σ = 0.01 * [[0.8, 0.1] [0.1, 0.5]] # d x d
noise_samples = rand(MvNormal(μ, Σ), n)
# y = G(x) + η
Y = gx .+ noise_samples
Expand Down Expand Up @@ -125,13 +125,15 @@ if plot_flag
end

# setup random features
n_features = 180
optimizer_options = Dict("n_iteration" => 20, "prior_in_scale" => 0.1, "verbose" => true) #"Max" iterations (may do less)
n_features = 20
optimizer_options =
Dict("n_iteration" => 20, "scheduler" => DataMisfitController(terminate_at = 100.0), "verbose" => true) #"Max" iterations (may do less)


srfi = ScalarRandomFeatureInterface(
n_features,
p,
diagonalize_input = diagonalize_input,
input_cov_structure = input_cov_structure,
optimizer_options = optimizer_options,
)
emulator = Emulator(srfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true)
Expand All @@ -155,7 +157,7 @@ println("end predictions at ", n_pts * n_pts, " points")
#plot predictions
for y_i in 1:d
rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,)
rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 2 x 40000
rf_var = permutedims(reduce(vcat, [x' for x in rf_var_temp]), (2, 1)) # 2 x 40000

mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) # 2 x 40000
if plot_flag
Expand Down
16 changes: 7 additions & 9 deletions examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function main()

# Add noise η
μ = zeros(d)
Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d
Σ = 0.01 * [[0.8, 0.1] [0.1, 0.5]] # d x d
noise_samples = rand(MvNormal(μ, Σ), n)
# y = G(x) + η
Y = gx .+ noise_samples
Expand Down Expand Up @@ -132,21 +132,19 @@ function main()


# setup random features
n_features = 200
n_features = 400
if case ∈ ["svd-diag", "svd-nondiag"]
optimizer_options =
Dict("n_iteration" => 20, "prior_in_scale" => 0.01, "prior_out_scale" => 1.0, "verbose" => true) #"Max" iterations (may do less)
optimizer_options = Dict("n_iteration" => 40, "verbose" => true) #"Max" iterations (may do less)
else # without svd
optimizer_options =
Dict("n_iteration" => 20, "prior_in_scale" => 0.01, "prior_out_scale" => 0.01, "verbose" => true) #"Max" iterations (may do less)
optimizer_options = Dict("n_iteration" => 40, "verbose" => true) #"Max" iterations (may do less)
end

if case == "svd-diag"
vrfi = VectorRandomFeatureInterface(
n_features,
p,
d,
diagonalize_output = true,
output_cov_structure = "diagonal",
optimizer_options = optimizer_options,
)
emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true)
Expand All @@ -158,7 +156,7 @@ function main()
n_features,
p,
d,
diagonalize_output = true,
output_cov_structure = "diagonal",
optimizer_options = optimizer_options,
)
emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true, decorrelate = false)
Expand Down Expand Up @@ -187,7 +185,7 @@ function main()
for y_i in 1:d

rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,)
rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 2 x 40000
rf_var = permutedims(reduce(vcat, [x' for x in rf_var_temp]), (2, 1)) # 2 x 40000

mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) # 2 x 40000
if plot_flag
Expand Down
1 change: 1 addition & 0 deletions examples/GCM/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"

[compat]
FiniteDiff = "~2.10"
Expand Down
124 changes: 98 additions & 26 deletions examples/GCM/emulate_sample_script.jl
Original file line number Diff line number Diff line change
@@ -1,38 +1,35 @@
# Script to run Emulation and Sampling on data from GCM

# Import modules
using Distributions # probability distributions and associated functions
using Distributions
using LinearAlgebra
using Plots
using Plots.PlotMeasures # for mm
using StatsPlots
using Random
using JLD2
# CES
using CalibrateEmulateSample.Emulators
using CalibrateEmulateSample.MarkovChainMonteCarlo
using CalibrateEmulateSample.ParameterDistributions
using CalibrateEmulateSample.EnsembleKalmanProcesses
using CalibrateEmulateSample.DataContainers
using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers

@time begin

function main()
rng_seed = 2413798
Random.seed!(rng_seed)

# expname = "vrf-nondiag-logdet_newprior"
#emulator_type ∈ ["GPR","ScalarRFR","VectorRFR-svd-diag","VectorRFR-svd-nondiag", "VectorRFR-nondiag"]
# emulator_type = "GPR"
# expname = "gpr"

# emulator_type = "ScalarRFR"
# expname = "srf"
# CHOOSE YOUR CASE
case = 5

# emulator_type = "VectorRFR-svd-diag"
# expname = "vrf-svd-diag"
emulator_types = ["GPR", "ScalarRFR", "VectorRFR-svd-diag", "VectorRFR-svd-nondiag", "VectorRFR-nondiag"]

# emulator_type = "VectorRFR-svd-nondiag"
# expname = "vrf-svd-nondiag"
expnames = ["gpr", "srf", "vrf-svd-diag", "vrf-svd-nondiag", "vrf-nondiag_standardized"]

emulator_type = "VectorRFR-nondiag"
expname = "vrf-nondiag_standardized"
emulator_type = emulator_types[case]
expname = expnames[case]

# Output figure save directory
example_directory = @__DIR__
Expand All @@ -54,20 +51,18 @@ using CalibrateEmulateSample.DataContainers
obs_noise_cov = load(datafile)["obs_noise_cov"] # 96 x 96

#take only first 400 points
iter_mask = 1:4
#data_mask = 1:32
iter_mask = [1, 2, 4]
data_mask = 1:96
# data_mask= 33:64
# data_mask= 65:96
data_mask = 1:96
#data_mask = 1:96
#data_mask = [5*i for i = 1:Int(floor(96/5))]

inputs = inputs[:, :, iter_mask]
outputs = outputs[:, data_mask, iter_mask]
obs_noise_cov = obs_noise_cov[data_mask, data_mask]
truth = truth[data_mask]

# priorfile = "priors.jld2"
# prior = load(priorfile)["prior"]

# derived quantities
N_ens, input_dim, N_iter = size(inputs)
Expand All @@ -79,7 +74,18 @@ using CalibrateEmulateSample.DataContainers
normalized = true

# setup random features
eki_options_override = Dict("tikhonov" => 0, "multithread" => "ensemble") #faster than tullio multithread for training
eki_options_override = Dict(
"verbose" => true,
"tikhonov" => 0.0,
"scheduler" => DataMisfitController(),
"n_iteration" => 20,
"multithread" => "ensemble",
"train_fraction" => 0.8,
"inflation" => 0.0,
"cov_sample_multiplier" => 0.2,
"n_ensemble" => 50,
"localization" => SEC(1.0),
)


if emulator_type == "VectorRFR-svd-nondiag" || emulator_type == "VectorRFR-nondiag"
Expand All @@ -89,23 +95,22 @@ using CalibrateEmulateSample.DataContainers
println("Running Vector RF model - without SVD and assuming non-diagonal variance ")
end

n_features = 80 * Int(floor(5 * sqrt(N_ens * N_iter)))
n_features = 20 * Int(floor(5 * sqrt(N_ens * N_iter))) #80 *
println("build RF with $(N_ens*N_iter) training points and $(n_features) random features.")


mlt = VectorRandomFeatureInterface(n_features, input_dim, output_dim, optimizer_options = eki_options_override)

elseif emulator_type == "VectorRFR-svd-diag"

println("Running Vector RF model - using SVD and assuming diagonal variance")
n_features = 20 * Int(floor(5 * sqrt(N_ens * N_iter)))
n_features = 5 * Int(floor(5 * sqrt(N_ens * N_iter))) #20 *
println("build RF with $(N_ens*N_iter) training points and $(n_features) random features.")

mlt = VectorRandomFeatureInterface(
n_features,
input_dim,
output_dim,
diagonalize_output = true,
output_cov_structure = "diagonal",
optimizer_options = eki_options_override,
)

Expand Down Expand Up @@ -231,8 +236,75 @@ using CalibrateEmulateSample.DataContainers
end
end

end
end # plots

### MCMC

priorfile = "priors.jld2"
prior = load(priorfile)["prior"]

##
### Sample: Markov Chain Monte Carlo
###
# initial values
u0 = vec(mean(get_inputs(input_output_pairs), dims = 2))
println("initial parameters: ", u0)

# First let's run a short chain to determine a good step size
mcmc = MCMCWrapper(RWMHSampling(), truth, prior, emulator; init_params = u0)
new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0)

# Now begin the actual MCMC
println("Begin MCMC - with step size ", new_step)
chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000)
posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain)

post_mean = mean(posterior)
post_cov = cov(posterior)
println("post_mean")
println(post_mean)
println("post_cov")
println(post_cov)
println("D util")
println(det(inv(post_cov)))
println(" ")


# plot posterior

truth_params = [log(0.7 / 0.3) log(7200)]' # parameter value (at truth) - unconstrained

# Save data
save(
joinpath(data_save_directory, expname * "posterior.jld2"),
"posterior",
posterior,
"input_output_pairs",
input_output_pairs,
"truth_params",
truth_params,
)


constrained_truth_params = transform_unconstrained_to_constrained(posterior, truth_params)
param_names = get_name(posterior)

posterior_samples = vcat([get_distribution(posterior)[name] for name in param_names]...) #samples are columns
constrained_posterior_samples =
mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1)

gr(dpi = 300, size = (300, 300))
p = cornerplot(permutedims(constrained_posterior_samples, (2, 1)), label = param_names, compact = true)
plot!(p.subplots[1], [constrained_truth_params[1]], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash) # vline on top histogram
plot!(p.subplots[3], [constrained_truth_params[2]], seriestype = "hline", w = 1.5, c = :steelblue, ls = :dash) # hline on right histogram
plot!(p.subplots[2], [constrained_truth_params[1]], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash) # v & h line on scatter.
plot!(p.subplots[2], [constrained_truth_params[2]], seriestype = "hline", w = 1.5, c = :steelblue, ls = :dash)
figpath = joinpath(figure_save_directory, "plot_posterior_" * expname * ".png")
savefig(figpath)


end # main

@time begin
main()
end # for @time
2 changes: 1 addition & 1 deletion examples/GCM/sbatch_emulate_sample_jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#Submit this script with: sbatch thefilename
#SBATCH --time=6:00:00 # walltime
#SBATCH --time=12:00:00 # walltime
#SBATCH --ntasks-per-node=1 # number of processor cores (i.e. tasks)
#SBATCH --cpus-per-task=28
#SBATCH --mem-per-cpu=6000
Expand Down
5 changes: 3 additions & 2 deletions examples/Lorenz/GModel_common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ end
# tend: End of simulation Float64(1), nstep:
function lorenz_solve(settings::LSettings, params::LParams)
# Initialize
nstep = Int32(ceil((settings.tend - settings.t_start) / settings.dt))
# nstep = Int32(ceil((settings.tend - settings.t_start) / settings.dt))
nstep = Int32(ceil(settings.tend / settings.dt))
xn = zeros(Float64, settings.N, nstep)
t = zeros(Float64, nstep)
# Initial perturbation
Expand All @@ -137,7 +138,7 @@ function lorenz_solve(settings::LSettings, params::LParams)
Xbuffer = zeros(4, settings.N)
# March forward in time
for j in 1:nstep
t[j] = settings.t_start + settings.dt * j
t[j] = settings.dt * j
#use view to update a slice
# RK4! modifies first and last arguments
if settings.dynamics == 1
Expand Down
Loading

0 comments on commit 1f1394c

Please sign in to comment.