diff --git a/src/GPEmulator.jl b/src/GPEmulator.jl
index 6ae78e7b0..992d3489b 100644
--- a/src/GPEmulator.jl
+++ b/src/GPEmulator.jl
@@ -50,19 +50,26 @@ models.
 # Fields
 $(DocStringExtensions.FIELDS)
 """
-struct GPObj{FT<:AbstractFloat,GPM}
-    "training points/parameters; N_parameters x N_samples"
+struct GPObj{FT<:AbstractFloat, GPM}
+    "training inputs/parameters; N_samples x N_parameters"
     inputs::Array{FT, 2}
-    "output data (`outputs = G(inputs)`); N_parameters x N_samples"
+    "training data/targets; N_samples x N_data"
    data::Array{FT, 2}
-    "the Gaussian Process Regression model(s) that are fitted to the given input-output pairs"
+    "mean of input; 1 x N_parameters"
+    input_mean::Array{FT, 2}
+    "square root of the inverse of the input covariance matrix; N_parameters x N_parameters"
+    sqrt_inv_input_cov::Array{FT, 2}
+    "the Gaussian Process (GP) Regression model(s) that are fitted to the given input-data pairs"
     models::Vector
-    "Data prediction type"
-    prediction_type::Union{Nothing,PredictionType}
+    "whether to fit GP models on normalized inputs ((inputs - input_mean) * sqrt_inv_input_cov)"
+    normalized::Bool
+    "prediction type (`y` to predict the data, `f` to predict the latent function)"
+    prediction_type::Union{Nothing, PredictionType}
 end
 
+
 """
-    GPObj(inputs, data, package::GPJL, GPkernel::Union{K, KPy, Nothing}=nothing; prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject}
+    GPObj(inputs, data, package::GPJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true, prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject}
 
 Inputs and data of size N_samples x N_parameters (both arrays will be transposed
 in the construction of the GPObj)
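The two new fields encode a whitening of the training inputs. A minimal standalone sketch of that convention (synthetic inputs, not part of the patch): after subtracting `input_mean` and multiplying by `sqrt_inv_input_cov`, the inputs have zero mean and identity sample covariance.

```julia
# Standalone illustration of the whitening convention behind the new fields.
# The 50 x 3 input matrix is synthetic; only standard-library calls are used.
using LinearAlgebra, Statistics

inputs = randn(50, 3)                             # N_samples x N_parameters
input_mean = reshape(mean(inputs, dims=1), 1, :)  # 1 x N_parameters
sqrt_inv_input_cov = sqrt(inv(cov(inputs, dims=1)))
normalized = (inputs .- input_mean) * sqrt_inv_input_cov

# After whitening, the sample covariance is (numerically) the identity
@assert isapprox(cov(normalized, dims=1), I(3); atol=1e-8)
```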
""" -function GPObj(inputs, data, package::GPJL, GPkernel::Union{K, KPy, Nothing}=nothing; prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject} +function GPObj(inputs, data, package::GPJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true, prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject} FT = eltype(data) models = Any[] - outputs = convert(Array{FT}, data') - inputs = convert(Array{FT}, inputs') + # Make sure inputs and data are arrays of type FT + inputs = convert(Array{FT}, inputs) + data = convert(Array{FT}, data) + + input_mean = reshape(mean(inputs, dims=1), 1, :) + sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1)))) + if normalized + inputs = (inputs .- input_mean) * sqrt_inv_input_cov + end + # Use a default kernel unless a kernel was supplied to GPObj if GPkernel==nothing # Construct kernel: @@ -91,36 +106,49 @@ function GPObj(inputs, data, package::GPJL, GPkernel::Union{K, KPy, Nothing}=not GPkernel = rbf + white end - for i in 1:size(outputs, 1) + for i in 1:size(data, 2) # Make a copy of GPkernel (because the kernel gets altered in # every iteration) GPkernel_i = deepcopy(GPkernel) - # inputs: N_param x N_samples - # outputs: N_data x N_samples + # inputs: N_samples x N_parameters + # data: N_samples x N_data logstd_obs_noise = log(sqrt(0.5)) # log standard dev of obs noise # Zero mean function kmean = MeanZero() - m = GPE(inputs, outputs[i, :], kmean, GPkernel_i, logstd_obs_noise) + m = GPE(inputs', dropdims(data[:, i]', dims=1), kmean, GPkernel_i, + logstd_obs_noise) optimize!(m, noise=false) push!(models, m) end - return GPObj{FT, typeof(package)}(inputs, outputs, models, prediction_type) + return GPObj{FT, typeof(package)}(inputs, data, input_mean, + sqrt_inv_input_cov, models, normalized, + prediction_type) end + """ - GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=nothing) where {K<:Kernel, KPy<:PyObject} + GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true) where {K<:Kernel, KPy<:PyObject} Inputs and data of size N_samples x N_parameters (both arrays will be transposed in the construction of the GPObj) - - `GPkernel` - GaussianProcesses kernel object. If not supplied, a default - kernel is used. The default kernel is the sum of a Squared - Exponential kernel and white noise. + - `GPkernel` - GaussianProcesses or ScikitLearn kernel object. If not supplied, + a default kernel is used. The default kernel is the sum of a + Squared Exponential kernel and white noise. 
""" -function GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=nothing) where {K<:Kernel, KPy<:PyObject} +function GPObj(inputs, data, package::SKLJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true) where {K<:Kernel, KPy<:PyObject} FT = eltype(data) models = Any[] - outputs = convert(Array{FT}, data') + + # Make sure inputs and data are arrays of type FT inputs = convert(Array{FT}, inputs) + data = convert(Array{FT}, data) + + input_mean = reshape(mean(inputs, dims=1), 1, :) + sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1)))) + if normalized + inputs = (inputs .- input_mean) * sqrt_inv_input_cov + end + if GPkernel==nothing const_value = 1.0 var_kern = ConstantKernel(constant_value=const_value, @@ -132,12 +160,11 @@ function GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=no noise_level_bounds=(1e-05, 10.0)) GPkernel = var_kern * rbf + white end - for i in 1:size(outputs,1) - out = reshape(outputs[i,:], (size(outputs, 2), 1)) + for i in 1:size(data, 2) m = GaussianProcessRegressor(kernel=GPkernel, n_restarts_optimizer=10, alpha=0.0, normalize_y=true) - ScikitLearn.fit!(m, inputs, out) + ScikitLearn.fit!(m, inputs, data[:, i]) if i==1 println(m.kernel.hyperparameters) print("Completed training of: ") @@ -145,35 +172,57 @@ function GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=no print(i,", ") push!(models, m) end - return GPObj{FT, typeof(package)}(inputs, outputs, models, nothing) + return GPObj{FT, typeof(package)}(inputs, data, input_mean, sqrt_inv_input_cov, + models, normalized, YType()) end -predict(gp::GPObj{FT,GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type) +""" + predict(gp::GPObj{FT,GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type) + +Evaluate the GP model(s) at new inputs. + - `gp` - a GPObj + - `new_inputs` - inputs for which GP model(s) is/are evaluated; N_new_inputs x N_parameters + +Note: If gp.normalized == true, the new inputs are normalized prior to the prediction +""" +predict(gp::GPObj{FT, GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type) -function predict(gp::GPObj{FT,GPJL}, +function predict(gp::GPObj{FT, GPJL}, new_inputs::Array{FT}, ::FType) where {FT} + if gp.normalized + new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov + end M = length(gp.models) μσ2 = [predict_f(gp.models[i], new_inputs) for i in 1:M] - return first.(μσ2), last.(μσ2) + # Return mean(s) and standard deviation(s) + return vcat(first.(μσ2)...), vcat(last.(μσ2)...) end -function predict(gp::GPObj{FT,GPJL}, + +function predict(gp::GPObj{FT, GPJL}, new_inputs::Array{FT}, ::YType) where {FT} + if gp.normalized + new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov + end M = length(gp.models) # predicts columns of inputs so must be transposed new_inputs = convert(Array{FT}, new_inputs') μσ2 = [predict_y(gp.models[i], new_inputs) for i in 1:M] - return first.(μσ2), last.(μσ2) + # Return mean(s) and standard deviation(s) + return vcat(first.(μσ2)...), vcat(last.(μσ2)...) 
@@ -145,35 +172,59 @@ function GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=no
         print(i,", ")
         push!(models, m)
     end
-    return GPObj{FT, typeof(package)}(inputs, outputs, models, nothing)
+    return GPObj{FT, typeof(package)}(inputs, data, input_mean, sqrt_inv_input_cov,
+                                      models, normalized, YType())
 end
 
-predict(gp::GPObj{FT,GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type)
+"""
+    predict(gp::GPObj{FT, GPJL}, new_inputs::Array{FT}) where {FT}
+
+Evaluate the GP model(s) at new inputs.
+  - `gp` - a GPObj
+  - `new_inputs` - inputs for which GP model(s) is/are evaluated; N_new_inputs x N_parameters
+
+Note: If gp.normalized == true, the new inputs are normalized prior to the prediction
+"""
+predict(gp::GPObj{FT, GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type)
 
-function predict(gp::GPObj{FT,GPJL},
+function predict(gp::GPObj{FT, GPJL},
                  new_inputs::Array{FT},
                  ::FType) where {FT}
+    if gp.normalized
+        new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov
+    end
     M = length(gp.models)
+    # predict_f, like predict_y below, predicts columns of inputs, so transpose
+    new_inputs = convert(Array{FT}, new_inputs')
     μσ2 = [predict_f(gp.models[i], new_inputs) for i in 1:M]
-    return first.(μσ2), last.(μσ2)
+    # Return mean(s) and variance(s)
+    return vcat(first.(μσ2)...), vcat(last.(μσ2)...)
 end
 
-function predict(gp::GPObj{FT,GPJL},
+
+function predict(gp::GPObj{FT, GPJL},
                  new_inputs::Array{FT},
                  ::YType) where {FT}
+    if gp.normalized
+        new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov
+    end
     M = length(gp.models)
     # predicts columns of inputs so must be transposed
     new_inputs = convert(Array{FT}, new_inputs')
     μσ2 = [predict_y(gp.models[i], new_inputs) for i in 1:M]
-    return first.(μσ2), last.(μσ2)
+    # Return mean(s) and variance(s)
+    return vcat(first.(μσ2)...), vcat(last.(μσ2)...)
 end
 
-function predict(gp::GPObj{FT,SKLJL},
+
+function predict(gp::GPObj{FT, SKLJL},
                  new_inputs::Array{FT}) where {FT}
+    if gp.normalized
+        new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov
+    end
     M = length(gp.models)
     μσ = [gp.models[i].predict(new_inputs, return_std=true) for i in 1:M]
-    mean = first.(μσ)
-    return first.(μσ), last.(μσ).^2
+    # Return mean(s) and variance(s)
+    return vcat(first.(μσ)...), vcat(last.(μσ)...).^2
 end
 
 end # module
diff --git a/src/MCMC.jl b/src/MCMC.jl
index 32f8c884f..b1099a3c8 100644
--- a/src/MCMC.jl
+++ b/src/MCMC.jl
@@ -54,15 +54,6 @@ struct MCMCObj{FT<:AbstractFloat, IT<:Int}
     accept::Array{IT}
     "MCMC algorithm to use - currently implemented: 'rmw' (random walk Metropolis)"
     algtype::String
-    "whether or not the GP Emulator has been trained on
-     standardized (i.e., z scores of) parameters - if true,
-     the evaluation of the prior at the current parameter value
-     is done after transforming the parameters back to their
-     original value (i.e., multiply by standard deviation and
-     add mean).
-     TODO: Hopefully FreeParams will be able to deal with
-     standardized parameters in a more elegant way."
-    standardized::Bool
 end
 
 """
@@ -74,7 +65,6 @@
             max_iter::IT,
             algtype::String,
-            burnin::IT,
-            standardized::Bool) where {FT<:AbstractFloat, IT<:Int}
+            burnin::IT) where {FT<:AbstractFloat, IT<:Int}
 
 where max_iter is the number of MCMC steps to perform (e.g., 100_000)
 
@@ -86,8 +76,7 @@ function MCMCObj(obs_sample::Vector{FT},
                  param_init::Vector{FT},
                  max_iter::IT,
                  algtype::String,
-                 burnin::IT,
-                 standardized::Bool) where {FT<:AbstractFloat, IT<:Int}
+                 burnin::IT) where {FT<:AbstractFloat, IT<:Int}
 
     # first row is param_init
     posterior = zeros(max_iter + 1, length(param_init))
@@ -102,11 +91,10 @@ function MCMCObj(obs_sample::Vector{FT},
         sys.exit()
     end
     MCMCObj{FT,IT}(obs_sample, obs_cov, obs_covinv, priors, [step], burnin,
-                   param, posterior, log_posterior, iter, accept, algtype,
-                   standardized)
-
+                   param, posterior, log_posterior, iter, accept, algtype)
 end
 
+
 function reset_with_step!(mcmc::MCMCObj{FT}, step::FT) where {FT}
     # reset to beginning with new stepsize
     mcmc.step[1] = step
@@ -174,15 +162,8 @@ function log_prior(mcmc::MCMCObj{FT}) where {FT}
     # Assume independent priors for each parameter
     priors = mcmc.prior
     for (param, prior_dist) in zip(mcmc.param, priors)
-        if mcmc.standardized
-            param_std = std(prior_dist)
-            param_mean = mean(prior_dist)
-            # get density at current parameter value
-            log_rho[1] += logpdf(prior_dist, param*param_std + param_mean)
-        else
-            # get density at current parameter value
-            log_rho[1] += logpdf(prior_dist, param)
-        end
+        # get density at current parameter value
+        log_rho[1] += logpdf(prior_dist, param)
     end
 
     return log_rho[1]
@@ -229,10 +210,7 @@ function find_mcmc_step!(mcmc_test::MCMCObj{FT}, gpobj::GPObj{FT}) where {FT}
     while mcmc_accept == false
 
         param = convert(Array{FT, 2}, mcmc_test.param')
-        # test predictions param' is 1xN_params
         gp_pred, gp_predvar = predict(gpobj, param)
-        gp_pred = cat(gp_pred..., dims=2)
-        gp_predvar = cat(gp_predvar..., dims=2)
 
         mcmc_sample!(mcmc_test, vec(gp_pred), vec(gp_predvar))
         it += 1
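With the `standardized` branch removed, `log_prior` reduces to summing each prior's log-density at the raw parameter values. A toy check (not from the patch) using the uniform priors the Cloudy test below switches to:

```julia
# What the simplified log_prior computes, on a toy parameter vector.
using Distributions

priors = [Uniform(100.0, 600.0),   # prior on N0
          Uniform(0.0, 12.0),      # prior on θ
          Uniform(0.0, 6.0)]       # prior on k
param = [360.0, 4.1, 2.0]

log_rho = sum(logpdf(prior, p) for (prior, p) in zip(priors, param))
# Independent uniforms: -(log(500) + log(12) + log(6))
@assert isapprox(log_rho, -(log(500) + log(12) + log(6)))
```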
diff --git a/test/Cloudy/runtests.jl b/test/Cloudy/runtests.jl
index 98deaff14..fd2246eb5 100644
--- a/test/Cloudy/runtests.jl
+++ b/test/Cloudy/runtests.jl
@@ -35,9 +35,9 @@ N0_true = 360.
 θ_true = 4.1
 k_true = 2.0
 u_true = [N0_true, θ_true, k_true]
-priors = [Distributions.Normal(300., 50.),     # prior on N0
-          Distributions.Normal(6.0, 2.0),      # prior on θ
-          Distributions.Normal(3.0, 1.5)]      # prior on k
+priors = [Distributions.Uniform(100.0, 600.0), # prior on N0
+          Distributions.Uniform(0.0, 12.0),    # prior on θ
+          Distributions.Uniform(0.0, 6.0)]     # prior on k
 
 # We assume that the true particle mass distribution is a Gamma distribution
 # with parameters N0_true, θ_true, k_true. Note that dist_true has to be a
@@ -106,8 +106,7 @@
                              g_settings,
                              PDistributions.update_params,
                              PDistributions.moment,
-                             Cloudy.Sources.get_int_coalescence
-                             )
+                             Cloudy.Sources.get_int_coalescence)
     EKI.update_ensemble!(ekiobj, g_ens)
 end
 
@@ -123,6 +122,7 @@ println(mean(exp_transform(ekiobj.u[end]), dims=1))
 ###
 
 gppackage = GPJL() # use the GaussianProcesses.jl package
+pred_type = YType() # predict the data, not the latent function
 
 # Construct kernel:
 # Sum kernel consisting of Matern 5/2 ARD kernel, a Squared Exponential Iso
@@ -140,26 +140,15 @@ GPkernel = kern1 + kern2 + white
 log_u_tp, g_tp = Utilities.extract_GP_tp(ekiobj, 5)
 u_tp = exp_transform(log_u_tp)
 
-# Standardize the parameters
-param_mean = [mean(prior) for prior in priors]
-param_std = [std(prior) for prior in priors]
-u_tp_zscore = Utilities.orig2zscore(u_tp, param_mean, param_std)
-# Standardize the data
-data_mean = vec(mean(g_tp, dims=1))
-data_std = vec(std(g_tp, dims=1))
-g_tp_zscore = Utilities.orig2zscore(g_tp, data_mean, data_std)
-
 # Fit a Gaussian Process regression to the training points
-gpobj = GPEmulator.GPObj(u_tp_zscore, g_tp_zscore, gppackage, GPkernel)
+gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,
+                         normalized=true, prediction_type=pred_type)
 
 # Check how well the Gaussian Process regression predicts on the
-# (standardized) true parameters
-u_true_zscore = Utilities.orig2zscore(u_true, param_mean, param_std)
-y_mean, y_var = GPEmulator.predict(gpobj, reshape(u_true_zscore, 1, :))
-y_mean = cat(y_mean..., dims=2)
-y_var = cat(y_var..., dims=2)
+# true parameters
+y_mean, y_var = GPEmulator.predict(gpobj, reshape(u_true, 1, :))
 
 println("GP prediction on true parameters: ")
-println(vec(y_mean) .* data_std + data_mean)
+println(y_mean)
 println("true data: ")
 println(truth.mean)
 
@@ -176,15 +165,9 @@ mcmc_alg = "rwm" # random walk Metropolis
 burnin = 0
 step = 0.1 # first guess
 max_iter = 5000
-standardized = true # we are using z scores
 yt_sample = Utilities.get_obs_sample(truth)
-yt_sample_zscore = Utilities.orig2zscore(yt_sample, data_mean, data_std)
-# Normalize covariance of obs noise to determinant 1
-truth_cov_norm = (1.0/(det(truth.cov)))^(1.0/n_param) * truth.cov
-
-mcmc_test = MCMC.MCMCObj(yt_sample_zscore, truth_cov_norm,
-                         priors, step, u0,
-                         max_iter, mcmc_alg, burnin, standardized)
+mcmc_test = MCMC.MCMCObj(yt_sample, truth.cov, priors, step, u0, max_iter,
+                         mcmc_alg, burnin)
 new_step = MCMC.find_mcmc_step!(mcmc_test, gpobj)
 
 # Now begin the actual MCMC
@@ -192,10 +175,9 @@ println("Begin MCMC - with step size ", new_step)
 
 u0 = vec(mean(u_tp, dims=1)) # reset parameters
 burnin = 1000
-max_iter = 100000
-mcmc = MCMC.MCMCObj(yt_sample_zscore, truth_cov_norm, priors,
-                    new_step, u0, max_iter, mcmc_alg, burnin,
-                    standardized)
+max_iter = 500_000
+mcmc = MCMC.MCMCObj(yt_sample, truth.cov, priors, new_step, u0, max_iter,
+                    mcmc_alg, burnin)
 MCMC.sample_posterior!(mcmc, gpobj, max_iter)
 
 posterior = MCMC.get_posterior(mcmc)
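A hypothetical end-to-end sketch of the keyword interface the script now uses (assumes this PR's GPEmulator module is loaded, as in the test; the training arrays are synthetic stand-ins, not Cloudy output):

```julia
using Random; Random.seed!(42)

u_tp = randn(30, 3)                              # training inputs (synthetic)
g_tp = u_tp * randn(3, 2) .+ 0.1 .* randn(30, 2) # training data (synthetic)

# GPkernel=nothing selects the default kernel; inputs are whitened internally
gpobj = GPEmulator.GPObj(u_tp, g_tp, GPJL(); GPkernel=nothing,
                         normalized=true, prediction_type=YType())

# predict re-applies (x .- input_mean) * sqrt_inv_input_cov itself when
# gpobj.normalized == true, so new inputs stay in their original units
y_mean, y_var = GPEmulator.predict(gpobj, reshape([0.0, 0.5, -0.5], 1, :))
```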
"N0" - xs = collect(100:1:500) + xs = collect(100:1:600) elseif idx == 2 param = "Theta" xs = collect(0:0.01:12.0) @@ -223,8 +205,8 @@ for idx in 1:n_param end label = "true " * param - histogram(posterior[:, idx] .* param_std[idx] .+ param_mean[idx], - bins=100, normed=true, fill=:slategray, lab="posterior") + histogram(posterior[:, idx], bins=100, normed=true, fill=:slategray, + lab="posterior") plot!(xs, mcmc.prior[idx], w=2.6, color=:blue, lab="prior") plot!([u_true[idx]], seriestype="vline", w=2.6, lab=label)