Merge #42
42: Normalize GP input r=charleskawczynski a=bielim

Normalize the inputs used for GP training and prediction.

Normalization is done by centering the inputs (i.e., subtracting their mean) and multiplying them by the square root of the inverse of the input covariance matrix.
Whether or not the GP is trained on normalized inputs can be specified via an optional argument to GPObj ("normalized"), which defaults to true. If the GP has been trained on normalized inputs, the "predict" function automatically applies the same normalization when predicting on new inputs.

The idea is that normalization makes the GP hyperparameters less dependent on the particular problem - e.g., the length scales used in the default kernels should then be reasonable defaults for many problems.
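
For illustration, here is a minimal standalone sketch of the normalization described above (toy random inputs, only Statistics and LinearAlgebra; this is not the package code itself):

```julia
using Statistics, LinearAlgebra

# Toy training inputs, N_samples x N_parameters (the layout GPObj expects)
inputs = randn(100, 3) .* [2.0 0.5 10.0] .+ [1.0 -3.0 50.0]

# The same quantities the GPObj stores (input_mean, sqrt_inv_input_cov);
# Symmetric is used here only to guarantee a real matrix square root
input_mean = reshape(mean(inputs, dims=1), 1, :)                    # 1 x N_parameters
sqrt_inv_input_cov = sqrt(inv(Symmetric(cov(inputs, dims=1))))      # N_parameters x N_parameters

# Center and whiten the training inputs
inputs_norm = (inputs .- input_mean) * sqrt_inv_input_cov

# At prediction time, new inputs get the same transform, using the mean
# and covariance computed from the training inputs
new_inputs = randn(5, 3)
new_inputs_norm = (new_inputs .- input_mean) * sqrt_inv_input_cov

# The normalized training inputs have (approximately) zero mean and
# identity covariance
@assert isapprox(vec(mean(inputs_norm, dims=1)), zeros(3); atol=1e-8)
@assert isapprox(cov(inputs_norm, dims=1), I(3); atol=1e-6)
```

Whitening with the matrix square root (rather than just dividing by per-parameter standard deviations) also removes correlations between parameters, which is why a single set of default length scales has a better chance of working across problems.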

Co-authored-by: Charles Kawczynski <kawczynski.charles@gmail.com>
bors[bot] and charleskawczynski authored Mar 2, 2020
2 parents f85852c + 0d22d17 commit 35d3feb
Showing 3 changed files with 104 additions and 95 deletions.
115 changes: 82 additions & 33 deletions src/GPEmulator.jl
@@ -50,32 +50,47 @@ models.
# Fields
$(DocStringExtensions.FIELDS)
"""
struct GPObj{FT<:AbstractFloat,GPM}
"training points/parameters; N_parameters x N_samples"
struct GPObj{FT<:AbstractFloat, GPM}
"training inputs/parameters; N_samples x N_parameters"
inputs::Array{FT, 2}
"output data (`outputs = G(inputs)`); N_parameters x N_samples"
"training data/targets; N_samples x N_data"
data::Array{FT, 2}
"the Gaussian Process Regression model(s) that are fitted to the given input-output pairs"
"mean of input; 1 x N_parameters"
input_mean::Array{FT, 2}
"square root of the inverse of the input covariance matrix; N_parameters x N_parameters"
sqrt_inv_input_cov::Array{FT, 2}
"the Gaussian Process (GP) Regression model(s) that are fitted to the given input-data pairs"
models::Vector
"Data prediction type"
prediction_type::Union{Nothing,PredictionType}
"whether to fit GP models on normalized inputs ((inputs - input_mean) * sqrt_inv_input_cov)"
normalized::Bool
"prediction type (`y` to predict the data, `f` to predict the latent function)"
prediction_type::Union{Nothing, PredictionType}
end


"""
GPObj(inputs, data, package::GPJL, GPkernel::Union{K, KPy, Nothing}=nothing; prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject}
GPObj(inputs, data, package::GPJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true, prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject}
Inputs of size N_samples x N_parameters and data of size N_samples x N_data (inputs are transposed internally when the GP models are constructed)
- `GPkernel` - GaussianProcesses kernel object. If not supplied, a default
kernel is used. The default kernel is the sum of a Squared
Exponential kernel and white noise.
"""
function GPObj(inputs, data, package::GPJL, GPkernel::Union{K, KPy, Nothing}=nothing; prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject}
function GPObj(inputs, data, package::GPJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true, prediction_type::PredictionType=YType()) where {K<:Kernel, KPy<:PyObject}
FT = eltype(data)
models = Any[]
outputs = convert(Array{FT}, data')
inputs = convert(Array{FT}, inputs')

# Make sure inputs and data are arrays of type FT
inputs = convert(Array{FT}, inputs)
data = convert(Array{FT}, data)

input_mean = reshape(mean(inputs, dims=1), 1, :)
sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1))))
if normalized
inputs = (inputs .- input_mean) * sqrt_inv_input_cov
end

# Use a default kernel unless a kernel was supplied to GPObj
if GPkernel==nothing
# Construct kernel:
@@ -91,36 +106,49 @@ function GPObj(inputs, data, package::GPJL, GPkernel::Union{K, KPy, Nothing}=not
GPkernel = rbf + white
end

for i in 1:size(outputs, 1)
for i in 1:size(data, 2)
# Make a copy of GPkernel (because the kernel gets altered in
# every iteration)
GPkernel_i = deepcopy(GPkernel)
# inputs: N_param x N_samples
# outputs: N_data x N_samples
# inputs: N_samples x N_parameters
# data: N_samples x N_data
logstd_obs_noise = log(sqrt(0.5)) # log standard dev of obs noise
# Zero mean function
kmean = MeanZero()
m = GPE(inputs, outputs[i, :], kmean, GPkernel_i, logstd_obs_noise)
m = GPE(inputs', dropdims(data[:, i]', dims=1), kmean, GPkernel_i,
logstd_obs_noise)
optimize!(m, noise=false)
push!(models, m)
end
return GPObj{FT, typeof(package)}(inputs, outputs, models, prediction_type)
return GPObj{FT, typeof(package)}(inputs, data, input_mean,
sqrt_inv_input_cov, models, normalized,
prediction_type)
end


"""
GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=nothing) where {K<:Kernel, KPy<:PyObject}
GPObj(inputs, data, package::SKLJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true) where {K<:Kernel, KPy<:PyObject}
Inputs of size N_samples x N_parameters and data of size N_samples x N_data
- `GPkernel` - GaussianProcesses kernel object. If not supplied, a default
kernel is used. The default kernel is the sum of a Squared
Exponential kernel and white noise.
- `GPkernel` - GaussianProcesses or ScikitLearn kernel object. If not supplied,
a default kernel is used. The default kernel is the sum of a
Squared Exponential kernel and white noise.
"""
function GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=nothing) where {K<:Kernel, KPy<:PyObject}
function GPObj(inputs, data, package::SKLJL; GPkernel::Union{K, KPy, Nothing}=nothing, normalized::Bool=true) where {K<:Kernel, KPy<:PyObject}
FT = eltype(data)
models = Any[]
outputs = convert(Array{FT}, data')

# Make sure inputs and data are arrays of type FT
inputs = convert(Array{FT}, inputs)
data = convert(Array{FT}, data)

input_mean = reshape(mean(inputs, dims=1), 1, :)
sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1))))
if normalized
inputs = (inputs .- input_mean) * sqrt_inv_input_cov
end

if GPkernel==nothing
const_value = 1.0
var_kern = ConstantKernel(constant_value=const_value,
@@ -132,48 +160,69 @@ function GPObj(inputs, data, package::SKLJL, GPkernel::Union{K, KPy, Nothing}=no
noise_level_bounds=(1e-05, 10.0))
GPkernel = var_kern * rbf + white
end
for i in 1:size(outputs,1)
out = reshape(outputs[i,:], (size(outputs, 2), 1))
for i in 1:size(data, 2)
m = GaussianProcessRegressor(kernel=GPkernel,
n_restarts_optimizer=10,
alpha=0.0, normalize_y=true)
ScikitLearn.fit!(m, inputs, out)
ScikitLearn.fit!(m, inputs, data[:, i])
if i==1
println(m.kernel.hyperparameters)
print("Completed training of: ")
end
print(i,", ")
push!(models, m)
end
return GPObj{FT, typeof(package)}(inputs, outputs, models, nothing)
return GPObj{FT, typeof(package)}(inputs, data, input_mean, sqrt_inv_input_cov,
models, normalized, YType())
end


predict(gp::GPObj{FT,GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type)
"""
predict(gp::GPObj{FT,GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type)
Evaluate the GP model(s) at new inputs.
- `gp` - a GPObj
- `new_inputs` - inputs for which GP model(s) is/are evaluated; N_new_inputs x N_parameters
Note: If gp.normalized == true, the new inputs are normalized prior to the prediction
"""
predict(gp::GPObj{FT, GPJL}, new_inputs::Array{FT}) where {FT} = predict(gp, new_inputs, gp.prediction_type)

function predict(gp::GPObj{FT,GPJL},
function predict(gp::GPObj{FT, GPJL},
new_inputs::Array{FT},
::FType) where {FT}
if gp.normalized
new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov
end
M = length(gp.models)
μσ2 = [predict_f(gp.models[i], new_inputs) for i in 1:M]
return first.(μσ2), last.(μσ2)
# Return mean(s) and standard deviation(s)
return vcat(first.(μσ2)...), vcat(last.(μσ2)...)
end
function predict(gp::GPObj{FT,GPJL},

function predict(gp::GPObj{FT, GPJL},
new_inputs::Array{FT},
::YType) where {FT}
if gp.normalized
new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov
end
M = length(gp.models)
# predict_y expects one input per column, so new_inputs must be transposed
new_inputs = convert(Array{FT}, new_inputs')
μσ2 = [predict_y(gp.models[i], new_inputs) for i in 1:M]
return first.(μσ2), last.(μσ2)
# Return mean(s) and standard deviation(s)
return vcat(first.(μσ2)...), vcat(last.(μσ2)...)
end

function predict(gp::GPObj{FT,SKLJL},
function predict(gp::GPObj{FT, SKLJL},
new_inputs::Array{FT}) where {FT}
if gp.normalized
new_inputs = (new_inputs .- gp.input_mean) * gp.sqrt_inv_input_cov
end
M = length(gp.models)
μσ = [gp.models[i].predict(new_inputs, return_std=true) for i in 1:M]
mean = first.(μσ)
return first.(μσ), last.(μσ).^2
# Return mean(s) and standard deviation(s)
return vcat(first.(μσ)...), vcat(last.(μσ)...).^2
end

end # module
32 changes: 5 additions & 27 deletions src/MCMC.jl
@@ -54,15 +54,6 @@ struct MCMCObj{FT<:AbstractFloat, IT<:Int}
accept::Array{IT}
"MCMC algorithm to use - currently implemented: 'rmw' (random walk Metropolis)"
algtype::String
"whether or not the GP Emulator has been trained on
standardized (i.e., z scores of) parameters - if true,
the evaluation of the prior at the current parameter value
is done after transforming the parameters back to their
original value (i.e., multiply by standard deviation and
add mean).
TODO: Hopefully FreeParams will be able to deal with
standardized parameters in a more elegant way."
standardized::Bool
end

"""
@@ -74,7 +65,6 @@ end
max_iter::IT,
algtype::String,
burnin::IT,
standardized::Bool) where {FT<:AbstractFloat, IT<:Int}
where max_iter is the number of MCMC steps to perform (e.g., 100_000)
@@ -86,8 +76,7 @@ function MCMCObj(obs_sample::Vector{FT},
param_init::Vector{FT},
max_iter::IT,
algtype::String,
burnin::IT,
standardized::Bool) where {FT<:AbstractFloat, IT<:Int}
burnin::IT) where {FT<:AbstractFloat, IT<:Int}

# first row is param_init
posterior = zeros(max_iter + 1, length(param_init))
@@ -102,11 +91,10 @@ function MCMCObj(obs_sample::Vector{FT},
sys.exit()
end
MCMCObj{FT,IT}(obs_sample, obs_cov, obs_covinv, priors, [step], burnin,
param, posterior, log_posterior, iter, accept, algtype,
standardized)

param, posterior, log_posterior, iter, accept, algtype)
end


function reset_with_step!(mcmc::MCMCObj{FT}, step::FT) where {FT}
# reset to beginning with new stepsize
mcmc.step[1] = step
@@ -174,15 +162,8 @@ function log_prior(mcmc::MCMCObj{FT}) where {FT}
# Assume independent priors for each parameter
priors = mcmc.prior
for (param, prior_dist) in zip(mcmc.param, priors)
if mcmc.standardized
param_std = std(prior_dist)
param_mean = mean(prior_dist)
# get density at current parameter value
log_rho[1] += logpdf(prior_dist, param*param_std + param_mean)
else
# get density at current parameter value
log_rho[1] += logpdf(prior_dist, param)
end
# get density at current parameter value
log_rho[1] += logpdf(prior_dist, param)
end

return log_rho[1]
@@ -229,10 +210,7 @@ function find_mcmc_step!(mcmc_test::MCMCObj{FT}, gpobj::GPObj{FT}) where {FT}
while mcmc_accept == false

param = convert(Array{FT, 2}, mcmc_test.param')
# test predictions param' is 1xN_params
gp_pred, gp_predvar = predict(gpobj, param)
gp_pred = cat(gp_pred..., dims=2)
gp_predvar = cat(gp_predvar..., dims=2)

mcmc_sample!(mcmc_test, vec(gp_pred), vec(gp_predvar))
it += 1
52 changes: 17 additions & 35 deletions test/Cloudy/runtests.jl
@@ -35,9 +35,9 @@ N0_true = 360.
θ_true = 4.1
k_true = 2.0
u_true = [N0_true, θ_true, k_true]
priors = [Distributions.Normal(300., 50.), # prior on N0
Distributions.Normal(6.0, 2.0), # prior on θ
Distributions.Normal(3.0, 1.5)] # prior on k
priors = [Distributions.Uniform(100.0, 600.0), # prior on N0
Distributions.Uniform(0.0, 12.0), # prior on θ
Distributions.Uniform(0.0, 6.0)] # prior on k

# We assume that the true particle mass distribution is a Gamma distribution
# with parameters N0_true, θ_true, k_true. Note that dist_true has to be a
@@ -106,8 +106,7 @@ for i in 1:N_iter
g_settings,
PDistributions.update_params,
PDistributions.moment,
Cloudy.Sources.get_int_coalescence
)
Cloudy.Sources.get_int_coalescence)
EKI.update_ensemble!(ekiobj, g_ens)
end

@@ -123,6 +122,7 @@ println(mean(exp_transform(ekiobj.u[end]), dims=1))
###

gppackage = GPJL() # use the GaussianProcesses.jl package
pred_type = YType() # predict the data, not the latent function

# Construct kernel:
# Sum kernel consisting of Matern 5/2 ARD kernel, a Squared Exponential Iso
@@ -140,26 +140,15 @@ GPkernel = kern1 + kern2 + white
log_u_tp, g_tp = Utilities.extract_GP_tp(ekiobj, 5)
u_tp = exp_transform(log_u_tp)

# Standardize the parameters
param_mean = [mean(prior) for prior in priors]
param_std = [std(prior) for prior in priors]
u_tp_zscore = Utilities.orig2zscore(u_tp, param_mean, param_std)
# Standardize the data
data_mean = vec(mean(g_tp, dims=1))
data_std = vec(std(g_tp, dims=1))
g_tp_zscore = Utilities.orig2zscore(g_tp, data_mean, data_std)

# Fit a Gaussian Process regression to the training points
gpobj = GPEmulator.GPObj(u_tp_zscore, g_tp_zscore, gppackage, GPkernel)
gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,
normalized=true, prediction_type=pred_type)

# Check how well the Gaussian Process regression predicts on the
# (standardized) true parameters
u_true_zscore = Utilities.orig2zscore(u_true, param_mean, param_std)
y_mean, y_var = GPEmulator.predict(gpobj, reshape(u_true_zscore, 1, :))
y_mean = cat(y_mean..., dims=2)
y_var = cat(y_var..., dims=2)
y_mean, y_var = GPEmulator.predict(gpobj, reshape(u_true, 1, :))
println("GP prediction on true parameters: ")
println(vec(y_mean) .* data_std + data_mean)
println(y_mean)
println("true data: ")
println(truth.mean)

@@ -176,26 +165,19 @@ mcmc_alg = "rwm" # random walk Metropolis
burnin = 0
step = 0.1 # first guess
max_iter = 5000
standardized = true # we are using z scores
yt_sample = Utilities.get_obs_sample(truth)
yt_sample_zscore = Utilities.orig2zscore(yt_sample, data_mean, data_std)
# Normalize covariance of obs noise to determinant 1
truth_cov_norm = (1.0/(det(truth.cov)))^(1.0/n_param) * truth.cov

mcmc_test = MCMC.MCMCObj(yt_sample_zscore, truth_cov_norm,
priors, step, u0,
max_iter, mcmc_alg, burnin, standardized)
mcmc_test = MCMC.MCMCObj(yt_sample, truth.cov, priors, step, u0, max_iter,
mcmc_alg, burnin)
new_step = MCMC.find_mcmc_step!(mcmc_test, gpobj)

# Now begin the actual MCMC
println("Begin MCMC - with step size ", new_step)
u0 = vec(mean(u_tp, dims=1))
# reset parameters
burnin = 1000
max_iter = 100000
mcmc = MCMC.MCMCObj(yt_sample_zscore, truth_cov_norm, priors,
new_step, u0, max_iter, mcmc_alg, burnin,
standardized)
max_iter = 500_000
mcmc = MCMC.MCMCObj(yt_sample, truth.cov, priors, new_step, u0, max_iter,
mcmc_alg, burnin)
MCMC.sample_posterior!(mcmc, gpobj, max_iter)

posterior = MCMC.get_posterior(mcmc)
@@ -211,7 +193,7 @@ println("post_cov")
for idx in 1:n_param
if idx == 1
param = "N0"
xs = collect(100:1:500)
xs = collect(100:1:600)
elseif idx == 2
param = "Theta"
xs = collect(0:0.01:12.0)
@@ -223,8 +205,8 @@ for idx in 1:n_param
end

label = "true " * param
histogram(posterior[:, idx] .* param_std[idx] .+ param_mean[idx],
bins=100, normed=true, fill=:slategray, lab="posterior")
histogram(posterior[:, idx], bins=100, normed=true, fill=:slategray,
lab="posterior")
plot!(xs, mcmc.prior[idx], w=2.6, color=:blue, lab="prior")
plot!([u_true[idx]], seriestype="vline", w=2.6, lab=label)

