Skip to content

Commit

Permalink
Adding large verification example
Browse files Browse the repository at this point in the history
  • Loading branch information
dmcdougall committed Aug 2, 2017
1 parent 3d5c1af commit f50e7fa
Show file tree
Hide file tree
Showing 8 changed files with 10,085 additions and 88 deletions.
11 changes: 0 additions & 11 deletions examples/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -446,18 +446,7 @@ dist_gpmsa_vector_DATA += gp/scalar/ctf_dat.txt

gpmsa_linverifdir = $(prefix)/examples/gp/linear_verif

gpmsa_linverif_PROGRAMS =
gpmsa_linverif_PROGRAMS += gpmsa_linverif

gpmsa_linverif_SOURCES = gp/linear_verif/gpmsa_linear_verif.C
gpmsa_linverif_LDADD = $(top_builddir)/src/libqueso.la
gpmsa_linverif_CPPFLAGS = -I$(top_srcdir)/examples/gp/linear_verif $(AM_CPPFLAGS)

dist_gpmsa_linverif_DATA =
dist_gpmsa_linverif_DATA += ${gpmsa_linverif_SOURCES}
dist_gpmsa_linverif_DATA += gp/linear_verif/gpmsa_scalar.txt
dist_gpmsa_linverif_DATA += gp/linear_verif/sim_scalar.dat
dist_gpmsa_linverif_DATA += gp/linear_verif/y_exp_scalar.txt
dist_gpmsa_linverif_DATA += gp/linear_verif/gpmsa_mv.txt
dist_gpmsa_linverif_DATA += gp/linear_verif/sim_mv.dat
dist_gpmsa_linverif_DATA += gp/linear_verif/y_exp_mv.txt
Expand Down
8 changes: 8 additions & 0 deletions test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ check_PROGRAMS += test_custom_tk_am
check_PROGRAMS += test_no_initial_point
check_PROGRAMS += test_parallel_h5
check_PROGRAMS += test_gpmsa_scalar_pdf_small
check_PROGRAMS += test_gpmsa_scalar_pdf_large

LDADD = $(top_builddir)/src/libqueso.la

Expand Down Expand Up @@ -260,6 +261,7 @@ test_no_initial_point_SOURCES = test_StatisticalInverseProblem/test_no_initial_p
test_parallel_h5_SOURCES = test_StatisticalInverseProblem/test_parallel_h5.C

test_gpmsa_scalar_pdf_small_SOURCES = test_gpmsa/scalar_pdf_small.C
test_gpmsa_scalar_pdf_large_SOURCES = test_gpmsa/scalar_pdf_large.C

TESTS =
TESTS += unit_driver
Expand Down Expand Up @@ -341,6 +343,7 @@ TESTS += test_custom_tk_am
TESTS += test_no_initial_point
TESTS += test_StatisticalInverseProblem/test_parallel_h5.sh
TESTS += test_gpmsa/scalar_pdf_small.sh
TESTS += test_gpmsa/scalar_pdf_large.sh

if ! MPI_ENABLED
XFAIL_TESTS = test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh
Expand Down Expand Up @@ -416,6 +419,11 @@ EXTRA_DIST += test_gpmsa/sim_scalar_small.dat
EXTRA_DIST += test_gpmsa/y_exp_scalar_small.txt
EXTRA_DIST += test_gpmsa/regression_solution_pdf_small.txt
EXTRA_DIST += test_gpmsa/scalar_pdf_small.sh
EXTRA_DIST += test_gpmsa/gpmsa_scalar_pdf_large_input.txt
EXTRA_DIST += test_gpmsa/sim_scalar_large.dat
EXTRA_DIST += test_gpmsa/y_exp_scalar_large.txt
EXTRA_DIST += test_gpmsa/regression_solution_pdf_large.txt
EXTRA_DIST += test_gpmsa/scalar_pdf_large.sh

CLEANFILES =
CLEANFILES += test_Environment/debug_output_sub0.txt
Expand Down
File renamed without changes.
10,000 changes: 10,000 additions & 0 deletions test/test_gpmsa/regression_solution_pdf_large.txt

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@

#include <cstdio>

#define TOL 1e-5

void open_data_file(const std::string& data_filename, std::ifstream& data_stream) {
std::string example_dir = "gp/linear_verify";
data_stream.open(example_dir + "/" + data_filename);
Expand Down Expand Up @@ -132,6 +134,7 @@ void readData
unsigned int block_offset = i * num_responses;
for (unsigned int j = 0; j < num_responses; ++j) {
(*(experimentOutputs[i]))[j] -= meansim[j];

(*(experimentOutputs[i]))[j] /= stdsim[j];
for (unsigned int k = 0; k < num_responses; ++k)
experimentMat(block_offset + j, block_offset + k) /=
Expand Down Expand Up @@ -182,7 +185,6 @@ void run_scalar(const QUESO::FullEnvironment& env)
// x1 0.9 1.1
// x2 -1.5 1.0
// x3 -1.5 0.5

QUESO::VectorSpace<QUESO::GslVector, QUESO::GslMatrix> configSpace(env,
"scenario_", numConfigVars, NULL);

Expand All @@ -197,22 +199,11 @@ void run_scalar(const QUESO::FullEnvironment& env)
"experimentspace_", experimentSize * numExperiments, NULL);

// Step 5: Instantiate the Gaussian process emulator object
//
// Regarding simulation scenario input values, the user should standardise
// them so that they exist inside a hypercube.
//
// Regarding simulation output data, the user should transform it so that the
// mean is zero and the variance is one.
//
// Regarding experimental scenario input values, the user should standardize
// them so that they exist inside a hypercube.
//
// Regarding experimental data, the user should transformed it so that it has
// zero mean and variance one.

// GPMSA stores all the information about our simulation
// data and experimental data. It also stores default information about the
// hyperparameter distributions.
// GPMSA stores all the information about our simulation data and experimental
// data, and if the users opts to, will normalise this to have zero mean and
// unit variance. It also stores default information about the hyperparameter
// distributions.
QUESO::GPMSAFactory<QUESO::GslVector, QUESO::GslMatrix>
gpmsaFactory(env,
NULL,
Expand All @@ -229,10 +220,6 @@ void run_scalar(const QUESO::FullEnvironment& env)
// Must set scaling before adding experiments due to construct order
// As of this implementation autoscale only affects params/scenarios

// TODO: Ask Brian Williams about preferred scaling and probably set
// to scale to domain bounds instead of data bounds...
gp_opts.set_autoscale_minmax();

// std::vector containing all the points in scenario space where we have
// simulations
std::vector<QUESO::SharedPtr<QUESO::GslVector>::Type>
Expand Down Expand Up @@ -265,6 +252,12 @@ void run_scalar(const QUESO::FullEnvironment& env)
simulationScenarios[i].reset(new QUESO::GslVector(configSpace.zeroVector())); // 'x_{i+1}^*' in paper
paramVecs [i].reset(new QUESO::GslVector(paramSpace.zeroVector())); // 't_{i+1}^*' in paper
outputVecs [i].reset(new QUESO::GslVector(nEtaSpace.zeroVector())); // 'eta_{i+1}' in paper

// Must set scaling before adding experiments due to construct order
// As of this implementation autoscale only affects params/scenarios
gp_opts.set_autoscale_minmax_uncertain_parameter(i);
gp_opts.set_autoscale_minmax_scenario_parameter(i);
gp_opts.set_autoscale_meanvar_output(i);
}

for (unsigned int i = 0; i < numExperiments; i++) {
Expand All @@ -280,78 +273,80 @@ void run_scalar(const QUESO::FullEnvironment& env)

// Read in data and store the standard deviation of the simulation
// data (ignored for now).
//double stdsim =
readData("sim_scalar.dat", "y_exp_scalar.txt", simulationScenarios, paramVecs,
outputVecs, experimentScenarios, experimentVecs, *experimentMat);
const char * test_srcdir = std::getenv("srcdir");

std::string simInputFileName = "test_gpmsa/sim_scalar_large.dat";
std::string expInputFileName = "test_gpmsa/y_exp_scalar_large.txt";
std::string solInputFileName = "test_gpmsa/regression_solution_pdf_large.txt";

if (test_srcdir) {
simInputFileName = test_srcdir + ('/' + simInputFileName);
expInputFileName = test_srcdir + ('/' + expInputFileName);
solInputFileName = test_srcdir + ('/' + solInputFileName);
}

readData(simInputFileName,
expInputFileName,
simulationScenarios,
paramVecs,
outputVecs,
experimentScenarios,
experimentVecs,
*experimentMat);

// Add simulation and experimental data
gpmsaFactory.addSimulations(simulationScenarios, paramVecs, outputVecs);
gpmsaFactory.addExperiments(experimentScenarios, experimentVecs, experimentMat);

QUESO::GenericVectorRV<QUESO::GslVector, QUESO::GslMatrix> postRv(
"post_",
gpmsaFactory.prior().imageSet().vectorSpace());
QUESO::StatisticalInverseProblem<QUESO::GslVector, QUESO::GslMatrix> ip("",
NULL, gpmsaFactory, postRv);

QUESO::GslVector paramInitials(
QUESO::GslVector point(
gpmsaFactory.prior().imageSet().vectorSpace().zeroVector());

// Start with the mean of the prior
gpmsaFactory.prior().pdf().distributionMean(paramInitials);

// Initial condition of the chain: may need to tweak to Brian's expectations
std::cout << "\nPrior-based initial position:\n" << paramInitials << std::endl;

// Brian Williams' recommended initial point
paramInitials[0] = 0.175; // beta0
paramInitials[1] = -0.3; // beta1
paramInitials[2] = 0.1; // beta2
// [ truncation error precision ] (truncated SVD case only)
// BJW: max(100, shape * scale)
paramInitials[3] = 1.0; // emul precision
paramInitials[4] = std::exp(-0.025); // emul corr strength (scenario/beta?)
paramInitials[5] = std::exp(-0.025); // emul corr strength (scenario/beta?)
paramInitials[6] = std::exp(-0.025); // emul corr strength (scenario/beta?)
paramInitials[7] = std::exp(-0.025); // emul corr strength (scenario/beta?)
paramInitials[8] = std::exp(-0.025); // emul corr strength (scenario/beta?)
paramInitials[9] = std::exp(-0.025); // emul corr strength (scenario/beta?)
paramInitials[10] = 20.0; // discrepancy precision
paramInitials[11] = std::exp(-0.025); // discrepancy corr strength x1
paramInitials[12] = std::exp(-0.025); // discrepancy corr strength x2
paramInitials[13] = std::exp(-0.025); // discrepancy corr strength x3
paramInitials[14] = 1000.0; // emul data precision
if (gpmsaFactory.options().m_calibrateObservationalPrecision)
// BJW: max(20, shape * scale)
paramInitials[15] = 20.0; // obs precision
unsigned int num_lines = 10000;
std::vector<double> expected_log_likelihoods(num_lines);
std::vector<double> expected_log_priors(num_lines);
std::vector<double> expected_log_posteriors(num_lines);
std::vector<double> computed_log_likelihoods(num_lines);
std::vector<double> computed_log_priors(num_lines);
std::vector<double> computed_log_posteriors(num_lines);

std::cout << "\nAdjusted initial position:\n" << paramInitials << std::endl;

QUESO::GslMatrix proposalCovMatrix(
gpmsaFactory.prior().imageSet().vectorSpace().zeroVector());
// File containing x, theta, y (vertical concatenation of lhs.txt, y_mod.txt)
std::ifstream solution_data;
open_data_file(solInputFileName, solution_data);
for (unsigned int i = 0; i < num_lines; i++) {
for (unsigned int j = 0; j < point.sizeLocal(); ++j)
solution_data >> point[j];

// Start with the covariance matrix for the whole prior, including
// GPMSA hyper-parameters.
gpmsaFactory.prior().pdf().distributionVariance(proposalCovMatrix);
double log_pdf;
solution_data >> log_pdf;
expected_log_likelihoods[i] = log_pdf;

std::cout << "\nPrior proposal covariance diagonal:\n";
for (unsigned int i=0; i<proposalCovMatrix.numCols(); ++i)
std::cout << proposalCovMatrix(i,i) << " ";
std::cout << std::endl;
solution_data >> log_pdf;
expected_log_priors[i] = log_pdf;

// FIXME: The default doesn't seem to work for this case; fudge it:
proposalCovMatrix(10, 10) = 1.0e2;
proposalCovMatrix(14, 14) = 1.0e1;
solution_data >> log_pdf;
expected_log_posteriors[i] = log_pdf;

std::cout << "\nFinal proposal covariance diagonal:\n";
for (unsigned int i=0; i<proposalCovMatrix.numCols(); ++i)
std::cout << proposalCovMatrix(i,i) << " ";
std::cout << std::endl;
log_pdf = (gpmsaFactory.prior().pdf().lnValue(point));
computed_log_priors[i] = log_pdf;

std::cout << "\nFinal GPMSA Options:" << gpmsaFactory.options() << std::endl;
log_pdf = (gpmsaFactory.getGPMSAEmulator().lnValue(point, NULL, NULL, NULL, NULL));
computed_log_likelihoods[i] = log_pdf;
}
solution_data.close();

ip.solveWithBayesMetropolisHastings(NULL, paramInitials, &proposalCovMatrix);
double initial_diff_prior = expected_log_priors[0] - computed_log_priors[0];
for (unsigned int i = 0; i < expected_log_priors.size(); i++) {
double diff_prior = expected_log_priors[i] - computed_log_priors[i] - initial_diff_prior;
queso_require_less_equal_msg(std::abs(diff_prior), TOL, "computed log prior differs too much from expected");
}

// We don't subtract off the initial because the normalisation constants
// between our implementation and Matlab's implementation appear to be the
// same
for (unsigned int i = 0; i < expected_log_likelihoods.size(); i++) {
double diff_likelihood = expected_log_likelihoods[i] - computed_log_likelihoods[i];
queso_require_less_equal_msg(std::abs(diff_likelihood), TOL, "computed log likelihood differs too much from expected");
}
}


Expand Down
5 changes: 5 additions & 0 deletions test/test_gpmsa/scalar_pdf_large.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
set -eu
set -o pipefail

./test_gpmsa_scalar_pdf_large ${srcdir}/test_gpmsa/gpmsa_scalar_pdf_large_input.txt
File renamed without changes.
File renamed without changes.

0 comments on commit f50e7fa

Please sign in to comment.