Skip to content

Commit

Permalink
Add an example of using a Gaussian process for spatial modelling
Browse files Browse the repository at this point in the history
which estimates one day's average temperature using GSOD data.
  • Loading branch information
akleeman committed Jun 13, 2018
1 parent 6c7b5cd commit c7da1aa
Show file tree
Hide file tree
Showing 27 changed files with 9,680 additions and 34 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Data
*.csv
*.tar
*.nc

# Prerequisites
*.d

Expand Down Expand Up @@ -39,4 +44,4 @@ build*/
.pydevproject
.settings/
.idea/
/Debug/
/Debug/
21 changes: 11 additions & 10 deletions albatross/core/indexing.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ namespace albatross {
*/

template <typename SizeType, typename X>
std::vector<X> subset(const std::vector<SizeType> &indices,
const std::vector<X> &v) {
inline std::vector<X> subset(const std::vector<SizeType> &indices,
const std::vector<X> &v) {
std::vector<X> out(indices.size());
for (std::size_t i = 0; i < static_cast<std::size_t>(indices.size()); i++) {
out[i] = v[static_cast<std::size_t>(indices[i])];
Expand All @@ -37,8 +37,8 @@ std::vector<X> subset(const std::vector<SizeType> &indices,
* Extract a subset of an Eigen::Vector
*/
template <typename SizeType>
Eigen::VectorXd subset(const std::vector<SizeType> &indices,
const Eigen::VectorXd &v) {
inline Eigen::VectorXd subset(const std::vector<SizeType> &indices,
const Eigen::VectorXd &v) {
Eigen::VectorXd out(static_cast<Eigen::Index>(indices.size()));
for (std::size_t i = 0; i < indices.size(); i++) {
out[static_cast<Eigen::Index>(i)] =
Expand All @@ -52,9 +52,9 @@ Eigen::VectorXd subset(const std::vector<SizeType> &indices,
* indices.
*/
template <typename SizeType>
Eigen::MatrixXd subset(const std::vector<SizeType> &row_indices,
const std::vector<SizeType> &col_indices,
const Eigen::MatrixXd &v) {
inline Eigen::MatrixXd subset(const std::vector<SizeType> &row_indices,
const std::vector<SizeType> &col_indices,
const Eigen::MatrixXd &v) {
Eigen::MatrixXd out(row_indices.size(), col_indices.size());
for (std::size_t i = 0; i < row_indices.size(); i++) {
for (std::size_t j = 0; j < col_indices.size(); j++) {
Expand All @@ -74,8 +74,8 @@ Eigen::MatrixXd subset(const std::vector<SizeType> &row_indices,
* columns.
*/
template <typename SizeType>
Eigen::MatrixXd symmetric_subset(const std::vector<SizeType> &indices,
const Eigen::MatrixXd &v) {
inline Eigen::MatrixXd symmetric_subset(const std::vector<SizeType> &indices,
const Eigen::MatrixXd &v) {
assert(v.rows() == v.cols());
return subset(indices, indices, v);
}
Expand All @@ -84,11 +84,12 @@ Eigen::MatrixXd symmetric_subset(const std::vector<SizeType> &indices,
* Extract a subset of an Eigen::DiagonalMatrix
*/
template <typename SizeType, typename Scalar, int Size>
Eigen::DiagonalMatrix<Scalar, Size>
inline Eigen::DiagonalMatrix<Scalar, Size>
symmetric_subset(const std::vector<SizeType> &indices,
const Eigen::DiagonalMatrix<Scalar, Size> &v) {
return subset(indices, v.diagonal()).asDiagonal();
}

} // namespace albatross

#endif
3 changes: 2 additions & 1 deletion albatross/covariance_functions/covariance_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ template <typename Term> struct CovarianceFunction {
/*
* Creates a covariance matrix given a single vector of
* predictors. Element i, j of the resulting covariance
* matrix will hold
* matrix will hold the covariance between quantities
* derived from feature i and feature j.
*/
template <typename Covariance, typename Feature>
Eigen::MatrixXd symmetric_covariance(const CovarianceFunction<Covariance> &f,
Expand Down
31 changes: 30 additions & 1 deletion albatross/covariance_functions/distance_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,16 @@ class AngularDistance : public DistanceMetric {
std::string get_name() const override { return "angular_distance"; };

double operator()(const Eigen::VectorXd &x, const Eigen::VectorXd &y) const {
return acos(x.dot(y) / (x.norm() * y.norm()));
// The acos operator doesn't behave well near |1|. acos(1.), for example,
// returns NaN, so here we do some special casing,
double dot_product = x.dot(y) / (x.norm() * y.norm());
if (dot_product > 1. - 1e-16) {
return 0.;
} else if (dot_product < -1. + 1e-16) {
return M_PI;
} else {
return acos(dot_product);
}
}
};

Expand All @@ -70,6 +79,26 @@ class ScalarDistance : public DistanceMetric {
return fabs(x - y);
}
};

template <typename Feature, typename DistanceMetrixType>
Eigen::MatrixXd distance_matrix(const DistanceMetrixType &distance_metric,
const std::vector<Feature> &xs) {
int n = static_cast<int>(xs.size());
Eigen::MatrixXd D(n, n);

int i, j;
std::size_t si, sj;
for (i = 0; i < n; i++) {
si = static_cast<std::size_t>(i);
for (j = 0; j <= i; j++) {
sj = static_cast<std::size_t>(j);
D(i, j) = distance_metric(xs[si], xs[sj]);
D(j, i) = D(i, j);
}
}
return D;
}

} // namespace albatross

#endif
8 changes: 4 additions & 4 deletions albatross/covariance_functions/radial.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class SquaredExponential : public RadialCovariance<DistanceMetricImpl> {
public:
SquaredExponential(double length_scale = 100000.,
double sigma_squared_exponential = 10.) {
this->params_["length_scale"] = length_scale;
this->params_["squared_exponential_length_scale"] = length_scale;
this->params_["sigma_squared_exponential"] = sigma_squared_exponential;
};

Expand All @@ -78,7 +78,7 @@ class SquaredExponential : public RadialCovariance<DistanceMetricImpl> {
int>::type = 0>
double operator()(const X &x, const X &y) const {
double distance = this->distance_metric_(x, y);
double length_scale = this->params_.at("length_scale");
double length_scale = this->params_.at("squared_exponential_length_scale");
distance /= length_scale;
double sigma = this->params_.at("sigma_squared_exponential");
return sigma * sigma * exp(-distance * distance);
Expand All @@ -93,7 +93,7 @@ template <class DistanceMetricImpl>
class Exponential : public RadialCovariance<DistanceMetricImpl> {
public:
Exponential(double length_scale = 100000., double sigma_exponential = 10.) {
this->params_["length_scale"] = length_scale;
this->params_["exponential_length_scale"] = length_scale;
this->params_["sigma_exponential"] = sigma_exponential;
};

Expand All @@ -112,7 +112,7 @@ class Exponential : public RadialCovariance<DistanceMetricImpl> {
int>::type = 0>
double operator()(const X &x, const X &y) const {
double distance = this->distance_metric_(x, y);
double length_scale = this->params_.at("length_scale");
double length_scale = this->params_.at("exponential_length_scale");
distance /= length_scale;
double sigma = this->params_.at("sigma_exponential");
return sigma * sigma * exp(-fabs(distance));
Expand Down
2 changes: 1 addition & 1 deletion albatross/covariance_functions/scaling_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ template <typename ScalingFunction> class ScalingTerm : public CovarianceTerm {
scaling_function_.set_params(params);
}

virtual ParameterStore get_params() const {
virtual ParameterStore get_params() const override {
return scaling_function_.get_params();
}

Expand Down
20 changes: 10 additions & 10 deletions albatross/tuning_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,20 @@ inline double gp_fast_loo_nll(const RegressionDataset<FeatureType> &dataset,
return negative_log_likelihood(deviations, predictions.covariance);
}

inline double loo_nll(const albatross::RegressionDataset<double> &dataset,
albatross::RegressionModel<double> *model) {
auto loo_folds = albatross::leave_one_out(dataset);
return albatross::cross_validated_scores(
albatross::evaluation_metrics::negative_log_likelihood, loo_folds,
inline double loo_nll(const RegressionDataset<double> &dataset,
RegressionModel<double> *model) {
auto loo_folds = leave_one_out(dataset);
return cross_validated_scores(
evaluation_metrics::negative_log_likelihood, loo_folds,
model)
.sum();
}

inline double loo_rmse(const albatross::RegressionDataset<double> &dataset,
albatross::RegressionModel<double> *model) {
auto loo_folds = albatross::leave_one_out(dataset);
return albatross::cross_validated_scores(
albatross::evaluation_metrics::root_mean_square_error, loo_folds,
inline double loo_rmse(const RegressionDataset<double> &dataset,
RegressionModel<double> *model) {
auto loo_folds = leave_one_out(dataset);
return cross_validated_scores(
evaluation_metrics::root_mean_square_error, loo_folds,
model)
.mean();
}
Expand Down
2 changes: 1 addition & 1 deletion ci/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cd build
# Run the long integration tests in release mode so they're fast.
cmake -DENABLE_AUTOLINT=ON \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER="$C_COMPILER" -DCMAKE_CXX_COMPILER="$CXX_COMPILER" ../
make run_albatross_unit_tests run_inspection_example run_sinc_example run_tune_example -j4
make run_albatross_unit_tests run_inspection_example run_sinc_example run_tune_example run_temperature_example -j4
make clang-format-all
if [[ $(git --no-pager diff --name-only HEAD) ]]; then
echo "######################################################"
Expand Down
12 changes: 12 additions & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ Features

.. _`Gaussian Process Regression`: http://www.gaussianprocess.org/gpml/chapters/RW2.pdf

**********
Examples
**********

- :ref:`A One dimensional example using a sinc function. <1d-example>`
- :ref:`An example spatial model for temperature estimation. <temperature-example>`

.. image:: https://github.com/swift-nav/albatross/raw/akleeman/temperature-example/examples/temperature_example/mean_temperature.png
:align: center
:target: temperature-example.html

**********
Install
**********
Expand Down Expand Up @@ -63,6 +74,7 @@ and plot the results (though this'll require a numerical python environment),
:maxdepth: 1

1d-example
temperature-example


#########
Expand Down
127 changes: 127 additions & 0 deletions doc/temperature-example.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
###################
Temperature Example
###################

.. _temperature-example:

--------------
Introduction
--------------

Gaussian processes are quite popular in geostatistics, though they are perhaps more commonly used in `Kriging`_. The idea is very similar to the :ref:`1D Example <1d-example>`, there is some unknown function (in our example it will be temperature as a function of location) and you have noisy observations of the function (from weather stations) which you would like to use to make estimates at new locations.

.. _`Kriging` : https://en.wikipedia.org/wiki/Kriging
------------------
Temperature Data
------------------

For this example we'll use the `Global Summary of the Day`_ (GSOD) data to produce an estimate of the average temperature for a given day over the continental United States (CONUS).

.. _`Global Summary of the Day` : https://data.nodc.noaa.gov/cgi-bin/iso?id=gov.noaa.ncdc:C00516
Here are the GSOD observation of average temperature for May 1st, 2018,

.. image:: https://github.com/swift-nav/albatross/raw/akleeman/temperature-example/examples/temperature_example/observations.png
:align: center

------------------
Temperature Model
------------------

We're going to build our temperature model to incorporate the following priors about how temperature should be have.

- Temperature at neighboring locations should be similar.
- There is some non-zero average temperature.
- Temperature decreases with elevation

To accomplish this we can build a covariance function that is not dissimilar from the one used in the :ref:`1D Example <1d-example>`.
We can start with measurement noise,

.. code-block:: c
using Noise = IndependentNoise<Station>;
CovarianceFunction<Noise> noise = {Noise(2.0)};
Which will include a term in our model which states that each weather station makes observations of the average temperature which are centered around the true average temperature but with a noise having standard deviation of :math:`2` degrees.

We can then define a mean value for our field. In reality we might expect that the mean average temperature for a day would vary geographically, for this model we'll simply claim that there is some mean average temperature for all of CONUS,

.. code-block:: c
CovarianceFunction<Constant> mean = {Constant(1.5)};
Then as we'd mentioned we'd like to include the concept of temperature decreasing with elevation. To do this we can
create a scaling term which scales the mean value based on the elevation. The


.. math::
\mbox{elevation_scaling}(h) = 1. + \alpha \left(H - h\right)_{+}.
Where :math:`\left(\cdot\right)_{+}` could also be written :math:`\mbox{max}(0, \cdot)` and means it returns the
argument if positive and is otherwise :math:`0`. The resulting function will decrease at a rate of :math:`\alpha`
until :math:`h >= H` afterwhich the scaling term will flatten out to a constant value of :math:`1`. By multiplying
this term through with the mean we get a prior which will allow for higher temperatures at low elevations.

.. code-block:: c
// Scale the constant temperature value in a way that defaults
// to colder values for higher elevations.
using ElevationScalar = ScalingTerm<ElevationScalingFunction>;
CovarianceFunction<ElevationScalar> elevation_scalar = {ElevationScalar()};
auto elevation_scaled_mean = elevation_scalar * mean;
Now we capture the spatial variation by saying that location near
each other in terms of great circle distance will be similar,

.. code-block:: c
// The angular distance is equivalent to the great circle distance
using AngularSqrExp = SquaredExponential<StationDistance<AngularDistance>>;
CovarianceFunction<AngularSqrExp> angular_sqrexp = {AngularSqrExp(9e-2, 3.5)};
Then add that stations at different elevations will be dissimilar,

.. code-block:: c
// Radial distance is the difference in lengths of the X, Y, Z
// vectors, which translates into a difference in height so
// this term means "station at different elevations will be less correlated"
using RadialExp = Exponential<StationDistance<RadialDistance>>;
CovarianceFunction<RadialExp> radial_exp = {RadialExp(15000., 2.5)};
These can be combined to get our final covariance function,

.. code-block:: c
auto spatial_cov = angular_sqrexp * radial_exp;
auto covariance = elevation_scaled_mean + noise + spatial_cov;
For the full implementation details see the `example code`_.

.. _`example code` : https://github.com/swift-nav/albatross/blob/akleeman/temperature-example/examples/temperature_example/temperature_example.cc
-------------------
Gridded Predictions
-------------------

Now that we've defined the covariance function we can let ``albatross`` do the rest!

.. code-block:: c
auto model = gp_from_covariance<Station>(covariance);
model.fit(data);
const auto predictions = model.predict(grid_locations);
The ``predictions`` hold information about the mean and variance of the resulting estimates. We can look at the mean of the estimates,

.. image:: https://github.com/swift-nav/albatross/raw/akleeman/temperature-example/examples/temperature_example/mean_temperature.png
:align: center

and perhaps more interestingly we can also get out the variance, or how confident the model is about its predictions,

.. image:: https://github.com/swift-nav/albatross/raw/akleeman/temperature-example/examples/temperature_example/sd_temperature.png
:align: center

Notice that the model is capable of realizing that it's estimates should be trusted less in mountainous regions!
23 changes: 23 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,27 @@ add_custom_target(

add_dependencies(run_inspection_example
inspection_example
)


add_executable(temperature_example
EXCLUDE_FROM_ALL
temperature_example/temperature_example.cc
)

add_dependencies(temperature_example
albatross
)

target_link_libraries(temperature_example m gflags pthread nlopt)

add_custom_target(
run_temperature_example ALL
DEPENDS ${example_HEADERS}
COMMAND temperature_example -input ${PROJECT_SOURCE_DIR}/examples/temperature_example/gsod.csv -predict ${PROJECT_SOURCE_DIR}/examples/temperature_example/prediction_locations.csv -output ./test_temperature_predictions.csv -thin 5
COMMENT "Running temperature_example"
)

add_dependencies(run_temperature_example
temperature_example
)
Loading

0 comments on commit c7da1aa

Please sign in to comment.