Skip to content

Commit

Permalink
Pull request #9: Testing
Browse files Browse the repository at this point in the history
* commit '2dd74a21380be960918b6067abec8ec163721853':
  Added a simulation file for testing
  Fixed likelihood function and added additional tests
  Fixed likelihood function and added additional test
  • Loading branch information
ericbair-sciome authored and shail-choksi committed Nov 14, 2023
2 parents 4e0e1ec + 2dd74a2 commit f52ebbb
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 20 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
^R/scratch.R$
^.*\.Rproj$
^\.Rproj\.user$
^doc$
^Meta$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,5 @@ data
# Rcpp compiled binary files
src/*.so
src/*.o
/doc/
/Meta/
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: PrestoGP
Type: Package
Title: Penalized Regression for Spatio-Temporal Outcomes via Gaussian Processes
Version: 0.2.0.9017
Version: 0.2.0.9018
Authors@R: c(
person(given = "Eric",
family = "Bair",
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ export(SpatialModel)
export(SpatiotemporalFullModel)
export(SpatiotemporalModel)
export(createUMultivariate)
export(negloglik.full)
export(negloglik_full_ST)
export(negloglik_full_spatial)
export(negloglik_vecchia)
export(negloglik_vecchia_ST)
export(vecchia_Mprediction)
Expand Down
38 changes: 24 additions & 14 deletions R/Log_Likelihood.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ negloglik_full_ST=function(logparms,locs,y,N){
-mvtnorm::dmvnorm(y, rep(0,N), cov.mat, log=TRUE)
}

#' negloglik_full_spatial
#' negloglik.full
#'
#' Spatial Full Kriging negative loglikelihood
#'
Expand All @@ -74,12 +74,15 @@ negloglik_full_ST=function(logparms,locs,y,N){
#'
#' @examples
#' @noRd
negloglik_full_spatial=function(logparms,locs,y,N){
parms = exp(logparms)
d <- fields::rdist(locs)
cov.mat=parms[1]*fields::Exponential(d,range=parms[2])+
parms[3]*diag(N)
-mvtnorm::dmvnorm(y,rep(0,N),cov.mat,log=TRUE)
negloglik.full=function(logparams,locs,y){
params <- c(exp(logparams[1:2]),
gtools::inv.logit(logparams[3], 0, 2.5),
exp(logparams[4]))
d <- fields::rdist(locs)
N <- nrow(d)
cov.mat=params[1]*fields::Matern(d,range=params[2], smoothness=params[3])+
params[4]*diag(N)
return(-1*mvtnorm::dmvnorm(y,rep(0,N),cov.mat,log=TRUE))
}


Expand Down Expand Up @@ -157,7 +160,7 @@ mvnegloglik_ST =function(logparams,vecchia.approx,y,param.seq,P,scaling,nscale){
for (j in 1:nscale) {
locs.scaled[vecchia.approx$ondx==i,scaling==j] <-
locs.scaled[vecchia.approx$ondx==i,scaling==j] /
params[param.seq[2,1]+nscale*(i-1)+j-1]
params[param.seq[2,1]+nscale*(i-1)+j-1]
}
}
vecchia.approx$locsord <- locs.scaled
Expand All @@ -171,7 +174,7 @@ mvnegloglik_ST =function(logparams,vecchia.approx,y,param.seq,P,scaling,nscale){
##############################################################################
### Full Multivariate Matern Negative Loglikelihood Function ###########

mvnegloglik.full=function(logparams,locs,y,param.seq,P){
mvnegloglik.full=function(logparams,locs,y,param.seq){
# Input-
# logparams: A numeric vector of length (4*P)+(4*choose(P,2)).
# To construct these parameters we unlist a list of the 7 covariance
Expand All @@ -191,6 +194,7 @@ mvnegloglik.full=function(logparams,locs,y,param.seq,P){

#P <- length(y)
# transform the postively constrained parameters from log-space to normal-space
P <- length(locs)
params <- c(exp(logparams[1:param.seq[2,2]]),
gtools::inv.logit(logparams[param.seq[3,1]:param.seq[3,2]], 0, 2.5),
exp(logparams[param.seq[4,1]:param.seq[4,2]]))
Expand Down Expand Up @@ -279,12 +283,17 @@ cat.covariances <- function(locs.list,sig2,range,smoothness,nugget){
for (iter in 1:nrow(combs)){
i <- combs[iter,1]
j <- combs[iter,2]
d <- fields::rdist.earth(locs.list[[i]],locs.list[[j]],miles = FALSE)

# d <- fields::rdist.earth(locs.list[[i]],locs.list[[j]],miles = FALSE)
d <- fields::rdist(locs.list[[i]],locs.list[[j]])
# Calculate the covariance matrix - if/then based on its location in the super-matrix
N <- nrow(d)
cov.mat.ij <- sig2[i,j]*geoR::matern(d,phi = range[i,j],kappa = smoothness[i,j])+
nugget[i,j]*diag(N)
N <- nrow(d)
if (i==j){ # To accomodate varying size outcomes- the nugget is not included on cross-covariances
cov.mat.ij <- sig2[i,j]*geoR::matern(d,phi = range[i,j],kappa = smoothness[i,j])+
nugget[i,j]*diag(N)
}else{
cov.mat.ij <- sig2[i,j]*geoR::matern(d,phi = range[i,j],kappa = smoothness[i,j])
}


if (combs[iter,1]==1){
row.idx <- 1:dims[1]
Expand All @@ -307,6 +316,7 @@ cat.covariances <- function(locs.list,sig2,range,smoothness,nugget){

}


return(cov.mat.out)
}

Expand Down
4 changes: 2 additions & 2 deletions R/PrestoGP_Model.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ setMethod("initialize", "PrestoGPModel", function(.Object, ...) {
})

setGeneric("show_theta", function(object, Y_names)standardGeneric("show_theta") )
setGeneric("prestogp_fit", function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-4, maxit=5000), parallel=FALSE, foldid=NULL) standardGeneric("prestogp_fit") )
setGeneric("prestogp_fit", function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-3, maxit=5000), parallel=FALSE, foldid=NULL) standardGeneric("prestogp_fit") )
setGeneric("prestogp_predict", function(model, X="matrix", locs="matrix", m="numeric", ordering.pred=c("obspred", "general"), pred.cond=c("independent", "general"), return.values=c("mean", "meanvar")) standardGeneric("prestogp_predict") )
setGeneric("calc_covparams", function(model, locs, Y) standardGeneric("calc_covparams") )
setGeneric("specify", function(model, locs, m)standardGeneric("specify") )
Expand Down Expand Up @@ -181,7 +181,7 @@ setMethod("show_theta", "PrestoGPModel",
#' model <- prestogp_fit(model, logNO2, X, locs)
#' ...
setMethod("prestogp_fit", "PrestoGPModel",
function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters=100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-4, maxit=5000), parallel=FALSE, foldid=NULL) {
function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters=100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-3, maxit=5000), parallel=FALSE, foldid=NULL) {
#parameter validation
#TODO: This method should check for input errors in the
#multivariate case (where Y, X, and locs are lists)
Expand Down
2 changes: 1 addition & 1 deletion man/estimate_betas-PrestoGPModel-method.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/prestogp_fit-PrestoGPModel-method.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions tests/testthat/sim_multivariate_lik.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
set.seed(1212)

ny <- 3 # number of response variables
n.spatial.xy <- 10 # number of spatial coordinates per dimension

library(MASS)
library(fields)
library(psych)

n.rho <- choose(ny, 2)
rho.vec <- runif(n.rho, 0.2, 0.8)
rho <- matrix(0, nrow=ny, ncol=ny)
rho[upper.tri(rho)] <- rho.vec
rho <- rho + t(rho) + diag(1, ny)

marg.smoothness <- 0.5 + rnorm(ny, 0.1, 0.05)
nuggets <- runif(ny, 0.5, 2)
x.variance <- runif(ny, 1.5, 4)
marg.var <- nuggets + x.variance
ranges <- runif(ny, 0.5, 1.2)

params.all <- c(x.variance, ranges, marg.smoothness, nuggets, rho.vec)

locs.list <- list()
for (i in 1:ny) {
loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
locs.list[[i]] <- as.matrix(expand.grid(loc1, loc2))
}

Sigma.All <- matrix(nrow=ny*n.spatial.xy^2, ncol=ny*n.spatial.xy^2)
for (i in 1:ny) {
for (j in i:ny) {
if (i==j) {
ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
dij <- fields::rdist(locs.list[[i]])
Sigma.All[ndx1,ndx1] <- marg.var[i] *
fields::Matern(dij, range = ranges[i],
smoothness = marg.smoothness[i])
}
else {
ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
ndx2 <- ((j-1)*n.spatial.xy^2+1):(n.spatial.xy^2*j)
dij <- fields::rdist(locs.list[[i]], locs.list[[j]])
vii <- marg.smoothness[i]
vjj <- marg.smoothness[j]
vij <- (vii+vjj)/2
aii <- 1/ranges[i]
ajj <- 1/ranges[j]
aij <- sqrt((aii^2+ajj^2)/2)
Sigma.All[ndx1,ndx2] <- rho[i,j] * sqrt(marg.var[i]) *
sqrt(marg.var[j]) * aii^vii * ajj^vjj * gamma(vij) /
(aij^(2*vij) * sqrt(gamma(vii) * gamma(vjj))) *
fields::Matern(dij, smoothness=vij, alpha=aij)
Sigma.All[ndx2,ndx1] <- t(Sigma.All[ndx1,ndx2])
}
}
}

L.C <- chol(Sigma.All)

st.error <- rnorm(n.spatial.xy^2 * ny) %*% L.C

nug.error <- NULL
for (i in 1:ny) {
nug.error <- c(nug.error, nuggets[i]*rnorm(n.spatial.xy^2))
}

y.list <- list()
for (i in 1:ny) {
ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
y.list[[i]] <- st.error[ndx1] + nug.error[ndx1]
}

rm(ny, n.spatial.xy, i, j, n.rho,
loc1, loc2, ndx1, ndx2, dij, vii, vjj, vij, aii, ajj, aij, L.C,
st.error, nug.error, rho, rho.vec, ranges, Sigma.All, nuggets,
marg.smoothness, marg.var)
112 changes: 112 additions & 0 deletions tests/testthat/test-Log_Likelihood.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,70 @@ test_that("create.initial.values.flex", {
expect_equal(c(-0.105, -0.105, -0.693, -0.693, -1.386, -1.386, -2.303, -2.303, 0.584), logparams, tolerance=1e-2)
})

test_that("negloglik.full", {
set.seed(1234)

y.orig <- rnorm(100)

locs.dim <- seq(1,10,length=sqrt(length(y.orig)))
locs <- as.matrix(expand.grid(locs.dim, locs.dim))

covmat.true <- Matern(rdist(locs), range=1, smoothness=0.5) + diag(100)

y <- y.orig %*% chol(covmat.true)
y <- as.vector(y)

params.init <- rep(NA, 4)
params.init[1] <- 0.9*var(y)
params.init[2] <- 1
params.init[3] <- 0.5
params.init[4] <- 0.1*var(y)

params.init <- create.initial.values.flex(params.init[1],
params.init[2],
params.init[3],
params.init[4],
1, 1)

d <- rdist(locs)

res.optim.NM <- optim(par=params.init, fn=negloglik.full, locs=locs, y=y,
control=list(maxit=5000))

LL.full <- negloglik.full(res.optim.NM$par, locs, y)

params.final <- c(exp(res.optim.NM$par[1:2]),
gtools::inv.logit(res.optim.NM$par[3], 0, 2.5),
exp(res.optim.NM$par[4]))

pgp.params <- PrestoGP:::create.initial.values.flex(params.final[1],
params.final[2],
params.final[3],
params.final[4],
1, 1)
pseq <- create.param.sequence(1)

LL.full.pgp <- mvnegloglik.full(pgp.params, list(locs), y, pseq)

vec.approx <- vecchia_specify(locs, 99)

LL.vecchia <- -1*vecchia_likelihood(y, vec.approx, params.final[1:3],
params.final[4])

vec.approx.pgp <- vecchia_Mspecify(list(locs), 99)
vec.U.pgp <- createUMultivariate(vec.approx.pgp, c(params.final, 1))

LL.pgp <- -1*GPvecchia:::vecchia_likelihood_U(y, vec.U.pgp)

expect_equal(173.315, LL.full, tolerance=1e-3)
# Univariate likelihood should equal the multivariate likelihood
expect_equal(LL.full, LL.full.pgp, tolerance=1e-3)
# Full likelihood should equal both the univariate and multivariate
# Vecchia approximations
expect_equal(LL.full, LL.vecchia, tolerance=1e-3)
expect_equal(LL.full, LL.pgp, tolerance=1e-3)
})

test_that("mvnegloglik", {
source("sim_multivariate_big.R")
P <- 3
Expand All @@ -100,6 +164,54 @@ test_that("mvnegloglik", {
expect_equal(34474.4, neg_likelihood, tolerance=1e-2)
})

test_that("mvnegloglik.full", {
source("sim_multivariate_lik.R")

pseq <- create.param.sequence(3)

param.marg.var <- 0.9*unlist(lapply(y.list, var))
param.marg.scale <- rep(1,3)
param.marg.smooth <- rep(0.5,3)
param.marg.nugget <- 0.1*unlist(lapply(y.list, var))
param.rho <- rep(0.5, 3)
params.init <- create.initial.values.flex(param.marg.var,
param.marg.scale,
param.marg.smooth,
param.marg.nugget,
param.rho, 3)

res.optim.NM<- optim(par = params.init,fn=mvnegloglik.full,
locs=locs.list, y=unlist(y.list),
param.seq = pseq,
method = "Nelder-Mead",
control = list(trace = 0, maxit = 10000, reltol=1e-4))

LL.full.mv <- mvnegloglik.full(res.optim.NM$par, locs.list,
unlist(y.list), pseq)

param.seq.begin <- pseq[,1]
param.seq.end <- pseq[,2]
params.init.final <- res.optim.NM$par
params.init.final.t <- c(exp(params.init.final[param.seq.begin[1]:
param.seq.end[2]]),
gtools::inv.logit(params.init.final[
param.seq.begin[3]:
param.seq.end[3]],0,2.5),
exp(params.init.final[param.seq.begin[4]:
param.seq.end[4]]),
tanh(params.init.final[
param.seq.begin[5]:param.seq.end[5]]))

vec.mapprox <- vecchia_Mspecify(locs.list, length(unlist(y.list))-1)
U.mobj <- createUMultivariate(vec.mapprox, params.init.final.t)

LL.vecchia.mv <- -1*GPvecchia:::vecchia_likelihood_U(unlist(y.list), U.mobj)

expect_equal(541.31, LL.full.mv, tolerance=1e-3)
# Full likelihood should equal the Vecchia likelihood
expect_equal(LL.full.mv, LL.vecchia.mv, tolerance=1e-3)
})

#TODO implement this test
test_that("cat.covariances", {
set.seed(7919)
Expand Down

0 comments on commit f52ebbb

Please sign in to comment.