Pull request #9: Testing

* commit '2dd74a21380be960918b6067abec8ec163721853': Added a simulation file for testing Fixed likelihood function and added additional tests Fixed likelihood function and added additional test
NIEHS · Nov 14, 2023 · f52ebbb · f52ebbb
2 parents 4e0e1ec + 2dd74a2
commit f52ebbb
Show file tree

Hide file tree

Showing 10 changed files with 225 additions and 20 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -3,3 +3,5 @@
 ^R/scratch.R$
 ^.*\.Rproj$
 ^\.Rproj\.user$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
@@ -85,3 +85,5 @@ data
 # Rcpp compiled binary files
 src/*.so
 src/*.o
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: PrestoGP
 Type: Package
 Title: Penalized Regression for Spatio-Temporal Outcomes via Gaussian Processes 
-Version: 0.2.0.9017
+Version: 0.2.0.9018
 Authors@R: c(
     person(given = "Eric",
            family = "Bair",

diff --git a/NAMESPACE b/NAMESPACE
@@ -9,8 +9,8 @@ export(SpatialModel)
 export(SpatiotemporalFullModel)
 export(SpatiotemporalModel)
 export(createUMultivariate)
+export(negloglik.full)
 export(negloglik_full_ST)
-export(negloglik_full_spatial)
 export(negloglik_vecchia)
 export(negloglik_vecchia_ST)
 export(vecchia_Mprediction)

diff --git a/R/Log_Likelihood.R b/R/Log_Likelihood.R
@@ -60,7 +60,7 @@ negloglik_full_ST=function(logparms,locs,y,N){
   -mvtnorm::dmvnorm(y, rep(0,N), cov.mat, log=TRUE)
 }
 
-#' negloglik_full_spatial
+#' negloglik.full
 #'
 #' Spatial Full Kriging negative loglikelihood
 #'
@@ -74,12 +74,15 @@ negloglik_full_ST=function(logparms,locs,y,N){
 #'
 #' @examples
 #' @noRd
-negloglik_full_spatial=function(logparms,locs,y,N){
-  parms = exp(logparms)
-  d <- fields::rdist(locs)
-  cov.mat=parms[1]*fields::Exponential(d,range=parms[2])+
-    parms[3]*diag(N)
-  -mvtnorm::dmvnorm(y,rep(0,N),cov.mat,log=TRUE)
+negloglik.full=function(logparams,locs,y){
+    params <- c(exp(logparams[1:2]),
+                gtools::inv.logit(logparams[3], 0, 2.5),
+                exp(logparams[4]))
+    d <- fields::rdist(locs)
+    N <- nrow(d)
+    cov.mat=params[1]*fields::Matern(d,range=params[2], smoothness=params[3])+
+        params[4]*diag(N)
+    return(-1*mvtnorm::dmvnorm(y,rep(0,N),cov.mat,log=TRUE))
 }
 
 
@@ -157,7 +160,7 @@ mvnegloglik_ST =function(logparams,vecchia.approx,y,param.seq,P,scaling,nscale){
         for (j in 1:nscale) {
             locs.scaled[vecchia.approx$ondx==i,scaling==j] <-
                 locs.scaled[vecchia.approx$ondx==i,scaling==j] /
-                params[param.seq[2,1]+nscale*(i-1)+j-1] 
+                params[param.seq[2,1]+nscale*(i-1)+j-1]
         }
     }
     vecchia.approx$locsord <- locs.scaled
@@ -171,7 +174,7 @@ mvnegloglik_ST =function(logparams,vecchia.approx,y,param.seq,P,scaling,nscale){
 ##############################################################################
 ### Full Multivariate Matern Negative Loglikelihood Function ###########
 
-mvnegloglik.full=function(logparams,locs,y,param.seq,P){
+mvnegloglik.full=function(logparams,locs,y,param.seq){
   #  Input-
   #  logparams: A numeric vector of length (4*P)+(4*choose(P,2)).
   #             To construct these parameters we unlist a list of the 7 covariance
@@ -191,6 +194,7 @@ mvnegloglik.full=function(logparams,locs,y,param.seq,P){
 
   #P <- length(y)
   # transform the postively constrained parameters from log-space to normal-space
+  P <- length(locs)
   params <- c(exp(logparams[1:param.seq[2,2]]),
               gtools::inv.logit(logparams[param.seq[3,1]:param.seq[3,2]], 0, 2.5),
               exp(logparams[param.seq[4,1]:param.seq[4,2]]))
@@ -279,12 +283,17 @@ cat.covariances <- function(locs.list,sig2,range,smoothness,nugget){
   for (iter in 1:nrow(combs)){
     i <- combs[iter,1]
     j <- combs[iter,2]
-    d <- fields::rdist.earth(locs.list[[i]],locs.list[[j]],miles = FALSE)
-
+    # d <- fields::rdist.earth(locs.list[[i]],locs.list[[j]],miles = FALSE)
+    d <- fields::rdist(locs.list[[i]],locs.list[[j]])
     # Calculate the covariance matrix - if/then based on its location in the super-matrix
-    N <- nrow(d)
-    cov.mat.ij <- sig2[i,j]*geoR::matern(d,phi = range[i,j],kappa = smoothness[i,j])+
-      nugget[i,j]*diag(N)
+      N <- nrow(d)
+      if (i==j){ # To accomodate varying size outcomes- the nugget is not included on cross-covariances
+        cov.mat.ij <- sig2[i,j]*geoR::matern(d,phi = range[i,j],kappa = smoothness[i,j])+
+          nugget[i,j]*diag(N)
+      }else{
+        cov.mat.ij <- sig2[i,j]*geoR::matern(d,phi = range[i,j],kappa = smoothness[i,j])
+      }
+
 
     if (combs[iter,1]==1){
       row.idx <- 1:dims[1]
@@ -307,6 +316,7 @@ cat.covariances <- function(locs.list,sig2,range,smoothness,nugget){
 
   }
 
+
   return(cov.mat.out)
 }
 

diff --git a/R/PrestoGP_Model.R b/R/PrestoGP_Model.R
@@ -60,7 +60,7 @@ setMethod("initialize", "PrestoGPModel", function(.Object, ...) {
 })
 
 setGeneric("show_theta", function(object, Y_names)standardGeneric("show_theta") )
-setGeneric("prestogp_fit", function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-4, maxit=5000), parallel=FALSE, foldid=NULL) standardGeneric("prestogp_fit") )
+setGeneric("prestogp_fit", function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-3, maxit=5000), parallel=FALSE, foldid=NULL) standardGeneric("prestogp_fit") )
 setGeneric("prestogp_predict", function(model, X="matrix", locs="matrix", m="numeric", ordering.pred=c("obspred", "general"), pred.cond=c("independent", "general"), return.values=c("mean", "meanvar")) standardGeneric("prestogp_predict") )
 setGeneric("calc_covparams", function(model, locs, Y) standardGeneric("calc_covparams") )
 setGeneric("specify", function(model, locs, m)standardGeneric("specify") )
@@ -181,7 +181,7 @@ setMethod("show_theta", "PrestoGPModel",
 #' model <- prestogp_fit(model, logNO2, X, locs)
 #' ...
 setMethod("prestogp_fit", "PrestoGPModel",
-          function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters=100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-4, maxit=5000), parallel=FALSE, foldid=NULL) {
+          function(model, Y, X, locs, scaling=NULL, apanasovich=FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters=100, verbose=FALSE, optim.method="Nelder-Mead", optim.control=list(trace=0, reltol=1e-3, maxit=5000), parallel=FALSE, foldid=NULL) {
             #parameter validation
             #TODO: This method should check for input errors in the
             #multivariate case (where Y, X, and locs are lists)

diff --git a/man/estimate_betas-PrestoGPModel-method.Rd b/man/estimate_betas-PrestoGPModel-method.Rd
diff --git a/man/prestogp_fit-PrestoGPModel-method.Rd b/man/prestogp_fit-PrestoGPModel-method.Rd
diff --git a/tests/testthat/sim_multivariate_lik.R b/tests/testthat/sim_multivariate_lik.R
@@ -0,0 +1,78 @@
+set.seed(1212)
+
+ny <- 3 # number of response variables
+n.spatial.xy <- 10 # number of spatial coordinates per dimension
+
+library(MASS)
+library(fields)
+library(psych)
+
+n.rho <- choose(ny, 2)
+rho.vec <- runif(n.rho, 0.2, 0.8)
+rho <- matrix(0, nrow=ny, ncol=ny)
+rho[upper.tri(rho)] <- rho.vec
+rho <- rho + t(rho) + diag(1, ny)
+
+marg.smoothness <- 0.5 + rnorm(ny, 0.1, 0.05)
+nuggets <- runif(ny, 0.5, 2)
+x.variance <- runif(ny, 1.5, 4)
+marg.var <- nuggets + x.variance
+ranges <- runif(ny, 0.5, 1.2)
+
+params.all <- c(x.variance, ranges, marg.smoothness, nuggets, rho.vec)
+
+locs.list <- list()
+for (i in 1:ny) {
+    loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+    loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+    locs.list[[i]] <- as.matrix(expand.grid(loc1, loc2))
+}
+
+Sigma.All <- matrix(nrow=ny*n.spatial.xy^2, ncol=ny*n.spatial.xy^2)
+for (i in 1:ny) {
+    for (j in i:ny) {
+        if (i==j) {
+            ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
+            dij <- fields::rdist(locs.list[[i]])
+            Sigma.All[ndx1,ndx1] <- marg.var[i] *
+                fields::Matern(dij, range = ranges[i],
+                               smoothness = marg.smoothness[i])
+        }
+        else {
+            ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
+            ndx2 <- ((j-1)*n.spatial.xy^2+1):(n.spatial.xy^2*j)
+            dij <- fields::rdist(locs.list[[i]], locs.list[[j]])
+            vii <- marg.smoothness[i]
+            vjj <- marg.smoothness[j]
+            vij <- (vii+vjj)/2
+            aii <- 1/ranges[i]
+            ajj <- 1/ranges[j]
+            aij <- sqrt((aii^2+ajj^2)/2)
+            Sigma.All[ndx1,ndx2] <- rho[i,j] * sqrt(marg.var[i]) *
+                sqrt(marg.var[j]) * aii^vii * ajj^vjj * gamma(vij) /
+                (aij^(2*vij) * sqrt(gamma(vii) * gamma(vjj))) *
+                fields::Matern(dij, smoothness=vij, alpha=aij)
+            Sigma.All[ndx2,ndx1] <- t(Sigma.All[ndx1,ndx2])
+        }
+    }
+}
+
+L.C <- chol(Sigma.All)
+
+st.error <- rnorm(n.spatial.xy^2 * ny) %*% L.C
+
+nug.error <- NULL
+for (i in 1:ny) {
+    nug.error <- c(nug.error, nuggets[i]*rnorm(n.spatial.xy^2))
+}
+
+y.list <- list()
+for (i in 1:ny) {
+    ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
+    y.list[[i]] <- st.error[ndx1] + nug.error[ndx1]
+}
+
+rm(ny, n.spatial.xy, i, j, n.rho,
+   loc1, loc2, ndx1, ndx2, dij, vii, vjj, vij, aii, ajj, aij, L.C,
+   st.error, nug.error, rho, rho.vec, ranges, Sigma.All, nuggets,
+   marg.smoothness, marg.var)
diff --git a/tests/testthat/test-Log_Likelihood.R b/tests/testthat/test-Log_Likelihood.R
@@ -81,6 +81,70 @@ test_that("create.initial.values.flex", {
   expect_equal(c(-0.105, -0.105, -0.693, -0.693, -1.386, -1.386, -2.303, -2.303, 0.584), logparams, tolerance=1e-2)
 })
 
+test_that("negloglik.full", {
+    set.seed(1234)
+
+    y.orig <- rnorm(100)
+
+    locs.dim <- seq(1,10,length=sqrt(length(y.orig)))
+    locs <- as.matrix(expand.grid(locs.dim, locs.dim))
+
+    covmat.true <- Matern(rdist(locs), range=1, smoothness=0.5) + diag(100)
+
+    y <- y.orig %*% chol(covmat.true)
+    y <- as.vector(y)
+
+    params.init <- rep(NA, 4)
+    params.init[1] <- 0.9*var(y)
+    params.init[2] <- 1
+    params.init[3] <- 0.5
+    params.init[4] <- 0.1*var(y)
+
+    params.init <- create.initial.values.flex(params.init[1],
+                                              params.init[2],
+                                              params.init[3],
+                                              params.init[4],
+                                              1, 1)
+
+    d <- rdist(locs)
+
+    res.optim.NM <- optim(par=params.init, fn=negloglik.full, locs=locs, y=y,
+                          control=list(maxit=5000))
+
+    LL.full <- negloglik.full(res.optim.NM$par, locs, y)
+
+    params.final <- c(exp(res.optim.NM$par[1:2]),
+                      gtools::inv.logit(res.optim.NM$par[3], 0, 2.5),
+                      exp(res.optim.NM$par[4]))
+
+    pgp.params <- PrestoGP:::create.initial.values.flex(params.final[1],
+                                                    params.final[2],
+                                                    params.final[3],
+                                                    params.final[4],
+                                                    1, 1)
+    pseq <- create.param.sequence(1)
+
+    LL.full.pgp <- mvnegloglik.full(pgp.params, list(locs), y, pseq)
+
+    vec.approx <- vecchia_specify(locs, 99)
+
+    LL.vecchia <- -1*vecchia_likelihood(y, vec.approx, params.final[1:3],
+                                        params.final[4])
+
+    vec.approx.pgp <- vecchia_Mspecify(list(locs), 99)
+    vec.U.pgp <- createUMultivariate(vec.approx.pgp, c(params.final, 1))
+
+    LL.pgp <- -1*GPvecchia:::vecchia_likelihood_U(y, vec.U.pgp)
+
+    expect_equal(173.315, LL.full, tolerance=1e-3)
+# Univariate likelihood should equal the multivariate likelihood
+    expect_equal(LL.full, LL.full.pgp, tolerance=1e-3)
+# Full likelihood should equal both the univariate and multivariate
+# Vecchia approximations
+    expect_equal(LL.full, LL.vecchia, tolerance=1e-3)
+    expect_equal(LL.full, LL.pgp, tolerance=1e-3)
+})
+
 test_that("mvnegloglik", {
     source("sim_multivariate_big.R")
     P <- 3
@@ -100,6 +164,54 @@ test_that("mvnegloglik", {
     expect_equal(34474.4, neg_likelihood, tolerance=1e-2)
 })
 
+test_that("mvnegloglik.full", {
+    source("sim_multivariate_lik.R")
+
+    pseq <- create.param.sequence(3)
+
+    param.marg.var <- 0.9*unlist(lapply(y.list, var))
+    param.marg.scale <- rep(1,3)
+    param.marg.smooth <- rep(0.5,3)
+    param.marg.nugget <- 0.1*unlist(lapply(y.list, var))
+    param.rho <- rep(0.5, 3)
+    params.init <- create.initial.values.flex(param.marg.var,
+                                              param.marg.scale,
+                                              param.marg.smooth,
+                                              param.marg.nugget,
+                                              param.rho, 3)
+
+    res.optim.NM<- optim(par = params.init,fn=mvnegloglik.full,
+                         locs=locs.list, y=unlist(y.list),
+                         param.seq = pseq,
+                         method = "Nelder-Mead",
+                         control = list(trace = 0, maxit = 10000, reltol=1e-4))
+
+    LL.full.mv <- mvnegloglik.full(res.optim.NM$par, locs.list,
+                                   unlist(y.list), pseq)
+
+    param.seq.begin <- pseq[,1]
+    param.seq.end   <- pseq[,2]
+    params.init.final <- res.optim.NM$par
+    params.init.final.t <- c(exp(params.init.final[param.seq.begin[1]:
+                                                   param.seq.end[2]]),
+                             gtools::inv.logit(params.init.final[
+                                         param.seq.begin[3]:
+                                         param.seq.end[3]],0,2.5),
+                             exp(params.init.final[param.seq.begin[4]:
+                                                   param.seq.end[4]]),
+                             tanh(params.init.final[
+                                 param.seq.begin[5]:param.seq.end[5]]))
+
+    vec.mapprox <- vecchia_Mspecify(locs.list, length(unlist(y.list))-1)
+    U.mobj <- createUMultivariate(vec.mapprox, params.init.final.t)
+
+    LL.vecchia.mv <- -1*GPvecchia:::vecchia_likelihood_U(unlist(y.list), U.mobj)
+
+    expect_equal(541.31, LL.full.mv, tolerance=1e-3)
+# Full likelihood should equal the Vecchia likelihood
+    expect_equal(LL.full.mv, LL.vecchia.mv, tolerance=1e-3)
+})
+
 #TODO implement this test
 test_that("cat.covariances", {
   set.seed(7919)
-Original file line number
+Diff line change
@@ Expand Up / @@ -85,3 +85,5 @@ data @@
     # Rcpp compiled binary files
     src/*.so
     src/*.o
+    /doc/
+    /Meta/