Make large DOF infinite.

Change QMC routne for qrng with scrambling
lbelzile · Jul 8, 2024 · dcb3ead · dcb3ead
1 parent b0c8640
commit dcb3ead
Show file tree

Hide file tree

Showing 17 changed files with 76 additions and 73 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: TruncatedNormal
 Type: Package
 Title: Truncated Multivariate Normal and Student Distributions
-Version: 2.2.4.0002
+Version: 2.3
 Authors@R: c(person(given="Zdravko", family="Botev", role = "aut", email = "botev@unsw.edu.au", comment = c(ORCID = "0000-0001-9054-3452")), person(given="Leo", family="Belzile", role = c("aut", "cre"), email = "belzilel@gmail.com", comment = c(ORCID = "0000-0002-9135-014X")))
 Description: A collection of functions to deal with the truncated univariate and multivariate normal and Student distributions, described in Botev (2017) <doi:10.1111/rssb.12162> and Botev and L'Ecuyer (2015) <doi:10.1109/WSC.2015.7408180>.
 License: GPL-3
@@ -10,13 +10,14 @@ BugReports:
 Depends: R (>= 2.10)
 Imports: 
     nleqslv, 
-    randtoolbox, 
+    qrng,
+    spacefillr, 
     alabama, 
     Rcpp (>= 0.12.16)
 LinkingTo: 
     Rcpp, 
     RcppArmadillo
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 VignetteBuilder: knitr
 Encoding: UTF-8
 Suggests: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -29,7 +29,8 @@ export(tregress)
 importFrom(Rcpp,evalCpp)
 importFrom(alabama,"auglag")
 importFrom(nleqslv,nleqslv)
-importFrom(randtoolbox,sobol)
+importFrom(qrng,sobol)
+importFrom(spacefillr,generate_sobol_owen_set)
 importFrom(stats,"dnorm")
 importFrom(stats,"pnorm")
 importFrom(stats,"pt")

diff --git a/NEWS.md b/NEWS.md
@@ -1,18 +1,18 @@
-## Version 2.4
+## Version 2.3
 
 ### Bug fixes
 
+- `rtnorm` now checks arguments length and throw an error rather than try to recycle mean and standard deviation vectors (issue #6).
 - `rtnorm` recycles arguments when `ltrunc` or `rtrunc` are length 1.
-- `*tmvt` and `*tmvnorm` now check that the scale matrix `sigma` is symmetric, positive-definite and non-degenerate (issue #8)
+- `*tmvt` and `*tmvnorm` now check that the scale matrix `sigma` is symmetric, positive-definite and non-degenerate (issue #8) by default.
 - `rmvnorm` returns a vector rather than an 1 by n matrix for the unidimensional setting.
+- Degrees of freedom larger than 350 are not treated as infinite, as the code suffers from overflow and returns missing values otherwise.
 
 ### Changes
 
 - The package uses `tinytest` rather than `testthat` for unit tests.
-
-## Version 2.3
-
-- `rtnorm` now checks arguments length and throw an error rather than try to recycle mean and standard deviation vectors.
+- Since scrambling is disabled in `randtoolbox` (throwing warning messages), the package now relies on the `qrng` package and the Owen scrambling method from `spacefillr` package for quasi Monte Carlo.
+- Added alias in package documentation, as requested by Kurt Hornik.
 
 ## Version 2.2.2
 

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -54,7 +54,7 @@ lnNpr <- function(a, b, check = TRUE) {
     .Call(`_TruncatedNormal_lnNpr`, a, b, check)
 }
 
-#' Cholesky matrix decomposition with GGE ordering
+#' Cholesky matrix decomposition with GB ordering
 #' 
 #' This function computes the Cholesky decomposition of a covariance matrix
 #' \code{Sigma} and returns a list containing the permuted bounds for integration. 

diff --git a/R/mvNqmc.R b/R/mvNqmc.R
@@ -5,10 +5,11 @@
 #' where \eqn{X} is a zero-mean multivariate normal vector
 #' with covariance matrix \eqn{\Sigma}, that is, \eqn{X} is drawn from \eqn{N(0,\Sigma)}.
 #' Infinite values for vectors \eqn{u} and \eqn{l} are accepted.
-#' The Monte Carlo method uses sample size \eqn{n}: 
+#' The Monte Carlo method uses sample size \eqn{n}:
 #' the larger \eqn{n}, the smaller the relative error of the estimator.
 #'
 #' @inheritParams mvNcdf
+#' @importFrom spacefillr generate_sobol_owen_set
 #' @details Suppose you wish to estimate Pr\eqn{(l<AX<u)},
 #'  where \eqn{A} is a full rank matrix
 #'  and \eqn{X} is drawn from \eqn{N(\mu,\Sigma)}, then you simply compute
@@ -37,10 +38,10 @@
 #' @export
 #' @keywords internal
 #' @examples
-#' d <- 15 
+#' d <- 15
 #' l <- 1:d
 #' u <- rep(Inf, d)
-#' Sig <- matrix(rnorm(d^2), d, d)*2 
+#' Sig <- matrix(rnorm(d^2), d, d)*2
 #' Sig <- Sig %*% t(Sig)
 #' mvNqmc(l, u, Sig, 1e4) # compute the probability
 mvNqmc <- function(l, u, Sig, n = 1e5){
@@ -53,10 +54,10 @@ mvNqmc <- function(l, u, Sig, n = 1e5){
       return(list(prob = exp(lnNpr(a = l / sqrt(Sig[1]), b = u / sqrt(Sig[1]))), err = NA, relErr = NA, upbnd = NA))
     }
     # Cholesky decomposition of matrix
-    out <- cholperm(Sig, l, u) 
-    L <- out$L 
-    l <- out$l 
-    u <- out$u 
+    out <- cholperm(Sig, l, u)
+    L <- out$L
+    l <- out$l
+    u <- out$u
     D <- diag(L)
     if (any(D < 1e-10)){
       warning('Method may fail as covariance matrix is singular!')
@@ -67,11 +68,11 @@ mvNqmc <- function(l, u, Sig, n = 1e5){
     diag(L) <- rep(0, d) # remove diagonal
   # find optimal tilting parameter via non-linear equation solver
   x0 <- rep(0, 2 * length(l) - 2)
-  solvneq <- nleqslv::nleqslv(x0, 
-                              fn = gradpsi, 
+  solvneq <- nleqslv::nleqslv(x0,
+                              fn = gradpsi,
                               jac = jacpsi,
-                              L = L, l = l, u = u, 
-                              global = "pwldog", 
+                              L = L, l = l, u = u,
+                              global = "pwldog",
                               method = "Newton",
                               control = list(maxit = 500L))
   xmu <- solvneq$x
@@ -82,7 +83,7 @@ mvNqmc <- function(l, u, Sig, n = 1e5){
   }
   # assign saddlepoint x* and mu*
   x <- xmu[1:(d-1)]
-  mu <- xmu[d:(2*d-2)] 
+  mu <- xmu[d:(2*d-2)]
   # check the constraints
   if(any((out$L %*% c(x,0) - out$u)[-d] > 0, (-out$L %*% c(x,0) + out$l)[-d] > 0)){
    warning("Solution to exponential tilting problem using Powell's dogleg method \n  does not lie in convex set l < Lx < u.")
@@ -103,9 +104,9 @@ mvNqmc <- function(l, u, Sig, n = 1e5){
                   control.outer = list(trace = FALSE,method="nlminb"))
     if(solvneqc$convergence == 0){
       x <- solvneqc$par[1:(d-1)]
-      mu <- solvneqc$par[d:(2*d-2)] 
+      mu <- solvneqc$par[d:(2*d-2)]
     } else{
-      stop('Did not find a solution to the nonlinear system in `mvNqmc`!') 
+      stop('Did not find a solution to the nonlinear system in `mvNqmc`!')
     }
   }
   p <- rep(0, 12)

diff --git a/R/mvTcdf.R b/R/mvTcdf.R
@@ -42,7 +42,7 @@
 #' @export
 #' @keywords internal
 #' @author \code{Matlab} code by Zdravko Botev, \code{R} port by Leo Belzile
-#' @importFrom randtoolbox sobol
+#' @importFrom qrng sobol
 mvTcdf <- function(l, u, Sig, df, n = 1e5){
   d <- length(l)
   if (length(u) != d | d != sqrt(length(Sig)) | any(l > u)) {

diff --git a/R/mvnprqmc.R b/R/mvnprqmc.R
@@ -9,14 +9,17 @@ mvnprqmc <- function(n, L, l, u, mu){
     if(n*(d-1) > 2e7){
       warning("High memory requirements for storage of QMC sequence\nConsider reducing n")
     }
-    x <- as.matrix(randtoolbox::sobol(n, dim = d-1, init =TRUE, scrambling = 1, seed=ceiling(1e6*runif(1))))
+    x <- as.matrix(qrng::sobol(n,
+                               d = d-1,
+                               randomize = "Owen",
+                               seed = ceiling(1e6*runif(1))))
     ## Similar option in fOptions package. Problem: sobol sequence can overflow (values above 1).
     # x <- qrng::sobol(n = n, d = d - 1, randomize = TRUE)
     p <- 0
     for (k in 1:(d-1)){
       # compute matrix multiplication L*Z
       if(k > 1){
-        col <- crossprod(L[k, 1:(k-1)], Z[1:(k-1),]) 
+        col <- crossprod(L[k, 1:(k-1)], Z[1:(k-1),])
       } else{
         col <- rep(0, n)
       }

diff --git a/R/mvtprqmc.R b/R/mvtprqmc.R
@@ -10,8 +10,11 @@ mvtprqmc <- function(n, L, l, u, nu, mu){
   if(n*(d-1) > 2e7){
     warning("High memory requirements for storage of QMC sequence\nConsider reducing n")
   }
-  x <- as.matrix(randtoolbox::sobol(n, dim = d - 1, init = TRUE, scrambling = 1, seed = ceiling(1e6 * runif(1))))
-  # x <- as.matrix(qrng::sobol(n = n, d = d - 1, randomize = TRUE))
+  # x <- as.matrix(randtoolbox::sobol(n, dim = d - 1, init = TRUE, scrambling = 1, seed = ceiling(1e6 * runif(1))))
+  x <- as.matrix(qrng::sobol(n = n,
+                             d = d - 1,
+                             randomize = "digital.shift",
+                             seed = ceiling(1e6 * runif(1))))
   #Fixed 21.03.2018 to ensure that if d=2, no error returned
   # Monte Carlo uses 'n' samples;
   # precompute constants

diff --git a/R/tmvt.R b/R/tmvt.R
@@ -145,7 +145,7 @@ ptmvt <- function(q, mu, sigma, df, lb, ub, type = c("mc", "qmc"), log = FALSE,
   }
   type <- match.arg(type)
   df <- as.vector(df)[1]
-  if(isTRUE(all.equal(df, 0)) || isTRUE(all.equal(df, Inf))){
+  if(isTRUE(all.equal(df, 0)) || df > 350){
     return(ptmvnorm(q = q, mu = mu, sigma = sigma, lb = lb, ub = ub, B = B, log = log, type = type))
   }
   stopifnot(df > 0, length(mu) == ncol(sigma), nrow(sigma) == ncol(sigma), is.logical(log))

diff --git a/inst/tinytest/test_TruncatedNormal.R b/inst/tinytest/test_TruncatedNormal.R
@@ -3,26 +3,31 @@ library(tinytest)
 lb <- c(0, 0)
 ub <- c(740.0, 76.2)
 mu <- c(344.31293403, 62.6937066)
-sigma <- matrix(c(36407.0005966, -1167.50805662, -1167.50805662, 290.76915744), 2, 2)
+sigma <- matrix(c(36407.0005966, -1167.50805662, -1167.50805662, 290.76915744),
+                nrow = 2, ncol = 2)
 df1 <- 3
 df2 <- 300
 x = c(100, 50)
 set.seed(1234)
 
-## "Truncated normal DF (MC versus QMC) give similar answers" 
+## "Truncated normal DF (MC versus QMC) give similar answers"
 
-  expect_equal(ptmvnorm(x, mu=mu, sigma=sigma, lb=lb, ub=ub, log=FALSE, type="qmc"),
-                         ptmvnorm(x, mu=mu, sigma=sigma, lb=lb, ub=ub, log=FALSE, type="mc"), 
+  expect_equal(
+    ptmvnorm(x, mu=mu, B = 1e5, sigma=sigma, lb=lb, ub=ub, log=FALSE, type="qmc"),
+    ptmvnorm(x, mu=mu, B = 1e5, sigma=sigma, lb=lb, ub=ub, log=FALSE, type="mc"),
                          tolerance = 1e-4)
-  expect_equal(ptmvt(x, B = 1e5, sigma=sigma, df = 2, lb=lb, ub=ub, log=FALSE, type="mc"),
-                         ptmvt(x, B = 1e5, sigma=sigma, lb=lb, ub=ub, df = 2, log=FALSE, type="qmc"), 
+  expect_equal(
+    ptmvt(x, B = 1e5, sigma=sigma, df = 2, lb=lb, ub=ub, log=FALSE, type="mc"),
+    ptmvt(x, B = 1e5, sigma=sigma, lb=lb, ub=ub, df = 2, log=FALSE, type="qmc"),
                          tolerance = 3e-3)
 
 
 ## "Student with large df gives same answer as normal"
-
-  expect_equal(ptmvnorm(x, B = 1e6, sigma=sigma, lb=lb, ub=ub, log=FALSE, type="qmc"),
-                         ptmvt(x, B = 1e6, sigma=sigma, lb=lb, ub=ub, df = 300, log=FALSE, type="qmc"), 
+
+  expect_equal(
+    ptmvnorm(x, B = 1e6, sigma=sigma, lb=lb, ub=ub, log=FALSE, type="qmc"),
+    ptmvt(x, B = 1e6, sigma=sigma, lb=lb, ub=ub, df = 300, log=FALSE,
+          type="mc"),
                          tolerance = 2e-3)
 
 
@@ -35,7 +40,7 @@ prob <- pmvnorm(lb = lower, ub = upper, mu = mean, sigma = corr)
 
 
 ## "Univariate probabilities",{
-  
+
   expect_equivalent(pmvnorm(lb = -Inf, ub = 3, mu = 2, sigma = 1), pnorm(3, mean = 2))
   expect_equivalent(pmvt(lb = -Inf, ub = 3, df = 2, mu = 0, sigma = 1), pt(3, 2))
 
@@ -58,7 +63,7 @@ D <- 10
 muV <- 1:D
 Smat <- diag(0.5, D) + matrix(0.5, D, D)
 ## "Expectation of (truncated) elliptical distributions"
-  
+
   expect_equal(colMeans(rtmvnorm(n = B, sigma = Smat)),
                          rep(0, D), tolerance = 5/sqrt(B))
   expect_equal(colMeans(rtmvnorm(n = B, mu = muV, sigma = Smat)),
@@ -69,22 +74,22 @@ Smat <- diag(0.5, D) + matrix(0.5, D, D)
   expect_equal(colMeans(rtmvt(n = B, lb = (1:D)/D, df = 3, ub = 2*(1:D)/D, mu = rep(0,D), sigma = diag(1, D))),
                          mean_tt(lb = (1:D)/D, ub = 2*(1:D)/D, df = 3),
                          tolerance = 5/sqrt(B))
-  
+
 
 
 lb <- rnorm(n = D, mean = 0, sd = 10)
 ub <- lb + rgamma(D, shape = 4, rate = 1)
 ## "Bounds of simulated variables"
-  
+
   expect_true(isTRUE(all(apply(rtmvnorm(n = 1e4, lb = lb, ub = ub, mu = muV, 100*Smat), 2, min) > lb)))
   expect_true(isTRUE(all(apply(rtmvnorm(n = 1e4, lb = lb, ub = ub, mu = muV, 100*Smat), 2, min) < ub)))
   expect_true(isTRUE(all(apply(rtmvt(n = 1e4, df = 3, lb = lb, ub = ub, mu = muV, 100*Smat), 2, min) > lb)))
   expect_true(isTRUE(all(apply(rtmvt(n = 1e4, df = 3, lb = lb, ub = ub, mu = muV, 100*Smat), 2, min) < ub)))
-  
+
 
 
 ## "Bounds on distribution function beyond truncation points"
-  
+
   expect_equal(ptmvnorm(q = ub + runif(D), lb = lb, ub = ub, mu = muV, Smat), 1)
   expect_equal(ptmvt(q = ub + runif(D), df = 2, lb = lb, ub = ub, mu = muV, Smat), 1)
   expect_equal(ptmvnorm(q = lb + c(-1, runif(D-1)), lb = lb, ub = ub, mu = muV/D, Smat), 0)
@@ -93,27 +98,29 @@ ub <- lb + rgamma(D, shape = 4, rate = 1)
 
 pt <- rnorm(D)
 ## "Untruncated density agrees with that in the mvtnorm package"
-  
-  expect_equal(mvtnorm::dmvnorm(x = pt, mean = muV, sigma = Smat, log = TRUE), 
+
+  expect_equal(mvtnorm::dmvnorm(x = pt, mean = muV, sigma = Smat, log = TRUE),
                          TruncatedNormal::dtmvnorm(x = pt, mu = muV, lb = rep(-Inf, D), ub = rep(Inf, D), sigma = Smat, log = TRUE))
-  expect_equal(mvtnorm::dmvt(x = pt, df = 2, delta = muV, sigma = Smat, log = TRUE), 
+  expect_equal(mvtnorm::dmvt(x = pt, df = 2, delta = muV, sigma = Smat, log = TRUE),
                          TruncatedNormal::dtmvt(x = pt, df = 2, mu = muV, lb = rep(-Inf, D), ub = rep(Inf, D), sigma = Smat, log = TRUE))
 
 
 
 pt <- rnorm(D)
 ## "Untruncated density agrees with that in the mvtnorm package"
-  
-  expect_equal(mvtnorm::dmvnorm(x = pt, mean = muV, sigma = Smat, log = TRUE), 
+
+  expect_equal(mvtnorm::dmvnorm(x = pt, mean = muV, sigma = Smat, log = TRUE),
                          TruncatedNormal::dtmvnorm(x = pt, mu = muV, lb = rep(-Inf, D), ub = rep(Inf, D), sigma = Smat, log = TRUE))
-  expect_equal(mvtnorm::dmvt(x = pt, df = 2, delta = muV, sigma = Smat, log = TRUE), 
+  expect_equal(mvtnorm::dmvt(x = pt, df = 2, delta = muV, sigma = Smat, log = TRUE),
                          TruncatedNormal::dtmvt(x = pt, df = 2, mu = muV, lb = rep(-Inf, D), ub = rep(Inf, D), sigma = Smat, log = TRUE))
 
 
 d <- 15
 sigma <- 0.5 * (diag(d) + matrix(1, d, d))
 ## "Known probability"
-
-  expect_equivalent((d+1)*pmvnorm(sigma = sigma, lb = rep(0, d), type = "qmc", B = B),
-  1, tolerance = 1/sqrt(B))
-
+est <- pmvnorm(sigma = sigma, lb = rep(0, d), type = "qmc", B = 1e6)
+expect_equivalent(
+  (d+1)*as.numeric(est),
+  1,
+  tolerance = attr(est, "relerr")*(d+1)*2)
+
diff --git a/man/dot-cholpermGB.Rd b/man/dot-cholpermGB.Rd
diff --git a/man/man.Rproj b/man/man.Rproj
diff --git a/man/mvNqmc.Rd b/man/mvNqmc.Rd
diff --git a/src/RcppExports.o b/src/RcppExports.o
diff --git a/src/TruncatedNormal.so b/src/TruncatedNormal.so
diff --git a/src/densities.o b/src/densities.o
diff --git a/src/lnNpr_cholperm_Phinv.o b/src/lnNpr_cholperm_Phinv.o