Skip to content

Commit

Permalink
fix confidence bands when length(x) != length(y); PR#18562
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.r-project.org/R/trunk@84800 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
maechler committed Jul 31, 2023
1 parent 64b28a5 commit 5cecae8
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 11 deletions.
4 changes: 4 additions & 0 deletions doc/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@
where a restriction to \code{INT_MAX} bytes is documented, without
doing a partial read unlike Linux). The payload is now split into
1Gb chunks to avoid that problem. (\PR{18571})
\item \code{qqplot(x,y, conf.level=.)} gives better confidence bounds
when \code{length(x) != length(y)}, thanks to Alexander Ploner's
report and patch proposal (\PR{18557}).
}
}
}
Expand Down
28 changes: 18 additions & 10 deletions src/library/stats/R/qqplot.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# File src/library/stats/R/qqplot.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2022 The R Core Team
# Copyright (C) 1995-2023 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -23,13 +23,15 @@ qqplot <- function(x, y, plot.it = TRUE,
conf.args = list(exact = NULL, simulate.p.value = FALSE,
B = 2000, col = NA, border = NULL))
{
sx <- sort(x)
sy <- sort(y)
force(xlab) ; force(ylab) # get defaults explicitly before touching x, y
## Sort x, y in place without length adjustment
x <- sx <- sort(x)
y <- sy <- sort(y)
lenx <- length(sx)
leny <- length(sy)
if( leny < lenx )
sx <- approx(1L:lenx, sx, n = leny)$y
if( leny > lenx )
else if( leny > lenx )
sy <- approx(1L:leny, sy, n = lenx)$y

if (is.null(conf.level)) {
Expand All @@ -39,7 +41,7 @@ qqplot <- function(x, y, plot.it = TRUE,
}

if (conf.level < 0 || conf.level > 1)
stop("'conf.level' is not a probability")
stop("'conf.level' is not a probability")

N <- lenx + leny
if (is.null(conf.args$exact)) conf.args$exact <- NULL
Expand All @@ -58,17 +60,23 @@ qqplot <- function(x, y, plot.it = TRUE,
exact = exact && !simulate,
simulate = simulate, B = conf.args$B)

### Switzer (1976, Biometrika) 10.1093/biomet/63.1.13
### Switzer (1976, Biometrika) 10.1093/biomet/63.1.13, eq. 4 + 5
### NB, all indices are in terms of sorted x, y
i <- as.double(seq_along(x))
Re <- ceiling(i * N / lenx) - i
Rl <- ceiling(i * N / lenx - ca * as.double(leny)) - i
Rl[Rl < 1] <- NA
Rl[Rl > leny] <- NA
Rr <- floor(i * N / lenx - leny / lenx + ca * as.double(leny)) - i + 1
Rr[Rr < 1] <- NA
Rr[Rr > leny] <- NA
lwr <- sy[Rl]
upr <- sy[Rr]
## Indices refer to original sorted data
lwr <- y[Rl]
upr <- y[Rr]
## Reduce length of confidence bands when needed
if (leny < lenx) {
lwr <- approx(x, lwr, xout = sx)$y
upr <- approx(x, upr, xout = sx)$y
}
ret <- list(x = sx, y = sy, lwr = lwr, upr = upr)

if (plot.it) {
Expand All @@ -90,5 +98,5 @@ qqplot <- function(x, y, plot.it = TRUE,
points(sx, sy, ...)
}

return(invisible(ret))
invisible(ret)
}
16 changes: 15 additions & 1 deletion tests/reg-tests-1e.R
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,9 @@ cls <- c("raw", "logical", "integer", "numeric", "complex",
names(cls) <- cls
be <- baseenv()
asF <- lapply(cls, \(cl) be[[paste0("as.",cl)]] %||% be[[cl]])
obs <- lapply(cls, \(cl) asF[[cl]](switch(cl, "difftime" = "2:1:0", "noquote" = letters, "numeric_version" = as.character(1:2), 1:2)))
## objects of the respective class:
obs <- lapply(cls, \(cl) asF[[cl]](switch(cl, "difftime" = "2:1:0", "noquote" = letters,
"numeric_version" = as.character(1:2), 1:2)))
asDF <- lapply(cls, \(cl) getVaW(be[[paste0("as.data.frame.", cl)]](obs[[cl]])))
r <- local({ g <- as.data.frame.logical; f <- function(L=TRUE) g(L)
getVaW(f()) })
Expand Down Expand Up @@ -734,6 +736,18 @@ stopifnot(is.list(d2), identical(unlist(unname(d2)), 1:3))
## gave Error .. sys.call(-1L)[[1L]] .. comparison (!=) is possible only ..


## qqplot(x,y, *) confidence bands for unequal sized x,y, PR#18570:
x <- (7:1)/8; y <- (1:63)/64
r <- qqplot(x,y, plot.it=FALSE, conf.level = 0.90)
r2<- qqplot(y,x, plot.it=FALSE, conf.level = 0.90)
(d <- 64 * as.data.frame(r)[,3:4])
stopifnot(identical(d, data.frame(lwr = c(NA, NA, NA, 6, 15, 24, 33),
upr = c(31, 40, 49, 58, NA, NA, NA))),
identical(8 * as.data.frame(r2[3:4]),
data.frame(lwr = c(NA,NA,NA, 1:4 +0), upr = c(4:7 +0, NA,NA,NA))))
## lower and upper confidence bands were nonsensical in R <= 4.3.1



## keep at end
rbind(last = proc.time() - .pt,
Expand Down

0 comments on commit 5cecae8

Please sign in to comment.