diff --git a/R/xts.methods.R b/R/xts.methods.R index da4c1845..e8bc1675 100644 --- a/R/xts.methods.R +++ b/R/xts.methods.R @@ -283,19 +283,7 @@ window_idx <- function(x, index. = NULL, start = NULL, end = NULL) # We get back upper bound of index as per findInterval tmp <- base_idx[firstlast] - # Iterate in reverse to grab all matches - # We have to do this to handle duplicate dates in the xts index. - tmp <- rev(tmp) - res <- NULL - for(i in tmp) { - dt <- idx[i] - j <- i - repeat { - res <- c(res, j) - j <- j -1 - if(j < 1 || idx[j] != dt) break - } - } + res <- .Call("fill_window_dups_rev", tmp, .index(x), PACKAGE = "xts") firstlast <- rev(res) } diff --git a/inst/benchmarks/benchmark.subset.R b/inst/benchmarks/benchmark.subset.R index 4b82e596..c1673b5e 100644 --- a/inst/benchmarks/benchmark.subset.R +++ b/inst/benchmarks/benchmark.subset.R @@ -1,13 +1,13 @@ +stopifnot(require("xts")) stopifnot(require("microbenchmark")) # Benchmark [.xts using ISO8601 range on large objects N <- 2e7 -secPerYr <- 86400*365 -x <- xts::.xts(1:N, 1.0*seq(secPerYr*20, secPerYr*40, length.out = N)) -rng <- "1990/2000" +s <- 86400*365.25 +x <- .xts(1:N, 1.0*seq(s*20, s*40, length.out = N), tzone = "UTC") # warmup, in case there's any JIT for (i in 1:2) { - x[rng,] + x["1999/2001",] } profile <- FALSE @@ -20,5 +20,32 @@ if (profile) { Rprof(NULL) print(srp <- summaryRprof()) } else { - microbenchmark(x[rng,], times = 10) + cat("Subset using ISO-8601 range\n") + microbenchmark(x["1990",], x["1990/",], x["/2009",], + x["1990/1994",], x["1990/1999",], x["1990/2009",], times = 5) } + +cat("Subset using integer vector\n") +i001 <- seq(1, N, 1) +i005 <- seq(1, N, 5) +i010 <- seq(1, N, 10) +i050 <- seq(1, N, 50) +i100 <- seq(1, N, 100) +microbenchmark(x[i001,], x[i005,], x[i010,], x[i050,], x[i100,], times = 5) + +cat("Subset using logical vector\n") +l001 <- l005 <- l010 <- l050 <- l100 <- logical(N) +l001[i001] <- TRUE +l005[i005] <- TRUE +l010[i010] <- TRUE +l050[i050] <- TRUE +l100[i100] <- TRUE +microbenchmark(x[l001,], x[l005,], x[l010,], x[l050,], x[l100,], times = 5) + +cat("Subset using date-time vector\n") +t001 <- index(x)[i001] +t005 <- index(x)[i005] +t010 <- index(x)[i010] +t050 <- index(x)[i050] +t100 <- index(x)[i100] +microbenchmark(x[t001,], x[t005,], x[t010,], x[t050,], x[t100,], times = 5) diff --git a/src/binsearch.c b/src/binsearch.c index 1cc6f5f6..124a750b 100644 --- a/src/binsearch.c +++ b/src/binsearch.c @@ -151,3 +151,33 @@ SEXP binsearch(SEXP key, SEXP vec, SEXP start) return ScalarInteger(lo); } + +SEXP fill_window_dups_rev(SEXP _x, SEXP _index) +{ + /* Translate user index (_x) to xts index (_index). '_x' contains the + * upper bound of the location of the user index in the xts index. + * This is necessary to handle duplicate dates in the xts index. + */ + int n_x = length(_x); + double *x = REAL(_x); + double *index = REAL(_index); + + SEXP _out = PROTECT(allocVector(INTSXP, length(_index))); + int *out = INTEGER(_out); + + /* Loop over locations in _x in reverse order */ + int i, xi, j, k = 0; + for (i = n_x; i > 0; i--) { + xi = x[i-1]; + j = xi; + do { + out[k++] = j--; + } while (j > 0 && index[xi-1] == index[j-1]); + } + + /* truncate so length(_out) = k + * NB: output is in reverse order! + */ + UNPROTECT(1); + return lengthgets(_out, k); +}