Skip to content

Commit

Permalink
Correct endpoints for sparse daily data
Browse files Browse the repository at this point in the history
endpoints() uses POSIXlt elements to determine the period breakpoints
for some values of 'on' (quarters, months, and days). This can be a
problem for sparse data, when there are contiguous observations that
have the same POSIXlt element used for aggregation, but different
years.

For each of the 3 cases where POSIXlt elements are used, ensure the
endpoints always vary by year, even if the POSIXlt element being used
does not vary across observations.

Fixes #169.
  • Loading branch information
joshuaulrich committed May 3, 2017
1 parent e714046 commit 775f61a
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
10 changes: 6 additions & 4 deletions R/endpoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,14 @@ function(x,on='months',k=1) {
as.integer(c(0, which(diff(posixltindex$year %/% k + 1) != 0), NR))
},
"quarters" = {
xi <- (posixltindex$mon%/%3) + 1
as.integer(c(0,which(diff(xi) != 0),NR))
ixqtr <- posixltindex$year * 100L + 190000L + posixltindex$mon %/% 3 + 1
as.integer(c(0,which(diff(ixqtr) != 0),NR))
},
"months" = {
#as.integer(c(0, which(diff(posixltindex$mon %/% k + 1) != 0), NR) )
# x[which(diff(as.POSIXlt(index(x))$mon) != 0)[seq(0,328,12)]]
ep <- .Call("endpoints", posixltindex$mon, 1L, 1L, addlast, PACKAGE='xts')
ixmon <- posixltindex$year * 100L + 190000L + posixltindex$mon
ep <- .Call("endpoints", ixmon, 1L, 1L, addlast, PACKAGE='xts')
if(k > 1)
ep[seq(1,length(ep),k)]
else ep
Expand All @@ -68,7 +69,8 @@ function(x,on='months',k=1) {
"days" = {
#as.integer(c(0, which(diff(.index(x) %/% 86400L %/% k + 1) != 0), NR))
#as.integer(c(0, which(diff(posixltindex$yday %/% k + 1) != 0), NR))
.Call("endpoints", posixltindex$yday, 1L, k, addlast, PACKAGE='xts')
ixyday <- posixltindex$year * 1000L + 1900000L + posixltindex$yday
.Call("endpoints", ixyday, 1L, k, addlast, PACKAGE='xts')
},
# non-date slicing should be indifferent to TZ and DST, so use math instead
"hours" = {
Expand Down
27 changes: 27 additions & 0 deletions inst/unitTests/runit.endpoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,30 @@ test.years_integer_index <- function() {
}
#}}}

# sparse endpoints could be a problem with POSIXlt elements (#169)
# TODO: sparse intraday endpoints
# Yearly endpoints on sparse data: one observation on June 1st of each year
# from 1992 through 1996. Every observation is expected to close its own
# period, so the endpoints are 0 followed by each row number.
test.sparse_years <- function() {
  obs_dates <- as.Date(sprintf("199%d-06-01", 2:6))
  sparse <- xts(2:6, obs_dates)
  checkIdentical(endpoints(sparse, "years"), 0:5)
}
# Quarterly endpoints on sparse data: all five observations fall on June 1st
# (same quarter of the year) but in different years (1992-1996), so each one
# is expected to end its own period.
test.sparse_quarters <- function() {
  obs_dates <- as.Date(sprintf("199%d-06-01", 2:6))
  sparse <- xts(2:6, obs_dates)
  checkIdentical(endpoints(sparse, "quarters"), 0:5)
}
# Monthly endpoints on sparse data: all five observations share the same
# calendar month (June) but lie in different years (1992-1996), so each one
# is expected to end its own period.
test.sparse_months <- function() {
  obs_dates <- as.Date(sprintf("199%d-06-01", 2:6))
  sparse <- xts(2:6, obs_dates)
  checkIdentical(endpoints(sparse, "months"), 0:5)
}
# Weekly endpoints on sparse data: observations are a full year apart
# (June 1st, 1992-1996), so each one is expected to end its own period.
test.sparse_weeks <- function() {
  obs_dates <- as.Date(sprintf("199%d-06-01", 2:6))
  sparse <- xts(2:6, obs_dates)
  checkIdentical(endpoints(sparse, "weeks"), 0:5)
}
# Daily endpoints on sparse data: all five observations fall on the same
# calendar date (June 1st) in different years (1992-1996), so each one is
# expected to end its own period.
test.sparse_days <- function() {
  obs_dates <- as.Date(sprintf("199%d-06-01", 2:6))
  sparse <- xts(2:6, obs_dates)
  checkIdentical(endpoints(sparse, "days"), 0:5)
}

0 comments on commit 775f61a

Please sign in to comment.