Skip to content

291 date period #297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
dff767e
add period utils
dajmcdon Mar 7, 2024
0667845
ensure joins happen silently
dajmcdon Mar 8, 2024
edd5134
fix time_type processing in forecast/target date
dajmcdon Mar 9, 2024
e3b2907
import tsibble
dajmcdon Mar 9, 2024
a624b63
use panel_data vignette for experiments, to be removed
dajmcdon Mar 9, 2024
69b21c6
remove browser()
dajmcdon Mar 9, 2024
3498fcf
prefix with utils to avoid check warning
dajmcdon Mar 9, 2024
b2b8134
redocument
dajmcdon Mar 9, 2024
f2f39a2
ahead/lag will always be integer type now
dajmcdon Mar 9, 2024
f9859fe
we no longer warn if as_of > forecast_date
dajmcdon Mar 9, 2024
fb7faae
upstream changes to prep() now require data
dajmcdon Mar 9, 2024
78ff1f9
remove unused test
dajmcdon Mar 9, 2024
3d059f4
always a scalar
dajmcdon Mar 18, 2024
de9e00a
add some additional tests
dajmcdon Mar 18, 2024
0c38723
this shouldn't be on this branch
dajmcdon Mar 18, 2024
e036b8d
abort message typo
dajmcdon Mar 18, 2024
e6e60cd
bump version, add to news
dajmcdon Mar 18, 2024
845c1a9
run styler
dajmcdon Mar 18, 2024
8feef06
Apply @dsweber2 suggestions from code review
dajmcdon Mar 27, 2024
6d5c379
add back omitted test, include some comments
dajmcdon Mar 27, 2024
047caa6
validate scalar forecast_date
dajmcdon Mar 27, 2024
cfde0c9
Merge branch '291-date-period' of https://github.com/cmu-delphi/epipr…
dajmcdon Mar 27, 2024
fc05cd9
symmetrize scalar validation
dajmcdon Mar 27, 2024
1e76059
add uncommitted stuff from code review
dajmcdon Mar 27, 2024
f5ef88b
clarify some test purposes
dajmcdon Mar 27, 2024
c1e3ff9
refactor validate_date() to avoid duplication
dajmcdon Mar 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: epipredict
Title: Basic epidemiology forecasting methods
Version: 0.0.10
Version: 0.0.11
Authors@R: c(
person("Daniel", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")),
person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"),
Expand Down Expand Up @@ -46,6 +46,7 @@ Imports:
tibble,
tidyr,
tidyselect,
tsibble,
usethis,
vctrs,
workflows (>= 1.0.0)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
- Working vignette
- use `checkmate` for input validation
- refactor quantile extrapolation (possibly creates different results)
- force `target_date` + `forecast_date` handling to match the time_type of
  the epi_df. Allows for annual and weekly data.
4 changes: 2 additions & 2 deletions R/dist_quantiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ quantile_extrapolate <- function(x, tau_out, middle) {
dplyr::arrange(q)
}
if (any(indl)) {
qvals_out[indl] <- tail_extrapolate(tau_out[indl], head(qv, 2))
qvals_out[indl] <- tail_extrapolate(tau_out[indl], utils::head(qv, 2))
}
if (any(indr)) {
qvals_out[indr] <- tail_extrapolate(tau_out[indr], tail(qv, 2))
qvals_out[indr] <- tail_extrapolate(tau_out[indr], utils::tail(qv, 2))
}
qvals_out
}
Expand Down
9 changes: 4 additions & 5 deletions R/epi_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,10 @@ fit.epi_workflow <- function(object, data, ..., control = workflows::control_wor
#' preds
predict.epi_workflow <- function(object, new_data, ...) {
if (!workflows::is_trained_workflow(object)) {
rlang::abort(
c("Can't predict on an untrained epi_workflow.",
i = "Do you need to call `fit()`?"
)
)
cli::cli_abort(c(
"Can't predict on an untrained epi_workflow.",
i = "Do you need to call `fit()`?"
))
}
components <- list()
components$mold <- workflows::extract_mold(object)
Expand Down
34 changes: 17 additions & 17 deletions R/layer_add_forecast_date.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@
#' p3
layer_add_forecast_date <-
function(frosting, forecast_date = NULL, id = rand_id("add_forecast_date")) {
arg_is_chr_scalar(id)
arg_is_scalar(forecast_date, allow_null = TRUE)
# can't validate the type of forecast_date until we know the time_type
add_layer(
frosting,
layer_add_forecast_date_new(
Expand All @@ -78,8 +81,6 @@ layer_add_forecast_date <-
}

layer_add_forecast_date_new <- function(forecast_date, id) {
forecast_date <- arg_to_date(forecast_date, allow_null = TRUE)
arg_is_chr_scalar(id)
layer("add_forecast_date", forecast_date = forecast_date, id = id)
}

Expand All @@ -91,26 +92,25 @@ slather.layer_add_forecast_date <- function(object, components, workflow, new_da
workflow$fit$meta$max_time_value,
max(new_data$time_value)
)
object$forecast_date <- max_time_value
forecast_date <- max_time_value
} else {
forecast_date <- object$forecast_date
}
as_of_pre <- attributes(workflows::extract_preprocessor(workflow)$template)$metadata$as_of
as_of_fit <- workflow$fit$meta$as_of
as_of_post <- attributes(new_data)$metadata$as_of

as_of_date <- as.Date(max(as_of_pre, as_of_fit, as_of_post))

if (object$forecast_date < as_of_date) {
cli_warn(
c("The forecast_date is less than the most ",
"recent update date of the data: ",
i = "forecast_date = {object$forecast_date} while data is from {as_of_date}."
)
)
}
expected_time_type <- attr(
workflows::extract_preprocessor(workflow)$template, "metadata"
)$time_type
if (expected_time_type == "week") expected_time_type <- "day"
validate_date(forecast_date, expected_time_type,
call = expr(layer_add_forecast_date())
)
forecast_date <- coerce_time_type(forecast_date, expected_time_type)
object$forecast_date <- forecast_date
components$predictions <- dplyr::bind_cols(
components$predictions,
forecast_date = as.Date(object$forecast_date)
forecast_date = forecast_date
)

components
}

Expand Down
57 changes: 31 additions & 26 deletions R/layer_add_target_date.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@
#' p3
layer_add_target_date <-
function(frosting, target_date = NULL, id = rand_id("add_target_date")) {
target_date <- arg_to_date(target_date, allow_null = TRUE)
arg_is_chr_scalar(id)
arg_is_scalar(target_date, allow_null = TRUE)
# can't validate the type of target_date until we know the time_type
add_layer(
frosting,
layer_add_target_date_new(
Expand All @@ -84,35 +85,39 @@ slather.layer_add_target_date <- function(object, components, workflow, new_data
the_recipe <- workflows::extract_recipe(workflow)
the_frosting <- extract_frosting(workflow)

expected_time_type <- attr(
workflows::extract_preprocessor(workflow)$template, "metadata"
)$time_type
if (expected_time_type == "week") expected_time_type <- "day"

if (!is.null(object$target_date)) {
target_date <- as.Date(object$target_date)
} else { # null target date case
if (detect_layer(the_frosting, "layer_add_forecast_date") &&
!is.null(extract_argument(
the_frosting,
"layer_add_forecast_date", "forecast_date"
target_date <- object$target_date
validate_date(target_date, expected_time_type,
call = expr(layer_add_target_date())
)
target_date <- coerce_time_type(target_date, expected_time_type)
} else if (
detect_layer(the_frosting, "layer_add_forecast_date") &&
!is.null(forecast_date <- extract_argument(
the_frosting, "layer_add_forecast_date", "forecast_date"
))) {
forecast_date <- extract_argument(
the_frosting,
"layer_add_forecast_date", "forecast_date"
)

ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")

target_date <- forecast_date + ahead
} else {
max_time_value <- max(
workflows::extract_preprocessor(workflow)$max_time_value,
workflow$fit$meta$max_time_value,
max(new_data$time_value)
)

ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")

target_date <- max_time_value + ahead
}
validate_date(forecast_date, expected_time_type,
call = expr(layer_add_forecast_date())
)
forecast_date <- coerce_time_type(forecast_date, expected_time_type)
ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")
target_date <- forecast_date + ahead
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does having this go from max_time_value + ahead to forecast_date + ahead cause this to function to behave differently? If yes and testing this PR becomes tough, then we could reduce scope here and punt these changes to another PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separate question: how do the units in ahead change depending on the time type? Is this primarily up to the user to make sure they specify aheads in the right units?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On max_time_value + ahead, this was the default previously, and remains the fall back (see lines below). However, if there is a forecast_date, then I think the default for target_date should be to use the specified forecast_date first.

This PR is meant to allow for ahead to always be an integer. Then it should be in units relative to the time_type of the epi_df. It took a lot of effort to make that happen though.

} else {
max_time_value <- max(
workflows::extract_preprocessor(workflow)$max_time_value,
workflow$fit$meta$max_time_value,
max(new_data$time_value)
)
ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")
target_date <- max_time_value + ahead
}

object$target_date <- target_date
components$predictions <- dplyr::bind_cols(components$predictions,
target_date = target_date
)
Expand Down
10 changes: 8 additions & 2 deletions R/layer_population_scaling.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ slather.layer_population_scaling <-
length(object$df_pop_col) == 1
)

if (is.null(object$by)) {
object$by <- intersect(
kill_time_value(epi_keys(components$predictions)),
colnames(dplyr::select(object$df, !object$df_pop_col))
)
}
try_join <- try(
dplyr::left_join(components$predictions, object$df,
by = object$by
Expand All @@ -157,8 +163,8 @@ slather.layer_population_scaling <-
))
}

object$df <- object$df %>%
dplyr::mutate(dplyr::across(tidyselect::where(is.character), tolower))
# object$df <- object$df %>%
# dplyr::mutate(dplyr::across(tidyselect::where(is.character), tolower))
Comment on lines +166 to +167
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is wrong, but I'm not yet certain that it's safe to remove.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doing some timetravel, it looks like it's been here in one form or another forever. What's confusing to me is that this looks like it does nothing. Was the point to make all the characters columns lowercase (such as geo_value) to match correctly maybe, but there was a parenthesis problem? Was dplyr::mutate(dplyr::across(tidyselect::where(is.character)), tolower) meant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'm not sure why it was there in the first place. I suspect that the included state population dataset used capitals while typical usage from the API gives geos in lower case. But that shouldn't have resulted in hardcoded workarounds here (that are prone to failure).

So I think this should go forever, but I wanted to be sure that if some downstream use errored out, I could find this and try to track it more carefully.

pop_col <- rlang::sym(object$df_pop_col)
exprs <- rlang::expr(c(!!!object$terms))
pos <- tidyselect::eval_select(exprs, components$predictions)
Expand Down
26 changes: 13 additions & 13 deletions R/step_epi_shift.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,20 @@
step_epi_lag <-
function(recipe,
...,
lag,
role = "predictor",
trained = FALSE,
lag,
prefix = "lag_",
default = NA,
columns = NULL,
skip = FALSE,
id = rand_id("epi_lag")) {
if (!is_epi_recipe(recipe)) {
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
cli::cli_abort("This step can only operate on an `epi_recipe`.")
}

if (missing(lag)) {
rlang::abort(
cli::cli_abort(
c("The `lag` argument must not be empty.",
i = "Did you perhaps pass an integer in `...` accidentally?"
)
Expand All @@ -75,7 +75,8 @@ step_epi_lag <-
arg_is_nonneg_int(lag)
arg_is_chr_scalar(prefix, id)
if (!is.null(columns)) {
rlang::abort(c("The `columns` argument must be `NULL.",
cli::cli_abort(c(
"The `columns` argument must be `NULL.",
i = "Use `tidyselect` methods to choose columns to lag."
))
}
Expand All @@ -85,7 +86,7 @@ step_epi_lag <-
terms = dplyr::enquos(...),
role = role,
trained = trained,
lag = lag,
lag = as.integer(lag),
prefix = prefix,
default = default,
keys = epi_keys(recipe),
Expand All @@ -104,24 +105,23 @@ step_epi_lag <-
step_epi_ahead <-
function(recipe,
...,
ahead,
role = "outcome",
trained = FALSE,
ahead,
prefix = "ahead_",
default = NA,
columns = NULL,
skip = FALSE,
id = rand_id("epi_ahead")) {
if (!is_epi_recipe(recipe)) {
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
cli::cli_abort("This step can only operate on an `epi_recipe`.")
}

if (missing(ahead)) {
rlang::abort(
c("The `ahead` argument must not be empty.",
i = "Did you perhaps pass an integer in `...` accidentally?"
)
)
cli::cli_abort(c(
"The `ahead` argument must not be empty.",
i = "Did you perhaps pass an integer in `...` accidentally?"
))
}
arg_is_nonneg_int(ahead)
arg_is_chr_scalar(prefix, id)
Expand All @@ -136,7 +136,7 @@ step_epi_ahead <-
terms = dplyr::enquos(...),
role = role,
trained = trained,
ahead = ahead,
ahead = as.integer(ahead),
prefix = prefix,
default = default,
keys = epi_keys(recipe),
Expand Down
73 changes: 73 additions & 0 deletions R/time_types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
guess_time_type <- function(time_value) {
  # Infer the time type of a scalar time value.
  # Similar to epiprocess:::guess_time_type() but w/o the gap handling.
  #
  # @param time_value a scalar: character, Date, POSIXct, a tsibble
  #   yearweek/yearmonth/yearquarter, or a numeric year.
  # @return one of "day-time", "day", "yearweek", "yearmonth",
  #   "yearquarter", "year", or "custom".
  arg_is_scalar(time_value)
  if (is.character(time_value)) {
    # Strings of at most 10 characters ("YYYY-MM-DD" is exactly 10) are
    # tried as dates; longer strings as date-times.
    # BUG FIX: this previously compared `nchar(time_value) <= "10"`,
    # which coerces to a lexicographic string comparison (e.g.
    # "9" <= "10" is FALSE), misrouting short date strings such as
    # "2024-1-1" into the as.POSIXct() branch.
    if (nchar(time_value) <= 10L) {
      new_time_value <- tryCatch(
        {
          as.Date(time_value)
        },
        error = function(e) NULL
      )
    } else {
      new_time_value <- tryCatch(
        {
          as.POSIXct(time_value)
        },
        error = function(e) NULL
      )
    }
    # If parsing failed, keep the original character value; it will fall
    # through to the "custom" result below.
    if (!is.null(new_time_value)) time_value <- new_time_value
  }
  if (inherits(time_value, "POSIXct")) {
    return("day-time")
  }
  if (inherits(time_value, "Date")) {
    return("day")
  }
  if (inherits(time_value, "yearweek")) {
    return("yearweek")
  }
  if (inherits(time_value, "yearmonth")) {
    return("yearmonth")
  }
  if (inherits(time_value, "yearquarter")) {
    return("yearquarter")
  }
  # Whole numbers >= 1582 (start of the Gregorian calendar) are taken to
  # be calendar years.
  if (is.numeric(time_value) && all(time_value == as.integer(time_value)) &&
    all(time_value >= 1582)) {
    return("year")
  }
  return("custom")
}

coerce_time_type <- function(x, target_type) {
  # Coerce `x` into the representation used for `target_type`.
  #
  # @param x a date-like or numeric scalar/vector.
  # @param target_type the time_type of the training data; "week" is
  #   deliberately stored as a Date, and "year" as an integer year.
  # @return `x` converted to the class matching `target_type`; an
  #   unrecognized `target_type` yields NULL (switch default).
  if (target_type == "year") {
    # Numeric input is already a year; otherwise extract the calendar
    # year via POSIXlt (which counts years from 1900).
    if (is.numeric(x)) {
      return(as.integer(x))
    }
    return(as.POSIXlt(x)$year + 1900L)
  }
  switch(target_type,
    "day-time" = as.POSIXct(x),
    "day" = ,
    "week" = as.Date(x),
    "yearweek" = tsibble::yearweek(x),
    "yearmonth" = tsibble::yearmonth(x),
    "yearquarter" = tsibble::yearquarter(x)
  )
}

validate_date <- function(x, expected, arg = rlang::caller_arg(x),
                          call = rlang::caller_env()) {
  # Abort unless the guessed time type of `x` matches `expected`.
  #
  # @param x a scalar date-like value (classified by guess_time_type()).
  # @param expected the time_type string of the training data.
  # @param arg,call forwarded so the error reports the user-facing
  #   argument name and call site rather than this helper.
  time_type_x <- guess_time_type(x)
  if (time_type_x != expected) {
    # FIX: the hint previously read "for descriptions of these are
    # determined" (missing "how").
    cli::cli_abort(c(
      "The {.arg {arg}} was given as a {.val {time_type_x}} while the",
      `!` = "`time_type` of the training data was {.val {expected}}.",
      i = "See {.topic epiprocess::epi_df} for descriptions of how these are determined."
    ), call = call)
  }
}
Loading
Loading