Skip to content

291 date period #297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
dff767e
add period utils
dajmcdon Mar 7, 2024
0667845
ensure joins happen silently
dajmcdon Mar 8, 2024
edd5134
fix time_type processing in forecast/target date
dajmcdon Mar 9, 2024
e3b2907
import tsibble
dajmcdon Mar 9, 2024
a624b63
use panel_data vignette for experiments, to be removed
dajmcdon Mar 9, 2024
69b21c6
remove browser()
dajmcdon Mar 9, 2024
3498fcf
prefix with utils to avoid check warning
dajmcdon Mar 9, 2024
b2b8134
redocument
dajmcdon Mar 9, 2024
f2f39a2
ahead/lag will always be integer type now
dajmcdon Mar 9, 2024
f9859fe
we no longer warn if as_of > forecast_date
dajmcdon Mar 9, 2024
fb7faae
upstream changes to prep() now require data
dajmcdon Mar 9, 2024
78ff1f9
remove unused test
dajmcdon Mar 9, 2024
3d059f4
always a scalar
dajmcdon Mar 18, 2024
de9e00a
add some additional tests
dajmcdon Mar 18, 2024
0c38723
this shouldn't be on this branch
dajmcdon Mar 18, 2024
e036b8d
abort message typo
dajmcdon Mar 18, 2024
e6e60cd
bump version, add to news
dajmcdon Mar 18, 2024
845c1a9
run styler
dajmcdon Mar 18, 2024
8feef06
Apply @dsweber2 suggestions from code review
dajmcdon Mar 27, 2024
6d5c379
add back omitted test, include some comments
dajmcdon Mar 27, 2024
047caa6
validate scalar forecast_date
dajmcdon Mar 27, 2024
cfde0c9
Merge branch '291-date-period' of https://github.com/cmu-delphi/epipr…
dajmcdon Mar 27, 2024
fc05cd9
symmetrize scalar validation
dajmcdon Mar 27, 2024
1e76059
add uncommitted stuff from code review
dajmcdon Mar 27, 2024
f5ef88b
clarify some test purposes
dajmcdon Mar 27, 2024
c1e3ff9
refactor validate_date() to avoid duplication
dajmcdon Mar 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: epipredict
Title: Basic epidemiology forecasting methods
Version: 0.0.10
Version: 0.0.11
Authors@R: c(
person("Daniel", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")),
person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"),
Expand Down Expand Up @@ -46,6 +46,7 @@ Imports:
tibble,
tidyr,
tidyselect,
tsibble,
usethis,
vctrs,
workflows (>= 1.0.0)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
- Working vignette
- use `checkmate` for input validation
- refactor quantile extrapolation (possibly creates different results)
- force `target_date` + `forecast_date` handling to match the time_type of
  the epi_df. Allows for annual and weekly data.
4 changes: 2 additions & 2 deletions R/dist_quantiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ quantile_extrapolate <- function(x, tau_out, middle) {
dplyr::arrange(q)
}
if (any(indl)) {
qvals_out[indl] <- tail_extrapolate(tau_out[indl], head(qv, 2))
qvals_out[indl] <- tail_extrapolate(tau_out[indl], utils::head(qv, 2))
}
if (any(indr)) {
qvals_out[indr] <- tail_extrapolate(tau_out[indr], tail(qv, 2))
qvals_out[indr] <- tail_extrapolate(tau_out[indr], utils::tail(qv, 2))
}
qvals_out
}
Expand Down
9 changes: 4 additions & 5 deletions R/epi_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,10 @@ fit.epi_workflow <- function(object, data, ..., control = workflows::control_wor
#' preds
predict.epi_workflow <- function(object, new_data, ...) {
if (!workflows::is_trained_workflow(object)) {
rlang::abort(
c("Can't predict on an untrained epi_workflow.",
i = "Do you need to call `fit()`?"
)
)
cli::cli_abort(c(
"Can't predict on an untrained epi_workflow.",
i = "Do you need to call `fit()`?"
))
}
components <- list()
components$mold <- workflows::extract_mold(object)
Expand Down
34 changes: 17 additions & 17 deletions R/layer_add_forecast_date.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@
#' p3
layer_add_forecast_date <-
function(frosting, forecast_date = NULL, id = rand_id("add_forecast_date")) {
arg_is_chr_scalar(id)
arg_is_scalar(forecast_date, allow_null = TRUE)
# can't validate the type of forecast_date until we know the time_type
add_layer(
frosting,
layer_add_forecast_date_new(
Expand All @@ -78,8 +81,6 @@ layer_add_forecast_date <-
}

layer_add_forecast_date_new <- function(forecast_date, id) {
forecast_date <- arg_to_date(forecast_date, allow_null = TRUE)
arg_is_chr_scalar(id)
layer("add_forecast_date", forecast_date = forecast_date, id = id)
}

Expand All @@ -91,26 +92,25 @@ slather.layer_add_forecast_date <- function(object, components, workflow, new_da
workflow$fit$meta$max_time_value,
max(new_data$time_value)
)
object$forecast_date <- max_time_value
forecast_date <- max_time_value
} else {
forecast_date <- object$forecast_date
}
as_of_pre <- attributes(workflows::extract_preprocessor(workflow)$template)$metadata$as_of
as_of_fit <- workflow$fit$meta$as_of
as_of_post <- attributes(new_data)$metadata$as_of

as_of_date <- as.Date(max(as_of_pre, as_of_fit, as_of_post))

if (object$forecast_date < as_of_date) {
cli_warn(
c("The forecast_date is less than the most ",
"recent update date of the data: ",
i = "forecast_date = {object$forecast_date} while data is from {as_of_date}."
)
)
}
expected_time_type <- attr(
workflows::extract_preprocessor(workflow)$template, "metadata"
)$time_type
if (expected_time_type == "week") expected_time_type <- "day"
validate_date(forecast_date, expected_time_type,
call = expr(layer_add_forecast_date())
)
forecast_date <- coerce_time_type(forecast_date, expected_time_type)
object$forecast_date <- forecast_date
components$predictions <- dplyr::bind_cols(
components$predictions,
forecast_date = as.Date(object$forecast_date)
forecast_date = forecast_date
)

components
}

Expand Down
57 changes: 31 additions & 26 deletions R/layer_add_target_date.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@
#' p3
layer_add_target_date <-
function(frosting, target_date = NULL, id = rand_id("add_target_date")) {
target_date <- arg_to_date(target_date, allow_null = TRUE)
arg_is_chr_scalar(id)
arg_is_scalar(target_date, allow_null = TRUE)
# can't validate the type of target_date until we know the time_type
add_layer(
frosting,
layer_add_target_date_new(
Expand All @@ -84,35 +85,39 @@ slather.layer_add_target_date <- function(object, components, workflow, new_data
the_recipe <- workflows::extract_recipe(workflow)
the_frosting <- extract_frosting(workflow)

expected_time_type <- attr(
workflows::extract_preprocessor(workflow)$template, "metadata"
)$time_type
if (expected_time_type == "week") expected_time_type <- "day"

if (!is.null(object$target_date)) {
target_date <- as.Date(object$target_date)
} else { # null target date case
if (detect_layer(the_frosting, "layer_add_forecast_date") &&
!is.null(extract_argument(
the_frosting,
"layer_add_forecast_date", "forecast_date"
target_date <- object$target_date
validate_date(target_date, expected_time_type,
call = expr(layer_add_target_date())
)
target_date <- coerce_time_type(target_date, expected_time_type)
} else if (
detect_layer(the_frosting, "layer_add_forecast_date") &&
!is.null(forecast_date <- extract_argument(
the_frosting, "layer_add_forecast_date", "forecast_date"
))) {
forecast_date <- extract_argument(
the_frosting,
"layer_add_forecast_date", "forecast_date"
)

ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")

target_date <- forecast_date + ahead
} else {
max_time_value <- max(
workflows::extract_preprocessor(workflow)$max_time_value,
workflow$fit$meta$max_time_value,
max(new_data$time_value)
)

ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")

target_date <- max_time_value + ahead
}
validate_date(forecast_date, expected_time_type,
call = expr(layer_add_forecast_date())
)
forecast_date <- coerce_time_type(forecast_date, expected_time_type)
ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")
target_date <- forecast_date + ahead
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does having this go from max_time_value + ahead to forecast_date + ahead cause this to function to behave differently? If yes and testing this PR becomes tough, then we could reduce scope here and punt these changes to another PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separate question: how do the units in ahead change depending on the time type? Is this primarily up to the user to make sure they specify aheads in the right units?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On max_time_value + ahead, this was the default previously, and remains the fall back (see lines below). However, if there is a forecast_date, then I think the default for target_date should be to use the specified forecast_date first.

This PR is meant to allow for ahead to always be an integer. Then it should be in units relative to the time_type of the epi_df. It took a lot of effort to make that happen though.

} else {
max_time_value <- max(
workflows::extract_preprocessor(workflow)$max_time_value,
workflow$fit$meta$max_time_value,
max(new_data$time_value)
)
ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead")
target_date <- max_time_value + ahead
}

object$target_date <- target_date
components$predictions <- dplyr::bind_cols(components$predictions,
target_date = target_date
)
Expand Down
10 changes: 8 additions & 2 deletions R/layer_population_scaling.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ slather.layer_population_scaling <-
length(object$df_pop_col) == 1
)

if (is.null(object$by)) {
object$by <- intersect(
kill_time_value(epi_keys(components$predictions)),
colnames(dplyr::select(object$df, !object$df_pop_col))
)
}
try_join <- try(
dplyr::left_join(components$predictions, object$df,
by = object$by
Expand All @@ -157,8 +163,8 @@ slather.layer_population_scaling <-
))
}

object$df <- object$df %>%
dplyr::mutate(dplyr::across(tidyselect::where(is.character), tolower))
# object$df <- object$df %>%
# dplyr::mutate(dplyr::across(tidyselect::where(is.character), tolower))
Comment on lines +166 to +167
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is wrong, but I'm not yet certain that it's safe to remove.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doing some timetravel, it looks like it's been here in one form or another forever. What's confusing to me is that this looks like it does nothing. Was the point to make all the characters columns lowercase (such as geo_value) to match correctly maybe, but there was a parenthesis problem? Was dplyr::mutate(dplyr::across(tidyselect::where(is.character)), tolower) meant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'm not sure why it was there in the first place. I suspect that the included state population dataset used capitals while typical usage from the API gives geos in lower case. But that shouldn't have resulted in hardcoded workarounds here (that are prone to failure).

So I think this should go forever, but I wanted to be sure that if some downstream use errored out, I could find this and try to track it more carefully.

pop_col <- rlang::sym(object$df_pop_col)
exprs <- rlang::expr(c(!!!object$terms))
pos <- tidyselect::eval_select(exprs, components$predictions)
Expand Down
26 changes: 13 additions & 13 deletions R/step_epi_shift.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,20 @@
step_epi_lag <-
function(recipe,
...,
lag,
role = "predictor",
trained = FALSE,
lag,
prefix = "lag_",
default = NA,
columns = NULL,
skip = FALSE,
id = rand_id("epi_lag")) {
if (!is_epi_recipe(recipe)) {
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
cli::cli_abort("This step can only operate on an `epi_recipe`.")
}

if (missing(lag)) {
rlang::abort(
cli::cli_abort(
c("The `lag` argument must not be empty.",
i = "Did you perhaps pass an integer in `...` accidentally?"
)
Expand All @@ -75,7 +75,8 @@ step_epi_lag <-
arg_is_nonneg_int(lag)
arg_is_chr_scalar(prefix, id)
if (!is.null(columns)) {
rlang::abort(c("The `columns` argument must be `NULL.",
cli::cli_abort(c(
"The `columns` argument must be `NULL.",
i = "Use `tidyselect` methods to choose columns to lag."
))
}
Expand All @@ -85,7 +86,7 @@ step_epi_lag <-
terms = dplyr::enquos(...),
role = role,
trained = trained,
lag = lag,
lag = as.integer(lag),
prefix = prefix,
default = default,
keys = epi_keys(recipe),
Expand All @@ -104,24 +105,23 @@ step_epi_lag <-
step_epi_ahead <-
function(recipe,
...,
ahead,
role = "outcome",
trained = FALSE,
ahead,
prefix = "ahead_",
default = NA,
columns = NULL,
skip = FALSE,
id = rand_id("epi_ahead")) {
if (!is_epi_recipe(recipe)) {
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
cli::cli_abort("This step can only operate on an `epi_recipe`.")
}

if (missing(ahead)) {
rlang::abort(
c("The `ahead` argument must not be empty.",
i = "Did you perhaps pass an integer in `...` accidentally?"
)
)
cli::cli_abort(c(
"The `ahead` argument must not be empty.",
i = "Did you perhaps pass an integer in `...` accidentally?"
))
}
arg_is_nonneg_int(ahead)
arg_is_chr_scalar(prefix, id)
Expand All @@ -136,7 +136,7 @@ step_epi_ahead <-
terms = dplyr::enquos(...),
role = role,
trained = trained,
ahead = ahead,
ahead = as.integer(ahead),
prefix = prefix,
default = default,
keys = epi_keys(recipe),
Expand Down
73 changes: 73 additions & 0 deletions R/time_types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
guess_time_type <- function(time_value) {
  # Infer the time type of a scalar time value.
  # Similar to epiprocess:::guess_time_type() but w/o the gap handling.
  #
  # @param time_value a scalar: character, Date, POSIXct, a tsibble
  #   yearweek/yearmonth/yearquarter, or a numeric year.
  # @return one of "day-time", "day", "yearweek", "yearmonth",
  #   "yearquarter", "year", or "custom".
  arg_is_scalar(time_value)
  if (is.character(time_value)) {
    # Strings of at most 10 characters ("YYYY-MM-DD" is exactly 10) are
    # tried as dates; longer strings as date-times.
    # BUG FIX: this previously compared `nchar(time_value) <= "10"`,
    # which coerces to a lexicographic string comparison (e.g.
    # "9" <= "10" is FALSE), misrouting short date strings such as
    # "2024-1-1" into the as.POSIXct() branch.
    if (nchar(time_value) <= 10L) {
      new_time_value <- tryCatch(
        {
          as.Date(time_value)
        },
        error = function(e) NULL
      )
    } else {
      new_time_value <- tryCatch(
        {
          as.POSIXct(time_value)
        },
        error = function(e) NULL
      )
    }
    # If parsing failed, keep the original character value; it will fall
    # through to the "custom" result below.
    if (!is.null(new_time_value)) time_value <- new_time_value
  }
  if (inherits(time_value, "POSIXct")) {
    return("day-time")
  }
  if (inherits(time_value, "Date")) {
    return("day")
  }
  if (inherits(time_value, "yearweek")) {
    return("yearweek")
  }
  if (inherits(time_value, "yearmonth")) {
    return("yearmonth")
  }
  if (inherits(time_value, "yearquarter")) {
    return("yearquarter")
  }
  # Whole numbers >= 1582 (start of the Gregorian calendar) are taken to
  # be calendar years.
  if (is.numeric(time_value) && all(time_value == as.integer(time_value)) &&
    all(time_value >= 1582)) {
    return("year")
  }
  return("custom")
}

coerce_time_type <- function(x, target_type) {
  # Coerce `x` into the representation used for `target_type`.
  #
  # @param x a date-like or numeric scalar/vector.
  # @param target_type the time_type of the training data; "week" is
  #   deliberately stored as a Date, and "year" as an integer year.
  # @return `x` converted to the class matching `target_type`; an
  #   unrecognized `target_type` yields NULL (switch default).
  if (target_type == "year") {
    # Numeric input is already a year; otherwise extract the calendar
    # year via POSIXlt (which counts years from 1900).
    if (is.numeric(x)) {
      return(as.integer(x))
    }
    return(as.POSIXlt(x)$year + 1900L)
  }
  switch(target_type,
    "day-time" = as.POSIXct(x),
    "day" = ,
    "week" = as.Date(x),
    "yearweek" = tsibble::yearweek(x),
    "yearmonth" = tsibble::yearmonth(x),
    "yearquarter" = tsibble::yearquarter(x)
  )
}

validate_date <- function(x, expected, arg = rlang::caller_arg(x),
                          call = rlang::caller_env()) {
  # Abort unless the guessed time type of `x` matches `expected`.
  #
  # @param x a scalar date-like value (classified by guess_time_type()).
  # @param expected the time_type string of the training data.
  # @param arg,call forwarded so the error reports the user-facing
  #   argument name and call site rather than this helper.
  time_type_x <- guess_time_type(x)
  if (time_type_x != expected) {
    # FIX: the hint previously read "for descriptions of these are
    # determined" (missing "how").
    cli::cli_abort(c(
      "The {.arg {arg}} was given as a {.val {time_type_x}} while the",
      `!` = "`time_type` of the training data was {.val {expected}}.",
      i = "See {.topic epiprocess::epi_df} for descriptions of how these are determined."
    ), call = call)
  }
}
Loading
Loading