cmu-delphi · brookslogan · Jan 24, 2025 · Dec 18, 2024 · Oct 21, 2024 · Oct 9, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: epiprocess
 Type: Package
 Title: Tools for basic signal processing in epidemiology
-Version: 0.10.1
+Version: 0.10.3
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),
@@ -56,13 +56,15 @@ Imports:
     tibble,
     tidyr,
     tidyselect (>= 1.2.0),
+    tools,
     tsibble,
     utils,
     vctrs,
     waldo
 Suggests:
     devtools,
     epidatr,
+    epipredict,
     here,
     knitr,
     outbreaks,
@@ -76,6 +78,7 @@ Remotes:
     cmu-delphi/delphidocs,
     cmu-delphi/epidatasets,
     cmu-delphi/epidatr,
+    cmu-delphi/epipredict,
     glmgen/genlasso,
     reconverse/outbreaks
 Config/Needs/website: cmu-delphi/delphidocs
@@ -103,5 +106,6 @@ Collate:
     'reexports.R'
     'revision_analysis.R'
     'slide.R'
+    'time-utils.R'
     'utils.R'
     'utils_pipe.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -39,9 +39,9 @@ S3method(guess_period,Date)
 S3method(guess_period,POSIXt)
 S3method(guess_period,default)
 S3method(key_colnames,data.frame)
-S3method(key_colnames,default)
 S3method(key_colnames,epi_archive)
 S3method(key_colnames,epi_df)
+S3method(key_colnames,tbl_ts)
 S3method(mean,epi_df)
 S3method(print,epi_archive)
 S3method(print,epi_df)
@@ -130,6 +130,8 @@ importFrom(cli,cli_li)
 importFrom(cli,cli_vec)
 importFrom(cli,cli_warn)
 importFrom(cli,format_message)
+importFrom(cli,pluralize)
+importFrom(cli,qty)
 importFrom(data.table,":=")
 importFrom(data.table,address)
 importFrom(data.table,as.data.table)
@@ -194,6 +196,8 @@ importFrom(rlang,arg_match)
 importFrom(rlang,caller_arg)
 importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty)
+importFrom(rlang,check_dots_empty0)
+importFrom(rlang,dots_n)
 importFrom(rlang,enquo)
 importFrom(rlang,enquos)
 importFrom(rlang,env)
@@ -231,8 +235,11 @@ importFrom(tidyr,unnest)
 importFrom(tidyselect,any_of)
 importFrom(tidyselect,eval_select)
 importFrom(tidyselect,starts_with)
+importFrom(tools,toTitleCase)
 importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
+importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_data)
+importFrom(vctrs,vec_detect_missing)
 importFrom(vctrs,vec_equal)
diff --git a/NEWS.md b/NEWS.md
@@ -2,16 +2,37 @@
 
 Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicate PR's.
 
-# epiprocess 0.10
+# epiprocess 0.11
 
 ## Breaking changes
-
+- In `revision_summary()`:
+  - Output now uses the name `lag_near_latest` instead of `time_near_latest`. To
+    migrate, update references to `time_near_latest` to `lag_near_latest`.
+  - `revision_summary(epi_arch)` without specifying the measurement column to
+    analyze in `...` will no longer attempt to guess which one you intended if
+    there are multiple possibilities to choose from (#571). If you attempt a
+    complicated tidyselection that selects zero columns, this is also now an
+    error. If you encounter such errors, manually specify the measurement column
+    in `...`.
+  - `min_waiting_period` now defines a nonstrict inequality instead of a strict
+    one. To obtain the old bounds, bump the `min_waiting_period` up to the next
+    possible value for your `time_type`.
+- In `key_colnames()`:
+  - On regular (non-`epi_df`) data frames, now requires manual specification of
+    `geo_keys`, `other_keys`, and `time_keys`.
+  - The `extra_keys` argument has been deprecated and replaced with
+    `other_keys`.
 
 ## Improvements
-
+- `revision_summary()` now supports all `time_type`s.
 
 ## Bug fixes
 
+- Fixed aggregation of age-group-specific rates to overall rates in `epi_df` vignette (#587).
+- Fixed `key_colnames()` omitting some key columns on `epi_archive`s (#565).
+- Fixed `epi_archive` compactification raising an error on certain value column
+  classes such as `"distribution"` (#541); it's now easier to form an archive of
+  forecasts in that format.
 
 ## Cleanup
 

diff --git a/R/archive.R b/R/archive.R
@@ -376,22 +376,19 @@ removed_by_compactify <- function(df, keys, tolerance) {
 #'   [`dplyr::near`], otherwise it uses equality.  `NA`'s and `NaN`'s are
 #'   considered equal to themselves and each other.
 #' @importFrom dplyr lag if_else near
+#' @importFrom vctrs vec_detect_missing vec_equal
 #' @keywords internal
 is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
-  lag_vec <- dplyr::lag(vec)
-  if (typeof(vec) == "double") {
+  lag_vec <- lag(vec, 1L) # value one position earlier (missing for the first element)
+  if (inherits(vec, "numeric")) { # (no matrix/array/general support)
     res <- if_else(
       !is.na(vec) & !is.na(lag_vec),
       near(vec, lag_vec, tol = tolerance),
       is.na(vec) & is.na(lag_vec)
     )
     return(res)
   } else {
-    res <- if_else(
-      !is.na(vec) & !is.na(lag_vec),
-      vec == lag_vec,
-      is.na(vec) & is.na(lag_vec)
-    )
+    res <- vec_equal(vec, lag_vec, na_equal = TRUE) # exact equality; missing values count as equal
     return(res)
   }
 }

diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
@@ -14,6 +14,8 @@
 #' @importFrom checkmate check_names
 #' @importFrom checkmate test_subset test_set_equal vname
 #' @importFrom cli cli_abort cli_warn
+#' @importFrom cli pluralize
+#' @importFrom cli qty
 #' @importFrom data.table as.data.table
 #' @importFrom data.table key
 #' @importFrom data.table setkeyv
@@ -23,6 +25,7 @@
 #' @importFrom lifecycle deprecated
 #' @importFrom rlang %||%
 #' @importFrom rlang is_bare_integerish
+#' @importFrom tools toTitleCase
 #' @importFrom vctrs vec_data
 #' @importFrom vctrs vec_equal
 ## usethis namespace: end
@@ -32,6 +35,6 @@ utils::globalVariables(c(
   ".x", ".group_key", ".ref_time_value", "resid",
   "fitted", ".response", "geo_value", "time_value",
   "value", ".real", "lag", "max_value", "min_value",
-  "median_value", "spread", "rel_spread", "time_to",
-  "time_near_latest", "n_revisions", "min_lag", "max_lag"
+  "median_value", "spread", "rel_spread", "lag_to",
+  "lag_near_latest", "n_revisions", "min_lag", "max_lag"
 ))
diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -1,47 +1,133 @@
-#' Grab any keys associated to an epi_df
+#' Get names of columns that form a (unique) key associated with an object
 #'
-#' @param x a data.frame, tibble, or epi_df
+#' This is entirely based on metadata and arguments passed; there are no
+#' explicit checks that the key actually is unique in any associated data
+#' structures.
+#'
+#' @param x an object, often a data frame or something similar. `{epiprocess}`
+#'   includes implementations for [`epi_df`]s, [`epi_archive`]s,
+#'   [`tsibble::tsibble`]s, and other data frames (including
+#'   [`tibble::tibble`]s); other packages, like `{epipredict}`, can add more.
 #' @param ... additional arguments passed on to methods
-#' @param other_keys an optional character vector of other keys to include
-#' @param exclude an optional character vector of keys to exclude
-#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
+#' @param geo_keys,other_keys,time_keys character vectors, sometimes optional;
+#'   which variables (if any) should be considered as part of a unique
+#'   key/identifier for data in `x`, dealing respectively with the associated
+#'   geographical region, demographic/strain/other information needed in
+#'   addition to the geographical region to identify individual time series in
+#'   `x`, and time interval during which associated events occurred.
+#'
+#'   Mandatory if `x` is a regular `data.frame` or `tibble`. Optional if `x` is
+#'   an `epi_df`; the defaults are `"geo_value"`, the `epi_df`'s `other_keys`
+#'   metadata, and `"time_value"`, respectively; if you provide these manually,
+#'   they must match the defaults. (This behavior is to enable consistent and
+#'   sane results when you can't guarantee whether `x` is an `epi_df` or just a
+#'   `tibble`/`data.frame`. You don't need to use it if you know that `x` is
+#'   definitely an `epi_df`.) Not accepted when `x` is a `tsibble` or an
+#'   `epi_archive`.
+#' @param exclude an optional character vector of key column names to exclude
+#'   from the result
+#' @return character vector of key column names, with any names in `exclude` removed
 #' @keywords internal
 #' @export
-key_colnames <- function(x, ...) {
-  UseMethod("key_colnames")
-}
-
-#' @rdname key_colnames
-#' @method key_colnames default
-#' @export
-key_colnames.default <- function(x, ...) {
-  character(0L)
+key_colnames <- function(x, ..., exclude = character()) {
+  provided_args <- rlang::call_args_names(rlang::call_match()) # argument names as matched in this call
+  if ("extra_keys" %in% provided_args) { # caller used the deprecated argument name
+    lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
+    redispatch <- function(..., extra_keys) { # separates `extra_keys` from the other forwarded arguments
+      key_colnames(..., other_keys = extra_keys) # re-enter the generic under the replacement name
+    }
+    redispatch(x, ..., exclude = exclude)
+  } else {
+    UseMethod("key_colnames") # regular S3 dispatch on `x`
+  }
 }
 
 #' @rdname key_colnames
+#' @importFrom rlang check_dots_empty0
 #' @method key_colnames data.frame
 #' @export
-key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
+key_colnames.data.frame <- function(x, ...,
+                                    geo_keys, # the `*_keys` have no defaults: callers must supply them
+                                    other_keys,
+                                    time_keys,
+                                    exclude = character()) {
+  check_dots_empty0(...) # reject any unrecognized extra arguments
+  assert_character(geo_keys)
+  assert_character(time_keys)
   assert_character(other_keys)
   assert_character(exclude)
-  nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
-  intersect(nm, colnames(x))
+  keys <- c(geo_keys, other_keys, time_keys) # output order: geo, other, time
+  if (!all(keys %in% names(x))) { # every requested key must be a column of `x`
+    cli_abort(c(
+      "Some of the specified key columns aren't present in `x`",
+      "i" = "Specified keys: {format_varnames(keys)}",
+      "i" = "Columns of x: {format_varnames(names(x))}",
+      "x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
+    ), class = "epiprocess__key_colnames__keys_not_in_colnames")
+  }
+  setdiff(keys, exclude) # drop excluded names (`setdiff()` also removes duplicates)
 }
 
 #' @rdname key_colnames
 #' @method key_colnames epi_df
 #' @export
-key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
+key_colnames.epi_df <- function(x, ...,
+                                geo_keys = "geo_value",
+                                other_keys = attr(x, "metadata")$other_keys,
+                                time_keys = "time_value",
+                                exclude = character()) {
+  check_dots_empty0(...) # reject any unrecognized extra arguments
+  if (!identical(geo_keys, "geo_value")) { # an explicit `geo_keys` may only restate the default
+    cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
+      class = "epiprocess__key_colnames__mismatched_geo_keys"
+    )
+  }
+  if (!identical(time_keys, "time_value")) { # likewise for an explicit `time_keys`
+    cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
+      class = "epiprocess__key_colnames__mismatched_time_keys"
+    )
+  }
+  expected_other_keys <- attr(x, "metadata")$other_keys # what the metadata of `x` records
+  if (!identical(other_keys, expected_other_keys)) { # explicit `other_keys` must equal the metadata exactly
+    cli_abort(c(
+      "The provided `other_keys` argument didn't match the `other_keys` of `x`",
+      "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
+      "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
+      "i" = "If you know that `x` will always be an `epi_df` and
+             resolve this discrepancy by adjusting the metadata of `x`, you
+             shouldn't have to pass `other_keys =` here anymore,
+             unless you want to continue to perform this check."
+    ), class = "epiprocess__key_colnames__mismatched_other_keys")
+  }
   assert_character(exclude)
-  other_keys <- attr(x, "metadata")$other_keys
   setdiff(c("geo_value", other_keys, "time_value"), exclude)
 }
 
+#' @rdname key_colnames
+#' @method key_colnames tbl_ts
+#' @export
+key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
+  check_dots_empty0(...) # reject any unrecognized extra arguments
+  assert_character(exclude)
+  idx <- tsibble::index_var(x) # name of the current index column
+  idx2 <- tsibble::index2_var(x) # index a pending `index_by()` would switch to (see message below)
+  if (!identical(idx, idx2)) { # refuse to guess between the old and the pending index
+    cli_abort(c(
+      "`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
+       whether we should output the old unique key or the new unique key-to-be",
+      "i" = "Old index: {format_varname(idx)}",
+      "i" = "Pending new index: {format_varname(idx2)}",
+      "Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
+    ), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
+  }
+  setdiff(c(tsibble::key_vars(x), idx), exclude) # tsibble key columns first, then the index
+}
+
 #' @rdname key_colnames
 #' @method key_colnames epi_archive
 #' @export
-key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
+key_colnames.epi_archive <- function(x, ..., exclude = character()) {
+  check_dots_empty0(...) # reject any unrecognized extra arguments
   assert_character(exclude)
-  other_keys <- attr(x, "metadata")$other_keys
-  setdiff(c("geo_value", other_keys, "time_value"), exclude)
+  setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude) # archive field, not attr; adds `version`
 }