markfairbanks · markfairbanks · Dec 21, 2023 · Dec 21, 2023 · Dec 21, 2023 · Dec 21, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,7 @@
 
 #### Functionality improvements
 * `distinct()` now works on data frames with list columns (#773)
+* `pivot_wider()`: Gains `unused_fn` argument (#698)
 
 #### Deprecations
 * `verb.()` functions have been removed

diff --git a/R/pivot_wider.R b/R/pivot_wider.R
@@ -22,6 +22,8 @@
 #' @param names_repair Treatment of duplicate names. See `?vctrs::vec_as_names` for options/details.
 #' @param values_fn Should the data be aggregated before casting? If the formula doesn't identify a single observation for each cell, then aggregation defaults to length with a message.
 #' @param values_fill If values are missing, what value should be filled in
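+#' @param unused_fn Aggregation function (or formula lambda, e.g. `~ mean(.x)`) applied to unused columns, i.e. columns not selected by `names_from`, `values_from`, or `id_cols`, within each `id_cols` group.
+#'   Only takes effect when `id_cols` is supplied. Default (`NULL`) is to ignore unused columns.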
 #'
 #' @examples
 #' df <- tidytable(
@@ -48,7 +50,8 @@ pivot_wider <- function(.df,
                         names_sort = FALSE,
                         names_repair = "unique",
                         values_fill = NULL,
-                        values_fn = NULL) {
+                        values_fn = NULL,
+                        unused_fn = NULL) {
   UseMethod("pivot_wider")
 }
 
@@ -63,15 +66,18 @@ pivot_wider.tidytable <- function(.df,
                                   names_sort = FALSE,
                                   names_repair = "unique",
                                   values_fill = NULL,
-                                  values_fn = NULL) {
+                                  values_fn = NULL,
+                                  unused_fn = NULL) {
   names_from <- tidyselect_names(.df, {{ names_from }})
   values_from <- tidyselect_names(.df, {{ values_from }})
 
   id_cols <- enquo(id_cols)
-  if (quo_is_null(id_cols)) {
+  id_cols_is_null <- quo_is_null(id_cols)
+  if (id_cols_is_null) {
     id_cols <- setdiff(names(.df), c(names_from, values_from))
   } else {
     id_cols <- tidyselect_names(.df, !!id_cols)
+    unused_cols <- setdiff(names(.df), c(names_from, values_from, id_cols))
   }
 
   values_fn <- quo_squash(enquo(values_fn))
@@ -138,7 +144,7 @@ pivot_wider.tidytable <- function(.df,
   out <- remove_key(out)
 
   if (no_id) {
-    out <- dt_j(out, . := NULL)
+    out <- select(out, -any_of("."))
   }
 
   if (uses_dot_value) {
@@ -149,7 +155,15 @@ pivot_wider.tidytable <- function(.df,
 
   out <- df_name_repair(out, names_repair)
 
-  as_tidytable(out)
+  if (!id_cols_is_null && !is.null(unused_fn)) {
+    unused_fn <- as_function(unused_fn)
+    unused_df <- select(.df, all_of(id_cols), all_of(unused_cols))
+    unused_df <- summarize(unused_df,
+                           across(any_of(unused_cols), unused_fn),
+                           .by = any_of(id_cols))
+    out <- left_join(out, unused_df, by = id_cols)
+  }
+  out
 }
 
 #' @export
@@ -163,14 +177,16 @@ pivot_wider.data.frame <- function(.df,
                                    names_sort = FALSE,
                                    names_repair = "unique",
                                    values_fill = NULL,
-                                   values_fn = NULL) {
+                                   values_fn = NULL,
+                                   unused_fn = NULL) {
   .df <- as_tidytable(.df)
   pivot_wider(
     .df, names_from = {{ names_from }}, values_from = {{ values_from }},
     id_cols = {{ id_cols }}, names_sep = names_sep,
     names_prefix = names_prefix, names_glue = names_glue,
     names_sort = names_sort, names_repair = names_repair,
-    values_fill = values_fill, values_fn = {{ values_fn }}
+    values_fill = values_fill, values_fn = {{ values_fn }},
+    unused_fn = unused_fn
   )
 }
 

diff --git a/man/pivot_wider.Rd b/man/pivot_wider.Rd
diff --git a/tests/testthat/test-pivot_wider.R b/tests/testthat/test-pivot_wider.R
@@ -239,3 +239,118 @@ test_that("correctly labels columns when `names_glue` is used, #579", {
   expect_named(result2, c("v1_b", "v1_a", "v1_c", "v2_b", "v2_a", "v2_c"))
   expect_equal(unname(unlist(result2)), c("b", "a", "c", "b", "a", "c"))
 })
+
+# unused -------------------------------------------------------------------
+
+test_that("`unused_fn` can summarize unused columns (#990)", {
+  df <- tidytable(
+    id = c(1, 1, 2, 2),
+    unused1 = c(1, 2, 4, 3),
+    unused2 = c(1, 2, 4, 3),
+    name = c("a", "b", "a", "b"),
+    value = c(1, 2, 3, 4)
+  )
+
+  # # By name (disabled; NOTE(review): a named list of per-column functions looks unsupported - confirm)
+  # res <- pivot_wider(df, id_cols = id, unused_fn = list(unused1 = max))
+  # expect_named(res, c("id", "a", "b", "unused1"))
+  # expect_identical(res$unused1, c(2, 4))
+
+  # Globally: a single function is applied to every unused column
+  res <- pivot_wider(df, id_cols = id, unused_fn = list)
+  expect_named(res, c("id", "a", "b", "unused1", "unused2"))
+  expect_identical(res$unused1, list(c(1, 2), c(4, 3)))
+  expect_identical(res$unused2, list(c(1, 2), c(4, 3)))
+
+  # Plain data.frame input (see https://stackoverflow.com/a/73554147)
+  df <- data.frame(A = c(1, 1, 1, 2 , 2, 2),
+                   B = c(3, 3, 3, 6, 6, 6),
+                   C = c(2, 3, 9, 12, 2, 6),
+                   D = c("a1", "a2", "a3", "a1", "a2", "a3"))
+
+  res <- df %>%
+    pivot_wider(id_cols = A, names_from = D, values_from = C, unused_fn = mean)
+  expect_named(res, c("A", "a1", "a2", "a3", "B"))
+  expect_equal(res$B, c(3, 6))
+
+  # Works with formula-style lambdas such as `~ mean(.x)`
+  res <- df %>%
+    pivot_wider(id_cols = A, names_from = D, values_from = C, unused_fn = ~ mean(.x))
+  expect_named(res, c("A", "a1", "a2", "a3", "B"))
+  expect_equal(res$B, c(3, 6))
+})
+
+test_that("`unused_fn` works with anonymous functions", {
+  df <- tidytable(
+    id = c(1, 1, 2, 2),
+    unused = c(1, NA, 4, 3),  # the NA in id 1 is dropped by `na.rm = TRUE` in the lambda below
+    name = c("a", "b", "a", "b"),
+    value = c(1, 2, 3, 4)
+  )
+
+  res <- pivot_wider(df, id_cols = id, unused_fn = ~ mean(.x, na.rm = TRUE))
+  expect_identical(res$unused, c(1, 3.5))
+})
+
+# test_that("`unused_fn` must result in single summary values", {
+#   df <- tidytable(
+#     id = c(1, 1, 2, 2),
+#     unused = c(1, 2, 4, 3),
+#     name = c("a", "b", "a", "b"),
+#     value = c(1, 2, 3, 4)
+#   )
+#
+#   expect_snapshot(
+#     (expect_error(pivot_wider(df, id_cols = id, unused_fn = identity)))
+#   )
+# })
+
+# test_that("`unused_fn` works with expanded key from `id_expand`", {
+#   df <- tidytable(
+#     id = factor(c(1, 1, 2, 2), levels = 1:3),
+#     unused = c(1, 2, 4, 3),
+#     name = c("a", "b", "a", "b"),
+#     value = c(1, 2, 3, 4)
+#   )
+#
+#   res <- pivot_wider(df, id_cols = id, id_expand = TRUE, unused_fn = max)
+#   expect_identical(res$id, factor(1:3))
+#   expect_identical(res$unused, c(2, 4, NA))
+#
+#   res <- pivot_wider(df, id_cols = id, id_expand = TRUE, unused_fn = ~ sum(is.na(.x)))
+#   expect_identical(res$unused, c(0L, 0L, 1L))
+# })
+
+# test_that("can't fill implicit missings in unused column with `values_fill`", {
+#   # (in theory this would need `unused_fill`, but it would only be used when
+#   # `id_expand = TRUE`, which doesn't feel that useful)
+#
+#   df <- tidytable(
+#     id = factor(c(1, 1, 2, 2), levels = 1:3),
+#     unused = c(1, 2, 4, 3),
+#     name = c("a", "b", "a", "b"),
+#     value = c(1, 2, 3, 4)
+#   )
+#
+#   res <- pivot_wider(
+#     data = df,
+#     id_cols = id,
+#     id_expand = TRUE,
+#     unused_fn = list,
+#     values_fill = 0
+#   )
+#
+#   expect_identical(res$a, c(1, 3, 0))
+#   expect_identical(res$b, c(2, 4, 0))
+#   expect_identical(res$unused, list(c(1, 2), c(4, 3), NA_real_))
+#
+#   res <- pivot_wider(
+#     data = df,
+#     id_cols = id,
+#     id_expand = TRUE,
+#     unused_fn = list,
+#     values_fill = list(unused = 0)
+#   )
+#
+#   expect_identical(res$unused, list(c(1, 2), c(4, 3), NA_real_))
+# })