tidyverse · lionel- · Aug 7, 2020 · Aug 7, 2020 · Aug 7, 2020 · Aug 10, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -21,6 +21,7 @@ BugReports: https://github.com/tidyverse/purrr/issues
 Depends: 
     R (>= 3.2)
 Imports: 
+    ellipsis (>= 0.3.1),
     lifecycle (>= 0.2.0),
     magrittr (>= 1.5.0),
     rlang (>= 0.4.7),

diff --git a/NEWS.md b/NEWS.md
@@ -9,6 +9,41 @@
   See #768 for more information.
 
 
+## Switch to vctrs
+
+* The atomic vector variants of `flatten()`, like `flatten_int()`, now
+  use vctrs for coercion and concatenation. This change makes these
+  functions more flexible and consistent. For instance lists
+  containing factors are properly flattened thanks to vctrs coercions:
+
+  ```r
+  flatten_chr(list("foo", factor("bar")))
+  #> [1] "foo" "bar"
+  ```
+
+  And you can now flatten double vectors into an integer vector:
+  ```{r}
+  flatten_int(list(1.0, c(2.0, 3.0)))
+  #> [1] 1 2 3
+  ```
+
+  The switch to vctrs coercions does entail a stricter behaviour.
+  `map_chr()` used to succeed with any type by deparsing the
+  inputs:
+
+  ```{r}
+  flatten_chr(list("foo", 1))
+  #> [1] "foo"      "1.000000"
+  ```
+
+  With vctrs coercions, this is now deprecated and will become an
+  error in the future.
+
+* The atomic vector variants of `flatten()` gain a `name_spec`
+  argument to control how the outer list names are merged with the
+  inner vector names.
+
+
 ## Features and fixes
 
 * `every()` and `some()` now properly check the return value of their

diff --git a/R/flatten.R b/R/flatten.R
@@ -1,8 +1,16 @@
-#' Flatten a list of lists into a simple vector.
+#' Flatten a list of into a simpler structure
+#'
+#' @description
+#'
+#' These functions remove a level hierarchy from a list. They are
+#' similar to [unlist()], but:
+#'
+#' - They only ever remove a single layer of hierarchy.
+#'
+#' - They are type-stable, so you always know what the type of the
+#'   output is. On the other hand the final size is not stable and
+#'   depends on the contents of the list.
 #'
-#' These functions remove a level hierarchy from a list. They are similar to
-#' [unlist()], but they only ever remove a single layer of hierarchy and they
-#' are type-stable, so you always know what the type of the output is.
 #'
 #' @param .x A list to flatten. The contents of the list can be anything for
 #'   `flatten()` (as a list is returned), but the contents must match the
@@ -14,7 +22,52 @@
 #'   `flatten_dfr()` and `flatten_dfc()` return data frames created by
 #'   row-binding and column-binding respectively. They require dplyr to
 #'   be installed.
+#' @inheritParams ellipsis::dots_empty
 #' @inheritParams map
+#'
+#'
+#' @section List and atomic flattening:
+#'
+#' The behaviours of `flatten()` and of the atomic variants are a bit
+#' different:
+#'
+#' - `flatten()` accepts lists that contain any kind of elements. The
+#'   elements whose `typeof()` is a list (including data frames) are
+#'   spliced into the containing list. This operation always returns a
+#'   list. If the list contains other lists, one level of nestedness
+#'   removed. If the list doesn't contain other lists, `flatten()`
+#'   doesn't do anything. The final size is equal to the sum of the
+#'   sizes of the list elements, plus the number of non-list elements.
+#'
+#'   These expressions are equivalent:
+#'
+#'   ```
+#'   flatten(list(1, list(2), list(list(3))))
+#'   c(list(1), list(2), list(list(3)))
+#'   list(1, 2, list(3))
+#'   ```
+#'
+#' - The atomic variants like `flatten_int()` expect lists containing
+#'   elements that can be coerced to the target type. For example
+#'   `flatten_int(list(FALSE, 1L, c(2.0, 3.0)))` returns `0:3`. The
+#'   elements are assembled with [vctrs::vec_c()] (via
+#'   [vctrs::vec_unchop()], a wrapper that takes lists of
+#'   vectors). The final size is equal to the sum of the sizes of all
+#'   elements.
+#'
+#'   These expressions are equivalent:
+#'
+#'   ```
+#'   flatten_int(list(1, 2, 3:4))
+#'   c(1, 2, 3:4)
+#'   ```
+#'
+#' Despite these differences, these functions are said to be
+#' "flattening" because of the dependence of the final size on the
+#' contents of the input list. The output is usually larger after
+#' flattening (though it could well be smaller if some of the
+#' flattened elements are empty).
+#'
 #' @export
 #' @examples
 #' x <- rerun(2, sample(4))
@@ -31,33 +84,94 @@ flatten <- function(.x) {
 }
 
 #' @export
+#' @inheritParams vctrs::vec_unchop
 #' @rdname flatten
-flatten_lgl <- function(.x) {
-  .Call(vflatten_impl, .x, "logical")
+flatten_lgl <- function(.x, ..., name_spec = NULL) {
+  ellipsis::check_dots_empty()
+
+  .x <- validate_flatten_vec(.x)
+  name_spec <- name_spec %||% flatten_name_spec
+
+  vec_unchop(.x, ptype = logical(), name_spec = name_spec)
 }
 
 #' @export
 #' @rdname flatten
-flatten_int <- function(.x) {
-  .Call(vflatten_impl, .x, "integer")
+flatten_int <- function(.x, ..., name_spec = NULL) {
+  ellipsis::check_dots_empty()
+
+  .x <- validate_flatten_vec(.x)
+  name_spec <- name_spec %||% flatten_name_spec
+
+  vec_unchop(.x, ptype = integer(), name_spec = name_spec)
 }
 
 #' @export
 #' @rdname flatten
-flatten_dbl <- function(.x) {
-  .Call(vflatten_impl, .x, "double")
+flatten_dbl <- function(.x, ..., name_spec = NULL) {
+  ellipsis::check_dots_empty()
+
+  .x <- validate_flatten_vec(.x)
+  name_spec <- name_spec %||% flatten_name_spec
+
+  vec_unchop(.x, ptype = double(), name_spec = name_spec)
 }
 
 #' @export
 #' @rdname flatten
-flatten_chr <- function(.x) {
-  .Call(vflatten_impl, .x, "character")
+flatten_chr <- function(.x, ..., name_spec = NULL) {
+  ellipsis::check_dots_empty()
+
+  .x <- validate_flatten_vec(.x)
+  name_spec <- name_spec %||% flatten_name_spec
+
+  deprecate <- FALSE
+  out <- map(.x, function(x) {
+    tryCatch(
+      # Compatibility with historical deparsing behaviour
+      vctrs_error_incompatible_type = function(...) {
+        deprecate <<- TRUE
+        coerce_chr(x)
+      },
+      vec_cast(x, character())
+    )
+  })
+  if (deprecate) {
+    signal_soft_deprecated("Numeric to character coercion is deprecated as of purrr 0.4.0.")
+  }
+
+  vec_unchop(out, ptype = character(), name_spec = name_spec)
+}
+
+is_chr_coercible <- function(x) {
+  typeof(x) %in% c("logical", "integer", "double", "raw")
 }
 
 #' @export
 #' @rdname flatten
-flatten_raw <- function(.x) {
-  .Call(vflatten_impl, .x, "raw")
+flatten_raw <- function(.x, ..., name_spec = NULL) {
+  ellipsis::check_dots_empty()
+
+  .x <- validate_flatten_vec(.x)
+  name_spec <- name_spec %||% flatten_name_spec
+
+  vec_unchop(.x, ptype = raw())
+}
+
+flatten_name_spec <- function(outer, inner) {
+  if (is_character(inner)) {
+    inner
+  } else {
+    rep_along(inner, "")
+  }
+}
+validate_flatten_vec <- function(x) {
+  if (is.data.frame(x)) {
+    # Do we want to deprecate this historical behaviour?
+    unstructure(x)
+  } else {
+    x
+  }
 }
 
 #' @export

diff --git a/R/utils.R b/R/utils.R
@@ -326,3 +326,8 @@ quo_is_same_env <- function(x, env) {
   quo_env <- quo_get_env(x)
   is_reference(quo_env, env) || is_reference(quo_env, empty_env())
 }
+
+unstructure <- function(x) {
+  attributes(x) <- NULL
+  x
+}
diff --git a/man/flatten.Rd b/man/flatten.Rd