metrumresearchgroup · kylebaron · Jan 3, 2022 · Dec 21, 2021 · Dec 21, 2021 · Dec 21, 2021
diff --git a/NAMESPACE b/NAMESPACE
@@ -103,6 +103,7 @@ export(ys_mild_sanitize)
 export(ys_namespace)
 export(ys_project)
 export(ys_project_file)
+export(ys_prune)
 export(ys_rename)
 export(ys_sanitize)
 export(ys_select)

diff --git a/R/class-yspec.R b/R/class-yspec.R
@@ -429,6 +429,58 @@ ys_add_labels <- function(data,spec,fun=label.ycol) {
   data
 }
 
+
+#' Prune a data frame, keeping columns in a yspec object
+#'
+#' Use this to scavenge a data frame for columns that you want to keep. Do not
+#' use this for final column selection; use [dplyr::select()] instead.
+#'
+#' @param data a data frame with at least one column that is found in `spec`
+#' @param spec a `yspec` object
+#' @param report if `TRUE`, issue a message for each column in `spec` that is
+#' not found in `data`
+#'
+#' @examples
+#' data <- ys_help$data()
+#' spec <- ys_help$spec()
+#' data$STUDY <- NULL
+#'
+#' head(ys_prune(data, spec))
+#' head(ys_prune(data, spec, report = TRUE))
+#'
+#' # Use this for final subsetting
+#' # It will fail if all the columns aren't there
+#' data <- ys_help$data()
+#' head(dplyr::select(data, names(spec)))
+#'
+#' @details
+#' An error is generated if there are no columns in common between `data` and
+#' `spec`. A column name that is repeated in `data` is selected only once.
+#'
+#' @return
+#' A data frame with common columns with `spec`, in the order they appear
+#' in `spec`.
+#'
+#' @md
+#' @export
+ys_prune <- function(data, spec, report = FALSE) {
+  assert_that(is.data.frame(data))
+  assert_that(is_yspec(spec))
+  # names in spec that are also in the data, in spec order; testing each spec
+  # name once means a name repeated in `data` is still selected only once
+  grab <- unique(names(spec)[names(spec) %in% names(data)])
+  if (length(grab) == 0) {
+    stop("there are no names common between `data` and `spec`", call. = FALSE)
+  }
+  if (isTRUE(report)) {
+    # one message per spec column that could not be found in the data
+    for (col in setdiff(names(spec), names(data))) {
+      message("Column not found: ", col)
+    }
+  }
+  data[, grab, drop = FALSE]
+}
+
 as_spec_list <- function(...) {
   x <- list(...)
   names(x) <- map_chr(map(x,get_meta),"name")

diff --git a/man/ys_prune.Rd b/man/ys_prune.Rd
diff --git a/tests/testthat/test-prune.R b/tests/testthat/test-prune.R
@@ -0,0 +1,34 @@
+library(yspec)
+library(testthat)
+
+context("test-prune")
+
+test_that("ys_prune selects available columns", {
+  df <- ys_help$data()
+  spec <- ys_help$spec()
+  # drop two spec columns and add two columns that the spec doesn't know about
+  df[c("STUDY", "TAD")] <- NULL
+  df[["FOO"]] <- 1
+  df[["BAR"]] <- 2
+  # shuffle the columns so that the ordering by spec is really exercised
+  set.seed(1103)
+  df <- df[, sample(names(df)), drop = FALSE]
+  pruned <- ys_prune(df, spec)
+  expected <- ys_select(spec, -STUDY, -TAD)
+  expect_identical(class(df), class(pruned))
+  expect_true(is.data.frame(pruned))
+  expect_equal(names(pruned), names(expected))
+  # a data frame sharing no names with the spec is an error
+  expect_error(
+    ys_prune(data.frame(a = 2), spec),
+    regexp = "there are no names common between"
+  )
+  # every dropped spec column gets its own message when report is requested
+  for (dropped in c("STUDY", "TAD")) {
+    expect_message(
+      ys_prune(df, spec, report = TRUE),
+      regexp = paste0("Column not found: ", dropped),
+      all = FALSE, fixed = TRUE
+    )
+  }
+})