From f0f9624ddc0b4830e82ed6ab48089f9861ad3435 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Wed, 27 Sep 2023 12:34:05 -0400 Subject: [PATCH 1/6] moved data checks to separate file --- R/data_checks.R | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ R/helper_funs.R | 81 --------------------------------------------- 2 files changed, 87 insertions(+), 81 deletions(-) create mode 100644 R/data_checks.R diff --git a/R/data_checks.R b/R/data_checks.R new file mode 100644 index 0000000..3edc45f --- /dev/null +++ b/R/data_checks.R @@ -0,0 +1,87 @@ +# functions for checking and pre-processing input data + +# check that an input variable is numeric +check_dim_class <- function(x, varname){ + if(!is.numeric(x)){ + cli::cli_abort( + c("All dimensions must be numeric", + "i" = "{.var {varname}} has class {.cls {class(x)}}.") + ) + } +} + +# check that x and y inputs have the same length +check_dim_size <- function(x, y, xname, yname){ + if(length(x) != length(y)){ + xlen = length(x) + ylen = length(y) + cli::cli_abort( + c("Data dimensions must have the same length", + "i" = "{.var {xname}} has {xlen} value{?s}.", + "i" = "{.var {yname}} has {ylen} value{?s}.") + ) + } +} + +# check the probabilities +check_probs <- function(probs) { + if (!is.numeric(probs)) { + cli::cli_abort( + c("{.var probs} must be numeric", + "i" = "{.var probs} has class {.cls {class(probs)}}") + ) + } + + if (!all(is.finite(probs))) { + non_finites <- unique(probs[which(!is.finite(probs))]) + cli::cli_abort( + c("All {.var probs} must be finite.", + "i" = "{.var probs} included values of {.val {non_finites}}") + ) + } + + if (any(probs <= 0)) { + n_less <- sum(probs <= 0) + cli::cli_abort( + c("All {.var probs} must be greater than 0.", + "i" = "{.var probs} contained {n_less} value{?s} <= 0.") + ) + } + + if (any(probs >= 1)) { + n_greater <- sum(probs >= 1) + cli::cli_abort( + c("All {.var probs} must be less than 1.", + "i" = "{.var probs} contained {n_greater} value{?s} >= 1") + ) + } +} + + +# drop na values from input dimensions +na_filter <- function(...){ + + dots <- rlang::dots_list(...) + dots_name <- names(dots) + + na_vec <- purrr::map(dots, is.na) + + if(purrr::reduce(na_vec, any)){ + na_loc <- purrr::reduce(na_vec, `|`) + new_values <- purrr::map(dots, \(x)x[!na_loc]) + output <- list( + filtered = TRUE, + values = new_values, + total = purrr::map(na_vec, sum) + ) + }else{ + output <- list( + filtered = FALSE, + values = dots, + total = purrr::map(na_vec, sum) + ) + } + + return(output) + +} diff --git a/R/helper_funs.R b/R/helper_funs.R index 8dd71f9..80f7ce4 100644 --- a/R/helper_funs.R +++ b/R/helper_funs.R @@ -22,84 +22,3 @@ xyz_to_isolines <- function(data, breaks) { levels = breaks[-length(breaks)] ) } - -check_dim_class <- function(x, varname){ - if(!is.numeric(x)){ - cli::cli_abort( - c("All dimensions must be numeric", - "i" = "{.var {varname}} has class {.cls {class(x)}}.") - ) - } -} - -check_dim_size <- function(x, y, xname, yname){ - if(length(x) != length(y)){ - xlen = length(x) - ylen = length(y) - cli::cli_abort( - c("Data dimensions must have the same length", - "i" = "{.var {xname}} has {xlen} value{?s}.", - "i" = "{.var {yname}} has {ylen} value{?s}.") - ) - } -} - -check_probs <- function(probs) { - if (!is.numeric(probs)) { - cli::cli_abort( - c("{.var probs} must be numeric", - "i" = "{.var probs} has class {.cls {class(probs)}}") - ) - } - - if (!all(is.finite(probs))) { - non_finites <- unique(probs[which(!is.finite(probs))]) - cli::cli_abort( - c("All {.var probs} must be finite.", - "i" = "{.var probs} included values of {.val {non_finites}}") - ) - } - - if (any(probs <= 0)) { - n_less <- sum(probs <= 0) - cli::cli_abort( - c("All {.var probs} must be greater than 0.", - "i" = "{.var probs} contained {n_less} value{?s} <= 0.") - ) - } - - if (any(probs >= 1)) { - n_greater <- sum(probs >= 1) - cli::cli_abort( - c("All {.var probs} must be less than 1.", - "i" = "{.var probs} contained {n_greater} value{?s} >= 1") - ) - } -} - -na_filter <- function(...){ - - dots <- rlang::dots_list(...) - dots_name <- names(dots) - - na_vec <- purrr::map(dots, is.na) - - if(purrr::reduce(na_vec, any)){ - na_loc <- purrr::reduce(na_vec, `|`) - new_values <- purrr::map(dots, \(x)x[!na_loc]) - output <- list( - filtered = TRUE, - values = new_values, - total = purrr::map(na_vec, sum) - ) - }else{ - output <- list( - filtered = FALSE, - values = dots, - total = purrr::map(na_vec, sum) - ) - } - - return(output) - -} From 1198ca31328af48f66aed17ad6ed4ea08ac9c7f8 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Wed, 27 Sep 2023 12:46:42 -0400 Subject: [PATCH 2/6] refactoring probability checks --- R/data_checks.R | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/R/data_checks.R b/R/data_checks.R index 3edc45f..8f4e521 100644 --- a/R/data_checks.R +++ b/R/data_checks.R @@ -23,14 +23,21 @@ check_dim_size <- function(x, y, xname, yname){ } } -# check the probabilities -check_probs <- function(probs) { - if (!is.numeric(probs)) { +# check for minumum argument size +check_min_size <- function(x, varname, min_size = 1){ + if(len(x) < min_size){ cli::cli_abort( - c("{.var probs} must be numeric", - "i" = "{.var probs} has class {.cls {class(probs)}}") + c("{.var {varname}} must have at least {.val {min_size}} value{?s}.", + "i" = "{.var {varname}} has {.val {length(x)}} value{?s}") ) } +} + +# check the probabilities +check_probs <- function(probs) { + + check_dim_class(probs, "probs") + check_min_size(probs, "probs", min_size = 1) if (!all(is.finite(probs))) { non_finites <- unique(probs[which(!is.finite(probs))]) @@ -40,21 +47,16 @@ check_probs <- function(probs) { ) } - if (any(probs <= 0)) { + if (any(probs <= 0 | probs >= 1)) { n_less <- sum(probs <= 0) - cli::cli_abort( - c("All {.var probs} must be greater than 0.", - "i" = "{.var probs} contained {n_less} value{?s} <= 0.") - ) - } - - if (any(probs >= 1)) { n_greater <- sum(probs >= 1) cli::cli_abort( - c("All {.var probs} must be less than 1.", + c("All {.var probs} must be greater than 0 and less than 1.", + "i" = "{.var probs} contained {n_less} value{?s} <= 0.", "i" = "{.var probs} contained {n_greater} value{?s} >= 1") ) } + } From 0339833974789ea873331566ad514f74496d45f1 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Wed, 27 Sep 2023 12:46:54 -0400 Subject: [PATCH 3/6] refactor na filtering --- R/data_checks.R | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/R/data_checks.R b/R/data_checks.R index 8f4e521..338acf4 100644 --- a/R/data_checks.R +++ b/R/data_checks.R @@ -68,20 +68,18 @@ na_filter <- function(...){ na_vec <- purrr::map(dots, is.na) + output <- list( + filtered = FALSE, + values = dots, + total = purrr::map(na_vec, sum) + ) + + if(purrr::reduce(na_vec, any)){ na_loc <- purrr::reduce(na_vec, `|`) new_values <- purrr::map(dots, \(x)x[!na_loc]) - output <- list( - filtered = TRUE, - values = new_values, - total = purrr::map(na_vec, sum) - ) - }else{ - output <- list( - filtered = FALSE, - values = dots, - total = purrr::map(na_vec, sum) - ) + output$filtered <- TRUE + output$values <- new_values } return(output) From 32d437c0f6fa735fc945fa468f8f192a6835fcbf Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Wed, 27 Sep 2023 12:48:55 -0400 Subject: [PATCH 4/6] filter non-finite values, not just NA --- R/data_checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_checks.R b/R/data_checks.R index 338acf4..b777976 100644 --- a/R/data_checks.R +++ b/R/data_checks.R @@ -66,7 +66,7 @@ na_filter <- function(...){ dots <- rlang::dots_list(...) dots_name <- names(dots) - na_vec <- purrr::map(dots, is.na) + na_vec <- purrr::map(dots, purrr::negate(is.finite)) output <- list( filtered = FALSE, From 5bec0532586ec19aca4edbb5248ec5bf538d5861 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Wed, 27 Sep 2023 13:04:46 -0400 Subject: [PATCH 5/6] len/length typo --- R/data_checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_checks.R b/R/data_checks.R index b777976..2384bd9 100644 --- a/R/data_checks.R +++ b/R/data_checks.R @@ -25,7 +25,7 @@ check_dim_size <- function(x, y, xname, yname){ # check for minumum argument size check_min_size <- function(x, varname, min_size = 1){ - if(len(x) < min_size){ + if(length(x) < min_size){ cli::cli_abort( c("{.var {varname}} must have at least {.val {min_size}} value{?s}.", "i" = "{.var {varname}} has {.val {length(x)}} value{?s}") From e82c366599186cc84fc9adf729b99ca01059552a Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Wed, 27 Sep 2023 13:05:15 -0400 Subject: [PATCH 6/6] moved data processing into separate function --- R/data_checks.R | 36 ++++++++++++++++++++++++++++++++++++ R/density_area.R | 27 ++++++--------------------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/R/data_checks.R b/R/data_checks.R index 2384bd9..47fe193 100644 --- a/R/data_checks.R +++ b/R/data_checks.R @@ -85,3 +85,39 @@ na_filter <- function(...){ return(output) } + + +process_data <- function(x, xname, y, yname, probs){ + + check_dim_class(x, xname) + check_dim_class(y, yname) + check_dim_size(x, y, xname, yname) + check_probs(probs) + + na_filtered <- na_filter(x = x, y = y) + + if(na_filtered$filtered){ + x <- na_filtered$values$x + y <- na_filtered$values$y + + x_total <- na_filtered$total$x + y_total <- na_filtered$total$y + cli::cli_warn( + c("Missing and non-finite values dropped", + "i" = "{x_total} missing or non-finite value{?s} in {xname}", + "i" = "{y_total} missing or non-finite value{?s} in {yname}" + ) + ) + } + + return( + list( + x = x, + y = y, + probs = probs, + xname = xname, + yname = yname + ) + ) + +} diff --git a/R/density_area.R b/R/density_area.R index 06dd792..19bdd2b 100644 --- a/R/density_area.R +++ b/R/density_area.R @@ -97,34 +97,19 @@ density_polygons <- function(x, xname <- deparse(substitute(x)) yname <- deparse(substitute(y)) + processed_data <- process_data(x=x, + xname = xname, + y=y, + yname = yname, + probs) - check_dim_class(x, xname) - check_dim_class(y, yname) - check_dim_size(x, y, xname, yname) - check_probs(probs) + list2env(processed_data, envir = environment()) nameswap <- c("x", "y") names(nameswap) <- vctrs::vec_as_names(c(xname, yname), repair = "unique", quiet = TRUE) - na_filtered <- na_filter(x = x, y = y) - - if(na_filtered$filtered){ - x = na_filtered$values$x - y = na_filtered$values$y - - x_total <- na_filtered$total$x - y_total <- na_filtered$total$y - cli::cli_warn( - c("Missing values dropped", - "i" = "{x_total} missing value{?s} in {xname}", - "i" = "{y_total} missing value{?s} in {yname}" - ) - ) - } - - isolines <- get_isolines_safely(x=x, y=y, probs=probs, ...) isolines |>