Closes #2247 left join relationship (#2433)

* #2247 - added `relationship` argument to `create_single_dose_dataset()`. * #2247 - Update `derive_vars_merged()` function with relationship argument. * #2247 - Update `create_single_dose_dataset()` according to feedback. Set `relationship = many-to-one`. * #2247 - add in assertion to check new `relationship` argument contains only the options allowed. * #2247 - Update `create_single_dose_dataset()` to error if there are duplicates in the `lookup_table` and create associated test. * #2247 - Update `derive_vars_transposed()` with relationship argument. * #2247 - Update NEWS.md, running `styler::style_pkg()`, `lintr::lint_package()` * #2247 - Update documentation to ensure URLs are inserted as links as opposed to plain text. * #2247 - Update to use `signal_duplicate_records()` as recommended by feedback. * #2247 - Revert unintended changes to `test-derive_var_atoxgr.R` * #2247 - Update `derive_vars_merged()` and `derive_vars_transposed()` to only allow one-to-one and many-to-one relationship values according to feedback. * #2247 - Update according to review comment. Catch the dplyr relationship error in `derive_merged.R`, `derive_vars_transposed.R`. * #2247 - Update documentation for `get_hori_data()` due to misaligned text. * #2247 - Running final devtools checks. * Update R/derive_merged.R Apply requested change. Co-authored-by: Stefan Bundfuss <80953585+bundfussr@users.noreply.github.com> * #2247 - Update functions to take dplyr error and replace argument names with parent function argument names. * #2274 - Forgot to remove some dummy testing code! Have now removed. * #2247 - Re-run devtools checks. * docs: #2247 clarify arguments; tests: snapshot of my life * chore: #2247 lintr * tests: #2247 new snapshot * chore: #2247 that lint life --------- Co-authored-by: Ben Straub <ben.x.straub@gsk.com> Co-authored-by: Stefan Bundfuss <80953585+bundfussr@users.noreply.github.com>
pharmaverse · Jun 3, 2024 · d20071d · d20071d
1 parent d93f97b
commit d20071d
Show file tree

Hide file tree

Showing 14 changed files with 317 additions and 15 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -14,6 +14,10 @@
 
 - Templates for ADPC, ADPPK and ADPP are updated to handle urine records. (#2392)
 
+- `create_single_dose_dataset()` has been updated to error if the `lookup_table` contains duplicates. (#2247)
+
+- `derive_vars_merged()` and `derive_vars_transposed()` have a `relationship` argument added (the same as found in `dplyr::*_join()` functions) for users to specify what type of join (one-to-one, one-to-many, etc.) should take place. (#2247)
+
 - `basket_select()` function updated to add `...` argument to allow other qualifiers to be passed to user-defined function specified in `get_terms_fun()` argument for function `create_query_data()`. (#2265)
 
 - The `id_vars` argument was added to `derive_vars_transposed()` and `derive_vars_atc()` to allow additional variables, beyond those in `by_vars`, to uniquely identify records in the `dataset_merge` argument. (#2325)

diff --git a/R/create_single_dose_dataset.R b/R/create_single_dose_dataset.R
@@ -555,6 +555,12 @@ create_single_dose_dataset <- function(dataset,
     cli_abort(err_msg)
   }
 
+  # Check lookup_table does not contain duplicates
+  signal_duplicate_records(
+    dataset = lookup_table,
+    by_vars = exprs(!!lookup_column)
+  )
+
   # Use compute_duration to determine the number of completed dose periods
 
   if (is.null(start_datetime)) {

diff --git a/R/derive_merged.R b/R/derive_merged.R
@@ -146,6 +146,16 @@
 #'   )
 #'   ```
 #'
+#' @param relationship Expected merge-relationship between the `by_vars`
+#'   variable(s) in `dataset` (input dataset) and the `dataset_add` (additional dataset)
+#'    containing the additional `new_vars`.
+#'
+#'   This argument is passed to the `dplyr::left_join()` function. See
+#'   <https://dplyr.tidyverse.org/reference/mutate-joins.html#arguments> for
+#'   more details.
+#'
+#'   **Permitted Values:** `"one-to-one"`, `"many-to-one"`, `NULL`.
+#'
 #' @return The output dataset contains all observations and variables of the
 #'   input dataset and additionally the variables specified for `new_vars` from
 #'   the additional dataset (`dataset_add`).
@@ -317,7 +327,8 @@ derive_vars_merged <- function(dataset,
                                false_value = NA_character_,
                                missing_values = NULL,
                                check_type = "warning",
-                               duplicate_msg = NULL) {
+                               duplicate_msg = NULL,
+                               relationship = NULL) {
   filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE)
   assert_vars(by_vars)
   by_vars_left <- replace_values_by_names(by_vars)
@@ -357,6 +368,13 @@ derive_vars_merged <- function(dataset,
       ))
     }
   }
+  relationship <- assert_character_scalar(
+    relationship,
+    values = c("one-to-one", "many-to-one"),
+    case_sensitive = TRUE,
+    optional = TRUE
+  )
+
 
   add_data <- dataset_add %>%
     mutate(!!!new_vars) %>%
@@ -412,7 +430,49 @@ derive_vars_merged <- function(dataset,
       )
     )
   }
-  dataset <- left_join(dataset, add_data, by = vars2chr(by_vars))
+
+  tryCatch(
+    dataset <- left_join(
+      dataset,
+      add_data,
+      by = vars2chr(by_vars),
+      relationship = relationship
+    ),
+    "dplyr_error_join_relationship_one_to_one" = function(cnd) {
+      cli_abort(
+        message = c(
+          str_replace(
+            str_replace(
+              cnd$message, "`x`", "`dataset`"
+            ), "`y`", "`dataset_add`"
+          ),
+          i = str_replace(
+            str_replace(
+              cnd$body, "`x`", "`dataset`"
+            ), "`y`", "`dataset_add`"
+          )
+        ),
+        call = parent.frame(n = 4)
+      )
+    },
+    "dplyr_error_join_relationship_many_to_one" = function(cnd) {
+      cli_abort(
+        message = c(
+          str_replace(
+            str_replace(
+              cnd$message, "`x`", "`dataset`"
+            ), "`y`", "`dataset_add`"
+          ),
+          i = str_replace(
+            str_replace(
+              cnd$body, "`x`", "`dataset`"
+            ), "`y`", "`dataset_add`"
+          )
+        ),
+        call = parent.frame(n = 4)
+      )
+    }
+  )
 
   if (!is.null(match_flag_var)) {
     update_missings <- map2(

diff --git a/R/derive_param_computed.R b/R/derive_param_computed.R
@@ -442,7 +442,7 @@ assert_parameters_argument <- function(parameters, optional = TRUE) {
 #'
 #' @return A dataset with one observation per by group. It contains the
 #'   variables specified for `by_vars` and all variables of the form
-#'   `<variable>.<parameter>` occurring in `analysis_value`.
+#'   `<variable>.<parameter>` occurring in `set_values_to`.
 #'
 #' @keywords internal
 get_hori_data <- function(dataset,

diff --git a/R/derive_vars_transposed.R b/R/derive_vars_transposed.R
@@ -28,6 +28,16 @@
 #'
 #' @param filter Expression used to restrict the records of `dataset_merge` prior to transposing
 #'
+#' @param relationship Expected merge-relationship between the `by_vars`
+#'   variable(s) in `dataset` and `dataset_merge` (after transposition)
+#'
+#'   This argument is passed to the `dplyr::left_join()` function. See
+#'   <https://dplyr.tidyverse.org/reference/mutate-joins.html#arguments> for
+#'   more details.
+#'
+#'   Permitted Values for `relationship`: `"one-to-one"`, `"one-to-many"`,
+#'   `"many-to-one"`, `"many-to-many"`, `NULL`.
+#'
 #' @details
 #' After filtering `dataset_merge` based upon the condition provided in `filter`, this
 #' dataset is transposed and subsequently merged onto `dataset` using `by_vars` as
@@ -90,14 +100,21 @@ derive_vars_transposed <- function(dataset,
                                    id_vars = NULL,
                                    key_var,
                                    value_var,
-                                   filter = NULL) {
+                                   filter = NULL,
+                                   relationship = NULL) {
   key_var <- assert_symbol(enexpr(key_var))
   value_var <- assert_symbol(enexpr(value_var))
   filter <- assert_filter_cond(enexpr(filter), optional = TRUE)
   assert_vars(by_vars)
   assert_vars(id_vars, optional = TRUE)
   assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
   assert_data_frame(dataset_merge, required_vars = expr_c(by_vars, key_var, value_var))
+  relationship <- assert_character_scalar(
+    relationship,
+    values = c("one-to-one", "one-to-many", "many-to-one", "many-to-many"),
+    case_sensitive = TRUE,
+    optional = TRUE
+  )
 
   dataset_transposed <- dataset_merge %>%
     filter_if(filter) %>%
@@ -107,7 +124,65 @@ derive_vars_transposed <- function(dataset,
       id_cols = c(as.character(by_vars), as.character(id_vars))
     )
 
-  left_join(dataset, dataset_transposed, by = vars2chr(by_vars))
+  tryCatch(
+    left_join(
+      dataset,
+      dataset_transposed,
+      by = vars2chr(by_vars),
+      relationship = relationship
+    ),
+    "dplyr_error_join_relationship_one_to_one" = function(cnd) {
+      cli_abort(
+        message = c(
+          str_replace(
+            str_replace(
+              cnd$message, "`x`", "`dataset`"
+            ), "`y`", "the transposed `dataset_merge`"
+          ),
+          i = str_replace(
+            str_replace(
+              cnd$body, "`x`", "`dataset`"
+            ), "`y`", "the transposed `dataset_merge`"
+          )
+        ),
+        call = parent.frame(n = 4)
+      )
+    },
+    "dplyr_error_join_relationship_many_to_one" = function(cnd) {
+      cli_abort(
+        message = c(
+          str_replace(
+            str_replace(
+              cnd$message, "`x`", "`dataset`"
+            ), "`y`", "the transposed `dataset_merge`"
+          ),
+          i = str_replace(
+            str_replace(
+              cnd$body, "`x`", "`dataset`"
+            ), "`y`", "the transposed `dataset_merge`"
+          )
+        ),
+        call = parent.frame(n = 4)
+      )
+    },
+    "dplyr_error_join_relationship_one_to_many" = function(cnd) {
+      cli_abort(
+        message = c(
+          str_replace(
+            str_replace(
+              cnd$message, "`x`", "`dataset`"
+            ), "`y`", "the transposed `dataset_merge`"
+          ),
+          i = str_replace(
+            str_replace(
+              cnd$body, "`x`", "`dataset`"
+            ), "`y`", "the transposed `dataset_merge`"
+          )
+        ),
+        call = parent.frame(n = 4)
+      )
+    }
+  )
 }
 
 #' Derive ATC Class Variables

diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd
diff --git a/man/derive_vars_transposed.Rd b/man/derive_vars_transposed.Rd
diff --git a/man/get_hori_data.Rd b/man/get_hori_data.Rd
diff --git a/tests/testthat/_snaps/derive_merged.md b/tests/testthat/_snaps/derive_merged.md
@@ -100,3 +100,30 @@
       1 DIABP Diastolic Blood Pressure
       i Run `admiral::get_not_mapped()` to access the full list.
 
+# derive_var_merged_summary Test 28: error when relatioship is
+          incorrectly specificed 'one-to-one'
+
+    Code
+      derive_vars_merged(advs, dataset_add = adsl, by_vars = exprs(USUBJID),
+      new_vars = exprs(SEX), relationship = "one-to-one")
+    Condition
+      Error in `tryCatch()`:
+      ! Each row in `dataset_add` must match at most 1 row in `dataset`.
+      i Row 1 of `dataset_add` matches multiple rows in `dataset`.
+
+# derive_var_merged_summary Test 29: merge selected variables with
+          relatioship as 'one-to-one'
+
+    Code
+      derive_vars_merged(adsl, dataset_add = advs, by_vars = exprs(USUBJID),
+      new_vars = exprs(WEIGHTBL = AVAL), filter_add = AVISIT == "BASELINE",
+      relationship = "one-to-one")
+    Output
+      # A tibble: 4 x 5
+        USUBJID SEX   COUNTRY STUDYID WEIGHTBL
+        <chr>   <chr> <chr>   <chr>      <dbl>
+      1 ST42-1  F     AUT     ST42          66
+      2 ST42-2  M     MWI     ST42          88
+      3 ST42-3  M     NOR     ST42          NA
+      4 ST42-4  F     UGA     ST42          NA
+
diff --git a/tests/testthat/_snaps/derive_vars_query.md b/tests/testthat/_snaps/derive_vars_query.md
@@ -8,7 +8,7 @@
       ! The source variables (values of `SRCVAR`) must be numeric or character.
       i AELLTCD is of type logical
 
-# derive_vars_for_query Test 6: Error is given if both TERMCHAR/TERMNUM are NA/empty
+# derive_vars_query Test 6: Error is given if both TERMCHAR/TERMNUM are NA/empty
 
     Code
       derive_vars_query(my_ae, query)

diff --git a/tests/testthat/_snaps/derive_vars_transposed.md b/tests/testthat/_snaps/derive_vars_transposed.md
@@ -0,0 +1,14 @@
+# derive_vars_transposed Test 3: filtering the merge dataset works
+          with relationship 'many-to-one'
+
+    Code
+      derive_vars_transposed(dataset, dataset_merge, by_vars = exprs(USUBJID),
+      key_var = TESTCD, value_var = VALUE, filter = TESTCD == "T01", relationship = "many-to-one")
+    Output
+      # A tibble: 3 x 3
+        USUBJID  VAR1   T01
+        <chr>   <dbl> <dbl>
+      1 P01         3    31
+      2 P02        31     3
+      3 P03        42    NA
+
diff --git a/tests/testthat/test-create_single_dose_dataset.R b/tests/testthat/test-create_single_dose_dataset.R
@@ -393,3 +393,33 @@ test_that("create_single_dose_dataset Test 11: Works as expected for cases with
     keys = "ASTDTM"
   )
 })
+
+## Test 12: Error if lookup_column contains duplicates ----
+test_that("create_single_dose_dataset Test 12: Error if lookup_column contains duplicates", {
+  custom_lookup <- tribble(
+    ~Value,   ~DOSE_COUNT, ~DOSE_WINDOW, ~CONVERSION_FACTOR,
+    "Q30MIN", (1 / 30),    "MINUTE",                      1,
+    "Q30MIN", (1 / 30),    "MINUTE",                      1,
+    "Q90MIN", (1 / 90),    "MINUTE",                      1
+  )
+
+  input <- tribble(
+    ~USUBJID, ~EXDOSFRQ, ~ASTDT, ~ASTDTM, ~AENDT, ~AENDTM,
+    "P01", "Q30MIN", ymd("2021-01-01"), ymd_hms("2021-01-01T06:00:00"),
+    ymd("2021-01-01"), ymd_hms("2021-01-01T07:00:00"),
+    "P02", "Q90MIN", ymd("2021-01-01"), ymd_hms("2021-01-01T06:00:00"),
+    ymd("2021-01-01"), ymd_hms("2021-01-01T09:00:00")
+  )
+
+  expect_error(
+    create_single_dose_dataset(input,
+      lookup_table = custom_lookup,
+      lookup_column = Value,
+      start_datetime = ASTDTM,
+      end_datetime = AENDTM
+    ),
+    regexp = paste0(
+      "Dataset contains duplicate records with respect to `Value`"
+    )
+  )
+})