Add new calc_eLOD() function

- added function that calculates the estimated limit of detection (eLOD) for SeqId columns of an input `soma_adat` or `data.frame`
- included examples in function documentation of filtering an adat to buffer samples as well as filtering based on a vector of SampleIds
- updated spelling WORDLIST
SomaLogic · Sep 25, 2024 · adb64fd · adb64fd
1 parent 34ec758
commit adb64fd
Show file tree

Hide file tree

Showing 7 changed files with 270 additions and 10 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -68,6 +68,7 @@ export(anti_join)
 export(antilog)
 export(apt2seqid)
 export(arrange)
+export(calc_eLOD)
 export(checkSomaScanVersion)
 export(cleanNames)
 export(col2rn)
@@ -129,6 +130,7 @@ export(slice_sample)
 export(ungroup)
 export(unite)
 export(write_adat)
+importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,anti_join)
 importFrom(dplyr,any_of)
@@ -148,6 +150,8 @@ importFrom(dplyr,select)
 importFrom(dplyr,semi_join)
 importFrom(dplyr,slice)
 importFrom(dplyr,slice_sample)
+importFrom(dplyr,starts_with)
+importFrom(dplyr,summarise)
 importFrom(dplyr,ungroup)
 importFrom(lifecycle,deprecate_soft)
 importFrom(lifecycle,deprecate_stop)
@@ -167,6 +171,7 @@ importFrom(stats,setNames)
 importFrom(tibble,as_tibble)
 importFrom(tibble,deframe)
 importFrom(tibble,enframe)
+importFrom(tibble,is_tibble)
 importFrom(tibble,tibble)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,separate)

diff --git a/R/0-declare-global-variables.R b/R/0-declare-global-variables.R
@@ -12,6 +12,7 @@ utils::globalVariables(
     "array_id",
     "blank_col",
     "Dilution",
+    "eLOD",
     "feature",
     "prefix",
     "rn",

diff --git a/R/calc_eLOD.R b/R/calc_eLOD.R
@@ -0,0 +1,93 @@
+#' Calculate Estimated Limit of Detection (eLOD)
+#'
+#' Calculate the estimated limit of detection (eLOD) for SOMAmer reagent
+#' analytes in the provided input data. The input data should be filtered to
+#' include only buffer samples desired for eLOD calculation.
+#'
+#' eLOD is calculated using the following steps:
+#'
+#' 1. For each SOMAmer, the median and adjusted median absolute
+#'    deviation (\eqn{MAD_{Adjusted}}) are calculated, where
+#'    \deqn{MAD_{Adjusted} = 1.4826 * MAD}
+#'    The 1.4826 is a set constant used to adjust the MAD to be reflective of
+#'    the standard deviation of the normal distribution.
+#' 2. For each SOMAmer, calculate \deqn{eLOD = median + 3.3 * MAD_{Adjusted}}
+#'
+#' Note: The eLOD is useful for non-core matrices, including cell lysate
+#' and CSF, but should be used carefully for evaluating background signal in
+#' plasma and serum.
+#'
+#' @param data A `soma_adat`, `data.frame`, or `tibble` object with at least
+#' one SeqId column (lowercase `seq.xxxxx.xx` name) containing RFU values.
+#' @return A `tibble` object with 2 columns: SeqId and eLOD.
+#' @author Caleb Scheidel, Christopher Dimapasok
+#' @examples
+#' # filter data frame using vector of SampleId controls
+#' df <- withr::with_seed(101, {
+#'   data.frame(
+#'     SampleType = rep(c("Sample", "Buffer"), each = 10),
+#'     SampleId = paste0("Sample_", 1:20),
+#'     seq.20.1.100 = runif(20, 1, 100),
+#'     seq.21.1.100 = runif(20, 1, 100),
+#'     seq.22.2.100 = runif(20, 1, 100)
+#'   )
+#' })
+#' sample_ids <- paste0("Sample_", 11:20)
+#' selected_samples <- df |> filter(SampleId %in% sample_ids)
+#'
+#' selected_elod <- calc_eLOD(selected_samples)
+#' head(selected_elod)
+#' \dontrun{
+#' # filter `soma_adat` object to buffer samples
+#' buffer_samples <- example_data |> filter(SampleType == "Buffer")
+#'
+#' # calculate eLOD
+#' buffer_elod <- calc_eLOD(buffer_samples)
+#' head(buffer_elod)
+#'
+#' # use eLOD to calculate signal to noise ratio of samples
+#' samples_median <- example_data |> dplyr::filter(SampleType == "Sample") |>
+#'   dplyr::summarise(dplyr::across(dplyr::starts_with("seq."), median,
+#'                                  .names = "median_{col}")) |>
+#'   tidyr::pivot_longer(dplyr::starts_with("median_"), names_to = "SeqId",
+#'                       values_to = "median_signal") |>
+#'   dplyr::mutate(SeqId = sub("^median_", "", SeqId))
+#'
+#' # analytes with signal to noise > 2
+#' ratios <- samples_median |>
+#'   dplyr::mutate(signal_to_noise = median_signal / buffer_elod$eLOD) |>
+#'   dplyr::filter(signal_to_noise > 2) |>
+#'   dplyr::arrange(dplyr::desc(signal_to_noise))
+#'
+#' head(ratios)
+#' }
+#' @importFrom dplyr across mutate select summarise starts_with
+#' @importFrom stats mad median
+#' @importFrom tibble as_tibble is_tibble
+#' @importFrom tidyr pivot_longer
+#' @export
+calc_eLOD <- function(data) {
+  stopifnot("`data` must be a soma_adat, tibble, or data.frame" =
+              is.soma_adat(data) || is.data.frame(data) || is_tibble(data),
+            "`data` must contain at least one `seq.` (SeqId) column" =
+              any(startsWith(names(data), "seq.")))
+
+  # if `SampleType` is present, warn when non-buffer samples are included
+  if ("SampleType" %in% names(data) &&
+        any(c("Sample", "Calibrator", "QC") %in% data$SampleType)) {
+    warning("Ensure input data includes buffer samples only!", call. = FALSE)
+  }
+
+  # eLOD = median + 3.3 * adjusted MAD
+  elod <- function(x) median(x) + 3.3 * mad(x, constant = 1.4826)
+
+  # `ignore.case = FALSE`: by default the helper would also match `SeqId` etc.
+  data |>
+    summarise(across(starts_with("seq.", ignore.case = FALSE), elod,
+                     .names = "eLOD_{col}")) |>
+    pivot_longer(starts_with("eLOD_", ignore.case = FALSE),
+                 names_to = "SeqId", values_to = "eLOD") |>
+    mutate(SeqId = sub("^eLOD_", "", SeqId)) |>
+    select(SeqId, eLOD) |>
+    as_tibble()
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -167,6 +167,11 @@ reference:
     - cleanNames
     - getAdatVersion
 
+  - title: Data Summaries
+    desc: Functions to assist with summarizing SOMAmer RFU values.
+    contents:
+    - calc_eLOD
+
   - title: Data Objects
     desc: Objects provided with `SomaDataIO`.
     contents:

diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -8,14 +8,17 @@ AptName
 AssayNotes
 Barcode
 Biobase
+Biometrics
 CCC
 CLI
 CMD
+CSF
 CalQcRatio
 CalReference
 Codecov
 ColCheck
 Covance
+Dimapasok
 EDTA
 EID
 EOL
@@ -26,11 +29,11 @@ EntrezGeneSymbol
 ExpressionSet
 ExtIdentifier
 HybControlNormScale
+Kuei
 LF
 Lifecycle
 MERCHANTABILITY
 MacOS
-magrittr
 NormScale
 ORCID
 PII
@@ -39,15 +42,13 @@ PercentDilution
 PlateId
 PlatePosition
 PlateScale
-plex
-proteomic
 QcReference
 README
 RFU
 RFUs
 RUO
 ReferenceRFU
-Rmarkdown
+Reproducibility
 RowCheck
 SELEX
 SG
@@ -61,9 +62,11 @@ SampleMatrix
 SampleNotes
 SampleType
 ScannerID
+Scheidel
 SeqId
 SeqIds
 SeqidVersion
+Setdiff
 SiteId
 SlideId
 SomaId
@@ -76,27 +79,40 @@ Tabacman
 TargetFullName
 TimePoint
 TubeUniqueID
+Un
 UniProt
+YAML
 adat
 aliquot
 analyte
 analytes
 barcode
-bioconductor
-choosealicense
+cli
 dplyr
+eLOD
 eSet
+frac
 funder
-https
+intra
+leftrightarrow
 lifecycle
+lysate
+magrittr
 medNormRef
-mit
 nd
+normals
 pkgdown
+plex
 pre
+proteomic
+readxl
+rightarrow
+rowname
+rsample
 subarray
 tada
 th
 tibble
-tldrlegal
-www
+tidyr
+usethis
+vectorized
diff --git a/man/calc_eLOD.Rd b/man/calc_eLOD.Rd
diff --git a/tests/testthat/test-calc_eLOD.R b/tests/testthat/test-calc_eLOD.R
@@ -0,0 +1,62 @@
+# Setup ----
+# soma_adat input filtered to "Buffer" samples
+buffer_samples <- example_data |> filter(SampleType == "Buffer")
+
+# keep only the last 10 analytes so the fixture stays small;
+# `head(x, -10L)` avoids the `1:n` pitfall when `n` is not positive
+drop_seqs <- head(getAnalytes(example_data), -10L)
+buffer_samples <- buffer_samples |> select(-all_of(drop_seqs))
+
+# data.frame input
+df <- withr::with_seed(101, {
+  data.frame(
+    SampleType = rep(c("Sample", "Buffer"), each = 10),
+    SampleId = paste0("Sample_", 1:20),
+    seq.20.1.100 = runif(20, 1, 100),
+    seq.21.1.100 = runif(20, 1, 100),
+    seq.22.2.100 = runif(20, 1, 100)
+  )
+})
+sample_ids <- paste0("Sample_", 11:20)
+selected_samples <- df |> filter(SampleId %in% sample_ids)
+
+# Testing ----
+test_that("`calc_eLOD` produces a warning when it should", {
+  # unfiltered `example_data` is expected to trip the buffer-only check
+  msg <- "Ensure input data includes buffer samples only!"
+
+  expect_warning(calc_eLOD(example_data), regexp = msg)
+})
+
+test_that("`calc_eLOD` produces an error when it should", {
+  # a plain list is none of the accepted input classes
+  bad_input <- list(SampleId = 1:3, seq.1000.123 = 100:102)
+  msg <- "`data` must be a soma_adat, tibble, or data.frame"
+  expect_error(calc_eLOD(bad_input), regexp = msg)
+})
+
+test_that("`calc_eLOD` works on a soma_adat input filtered to buffer samples", {
+  elod_tbl <- calc_eLOD(buffer_samples)
+  # reference values for the first 3 of the 10 retained analytes
+  expected <- tibble(
+    SeqId = c("seq.9981.18", "seq.9983.97", "seq.9984.12"),
+    eLOD  = c(45.08555, 52.98848, 123.02824)
+  )
+
+  expect_s3_class(elod_tbl, "tbl_df")
+  expect_equal(dim(elod_tbl), c(10L, 2L))
+  expect_equal(head(elod_tbl, 3), expected, tolerance = 0.00001)
+})
+
+test_that("`calc_eLOD` works on a data.frame input", {
+  elod_tbl <- calc_eLOD(selected_samples)
+  # reference values for the 3 simulated SeqId columns (seed 101)
+  expected <- tibble(
+    SeqId = c("seq.20.1.100", "seq.21.1.100", "seq.22.2.100"),
+    eLOD  = c(168.0601, 130.7047, 115.9958)
+  )
+
+  expect_s3_class(elod_tbl, "tbl_df")
+  expect_equal(dim(elod_tbl), c(3L, 2L))
+  expect_equal(head(elod_tbl, 3), expected, tolerance = 0.0001)
+})