Skip to content

Commit

Permalink
Add new calc_eLOD() function
Browse files Browse the repository at this point in the history
- added function that calculates the estimated
  limit of detection (eLOD) for SeqId columns
  of an input `soma_adat` or `data.frame`
- included examples in function documentation
  of filtering an adat to buffer samples as
  well as filtering based on vector of SampleIds
- updated spelling WORDLIST
  • Loading branch information
scheidec committed Sep 25, 2024
1 parent 34ec758 commit adb64fd
Show file tree
Hide file tree
Showing 7 changed files with 270 additions and 10 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export(anti_join)
export(antilog)
export(apt2seqid)
export(arrange)
export(calc_eLOD)
export(checkSomaScanVersion)
export(cleanNames)
export(col2rn)
Expand Down Expand Up @@ -129,6 +130,7 @@ export(slice_sample)
export(ungroup)
export(unite)
export(write_adat)
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,anti_join)
importFrom(dplyr,any_of)
Expand All @@ -148,6 +150,8 @@ importFrom(dplyr,select)
importFrom(dplyr,semi_join)
importFrom(dplyr,slice)
importFrom(dplyr,slice_sample)
importFrom(dplyr,starts_with)
importFrom(dplyr,summarise)
importFrom(dplyr,ungroup)
importFrom(lifecycle,deprecate_soft)
importFrom(lifecycle,deprecate_stop)
Expand All @@ -167,6 +171,7 @@ importFrom(stats,setNames)
importFrom(tibble,as_tibble)
importFrom(tibble,deframe)
importFrom(tibble,enframe)
importFrom(tibble,is_tibble)
importFrom(tibble,tibble)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,separate)
Expand Down
1 change: 1 addition & 0 deletions R/0-declare-global-variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ utils::globalVariables(
"array_id",
"blank_col",
"Dilution",
"eLOD",
"feature",
"prefix",
"rn",
Expand Down
93 changes: 93 additions & 0 deletions R/calc_eLOD.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#' Calculate Estimated Limit of Detection (eLOD)
#'
#' Calculate the estimated limit of detection (eLOD) for SOMAmer reagent
#' analytes in the provided input data. The input data should be filtered to
#' include only buffer samples desired for eLOD calculation.
#'
#' eLOD is calculated using the following steps:
#'
#' 1. For each SOMAmer, the median and adjusted median absolute
#' deviation (\eqn{MAD_{Adjusted}}) are calculated, where
#' \deqn{MAD_{Adjusted} = 1.4826 * MAD}
#' The constant 1.4826 scales the MAD so that it is a consistent estimator
#' of the standard deviation for normally distributed data.
#' 2. For each SOMAmer, calculate \deqn{eLOD = median + 3.3 * MAD_{Adjusted}}
#'
#' Note: The eLOD is useful for non-core matrices, including cell lysate
#' and CSF, but should be used carefully for evaluating background signal in
#' plasma and serum.
#'
#' @param data A `soma_adat`, `data.frame`, or `tibble` object including
#' SeqId columns (`seq.xxxxx.xx`) containing RFU values.
#' @return A `tibble` object with 2 columns: SeqId and eLOD.
#' @author Caleb Scheidel, Christopher Dimapasok
#' @examples
#' # filter data frame using vector of SampleId controls
#' df <- withr::with_seed(101, {
#' data.frame(
#' SampleType = rep(c("Sample", "Buffer"), each = 10),
#' SampleId = paste0("Sample_", 1:20),
#' seq.20.1.100 = runif(20, 1, 100),
#' seq.21.1.100 = runif(20, 1, 100),
#' seq.22.2.100 = runif(20, 1, 100)
#' )
#' })
#' sample_ids <- paste0("Sample_", 11:20)
#' selected_samples <- df |> filter(SampleId %in% sample_ids)
#'
#' selected_elod <- calc_eLOD(selected_samples)
#' head(selected_elod)
#' \dontrun{
#' # filter `soma_adat` object to buffer samples
#' buffer_samples <- example_data |> filter(SampleType == "Buffer")
#'
#' # calculate eLOD
#' buffer_elod <- calc_eLOD(buffer_samples)
#' head(buffer_elod)
#'
#' # use eLOD to calculate signal to noise ratio of samples
#' samples_median <- example_data |> dplyr::filter(SampleType == "Sample") |>
#' dplyr::summarise(across(starts_with("seq"), median, .names = "median_{col}")) |>
#' tidyr::pivot_longer(starts_with("median_"), names_to = "SeqId",
#' values_to = "median_signal") |>
#' dplyr::mutate(SeqId = gsub("median_seq", "seq", SeqId))
#'
#' # analytes with signal to noise > 2
#' ratios <- samples_median |>
#' dplyr::mutate(signal_to_noise = median_signal / buffer_elod$eLOD) |>
#' dplyr::filter(signal_to_noise > 2) |>
#' dplyr::arrange(desc(signal_to_noise))
#'
#' head(ratios)
#' }
#' @importFrom dplyr across mutate select summarise starts_with
#' @importFrom stats mad median
#' @importFrom tibble as_tibble is_tibble
#' @importFrom tidyr pivot_longer
#' @export
calc_eLOD <- function(data) {

  # Validate the container type. Each predicate returns a single logical, so
  # the short-circuiting scalar `||` is the appropriate operator here.
  stopifnot("`data` must be a soma_adat, tibble, or data.frame" =
              is.soma_adat(data) || is.data.frame(data) || is_tibble(data))

  # If `SampleType` is present, warn when non-buffer sample types are found.
  # This is advisory only; the calculation still proceeds on all rows.
  if ("SampleType" %in% names(data)) {
    if (any(c("Sample", "Calibrator", "QC") %in% unique(data$SampleType))) {
      warning("Ensure input data includes buffer samples only!", call. = FALSE)
    }
  }

  # SeqId columns are named `seq.<...>`. Match the prefix case-sensitively and
  # include the trailing dot: tidyselect's `starts_with("seq")` ignores case
  # by default and would also pick up columns such as `SeqId` or `Sequence`.
  seq_cols <- grep("^seq\\.", names(data), value = TRUE)

  # Fail early with an actionable message instead of letting the reshape
  # step below error on an empty column selection.
  if (length(seq_cols) == 0L) {
    stop(
      "`data` must contain at least one SeqId column (`seq.xxxxx.xx`).",
      call. = FALSE
    )
  }

  # eLOD = median + 3.3 * adjusted MAD, where the adjusted MAD is the raw MAD
  # scaled by 1.4826 (passed explicitly so the formula is self-documenting).
  elod <- function(x) {
    median(x) + 3.3 * mad(x, constant = 1.4826)
  }

  # Collapse every SeqId column to a single eLOD value, then reshape the
  # one-row wide result into a long (SeqId, eLOD) table. The temporary
  # `eLOD_` prefix marks the summarised columns for the pivot and is removed
  # again with an anchored pattern so only the leading prefix is stripped.
  result <- data |>
    summarise(
      across(dplyr::all_of(seq_cols), elod, .names = "eLOD_{col}")
    ) |>
    pivot_longer(
      starts_with("eLOD_", ignore.case = FALSE),
      names_to  = "SeqId",
      values_to = "eLOD"
    ) |>
    mutate(SeqId = sub("^eLOD_", "", SeqId)) |>
    select(SeqId, eLOD)

  # Always hand back a plain tibble regardless of the input class.
  as_tibble(result)
}
5 changes: 5 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ reference:
- cleanNames
- getAdatVersion

- title: Data Summaries
desc: Functions to assist with summarizing SOMAmer RFU values.
contents:
- calc_eLOD

- title: Data Objects
desc: Objects provided with `SomaDataIO`.
contents:
Expand Down
36 changes: 26 additions & 10 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ AptName
AssayNotes
Barcode
Biobase
Biometrics
CCC
CLI
CMD
CSF
CalQcRatio
CalReference
Codecov
ColCheck
Covance
Dimapasok
EDTA
EID
EOL
Expand All @@ -26,11 +29,11 @@ EntrezGeneSymbol
ExpressionSet
ExtIdentifier
HybControlNormScale
Kuei
LF
Lifecycle
MERCHANTABILITY
MacOS
magrittr
NormScale
ORCID
PII
Expand All @@ -39,15 +42,13 @@ PercentDilution
PlateId
PlatePosition
PlateScale
plex
proteomic
QcReference
README
RFU
RFUs
RUO
ReferenceRFU
Rmarkdown
Reproducibility
RowCheck
SELEX
SG
Expand All @@ -61,9 +62,11 @@ SampleMatrix
SampleNotes
SampleType
ScannerID
Scheidel
SeqId
SeqIds
SeqidVersion
Setdiff
SiteId
SlideId
SomaId
Expand All @@ -76,27 +79,40 @@ Tabacman
TargetFullName
TimePoint
TubeUniqueID
Un
UniProt
YAML
adat
aliquot
analyte
analytes
barcode
bioconductor
choosealicense
cli
dplyr
eLOD
eSet
frac
funder
https
intra
leftrightarrow
lifecycle
lysate
magrittr
medNormRef
mit
nd
normals
pkgdown
plex
pre
proteomic
readxl
rightarrow
rowname
rsample
subarray
tada
th
tibble
tldrlegal
www
tidyr
usethis
vectorized
78 changes: 78 additions & 0 deletions man/calc_eLOD.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions tests/testthat/test-calc_eLOD.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Setup ----
# soma_adat input filtered to "Buffer" samples and trimmed to the last 10
# analytes of `example_data`.
# `head(x, -10L)` returns everything except the final 10 elements; it avoids
# the `1:n` indexing pitfall and keeps `drop_seqs` a character vector
# throughout instead of reusing the name for a count first.
drop_seqs <- head(getAnalytes(example_data), -10L)

buffer_samples <- example_data |>
  filter(SampleType == "Buffer") |>
  select(-all_of(drop_seqs))

# Plain data.frame fixture: 10 "Sample" rows followed by 10 "Buffer" rows with
# three seq.* columns of seeded uniform draws. The draws are generated in
# column order inside a single seeded block so the values are reproducible.
df <- withr::with_seed(101, {
  data.frame(
    SampleType   = c(rep("Sample", 10), rep("Buffer", 10)),
    SampleId     = paste0("Sample_", seq_len(20)),
    seq.20.1.100 = runif(n = 20, min = 1, max = 100),
    seq.21.1.100 = runif(n = 20, min = 1, max = 100),
    seq.22.2.100 = runif(n = 20, min = 1, max = 100)
  )
})

# Keep only rows 11-20 (the "Buffer" half), selected by their SampleId.
sample_ids <- paste0("Sample_", 11:20)
selected_samples <- filter(df, SampleId %in% sample_ids)

# Testing ----
test_that("`calc_eLOD` produces a warning when it should", {
  # `example_data` is passed unfiltered, which should trigger the
  # buffer-samples-only warning.
  buffer_only_msg <- "Ensure input data includes buffer samples only!"
  expect_warning(calc_eLOD(example_data), buffer_only_msg)
})

test_that("`calc_eLOD` produces an error when it should", {
  # A bare list is not a soma_adat, tibble, or data.frame, even though it
  # carries a seq.* element.
  not_a_df <- list(SampleId = 1:3, seq.1000.123 = 100:102)
  expect_error(
    calc_eLOD(not_a_df),
    "`data` must be a soma_adat, tibble, or data.frame"
  )
})

test_that("`calc_eLOD` works on a soma_adat input filtered to buffer samples", {
  # Reference values for the first three rows of the result.
  expected_top <- tibble(
    SeqId = c("seq.9981.18", "seq.9983.97", "seq.9984.12"),
    eLOD  = c(45.08555, 52.98848, 123.02824)
  )

  elod_tbl <- calc_eLOD(buffer_samples)

  # Result is a tibble with one row per analyte and two columns.
  expect_s3_class(elod_tbl, "tbl_df")
  expect_equal(dim(elod_tbl), c(10L, 2L))
  expect_equal(head(elod_tbl, 3), expected_top, tolerance = 0.00001)
})

test_that("`calc_eLOD` works on a data.frame input", {
  # Reference values for the three seq.* columns of the seeded fixture.
  expected_top <- tibble(
    SeqId = c("seq.20.1.100", "seq.21.1.100", "seq.22.2.100"),
    eLOD  = c(168.0601, 130.7047, 115.9958)
  )

  elod_tbl <- calc_eLOD(selected_samples)

  # Result is a tibble with one row per seq.* column and two columns.
  expect_s3_class(elod_tbl, "tbl_df")
  expect_equal(dim(elod_tbl), c(3L, 2L))
  expect_equal(head(elod_tbl, 3), expected_top, tolerance = 0.0001)
})

0 comments on commit adb64fd

Please sign in to comment.