From 41729d3f8d4e265a0b6e18ed82f7a3b50d3a5032 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 01:19:31 -0700 Subject: [PATCH 01/16] syntax cleanup for combineExpression --- R/combineExpression.R | 83 ++++++++++++++++++------------------------- R/utils.R | 4 +-- 2 files changed, 37 insertions(+), 50 deletions(-) diff --git a/R/combineExpression.R b/R/combineExpression.R index 5b7d6e66..440fe655 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -82,7 +82,7 @@ combineExpression <- function(input.data, stop("Adjust the cloneSize parameter - there are groupings < 1") } cloneSize <- c(None = 0, cloneSize) - + cloneCall <- .theCall(input.data, cloneCall) if (chain != "both") { #Retain the full clone information @@ -101,9 +101,16 @@ combineExpression <- function(input.data, Con.df <- NULL meta <- .grabMeta(sc.data) cell.names <- rownames(meta) + + conDfColnamesNoCloneSize <- unique(c( + "barcode", "CTgene", "CTnt", "CTaa", "CTstrict", + cloneCall, "clonalProportion", "clonalFrequency" + )) + if (is.null(group.by) || group.by == "none") { + for (i in seq_along(input.data)) { - + data <- data.frame(input.data[[i]], stringsAsFactors = FALSE) data2 <- unique(data[,c("barcode", cloneCall)]) #This ensures all calculations are based on the cells in the SCO @@ -114,17 +121,11 @@ combineExpression <- function(input.data, clonalFrequency = dplyr::n()) colnames(data2)[1] <- cloneCall data <- merge(data, data2, by = cloneCall, all = TRUE) - if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) { - data <- data[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", cloneCall, - "clonalProportion", "clonalFrequency")] - } else { - data <- data[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", - "clonalProportion", "clonalFrequency")] } + data <- data[, conDfColnamesNoCloneSize] Con.df <- rbind.data.frame(Con.df, data) } - } else if (group.by != "none" || !is.null(group.by)) { + + } else { data <- data.frame(bind_rows(input.data), 
stringsAsFactors = FALSE) data2 <- na.omit(unique(data[,c("barcode", cloneCall, group.by)])) #This ensures all calculations are based on the cells in the SCO @@ -134,18 +135,12 @@ combineExpression <- function(input.data, summarise(clonalProportion = dplyr::n()/nrow(data2), clonalFrequency = dplyr::n()) ) - + colnames(data2)[c(1,2)] <- c(cloneCall, group.by) data <- merge(data, data2, by = c(cloneCall, group.by), all = TRUE) - if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) { - Con.df <- data[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", cloneCall, - "clonalProportion", "clonalFrequency")] - } else { - Con.df <- data[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", - "clonalProportion", "clonalFrequency")] } - } + Con.df <- data[, conDfColnamesNoCloneSize] + } + #Detect if largest cloneSize category is too small for experiment and amend #this prevents a ton of NA values in the data if(!proportion && max(na.omit(Con.df[,"clonalFrequency"])) > cloneSize[length(cloneSize)]) { @@ -158,32 +153,19 @@ combineExpression <- function(input.data, names(cloneSize)[x] <- paste0(names(cloneSize[x]), ' (', cloneSize[x-1], ' < X <= ', cloneSize[x], ')') } - - if(proportion) { - c.column <- "clonalProportion" - } else { - c.column <- "clonalFrequency" - } + + cloneRatioColname <- ifelse(proportion, "clonalProportion", "clonalFrequency") + #Assigning cloneSize for (i in 2:length(cloneSize)) { - Con.df$cloneSize <- ifelse(Con.df[,c.column] > cloneSize[i-1] & - Con.df[,c.column] <= cloneSize[i], - names(cloneSize[i]), - Con.df$cloneSize) - } - - #Formating the meta data to add - if ( cloneCall %!in% c("CTgene", "CTnt", - "CTaa", "CTstrict") ) { - PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", cloneCall, - "clonalProportion", "clonalFrequency", "cloneSize")]) - } else { - PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", "clonalProportion", - "clonalFrequency", "cloneSize")]) + Con.df$cloneSize <- 
ifelse(Con.df[, cloneRatioColname] > cloneSize[i-1] & + Con.df[, cloneRatioColname] <= cloneSize[i], + names(cloneSize[i]), + Con.df$cloneSize) } - #Removing any duplicate barcodes, should not be an issue + + #Formating the meta data to add and removing any duplicate barcodes + preMeta <- unique(Con.df[, c(conDfColnamesNoCloneSize, "cloneSize")]) dup <- PreMeta$barcode[which(duplicated(PreMeta$barcode))] PreMeta <- PreMeta[PreMeta$barcode %!in% dup,] @@ -208,7 +190,7 @@ combineExpression <- function(input.data, if (is_seurat_object(sc.data)) { if (length(which(rownames(PreMeta) %in% rownames(sc.data[[]])))/length(rownames(sc.data[[]])) < 0.01) { - warning(.warn_str) + warning(getHighBarcodeMismatchWarning()) } col.name <- names(PreMeta) %||% colnames(PreMeta) sc.data[[col.name]] <- PreMeta @@ -216,7 +198,7 @@ combineExpression <- function(input.data, rownames <- rownames(colData(sc.data)) if (length(which(rownames(PreMeta) %in% rownames))/length(rownames) < 0.01) { - warning(.warn_str) } + warning(getHighBarcodeMismatchWarning()) } combined_col_names <- unique(c(colnames(colData(sc.data)), colnames(PreMeta))) full_data <- merge(colData(sc.data), PreMeta[rownames, , drop = FALSE], by = "row.names", all.x = TRUE) @@ -237,6 +219,11 @@ combineExpression <- function(input.data, ) } return(sc.data) -} +} -.warn_str <- "< 1% of barcodes match: Ensure the barcodes in the single-cell object match the barcodes in the combined immune receptor output from scRepertoire. If getting this error, please check https://www.borch.dev/uploads/screpertoire/articles/faq." +getHighBarcodeMismatchWarning <- function() paste( + "< 1% of barcodes match: Ensure the barcodes in the single-cell object", + "match the barcodes in the combined immune receptor output from", + "scRepertoire. If getting this error, please check", + "https://www.borch.dev/uploads/screpertoire/articles/faq." 
+) diff --git a/R/utils.R b/R/utils.R index 75bd06f7..32b48661 100644 --- a/R/utils.R +++ b/R/utils.R @@ -13,10 +13,10 @@ return(data.frame) } -#Use to shuffle between chains Qile: the NA handling here *might* be related to the unnamed combineTCR bugs from the new rcpp con.df construction +#Use to shuffle between chains #' @importFrom stringr str_split #' @keywords internal -#' @author Ye-Lin Son Nick Borcherding +#' @author Ye-Lin Son, Nick Borcherding .off.the.chain <- function(dat, chain, cloneCall, check = TRUE) { chain1 <- toupper(chain) #to just make it easier if (chain1 %in% c("TRA", "TRG", "IGH")) { From cd968b541f6cf83969239d721be5cdaa3361a465 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 01:24:10 -0700 Subject: [PATCH 02/16] fix minor syntax issue with PreMeta local var in combineExpression --- R/combineExpression.R | 2 +- ...clonalclonalrarefaction-h2-p3-plot.new.svg | 60 ------------------- 2 files changed, 1 insertion(+), 61 deletions(-) delete mode 100644 tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg diff --git a/R/combineExpression.R b/R/combineExpression.R index 440fe655..dc9e82aa 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -165,7 +165,7 @@ combineExpression <- function(input.data, } #Formating the meta data to add and removing any duplicate barcodes - preMeta <- unique(Con.df[, c(conDfColnamesNoCloneSize, "cloneSize")]) + PreMeta <- unique(Con.df[, c(conDfColnamesNoCloneSize, "cloneSize")]) dup <- PreMeta$barcode[which(duplicated(PreMeta$barcode))] PreMeta <- PreMeta[PreMeta$barcode %!in% dup,] diff --git a/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg b/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg deleted file mode 100644 index 33091eda..00000000 --- a/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg +++ /dev/null @@ -1,60 +0,0 @@ - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - -0 -500 -1000 -1500 - - - - - - - - - -0.0 -0.2 -0.4 -0.6 -Sample coverage -Species diversity -clonalclonalRarefaction_h2_p3_plot - - From fbc2d9ea7e849b848205b6e7d5717eb6bc5be668 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 02:31:35 -0700 Subject: [PATCH 03/16] slightly improve readability of combineExpression again --- R/combineExpression.R | 8 +- R/typecheck.R | 31 +- R/utils.R | 2 +- .../startracdiversity-plot.new.svg | 344 --------- ...lonaloverlay-clonalproportion-plot.new.svg | 654 ----------------- .../clonalOverlay/clonaloverlay-plot.new.svg | 681 ------------------ .../clonalsizedistribution-plot.new.svg | 94 --- 7 files changed, 10 insertions(+), 1804 deletions(-) delete mode 100644 tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg delete mode 100644 tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.new.svg delete mode 100644 tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg delete mode 100644 tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg diff --git a/R/combineExpression.R b/R/combineExpression.R index dc9e82aa..f19012c8 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -67,7 +67,7 @@ combineExpression <- function(input.data, call_time <- Sys.time() # rudimentary type checking - assert_that(isAnyValidProductOfCombineContigs(input.data)) + assert_that(isListOfNonEmptyDataFrames(input.data)) assert_that(is_seurat_or_se_object(sc.data)) assert_that(is.string(cloneCall)) assert_that(is.string(chain)) @@ -87,8 +87,7 @@ combineExpression <- function(input.data, if (chain != "both") { #Retain the full clone information full.clone <- lapply(input.data, function(x) { - x[,c("barcode", cloneCall)] - + x[, c("barcode", cloneCall)] }) full.clone <- bind_rows(full.clone) for(i in seq_along(input.data)) { @@ -103,8 +102,7 @@ combineExpression <- function(input.data, cell.names <- rownames(meta) conDfColnamesNoCloneSize 
<- unique(c( - "barcode", "CTgene", "CTnt", "CTaa", "CTstrict", - cloneCall, "clonalProportion", "clonalFrequency" + "barcode", CT_lines, cloneCall, "clonalProportion", "clonalFrequency" )) if (is.null(group.by) || group.by == "none") { diff --git a/R/typecheck.R b/R/typecheck.R index 16bc922f..e9b86bc3 100644 --- a/R/typecheck.R +++ b/R/typecheck.R @@ -1,30 +1,11 @@ -# scRepertoire objects +# base R type check functions -isCombineContigsOutput <- function(obj) { - is.list(obj) && all(sapply(obj, is.data.frame)) +isListOfNonEmptyDataFrames <- function(obj) { + is.list(obj) && + all(sapply(obj, function(x) is.data.frame(x) && sum(dim(x)) > 0)) } -assertthat::on_failure(isCombineContigsOutput) <- function(call, env) { - paste0(deparse(call$obj), " is not an output of combineTCR or combineBCR") -} - -isListOfTwoCombineContigsOutputs <- function(obj) { - is.list(obj) && length(obj) == 2 && all(sapply(obj, isCombineContigsOutput)) -} -assertthat::on_failure(isListOfTwoCombineContigsOutputs) <- function(call, env) { - paste0( - deparse(call$obj), - " is not a list of two outputs of combineTCR and combineBCR" - ) -} - -isAnyValidProductOfCombineContigs <- function(obj) { - isCombineContigsOutput(obj) || isListOfTwoCombineContigsOutputs(obj) -} -assertthat::on_failure(isAnyValidProductOfCombineContigs) <- function(call, env) { - paste0( - deparse(call$obj), - " is not a valid output of combineTCR or combineBCR, nor a list of them" - ) +assertthat::on_failure(isListOfNonEmptyDataFrames) <- function(call, env) { + paste0(deparse(call$obj), " is not a list of non-empty `data.frame`s") } # bio objects diff --git a/R/utils.R b/R/utils.R index 32b48661..12b0c5f8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -340,7 +340,7 @@ return(x) } -# helper for .theCall +# helper for .theCall # Qile: on second thought - converting to x to lowercase may be a bad idea... 
.convertClonecall <- function(x) { clonecall_dictionary <- hash::hash( diff --git a/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg b/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg deleted file mode 100644 index af181bdd..00000000 --- a/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg +++ /dev/null @@ -1,344 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -migr - - - - - - - - - - -tran - - - - - - - - - - -expa - - - - - - - - - - - - - - - - - - - - -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 - -0.0 -0.1 -0.2 -0.3 - - - - - -0.0 -0.1 -0.2 -0.3 - - - - - -0.0 -0.1 -0.2 -0.3 - - - - -Index Score -StartracDiversity_plot - - diff --git a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.new.svg b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.new.svg deleted file mode 100644 index 530df22f..00000000 --- a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-clonalproportion-plot.new.svg +++ /dev/null @@ -1,654 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -P19 - - - - - - - - - - -P20 - - - - - - - - - - -P17 - - - - - - - - - - -P18 - - - - - - - --4 -0 -4 -8 - - - - - --4 -0 -4 -8 - --5 -0 -5 - - - - --5 -0 -5 - - - -Dimension 1 -Dimension 2 -clonalOverlay_clonalProportion_plot - - diff --git a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg deleted file mode 100644 index d44f8275..00000000 --- a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg +++ /dev/null @@ -1,681 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -P19 - - - - - - - - - - -P20 - - - - - - - - - - -P17 - - - - - - - - - - -P18 - - - - - - - --4 -0 -4 -8 - - - - - --4 -0 -4 -8 - --5 -0 -5 - - - - --5 -0 -5 - - - -Dimension 1 -Dimension 2 -clonalOverlay_plot - - diff --git a/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg b/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg deleted file mode 100644 index 11e5d3c7..00000000 --- a/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -P19B -P20B -P20L -P17L -P19L -P17B -P18B -P18L - - - - - - - - - - - - - - - - - - - --0.1 -0.0 -0.1 -0.2 -0.3 -0.4 -0.5 -clonalSizeDistribution_plot - - From fdc4d51ec5dd4b6c0ac556ecae0cc6c4b7f367d4 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 02:48:40 -0700 Subject: [PATCH 04/16] added extra entries to gitignore --- .gitignore | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 7163266c..b4bd4d45 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,63 @@ .Rproj.user .DS_Store .Rhistory -.Rhistory local_tests.R .RData docs vignettes/articles/scRep_example_full.rds .vscode -qile \ No newline at end of file + +# remove if linting is ever strictly enforced / a standard is set +.lintr + +# below is the github R gitignore + +# History files +.Rhistory +.Rapp.history + +# Session Data files +.RData +.RDataTmp + +# User-specific 
files +.Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron + +# pkgdown site +docs/ + +# translation temp files +po/*~ + +# RStudio Connect folder +rsconnect/ From 36aa8b68aa7b0caf34f7c94a22d0dfad64584f3f Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 23:34:22 -0700 Subject: [PATCH 05/16] finish naive version of getContigDoublets --- DESCRIPTION | 3 +- NAMESPACE | 1 + R/combineContigs.R | 2 +- R/combineExpression.R | 1 + R/getContigDoublets.R | 77 ++++++++++++++++++++++++++++++++++++++++ man/getContigDoublets.Rd | 29 +++++++++++++++ 6 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 R/getContigDoublets.R create mode 100644 man/getContigDoublets.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e4453f48..44dbc3be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,7 +43,8 @@ Imports: truncdist, utils, VGAM, - hash + hash, + purrr Suggests: BiocManager, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index 32ff5a24..1508faae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(combineTCR) export(createHTOContigList) export(exportClones) export(getCirclize) +export(getContigDoublets) export(highlightClones) export(loadContigs) export(percentAA) diff --git a/R/combineContigs.R b/R/combineContigs.R index 955bb702..9690aac9 100644 --- a/R/combineContigs.R +++ b/R/combineContigs.R @@ -208,7 +208,7 @@ combineBCR <- function(input.data, filterMulti = TRUE, filterNonproductive = TRUE) { if(is.null(samples)) { - stop("combineBCR() requires the samples paramter for the calculation of edit 
distance.") + stop("combineBCR() requires the samples parameter for the calculation of edit distance.") } # rudimentary input checking diff --git a/R/combineExpression.R b/R/combineExpression.R index f19012c8..546946f9 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -105,6 +105,7 @@ combineExpression <- function(input.data, "barcode", CT_lines, cloneCall, "clonalProportion", "clonalFrequency" )) + # Computes the clonalProportion and clonalFrequency for each clone if (is.null(group.by) || group.by == "none") { for (i in seq_along(input.data)) { diff --git a/R/getContigDoublets.R b/R/getContigDoublets.R new file mode 100644 index 00000000..780a3cbc --- /dev/null +++ b/R/getContigDoublets.R @@ -0,0 +1,77 @@ +#' Get Contig Doublets +#' +#' This function identifies potential doublets by finding common barcodes +#' between TCR and BCR outputs. It extracts unique barcodes from each list +#' of dataframes, finds the intersection of the barcodes, and joins the +#' resulting data. +#' +#' @param tcrOutput Output of [combineTCR()]. A list of data.frames containing TCR contig +#' information, each dataframe must have a `barcode` column. +#' @param bcrOutput Output of [combineBCR()]. A list of data.frames containing BCR contig +#' information, each dataframe must have a `barcode` column. +#' +#' @return A dataframe of barcodes that exist in both the TCR and BCR data, with +#' columns from both sets of data. If there are no doublets, the returned +#' data.frame will have the same colnames but no rows. 
+#' @export +#' @examples +#' # TODO +getContigDoublets <- function(tcrOutput, bcrOutput) { + + assert_that(isListOfNonEmptyDataFrames(tcrOutput)) + assert_that(isListOfNonEmptyDataFrames(bcrOutput)) + + rawBarcodeColname <- tempColnameForDfList( + c(tcrOutput, bcrOutput), "raw_barcode" + ) + + listOfTcrBcrWithRawBarcode <- list(tcrOutput, bcrOutput) %>% + lapplyOnAll(function(df) { + df[[rawBarcodeColname]] <- extractBarcodeStrings(df$barcode) + df + }) + + doubletBarcodes <- listOfTcrBcrWithRawBarcode %>% + lapplyOnAll(function(df) { + unique(df[[rawBarcodeColname]]) + }) %>% + lapply(purrr::list_flatten) %>% + purrr::reduce(intersect) + + if (length(doubletBarcodes) == 0) { + return(makeEmptyIntersectionDf(tcrOutput[[1]], bcrOutput[[1]])) + } + + listOfTcrBcrWithRawBarcode %>% + lapplyOnAll(function(df) { + df[df[[rawBarcodeColname]] %in% doubletBarcodes, ] + }) %>% + lapply(dplyr::bind_rows) %>% + purrr::reduce(autoFullJoin) +} + +tempColnameForDfList <- function(dfList, baseName = "temp") { + colnameSet <- unique(unlist(lapply(dfList, colnames))) + tail(make.unique(c(colnameSet, baseName)), 1) +} + +lapplyOnAll <- function(listOfLists, fun) { + lapply(listOfLists, function(x) lapply(x, fun)) +} + +makeEmptyIntersectionDf <- function(...) 
{ + purrr::reduce(list(...), function(df1, df2) { + autoFullJoin(df1[0, ], df2[0, ]) + }) +} + +autoFullJoin <- function(df1, df2) { + suppressMessages(dplyr::full_join(df1, df2)) +} + +extractBarcodeStrings <- function(inputStrings) { + matches <- unlist(lapply(inputStrings, function(x) { + regmatches(x, gregexpr("[a-zA-Z_]+_[ATGC]+-\\d+", x)) + })) + matches[matches != ""] +} diff --git a/man/getContigDoublets.Rd b/man/getContigDoublets.Rd new file mode 100644 index 00000000..9075f365 --- /dev/null +++ b/man/getContigDoublets.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getContigDoublets.R +\name{getContigDoublets} +\alias{getContigDoublets} +\title{Get Contig Doublets} +\usage{ +getContigDoublets(tcrOutput, bcrOutput) +} +\arguments{ +\item{tcrOutput}{Output of [combineTCR()]. A list of data.frames containing TCR contig +information, each dataframe must have a `barcode` column.} + +\item{bcrOutput}{Output of [combineBCR()]. A list of data.frames containing BCR contig +information, each dataframe must have a `barcode` column.} +} +\value{ +A dataframe of barcodes that exist in both the TCR and BCR data, with +columns from both sets of data. If there are no doublets, the returned +data.frame will have the same colnames but no rows. +} +\description{ +This function identifies potential doublets by finding common barcodes +between TCR and BCR outputs. It extracts unique barcodes from each list +of dataframes, finds the intersection of the barcodes, and joins the +resulting data. 
+} +\examples{ +# TODO +} From e9859371c8da337675e45668e29a5935ae629177 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 23:39:45 -0700 Subject: [PATCH 06/16] converted to roxygenmd and import lifecycle --- DESCRIPTION | 4 ++- NAMESPACE | 1 + R/addVariable.R | 12 +++---- R/alluvialClones.R | 12 +++---- R/clonalAbundance.R | 18 +++++----- R/clonalBias.R | 18 +++++----- R/clonalCluster.R | 22 ++++++------ R/clonalCompare.R | 24 ++++++------- R/clonalDiversity.R | 34 +++++++++--------- R/clonalHomeostasis.R | 18 +++++----- R/clonalLength.R | 16 ++++----- R/clonalNetwork.R | 14 ++++---- R/clonalOccupy.R | 12 +++---- R/clonalOverlap.R | 30 ++++++++-------- R/clonalOverlay.R | 6 ++-- R/clonalProportion.R | 16 ++++----- R/clonalQuant.R | 12 +++---- R/clonalRarefaction.R | 26 +++++++------- R/clonalScatter.R | 12 +++---- R/clonalSizeDistribution.R | 20 +++++------ R/combineContigs.R | 20 +++++------ R/combineExpression.R | 28 +++++++-------- R/createHTOContigList.R | 12 +++---- R/data.R | 8 ++--- R/exportClones.R | 14 ++++---- R/getCircilize.R | 18 +++++----- R/getContigDoublets.R | 3 ++ R/highlightClones.R | 4 +-- R/loadContigs.R | 8 ++--- R/percentAA.R | 6 ++-- R/percentGenes.R | 10 +++--- R/percentKmer.R | 16 ++++----- R/percentVJ.R | 8 ++--- R/positionalEntropy.R | 8 ++--- R/positionalProperty.R | 16 ++++----- R/scRepertoire-package.R | 1 + R/startracDiversity.R | 16 ++++----- R/subsetClone.R | 8 ++--- R/vizGenes.R | 6 ++-- man/StartracDiversity.Rd | 24 ++++++------- man/addVariable.Rd | 14 ++++---- man/alluvialClones.Rd | 26 +++++++------- man/clonalAbundance.Rd | 28 +++++++-------- man/clonalBias.Rd | 22 ++++++------ man/clonalCluster.Rd | 28 +++++++-------- man/clonalCompare.Rd | 32 ++++++++--------- man/clonalDiversity.Rd | 40 ++++++++++----------- man/clonalHomeostasis.Rd | 26 +++++++------- man/clonalLength.Rd | 28 +++++++-------- man/clonalNetwork.Rd | 22 ++++++------ man/clonalOccupy.Rd | 18 +++++----- man/clonalOverlap.Rd | 40 
++++++++++----------- man/clonalOverlay.Rd | 8 ++--- man/clonalProportion.Rd | 24 ++++++------- man/clonalQuant.Rd | 18 +++++----- man/clonalRarefaction.Rd | 36 +++++++++---------- man/clonalScatter.Rd | 22 ++++++------ man/clonalSizeDistribution.Rd | 42 +++++++++++------------ man/combineBCR.Rd | 38 ++++++++++---------- man/combineExpression.Rd | 38 ++++++++++---------- man/combineTCR.Rd | 22 ++++++------ man/contig_list.Rd | 6 ++-- man/createHTOContigList.Rd | 22 ++++++------ man/exportClones.Rd | 16 ++++----- man/figures/lifecycle-archived.svg | 21 ++++++++++++ man/figures/lifecycle-defunct.svg | 21 ++++++++++++ man/figures/lifecycle-deprecated.svg | 21 ++++++++++++ man/figures/lifecycle-experimental.svg | 21 ++++++++++++ man/figures/lifecycle-maturing.svg | 21 ++++++++++++ man/figures/lifecycle-questioning.svg | 21 ++++++++++++ man/figures/lifecycle-soft-deprecated.svg | 21 ++++++++++++ man/figures/lifecycle-stable.svg | 29 ++++++++++++++++ man/figures/lifecycle-superseded.svg | 21 ++++++++++++ man/getCirclize.Rd | 22 ++++++------ man/getContigDoublets.Rd | 10 +++--- man/highlightClones.Rd | 10 +++--- man/loadContigs.Rd | 36 +++++++++---------- man/mini_contig_list.Rd | 12 +++---- man/percentAA.Rd | 8 ++--- man/percentGenes.Rd | 14 ++++---- man/percentKmer.Rd | 14 ++++---- man/percentVJ.Rd | 10 +++--- man/positionalEntropy.Rd | 18 +++++----- man/positionalProperty.Rd | 12 +++---- man/scRep_example.Rd | 8 ++--- man/subsetClones.Rd | 8 ++--- man/vizGenes.Rd | 22 ++++++------ 87 files changed, 892 insertions(+), 686 deletions(-) create mode 100644 man/figures/lifecycle-archived.svg create mode 100644 man/figures/lifecycle-defunct.svg create mode 100644 man/figures/lifecycle-deprecated.svg create mode 100644 man/figures/lifecycle-experimental.svg create mode 100644 man/figures/lifecycle-maturing.svg create mode 100644 man/figures/lifecycle-questioning.svg create mode 100644 man/figures/lifecycle-soft-deprecated.svg create mode 100644 
man/figures/lifecycle-stable.svg create mode 100644 man/figures/lifecycle-superseded.svg diff --git a/DESCRIPTION b/DESCRIPTION index 44dbc3be..82c27ffa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,7 +44,8 @@ Imports: utils, VGAM, hash, - purrr + purrr, + lifecycle Suggests: BiocManager, BiocStyle, @@ -64,3 +65,4 @@ LinkingTo: Rcpp URL: https://www.borch.dev/uploads/scRepertoire/ BugReports: https://github.com/ncborcherding/scRepertoire/issues +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 1508faae..35a6d396 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -103,6 +103,7 @@ importFrom(igraph,graph_from_data_frame) importFrom(igraph,graph_from_edgelist) importFrom(igraph,set_vertex_attr) importFrom(igraph,union) +importFrom(lifecycle,deprecated) importFrom(methods,slot) importFrom(plyr,join) importFrom(plyr,llply) diff --git a/R/addVariable.R b/R/addVariable.R index d09958a1..e2b79e28 100644 --- a/R/addVariable.R +++ b/R/addVariable.R @@ -1,10 +1,10 @@ #' Adding variables after combineTCR() or combineBCR() #' -#' This function adds variables to the product of \code{\link{combineTCR}}, -#' or \code{\link{combineBCR}} to be used in later visualizations. +#' This function adds variables to the product of [combineTCR()], +#' or [combineBCR()] to be used in later visualizations. #' For each element, the function will add a column (labeled by -#' \strong{variable.name}) with the variable. The length of the -#' \strong{variables} parameter needs to match the length of the +#' **variable.name**) with the variable. The length of the +#' **variables** parameter needs to match the length of the #' combined object. #' #' @examples @@ -15,8 +15,8 @@ #' variable.name = "Type", #' variables = rep(c("B", "L"), 4)) #' -#' @param input.data The product of \code{\link{combineTCR}} or -#' \code{\link{combineBCR}}. +#' @param input.data The product of [combineTCR()] or +#' [combineBCR()]. #' @param variable.name The new column name/header. 
#' @param variables The exact values to add to each element of the list. #' @export diff --git a/R/alluvialClones.R b/R/alluvialClones.R index 66642b9d..e35e0900 100644 --- a/R/alluvialClones.R +++ b/R/alluvialClones.R @@ -20,7 +20,7 @@ #' Alluvial plotting for single-cell object meta data #' #' View the proportional contribution of clones by Seurat or SCE object -#' meta data after \code{\link{combineExpression}}. The visualization +#' meta data after [combineExpression()]. The visualization #' is based on the ggalluvial package, which requires the aesthetics #' to be part of the axes that are visualized. Therefore, alpha, facet, #' and color should be part of the the axes you wish to view or will @@ -46,10 +46,10 @@ #' color = "ident") #' #' @param sc.data The single-cell object to visualize -#' after \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' after [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". @@ -61,7 +61,7 @@ #' @param exportTable Exports a table of the data into the global #' environment in addition to the visualization. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. 
#' #' @import ggplot2 #' @importFrom ggalluvial StatStratum geom_flow geom_stratum to_lodes_form geom_alluvium diff --git a/R/clonalAbundance.R b/R/clonalAbundance.R index 3e8bc946..f27c083e 100644 --- a/R/clonalAbundance.R +++ b/R/clonalAbundance.R @@ -2,11 +2,11 @@ #' #' Displays the number of clones at specific frequencies by sample #' or group. Visualization can either be a line graph ( -#' \strong{scale} = FALSE) using calculated numbers or density -#' plot (\strong{scale} = TRUE). Multiple sequencing runs can +#' **scale** = FALSE) using calculated numbers or density +#' plot (**scale** = TRUE). Multiple sequencing runs can #' be group together using the group parameter. If a matrix #' output for the data is preferred, set -#' \strong{exportTable} = TRUE. +#' **exportTable** = TRUE. #' #' @examples #' #Making combined contig data @@ -17,11 +17,11 @@ #' cloneCall = "gene", #' scale = FALSE) #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL" @@ -33,7 +33,7 @@ #' @param exportTable Returns the data frame used for forming the graph #' to the visualization. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. 
#' @importFrom ggplot2 ggplot #' @export #' @concept Visualizing_Clones diff --git a/R/clonalBias.R b/R/clonalBias.R index b90400ca..afcef489 100644 --- a/R/clonalBias.R +++ b/R/clonalBias.R @@ -1,12 +1,12 @@ #' Examine skew of clones towards a cluster or compartment #' #' The metric seeks to quantify how individual clones are skewed towards -#' a specific cellular compartment or cluster. A clone bias of \strong{1} - +#' a specific cellular compartment or cluster. A clone bias of **1** - #' indicates that a clone is composed of cells from a single -#' compartment or cluster, while a clone bias of \strong{0} - matches the +#' compartment or cluster, while a clone bias of **0** - matches the #' background subtype distribution. Please read and cite the following -#' \href{https://pubmed.ncbi.nlm.nih.gov/35829695/}{manuscript} -#' if using \code{\link{clonalBias}}. +#' [manuscript](https://pubmed.ncbi.nlm.nih.gov/35829695/) +#' if using [clonalBias()]. #' #' @examples #' #Making combined contig data @@ -30,10 +30,10 @@ #' min.expand = 2) #' #' -#' @param sc.data The single-cell object after \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param sc.data The single-cell object after [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param group.by The variable to use for calculating bias #' @param split.by The variable to use for calculating the baseline frequencies. @@ -42,7 +42,7 @@ #' @param min.expand clone frequency cut off for the purpose of comparison. #' @param exportTable Returns the data frame used for forming the graph. 
#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. #' @import ggplot2 #' @importFrom quantreg rqss #' @importFrom stringr str_sort diff --git a/R/clonalCluster.R b/R/clonalCluster.R index 3253a1a7..e8bd4f61 100644 --- a/R/clonalCluster.R +++ b/R/clonalCluster.R @@ -2,9 +2,9 @@ #' #' This function uses edit distances of either the nucleotide or amino acid #' sequences of the CDR3 and V genes to cluster similar TCR/BCRs together. -#' As a default, the function takes the input from \code{\link{combineTCR}}, -#' \code{\link{combineBCR}} or \code{\link{combineExpression}} and amends a -#' cluster to the data frame or meta data. If \strong{exportGraph} is set +#' As a default, the function takes the input from [combineTCR()], +#' [combineBCR()] or [combineExpression()] and amends a +#' cluster to the data frame or meta data. If **exportGraph** is set #' to TRUE, the function returns an igraph object of the connected sequences. #' If multiple sequences per chain are present, this function only compares #' the first sequence. @@ -19,21 +19,21 @@ #' chain = "TRA", #' sequence = "aa") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}} or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()] or [combineExpression()]. #' @param chain Indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". -#' @param sequence Clustering based on either \strong{"aa"} or -#' \strong{"nt"}. +#' @param sequence Clustering based on either **"aa"** or +#' **"nt"**. #' @param samples The specific samples to isolate for visualization. #' @param threshold The normalized edit distance to consider. #' The higher the number the more similarity of sequence will be #' used for clustering. #' @param group.by The column header used for to group contigs. 
-#' If (\strong{NULL}), clusters will be calculated across samples. +#' If (**NULL**), clusters will be calculated across samples. #' @param exportGraph Return an igraph object of connected -#' sequences (\strong{TRUE}) or the amended input with a -#' new cluster-based variable (\strong{FALSE}). +#' sequences (**TRUE**) or the amended input with a +#' new cluster-based variable (**FALSE**). #' @importFrom stringdist stringdist #' @importFrom igraph set_vertex_attr V union #' @importFrom plyr join @@ -207,4 +207,4 @@ clonalCluster <- function(input.data, } - \ No newline at end of file + diff --git a/R/clonalCompare.R b/R/clonalCompare.R index e21f8863..e81aa00b 100644 --- a/R/clonalCompare.R +++ b/R/clonalCompare.R @@ -2,9 +2,9 @@ #' #' This function produces an alluvial or area graph of the proportion of #' the indicated clones for all or selected samples (using the -#' \strong{samples} parameter). Individual clones can be selected -#' using the \strong{clones} parameter with the specific sequence of -#' interest or using the \strong{top.clones} parameter with the top +#' **samples** parameter). Individual clones can be selected +#' using the **clones** parameter with the specific sequence of +#' interest or using the **top.clones** parameter with the top #' n clones by proportion to be visualized. #' #' @examples @@ -17,11 +17,11 @@ #' samples = c("P17B", "P17L"), #' cloneCall="aa") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. 
+#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL" @@ -34,15 +34,15 @@ #' @param relabel.clones Simplify the legend of the graph by returning #' clones that are numerically indexed #' @param group.by If using a single-cell object, the column header -#' to group the new list. \strong{NULL} will return the active +#' to group the new list. **NULL** will return the active #' identity or cluster #' @param order.by A vector of specific plotting order or "alphanumeric" #' to plot groups in order -#' @param graph The type of graph produced, either \strong{"alluvial"} -#' or \strong{"area"} +#' @param graph The type of graph produced, either **"alluvial"** +#' or **"area"** #' @param exportTable Returns the data frame used for forming the graph #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @import ggplot2 #' @importFrom stringr str_sort #' diff --git a/R/clonalDiversity.R b/R/clonalDiversity.R index 99dd42f1..692fd3c1 100644 --- a/R/clonalDiversity.R +++ b/R/clonalDiversity.R @@ -1,32 +1,32 @@ #' Calculate the clonal diversity for samples or groupings #' -#' This function calculates traditional measures of diversity - \strong{Shannon}, -#' \strong{inverse Simpson}, \strong{normalized entropy}, \strong{Gini-Simpson}, \strong{Chao1 index}, and -#' \strong{abundance-based coverage estimators (ACE)} measure of species evenness by sample or group. +#' This function calculates traditional measures of diversity - **Shannon**, +#' **inverse Simpson**, **normalized entropy**, **Gini-Simpson**, **Chao1 index**, and +#' **abundance-based coverage estimators (ACE)** measure of species evenness by sample or group. 
#' The function automatically down samples the diversity metrics using -#' 100 boot straps (\strong{n.boots = 100}) and outputs the mean of the values. +#' 100 boot straps (**n.boots = 100**) and outputs the mean of the values. #'The group parameter can be used to condense the individual -#' samples. If a matrix output for the data is preferred, set \strong{exportTable} = TRUE. +#' samples. If a matrix output for the data is preferred, set **exportTable** = TRUE. #' #' @details #' The formulas for the indices and estimators are as follows: #' -#' \strong{Shannon Index:} +#' **Shannon Index:** #' \deqn{Index = - \sum p_i * \log(p_i)} #' -#' \strong{Inverse Simpson Index:} +#' **Inverse Simpson Index:** #' \deqn{Index = \frac{1}{(\sum_{i=1}^{S} p_i^2)}} #' -#' \strong{Normalized Entropy:} +#' **Normalized Entropy:** #' \deqn{Index = -\frac{\sum_{i=1}^{S} p_i \ln(p_i)}{\ln(S)}} #' -#' \strong{Gini-Simpson Index:} +#' **Gini-Simpson Index:** #' \deqn{Index = 1 - \sum_{i=1}^{S} p_i^2} #' -#' \strong{Chao1 Index:} +#' **Chao1 Index:** #' \deqn{Index = S_{obs} + \frac{n_1(n_1-1)}{2*n_2+1}} #' -#' \strong{Abundance-based Coverage Estimator (ACE):} +#' **Abundance-based Coverage Estimator (ACE):** #' \deqn{Index = S_{abund} + \frac{S_{rare}}{C_{ace}} + \frac{F_1}{C_{ace}}} #' #' Where: @@ -44,11 +44,11 @@ #' "P19B","P19L", "P20B", "P20L")) #' clonalDiversity(combined, cloneCall = "gene") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. 
+#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL" @@ -61,7 +61,7 @@ #' @param exportTable Exports a table of the data into the global environment #' in addition to the visualization #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @param n.boots number of bootstraps to down sample in order to #' get mean diversity #' @param return.boots export boot strapped values calculated - diff --git a/R/clonalHomeostasis.R b/R/clonalHomeostasis.R index e547ac2f..4162891a 100644 --- a/R/clonalHomeostasis.R +++ b/R/clonalHomeostasis.R @@ -1,11 +1,11 @@ #' Examining the clonal homeostasis of the repertoire #' #' This function calculates the space occupied by clone proportions. -#' The grouping of these clones is based on the parameter \strong{cloneSize}, -#' at default, \strong{cloneSize} will group the clones into bins of Rare = 0 +#' The grouping of these clones is based on the parameter **cloneSize**, +#' at default, **cloneSize** will group the clones into bins of Rare = 0 #' to 0.0001, Small = 0.0001 to 0.001, etc. To adjust the proportions, #' change the number or labeling of the cloneSize parameter. If a matrix -#' output for the data is preferred, set \strong{exportTable} = TRUE. +#' output for the data is preferred, set **exportTable** = TRUE. #' #' @examples #' #Making combined contig data @@ -14,12 +14,12 @@ #' "P19B","P19L", "P20B", "P20L")) #' clonalHomeostasis(combined, cloneCall = "gene") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. 
#' @param cloneSize The cut points of the proportions. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". @@ -29,7 +29,7 @@ #' @param exportTable Exports a table of the data into the global #' environment in addition to the visualization. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. #' @import ggplot2 #' @importFrom stringr str_split #' @importFrom reshape2 melt diff --git a/R/clonalLength.R b/R/clonalLength.R index a1817c3b..a674da69 100644 --- a/R/clonalLength.R +++ b/R/clonalLength.R @@ -1,10 +1,10 @@ #' Demonstrate the distribution of clonal length #' -#' This function displays either the nucleotide (\strong{nt}) or amino -#' acid (\strong{aa}) sequence length. The sequence length visualized +#' This function displays either the nucleotide (**nt**) or amino +#' acid (**aa**) sequence length. The sequence length visualized #' can be selected using the chains parameter, either the combined clone #' (both chains) or across all single chains. Visualization can either -#' be a histogram or if \strong{scale} = TRUE, the output will +#' be a histogram or if **scale** = TRUE, the output will #' be a density plot. Multiple sequencing runs can be group together #' using the group.by parameter. 
#' @@ -15,10 +15,10 @@ #' "P19B","P19L", "P20B", "P20L")) #' clonalLength(combined, cloneCall="aa", chain = "both") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}} -#' @param cloneCall How to call the clone - CDR3 nucleotide (\strong{nt}) -#' or CDR3 amino acid (\strong{aa}) +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()] +#' @param cloneCall How to call the clone - CDR3 nucleotide (**nt**) +#' or CDR3 amino acid (**aa**) #' @param group.by The variable to use for grouping #' @param order.by A vector of specific plotting order or "alphanumeric" #' to plot groups in order description @@ -28,7 +28,7 @@ #' e.g. "both", "TRA", "TRG", "IGH", "IGL" #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @importFrom stringr str_split #' @importFrom ggplot2 ggplot #' @export diff --git a/R/clonalNetwork.R b/R/clonalNetwork.R index 9b15d7ce..2f46fb59 100644 --- a/R/clonalNetwork.R +++ b/R/clonalNetwork.R @@ -25,22 +25,22 @@ #' group.by = "seurat_clusters") #' } #' -#' @param sc.data The single-cell object after \code{\link{combineExpression}}. +#' @param sc.data The single-cell object after [combineExpression()]. #' @param reduction The name of the dimensional reduction of the #' single-cell object. #' @param group.by The variable to use for the nodes. -#' @param filter.clones Use to select the top n clones (e.g., \strong{filter.clones} +#' @param filter.clones Use to select the top n clones (e.g., **filter.clones** #' = 2000) or n of clones based on the minimum number of all the comparators -#' (e.g., \strong{filter.clone} = "min"). +#' (e.g., **filter.clones** = "min"). #' @param filter.identity Display the network for a specific level of the #' indicated identity.
#' @param filter.proportion Remove clones from the network below a specific #' proportion. #' @param filter.graph Remove the reciprocal edges from the half of the graph, #' allowing for cleaner visualization. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". @@ -49,7 +49,7 @@ #' across multiple identity groups and ordered by the total number #' of clone copies. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. #' @import ggplot2 #' @importFrom stringr str_sort diff --git a/R/clonalOccupy.R b/R/clonalOccupy.R index 0b30bb45..5d8b63bf 100644 --- a/R/clonalOccupy.R +++ b/R/clonalOccupy.R @@ -1,11 +1,11 @@ #' Visualize the number of single cells with cloneSizes by cluster #' #' View the count of clones frequency group in Seurat or SCE object -#' meta data after \code{\link{combineExpression}}. The visualization -#' will take the new meta data variable \strong{"cloneSize"} and +#' meta data after [combineExpression()]. The visualization +#' will take the new meta data variable **"cloneSize"** and #' plot the number of cells with each designation using a secondary #' variable, like cluster. Credit to the idea goes to Drs. Carmona -#' and Andreatta and their work with \href{https://github.com/carmonalab/ProjecTILs}{ProjectTIL}. +#' and Andreatta and their work with [ProjecTILs](https://github.com/carmonalab/ProjecTILs).
#' #' @examples #' #Getting the combined contigs @@ -23,7 +23,7 @@ #' clonalOccupy(scRep_example, x.axis = "ident") #' table <- clonalOccupy(scRep_example, x.axis = "ident", exportTable = TRUE) #' -#' @param sc.data The single-cell object after \code{\link{combineExpression}} +#' @param sc.data The single-cell object after [combineExpression()] #' @param x.axis The variable in the meta data to graph along the x.axis. #' @param label Include the number of clone in each category by x.axis variable #' @param facet.by The column header used for faceting the graph @@ -34,7 +34,7 @@ #' @param exportTable Exports a table of the data into the global #' environment in addition to the visualization #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @importFrom dplyr %>% group_by mutate count #' @importFrom reshape2 melt #' @import ggplot2 @@ -118,4 +118,4 @@ clonalOccupy <- function(sc.data, geom_text(aes(label = n), position = position_stack(vjust = 0.5)) } plot -} \ No newline at end of file +} diff --git a/R/clonalOverlap.R b/R/clonalOverlap.R index fd215862..a0b03c9e 100644 --- a/R/clonalOverlap.R +++ b/R/clonalOverlap.R @@ -2,27 +2,27 @@ #' #' This functions allows for the calculation and visualizations of #' various overlap metrics for clones. The methods include overlap -#' coefficient (\strong{overlap}), Morisita's overlap index -#' (\strong{morisita}), Jaccard index (\strong{jaccard}), cosine -#' similarity (\strong{cosine}) or the exact number of clonal -#' overlap (\strong{raw}). +#' coefficient (**overlap**), Morisita's overlap index +#' (**morisita**), Jaccard index (**jaccard**), cosine +#' similarity (**cosine**) or the exact number of clonal +#' overlap (**raw**). 
#' #' @details #' The formulas for the indices are as follows: #' -#' \strong{Overlap Coefficient:} +#' **Overlap Coefficient:** #' \deqn{overlap = \frac{\sum \min(a, b)}{\min(\sum a, \sum b)}} #' -#' \strong{Raw Count Overlap:} +#' **Raw Count Overlap:** #' \deqn{raw = \sum \min(a, b)} #' -#' \strong{Morisita Index:} +#' **Morisita Index:** #' \deqn{morisita = \frac{\sum a b}{(\sum a)(\sum b)}} #' -#' \strong{Jaccard Index:} +#' **Jaccard Index:** #' \deqn{jaccard = \frac{\sum \min(a, b)}{\sum a + \sum b - \sum \min(a, b)}} #' -#' \strong{Cosine Similarity:} +#' **Cosine Similarity:** #' \deqn{cosine = \frac{\sum a b}{\sqrt{(\sum a^2)(\sum b^2)}}} #' #' Where: @@ -40,11 +40,11 @@ #' cloneCall = "aa", #' method = "jaccard") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}} -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()] +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data #' @param chain indicate if both or a specific chain should be used - #' e.g. 
"both", "TRA", "TRG", "IGH", "IGL" @@ -55,7 +55,7 @@ #' to plot groups in order #' @param exportTable Returns the data frame used for forming the graph #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @importFrom stringr str_sort str_to_title #' @importFrom reshape2 melt #' @importFrom stats quantile diff --git a/R/clonalOverlay.R b/R/clonalOverlay.R index 85043dfd..0717b341 100644 --- a/R/clonalOverlay.R +++ b/R/clonalOverlay.R @@ -3,7 +3,7 @@ #' This function allows the user to visualize the clonal expansion by overlaying the #' cells with specific clonal frequency onto the dimensional reduction plots in Seurat. #' Credit to the idea goes to Drs Andreatta and Carmona and their work with -#' \href{https://github.com/carmonalab/ProjecTILs}{ProjectTIL}. +#' [ProjectTIL](https://github.com/carmonalab/ProjecTILs). #' #' @examples #' #Getting the combined contigs @@ -23,7 +23,7 @@ #' cutpoint = 3, #' bins = 5) #' -#' @param sc.data The single-cell object after \code{\link{combineExpression}}. +#' @param sc.data The single-cell object after [combineExpression()]. #' @param reduction The dimensional reduction to visualize. #' @param cut.category Meta data variable of the single-cell object to use for #' filtering. @@ -81,4 +81,4 @@ clonalOverlay <- function(sc.data, plot <- plot + facet_wrap(~facet.by) } return(plot) -} \ No newline at end of file +} diff --git a/R/clonalProportion.R b/R/clonalProportion.R index b2ee199e..0d2c7b23 100644 --- a/R/clonalProportion.R +++ b/R/clonalProportion.R @@ -2,10 +2,10 @@ #' #' This function calculates the relative clonal space occupied by the #' clones. The grouping of these clones is based on the parameter -#' \strong{clonalSplit}, at default, \strong{clonalSplit} will group the clones +#' **clonalSplit**, at default, **clonalSplit** will group the clones #' into bins of 1:10, 11:100, 101:1001, etc. 
To adjust the clones #' selected, change the numbers in the variable split. If a matrix output -#' for the data is preferred, set \strong{exportTable} = TRUE. +#' for the data is preferred, set **exportTable** = TRUE. #' #' @examples #' #Making combined contig data @@ -14,12 +14,12 @@ #' "P19B","P19L", "P20B", "P20L")) #' clonalProportion(combined, cloneCall = "gene") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. #' @param clonalSplit The cut points for the specific clones -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL" @@ -29,7 +29,7 @@ #' @param exportTable Exports a table of the data into the global. #' environment in addition to the visualization #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' #' @import ggplot2 #' @importFrom stringr str_sort diff --git a/R/clonalQuant.R b/R/clonalQuant.R index 01e7ed8a..d3264ebd 100644 --- a/R/clonalQuant.R +++ b/R/clonalQuant.R @@ -13,11 +13,11 @@ #' cloneCall="strict", #' scale = TRUE) #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. 
-#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL" @@ -27,7 +27,7 @@ #' @param scale Converts the graphs into percentage of unique clones #' @param exportTable Returns the data frame used for forming the graph #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @import ggplot2 #' @export #' @concept Visualizing_Clones diff --git a/R/clonalRarefaction.R b/R/clonalRarefaction.R index 7ec5d02a..58c71167 100644 --- a/R/clonalRarefaction.R +++ b/R/clonalRarefaction.R @@ -1,11 +1,11 @@ #' Calculate rarefaction based on the abundance of clones #' -#' This functions uses the Hill numbers of order q: species richness (\strong{q = 0}), -#' Shannon diversity (\strong{q = 1}), the exponential of Shannon entropy and Simpson -#' diversity (\strong{q = 2}, the inverse of Simpson concentration) to compute diversity +#' This function uses the Hill numbers of order q: species richness (**q = 0**), +#' Shannon diversity (**q = 1**), the exponential of Shannon entropy and Simpson +#' diversity (**q = 2**, the inverse of Simpson concentration) to compute diversity #' estimates for rarefaction and extrapolation. The function relies on the -#' \code{\link[iNEXT]{iNEXT}} R package. Please read and cite the -#' \href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript} +#' [iNEXT::iNEXT()] R package.
Please read and cite the +#' [manuscript](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613) #' if using this function. The input into the iNEXT calculation is abundance, #' incidence-based calculations are not supported. #' @@ -17,18 +17,18 @@ #' clonalRarefaction(combined[c(1,2)], cloneCall = "gene", n.boots = 3) #' #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". #' @param group.by The variable to use for grouping. #' @param plot.type sample-size-based rarefaction/extrapolation curve -#' (\code{type = 1}); sample completeness curve (\code{type = 2}); -#' coverage-based rarefaction/extrapolation curve (\code{type = 3}). +#' (`type = 1`); sample completeness curve (`type = 2`); +#' coverage-based rarefaction/extrapolation curve (`type = 3`). #' @param hill.numbers The Hill numbers to be plotted out #' (0 - species richness, 1 - Shannon, 2 - Simpson) #' @param n.boots The number of bootstraps to downsample in order @@ -36,7 +36,7 @@ #' @param exportTable Exports a table of the data into the global #' environment in addition to the visualization. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. 
#' #' @importFrom iNEXT iNEXT ggiNEXT #' @import ggplot2 diff --git a/R/clonalScatter.R b/R/clonalScatter.R index 0e978422..690d9297 100644 --- a/R/clonalScatter.R +++ b/R/clonalScatter.R @@ -15,11 +15,11 @@ #' y.axis = "P17L", #' graph = "proportion") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". @@ -31,7 +31,7 @@ #' @param graph graph either the clonal "proportion" or "count". #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. #' #' @import ggplot2 #' diff --git a/R/clonalSizeDistribution.R b/R/clonalSizeDistribution.R index 5d76e6a2..20e9b955 100644 --- a/R/clonalSizeDistribution.R +++ b/R/clonalSizeDistribution.R @@ -3,10 +3,10 @@ #' This function produces a hierarchical clustering of clones by sample #' using discrete gamma-GPD spliced threshold model. If using this #' model please read and cite powerTCR (more info available at -#' \href{https://pubmed.ncbi.nlm.nih.gov/30485278/}{PMID: 30485278}). +#' [PMID: 30485278](https://pubmed.ncbi.nlm.nih.gov/30485278/)). 
#' #' @details -#' The probability density function (pdf) for the \strong{Generalized Pareto Distribution (GPD)} is given by: +#' The probability density function (pdf) for the **Generalized Pareto Distribution (GPD)** is given by: #' \deqn{f(x|\mu, \sigma, \xi) = \frac{1}{\sigma} \left( 1 + \xi \left( \frac{x - \mu}{\sigma} \right) \right)^{-\left( \frac{1}{\xi} + 1 \right)}} #' #' Where: @@ -17,7 +17,7 @@ #' \item{\eqn{x \ge \mu} if \eqn{\xi \ge 0} and \eqn{\mu \le x \le \mu - \sigma/\xi} if \eqn{\xi < 0}} #' } #' -#' The probability density function (pdf) for the \strong{Gamma Distribution} is given by: +#' The probability density function (pdf) for the **Gamma Distribution** is given by: #' \deqn{f(x|\alpha, \beta) = \frac{x^{\alpha-1} e^{-x/\beta}}{\beta^\alpha \Gamma(\alpha)}} #' #' Where: @@ -35,11 +35,11 @@ #' "P19B","P19L", "P20B", "P20L")) #' clonalSizeDistribution(combined, cloneCall = "strict", method="ward.D2") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". @@ -49,7 +49,7 @@ #' @param group.by The variable to use for grouping. #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. 
#' @import ggplot2 #' @importFrom dplyr bind_rows #' @importFrom ggdendro dendro_data segment label @@ -654,4 +654,4 @@ pdiscgpd <- function(q, thresh, sigma, xi, phiu){ (log(dpareto(t, scale=Cminq, shape=alphaq))-log(M)) } out -} \ No newline at end of file +} diff --git a/R/combineContigs.R b/R/combineContigs.R index 9690aac9..0c40abba 100644 --- a/R/combineContigs.R +++ b/R/combineContigs.R @@ -21,12 +21,12 @@ utils::globalVariables(c( #' @title Combining the list of T cell receptor contigs into clones #' #' @description This function consolidates a list of TCR sequencing results to -#' the level of the individual cell barcodes. Using the \strong{samples} and -#' \strong{ID} parameters, the function will add the strings as prefixes to +#' the level of the individual cell barcodes. Using the **samples** and +#' **ID** parameters, the function will add the strings as prefixes to #' prevent issues with repeated barcodes. The resulting new barcodes will #' need to match the Seurat or SCE object in order to use, -#' \code{\link{combineExpression}}. Several levels of filtering exist - -#' \emph{removeNA}, \emph{removeMulti}, or \emph{filterMulti} are parameters +#' [combineExpression()]. Several levels of filtering exist - +#' *removeNA*, *removeMulti*, or *filterMulti* are parameters #' that control how the function deals with barcodes with multiple chains #' recovered. #' @@ -36,7 +36,7 @@ utils::globalVariables(c( #' "P19B","P19L", "P20B", "P20L")) #' #' @param input.data List of filtered contig annotations or -#' outputs from \code{\link{loadContigs}}. +#' outputs from [loadContigs()]. #' @param samples The labels of samples (recommended). #' @param ID The additional sample labeling (optional). #' @param removeNA This will remove any chain without values. @@ -159,14 +159,14 @@ combineTCR <- function(input.data, #' of the individual cell barcodes. 
Using the samples and ID parameters, #' the function will add the strings as prefixes to prevent issues with #' repeated barcodes. The resulting new barcodes will need to match the -#' Seurat or SCE object in order to use, \code{\link{combineExpression}}. -#' Unlike \code{\link{combineTCR}}, combineBCR produces a column -#' \strong{CTstrict} of an index of nucleotide sequence and the +#' Seurat or SCE object in order to use, [combineExpression()]. +#' Unlike [combineTCR()], combineBCR produces a column +#' **CTstrict** of an index of nucleotide sequence and the #' corresponding V gene. This index automatically calculates the #' Levenshtein distance between sequences with the same V gene and will #' index sequences using a normalized Levenshtein distance with the same #' ID. After which, clone clusters are called using the -#' \code{\link[igraph]{components}} function. Clones that are clustered +#' [igraph::components()] function. Clones that are clustered #' across multiple sequences will then be labeled with "Cluster" in the #' CTstrict header. #' @@ -178,7 +178,7 @@ combineTCR <- function(input.data, #' threshold = 0.85) #' #' @param input.data List of filtered contig annotations or outputs from -#' \code{\link{loadContigs}}. +#' [loadContigs()]. #' @param samples The labels of samples (required). #' @param ID The additional sample labeling (optional). #' @param call.related.clones Use the nucleotide sequence and V gene diff --git a/R/combineExpression.R b/R/combineExpression.R index 546946f9..227731d6 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -3,11 +3,11 @@ #' This function adds the immune receptor information to the Seurat or #' SCE object to the meta data. By default this function also calculates #' the frequencies and proportion of the clones by sequencing -#' run (\strong{group.by} = NULL). To change how the frequencies/proportions -#' are calculated, select a column header for the \strong{group.by} variable. 
-#' Importantly, before using \code{\link{combineExpression}} ensure the +#' run (**group.by** = NULL). To change how the frequencies/proportions +#' are calculated, select a column header for the **group.by** variable. +#' Importantly, before using [combineExpression()] ensure the #' barcodes of the single-cell object object match the barcodes in the output -#' of the \code{\link{combineTCR}} or \code{\link{combineBCR}}. +#' of the [combineTCR()] or [combineBCR()]. #' #' @examples #' #Getting the combined contigs @@ -21,23 +21,23 @@ #' #Using combineExpresion() #' scRep_example <- combineExpression(combined, scRep_example) #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}} or a list of -#' both c(\code{\link{combineTCR}}, \code{\link{combineBCR}}). +#' @param input.data The product of [combineTCR()], +#' [combineBCR()] or a list of +#' both c([combineTCR()], [combineBCR()]). #' @param sc.data The Seurat or Single-Cell Experiment (SCE) object to attach -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". #' @param group.by The column label in the combined clones in which -#' clone frequency will be calculated. \strong{NULL} or \strong{"none"} will +#' clone frequency will be calculated. **NULL** or **"none"** will #' keep the format of input.data. -#' @param proportion Whether to proportion (\strong{TRUE}) or total -#' frequency (\strong{FALSE}) of the clone based on the group.by variable. 
+#' @param proportion Whether to proportion (**TRUE**) or total +#' frequency (**FALSE**) of the clone based on the group.by variable. #' @param cloneSize The bins for the grouping based on proportion or frequency. -#' If proportion is \strong{FALSE} and the cloneSizes are not set high enough +#' If proportion is **FALSE** and the cloneSizes are not set high enough #' based on frequency, the upper limit of cloneSizes will be automatically #' updated.S #' @param filterNA Method to subset Seurat/SCE object of barcodes without diff --git a/R/createHTOContigList.R b/R/createHTOContigList.R index a157fdbe..72e7b3a6 100644 --- a/R/createHTOContigList.R +++ b/R/createHTOContigList.R @@ -1,12 +1,12 @@ #' Generate a contig list from a multiplexed experiment #' #' This function reprocess and forms a list of contigs for downstream analysis -#' in scRepertoire, \code{\link{createHTOContigList}} take the filtered contig +#' in scRepertoire, [createHTOContigList()] take the filtered contig #' annotation output and the single-cell RNA object to create the list. #' If using an integrated single-cell object, it is recommended to split the #' object by sequencing run and remove extra prefixes and suffixes on the -#' barcode before using \code{\link{createHTOContigList}}. Alternatively, -#' the variable \strong{multi.run} can be used to separate a list of contigs +#' barcode before using [createHTOContigList()]. Alternatively, +#' the variable **multi.run** can be used to separate a list of contigs #' by a meta data variable. This may have issues with the repeated barcodes. #' #' @examples @@ -27,8 +27,8 @@ #' variable that indicates the sequencing run. 
#' @export #' @concept Loading_and_Processing_Contigs -#' @return Returns a list of contigs as input for \code{\link{combineBCR}} -#' or \code{\link{combineTCR}} +#' @return Returns a list of contigs as input for [combineBCR()] +#' or [combineTCR()] createHTOContigList <- function(contig, @@ -73,4 +73,4 @@ createHTOContigList <- function(contig, names(contig.list) <- unique.groups } contig.list -} \ No newline at end of file +} diff --git a/R/data.R b/R/data.R index 531ba7e8..c337657c 100644 --- a/R/data.R +++ b/R/data.R @@ -3,7 +3,7 @@ #' @description A list of 8 `filtered_contig_annotations.csv` files #' outputted from 10X Cell Ranger. More information on the #' data can be found in the following -#' \href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. +#' [manuscript](https://pubmed.ncbi.nlm.nih.gov/33622974/). #' #' @docType data #' @concept Data @@ -18,7 +18,7 @@ NULL #' from 4 patients with acute respiratory distress, with samples taken #' from both the lung and peripheral blood. More information on the #' data can be found in the following -#' \href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. +#' [manuscript](https://pubmed.ncbi.nlm.nih.gov/33622974/). #' #' @name scRep_example #' @concept Data @@ -32,7 +32,7 @@ NULL #' `filtered_contig_annotation` files, but subsetted to 365 valid T cells #' which correspond to the same barcodes found in `scRep_example`. The #' data is originally derived from the following -#' \href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. +#' [manuscript](https://pubmed.ncbi.nlm.nih.gov/33622974/). 
#' #' @usage data("mini_contig_list") #' @@ -42,6 +42,6 @@ NULL #' #' @docType data #' -#' @seealso \code{\link{contig_list}} +#' @seealso [contig_list] #' "mini_contig_list" diff --git a/R/exportClones.R b/R/exportClones.R index 1069647a..6720c7c6 100644 --- a/R/exportClones.R +++ b/R/exportClones.R @@ -1,10 +1,10 @@ #' Exporting clones #' #' This function saves a csv file of clones (genes, amino acid, and -#' nucleotide sequences) by barcodes. \strong{format} determines -#' the structure of the csv file - \emph{paired} will export sequences -#' by barcodes and include multiple chains, \emph{airr} will export a data -#' frame that is consistent with the AIRR format, and \emph{TCRMatch} will +#' nucleotide sequences) by barcodes. **format** determines +#' the structure of the csv file - *paired* will export sequences +#' by barcodes and include multiple chains, *airr* will export a data +#' frame that is consistent with the AIRR format, and *TCRMatch* will #' export a data frame that has the TRB chain with count information. #' #' @examples @@ -17,11 +17,11 @@ #' format = "paired") #' } #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. #' @param format The format to export the clones - "paired", "airr", or "TCRMatch". #' @param group.by The variable to use for grouping. -#' @param write.file \strong{TRUE}, save the file or \strong{FALSE}, +#' @param write.file **TRUE**, save the file or **FALSE**, #' return a data.frame #' @param dir directory location to save the csv #' @param file.name the csv file name diff --git a/R/getCircilize.R b/R/getCircilize.R index ea97be78..bd52b735 100644 --- a/R/getCircilize.R +++ b/R/getCircilize.R @@ -2,13 +2,13 @@ #' clones as a chord diagram.
#' #' This function will take the meta data from the product of -#' \code{\link{combineExpression}} and generate a relational data frame to +#' [combineExpression()] and generate a relational data frame to #' be used for a chord diagram. Each cord will represent the number of -#' clone unique and shared across the multiple \strong{group.by} variable. +#' clone unique and shared across the multiple **group.by** variable. #' If using the downstream circlize R package, please read and cite the -#' following \href{https://pubmed.ncbi.nlm.nih.gov/24930139/}{manuscript}. +#' following [manuscript](https://pubmed.ncbi.nlm.nih.gov/24930139/). #' If looking for more advance ways for circular visualizations, there -#' is a great \href{https://jokergoo.github.io/circlize_book/book/}{cookbook} +#' is a great [cookbook](https://jokergoo.github.io/circlize_book/book/) #' for the circlize package. #' #' @examples @@ -27,10 +27,10 @@ #' group.by = "seurat_clusters") #' #' -#' @param sc.data The single-cell object after \code{\link{combineExpression}}. -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param sc.data The single-cell object after [combineExpression()]. +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param group.by The group header for which you would like to analyze #' the data. 
@@ -42,7 +42,7 @@ #' #' @export #' @concept SC_Functions -#' @return A data frame of shared clones between groups formated for \link[circlize]{chordDiagram} +#' @return A data frame of shared clones between groups formatted for [chordDiagram][circlize::chordDiagram] #' @author Dillon Corvino, Nick Borcherding getCirclize <- function(sc.data, cloneCall = "strict", diff --git a/R/getContigDoublets.R b/R/getContigDoublets.R index 780a3cbc..79652041 100644 --- a/R/getContigDoublets.R +++ b/R/getContigDoublets.R @@ -1,5 +1,8 @@ #' Get Contig Doublets #' +#' @description +#' `r lifecycle::badge("experimental")` +#' #' This function identifies potential doublets by finding common barcodes #' between TCR and BCR outputs. It extracts unique barcodes from each list #' of dataframes, finds the intersection of the barcodes, and joins the diff --git a/R/highlightClones.R b/R/highlightClones.R index f6cd61fe..b8ba2430 100644 --- a/R/highlightClones.R +++ b/R/highlightClones.R @@ -22,7 +22,7 @@ #' sequence = c("CVVSDNTGGFKTIF_CASSVRRERANTGELFF")) #' #' @param sc.data The single-cell object to attach after -#' \code{\link{combineExpression}} +#' [combineExpression()] #' @param cloneCall How to call the clone - VDJC gene (gene), #' CDR3 nucleotide (nt), CDR3 amino acid (aa), #' VDJC gene + CDR3 nucleotide (strict) or a custom variable in the data. @@ -56,4 +56,4 @@ highlightClones <- function(sc.data, sc.data[[col.name]] <- meta } return(sc.data) -} \ No newline at end of file +} diff --git a/R/loadContigs.R b/R/loadContigs.R index 469c9f3f..89dc2b3e 100644 --- a/R/loadContigs.R +++ b/R/loadContigs.R @@ -1,7 +1,7 @@ #' Loading the contigs derived from single-cell sequencing #' #' This function generates a contig list and formats the data to allow for -#' function with \code{\link{combineTCR}} or \code{\link{combineBCR}}. If +#' function with [combineTCR()] or [combineBCR()].
If #' using data derived from filtered outputs of 10X Genomics, there is no #' need to use this function as the data is already compatible. #' @@ -37,8 +37,8 @@ #' @importFrom rjson fromJSON #' @export #' @concept Loading_and_Processing_Contigs -#' @return List of contigs for compatibility with \code{\link{combineTCR}} or -#' \code{\link{combineBCR}} +#' @return List of contigs for compatibility with [combineTCR()] or +#' [combineBCR()] loadContigs <- function(input, format = "10X") { #Loading from directory, recursively @@ -285,4 +285,4 @@ loadContigs <- function(input, colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "productive") } return(df) -} \ No newline at end of file +} diff --git a/R/percentAA.R b/R/percentAA.R index 0a795d2a..1b8277d4 100644 --- a/R/percentAA.R +++ b/R/percentAA.R @@ -12,15 +12,15 @@ #' chain = "TRB", #' aa.length = 20) -#' @param input.data The product of \code{\link{combineTCR}}, \code{\link{combineBCR}}, or -#' \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], [combineBCR()], or +#' [combineExpression()]. #' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL". #' @param group.by The variable to use for grouping. #' @param order.by A vector of specific plotting order or "alphanumeric" #' to plot groups in order #' @param aa.length The maximum length of the CDR3 amino acid sequence. #' @param exportTable Returns the data frame used for forming the graph. -#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}. +#' @param palette Colors to use in visualization - input any [hcl.pals][grDevices::hcl.pals]. 
#' @import ggplot2 #' @importFrom reshape2 melt #' @importFrom dplyr mutate_at %>% mutate_if diff --git a/R/percentGenes.R b/R/percentGenes.R index cc8d7747..8dd1f6d6 100644 --- a/R/percentGenes.R +++ b/R/percentGenes.R @@ -2,8 +2,8 @@ #' #' This function the proportion V or J genes used by #' grouping variables. This function only quantifies -#' single gene loci for indicated \strong{chain}. For -#' examining VJ pairing, please see \code{\link{percentVJ}}. +#' single gene loci for indicated **chain**. For +#' examining VJ pairing, please see [percentVJ()]. #' #' @examples #' #Making combined contig data @@ -14,8 +14,8 @@ #' chain = "TRB", #' gene = "Vgene") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. #' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL". #' @param gene "V", "D" or "J" #' @param group.by The variable to use for grouping @@ -23,7 +23,7 @@ #' to plot groups in order #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. #' @import ggplot2 #' @importFrom stringr str_split str_sort #' @importFrom reshape2 melt diff --git a/R/percentKmer.R b/R/percentKmer.R index d26de920..4f1b5e09 100644 --- a/R/percentKmer.R +++ b/R/percentKmer.R @@ -1,8 +1,8 @@ #' Examining the relative composition of kmer motifs in clones. #' -#' This function the of kmer for nucleotide (\strong{nt}) or -#' amino acid (\strong{aa}) sequences. Select the length of the -#' kmer to quantify using the \strong{motif.length} parameter. +#' This function the of kmer for nucleotide (**nt**) or +#' amino acid (**aa**) sequences. Select the length of the +#' kmer to quantify using the **motif.length** parameter. 
#' #' @examples #' #Making combined contig data @@ -13,11 +13,11 @@ #' chain = "TRB", #' motif.length = 3) #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}} +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()] #' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL" -#' @param cloneCall How to call the clone - CDR3 nucleotide (\strong{nt}) or -#' CDR3 amino acid (\strong{aa}) +#' @param cloneCall How to call the clone - CDR3 nucleotide (**nt**) or +#' CDR3 amino acid (**aa**) #' @param group.by The variable to use for grouping #' @param order.by A vector of specific plotting order or "alphanumeric" #' to plot groups in order @@ -26,7 +26,7 @@ #' median absolute deviation #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals} +#' [hcl.pals][grDevices::hcl.pals] #' @import ggplot2 #' @importFrom reshape2 melt #' @importFrom stats mad diff --git a/R/percentVJ.R b/R/percentVJ.R index 4e2e66dd..54d1c9f3 100644 --- a/R/percentVJ.R +++ b/R/percentVJ.R @@ -1,7 +1,7 @@ #' Quantifying the V and J gene usage across clones #' #' This function the proportion V and J genes used by -#' grouping variables for an indicated \strong{chain} to +#' grouping variables for an indicated **chain** to #' produce a matrix of VJ gene pairings. #' #' @examples @@ -11,15 +11,15 @@ #' "P19B","P19L", "P20B", "P20L")) #' percentVJ(combined, chain = "TRB") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. 
#' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL" #' @param group.by The variable to use for grouping #' @param order.by A vector of specific plotting order or "alphanumeric" #' to plot groups in order #' @param exportTable Returns the data frame used for forming the graph #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. #' @import ggplot2 #' @importFrom stringr str_split str_sort #' @importFrom reshape2 melt diff --git a/R/positionalEntropy.R b/R/positionalEntropy.R index af2483b0..696965dc 100644 --- a/R/positionalEntropy.R +++ b/R/positionalEntropy.R @@ -2,7 +2,7 @@ #' #' This function the diversity amino acids along the residues #' of the CDR3 amino acid sequence. Please see -#' \code{\link{clonalDiversity}} for more information on +#' [clonalDiversity()] for more information on #' the underlying methods for diversity/entropy calculations. #' Positions without variance will have a value reported as 0 #' for the purposes of comparison. 
@@ -16,8 +16,8 @@ #' chain = "TRB", #' aa.length = 20) -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}} +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()] #' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL" #' @param group.by The variable to use for grouping #' @param order.by A vector of specific plotting order or "alphanumeric" @@ -26,7 +26,7 @@ #' @param method The method to calculate the entropy/diversity - #' "shannon", "inv.simpson", "norm.entropy" #' @param exportTable Returns the data frame used for forming the graph -#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals} +#' @param palette Colors to use in visualization - input any [hcl.pals][grDevices::hcl.pals] #' @import ggplot2 #' @importFrom stringr str_split #' @export diff --git a/R/positionalProperty.R b/R/positionalProperty.R index 9b930ba1..49d43e6a 100644 --- a/R/positionalProperty.R +++ b/R/positionalProperty.R @@ -8,15 +8,15 @@ #' @details #' More information for the individual methods can be found at the following citations: #' -#' \strong{Atchley:} \href{https://pubmed.ncbi.nlm.nih.gov/15851683/}{citation} +#' **Atchley:** [citation](https://pubmed.ncbi.nlm.nih.gov/15851683/) #' -#' \strong{Kidera:} \href{https://link.springer.com/article/10.1007/BF01025492}{citation} +#' **Kidera:** [citation](https://link.springer.com/article/10.1007/BF01025492) #' -#' \strong{stScales:} \href{https://pubmed.ncbi.nlm.nih.gov/19373543/}{citation} +#' **stScales:** [citation](https://pubmed.ncbi.nlm.nih.gov/19373543/) #' -#' \strong{tScales:} \href{https://www.sciencedirect.com/science/article/pii/S0022286006006314?casa_token=uDj97DwXDDEAAAAA:VZfahldPRwU1WObySJlohudtMSDwF7nJSUzcEGwPhvkY13ALLKhs08Cf0_FyyfYZjxJlj-fVf0SM}{citation} +#' **tScales:** 
[citation](https://www.sciencedirect.com/science/article/pii/S0022286006006314?casa_token=uDj97DwXDDEAAAAA:VZfahldPRwU1WObySJlohudtMSDwF7nJSUzcEGwPhvkY13ALLKhs08Cf0_FyyfYZjxJlj-fVf0SM) #' -#' \strong{VHSE:} \href{https://pubmed.ncbi.nlm.nih.gov/15895431/}{citation} +#' **VHSE:** [citation](https://pubmed.ncbi.nlm.nih.gov/15895431/) #' #' #' @examples @@ -29,8 +29,8 @@ #' method = "Atchley", #' aa.length = 20) -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}} +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()] #' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL" #' @param group.by The variable to use for grouping #' @param order.by A vector of specific plotting order or "alphanumeric" @@ -39,7 +39,7 @@ #' @param method The method to calculate the property - "Atchley", "Kidera", #' "stScales", "tScales", or "VHSE" #' @param exportTable Returns the data frame used for forming the graph -#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals} +#' @param palette Colors to use in visualization - input any [hcl.pals][grDevices::hcl.pals] #' @import ggplot2 #' @importFrom stringr str_split #' @importFrom stats qt diff --git a/R/scRepertoire-package.R b/R/scRepertoire-package.R index ce705cf9..ea79e499 100644 --- a/R/scRepertoire-package.R +++ b/R/scRepertoire-package.R @@ -2,6 +2,7 @@ "_PACKAGE" ## usethis namespace: start +#' @importFrom lifecycle deprecated #' @importFrom Rcpp sourceCpp #' @useDynLib scRepertoire, .registration = TRUE ## usethis namespace: end diff --git a/R/startracDiversity.R b/R/startracDiversity.R index bfcda0d1..51c80c7c 100644 --- a/R/startracDiversity.R +++ b/R/startracDiversity.R @@ -1,11 +1,11 @@ #' Startrac-based diversity indices for single-cell RNA-seq #' #' @description This function utilizes the Startrac approach derived from -#' \href{https://pubmed.ncbi.nlm.nih.gov/30479382/}{PMID: 
30479382}. +#' [PMID: 30479382](https://pubmed.ncbi.nlm.nih.gov/30479382/). #' Required to run the function, the "type" variable needs to include the #' difference in where the cells were derived. The output of this function -#' will produce 3 indices: \strong{expa} (clonal expansion), \strong{migra} -#' (cross-tissue migration), and \strong{trans} (state transition). In order +#' will produce 3 indices: **expa** (clonal expansion), **migra** +#' (cross-tissue migration), and **trans** (state transition). In order #' to understand the underlying analyses of the outputs please #' read and cite the linked manuscript. #' @@ -26,19 +26,19 @@ #' type = "Type", #' group.by = "Patient") #' -#' @param sc.data The single-cell object after \code{\link{combineExpression}}. +#' @param sc.data The single-cell object after [combineExpression()]. #' For SCE objects, the cluster variable must be in the meta data under #' "cluster". -#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), -#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +#' @param cloneCall How to call the clone - VDJC gene (**gene**), +#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), +#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable #' in the data. #' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL". #' @param type The variable in the meta data that provides tissue type. #' @param group.by The variable in the meta data to group by, often samples. #' @param exportTable Returns the data frame used for forming the graph. -#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}. +#' @param palette Colors to use in visualization - input any [hcl.pals][grDevices::hcl.pals]. 
#' @importFrom reshape2 melt #' @importFrom dplyr %>% mutate group_by #' @import ggplot2 diff --git a/R/subsetClone.R b/R/subsetClone.R index 0a9d1314..5106f372 100644 --- a/R/subsetClone.R +++ b/R/subsetClone.R @@ -1,7 +1,7 @@ #' Subset the product of combineTCR() or combineBCR() #' #' This function allows for the subsetting of the product of -#' \code{\link{combineTCR}} or \code{\link{combineBCR}} +#' [combineTCR()] or [combineBCR()] #' by the name of the individual list element. #' #' @examples @@ -10,8 +10,8 @@ #' "P19B","P19L", "P20B", "P20L")) #' subset <- subsetClones(combined, name = "sample", variables = c("P17B")) #' -#' @param input.data The product of \code{\link{combineTCR}} or -#' \code{\link{combineBCR}}. +#' @param input.data The product of [combineTCR()] or +#' [combineBCR()]. #' @param name The column header/name to use for subsetting. #' @param variables The values to subset by, must be in the names(input.data). @@ -35,4 +35,4 @@ subsetClones <- function(input.data, } names(input.data2) <- names2 return(input.data2) -} \ No newline at end of file +} diff --git a/R/vizGenes.R b/R/vizGenes.R index 2f0002e1..03a40095 100644 --- a/R/vizGenes.R +++ b/R/vizGenes.R @@ -17,8 +17,8 @@ #' y.axis = NULL, #' plot = "heatmap") #' -#' @param input.data The product of \code{\link{combineTCR}}, -#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param input.data The product of [combineTCR()], +#' [combineBCR()], or [combineExpression()]. #' @param plot The type of plot to return - heatmap or barplot. #' @param x.axis Gene segments to separate the x-axis, such as "TRAV", #' "TRBD", "IGKJ". @@ -31,7 +31,7 @@ #' the total respective repertoire size #' @param exportTable Returns the data frame used for forming the graph. #' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' [hcl.pals][grDevices::hcl.pals]. 
#' @import ggplot2 #' @importFrom stringr str_split #' @importFrom stats sd diff --git a/man/StartracDiversity.Rd b/man/StartracDiversity.Rd index 8696ee79..edaec32f 100644 --- a/man/StartracDiversity.Rd +++ b/man/StartracDiversity.Rd @@ -15,16 +15,16 @@ StartracDiversity( ) } \arguments{ -\item{sc.data}{The single-cell object after \code{\link{combineExpression}}. -For SCE objects, the cluster variable must be in the meta data under +\item{sc.data}{The single-cell object after \code{\link[=combineExpression]{combineExpression()}}. +For SCE objects, the cluster variable must be in the meta data under "cluster".} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{type}{The variable in the meta data that provides tissue type.} @@ -33,19 +33,19 @@ e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any \link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot object of Startrac diversity metrics } \description{ -This function utilizes the Startrac approach derived from +This function utilizes the Startrac approach derived from \href{https://pubmed.ncbi.nlm.nih.gov/30479382/}{PMID: 30479382}. -Required to run the function, the "type" variable needs to include the -difference in where the cells were derived. 
The output of this function -will produce 3 indices: \strong{expa} (clonal expansion), \strong{migra} -(cross-tissue migration), and \strong{trans} (state transition). In order -to understand the underlying analyses of the outputs please +Required to run the function, the "type" variable needs to include the +difference in where the cells were derived. The output of this function +will produce 3 indices: \strong{expa} (clonal expansion), \strong{migra} +(cross-tissue migration), and \strong{trans} (state transition). In order +to understand the underlying analyses of the outputs please read and cite the linked manuscript. } \examples{ diff --git a/man/addVariable.Rd b/man/addVariable.Rd index a8b87941..5ab6d561 100644 --- a/man/addVariable.Rd +++ b/man/addVariable.Rd @@ -7,8 +7,8 @@ addVariable(input.data, variable.name = NULL, variables = NULL) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}} or -\code{\link{combineBCR}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}} or +\code{\link[=combineBCR]{combineBCR()}}.} \item{variable.name}{The new column name/header.} @@ -18,11 +18,11 @@ addVariable(input.data, variable.name = NULL, variables = NULL) input.data list with the variable column added to each element. } \description{ -This function adds variables to the product of \code{\link{combineTCR}}, -or \code{\link{combineBCR}} to be used in later visualizations. -For each element, the function will add a column (labeled by -\strong{variable.name}) with the variable. The length of the -\strong{variables} parameter needs to match the length of the +This function adds variables to the product of \code{\link[=combineTCR]{combineTCR()}}, +or \code{\link[=combineBCR]{combineBCR()}} to be used in later visualizations. +For each element, the function will add a column (labeled by +\strong{variable.name}) with the variable. The length of the +\strong{variables} parameter needs to match the length of the combined object. 
} \examples{ diff --git a/man/alluvialClones.Rd b/man/alluvialClones.Rd index cf2a0991..9b0fc055 100644 --- a/man/alluvialClones.Rd +++ b/man/alluvialClones.Rd @@ -17,15 +17,15 @@ alluvialClones( ) } \arguments{ -\item{sc.data}{The single-cell object to visualize -after \code{\link{combineExpression}}.} +\item{sc.data}{The single-cell object to visualize +after \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{y.axes}{The columns that will separate the proportional . @@ -37,21 +37,21 @@ visualizations.} \item{facet}{The column label to separate.} -\item{exportTable}{Exports a table of the data into the global +\item{exportTable}{Exports a table of the data into the global environment in addition to the visualization.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ Alluvial ggplot comparing clone distribution. } \description{ -View the proportional contribution of clones by Seurat or SCE object -meta data after \code{\link{combineExpression}}. The visualization -is based on the ggalluvial package, which requires the aesthetics -to be part of the axes that are visualized. 
Therefore, alpha, facet, -and color should be part of the the axes you wish to view or will +View the proportional contribution of clones by Seurat or SCE object +meta data after \code{\link[=combineExpression]{combineExpression()}}. The visualization +is based on the ggalluvial package, which requires the aesthetics +to be part of the axes that are visualized. Therefore, alpha, facet, +and color should be part of the axes you wish to view or will add an additional stratum/column to the end of the graph. } \examples{ diff --git a/man/clonalAbundance.Rd b/man/clonalAbundance.Rd index 52352220..4fa73a83 100644 --- a/man/clonalAbundance.Rd +++ b/man/clonalAbundance.Rd @@ -16,18 +16,18 @@ clonalAbundance( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g.
"both", "TRA", "TRG", "IGH", "IGL"} -\item{scale}{Converts the graphs into density plots in order to show +\item{scale}{Converts the graphs into density plots in order to show relative distributions.} \item{group.by}{The variable to use for grouping} @@ -38,20 +38,20 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph to the visualization.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ -ggplot of the total or relative abundance of clones +ggplot of the total or relative abundance of clones across quanta } \description{ -Displays the number of clones at specific frequencies by sample +Displays the number of clones at specific frequencies by sample or group. Visualization can either be a line graph ( -\strong{scale} = FALSE) using calculated numbers or density -plot (\strong{scale} = TRUE). Multiple sequencing runs can -be group together using the group parameter. If a matrix -output for the data is preferred, set +\strong{scale} = FALSE) using calculated numbers or density +plot (\strong{scale} = TRUE). Multiple sequencing runs can +be group together using the group parameter. If a matrix +output for the data is preferred, set \strong{exportTable} = TRUE. 
} \examples{ diff --git a/man/clonalBias.Rd b/man/clonalBias.Rd index 69f74497..8f189666 100644 --- a/man/clonalBias.Rd +++ b/man/clonalBias.Rd @@ -16,11 +16,11 @@ clonalBias( ) } \arguments{ -\item{sc.data}{The single-cell object after \code{\link{combineExpression}}.} +\item{sc.data}{The single-cell object after \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} \item{split.by}{The variable to use for calculating the baseline frequencies. @@ -34,20 +34,20 @@ For example, "Type" for lung vs peripheral blood comparison} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot scatter plot with clone bias } \description{ -The metric seeks to quantify how individual clones are skewed towards -a specific cellular compartment or cluster. A clone bias of \strong{1} - -indicates that a clone is composed of cells from a single -compartment or cluster, while a clone bias of \strong{0} - matches the +The metric seeks to quantify how individual clones are skewed towards +a specific cellular compartment or cluster. A clone bias of \strong{1} - +indicates that a clone is composed of cells from a single +compartment or cluster, while a clone bias of \strong{0} - matches the background subtype distribution. Please read and cite the following -\href{https://pubmed.ncbi.nlm.nih.gov/35829695/}{manuscript} -if using \code{\link{clonalBias}}. 
+\href{https://pubmed.ncbi.nlm.nih.gov/35829695/}{manuscript} +if using \code{\link[=clonalBias]{clonalBias()}}. } \examples{ #Making combined contig data diff --git a/man/clonalCluster.Rd b/man/clonalCluster.Rd index 72af6aca..eef61aab 100644 --- a/man/clonalCluster.Rd +++ b/man/clonalCluster.Rd @@ -15,39 +15,39 @@ clonalCluster( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}} or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}} or \code{\link[=combineExpression]{combineExpression()}}.} -\item{chain}{Indicate if both or a specific chain should be used - +\item{chain}{Indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} -\item{sequence}{Clustering based on either \strong{"aa"} or +\item{sequence}{Clustering based on either \strong{"aa"} or \strong{"nt"}.} \item{samples}{The specific samples to isolate for visualization.} -\item{threshold}{The normalized edit distance to consider. -The higher the number the more similarity of sequence will be +\item{threshold}{The normalized edit distance to consider. +The higher the number the more similarity of sequence will be used for clustering.} \item{group.by}{The column header used for to group contigs. If (\strong{NULL}), clusters will be calculated across samples.} -\item{exportGraph}{Return an igraph object of connected +\item{exportGraph}{Return an igraph object of connected sequences (\strong{TRUE}) or the amended input with a new cluster-based variable (\strong{FALSE}).} } \value{ -Either amended input with edit-distanced clusters added +Either amended input with edit-distanced clusters added or igraph object of connect sequences } \description{ -This function uses edit distances of either the nucleotide or amino acid -sequences of the CDR3 and V genes to cluster similar TCR/BCRs together. 
-As a default, the function takes the input from \code{\link{combineTCR}}, -\code{\link{combineBCR}} or \code{\link{combineExpression}} and amends a -cluster to the data frame or meta data. If \strong{exportGraph} is set -to TRUE, the function returns an igraph object of the connected sequences. +This function uses edit distances of either the nucleotide or amino acid +sequences of the CDR3 and V genes to cluster similar TCR/BCRs together. +As a default, the function takes the input from \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}} or \code{\link[=combineExpression]{combineExpression()}} and amends a +cluster to the data frame or meta data. If \strong{exportGraph} is set +to TRUE, the function returns an igraph object of the connected sequences. If multiple sequences per chain are present, this function only compares the first sequence. } diff --git a/man/clonalCompare.Rd b/man/clonalCompare.Rd index 6204f684..27810188 100644 --- a/man/clonalCompare.Rd +++ b/man/clonalCompare.Rd @@ -21,15 +21,15 @@ clonalCompare( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL"} \item{samples}{The specific samples to isolate for visualization.} @@ -39,37 +39,37 @@ e.g. 
"both", "TRA", "TRG", "IGH", "IGL"} \item{top.clones}{The top number of clonal sequences per group. (e.g., top.clones = 5)} -\item{highlight.clones}{Clonal sequences to highlight, if present, +\item{highlight.clones}{Clonal sequences to highlight, if present, all other clones returned will be grey} \item{relabel.clones}{Simplify the legend of the graph by returning clones that are numerically indexed} \item{group.by}{If using a single-cell object, the column header -to group the new list. \strong{NULL} will return the active +to group the new list. \strong{NULL} will return the active identity or cluster} \item{order.by}{A vector of specific plotting order or "alphanumeric" to plot groups in order} -\item{graph}{The type of graph produced, either \strong{"alluvial"} +\item{graph}{The type of graph produced, either \strong{"alluvial"} or \strong{"area"}} \item{exportTable}{Returns the data frame used for forming the graph} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ -ggplot of the proportion of total sequencing read of +ggplot of the proportion of total sequencing read of selecting clones } \description{ -This function produces an alluvial or area graph of the proportion of -the indicated clones for all or selected samples (using the -\strong{samples} parameter). Individual clones can be selected -using the \strong{clones} parameter with the specific sequence of -interest or using the \strong{top.clones} parameter with the top +This function produces an alluvial or area graph of the proportion of +the indicated clones for all or selected samples (using the +\strong{samples} parameter). Individual clones can be selected +using the \strong{clones} parameter with the specific sequence of +interest or using the \strong{top.clones} parameter with the top n clones by proportion to be visualized. 
} \examples{ diff --git a/man/clonalDiversity.Rd b/man/clonalDiversity.Rd index f05abff3..6837bc3b 100644 --- a/man/clonalDiversity.Rd +++ b/man/clonalDiversity.Rd @@ -20,15 +20,15 @@ clonalDiversity( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. 
"both", "TRA", "TRG", "IGH", "IGL"} \item{group.by}{Variable in which to combine for the diversity calculation} @@ -38,19 +38,19 @@ to plot groups in order} \item{x.axis}{Additional variable grouping that will space the sample along the x-axis} -\item{metrics}{The indices to use in diversity calculations - +\item{metrics}{The indices to use in diversity calculations - "shannon", "inv.simpson", "norm.entropy", "gini.simpson", "chao1", "ACE"} -\item{exportTable}{Exports a table of the data into the global environment +\item{exportTable}{Exports a table of the data into the global environment in addition to the visualization} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} -\item{n.boots}{number of bootstraps to down sample in order to +\item{n.boots}{number of bootstraps to down sample in order to get mean diversity} -\item{return.boots}{export boot strapped values calculated - +\item{return.boots}{export boot strapped values calculated - will automatically exportTable = TRUE.} \item{skip.boots}{remove down sampling and boot strapping from the calculation.} @@ -59,12 +59,12 @@ will automatically exportTable = TRUE.} ggplot of the diversity of clones by group } \description{ -This function calculates traditional measures of diversity - \strong{Shannon}, +This function calculates traditional measures of diversity - \strong{Shannon}, \strong{inverse Simpson}, \strong{normalized entropy}, \strong{Gini-Simpson}, \strong{Chao1 index}, and -\strong{abundance-based coverage estimators (ACE)} measure of species evenness by sample or group. -The function automatically down samples the diversity metrics using +\strong{abundance-based coverage estimators (ACE)} measure of species evenness by sample or group. 
+The function automatically down samples the diversity metrics using 100 boot straps (\strong{n.boots = 100}) and outputs the mean of the values. -The group parameter can be used to condense the individual +The group parameter can be used to condense the individual samples. If a matrix output for the data is preferred, set \strong{exportTable} = TRUE. } \details{ @@ -86,14 +86,14 @@ The formulas for the indices and estimators are as follows: \deqn{Index = S_{obs} + \frac{n_1(n_1-1)}{2*n_2+1}} \strong{Abundance-based Coverage Estimator (ACE):} -\deqn{Index = S_{abund} + \frac{S_{rare}}{C_{ace}} + \frac{F_1}{C_{ace}}} +\deqn{Index = S_{abund} + \frac{S_{rare}}{C_{ace}} + \frac{F_1}{C_{ace}}} Where: \itemize{ - \item{\eqn{p_i}{p[i]} is the proportion of species \eqn{i}{i} in the dataset.} - \item{\eqn{S}{S} is the total number of species.} - \item{\eqn{n_1}{n[1]} and \eqn{n_2}{n[2]} are the number of singletons and doubletons, respectively.} - \item{\eqn{S_{abund}}{S[abund]}, \eqn{S_{rare}}{S[rare]}, \eqn{C_{ace}}{C[ace]}, and \eqn{F_1}{F[1]} are parameters derived from the data.} +\item{\eqn{p_i}{p[i]} is the proportion of species \eqn{i}{i} in the dataset.} +\item{\eqn{S}{S} is the total number of species.} +\item{\eqn{n_1}{n[1]} and \eqn{n_2}{n[2]} are the number of singletons and doubletons, respectively.} +\item{\eqn{S_{abund}}{S[abund]}, \eqn{S_{rare}}{S[rare]}, \eqn{C_{ace}}{C[ace]}, and \eqn{F_1}{F[1]} are parameters derived from the data.} } } \examples{ diff --git a/man/clonalHomeostasis.Rd b/man/clonalHomeostasis.Rd index db4d7d75..e6404a3d 100644 --- a/man/clonalHomeostasis.Rd +++ b/man/clonalHomeostasis.Rd @@ -17,17 +17,17 @@ clonalHomeostasis( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} 
\item{cloneSize}{The cut points of the proportions.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{group.by}{The variable to use for grouping} @@ -35,21 +35,21 @@ e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{order.by}{A vector of specific plotting order or "alphanumeric" to plot groups in order} -\item{exportTable}{Exports a table of the data into the global +\item{exportTable}{Exports a table of the data into the global environment in addition to the visualization.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot of the space occupied by the specific proportion of clones } \description{ -This function calculates the space occupied by clone proportions. -The grouping of these clones is based on the parameter \strong{cloneSize}, -at default, \strong{cloneSize} will group the clones into bins of Rare = 0 -to 0.0001, Small = 0.0001 to 0.001, etc. To adjust the proportions, -change the number or labeling of the cloneSize parameter. If a matrix +This function calculates the space occupied by clone proportions. +The grouping of these clones is based on the parameter \strong{cloneSize}, +at default, \strong{cloneSize} will group the clones into bins of Rare = 0 +to 0.0001, Small = 0.0001 to 0.001, etc. To adjust the proportions, +change the number or labeling of the cloneSize parameter. 
If a matrix output for the data is preferred, set \strong{exportTable} = TRUE. } \examples{ diff --git a/man/clonalLength.Rd b/man/clonalLength.Rd index e98a09a3..86da1a4c 100644 --- a/man/clonalLength.Rd +++ b/man/clonalLength.Rd @@ -16,13 +16,13 @@ clonalLength( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}} -\item{cloneCall}{How to call the clone - CDR3 nucleotide (\strong{nt}) +\item{cloneCall}{How to call the clone - CDR3 nucleotide (\strong{nt}) or CDR3 amino acid (\strong{aa})} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL"} \item{group.by}{The variable to use for grouping} @@ -30,25 +30,25 @@ e.g. "both", "TRA", "TRG", "IGH", "IGL"} \item{order.by}{A vector of specific plotting order or "alphanumeric" to plot groups in order description} -\item{scale}{Converts the graphs into density plots in order to show +\item{scale}{Converts the graphs into density plots in order to show relative distributions.} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ -ggplot of the discrete or relative length distributions of +ggplot of the discrete or relative length distributions of clone sequences } \description{ -This function displays either the nucleotide (\strong{nt}) or amino -acid (\strong{aa}) sequence length. The sequence length visualized -can be selected using the chains parameter, either the combined clone -(both chains) or across all single chains. 
Visualization can either -be a histogram or if \strong{scale} = TRUE, the output will -be a density plot. Multiple sequencing runs can be group together +This function displays either the nucleotide (\strong{nt}) or amino +acid (\strong{aa}) sequence length. The sequence length visualized +can be selected using the chains parameter, either the combined clone +(both chains) or across all single chains. Visualization can either +be a histogram or if \strong{scale} = TRUE, the output will +be a density plot. Multiple sequencing runs can be grouped together using the group.by parameter. } \examples{ diff --git a/man/clonalNetwork.Rd b/man/clonalNetwork.Rd index 7b2be773..0fd49cb2 100644 --- a/man/clonalNetwork.Rd +++ b/man/clonalNetwork.Rd @@ -20,18 +20,18 @@ clonalNetwork( ) } \arguments{ -\item{sc.data}{The single-cell object after \code{\link{combineExpression}}.} +\item{sc.data}{The single-cell object after \code{\link[=combineExpression]{combineExpression()}}.} -\item{reduction}{The name of the dimensional reduction of the +\item{reduction}{The name of the dimensional reduction of the single-cell object.} \item{group.by}{The variable to use for the nodes.} -\item{filter.clones}{Use to select the top n clones (e.g., \strong{filter.clones} -= 2000) or n of clones based on the minimum number of all the comparators +\item{filter.clones}{Use to select the top n clones (e.g., \strong{filter.clones} += 2000) or n of clones based on the minimum number of all the comparators (e.g., \strong{filter.clone} = "min").} -\item{filter.identity}{Display the network for a specific level of the +\item{filter.identity}{Display the network for a specific level of the indicated identity.} \item{filter.proportion}{Remove clones from the network below a specific @@ -40,12 +40,12 @@ proportion.} \item{filter.graph}{Remove the reciprocal edges from the half of the graph, allowing for cleaner visualization.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}),
+\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{exportClones}{Exports a table of clones that are shared @@ -54,14 +54,14 @@ of clone copies.} \item{exportTable}{Exports a table of the data into the global} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot object } \description{ -This function generates a network based on clonal +This function generates a network based on clonal proportions of an indicated identity and then superimposes the network onto a single-cell object dimensional reduction plot. 
diff --git a/man/clonalOccupy.Rd b/man/clonalOccupy.Rd index 208b94e5..e549a712 100644 --- a/man/clonalOccupy.Rd +++ b/man/clonalOccupy.Rd @@ -17,7 +17,7 @@ clonalOccupy( ) } \arguments{ -\item{sc.data}{The single-cell object after \code{\link{combineExpression}}} +\item{sc.data}{The single-cell object after \code{\link[=combineExpression]{combineExpression()}}} \item{x.axis}{The variable in the meta data to graph along the x.axis.} @@ -32,21 +32,21 @@ to plot groups in order description} \item{na.include}{Visualize NA values or not} -\item{exportTable}{Exports a table of the data into the global +\item{exportTable}{Exports a table of the data into the global environment in addition to the visualization} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ Stacked bar plot of counts of cells by clone frequency group } \description{ -View the count of clones frequency group in Seurat or SCE object -meta data after \code{\link{combineExpression}}. The visualization -will take the new meta data variable \strong{"cloneSize"} and -plot the number of cells with each designation using a secondary -variable, like cluster. Credit to the idea goes to Drs. Carmona +View the count of clones frequency group in Seurat or SCE object +meta data after \code{\link[=combineExpression]{combineExpression()}}. The visualization +will take the new meta data variable \strong{"cloneSize"} and +plot the number of cells with each designation using a secondary +variable, like cluster. Credit to the idea goes to Drs. Carmona and Andreatta and their work with \href{https://github.com/carmonalab/ProjecTILs}{ProjectTIL}. 
} \examples{ diff --git a/man/clonalOverlap.Rd b/man/clonalOverlap.Rd index 9a39abc8..112d0765 100644 --- a/man/clonalOverlap.Rd +++ b/man/clonalOverlap.Rd @@ -16,18 +16,18 @@ clonalOverlap( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data} -\item{method}{The method to calculate the "overlap", "morisita", +\item{method}{The method to calculate the "overlap", "morisita", "jaccard", "cosine" indices or "raw" for the base numbers} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL"} \item{group.by}{The variable to use for grouping} @@ -37,41 +37,41 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ ggplot of the overlap of clones by group } \description{ -This functions allows for the calculation and visualizations of -various overlap metrics for clones. 
The methods include overlap -coefficient (\strong{overlap}), Morisita's overlap index -(\strong{morisita}), Jaccard index (\strong{jaccard}), cosine -similarity (\strong{cosine}) or the exact number of clonal +This function allows for the calculation and visualization of +various overlap metrics for clones. The methods include overlap +coefficient (\strong{overlap}), Morisita's overlap index +(\strong{morisita}), Jaccard index (\strong{jaccard}), cosine +similarity (\strong{cosine}) or the exact number of clonal overlap (\strong{raw}). } \details{ The formulas for the indices are as follows: \strong{Overlap Coefficient:} -\deqn{overlap = \frac{\sum \min(a, b)}{\min(\sum a, \sum b)}} +\deqn{overlap = \frac{\sum \min(a, b)}{\min(\sum a, \sum b)}} \strong{Raw Count Overlap:} \deqn{raw = \sum \min(a, b)} \strong{Morisita Index:} -\deqn{morisita = \frac{\sum a b}{(\sum a)(\sum b)}} +\deqn{morisita = \frac{\sum a b}{(\sum a)(\sum b)}} \strong{Jaccard Index:} -\deqn{jaccard = \frac{\sum \min(a, b)}{\sum a + \sum b - \sum \min(a, b)}} +\deqn{jaccard = \frac{\sum \min(a, b)}{\sum a + \sum b - \sum \min(a, b)}} \strong{Cosine Similarity:} -\deqn{cosine = \frac{\sum a b}{\sqrt{(\sum a^2)(\sum b^2)}}} +\deqn{cosine = \frac{\sum a b}{\sqrt{(\sum a^2)(\sum b^2)}}} -Where: -\itemize{ - \item{\eqn{a} and \eqn{b} are the abundances of species \eqn{i} in groups A and B, respectively.} +Where: +\itemize{ +\item{\eqn{a} and \eqn{b} are the abundances of species \eqn{i} in groups A and B, respectively.} } } \examples{ diff --git a/man/clonalOverlay.Rd b/man/clonalOverlay.Rd index 6e0810dd..d0e24cec 100644 --- a/man/clonalOverlay.Rd +++ b/man/clonalOverlay.Rd @@ -14,14 +14,14 @@ clonalOverlay( ) } \arguments{ -\item{sc.data}{The single-cell object after \code{\link{combineExpression}}.} +\item{sc.data}{The single-cell object after \code{\link[=combineExpression]{combineExpression()}}.} \item{reduction}{The dimensional reduction to visualize.} -\item{cut.category}{Meta data variable of
the single-cell object to use for +\item{cut.category}{Meta data variable of the single-cell object to use for filtering.} -\item{cutpoint}{The overlay cut point to include, this corresponds to the +\item{cutpoint}{The overlay cut point to include, this corresponds to the cut.category variable in the meta data of the single-cell object.} \item{bins}{The number of contours to the overlay} @@ -32,7 +32,7 @@ cut.category variable in the meta data of the single-cell object.} ggplot object } \description{ -This function allows the user to visualize the clonal expansion by overlaying the +This function allows the user to visualize the clonal expansion by overlaying the cells with specific clonal frequency onto the dimensional reduction plots in Seurat. Credit to the idea goes to Drs Andreatta and Carmona and their work with \href{https://github.com/carmonalab/ProjecTILs}{ProjectTIL}. diff --git a/man/clonalProportion.Rd b/man/clonalProportion.Rd index d72a4a10..1e6ef63c 100644 --- a/man/clonalProportion.Rd +++ b/man/clonalProportion.Rd @@ -16,17 +16,17 @@ clonalProportion( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} \item{clonalSplit}{The cut points for the specific clones} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. 
"both", "TRA", "TRG", "IGH", "IGL"} \item{group.by}{The variable to use for grouping} @@ -37,18 +37,18 @@ to plot groups in order} \item{exportTable}{Exports a table of the data into the global. environment in addition to the visualization} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ ggplot of the space occupied by the specific rank of clones } \description{ -This function calculates the relative clonal space occupied by the -clones. The grouping of these clones is based on the parameter -\strong{clonalSplit}, at default, \strong{clonalSplit} will group the clones -into bins of 1:10, 11:100, 101:1001, etc. To adjust the clones -selected, change the numbers in the variable split. If a matrix output +This function calculates the relative clonal space occupied by the +clones. The grouping of these clones is based on the parameter +\strong{clonalSplit}, at default, \strong{clonalSplit} will group the clones +into bins of 1:10, 11:100, 101:1001, etc. To adjust the clones +selected, change the numbers in the variable split. If a matrix output for the data is preferred, set \strong{exportTable} = TRUE. 
} \examples{ diff --git a/man/clonalQuant.Rd b/man/clonalQuant.Rd index 7b6fb07b..8bb76d65 100644 --- a/man/clonalQuant.Rd +++ b/man/clonalQuant.Rd @@ -16,15 +16,15 @@ clonalQuant( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL"} \item{scale}{Converts the graphs into percentage of unique clones} @@ -36,15 +36,15 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ ggplot of the total or relative unique clones } \description{ -This function quantifies unique clones. The unique clones -can be either reported as a raw output or scaled to the total number of +This function quantifies unique clones. The unique clones +can be either reported as a raw output or scaled to the total number of clones recovered using the scale parameter. 
} \examples{ diff --git a/man/clonalRarefaction.Rd b/man/clonalRarefaction.Rd index 9ca6d458..7c271d94 100644 --- a/man/clonalRarefaction.Rd +++ b/man/clonalRarefaction.Rd @@ -17,43 +17,43 @@ clonalRarefaction( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. 
"both", "TRA", "TRG", "IGH", "IGL".} \item{group.by}{The variable to use for grouping.} -\item{plot.type}{sample-size-based rarefaction/extrapolation curve -(\code{type = 1}); sample completeness curve (\code{type = 2}); +\item{plot.type}{sample-size-based rarefaction/extrapolation curve +(\code{type = 1}); sample completeness curve (\code{type = 2}); coverage-based rarefaction/extrapolation curve (\code{type = 3}).} -\item{hill.numbers}{The Hill numbers to be plotted out +\item{hill.numbers}{The Hill numbers to be plotted out (0 - species richness, 1 - Shannon, 2 - Simpson)} -\item{n.boots}{The number of bootstraps to downsample in order +\item{n.boots}{The number of bootstraps to downsample in order to get mean diversity.} -\item{exportTable}{Exports a table of the data into the global +\item{exportTable}{Exports a table of the data into the global environment in addition to the visualization.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \description{ -This functions uses the Hill numbers of order q: species richness (\strong{q = 0}), -Shannon diversity (\strong{q = 1}), the exponential of Shannon entropy and Simpson -diversity (\strong{q = 2}, the inverse of Simpson concentration) to compute diversity +This function uses the Hill numbers of order q: species richness (\strong{q = 0}), +Shannon diversity (\strong{q = 1}), the exponential of Shannon entropy and Simpson +diversity (\strong{q = 2}, the inverse of Simpson concentration) to compute diversity estimates for rarefaction and extrapolation. The function relies on the -\code{\link[iNEXT]{iNEXT}} R package. Please read and cite the -\href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript} -if using this function. The input into the iNEXT calculation is abundance, +\code{\link[iNEXT:iNEXT]{iNEXT::iNEXT()}} R package. 
Please read and cite the +\href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript} +if using this function. The input into the iNEXT calculation is abundance, incidence-based calculations are not supported. } \examples{ diff --git a/man/clonalScatter.Rd b/man/clonalScatter.Rd index 6b8d408a..8608b994 100644 --- a/man/clonalScatter.Rd +++ b/man/clonalScatter.Rd @@ -18,22 +18,22 @@ clonalScatter( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} \item{x.axis}{name of the list element to appear on the x.axis.} \item{y.axis}{name of the list element to appear on the y.axis.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. 
"both", "TRA", "TRG", "IGH", "IGL".} -\item{dot.size}{either total or the name of the list element to +\item{dot.size}{either total or the name of the list element to use for size of dots.} \item{group.by}{The variable to use for grouping.} @@ -42,16 +42,16 @@ use for size of dots.} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot of the relative clone numbers between two sequencing runs or groups } \description{ -This function produces a scatter plot directly comparing -the specific clones between two samples. The clones will -be categorized by counts into singlets or expanded, either +This function produces a scatter plot directly comparing +the specific clones between two samples. The clones will +be categorized by counts into singlets or expanded, either exclusive or shared between the selected samples. 
} \examples{ diff --git a/man/clonalSizeDistribution.Rd b/man/clonalSizeDistribution.Rd index 2d34bb37..13e12719 100644 --- a/man/clonalSizeDistribution.Rd +++ b/man/clonalSizeDistribution.Rd @@ -16,59 +16,59 @@ clonalSizeDistribution( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} \item{method}{The clustering parameter for the dendrogram.} -\item{threshold}{Numerical vector containing the thresholds +\item{threshold}{Numerical vector containing the thresholds the grid search was performed over.} \item{group.by}{The variable to use for grouping.} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot dendrogram of the clone size distribution } \description{ -This function produces a hierarchical clustering of clones by sample -using discrete gamma-GPD spliced threshold model. If using this -model please read and cite powerTCR (more info available at +This function produces a hierarchical clustering of clones by sample +using discrete gamma-GPD spliced threshold model. 
If using this +model please read and cite powerTCR (more info available at \href{https://pubmed.ncbi.nlm.nih.gov/30485278/}{PMID: 30485278}). } \details{ The probability density function (pdf) for the \strong{Generalized Pareto Distribution (GPD)} is given by: - \deqn{f(x|\mu, \sigma, \xi) = \frac{1}{\sigma} \left( 1 + \xi \left( \frac{x - \mu}{\sigma} \right) \right)^{-\left( \frac{1}{\xi} + 1 \right)}} +\deqn{f(x|\mu, \sigma, \xi) = \frac{1}{\sigma} \left( 1 + \xi \left( \frac{x - \mu}{\sigma} \right) \right)^{-\left( \frac{1}{\xi} + 1 \right)}} Where: \itemize{ - \item{\eqn{\mu} is a location parameter} - \item{\eqn{\sigma > 0} is a scale parameter} - \item{\eqn{\xi} is a shape parameter} - \item{\eqn{x \ge \mu} if \eqn{\xi \ge 0} and \eqn{\mu \le x \le \mu - \sigma/\xi} if \eqn{\xi < 0}} +\item{\eqn{\mu} is a location parameter} +\item{\eqn{\sigma > 0} is a scale parameter} +\item{\eqn{\xi} is a shape parameter} +\item{\eqn{x \ge \mu} if \eqn{\xi \ge 0} and \eqn{\mu \le x \le \mu - \sigma/\xi} if \eqn{\xi < 0}} } - + The probability density function (pdf) for the \strong{Gamma Distribution} is given by: \deqn{f(x|\alpha, \beta) = \frac{x^{\alpha-1} e^{-x/\beta}}{\beta^\alpha \Gamma(\alpha)}} Where: \itemize{ - \item{\eqn{\alpha > 0} is the shape parameter} - \item{\eqn{\beta > 0} is the scale parameter} - \item{\eqn{x \ge 0}} - \item{\eqn{\Gamma(\alpha)} is the gamma function of \eqn{\alpha}} +\item{\eqn{\alpha > 0} is the shape parameter} +\item{\eqn{\beta > 0} is the scale parameter} +\item{\eqn{x \ge 0}} +\item{\eqn{\Gamma(\alpha)} is the gamma function of \eqn{\alpha}} } } \examples{ diff --git a/man/combineBCR.Rd b/man/combineBCR.Rd index 073d4f80..394af4cf 100644 --- a/man/combineBCR.Rd +++ b/man/combineBCR.Rd @@ -17,28 +17,28 @@ combineBCR( ) } \arguments{ -\item{input.data}{List of filtered contig annotations or outputs from -\code{\link{loadContigs}}.} +\item{input.data}{List of filtered contig annotations or outputs from 
+\code{\link[=loadContigs]{loadContigs()}}.} \item{samples}{The labels of samples (required).} \item{ID}{The additional sample labeling (optional).} -\item{call.related.clones}{Use the nucleotide sequence and V gene -to call related clones. Default is set to TRUE. FALSE will return +\item{call.related.clones}{Use the nucleotide sequence and V gene +to call related clones. Default is set to TRUE. FALSE will return a CTstrict or strict clone as V gene + amino acid sequence.} -\item{threshold}{The normalized edit distance to consider. The higher +\item{threshold}{The normalized edit distance to consider. The higher the number the more similarity of sequence will be used for clustering.} \item{removeNA}{This will remove any chain without values.} \item{removeMulti}{This will remove barcodes with greater than 2 chains.} -\item{filterMulti}{This option will allow for the selection of the +\item{filterMulti}{This option will allow for the selection of the highest-expressing light and heavy chains, if not calling related clones.} -\item{filterNonproductive}{This option will allow for the removal of +\item{filterNonproductive}{This option will allow for the removal of nonproductive chains if the variable exists in the contig data. Default is set to TRUE to remove nonproductive contigs.} } @@ -46,19 +46,19 @@ is set to TRUE to remove nonproductive contigs.} List of clones for individual cell barcodes } \description{ -This function consolidates a list of BCR sequencing results to the level -of the individual cell barcodes. Using the samples and ID parameters, -the function will add the strings as prefixes to prevent issues with -repeated barcodes. The resulting new barcodes will need to match the -Seurat or SCE object in order to use, \code{\link{combineExpression}}. -Unlike \code{\link{combineTCR}}, combineBCR produces a column -\strong{CTstrict} of an index of nucleotide sequence and the -corresponding V gene. 
This index automatically calculates the +This function consolidates a list of BCR sequencing results to the level +of the individual cell barcodes. Using the samples and ID parameters, +the function will add the strings as prefixes to prevent issues with +repeated barcodes. The resulting new barcodes will need to match the +Seurat or SCE object in order to use, \code{\link[=combineExpression]{combineExpression()}}. +Unlike \code{\link[=combineTCR]{combineTCR()}}, combineBCR produces a column +\strong{CTstrict} of an index of nucleotide sequence and the +corresponding V gene. This index automatically calculates the Levenshtein distance between sequences with the same V gene and will -index sequences using a normalized Levenshtein distance with the same -ID. After which, clone clusters are called using the -\code{\link[igraph]{components}} function. Clones that are clustered -across multiple sequences will then be labeled with "Cluster" in the +index sequences using a normalized Levenshtein distance with the same +ID. After which, clone clusters are called using the +\code{\link[igraph:components]{igraph::components()}} function. Clones that are clustered +across multiple sequences will then be labeled with "Cluster" in the CTstrict header. 
} \examples{ diff --git a/man/combineExpression.Rd b/man/combineExpression.Rd index 5f0dd4a5..80bdde26 100644 --- a/man/combineExpression.Rd +++ b/man/combineExpression.Rd @@ -18,37 +18,37 @@ combineExpression( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}} or a list of -both c(\code{\link{combineTCR}}, \code{\link{combineBCR}}).} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}} or a list of +both c(\code{\link[=combineTCR]{combineTCR()}}, \code{\link[=combineBCR]{combineBCR()}}).} \item{sc.data}{The Seurat or Single-Cell Experiment (SCE) object to attach} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{chain}{indicate if both or a specific chain should be used - +\item{chain}{indicate if both or a specific chain should be used - e.g. "both", "TRA", "TRG", "IGH", "IGL".} -\item{group.by}{The column label in the combined clones in which -clone frequency will be calculated. \strong{NULL} or \strong{"none"} will +\item{group.by}{The column label in the combined clones in which +clone frequency will be calculated. \strong{NULL} or \strong{"none"} will keep the format of input.data.} -\item{proportion}{Whether to proportion (\strong{TRUE}) or total +\item{proportion}{Whether to proportion (\strong{TRUE}) or total frequency (\strong{FALSE}) of the clone based on the group.by variable.} -\item{filterNA}{Method to subset Seurat/SCE object of barcodes without +\item{filterNA}{Method to subset Seurat/SCE object of barcodes without clone information} -\item{cloneSize}{The bins for the grouping based on proportion or frequency. 
+\item{cloneSize}{The bins for the grouping based on proportion or frequency. If proportion is \strong{FALSE} and the cloneSizes are not set high enough based on frequency, the upper limit of cloneSizes will be automatically updated.S} \item{addLabel}{This will add a label to the frequency header, allowing -the user to try multiple group.by variables or recalculate frequencies after +the user to try multiple group.by variables or recalculate frequencies after subsetting the data.} } \value{ @@ -56,14 +56,14 @@ Single-cell object with clone information added to meta data information } \description{ -This function adds the immune receptor information to the Seurat or -SCE object to the meta data. By default this function also calculates -the frequencies and proportion of the clones by sequencing +This function adds the immune receptor information to the Seurat or +SCE object to the meta data. By default this function also calculates +the frequencies and proportion of the clones by sequencing run (\strong{group.by} = NULL). To change how the frequencies/proportions -are calculated, select a column header for the \strong{group.by} variable. -Importantly, before using \code{\link{combineExpression}} ensure the -barcodes of the single-cell object object match the barcodes in the output -of the \code{\link{combineTCR}} or \code{\link{combineBCR}}. +are calculated, select a column header for the \strong{group.by} variable. +Importantly, before using \code{\link[=combineExpression]{combineExpression()}} ensure the +barcodes of the single-cell object object match the barcodes in the output +of the \code{\link[=combineTCR]{combineTCR()}} or \code{\link[=combineBCR]{combineBCR()}}. 
} \examples{ #Getting the combined contigs diff --git a/man/combineTCR.Rd b/man/combineTCR.Rd index 4702c537..6c030fa7 100644 --- a/man/combineTCR.Rd +++ b/man/combineTCR.Rd @@ -15,8 +15,8 @@ combineTCR( ) } \arguments{ -\item{input.data}{List of filtered contig annotations or -outputs from \code{\link{loadContigs}}.} +\item{input.data}{List of filtered contig annotations or +outputs from \code{\link[=loadContigs]{loadContigs()}}.} \item{samples}{The labels of samples (recommended).} @@ -26,10 +26,10 @@ outputs from \code{\link{loadContigs}}.} \item{removeMulti}{This will remove barcodes with greater than 2 chains.} -\item{filterMulti}{This option will allow for the selection of the 2 +\item{filterMulti}{This option will allow for the selection of the 2 corresponding chains with the highest expression for a single barcode.} -\item{filterNonproductive}{This option will allow for the removal of +\item{filterNonproductive}{This option will allow for the removal of nonproductive chains if the variable exists in the contig data. Default is set to TRUE to remove nonproductive contigs.} } @@ -38,13 +38,13 @@ List of clones for individual cell barcodes } \description{ This function consolidates a list of TCR sequencing results to -the level of the individual cell barcodes. Using the \strong{samples} and -\strong{ID} parameters, the function will add the strings as prefixes to -prevent issues with repeated barcodes. The resulting new barcodes will -need to match the Seurat or SCE object in order to use, -\code{\link{combineExpression}}. Several levels of filtering exist - -\emph{removeNA}, \emph{removeMulti}, or \emph{filterMulti} are parameters -that control how the function deals with barcodes with multiple chains +the level of the individual cell barcodes. Using the \strong{samples} and +\strong{ID} parameters, the function will add the strings as prefixes to +prevent issues with repeated barcodes. 
The resulting new barcodes will +need to match the Seurat or SCE object in order to use, +\code{\link[=combineExpression]{combineExpression()}}. Several levels of filtering exist - +\emph{removeNA}, \emph{removeMulti}, or \emph{filterMulti} are parameters +that control how the function deals with barcodes with multiple chains recovered. } \examples{ diff --git a/man/contig_list.Rd b/man/contig_list.Rd index 48357a39..daa266d8 100644 --- a/man/contig_list.Rd +++ b/man/contig_list.Rd @@ -5,9 +5,9 @@ \alias{contig_list} \title{A list of 8 single-cell T cell receptor sequences runs.} \description{ -A list of 8 `filtered_contig_annotations.csv` files -outputted from 10X Cell Ranger. More information on the +A list of 8 \code{filtered_contig_annotations.csv} files +outputted from 10X Cell Ranger. More information on the data can be found in the following - \href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. +\href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. } \concept{Data} diff --git a/man/createHTOContigList.Rd b/man/createHTOContigList.Rd index 234bb94c..2f47f053 100644 --- a/man/createHTOContigList.Rd +++ b/man/createHTOContigList.Rd @@ -11,24 +11,24 @@ createHTOContigList(contig, sc.data, group.by = NULL, multi.run = NULL) \item{sc.data}{The Seurat or Single-Cell Experiment object.} -\item{group.by}{One or more meta data headers to create the contig -list based on. If more than one header listed, the function combines +\item{group.by}{One or more meta data headers to create the contig +list based on. 
If more than one header listed, the function combines them into a single variable.} -\item{multi.run}{If using integrated single-cell object, the meta data +\item{multi.run}{If using integrated single-cell object, the meta data variable that indicates the sequencing run.} } \value{ -Returns a list of contigs as input for \code{\link{combineBCR}} -or \code{\link{combineTCR}} +Returns a list of contigs as input for \code{\link[=combineBCR]{combineBCR()}} +or \code{\link[=combineTCR]{combineTCR()}} } \description{ -This function reprocess and forms a list of contigs for downstream analysis -in scRepertoire, \code{\link{createHTOContigList}} take the filtered contig -annotation output and the single-cell RNA object to create the list. -If using an integrated single-cell object, it is recommended to split the -object by sequencing run and remove extra prefixes and suffixes on the -barcode before using \code{\link{createHTOContigList}}. Alternatively, +This function reprocesses and forms a list of contigs for downstream analysis +in scRepertoire. \code{\link[=createHTOContigList]{createHTOContigList()}} takes the filtered contig +annotation output and the single-cell RNA object to create the list. +If using an integrated single-cell object, it is recommended to split the +object by sequencing run and remove extra prefixes and suffixes on the +barcode before using \code{\link[=createHTOContigList]{createHTOContigList()}}. Alternatively, the variable \strong{multi.run} can be used to separate a list of contigs by a meta data variable. This may have issues with the repeated barcodes. 
} diff --git a/man/exportClones.Rd b/man/exportClones.Rd index 0c0e6bac..e501b718 100644 --- a/man/exportClones.Rd +++ b/man/exportClones.Rd @@ -14,14 +14,14 @@ exportClones( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} \item{format}{The format to export the clones - "paired", "airr", or "TCRMatch".} \item{group.by}{The variable to use for grouping.} -\item{write.file}{\strong{TRUE}, save the file or \strong{FALSE}, +\item{write.file}{\strong{TRUE}, save the file or \strong{FALSE}, return a data.frame} \item{dir}{directory location to save the csv} @@ -32,11 +32,11 @@ return a data.frame} CSV file of the paired sequences. } \description{ -This function saves a csv file of clones (genes, amino acid, and -nucleotide sequences) by barcodes. \strong{format} determines -the structure of the csv file - \emph{paired} will export sequences -by barcodes and include multiple chains, \emph{airr} will export a data -frame that is consistent with the AIRR format, and \emph{TCRMatch} will +This function saves a csv file of clones (genes, amino acid, and +nucleotide sequences) by barcodes. \strong{format} determines +the structure of the csv file - \emph{paired} will export sequences +by barcodes and include multiple chains, \emph{airr} will export a data +frame that is consistent with the AIRR format, and \emph{TCRMatch} will export a data frame that has the TRB chain with count information. 
} \examples{ diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg new file mode 100644 index 00000000..745ab0c7 --- /dev/null +++ b/man/figures/lifecycle-archived.svg @@ -0,0 +1,21 @@ + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg new file mode 100644 index 00000000..d5c9559e --- /dev/null +++ b/man/figures/lifecycle-defunct.svg @@ -0,0 +1,21 @@ + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 00000000..b61c57c3 --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 00000000..5d88fc2c --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg new file mode 100644 index 00000000..897370ec --- /dev/null +++ b/man/figures/lifecycle-maturing.svg @@ -0,0 +1,21 @@ + + lifecycle: maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg new file mode 100644 index 00000000..7c1721d0 --- /dev/null +++ b/man/figures/lifecycle-questioning.svg @@ -0,0 +1,21 @@ + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/man/figures/lifecycle-soft-deprecated.svg b/man/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 00000000..9c166ff3 --- /dev/null +++ b/man/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: 
soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 00000000..9bf21e76 --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 00000000..db8d757f --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + diff --git a/man/getCirclize.Rd b/man/getCirclize.Rd index d3280816..cef78004 100644 --- a/man/getCirclize.Rd +++ b/man/getCirclize.Rd @@ -14,31 +14,31 @@ getCirclize( ) } \arguments{ -\item{sc.data}{The single-cell object after \code{\link{combineExpression}}.} +\item{sc.data}{The single-cell object after \code{\link[=combineExpression]{combineExpression()}}.} -\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), +\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), -VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable +VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable in the data.} -\item{group.by}{The group header for which you would like to analyze +\item{group.by}{The group header for which you would like to analyze the data.} -\item{proportion}{Calculate the relationship unique -clones (proportion = FALSE) or normalized by +\item{proportion}{Calculate the relationship unique +clones (proportion = FALSE) or normalized by proportion (proportion = TRUE)} \item{include.self}{Include counting the clones within a single group.by comparison} } \value{ -A data frame of shared clones between groups formated for \link[circlize]{chordDiagram} +A data frame of shared clones between groups formated for 
\link[circlize:chordDiagram]{chordDiagram} } \description{ -This function will take the meta data from the product of -\code{\link{combineExpression}} and generate a relational data frame to -be used for a chord diagram. Each cord will represent the number of -clone unique and shared across the multiple \strong{group.by} variable. +This function will take the meta data from the product of +\code{\link[=combineExpression]{combineExpression()}} and generate a relational data frame to +be used for a chord diagram. Each cord will represent the number of +clone unique and shared across the multiple \strong{group.by} variable. If using the downstream circlize R package, please read and cite the following \href{https://pubmed.ncbi.nlm.nih.gov/24930139/}{manuscript}. If looking for more advance ways for circular visualizations, there diff --git a/man/getContigDoublets.Rd b/man/getContigDoublets.Rd index 9075f365..c12a3a67 100644 --- a/man/getContigDoublets.Rd +++ b/man/getContigDoublets.Rd @@ -7,11 +7,11 @@ getContigDoublets(tcrOutput, bcrOutput) } \arguments{ -\item{tcrOutput}{Output of [combineTCR()]. A list of data.frames containing TCR contig -information, each dataframe must have a `barcode` column.} +\item{tcrOutput}{Output of \code{\link[=combineTCR]{combineTCR()}}. A list of data.frames containing TCR contig +information, each dataframe must have a \code{barcode} column.} -\item{bcrOutput}{Output of [combineBCR()]. A list of data.frames containing BCR contig -information, each dataframe must have a `barcode` column.} +\item{bcrOutput}{Output of \code{\link[=combineBCR]{combineBCR()}}. A list of data.frames containing BCR contig +information, each dataframe must have a \code{barcode} column.} } \value{ A dataframe of barcodes that exist in both the TCR and BCR data, with @@ -19,6 +19,8 @@ columns from both sets of data. If there are no doublets, the returned data.frame will have the same colnames but no rows. 
} \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + This function identifies potential doublets by finding common barcodes between TCR and BCR outputs. It extracts unique barcodes from each list of dataframes, finds the intersection of the barcodes, and joins the diff --git a/man/highlightClones.Rd b/man/highlightClones.Rd index c7198526..9e037c63 100644 --- a/man/highlightClones.Rd +++ b/man/highlightClones.Rd @@ -11,21 +11,21 @@ highlightClones( ) } \arguments{ -\item{sc.data}{The single-cell object to attach after -\code{\link{combineExpression}}} +\item{sc.data}{The single-cell object to attach after +\code{\link[=combineExpression]{combineExpression()}}} -\item{cloneCall}{How to call the clone - VDJC gene (gene), +\item{cloneCall}{How to call the clone - VDJC gene (gene), CDR3 nucleotide (nt), CDR3 amino acid (aa), VDJC gene + CDR3 nucleotide (strict) or a custom variable in the data.} \item{sequence}{The specific sequence or sequence to highlight} } \value{ -Single-cell object object with new meta data column +Single-cell object with new meta data column for indicated clones } \description{ -Use a specific clonal sequence to highlight on top of the dimensional +Use a specific clonal sequence to highlight on top of the dimensional reduction in single-cell object. 
} \examples{ diff --git a/man/loadContigs.Rd b/man/loadContigs.Rd index a96001be..76cfb164 100644 --- a/man/loadContigs.Rd +++ b/man/loadContigs.Rd @@ -9,33 +9,33 @@ loadContigs(input, format = "10X") \arguments{ \item{input}{The directory in which contigs are located or a list with contig elements} -\item{format}{The format of the single-cell contig, currently supporting: +\item{format}{The format of the single-cell contig, currently supporting: "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope", "TRUST4", and "WAT3R"} } \value{ -List of contigs for compatibility with \code{\link{combineTCR}} or -\code{\link{combineBCR}} +List of contigs for compatibility with \code{\link[=combineTCR]{combineTCR()}} or +\code{\link[=combineBCR]{combineBCR()}} } \description{ -This function generates a contig list and formats the data to allow for -function with \code{\link{combineTCR}} or \code{\link{combineBCR}}. If -using data derived from filtered outputs of 10X Genomics, there is no +This function generates a contig list and formats the data to allow for +function with \code{\link[=combineTCR]{combineTCR()}} or \code{\link[=combineBCR]{combineBCR()}}. If +using data derived from filtered outputs of 10X Genomics, there is no need to use this function as the data is already compatible. 
} \details{ -The files that this function parses includes: +The files that this function parses includes: \itemize{ - \item 10X = "filtered_contig_annotations.csv" - \item AIRR = "airr_rearrangement.tsv" - \item BD = "Contigs_AIRR.tsv" - \item Dandelion = "all_contig_dandelion.tsv" - \item Immcantation = "data.tsv" - \item JSON = ".json" - \item ParseBio = "barcode_report.tsv" - \item MiXCR = "clones.tsv" - \item Omniscope = ".csv" - \item TRUST4 = "barcode_report.tsv" - \item WAT3R = "barcode_results.csv" +\item 10X = "filtered_contig_annotations.csv" +\item AIRR = "airr_rearrangement.tsv" +\item BD = "Contigs_AIRR.tsv" +\item Dandelion = "all_contig_dandelion.tsv" +\item Immcantation = "data.tsv" +\item JSON = ".json" +\item ParseBio = "barcode_report.tsv" +\item MiXCR = "clones.tsv" +\item Omniscope = ".csv" +\item TRUST4 = "barcode_report.tsv" +\item WAT3R = "barcode_results.csv" } } \examples{ diff --git a/man/mini_contig_list.Rd b/man/mini_contig_list.Rd index f5269d53..2c98d1a0 100644 --- a/man/mini_contig_list.Rd +++ b/man/mini_contig_list.Rd @@ -3,22 +3,22 @@ \docType{data} \name{mini_contig_list} \alias{mini_contig_list} -\title{Processed subset of `contig_list`} +\title{Processed subset of \code{contig_list}} \format{ -An R `list` of `data.frame` objects +An R \code{list} of \code{data.frame} objects } \usage{ data("mini_contig_list") } \description{ A list of 8 data frames of T cell contigs outputted from the -`filtered_contig_annotation` files, but subsetted to 365 valid T cells -which correspond to the same barcodes found in `scRep_example`. The +\code{filtered_contig_annotation} files, but subsetted to 365 valid T cells +which correspond to the same barcodes found in \code{scRep_example}. The data is originally derived from the following - \href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. +\href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. 
} \seealso{ -\code{\link{contig_list}} +\code{\link[=contig_list]{contig_list()}} } \concept{Data} \keyword{datasets} diff --git a/man/percentAA.Rd b/man/percentAA.Rd index eac3893d..83f88c65 100644 --- a/man/percentAA.Rd +++ b/man/percentAA.Rd @@ -15,8 +15,8 @@ percentAA( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, \code{\link{combineBCR}}, or -\code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, \code{\link[=combineBCR]{combineBCR()}}, or +\code{\link[=combineExpression]{combineExpression()}}.} \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL".} @@ -29,13 +29,13 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any \link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot of stacked bar graphs of amino acid proportions } \description{ -This function the proportion of amino acids along the residues +This function the proportion of amino acids along the residues of the CDR3 amino acid sequence. 
} \examples{ diff --git a/man/percentGenes.Rd b/man/percentGenes.Rd index 7cb0b342..9167e892 100644 --- a/man/percentGenes.Rd +++ b/man/percentGenes.Rd @@ -15,8 +15,8 @@ percentGenes( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL".} @@ -29,17 +29,17 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot of percentage of indicated genes as a heatmap } \description{ -This function the proportion V or J genes used by +This function the proportion V or J genes used by grouping variables. This function only quantifies -single gene loci for indicated \strong{chain}. For -examining VJ pairing, please see \code{\link{percentVJ}}. +single gene loci for indicated \strong{chain}. For +examining VJ pairing, please see \code{\link[=percentVJ]{percentVJ()}}. 
} \examples{ #Making combined contig data diff --git a/man/percentKmer.Rd b/man/percentKmer.Rd index c4a162a8..d6c3e692 100644 --- a/man/percentKmer.Rd +++ b/man/percentKmer.Rd @@ -17,12 +17,12 @@ percentKmer( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}} \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL"} -\item{cloneCall}{How to call the clone - CDR3 nucleotide (\strong{nt}) or +\item{cloneCall}{How to call the clone - CDR3 nucleotide (\strong{nt}) or CDR3 amino acid (\strong{aa})} \item{group.by}{The variable to use for grouping} @@ -32,19 +32,19 @@ to plot groups in order} \item{motif.length}{The length of the kmer to analyze} -\item{top.motifs}{Return the n most variable motifs as a function of +\item{top.motifs}{Return the n most variable motifs as a function of median absolute deviation} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}} } \value{ ggplot of percentage of kmers as a heatmap } \description{ -This function the of kmer for nucleotide (\strong{nt}) or +This function the of kmer for nucleotide (\strong{nt}) or amino acid (\strong{aa}) sequences. Select the length of the kmer to quantify using the \strong{motif.length} parameter. 
} diff --git a/man/percentVJ.Rd b/man/percentVJ.Rd index b3d8a4da..122f1e95 100644 --- a/man/percentVJ.Rd +++ b/man/percentVJ.Rd @@ -14,8 +14,8 @@ percentVJ( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL"} @@ -26,14 +26,14 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot of percentage of V and J gene pairings as a heatmap } \description{ -This function the proportion V and J genes used by +This function the proportion V and J genes used by grouping variables for an indicated \strong{chain} to produce a matrix of VJ gene pairings. 
} diff --git a/man/positionalEntropy.Rd b/man/positionalEntropy.Rd index 0b79ca92..aa2255ba 100644 --- a/man/positionalEntropy.Rd +++ b/man/positionalEntropy.Rd @@ -16,8 +16,8 @@ positionalEntropy( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}} \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL"} @@ -28,22 +28,22 @@ to plot groups in order} \item{aa.length}{The maximum length of the CDR3 amino acid sequence.} -\item{method}{The method to calculate the entropy/diversity - +\item{method}{The method to calculate the entropy/diversity - "shannon", "inv.simpson", "norm.entropy"} \item{exportTable}{Returns the data frame used for forming the graph} -\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any \link[grDevices:palettes]{hcl.pals}} } \value{ ggplot of line graph of diversity by position } \description{ -This function the diversity amino acids along the residues -of the CDR3 amino acid sequence. Please see -\code{\link{clonalDiversity}} for more information on -the underlying methods for diversity/entropy calculations. -Positions without variance will have a value reported as 0 +This function the diversity amino acids along the residues +of the CDR3 amino acid sequence. Please see +\code{\link[=clonalDiversity]{clonalDiversity()}} for more information on +the underlying methods for diversity/entropy calculations. +Positions without variance will have a value reported as 0 for the purposes of comparison. 
} \examples{ diff --git a/man/positionalProperty.Rd b/man/positionalProperty.Rd index 787c33c3..1c67ed5f 100644 --- a/man/positionalProperty.Rd +++ b/man/positionalProperty.Rd @@ -16,8 +16,8 @@ positionalProperty( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}} \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL"} @@ -33,15 +33,15 @@ to plot groups in order} \item{exportTable}{Returns the data frame used for forming the graph} -\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}} +\item{palette}{Colors to use in visualization - input any \link[grDevices:palettes]{hcl.pals}} } \value{ ggplot of line graph of diversity by position } \description{ -This function calculates the mean selected property for -amino acids along the residues of the CDR3 amino acid sequence. -The ribbon surrounding the individual line represents the 95% +This function calculates the mean selected property for +amino acids along the residues of the CDR3 amino acid sequence. +The ribbon surrounding the individual line represents the 95\% confidence interval. } \details{ diff --git a/man/scRep_example.Rd b/man/scRep_example.Rd index 944f7deb..5d6fc3ea 100644 --- a/man/scRep_example.Rd +++ b/man/scRep_example.Rd @@ -5,11 +5,11 @@ \alias{scRep_example} \title{A Seurat object of 500 single T cells,} \description{ -The object is compatible with `contig_list` and the TCR -sequencing data can be added with `combineExpression`. The data is +The object is compatible with \code{contig_list} and the TCR +sequencing data can be added with \code{combineExpression}. The data is from 4 patients with acute respiratory distress, with samples taken -from both the lung and peripheral blood. 
More information on the +from both the lung and peripheral blood. More information on the data can be found in the following - \href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. +\href{https://pubmed.ncbi.nlm.nih.gov/33622974/}{manuscript}. } \concept{Data} diff --git a/man/subsetClones.Rd b/man/subsetClones.Rd index 27e891f4..9e72a6bc 100644 --- a/man/subsetClones.Rd +++ b/man/subsetClones.Rd @@ -7,8 +7,8 @@ subsetClones(input.data, name, variables = NULL) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}} or -\code{\link{combineBCR}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}} or +\code{\link[=combineBCR]{combineBCR()}}.} \item{name}{The column header/name to use for subsetting.} @@ -18,8 +18,8 @@ subsetClones(input.data, name, variables = NULL) list of contigs that have been filtered for the name parameter } \description{ -This function allows for the subsetting of the product of -\code{\link{combineTCR}} or \code{\link{combineBCR}} +This function allows for the subsetting of the product of +\code{\link[=combineTCR]{combineTCR()}} or \code{\link[=combineBCR]{combineBCR()}} by the name of the individual list element. 
} \examples{ diff --git a/man/vizGenes.Rd b/man/vizGenes.Rd index 12cfeabc..8d062013 100644 --- a/man/vizGenes.Rd +++ b/man/vizGenes.Rd @@ -17,38 +17,38 @@ vizGenes( ) } \arguments{ -\item{input.data}{The product of \code{\link{combineTCR}}, -\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} +\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, +\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} -\item{x.axis}{Gene segments to separate the x-axis, such as "TRAV", +\item{x.axis}{Gene segments to separate the x-axis, such as "TRAV", "TRBD", "IGKJ".} -\item{y.axis}{Variable to separate the y-axis, can be both categorical +\item{y.axis}{Variable to separate the y-axis, can be both categorical or other gene gene segments, such as "TRAV", "TRBD", "IGKJ".} \item{group.by}{Variable in which to group the diversity calculation.} \item{plot}{The type of plot to return - heatmap or barplot.} -\item{order}{Categorical variable to organize the x-axis, either +\item{order}{Categorical variable to organize the x-axis, either "gene" or "variance"} -\item{scale}{Converts the individual count of genes to proportion using +\item{scale}{Converts the individual count of genes to proportion using the total respective repertoire size} \item{exportTable}{Returns the data frame used for forming the graph.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Colors to use in visualization - input any +\link[grDevices:palettes]{hcl.pals}.} } \value{ ggplot bar diagram or heatmap of gene usage } \description{ -This function will allow for the visualizing the distribution +This function will allow for the visualizing the distribution of the any VDJ and C gene of the TCR or BCR using heatmap or -bar chart. This function requires assumes two chains were used in -defining clone, if not, it will default to the only chain +bar chart. 
This function requires assumes two chains were used in +defining clone, if not, it will default to the only chain present regardless of the chain parameter. } \examples{ From 53f7620e3a48b2111f53d8bcb64359264f4aa294 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 13 Oct 2024 23:43:49 -0700 Subject: [PATCH 07/16] add development NEWS section --- NEWS.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/NEWS.md b/NEWS.md index b3409df8..a6247938 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# scRepertoire VERSION 2.0.8.9000 (development version) + +## NEW FEATURES +* Added ```getContigDoublets()``` experimental function to identify TCR and BCR doublets as a preprocessing step to ```combineExpression()``` + +## UNDERLYING CHANGES +* convert documentation to use markdown (`roxygen2md`) +* import `lifecycle` to add badges to functions +* import `purrr` + # scRepertoire VERSION 2.0.8 ## UNDERLYING CHANGES From cc17499582e2ea1f37585c1026f1b8e0fe58205e Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Mon, 14 Oct 2024 02:18:40 -0700 Subject: [PATCH 08/16] improve getContigDoublets and prelim test --- R/getContigDoublets.R | 79 +++++++++++-------------- man/getContigDoublets.Rd | 16 ++--- tests/testthat/test-getContigDoublets.R | 27 +++++++++ 3 files changed, 68 insertions(+), 54 deletions(-) create mode 100644 tests/testthat/test-getContigDoublets.R diff --git a/R/getContigDoublets.R b/R/getContigDoublets.R index 79652041..ac49148c 100644 --- a/R/getContigDoublets.R +++ b/R/getContigDoublets.R @@ -2,79 +2,66 @@ #' #' @description #' `r lifecycle::badge("experimental")` -#' +#' #' This function identifies potential doublets by finding common barcodes #' between TCR and BCR outputs. It extracts unique barcodes from each list #' of dataframes, finds the intersection of the barcodes, and joins the #' resulting data. #' -#' @param tcrOutput Output of [combineTCR()]. 
A list of data.frames containing TCR contig -#' information, each dataframe must have a `barcode` column. -#' @param bcrOutput Output of [combineBCR()]. A list of data.frames containing BCR contig -#' information, each dataframe must have a `barcode` column. +#' @param tcrOutput Output of [combineTCR()]. A list of data.frames containing +#' TCR contig information, each dataframe must have a `barcode` column. +#' @param bcrOutput Output of [combineBCR()]. A list of data.frames containing +#' BCR contig information, each dataframe must have a `barcode` column. +#' +#' @return +#' A dataframe of barcodes that exist in both the TCR and BCR data, with +#' columns from both sets of data. There will be an additional column `contigType` +#' of type factor with levels 'TCR' and 'BCR' indicating the origin of the contig. #' -#' @return A dataframe of barcodes that exist in both the TCR and BCR data, with -#' columns from both sets of data. If there are no doublets, the returned +#' If there are no doublets, the returned #' data.frame will have the same colnames but no rows. 
+#' #' @export -#' @examples -#' # TODO getContigDoublets <- function(tcrOutput, bcrOutput) { assert_that(isListOfNonEmptyDataFrames(tcrOutput)) + assert_that(all(sapply(tcrOutput, function(d) "barcode" %in% colnames(d)))) assert_that(isListOfNonEmptyDataFrames(bcrOutput)) + assert_that(all(sapply(bcrOutput, function(d) "barcode" %in% colnames(d)))) - rawBarcodeColname <- tempColnameForDfList( - c(tcrOutput, bcrOutput), "raw_barcode" - ) - - listOfTcrBcrWithRawBarcode <- list(tcrOutput, bcrOutput) %>% - lapplyOnAll(function(df) { - df[[rawBarcodeColname]] <- extractBarcodeStrings(df$barcode) - df - }) - - doubletBarcodes <- listOfTcrBcrWithRawBarcode %>% - lapplyOnAll(function(df) { - unique(df[[rawBarcodeColname]]) - }) %>% - lapply(purrr::list_flatten) %>% - purrr::reduce(intersect) + doubletBarcodes <- getContigDoubletBarcodes(tcrOutput, bcrOutput) if (length(doubletBarcodes) == 0) { - return(makeEmptyIntersectionDf(tcrOutput[[1]], bcrOutput[[1]])) + output <- autoFullJoin(tcrOutput[[1]][0, ], bcrOutput[[1]][0, ]) + output$contigType <- factor(character(0), levels = c("BCR", "TCR")) + return(output) } - listOfTcrBcrWithRawBarcode %>% + RbindedTcrBcrDoublets <- list(tcrOutput, bcrOutput) %>% lapplyOnAll(function(df) { - df[df[[rawBarcodeColname]] %in% doubletBarcodes, ] + df[df$barcode %in% doubletBarcodes, ] }) %>% - lapply(dplyr::bind_rows) %>% - purrr::reduce(autoFullJoin) + lapply(dplyr::bind_rows) + + RbindedTcrBcrDoublets[[1]]$contigType <- "TCR" + RbindedTcrBcrDoublets[[2]]$contigType <- "BCR" + + RbindedTcrBcrDoublets %>% + purrr::reduce(autoFullJoin) %>% + dplyr::mutate(contigType = factor(contigType)) } -tempColnameForDfList <- function(dfList, baseName = "temp") { - colnameSet <- unique(unlist(lapply(dfList, colnames))) - tail(make.unique(c(colnameSet, baseName)), 1) +getContigDoubletBarcodes <- function(tcrOutput, bcrOutput) { + list(tcrOutput, bcrOutput) %>% + lapplyOnAll(function(df) unique(df$barcode)) %>% + lapply(purrr::list_flatten) %>% + 
purrr::reduce(intersect) } lapplyOnAll <- function(listOfLists, fun) { lapply(listOfLists, function(x) lapply(x, fun)) } -makeEmptyIntersectionDf <- function(...) { - purrr::reduce(list(...), function(df1, df2) { - autoFullJoin(df1[0, ], df2[0, ]) - }) -} - autoFullJoin <- function(df1, df2) { suppressMessages(dplyr::full_join(df1, df2)) } - -extractBarcodeStrings <- function(inputStrings) { - matches <- unlist(lapply(inputStrings, function(x) { - regmatches(x, gregexpr("[a-zA-Z_]+_[ATGC]+-\\d+", x)) - })) - matches[matches != ""] -} diff --git a/man/getContigDoublets.Rd b/man/getContigDoublets.Rd index c12a3a67..a884aa55 100644 --- a/man/getContigDoublets.Rd +++ b/man/getContigDoublets.Rd @@ -7,15 +7,18 @@ getContigDoublets(tcrOutput, bcrOutput) } \arguments{ -\item{tcrOutput}{Output of \code{\link[=combineTCR]{combineTCR()}}. A list of data.frames containing TCR contig -information, each dataframe must have a \code{barcode} column.} +\item{tcrOutput}{Output of \code{\link[=combineTCR]{combineTCR()}}. A list of data.frames containing +TCR contig information, each dataframe must have a \code{barcode} column.} -\item{bcrOutput}{Output of \code{\link[=combineBCR]{combineBCR()}}. A list of data.frames containing BCR contig -information, each dataframe must have a \code{barcode} column.} +\item{bcrOutput}{Output of \code{\link[=combineBCR]{combineBCR()}}. A list of data.frames containing +BCR contig information, each dataframe must have a \code{barcode} column.} } \value{ A dataframe of barcodes that exist in both the TCR and BCR data, with -columns from both sets of data. If there are no doublets, the returned +columns from both sets of data. There will be an additional column \code{contigType} +of type factor with levels 'TCR' and 'BCR' indicating the origin of the contig. + +If there are no doublets, the returned data.frame will have the same colnames but no rows. } \description{ @@ -26,6 +29,3 @@ between TCR and BCR outputs. 
It extracts unique barcodes from each list of dataframes, finds the intersection of the barcodes, and joins the resulting data. } -\examples{ -# TODO -} diff --git a/tests/testthat/test-getContigDoublets.R b/tests/testthat/test-getContigDoublets.R new file mode 100644 index 00000000..12110a38 --- /dev/null +++ b/tests/testthat/test-getContigDoublets.R @@ -0,0 +1,27 @@ +test_that("getContigDoublets works for no doublets", { + + tcr <- getdata("combineContigs", "combineTCR_list_expected") + + # create a BCR list from testdata with no doublets + bcr <- getdata("combineContigs", "combineBCR_list_expected") + bcr <- list(bcr[[1]][1:10, ], bcr[[1]][20:30, ], bcr[[1]][100:110, ]) + names(bcr) <- names(tcr) + + expected_no_doublet_output <- structure( + list( + barcode = character(0), sample = character(0), + TCR1 = character(0), cdr3_aa1 = character(0), + cdr3_nt1 = character(0), TCR2 = character(0), + cdr3_aa2 = character(0), cdr3_nt2 = character(0), + CTgene = character(0), CTnt = character(0), CTaa = character(0), + CTstrict = character(0), IGH = character(0), IGLC = character(0), + contigType = structure( + integer(0), levels = c("BCR", "TCR"), class = "factor" + ) + ), + row.names = integer(0), + class = "data.frame" + ) + + expect_identical(getContigDoublets(tcr, bcr), expected_no_doublet_output) +}) From 13b116109bd3ffc8d3cc169b47dc1ea23a7a8cd0 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Mon, 14 Oct 2024 02:32:06 -0700 Subject: [PATCH 09/16] added dev to CI on PR, add unfinished getContig test --- .github/workflows/R-CMD-check.yaml | 2 +- tests/testthat/test-getContigDoublets.R | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 0ce19cc8..9e047d18 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,7 +4,7 @@ on: push: branches: [main, master, v2] pull_request: - branches: [main, master, v2] + branches: [main, 
master, v2, dev] name: R-CMD-check diff --git a/tests/testthat/test-getContigDoublets.R b/tests/testthat/test-getContigDoublets.R index 12110a38..aefc3ea2 100644 --- a/tests/testthat/test-getContigDoublets.R +++ b/tests/testthat/test-getContigDoublets.R @@ -25,3 +25,18 @@ test_that("getContigDoublets works for no doublets", { expect_identical(getContigDoublets(tcr, bcr), expected_no_doublet_output) }) + +test_that("getContigDoublets works for inputs with doublets", { + + tcr <- getdata("combineContigs", "combineTCR_list_expected") + + # create a BCR list from testdata with doublets + bcr <- getdata("combineContigs", "combineBCR_list_expected") + bcr <- list(bcr[[1]][1:10, ], bcr[[1]][20:30, ], bcr[[1]][100:110, ]) + names(bcr) <- names(tcr) + + # UNFINISHED + # TODO purposely introduce doublets into testing data + # TODO test expected output + expect_equal(2 * 2, 4) +}) From de4f4f8ff2d905373d379e96c4ad21db1ba5a298 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Mon, 14 Oct 2024 16:01:01 -0700 Subject: [PATCH 10/16] fix getContigDoublets --- .gitignore | 1 + DESCRIPTION | 3 +- NEWS.md | 4 +- R/getContigDoublets.R | 39 +++++++------ man/getContigDoublets.Rd | 5 +- tests/testthat/test-alluvialClones.R | 8 +-- tests/testthat/test-getContigDoublets.R | 73 ++++++++++++++++++------- 7 files changed, 88 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index b4bd4d45..0fd94378 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ local_tests.R docs vignettes/articles/scRep_example_full.rds .vscode +dev # remove if linting is ever strictly enforced / a standard is set .lintr diff --git a/DESCRIPTION b/DESCRIPTION index 82c27ffa..840efb35 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,8 @@ Imports: VGAM, hash, purrr, - lifecycle + lifecycle, + withr Suggests: BiocManager, BiocStyle, diff --git a/NEWS.md b/NEWS.md index a6247938..1a683b3d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,8 +5,8 @@ ## UNDERLYING CHANGES * convert documentation to use 
markdown (`roxygen2md`) -* import `lifecycle` to add badges to functions -* import `purrr` +* import `lifecycle`, `purrr`, `withr` +* suppressed "using discrete variable for alpha is not recommended" warning in alluvialClones unit tests. # scRepertoire VERSION 2.0.8 diff --git a/R/getContigDoublets.R b/R/getContigDoublets.R index ac49148c..6d9e0646 100644 --- a/R/getContigDoublets.R +++ b/R/getContigDoublets.R @@ -15,8 +15,9 @@ #' #' @return #' A dataframe of barcodes that exist in both the TCR and BCR data, with -#' columns from both sets of data. There will be an additional column `contigType` -#' of type factor with levels 'TCR' and 'BCR' indicating the origin of the contig. +#' columns from both sets of data. There will be an additional column +#' `contigType` of type factor with levels 'TCR' and 'BCR' indicating the +#' origin of the contig - this will be the new first column. #' #' If there are no doublets, the returned #' data.frame will have the same colnames but no rows. @@ -32,30 +33,34 @@ getContigDoublets <- function(tcrOutput, bcrOutput) { doubletBarcodes <- getContigDoubletBarcodes(tcrOutput, bcrOutput) if (length(doubletBarcodes) == 0) { - output <- autoFullJoin(tcrOutput[[1]][0, ], bcrOutput[[1]][0, ]) - output$contigType <- factor(character(0), levels = c("BCR", "TCR")) - return(output) + autoFullJoin(tcrOutput[[1]][0, ], bcrOutput[[1]][0, ]) %>% + dplyr::mutate( + contigType = factor(character(0), levels = c("BCR", "TCR")), + .before = 1 + ) %>% + return() } - RbindedTcrBcrDoublets <- list(tcrOutput, bcrOutput) %>% + list(TCR = tcrOutput, BCR = bcrOutput) %>% lapplyOnAll(function(df) { df[df$barcode %in% doubletBarcodes, ] }) %>% - lapply(dplyr::bind_rows) - - RbindedTcrBcrDoublets[[1]]$contigType <- "TCR" - RbindedTcrBcrDoublets[[2]]$contigType <- "BCR" - - RbindedTcrBcrDoublets %>% + purrr::imap(function(x, type) { + dplyr::bind_rows(x) %>% + dplyr::mutate(contigType = type) + }) %>% purrr::reduce(autoFullJoin) %>% - dplyr::mutate(contigType = 
factor(contigType)) + dplyr::mutate( + contigType = factor(contigType, levels = c("BCR", "TCR")) + ) %>% + dplyr::relocate(contigType) } getContigDoubletBarcodes <- function(tcrOutput, bcrOutput) { - list(tcrOutput, bcrOutput) %>% - lapplyOnAll(function(df) unique(df$barcode)) %>% - lapply(purrr::list_flatten) %>% - purrr::reduce(intersect) + intersect( + dplyr::bind_rows(tcrOutput)$barcode, + dplyr::bind_rows(bcrOutput)$barcode + ) } lapplyOnAll <- function(listOfLists, fun) { diff --git a/man/getContigDoublets.Rd b/man/getContigDoublets.Rd index a884aa55..05887af3 100644 --- a/man/getContigDoublets.Rd +++ b/man/getContigDoublets.Rd @@ -15,8 +15,9 @@ BCR contig information, each dataframe must have a \code{barcode} column.} } \value{ A dataframe of barcodes that exist in both the TCR and BCR data, with -columns from both sets of data. There will be an additional column \code{contigType} -of type factor with levels 'TCR' and 'BCR' indicating the origin of the contig. +columns from both sets of data. There will be an additional column +\code{contigType} of type factor with levels 'TCR' and 'BCR' indicating the +origin of the contig - this will be the new first column. If there are no doublets, the returned data.frame will have the same colnames but no rows. 
diff --git a/tests/testthat/test-alluvialClones.R b/tests/testthat/test-alluvialClones.R index 42584b76..0fc63c21 100644 --- a/tests/testthat/test-alluvialClones.R +++ b/tests/testthat/test-alluvialClones.R @@ -32,21 +32,21 @@ test_that("alluvialClones works", { color = NULL) ) - expect_doppelganger( + suppressWarnings(expect_doppelganger( "alluvialClones_alpha_plot", - suppressWarnings(alluvialClones(test_obj, + alluvialClones(test_obj, cloneCall = "aa", y.axes = c("Type", "ident"), alpha = "Patient")) ) - expect_doppelganger( + suppressWarnings(expect_doppelganger( "alluvialClones_alphapluscolor_plot", alluvialClones(test_obj, cloneCall = "aa", y.axes = c("Type", "ident"), alpha = "Patient", - color = "Type") + color = "Type")) ) diff --git a/tests/testthat/test-getContigDoublets.R b/tests/testthat/test-getContigDoublets.R index aefc3ea2..48c385fe 100644 --- a/tests/testthat/test-getContigDoublets.R +++ b/tests/testthat/test-getContigDoublets.R @@ -1,42 +1,77 @@ -test_that("getContigDoublets works for no doublets", { +# testcases are NOT comprehensive - tcr <- getdata("combineContigs", "combineTCR_list_expected") +getTestTcrList <- function() { + getdata("combineContigs", "combined")[1:3] +} - # create a BCR list from testdata with no doublets +getTestBcrListNoDoublets <- function() { bcr <- getdata("combineContigs", "combineBCR_list_expected") bcr <- list(bcr[[1]][1:10, ], bcr[[1]][20:30, ], bcr[[1]][100:110, ]) - names(bcr) <- names(tcr) + names(bcr) <- names(getTestTcrList()) + purrr::imap(bcr, function(df, sampleName) { + df$sample <- sampleName + df + }) +} + +getTestBcrListWithDoublets <- function(doubletsPerSample, seed = 42) { + if (!is.null(seed)) withr::local_seed(seed) + purrr:::map2( + getTestBcrListNoDoublets(), getTestTcrList(), + makeRandomBcrBarcodesMatchTcr, n = doubletsPerSample + ) +} + +makeRandomBcrBarcodesMatchTcr <- function(bcrDf, tcrDf, n) { + + sampleUniqueBarcodeDf <- function(contigDf) { + contigDf %>% + dplyr::select(barcode) %>% + 
dplyr::distinct() %>% + dplyr::slice_sample(n = min(n, nrow(.))) + } + + bcrToTcrBarcodeMap <- sampleUniqueBarcodeDf(bcrDf) %>% + dplyr::mutate(tcrBarcode = sampleUniqueBarcodeDf(tcrDf)$barcode) + + bcrDf %>% + dplyr::full_join(bcrToTcrBarcodeMap, by = "barcode") %>% + dplyr::mutate( + barcode = ifelse(is.na(tcrBarcode), barcode, tcrBarcode) + ) %>% + dplyr::select(-tcrBarcode) +} + +test_that("getContigDoublets works for no doublets", { expected_no_doublet_output <- structure( list( + contigType = structure( + integer(0), levels = c("BCR", "TCR"), class = "factor" + ), barcode = character(0), sample = character(0), TCR1 = character(0), cdr3_aa1 = character(0), cdr3_nt1 = character(0), TCR2 = character(0), cdr3_aa2 = character(0), cdr3_nt2 = character(0), CTgene = character(0), CTnt = character(0), CTaa = character(0), - CTstrict = character(0), IGH = character(0), IGLC = character(0), - contigType = structure( - integer(0), levels = c("BCR", "TCR"), class = "factor" - ) + CTstrict = character(0), IGH = character(0), IGLC = character(0) ), row.names = integer(0), class = "data.frame" ) - expect_identical(getContigDoublets(tcr, bcr), expected_no_doublet_output) + expect_identical( + getContigDoublets(getTestTcrList(), getTestBcrListNoDoublets()), + expected_no_doublet_output + ) }) test_that("getContigDoublets works for inputs with doublets", { - tcr <- getdata("combineContigs", "combineTCR_list_expected") - - # create a BCR list from testdata with doublets - bcr <- getdata("combineContigs", "combineBCR_list_expected") - bcr <- list(bcr[[1]][1:10, ], bcr[[1]][20:30, ], bcr[[1]][100:110, ]) - names(bcr) <- names(tcr) + NUM_DOUBLETS_PER_SAMPLE <- 3 + tcr <- getTestTcrList() + bcr <- getTestBcrListWithDoublets(NUM_DOUBLETS_PER_SAMPLE) - # UNFINISHED - # TODO purposely introduce doublets into testing data - # TODO test expected output - expect_equal(2 * 2, 4) + expect_true(nrow(getContigDoublets(tcr, bcr)) > 0) + # TODO }) From 49583a8b7f120f2d4cf3a4d566b2f1c3256e10e8 
Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Mon, 14 Oct 2024 16:02:19 -0700 Subject: [PATCH 11/16] update wordlist --- inst/WORDLIST | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index 40858001..7b94b609 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -8,10 +8,16 @@ Autoencoder BCR BCRs BD +BLOSUM +BPPARAM Barcode BioC +BiocParallel +Biotechnologies CAERGSGGSYIPTF CARKVRDSSYKLIF +CAS +CASF CASSDPSGRQGPRWDTQYF CASSDSGYNEQFF CASSVRRERANTGELFF @@ -36,9 +42,13 @@ Contig Contigs Corvino Drs +Evercode +FASGAI FilteringMulti +Florian Francesco GEX +GIANA GSE GSEA GSEABase @@ -50,16 +60,20 @@ Hyperexpanded IGH IGKJ IGL +IMGT Ig +ImMunoGeneTics Immcantation Inv JC JSON Jaccard KF +Keras Kidera Levenshtein Liangtao +MSWHIM Massimo Mazziotta McPAS @@ -74,10 +88,12 @@ OHE Omniscope PIRD PMID +PWM ParseBio Pielou Preprocessed ProjectTIL +ProtFP Readded Rebasing Rebumping @@ -103,6 +119,7 @@ TRBV TRD TRG TUST +TenX Trex UCell UMAP @@ -122,12 +139,16 @@ Zheng aa abundanceContig addVariable +adjacencyMatrix airr al +allelic alluvialClones alluvialClonotype alluvialClonotypes +assertthat asy +atchleyFactors autoencoder autoencoding barcode @@ -181,12 +202,16 @@ comparators compareClonotype compareClonotypes complementarity +constructConDFAndparseTCR contig contigs convolutional +cpp createHTOContigList +crucianiProperties csv customizable +de densityEnrichment dev df @@ -194,6 +219,7 @@ dir doubletons downsample downsampling +dplyr epitope epitopes erroring @@ -206,11 +232,15 @@ exportTable filterMulti filterNonproductive filteringMulti +formatGenes formated frac gd +generateSequences +geometricEncoder getCirclize getCoord +getIMGT geyserEnrichment ggalluvial ggdendrogram @@ -232,13 +262,19 @@ hypermutation iNEXT iedb igraph +imgt +immApex incongruent +inferCDR +inframe ingle inv io +isometry jaccard jk json +kideraFactors kmer kmers lapply @@ -258,15 +294,18 @@ migr migra 
morisita multisystem +mutateSequences nFeature na nalysis +novo nrichment nt nucleotides occupiedClonotype occupiedRepertoire occupiedscRepertoire +onehotEncoder parseAIRR parseBCR parseTCR @@ -279,9 +318,10 @@ performNormalization positionalEntropy positionalProperty powerTCR +probabilityMatrix +propertyEncoder quantContig rda -rder readRDS reclustering regressClonotype @@ -302,6 +342,7 @@ scatterClonotype scatterClonotypes scatterEnrichment screp +sequenceDecoder seurat shannon simpson @@ -309,6 +350,7 @@ splitEnrichment ssGSEA stScales startrac +startstop str stripBarcodes subsetClones @@ -316,8 +358,11 @@ subtype subtypes summarise tScales +tensorflow testthat theCall +tokenizeSequences +tokenizing trackable tran transcriptomic @@ -325,8 +370,10 @@ trex tsv unreturned variational +variationalSequences viridis visVgene vizGene vizGenes vizVgene +zScales From 20a323cea203ff6d66514e2e1600fb37267d1369 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Wed, 16 Oct 2024 21:07:07 -0700 Subject: [PATCH 12/16] sync with dev --- NEWS.md | 1 + R/positionalProperty.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 1a683b3d..efbd9e02 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,7 @@ * Fixed issue with single chain output for ```clonalLength()``` * Removed unnecessary code remnant in ```clonalLength()``` * Allow one sample to be plotted by ```percentVJ()``` +* Fixed issue with ```positionalProperty()``` and exportTable # scRepertoire VERSION 2.0.7 diff --git a/R/positionalProperty.R b/R/positionalProperty.R index 49d43e6a..79949038 100644 --- a/R/positionalProperty.R +++ b/R/positionalProperty.R @@ -154,7 +154,7 @@ positionalProperty <- function(input.data, theme_classic() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) if (exportTable == TRUE) { - return(mat_melt) + return(mat) } return(plot) From f58f9096f9e1324163653ecb5f3fd621df8f7c63 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sat, 19 Oct 2024 13:22:59 
-0700 Subject: [PATCH 13/16] test sync --- tests/testthat/test-getContigDoublets.R | 29 +++++++++++-------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/tests/testthat/test-getContigDoublets.R b/tests/testthat/test-getContigDoublets.R index 48c385fe..b30f543a 100644 --- a/tests/testthat/test-getContigDoublets.R +++ b/tests/testthat/test-getContigDoublets.R @@ -23,25 +23,22 @@ getTestBcrListWithDoublets <- function(doubletsPerSample, seed = 42) { } makeRandomBcrBarcodesMatchTcr <- function(bcrDf, tcrDf, n) { - - sampleUniqueBarcodeDf <- function(contigDf) { - contigDf %>% - dplyr::select(barcode) %>% - dplyr::distinct() %>% - dplyr::slice_sample(n = min(n, nrow(.))) - } - - bcrToTcrBarcodeMap <- sampleUniqueBarcodeDf(bcrDf) %>% - dplyr::mutate(tcrBarcode = sampleUniqueBarcodeDf(tcrDf)$barcode) - - bcrDf %>% - dplyr::full_join(bcrToTcrBarcodeMap, by = "barcode") %>% + sampleUniqueBarcodeDf(bcrDf, n) %>% + dplyr::mutate(tcrBarcode = sampleUniqueBarcodeDf(tcrDf, n)$barcode) %>% + dplyr::full_join(bcrDf, by = "barcode") %>% dplyr::mutate( barcode = ifelse(is.na(tcrBarcode), barcode, tcrBarcode) ) %>% dplyr::select(-tcrBarcode) } +sampleUniqueBarcodeDf <- function(contigDf, n) { + contigDf %>% + dplyr::select(barcode) %>% + dplyr::distinct() %>% + dplyr::slice_sample(n = n) +} + test_that("getContigDoublets works for no doublets", { expected_no_doublet_output <- structure( @@ -68,10 +65,10 @@ test_that("getContigDoublets works for no doublets", { test_that("getContigDoublets works for inputs with doublets", { - NUM_DOUBLETS_PER_SAMPLE <- 3 + NUM_UNIQUE_DOUBLETS_PER_SAMPLE <- 3 tcr <- getTestTcrList() - bcr <- getTestBcrListWithDoublets(NUM_DOUBLETS_PER_SAMPLE) + bcr <- getTestBcrListWithDoublets(NUM_UNIQUE_DOUBLETS_PER_SAMPLE) - expect_true(nrow(getContigDoublets(tcr, bcr)) > 0) + expect_true(nrow(getContigDoublets(tcr, bcr)) >= NUM_UNIQUE_DOUBLETS_PER_SAMPLE * length(tcr)) # TODO }) From 1d490c5d2a2925b873de4d80d14eadf55fe04b54 Mon Sep 17 
00:00:00 2001 From: Qile0317 Date: Sun, 20 Oct 2024 14:24:44 -0700 Subject: [PATCH 14/16] re-sync ImmApex vignette --- vignettes/articles/immApex.Rmd | 53 ++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/vignettes/articles/immApex.Rmd b/vignettes/articles/immApex.Rmd index 6464590f..12b5015d 100644 --- a/vignettes/articles/immApex.Rmd +++ b/vignettes/articles/immApex.Rmd @@ -42,19 +42,30 @@ suppressMessages(library(keras)) suppressMessages(library(ggplot2)) suppressMessages(library(viridis)) suppressMessages(library(dplyr)) +set.seed(42) ``` # Getting Started -**immApex** is meant to serve as an API for deep-learning models based on immune receptor sequencing. These functions extract or generate amino acid or nucleotide sequences and prepare them for deep learning tasks through [Keras](https://tensorflow.rstudio.com/guides/keras/basics). **immApex** is the underlying structure for the BCR models in [Ibex](https://github.com/ncborcherding/Ibex) and TCR models in [Trex](https://github.com/ncborcherding/Trex). It should be noted, although the tools here are created for immune receptor sequences, they will work more generally for nucleotide or amino acid sequences. +**immApex** is meant to serve as an API for deep-learning models based on immune receptor sequencing. These functions extract or generate amino acid or nucleotide sequences and prepare them for deep learning tasks through [Keras](https://tensorflow.rstudio.com/guides/keras/basics). **immApex** is the underlying structure for the BCR models in [Ibex](https://github.com/ncborcherding/Ibex) and TCR models in [Trex](https://github.com/ncborcherding/Trex). It should be noted that the tools here are created for immune receptor sequences; they will work more generally for nucleotide or amino acid sequences. The package itself supports AIRR, Adaptive, and 10x formats and interacts with the **scRepertoire** R package. 
More information is available at the [immApex GitHub Repo](https://github.com/ncborcherding/immApex). +## Loading Libraries + +```{r} +suppressMessages(library(immApex)) +suppressMessages(library(keras)) +suppressMessages(library(ggplot2)) +suppressMessages(library(viridis)) +suppressMessages(library(dplyr)) +``` + # Getting and Manipulating Sequences ## generateSequences -Generating synthetic sequences is a quick way to start testing the model code. ```generateSequences()``` can also generate realistic noise for generative adversarial networks. +Generating synthetic sequences is a quick way to start testing the model code. ```generateSequences()``` can also generate realistic noise for generative adversarial networks. Parameters for ```generateSequences()``` @@ -74,7 +85,7 @@ sequences <- generateSequences(prefix.motif = "CAS", head(sequences) ``` -If we want to generate nucleotide sequences instead of amino acids, we just need to change the **sequence.dictionary**. +If we want to generate nucleotide sequences instead of amino acids, we must change the **sequence.dictionary**. ```{r tidy = FALSE} nucleotide.sequences <- generateSequences(number.of.sequences = 1000, @@ -119,9 +130,10 @@ variational.sequences <- variationalSequences(sequences, call.threshold = 0.1) head(variational.sequences) ``` + ## mutateSequences -A common approach is to mutate sequences randomly or at specific intervals. This can be particularly helpful if we have fewer sequences or want to test a model for accuracy given new, altered sequences. mutateSequences() allows us to tune the type of mutation, where along the sequences to introduce the mutation and the overall number of mutations. +A common approach is to mutate sequences randomly or at specific intervals. This can be particularly helpful if we have fewer sequences or want to test a model for accuracy given new, altered sequences. 
```mutateSequences()``` allows us to tune the type of mutation, where along the sequences to introduce the mutation and the overall number of mutations. Parameters for ```mutateSequences()``` @@ -149,7 +161,7 @@ Parameters for ```formatGenes()``` * **input.data** Data frame of sequencing data or scRepertoire outputs * **region** Sequence gene loci to access - 'v', 'd', 'j', 'c' or a combination using c('v', 'd', 'j') -* **technology** The sequencing technology employed - 'TenX', "Adaptive', 'AIRR', or 'Omniscope'. +* **technology** The sequencing technology employed - 'TenX', 'Adaptive', or 'AIRR' * **species** One or two word designation of species. Currently supporting: "human", "mouse", "rat", "rabbit", "rhesus monkey", "sheep", "pig", "platypus", "alpaca", "dog", "chicken", and "ferret" * **simplify.format** If applicable, remove the allelic designation (TRUE) or retain all information (FALSE) @@ -166,7 +178,7 @@ head(Adaptive_example[,c("aminoAcid","vGeneName", "v_IMGT", "v_IMGT.check")]) ## getIMGT -Depending on the sequencing technology and the version, we might want to expand the length of our sequence embedding approach. The first step in the process is pulling the reference sequences from the ImMunoGeneTics (IMGT) system using ```getIMGT()```. More information for IMGT can be found at [imgt.org](https://www.imgt.org/). +Depending on the sequencing technology and the version, we might want to expand the length of our sequence embedding approach. The first step in the process is pulling the reference sequences from the ImMunoGeneTics (IMGT) system using ```getIMGT()```. More information for IMGT can be found at [imgt.org](https://www.imgt.org/). Data from IMGT is under a CC BY-NC-ND 4.0 license. Please be aware that attribution is required for usage and that the data should not be used to create commercial or derivative work. 
Parameters for ```getIMGT()``` @@ -196,7 +208,7 @@ Parameters for ```inferCDR``` * **input.data** Data frame of sequencing data or output from formatGenes(). * **reference** IMGT sequences from ```getIMGT()``` -* **technology** The sequencing technology employed - 'TenX', "Adaptive', 'AIRR', or 'Omniscope' +* **technology** The sequencing technology employed - 'TenX', 'Adaptive', or 'AIRR' * **sequence.type** Type of sequence - "aa" for amino acid or "nt" for nucleotide * **sequences** The specific regions of the CDR loop to get from the data. @@ -280,7 +292,7 @@ head(median.property.matrix[,1:3]) ## geometricEncoder -One approach to encode amino acid sequences is geometric isometry, such as [GIANA](https://pubmed.ncbi.nlm.nih.gov/34349111/). +One approach to encoding amino acid sequences is geometric isometry, such as [GIANA](https://pubmed.ncbi.nlm.nih.gov/34349111/). Parameters for ```geometricEncoder()``` @@ -296,7 +308,7 @@ head(geometric.matrix) ## tokenizeSequences -Another approach to transforming a sequence into numerical values is tokenizing it into numbers. This is a common approach for recurrent neural networks where one letter corresponds to a single integer. In addition, we can add a start and stop tokens to our original sequences to differentiate between the beginning and end of the sequences. +Another approach to transforming a sequence into numerical values is tokenizing it into numbers. This is a common approach for recurrent neural networks where one letter corresponds to a single integer. In addition, we can add start and stop tokens to our original sequences to differentiate between the beginning and end of the sequences. Parameters for ```tokenizeSequences()``` @@ -352,7 +364,7 @@ adj.matrix ## sequenceDecoder -We have a function called ```sequenceDecoder()``` that extracts sequences from one-hot or property-encoded matrices or arrays. This function can be applied to any generative approach to sequence generation. 
+We have a function called ```sequenceDecoder()``` that extracts sequences from one-hot or property-encoded matrices or arrays. This function can be applied to any generative approach to sequence generation. Parameters for ```sequenceDecoder()``` @@ -400,7 +412,7 @@ The steps to train the model include: 4. Fitting the model ```{r tidy = FALSE} -#Sampling to make Training/Valid Data +#Sampling to make Training/Validation Data Cohorts set.seed(42) num_sequences <- nrow(sequence.matrix) indices <- 1:num_sequences @@ -418,11 +430,11 @@ encoding_dim <- 40 hidden_dim1 <- 256 # Hidden layer 1 size hidden_dim2 <- 128 # Hidden layer 2 size -es = callback_early_stopping(monitor = "val_loss", - min_delta = 0, - patience = 4, - verbose = 1, - mode = "min") +es <- callback_early_stopping(monitor = "val_loss", + min_delta = 0, + patience = 4, + verbose = 1, + mode = "min") # Define the Model input_seq <- layer_input(shape = c(input_shape)) @@ -472,7 +484,7 @@ plot(history) + We can also build classifiers directly using deep or shallow neural networks. Building deep classifiers requires more data than classical machine learning methods, like random forests, so the vignette may not be ideal. -The first step is to generate distinct types of sequences using ```generateSequences()``` and ```onehotEncoder()``` to prepare the data for the model. +The first step is to generate distinct types of sequences using ```generateSequences()``` and ```onehotEncoder()``` to prepare the data for the model. ```{r tidy = FALSE} class1.sequences <- generateSequences(prefix.motif = "CAS", @@ -512,7 +524,7 @@ classifier.model %>% compile( metrics = c("accuracy") ) -#Seperating data and labels +#Separating data and labels set.seed(42) val_indices <- sample(nrow(classifier.matrix), 10000*0.2) x_val <- classifier.matrix[val_indices,] @@ -541,11 +553,10 @@ Here, we can achieve a validation accuracy of 98.25%, which is impressive. 
But t *** # Conclusion -This has been a general overview of the capabilities of immApex for processing immune receptor sequences and making deep learning models. If you have any questions, comments, or suggestions, feel free to visit the [GitHub repository](https://github.com/ncborcherding/immApex). +This has been a general overview of the capabilities of **immApex** for processing immune receptor sequences and making deep learning models. If you have any questions, comments, or suggestions, feel free to visit the [GitHub repository](https://github.com/ncborcherding/immApex). ## Session Info ```{r} sessionInfo() -``` - +``` \ No newline at end of file From 900f5b2fbf25ee8d7056d3ca62370155cf142c8c Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 20 Oct 2024 15:25:34 -0700 Subject: [PATCH 15/16] improve getContigDoublets tests --- tests/testthat/test-getContigDoublets.R | 71 ++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-getContigDoublets.R b/tests/testthat/test-getContigDoublets.R index b30f543a..75130863 100644 --- a/tests/testthat/test-getContigDoublets.R +++ b/tests/testthat/test-getContigDoublets.R @@ -4,6 +4,8 @@ getTestTcrList <- function() { getdata("combineContigs", "combined")[1:3] } +#' Generate a [combineBCR()] output example with matching sample names +#' as the output of [getTestTcrList()] above and no doublets. 
getTestBcrListNoDoublets <- function() { bcr <- getdata("combineContigs", "combineBCR_list_expected") bcr <- list(bcr[[1]][1:10, ], bcr[[1]][20:30, ], bcr[[1]][100:110, ]) @@ -18,13 +20,16 @@ getTestBcrListWithDoublets <- function(doubletsPerSample, seed = 42) { if (!is.null(seed)) withr::local_seed(seed) purrr:::map2( getTestBcrListNoDoublets(), getTestTcrList(), - makeRandomBcrBarcodesMatchTcr, n = doubletsPerSample + makeRandomBcrBarcodesMatchTcr, + n = doubletsPerSample ) } makeRandomBcrBarcodesMatchTcr <- function(bcrDf, tcrDf, n) { sampleUniqueBarcodeDf(bcrDf, n) %>% - dplyr::mutate(tcrBarcode = sampleUniqueBarcodeDf(tcrDf, n)$barcode) %>% + dplyr::mutate( + tcrBarcode = sampleUniqueBarcodeDf(tcrDf, n, asDf = FALSE) + ) %>% dplyr::full_join(bcrDf, by = "barcode") %>% dplyr::mutate( barcode = ifelse(is.na(tcrBarcode), barcode, tcrBarcode) @@ -32,11 +37,12 @@ makeRandomBcrBarcodesMatchTcr <- function(bcrDf, tcrDf, n) { dplyr::select(-tcrBarcode) } -sampleUniqueBarcodeDf <- function(contigDf, n) { +sampleUniqueBarcodeDf <- function(contigDf, n, asDf = TRUE) { contigDf %>% dplyr::select(barcode) %>% dplyr::distinct() %>% - dplyr::slice_sample(n = n) + dplyr::slice_sample(n = n) %>% + (if (asDf) identity else function(x) x$barcode) } test_that("getContigDoublets works for no doublets", { @@ -69,6 +75,59 @@ test_that("getContigDoublets works for inputs with doublets", { tcr <- getTestTcrList() bcr <- getTestBcrListWithDoublets(NUM_UNIQUE_DOUBLETS_PER_SAMPLE) - expect_true(nrow(getContigDoublets(tcr, bcr)) >= NUM_UNIQUE_DOUBLETS_PER_SAMPLE * length(tcr)) - # TODO + doubletDf <- getContigDoublets(tcr, bcr) + + expect_equal( + nrow(doubletDf), + NUM_UNIQUE_DOUBLETS_PER_SAMPLE * length(tcr) * 2 + ) + + expect_identical( + colnames(doubletDf), + c("contigType", "barcode", "sample", "TCR1", "cdr3_aa1", "cdr3_nt1", + "TCR2", "cdr3_aa2", "cdr3_nt2", "CTgene", "CTnt", "CTaa", "CTstrict", + "IGH", "IGLC") + ) + + getBarcodeSampleForContigType <- function(contigType) { + 
doubletDf %>% + dplyr::filter(.data$contigType == .env$contigType) %>% + dplyr::select(barcode, sample) %>% + dplyr::arrange(barcode, sample) + } + + expect_identical( + getBarcodeSampleForContigType("BCR"), + getBarcodeSampleForContigType("TCR") + ) + + makeCharNaDf <- function(dfColnames, nrow) { + matrix(nrow = nrow, ncol = length(dfColnames)) %>% + data.frame() %>% + (function(df) { + colnames(df) <- dfColnames + df + }) %>% + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) + } + + expect_identical( + doubletDf %>% + dplyr::filter(contigType == "TCR") %>% + dplyr::select(IGH, IGLC), + makeCharNaDf( + c("IGH", "IGLC"), NUM_UNIQUE_DOUBLETS_PER_SAMPLE * length(tcr) + ) + ) + + expect_identical( + doubletDf %>% + dplyr::filter(contigType == "BCR") %>% + dplyr::select(TCR1, TCR2), + makeCharNaDf( + c("TCR1", "TCR2"), + NUM_UNIQUE_DOUBLETS_PER_SAMPLE * length(bcr) + ) + ) + }) From 855ec261c05ef73fee76d9e38293751019d8212b Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Sun, 20 Oct 2024 15:26:27 -0700 Subject: [PATCH 16/16] merge NEWS.md section --- NEWS.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index efbd9e02..5d431e9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,20 +1,16 @@ -# scRepertoire VERSION 2.0.8.9000 (development version) +# scRepertoire VERSION 2.0.8 ## NEW FEATURES * Added ```getContigDoublets()``` experimental function to identify TCR and BCR doublets as a preprocessing step to ```combineExpression()``` -## UNDERLYING CHANGES -* convert documentation to use markdown (`roxygen2md`) -* import `lifecycle`, `purrr`, `withr` -* suppressed "using discrete variable for alpha is not recommended" warning in alluvialClones unit tests. 
- -# scRepertoire VERSION 2.0.8 - ## UNDERLYING CHANGES * Fixed issue with single chain output for ```clonalLength()``` * Removed unnecessary code remnant in ```clonalLength()``` * Allow one sample to be plotted by ```percentVJ()``` * Fixed issue with ```positionalProperty()``` and exportTable +* convert documentation to use markdown (`roxygen2md`) +* import `lifecycle`, `purrr`, `withr` +* suppressed "using discrete variable for alpha is not recommended" warning in alluvialClones unit tests. # scRepertoire VERSION 2.0.7