From 731b6606d85c1f06b613ad255e4d5ccae1a0ac1f Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Sun, 30 Oct 2022 08:31:14 -0600 Subject: [PATCH 1/9] Fixes for bioc - update description description, version, biocviews, and lazydata - slot accessors in vignettes --- DESCRIPTION | 20 ++++++++++++-------- NEWS.md | 4 ++-- vignettes/abundance.Rmd | 3 ++- vignettes/import_vdj.Rmd | 9 ++++++--- vignettes/mutate.Rmd | 15 ++++++++++----- 5 files changed, 32 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 63b40b72..a01cf3db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,17 @@ Package: djvdj Title: A collection of single-cell V(D)J tools -Version: 0.0.0.9000 +Version: 0.99.0 Authors@R: c( person("Ryan", "Sheridan", , "ryan.sheridan@cuanschutz.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4012-3147")), - person("Jay", "Hesselberth", , "jay.hesselberth@gmail.com", role = "ctb"), + person("Jay", "Hesselberth", , "jay.hesselberth@cuanschutz.com", role = "ctb"), person("Rui", "Fu", , "raysinensis@gmail.com", role = "ctb"), - person("Kent", "Riemondy", , "kent.riemondy@ucdenver.edu", role = "ctb"), - person("RNA Bioscience Initiative", role = "fnd") + person("Kent", "Riemondy", , "kent.riemondy@cuanschutz.edu", role = "ctb"), + person("RNA Bioscience Initiative", role = c("fnd", "cph")) ) -Description: djvdj provides a range of tools to analyze single-cell V(D)J - data. +Description: djvdj provides import, manipulation, analysis, and plotting tools + for analyzing single-cell V(D)J gene expression data, integrating with both + Seurat and SingleCellExperiment-based workflows.
License: MIT + file LICENSE URL: https://rnabioco.github.io/djvdj/, https://github.com/rnabioco/djvdj/ BugReports: https://github.com/rnabioco/djvdj/issues @@ -29,6 +30,8 @@ Imports: ggrepel, ggseqlogo, glue, + graphics, + grid, igraph (>= 1.3.0), iNEXT, MASS, @@ -44,6 +47,7 @@ Imports: stringr, tibble, tidyr, + utils, uwot Suggests: covr, @@ -59,8 +63,8 @@ VignetteBuilder: knitr Remotes: tidyverse/magrittr -Config/Needs/website:pkgdown, rnabioco/rbitemplate +Config/Needs/website: pkgdown, rnabioco/rbitemplate +biocViews: DataImport, GeneExpression, SingleCell, RNASeq, ImmunoOncology Encoding: UTF-8 -LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.1 diff --git a/NEWS.md b/NEWS.md index e4362e93..111e272f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,3 @@ -# djvdj 0.0.0.9000 +# djvdj 0.99.0 -* Added a `NEWS.md` file to track changes to the package. +* Initial release. diff --git a/vignettes/abundance.Rmd b/vignettes/abundance.Rmd index 7d19e201..776284ee 100644 --- a/vignettes/abundance.Rmd +++ b/vignettes/abundance.Rmd @@ -81,7 +81,8 @@ so_vdj <- so %>% When `cluster_col` is specified, an additional meta.data column ('shared') will be added indicating whether the clonotype is shared between multiple clusters. ```{r} -so_vdj@meta.data %>% +# or use `so_vdj@meta.data` +slot(so_vdj, 'meta.data') %>% head(2) ``` diff --git a/vignettes/import_vdj.Rmd b/vignettes/import_vdj.Rmd index 9cb3a7fe..eca5950b 100644 --- a/vignettes/import_vdj.Rmd +++ b/vignettes/import_vdj.Rmd @@ -60,7 +60,8 @@ so_vdj <- so %>% `import_vdj()` adds a variety of per-chain metrics to the object meta.data. Information for each chain identified for the cell is separated by a semicolon. The separator used for storing and parsing per-chain V(D)J data can be specified using the `sep` argument included for most djvdj functions. `NA`s will be included for cells that lack V(D)J data. 
```{r} -so_vdj@meta.data %>% +# or use `so_vdj@meta.data` +slot(so_vdj, 'meta.data') %>% head(2) ``` @@ -206,7 +207,8 @@ so_vdj <- so %>% The additional columns added to the meta.data will include the number of insertions, deletions, and mismatches (ending in 'ins', 'del', or 'mis') for each V(D)J segment (prefixed with 'v', 'd', 'j', or 'c'). Columns containing junction information will be prefixed with either 'vd' or 'dj'. Columns ending in 'freq' show the event frequency which is calculated as the number of events divided by the length of the region. ```{r} -so_vdj@meta.data %>% +# or use `so_vdj@meta.data` +slot(so_vdj, 'meta.data') %>% head(2) ``` @@ -235,7 +237,8 @@ so_vdj <- so %>% This results in two sets of new columns being added to the meta.data. When performing downstream analysis using other djvdj functions, be sure to specify the correct columns, i.e. 'bcr_clonotype_id' or 'tcr_clonotype_id'. ```{r} -so_vdj@meta.data %>% +# or use `so_vdj@meta.data` +slot(so_vdj, 'meta.data') %>% head(3) ``` diff --git a/vignettes/mutate.Rmd b/vignettes/mutate.Rmd index a703dece..aeef4d0c 100644 --- a/vignettes/mutate.Rmd +++ b/vignettes/mutate.Rmd @@ -73,7 +73,8 @@ res <- so %>% all(c("IGH", "IGK", "IGL") %in% chains) ) -res@meta.data %>% +# or use `res@meta.data` +slot(res, 'meta.data') %>% filter(!is.na(clonotype_id)) %>% select(chains, cdr3) %>% head(3) @@ -85,7 +86,8 @@ In this example we are removing V(D)J data for all chains except IGH. 
res <- so %>% filter_vdj(chains == "IGH") -res@meta.data %>% +# or use `res@meta.data` +slot(res, 'meta.data') %>% filter(!is.na(clonotype_id)) %>% select(chains, cdr3) %>% head(3) @@ -105,7 +107,8 @@ res <- so %>% col_names = "median_{.col}" ) -res@meta.data %>% +# or use `res@meta.data` +slot(res, 'meta.data') %>% select(all_del, all_ins, median_all_del, median_all_ins) %>% head(2) ``` @@ -120,7 +123,8 @@ res <- so %>% col_names = "unique_chains" ) -res@meta.data %>% +# or use `res@meta.data` +slot(res, 'meta.data') %>% filter(n_chains > 2) %>% select(chains, unique_chains) %>% head(2) @@ -140,7 +144,8 @@ res <- so %>% total_indels = sum(all_ins, all_del) ) -res@meta.data %>% +# or use `res@meta.data` +slot(res, 'meta.data') %>% select(all_ins, all_del, total_indels) %>% head() ``` From f4d4b7a18394c3aa2cbb49127e4070fcac0a3a6c Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 15:34:00 -0600 Subject: [PATCH 2/9] more fixes for bioccheck --- .Rbuildignore | 2 ++ DESCRIPTION | 8 ++++---- R/calc-diversity.R | 8 ++++---- man/calc_diversity.Rd | 8 ++++---- man/djvdj-package.Rd | 8 ++++---- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index c6056136..ef18855e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,5 @@ ^codecov\.yml$ ^vignettes/$ ^vignettes/avid\.Rmd$ +splenocytes.zip +splenocytes diff --git a/DESCRIPTION b/DESCRIPTION index a01cf3db..c31d4ba2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ License: MIT + file LICENSE URL: https://rnabioco.github.io/djvdj/, https://github.com/rnabioco/djvdj/ BugReports: https://github.com/rnabioco/djvdj/issues Depends: - R (>= 4.0.0) + R (>= 4.2.0) Imports: abdiv, Biostrings, @@ -61,9 +61,9 @@ Suggests: testthat VignetteBuilder: knitr -Remotes: - tidyverse/magrittr -Config/Needs/website: pkgdown, rnabioco/rbitemplate +Config/Needs/website: + pkgdown, + rnabioco/rbitemplate biocViews: DataImport, GeneExpression, SingleCell, RNASeq, 
ImmunoOncology Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/R/calc-diversity.R b/R/calc-diversity.R index e2c26f35..a42c1d04 100644 --- a/R/calc-diversity.R +++ b/R/calc-diversity.R @@ -34,7 +34,7 @@ #' method = abdiv::simpson #' ) #' -#' head(res@meta.data, 1) +#' head(slot(res, 'meta.data'), 1) #' #' # Group cells based on meta.data column before calculating diversity #' res <- calc_diversity( @@ -43,7 +43,7 @@ #' cluster_col = "orig.ident" #' ) #' -#' head(res@colData, 1) +#' head(slot(res, 'colData'), 1) #' #' # Add a prefix to the new columns #' # this is useful if multiple diversity calculations are stored in the @@ -54,7 +54,7 @@ #' prefix = "bcr_" #' ) #' -#' head(res@meta.data, 1) +#' head(slot(res, 'meta.data'), 1) #' #' # Calculate multiple metrics #' res <- calc_diversity( @@ -66,7 +66,7 @@ #' ) #' ) #' -#' head(res@colData, 1) +#' head(slot(res, 'colData'), 1) #' #' # Return a data.frame instead of adding the results to the input object #' res <- calc_diversity( diff --git a/man/calc_diversity.Rd b/man/calc_diversity.Rd index 583abcbb..d01b7b66 100644 --- a/man/calc_diversity.Rd +++ b/man/calc_diversity.Rd @@ -64,7 +64,7 @@ res <- calc_diversity( method = abdiv::simpson ) -head(res@meta.data, 1) +head(slot(res, 'meta.data'), 1) # Group cells based on meta.data column before calculating diversity res <- calc_diversity( @@ -73,7 +73,7 @@ res <- calc_diversity( cluster_col = "orig.ident" ) -head(res@colData, 1) +head(slot(res, 'colData'), 1) # Add a prefix to the new columns # this is useful if multiple diversity calculations are stored in the @@ -84,7 +84,7 @@ res <- calc_diversity( prefix = "bcr_" ) -head(res@meta.data, 1) +head(slot(res, 'meta.data'), 1) # Calculate multiple metrics res <- calc_diversity( @@ -96,7 +96,7 @@ res <- calc_diversity( ) ) -head(res@colData, 1) +head(slot(res, 'colData'), 1) # Return a data.frame instead of adding the results to the input object res <- calc_diversity( diff --git a/man/djvdj-package.Rd 
b/man/djvdj-package.Rd index 6e45a80b..5ad85b94 100644 --- a/man/djvdj-package.Rd +++ b/man/djvdj-package.Rd @@ -8,7 +8,7 @@ \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -djvdj provides a range of tools to analyze single-cell V(D)J data. +djvdj provides import, manipulation, analysis, and plotting tools for analyzing single-cell V(D)J gene expression data, integrating with both Seurat and SingleCellExperiment-based workflows. } \seealso{ Useful links: @@ -24,10 +24,10 @@ Useful links: Other contributors: \itemize{ - \item Jay Hesselberth \email{jay.hesselberth@gmail.com} [contributor] + \item Jay Hesselberth \email{jay.hesselberth@cuanschutz.com} [contributor] \item Rui Fu \email{raysinensis@gmail.com} [contributor] - \item Kent Riemondy \email{kent.riemondy@ucdenver.edu} [contributor] - \item RNA Bioscience Initiative [funder] + \item Kent Riemondy \email{kent.riemondy@cuanschutz.edu} [contributor] + \item RNA Bioscience Initiative [funder, copyright holder] } } From 1c4ce97ec4b4d6fb173791a58461cedc4e7712af Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 15:43:19 -0600 Subject: [PATCH 3/9] add collapsible session info to vignettes --- vignettes/abundance.Rmd | 9 +++++++++ vignettes/clustering.Rmd | 9 +++++++++ vignettes/diversity.Rmd | 9 +++++++++ vignettes/gene-usage.Rmd | 9 +++++++++ vignettes/import_vdj.Rmd | 9 +++++++++ vignettes/mutate.Rmd | 8 ++++++++ vignettes/plotting.Rmd | 15 ++++++++------- 7 files changed, 61 insertions(+), 7 deletions(-) diff --git a/vignettes/abundance.Rmd b/vignettes/abundance.Rmd index 776284ee..38ef3b3e 100644 --- a/vignettes/abundance.Rmd +++ b/vignettes/abundance.Rmd @@ -218,3 +218,12 @@ so %>% plot_colors = c(MD4 = "#fec44f", BL6 = "#3182bd") ) ``` + +
+ + Session info + +```{r} +sessionInfo() +``` +
diff --git a/vignettes/clustering.Rmd b/vignettes/clustering.Rmd index c03482f3..1c3bc66b 100644 --- a/vignettes/clustering.Rmd +++ b/vignettes/clustering.Rmd @@ -213,3 +213,12 @@ so_vdj %>% axis.ticks.x = element_blank() ) ``` + +
+ + Session info + +```{r} +sessionInfo() +``` +
diff --git a/vignettes/diversity.Rmd b/vignettes/diversity.Rmd index f4d2bcca..4ca3112a 100644 --- a/vignettes/diversity.Rmd +++ b/vignettes/diversity.Rmd @@ -194,3 +194,12 @@ so %>% n_boots = 0 ) ``` + +
+ + Session info + +```{r} +sessionInfo() +``` +
diff --git a/vignettes/gene-usage.Rmd b/vignettes/gene-usage.Rmd index 29d8cee9..895caf07 100644 --- a/vignettes/gene-usage.Rmd +++ b/vignettes/gene-usage.Rmd @@ -183,3 +183,12 @@ so %>% scale = TRUE ) ``` + +
+ + Session info + +```{r} +sessionInfo() +``` +
diff --git a/vignettes/import_vdj.Rmd b/vignettes/import_vdj.Rmd index eca5950b..6b05e26c 100644 --- a/vignettes/import_vdj.Rmd +++ b/vignettes/import_vdj.Rmd @@ -257,3 +257,12 @@ vdj_dirs <- c( # This will load V(D)J data and return a data.frame df_vdj <- import_vdj(vdj_dir = vdj_dirs) ``` + +
+ + Session info + +```{r} +sessionInfo() +``` +
diff --git a/vignettes/mutate.Rmd b/vignettes/mutate.Rmd index aeef4d0c..b16a68c7 100644 --- a/vignettes/mutate.Rmd +++ b/vignettes/mutate.Rmd @@ -150,3 +150,11 @@ slot(res, 'meta.data') %>% head() ``` +
+ + Session info + +```{r} +sessionInfo() +``` +
diff --git a/vignettes/plotting.Rmd b/vignettes/plotting.Rmd index 94f37446..f21fcaf9 100644 --- a/vignettes/plotting.Rmd +++ b/vignettes/plotting.Rmd @@ -148,10 +148,11 @@ so %>% ) ``` - - - - - - - +
+ + Session info + +```{r} +sessionInfo() +``` +
 From d8d3041ac3975cd65df06baaab4f9590f63bc15e Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 16:02:35 -0600 Subject: [PATCH 4/9] explicitly load data after LazyData: false --- DESCRIPTION | 3 ++- tests/testthat/test-calc-diversity.R | 3 +++ tests/testthat/test-calc-frequency.R | 3 +++ tests/testthat/test-calc-gene-usage.R | 3 +++ tests/testthat/test-calc-similarity.R | 3 +++ tests/testthat/test-cluster-seqs.R | 2 ++ tests/testthat/test-filter-vdj.R | 3 +++ tests/testthat/test-import-vdj.R | 3 +++ tests/testthat/test-utils.R | 3 +++ 9 files changed, 25 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c31d4ba2..3181f1ca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,7 +58,7 @@ Suggests: RColorBrewer, rmarkdown, roxygen2, - testthat + testthat (>= 3.0.0) VignetteBuilder: knitr Config/Needs/website: @@ -68,3 +68,4 @@ biocViews: DataImport, GeneExpression, SingleCell, RNASeq, ImmunoOncology Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.1 +Config/testthat/edition: 3 diff --git a/tests/testthat/test-calc-diversity.R b/tests/testthat/test-calc-diversity.R index 33b84717..b2a83ccf 100644 --- a/tests/testthat/test-calc-diversity.R +++ b/tests/testthat/test-calc-diversity.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Test data df_1 <- vdj_so@meta.data diff --git a/tests/testthat/test-calc-frequency.R b/tests/testthat/test-calc-frequency.R index 15c760b4..48d718aa 100644 --- a/tests/testthat/test-calc-frequency.R +++ b/tests/testthat/test-calc-frequency.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Test data df_1 <- vdj_so@meta.data diff --git a/tests/testthat/test-calc-gene-usage.R b/tests/testthat/test-calc-gene-usage.R index 9dcc125d..566df5bf 100644 --- a/tests/testthat/test-calc-gene-usage.R +++ b/tests/testthat/test-calc-gene-usage.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Test data df_1 <- vdj_so@meta.data diff --git a/tests/testthat/test-calc-similarity.R
b/tests/testthat/test-calc-similarity.R index fccbb5e0..ecbff94e 100644 --- a/tests/testthat/test-calc-similarity.R +++ b/tests/testthat/test-calc-similarity.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Test data df_1 <- vdj_so@meta.data diff --git a/tests/testthat/test-cluster-seqs.R b/tests/testthat/test-cluster-seqs.R index 2e4ea976..bc872a56 100644 --- a/tests/testthat/test-cluster-seqs.R +++ b/tests/testthat/test-cluster-seqs.R @@ -1,3 +1,5 @@ +data(vdj_so) +data(vdj_sce) test_cols <- c( "#E69F00", "#56B4E9", "#009E73", diff --git a/tests/testthat/test-filter-vdj.R b/tests/testthat/test-filter-vdj.R index 3036e3af..3e476070 100644 --- a/tests/testthat/test-filter-vdj.R +++ b/tests/testthat/test-filter-vdj.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Check NAs in result check_nas <- function(df_in) { diff --git a/tests/testthat/test-import-vdj.R b/tests/testthat/test-import-vdj.R index 9d97bb12..2f836f63 100644 --- a/tests/testthat/test-import-vdj.R +++ b/tests/testthat/test-import-vdj.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Test data ctigs <- c( system.file("extdata/bcr_1/outs", package = "djvdj"), diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 951985e7..c8b48f89 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,3 +1,6 @@ +data(vdj_so) +data(vdj_sce) + # Test data df_1 <- vdj_so@meta.data From 0cefeefb6ac79989b850486feae6288a151ce811 Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 16:06:12 -0600 Subject: [PATCH 5/9] move benchmarks to inst/bench --- .Rbuildignore | 1 + inst/bench/large-data.R | 69 ++++++++++++++++++++++++++++++++++++ tests/testthat.R | 78 +++++------------------------------------ 3 files changed, 78 insertions(+), 70 deletions(-) create mode 100644 inst/bench/large-data.R diff --git a/.Rbuildignore b/.Rbuildignore index ef18855e..cb4457b1 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^vignettes/avid\.Rmd$ splenocytes.zip 
splenocytes +^inst/bench$ diff --git a/inst/bench/large-data.R b/inst/bench/large-data.R new file mode 100644 index 00000000..c58919ea --- /dev/null +++ b/inst/bench/large-data.R @@ -0,0 +1,69 @@ +library(dplyr) +library(tidyr) +library(purrr) +library(tibble) +library(stringr) +library(SingleCellExperiment) +library(djvdj) + +### TESTING LARGE DATA ### +# +# # generating test data +# load("data/avid/so_avid.rda") +# +# test_vdj <- so_avid %>% +# import_vdj("data/avid/bcr/") +# +# test_vdj <- test_vdj@meta.data +# +# walk(1:5, ~ { +# test_vdj <<- bind_rows(test_vdj, test_vdj) +# }) +# +# test_vdj <- test_vdj %>% +# mutate(clonotype_id = str_c(clonotype_id, row_number(clonotype_id) %% 5)) +# +# # testing functions +# test_vdj %>% +# calc_abundance(cluster_col = "seurat_clusters") +# +# test_vdj %>% +# plot_abundance( +# cluster_col = "seurat_clusters", +# type = "line", +# n_clonotypes = 2 +# ) +# +# test_vdj %>% +# plot_similarity( +# cluster_col = "seurat_clusters" +# ) +# +# test_vdj %>% +# plot_diversity( +# cluster_col = "seurat_clusters", +# ) +# +# clmns <- c( +# "v_gene", "d_gene", "chains", +# "umis", "reads", "cdr3_length", +# "cdr3_nt_length", "productive", +# "full_length" +# ) +# +# tictoc::tic() +# x <- fetch_vdj(test_vdj, clmns) +# tictoc::toc() +# +# tictoc::tic() +# y <- summarize_vdj(test_vdj, clmns) +# tictoc::toc() +# +# tictoc::tic() +# x <- fetch_vdj(test_vdj, clonotype_col = NULL) +# tictoc::toc() +# +# tictoc::tic() +# y <- fetch_vdj(test_vdj, clonotype_col = "clonotype_id") +# tictoc::toc() + diff --git a/tests/testthat.R b/tests/testthat.R index 39954cb3..cbdcfffc 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,74 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? 
+# Learn more about the roles of various files in: +# * https://r-pkgs.org/tests.html +# * https://testthat.r-lib.org/reference/test_package.html#special-files + library(testthat) -library(dplyr) -library(tidyr) -library(purrr) -library(tibble) -library(stringr) -library(SingleCellExperiment) library(djvdj) test_check("djvdj") - - - -### TESTING LARGE DATA ### -# -# # generating test data -# load("data/avid/so_avid.rda") -# -# test_vdj <- so_avid %>% -# import_vdj("data/avid/bcr/") -# -# test_vdj <- test_vdj@meta.data -# -# walk(1:5, ~ { -# test_vdj <<- bind_rows(test_vdj, test_vdj) -# }) -# -# test_vdj <- test_vdj %>% -# mutate(clonotype_id = str_c(clonotype_id, row_number(clonotype_id) %% 5)) -# -# # testing functions -# test_vdj %>% -# calc_abundance(cluster_col = "seurat_clusters") -# -# test_vdj %>% -# plot_abundance( -# cluster_col = "seurat_clusters", -# type = "line", -# n_clonotypes = 2 -# ) -# -# test_vdj %>% -# plot_similarity( -# cluster_col = "seurat_clusters" -# ) -# -# test_vdj %>% -# plot_diversity( -# cluster_col = "seurat_clusters", -# ) -# -# clmns <- c( -# "v_gene", "d_gene", "chains", -# "umis", "reads", "cdr3_length", -# "cdr3_nt_length", "productive", -# "full_length" -# ) -# -# tictoc::tic() -# x <- fetch_vdj(test_vdj, clmns) -# tictoc::toc() -# -# tictoc::tic() -# y <- summarize_vdj(test_vdj, clmns) -# tictoc::toc() -# -# tictoc::tic() -# x <- fetch_vdj(test_vdj, clonotype_col = NULL) -# tictoc::toc() -# -# tictoc::tic() -# y <- fetch_vdj(test_vdj, clonotype_col = "clonotype_id") -# tictoc::toc() - From 7d8bac2ee1d3bca9be665c2efdaa7d58b74f2fe0 Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 16:14:57 -0600 Subject: [PATCH 6/9] add return values --- R/calc-similarity.R | 2 ++ R/cluster-seqs.R | 1 + man/plot_mds.Rd | 3 +++ man/plot_motifs.Rd | 3 +++ man/plot_similarity.Rd | 3 +++ 5 files changed, 12 insertions(+) diff --git a/R/calc-similarity.R b/R/calc-similarity.R index 8303695e..6aa2b4b3 100644 --- 
a/R/calc-similarity.R +++ b/R/calc-similarity.R @@ -241,6 +241,7 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar #' circos plot #' @importFrom abdiv jaccard #' @seealso [calc_similarity()], [calc_mds()], [plot_mds()] +#' @return ggplot object #' #' @examples #' # Plot repertoire overlap @@ -492,6 +493,7 @@ calc_mds <- function(input, data_col, cluster_col, method = abdiv::jaccard, #' @param sep Separator used for storing per-chain V(D)J data for each cell #' @param ... Additional arguments to pass to [ggplot2::geom_point()] #' @seealso [calc_mds()], [calc_similarity()], [plot_similarity()], [MASS::isoMDS()] +#' @return ggplot object #' @export plot_mds <- function(input, data_col, cluster_col, method = abdiv::jaccard, chain = NULL, diff --git a/R/cluster-seqs.R b/R/cluster-seqs.R index 5800c6de..5040c0ad 100644 --- a/R/cluster-seqs.R +++ b/R/cluster-seqs.R @@ -188,6 +188,7 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL, #' @param ... 
Additional parameters to pass to [ggseqlogo::geom_logo()] #' @importFrom stringr str_trunc #' @seealso [cluster_sequences()] +#' @return ggplot object #' @export plot_motifs <- function(input, data_col = "cdr3", cluster_col = NULL, chain, plot_colors = NULL, diff --git a/man/plot_mds.Rd b/man/plot_mds.Rd index 5cb0fc58..f5638b10 100644 --- a/man/plot_mds.Rd +++ b/man/plot_mds.Rd @@ -51,6 +51,9 @@ all chains.} \item{...}{Additional arguments to pass to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}} } +\value{ +ggplot object +} \description{ Perform multidimensional scaling and plot results } diff --git a/man/plot_motifs.Rd b/man/plot_motifs.Rd index bb2cbd36..3e3f1c2f 100644 --- a/man/plot_motifs.Rd +++ b/man/plot_motifs.Rd @@ -52,6 +52,9 @@ align sequences at the 5' or 3' end when plotting.} \item{...}{Additional parameters to pass to \code{\link[ggseqlogo:geom_logo]{ggseqlogo::geom_logo()}}} } +\value{ +ggplot object +} \description{ Create sequence logos for clusters } diff --git a/man/plot_similarity.Rd b/man/plot_similarity.Rd index 01772572..c42bbd6a 100644 --- a/man/plot_similarity.Rd +++ b/man/plot_similarity.Rd @@ -68,6 +68,9 @@ be shown.} \code{\link[ComplexHeatmap:Heatmap]{ComplexHeatmap::Heatmap()}} for heatmap, \code{\link[circlize:chordDiagram]{circlize::chordDiagram()}} for circos plot} } +\value{ +ggplot object +} \description{ Plot cluster similarity } From 86815d60f1b35fb87e316e8e6f7deb03d1e187f4 Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 16:15:28 -0600 Subject: [PATCH 7/9] skip as bioc requires r >= 4.2.0 --- .github/workflows/check-standard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index a3ac6182..b39f0e90 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -22,7 +22,7 @@ jobs: - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', 
http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + # - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} From ea3730a0ed43abc070b7dbe9c948789708caf927 Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Mon, 31 Oct 2022 17:13:26 -0600 Subject: [PATCH 8/9] remove `.data$` to eliminate tidyselect warnings --- R/calc-abundance.R | 8 +- R/calc-diversity.R | 28 ++--- R/calc-gene-usage.R | 10 +- R/calc-similarity.R | 12 +-- R/cluster-seqs.R | 6 +- R/filter-vdj.R | 2 +- R/import-vdj.R | 176 +++++++++++++++---------------- R/utils-plots.R | 12 +-- tests/testthat/test-import-vdj.R | 1 + 9 files changed, 128 insertions(+), 127 deletions(-) diff --git a/R/calc-abundance.R b/R/calc-abundance.R index 366cabff..9e9eff32 100644 --- a/R/calc-abundance.R +++ b/R/calc-abundance.R @@ -130,7 +130,7 @@ calc_frequency <- function(input, data_col, cluster_col = NULL, prefix = paste0 res <- dplyr::mutate( res, .freq = dplyr::n_distinct(!!sym(cell_col)), - .pct = (.data$.freq / .data$.n_cells) * 100 + .pct = (.freq / .n_cells) * 100 ) # Identify shared labels @@ -188,7 +188,7 @@ calc_frequency <- function(input, data_col, cluster_col = NULL, prefix = paste0 ) # Format labels - labs <- dplyr::group_by(labs, .data$grp) + labs <- dplyr::group_by(labs, grp) labs <- dplyr::mutate( labs, lab = paste0(unique(range(x)), collapse = "-") @@ -442,7 +442,7 @@ plot_clonal_abundance <- function(input, cluster_col = NULL, if (n_clones > 0) { res <- res + ggrepel::geom_text_repel( - ggplot2::aes(label = .data$.lab), + ggplot2::aes(label = .lab), data = top_clones, nudge_x = 500, direction = "y", @@ -524,7 +524,7 @@ plot_frequency <- function(input, data_col, cluster_col = NULL, plt_dat <- dplyr::group_by(plt_dat, !!sym(data_col)) rnk <- dplyr::summarize(plt_dat, mn = mean(!!sym(abun_col))) - rnk <- dplyr::arrange(rnk, desc(.data$mn)) + rnk <- dplyr::arrange(rnk, desc(mn)) rnk <- pull(rnk, data_col) plt_dat <- 
dplyr::ungroup(plt_dat) diff --git a/R/calc-diversity.R b/R/calc-diversity.R index a42c1d04..f15b4631 100644 --- a/R/calc-diversity.R +++ b/R/calc-diversity.R @@ -186,18 +186,18 @@ calc_diversity <- function(input, data_col, cluster_col = NULL, sam, met = .y, diversity = list(.calc_div(!!sym(data_col), met = .x)), - stderr = purrr::map_dbl(.data$diversity, pull, "std.error"), - diversity = purrr::map_dbl(.data$diversity, pull, "statistic") + stderr = purrr::map_dbl(diversity, pull, "std.error"), + diversity = purrr::map_dbl(diversity, pull, "statistic") ) }) div_cols <- "diversity" if (n_boots > 1) div_cols <- c(div_cols, "stderr") - else div <- dplyr::select(div, -.data$stderr) + else div <- dplyr::select(div, -stderr) div <- tidyr::pivot_longer(div, all_of(div_cols)) - div <- tidyr::unite(div, "name", .data$met, .data$name) + div <- tidyr::unite(div, "name", met, name) div <- tidyr::pivot_wider(div) # Format results @@ -372,12 +372,12 @@ plot_diversity <- function(input, data_col, cluster_col = NULL, re <- "^(.+)_(diversity|stderr)$" - plt_dat <- tidyr::extract(plt_dat, .data$name, into = c("met", "type"), re) + plt_dat <- tidyr::extract(plt_dat, name, into = c("met", "type"), re) plt_dat <- tidyr::pivot_wider( plt_dat, - names_from = .data$type, - values_from = .data$value + names_from = type, + values_from = value ) # Set plot levels @@ -439,8 +439,8 @@ plot_diversity <- function(input, data_col, cluster_col = NULL, ggplot2::geom_linerange( aes( !!sym(cluster_col), - ymin = .data$diversity - .data$stderr, - ymax = .data$diversity + .data$stderr + ymin = diversity - stderr, + ymax = diversity + stderr ) ) } @@ -560,13 +560,13 @@ plot_rarefaction <- function(input, data_col, cluster_col = NULL, plt_dat <- dplyr::mutate( plt_dat, - method = dplyr::recode(.data$Method, "Observed" = "Rarefaction"), + method = dplyr::recode(Method, "Observed" = "Rarefaction"), method = stringr::str_to_lower(method), - Order.q = met_labs[as.character(.data$Order.q)] + Order.q = 
met_labs[as.character(Order.q)] ) if (!is.null(cluster_col)) { - plt_dat <- dplyr::rename(plt_dat, !!sym(cluster_col) := .data$Assemblage) + plt_dat <- dplyr::rename(plt_dat, !!sym(cluster_col) := Assemblage) } plt_dat <- .set_lvls(plt_dat, cluster_col, plot_lvls) @@ -576,13 +576,13 @@ plot_rarefaction <- function(input, data_col, cluster_col = NULL, # Plot standard error res <- ggplot2::ggplot( plt_dat, - ggplot2::aes(.data$m, .data$qD, linetype = method) + ggplot2::aes(m, qD, linetype = method) ) + ggplot2::guides(linetype = ggplot2::guide_legend(title = NULL)) if (n_boots > 1) { gg_aes <- ggplot2::aes( - x = .data$m, ymin = .data$qD.LCL, ymax = .data$qD.UCL + x = m, ymin = qD.LCL, ymax = qD.UCL ) if (!is.null(cluster_col)) gg_aes$fill <- sym(cluster_col) diff --git a/R/calc-gene-usage.R b/R/calc-gene-usage.R index 52eed287..5ce8b3ba 100644 --- a/R/calc-gene-usage.R +++ b/R/calc-gene-usage.R @@ -127,7 +127,7 @@ calc_gene_usage <- function(input, data_cols, cluster_col = NULL, chain = NULL, res <- tidyr::pivot_wider( res, names_from = all_of(clst_nm), - values_from = .data$freq, + values_from = freq, values_fill = 0 ) @@ -154,8 +154,8 @@ calc_gene_usage <- function(input, data_cols, cluster_col = NULL, chain = NULL, ) } - res <- dplyr::mutate(res, pct = (.data$freq / .data$n_cells) * 100) - res <- dplyr::arrange(res, desc(.data$pct)) + res <- dplyr::mutate(res, pct = (freq / n_cells) * 100) + res <- dplyr::arrange(res, desc(pct)) res } @@ -617,12 +617,12 @@ plot_gene_usage <- function(input, data_cols, cluster_col = NULL, res <- dplyr::mutate(res, rnk = row_number(desc(!!sym(dat_col)))) if (is.null(clst_col)) { - res <- dplyr::filter(res, !!gn_2 %in% top_2 | .data$rnk == 1) + res <- dplyr::filter(res, !!gn_2 %in% top_2 | rnk == 1) } else { res <- dplyr::filter( res, - !!gn_2 %in% top_2[[dplyr::cur_group()[[clst_col]]]] | .data$rnk == 1 + !!gn_2 %in% top_2[[dplyr::cur_group()[[clst_col]]]] | rnk == 1 ) } diff --git a/R/calc-similarity.R b/R/calc-similarity.R index 
6aa2b4b3..16d522aa 100644 --- a/R/calc-similarity.R +++ b/R/calc-similarity.R @@ -128,7 +128,7 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar vdj <- tidyr::pivot_wider( vdj, names_from = all_of(cluster_col), - values_from = .data$n, + values_from = n, values_fill = 0 ) @@ -158,20 +158,20 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar }) # Combine with inverse combinations - res_i <- dplyr::rename(res, Var1 = .data$Var2, Var2 = .data$Var1) + res_i <- dplyr::rename(res, Var1 = Var2, Var2 = Var1) res <- dplyr::bind_rows(res, res_i, res_s) # Format data.frame clmns <- sort(unique(res$Var2)) - res <- dplyr::arrange(res, .data$Var2) + res <- dplyr::arrange(res, Var2) res <- tidyr::pivot_wider( res, - names_from = .data$Var1, - values_from = .data$sim + names_from = Var1, + values_from = sim ) - res <- dplyr::select(res, !!sym(cluster_col) := .data$Var2, all_of(clmns)) + res <- dplyr::select(res, !!sym(cluster_col) := Var2, all_of(clmns)) # Return matrix if (return_mat) { diff --git a/R/cluster-seqs.R b/R/cluster-seqs.R index 5040c0ad..419f8905 100644 --- a/R/cluster-seqs.R +++ b/R/cluster-seqs.R @@ -97,7 +97,7 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL, make_adj_df <- function(mat) { res <- tibble::as_tibble(mat, rownames = "Var1") - res <- tidyr::pivot_longer(res, -.data$Var1, values_to = "Var2") + res <- tidyr::pivot_longer(res, -Var1, values_to = "Var2") res } @@ -105,8 +105,8 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL, adj_df <- dplyr::mutate( adj_df, - Var1 = seqs[as.integer(.data$Var1)], - Var2 = seqs[.data$Var2] + Var1 = seqs[as.integer(Var1)], + Var2 = seqs[Var2] ) # Create adjacency graph diff --git a/R/filter-vdj.R b/R/filter-vdj.R index c761bf76..ab937af0 100644 --- a/R/filter-vdj.R +++ b/R/filter-vdj.R @@ -112,7 +112,7 @@ filter_vdj <- function(input, filt, data_cols = NULL, } keep_rows <- vdj$.KEEP - vdj <- dplyr::select(vdj, 
-.data$.KEEP) + vdj <- dplyr::select(vdj, -.KEEP) # If vectors in keep_rows are all length 1, filter cells if (length_one) { diff --git a/R/import-vdj.R b/R/import-vdj.R index f7fd745e..e7651890 100644 --- a/R/import-vdj.R +++ b/R/import-vdj.R @@ -193,7 +193,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains bcs <- .get_meta(input)[[CELL_COL]] prfx_df <- .extract_cell_prefix(bcs, strip_bcs = FALSE) - prfx_df <- dplyr::distinct(prfx_df, .data$prfx, .data$sfx) + prfx_df <- dplyr::distinct(prfx_df, prfx, sfx) prfxs <- prfx_df$prfx sfxs <- prfx_df$sfx @@ -274,8 +274,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains contigs <- purrr::map( contigs, mutate, - contig_sfx = unlist(.str_extract_all(.data$contig_id, "_contig_[0-9]+$")), - contig_id = paste0(.data$barcode, .data$contig_sfx), + contig_sfx = unlist(.str_extract_all(contig_id, "_contig_[0-9]+$")), + contig_id = paste0(barcode, contig_sfx), contig_sfx = NULL ) @@ -368,7 +368,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains "these contigs will be removed." 
) - contigs <- dplyr::filter(contigs, !is.na(.data$clonotype_id)) + contigs <- dplyr::filter(contigs, !is.na(clonotype_id)) } # Select V(D)J columns to keep @@ -402,7 +402,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains vdj_cols <- c(vdj_cols, "paired") if (filter_paired) { - contigs <- dplyr::filter(contigs, .data$paired) + contigs <- dplyr::filter(contigs, paired) } # Order chains and CDR3 sequences @@ -411,7 +411,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains # the clonotype ID contigs <- dplyr::arrange( contigs, - .data$barcode, .data$chains, .data$cdr3_nt + barcode, chains, cdr3_nt ) # Extract isotypes from c_gene for IGH chain (for BCR data only) @@ -438,13 +438,13 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains ) # Reorder columns - meta <- dplyr::relocate(meta, .data$paired, .after = "full_length") + meta <- dplyr::relocate(meta, paired, .after = "full_length") meta <- dplyr::relocate(meta, all_of(len_cols), .after = last(cdr3_cols)) - meta <- dplyr::relocate(meta, .data$n_chains, .after = "chains") + meta <- dplyr::relocate(meta, n_chains, .after = "chains") meta <- dplyr::relocate(meta, all_of(gene_cols), .after = last(len_cols)) if (vdj_class %in% c("BCR", "Multi")) { - meta <- dplyr::relocate(meta, .data$isotype, .after = "c_gene") + meta <- dplyr::relocate(meta, isotype, .after = "c_gene") } # Check for duplicated cell barcodes @@ -487,7 +487,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains # Filter to only include cells with valid clonotype_id # cells with missing clonotype have a clonotype_id of 'None' - res <- dplyr::filter(res, .data$clonotype_id != "None") + res <- dplyr::filter(res, clonotype_id != "None") if (nrow(res) == 0) { warning("No valid clonotypes present, check input data.") @@ -539,8 +539,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains d <- dplyr::rename( 
d, - chains = .data$chain, - clonotype_id = .data$raw_clonotype_id + chains = chain, + clonotype_id = raw_clonotype_id ) d @@ -595,15 +595,15 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains ) # Filter for contigs in cells - res <- dplyr::filter(res, .data$is_cell) + res <- dplyr::filter(res, is_cell) # Replace 'None' with FALSE for QC columns res <- .replace_none(res, chk_none) res <- dplyr::rename( res, - chains = .data$chain, - clonotype_id = .data$raw_clonotype_id + chains = chain, + clonotype_id = raw_clonotype_id ) # Format cell barcode prefixes @@ -633,7 +633,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains prfx_df <- .extract_cell_prefix(bcs, strip_bcs = TRUE) # Match old and new prefixes - new <- dplyr::distinct(prfx_df, .data$prfx, .data$sfx) + new <- dplyr::distinct(prfx_df, prfx, sfx) if (nrow(new) != length(cell_prfxs)) { stop( @@ -650,9 +650,9 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains # Format cell barcodes prfx_df <- dplyr::mutate( prfx_df, - prfx = ifelse(is.na(.data$new_prfx), .data$prfx, .data$new_prfx), - sfx = ifelse(is.na(.data$new_sfx), .data$sfx, .data$new_sfx), - bc = paste0(.data$prfx, .data$bc, .data$sfx) + prfx = ifelse(is.na(new_prfx), prfx, new_prfx), + sfx = ifelse(is.na(new_sfx), sfx, new_sfx), + bc = paste0(prfx, bc, sfx) ) df_in[[bc_col]] <- prfx_df$bc @@ -678,8 +678,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains if (strip_bcs) { res <- dplyr::mutate( res, - bc = stringr::str_remove(.data$bc, paste0("^", .data$prfx)), - bc = stringr::str_remove(.data$bc, paste0(.data$sfx, "$")) + bc = stringr::str_remove(bc, paste0("^", prfx)), + bc = stringr::str_remove(bc, paste0(sfx, "$")) ) } @@ -786,7 +786,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains res <- purrr::map( res, mutate, - barcode = unlist(.str_extract_all(.data$contig_id, id_re)) + barcode = 
unlist(.str_extract_all(contig_id, id_re)) ) # Format cell barcode prefixes @@ -801,8 +801,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains res <- purrr::map( res, mutate, - contig_sfx = unlist(.str_extract_all(.data$contig_id, "_contig_[0-9]+$")), - contig_id = paste0(.data$barcode, .data$contig_sfx), + contig_sfx = unlist(.str_extract_all(contig_id, "_contig_[0-9]+$")), + contig_id = paste0(barcode, contig_sfx), contig_sfx = NULL, barcode = NULL ) @@ -824,30 +824,30 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains bam_info <- dplyr::filter( bam_info, - grepl("_contig_[0-9]+$", .data$contig_id) + grepl("_contig_[0-9]+$", contig_id) ) # Get 0-based coordinates for mutations # set width of deletion coordinates as 0 res <- dplyr::mutate( bam_info, - n = .str_extract_all(.data$cigar, "[0-9]+(?=[^0-9])"), - type = .str_extract_all(.data$cigar, "(?<=[0-9])[^0-9]{1}") + n = .str_extract_all(cigar, "[0-9]+(?=[^0-9])"), + type = .str_extract_all(cigar, "(?<=[0-9])[^0-9]{1}") ) - res <- tidyr::unnest(res, c(.data$n, .data$type)) - res <- dplyr::group_by(res, .data$contig_id) + res <- tidyr::unnest(res, c(n, type)) + res <- dplyr::group_by(res, contig_id) res <- dplyr::mutate( res, - n = as.numeric(.data$n), - idx = ifelse(.data$type != "D", .data$n, 0), - end = cumsum(.data$idx), - start = lag(.data$end, default = 0) + n = as.numeric(n), + idx = ifelse(type != "D", n, 0), + end = cumsum(idx), + start = lag(end, default = 0) ) res <- dplyr::ungroup(res) - res <- dplyr::filter(res, .data$type != "=") + res <- dplyr::filter(res, type != "=") res <- dplyr::select( res, @@ -871,7 +871,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains res <- dplyr::select( airr, - contig_id = .data$sequence_id, + contig_id = sequence_id, dplyr::matches(coord_cols_re, perl = TRUE) ) @@ -879,15 +879,15 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains stop("V(D)J 
coordinates not found, check ", airr_file) } - res <- tidyr::pivot_longer(res, -.data$contig_id) - res <- dplyr::filter(res, !is.na(.data$value)) - res <- tidyr::extract(res, .data$name, c("seg", "pos"), coord_cols_re) - res <- tidyr::pivot_wider(res, names_from = .data$pos) + res <- tidyr::pivot_longer(res, -contig_id) + res <- dplyr::filter(res, !is.na(value)) + res <- tidyr::extract(res, name, c("seg", "pos"), coord_cols_re) + res <- tidyr::pivot_wider(res, names_from = pos) res <- dplyr::mutate( res, - start = .data$start - 1, - len = .data$end - .data$start + start = start - 1, + len = end - start ) res <- dplyr::select( @@ -908,15 +908,15 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains mut_coords <- dplyr::mutate( mut_coords, - type = dplyr::recode(.data$type, !!!mut_key) + type = dplyr::recode(type, !!!mut_key) ) # If no vdj_coords, return mutation totals if (identical(vdj_coords, NA)) { res <- all_muts %>% tidyr::pivot_wider( - names_from = .data$type, - values_from = .data$n, + names_from = type, + values_from = n, values_fill = 0 ) @@ -924,7 +924,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains res, across( starts_with("all_"), - ~ .x / .data$len, + ~ .x / len, .names = "{.col}_freq" ) ) @@ -944,73 +944,73 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains vdj_muts <- dplyr::filter( vdj_muts, - .data$start < .data$end.seg & - .data$end > .data$start.seg + start < end.seg & + end > start.seg ) vdj_muts <- dplyr::mutate( vdj_muts, - len = .data$len.seg, + len = len.seg, new_start = ifelse( - .data$start >= .data$start.seg, .data$start, .data$start.seg + start >= start.seg, start, start.seg ), new_end = ifelse( - .data$end <= .data$end.seg, .data$end, .data$end.seg + end <= end.seg, end, end.seg ), new_end = ifelse( - .data$type == mut_key[["D"]], .data$new_end + 1, .data$new_end + type == mut_key[["D"]], new_end + 1, new_end ), n = ifelse( - .data$type != 
mut_key[["D"]], .data$new_end - .data$new_start, .data$n + type != mut_key[["D"]], new_end - new_start, n ) ) # Identify junction indels - jxn_muts <- filter(vdj_muts, .data$type %in% unname(mut_key[c("I", "D")])) + jxn_muts <- filter(vdj_muts, type %in% unname(mut_key[c("I", "D")])) jxn_muts <- mutate( jxn_muts, seg = case_when( - .data$seg == "v" & .data$end.seg == .data$new_end ~ "vd", - .data$seg == "d" & .data$start.seg == .data$new_start ~ "vd", - .data$seg == "d" & .data$end.seg == .data$new_end ~ "dj", - .data$seg == "j" & .data$start.seg == .data$new_start ~ "dj", + seg == "v" & end.seg == new_end ~ "vd", + seg == "d" & start.seg == new_start ~ "vd", + seg == "d" & end.seg == new_end ~ "dj", + seg == "j" & start.seg == new_start ~ "dj", TRUE ~ as.character(NA) ) ) - jxn_muts <- dplyr::filter(jxn_muts, !is.na(.data$seg)) - jxn_muts <- dplyr::select(jxn_muts, -.data$len) + jxn_muts <- dplyr::filter(jxn_muts, !is.na(seg)) + jxn_muts <- dplyr::select(jxn_muts, -len) vdj_muts <- bind_rows(vdj_muts, jxn_muts) # Summarize mutation counts vdj_muts <- dplyr::group_by( vdj_muts, - .data$contig_id, .data$len, .data$type, .data$seg + contig_id, len, type, seg ) - vdj_muts <- dplyr::summarize(vdj_muts, n = sum(.data$n), .groups = "drop") + vdj_muts <- dplyr::summarize(vdj_muts, n = sum(n), .groups = "drop") # Summarize total mutations and total length per contig # for each mutation type, sum total for v, d, j, and c segments, exclude jxns - all_muts <- dplyr::filter(vdj_muts, !.data$seg %in% c("vd", "dj")) - all_muts <- dplyr::group_by(all_muts, .data$contig_id, .data$type) + all_muts <- dplyr::filter(vdj_muts, !seg %in% c("vd", "dj")) + all_muts <- dplyr::group_by(all_muts, contig_id, type) all_muts <- dplyr::summarize( all_muts, - n = sum(.data$n), - len = sum(.data$len), + n = sum(n), + len = sum(len), seg = "all", .groups = "drop" ) vdj_muts <- dplyr::bind_rows(vdj_muts, all_muts) - res <- tidyr::unite(vdj_muts, "type", .data$seg, .data$type, sep = "_") + res <- 
tidyr::unite(vdj_muts, "type", seg, type, sep = "_") # Set final output columns freq_cols <- mut_cols <- c("v", "d", "j", "c", "all") @@ -1026,22 +1026,22 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains freq_cols <- purrr::map_chr(freq_cols, paste0, "_", mut_key[["X"]]) # Calculate mismatch frequency - freq <- dplyr::filter(res, .data$type %in% freq_cols) + freq <- dplyr::filter(res, type %in% freq_cols) freq <- dplyr::mutate( freq, - n = round(.data$n / .data$len, 6), - type = paste0(.data$type, "_freq"), + n = round(n / len, 6), + type = paste0(type, "_freq"), len = NULL ) res <- dplyr::bind_rows(res, freq) - res <- dplyr::select(res, -.data$len) + res <- dplyr::select(res, -len) res <- tidyr::pivot_wider( res, - names_from = .data$type, - values_from = .data$n, + names_from = type, + values_from = n, values_fill = 0 ) @@ -1219,12 +1219,12 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains #' @noRd .identify_paired <- function(df_in) { - res <- dplyr::group_by(df_in, .data$barcode) + res <- dplyr::group_by(df_in, barcode) res <- dplyr::mutate( res, - paired = (all(c("TRA", "TRB") %in% .data$chains)) | - ("IGH" %in% .data$chains & any(c("IGL", "IGK") %in% .data$chains)) + paired = (all(c("TRA", "TRB") %in% chains)) | + ("IGH" %in% chains & any(c("IGL", "IGK") %in% chains)) ) res <- dplyr::ungroup(res) @@ -1258,7 +1258,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains iso_df[iso_col] <- isos - iso_df <- dplyr::distinct(iso_df, .data$barcode, .data$c_gene) + iso_df <- dplyr::distinct(iso_df, barcode, c_gene) iso_df <- stats::na.omit(iso_df) dups <- iso_df$barcode @@ -1267,7 +1267,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains # Add isotypes to meta.data iso_df <- mutate( iso_df, - isotype = ifelse(.data$barcode %in% dups, "Multi", !!sym(iso_col)) + isotype = ifelse(barcode %in% dups, "Multi", !!sym(iso_col)) ) isos <- 
purrr::set_names( @@ -1277,8 +1277,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains res <- mutate( df_in, - isotype = unname(isos[.data$barcode]), - isotype = tidyr::replace_na(.data$isotype, "None") + isotype = unname(isos[barcode]), + isotype = tidyr::replace_na(isotype, "None") ) res @@ -1378,7 +1378,7 @@ define_clonotypes <- function(input, data_cols, clonotype_col = "clonotype_id", dplyr::across( dplyr::all_of(data_cols), - ~ paste0(.x[.data$.clone_idx], collapse = ""), + ~ paste0(.x[.clone_idx], collapse = ""), .names = ".clone_{.col}" ), @@ -1395,12 +1395,12 @@ define_clonotypes <- function(input, data_cols, clonotype_col = "clonotype_id", vdj <- dplyr::mutate( vdj, .new_clone = paste(!!!syms(data_cols), sep = ""), - .new_id = rank(.data$.new_clone, ties.method = "min"), + .new_id = rank(.new_clone, ties.method = "min"), !!sym(clonotype_col) := ifelse( - .data$.new_clone == "", + .new_clone == "", "None", - paste0("clonotype", .data$.new_id) + paste0("clonotype", .new_id) ) ) @@ -1440,17 +1440,17 @@ define_clonotypes <- function(input, data_cols, clonotype_col = "clonotype_id", # contig_id = bam_lst[[1]]$qname # ) # -# res <- dplyr::filter(res, grepl("_contig_[0-9]+$", .data$contig_id)) +# res <- dplyr::filter(res, grepl("_contig_[0-9]+$", contig_id)) # # # Add indel columns # res <- dplyr::mutate( # res, -# n_insertion = .extract_pat(.data$cigar, "[0-9]+(?=I)"), -# n_deletion = .extract_pat(.data$cigar, "[0-9]+(?=D)"), -# n_mismatch = .extract_pat(.data$cigar, "[0-9]+(?=X)"), +# n_insertion = .extract_pat(cigar, "[0-9]+(?=I)"), +# n_deletion = .extract_pat(cigar, "[0-9]+(?=D)"), +# n_mismatch = .extract_pat(cigar, "[0-9]+(?=X)"), # ) # -# res <- dplyr::select(res, -.data$cigar) +# res <- dplyr::select(res, -cigar) # # res # } diff --git a/R/utils-plots.R b/R/utils-plots.R index b7e44dd0..e3f5702c 100644 --- a/R/utils-plots.R +++ b/R/utils-plots.R @@ -632,7 +632,7 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") 
{ # Only plot percent for histogram if (identical(units, "percent") && identical(method, "histogram")) { - plt_aes <- ggplot2::aes(y = .data$..count.. / sum(.data$..count..) * 100) + plt_aes <- ggplot2::aes(y = ..count.. / sum(..count..) * 100) } plt_aes$x <- sym(x) @@ -728,8 +728,8 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") { res <- dplyr::mutate( res, - !!ft := ifelse(.data$pct > mn, !!ft, NA), - !!ft := ifelse(.data$pct <= mn, min(!!ft, na.rm = TRUE), !!ft) + !!ft := ifelse(pct > mn, !!ft, NA), + !!ft := ifelse(pct <= mn, min(!!ft, na.rm = TRUE), !!ft) ) } @@ -738,12 +738,12 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") { res <- dplyr::mutate( res, - !!ft := ifelse(.data$pct < mx, !!ft, NA), - !!ft := ifelse(.data$pct >= mx, max(!!ft, na.rm = TRUE), !!ft) + !!ft := ifelse(pct < mx, !!ft, NA), + !!ft := ifelse(pct >= mx, max(!!ft, na.rm = TRUE), !!ft) ) } - res <- dplyr::select(res, -.data$pct) + res <- dplyr::select(res, -pct) res } diff --git a/tests/testthat/test-import-vdj.R b/tests/testthat/test-import-vdj.R index 2f836f63..562120c9 100644 --- a/tests/testthat/test-import-vdj.R +++ b/tests/testthat/test-import-vdj.R @@ -1,5 +1,6 @@ data(vdj_so) data(vdj_sce) +data(tiny_so) # Test data ctigs <- c( From 5433048a94822a5d556d2a44d59fcc3dc44d14d8 Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Tue, 1 Nov 2022 06:22:46 -0600 Subject: [PATCH 9/9] fix import-vdj test --- tests/testthat/test-import-vdj.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-import-vdj.R b/tests/testthat/test-import-vdj.R index 562120c9..99e52e9d 100644 --- a/tests/testthat/test-import-vdj.R +++ b/tests/testthat/test-import-vdj.R @@ -1,6 +1,7 @@ data(vdj_so) data(vdj_sce) data(tiny_so) +data(tiny_sce) # Test data ctigs <- c(