From 731b6606d85c1f06b613ad255e4d5ccae1a0ac1f Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Sun, 30 Oct 2022 08:31:14 -0600
Subject: [PATCH 1/9] Fixes for bioc - update DESCRIPTION (description, version,
 biocViews, LazyData) - slot accessors in vignettes

---
 DESCRIPTION              | 20 ++++++++++++--------
 NEWS.md                  |  4 ++--
 vignettes/abundance.Rmd  |  3 ++-
 vignettes/import_vdj.Rmd |  9 ++++++---
 vignettes/mutate.Rmd     | 15 ++++++++++-----
 5 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 63b40b72..a01cf3db 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,16 +1,17 @@
 Package: djvdj
 Title: A collection of single-cell V(D)J tools
-Version: 0.0.0.9000
+Version: 0.99.0
 Authors@R: c(
     person("Ryan", "Sheridan", , "ryan.sheridan@cuanschutz.edu", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-4012-3147")),
-    person("Jay", "Hesselberth", , "jay.hesselberth@gmail.com", role = "ctb"),
+    person("Jay", "Hesselberth", , "jay.hesselberth@cuanschutz.edu", role = "ctb"),
     person("Rui", "Fu", , "raysinensis@gmail.com", role = "ctb"),
-    person("Kent", "Riemondy", , "kent.riemondy@ucdenver.edu", role = "ctb"),
-    person("RNA Bioscience Initiative", role = "fnd")
+    person("Kent", "Riemondy", , "kent.riemondy@cuanschutz.edu", role = "ctb"),
+    person("RNA Bioscience Initiative", role = c("fnd", "cph"))
   )
-Description: djvdj provides a range of tools to analyze single-cell V(D)J
-    data.
+Description: djvdj provides import, manipulation, analysis, and plotting tools
+  for analyzing single-cell V(D)J gene expression data, integrating with both
+  Seurat and SingleCellExperiment-based workflows.
 License: MIT + file LICENSE
 URL: https://rnabioco.github.io/djvdj/, https://github.com/rnabioco/djvdj/
 BugReports: https://github.com/rnabioco/djvdj/issues
@@ -29,6 +30,8 @@ Imports:
     ggrepel,
     ggseqlogo,
     glue,
+    graphics,
+    grid,
     igraph (>= 1.3.0),
     iNEXT,
     MASS,
@@ -44,6 +47,7 @@ Imports:
     stringr,
     tibble,
     tidyr,
+    utils,
     uwot
 Suggests: 
     covr,
@@ -59,8 +63,8 @@ VignetteBuilder:
     knitr
 Remotes:
     tidyverse/magrittr
-Config/Needs/website:pkgdown, rnabioco/rbitemplate
+Config/Needs/website: pkgdown, rnabioco/rbitemplate
+biocViews: DataImport, GeneExpression, SingleCell, RNASeq, ImmunoOncology
 Encoding: UTF-8
-LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.1
diff --git a/NEWS.md b/NEWS.md
index e4362e93..111e272f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,3 @@
-# djvdj 0.0.0.9000
+# djvdj 0.99.0
 
-* Added a `NEWS.md` file to track changes to the package.
+* Initial release.
diff --git a/vignettes/abundance.Rmd b/vignettes/abundance.Rmd
index 7d19e201..776284ee 100644
--- a/vignettes/abundance.Rmd
+++ b/vignettes/abundance.Rmd
@@ -81,7 +81,8 @@ so_vdj <- so %>%
 When `cluster_col` is specified, an additional meta.data column ('shared') will be added indicating whether the clonotype is shared between multiple clusters.
 
 ```{r}
-so_vdj@meta.data %>%
+# or use `so_vdj@meta.data`
+slot(so_vdj, 'meta.data') %>%
   head(2)
 ```
 
diff --git a/vignettes/import_vdj.Rmd b/vignettes/import_vdj.Rmd
index 9cb3a7fe..eca5950b 100644
--- a/vignettes/import_vdj.Rmd
+++ b/vignettes/import_vdj.Rmd
@@ -60,7 +60,8 @@ so_vdj <- so %>%
 `import_vdj()` adds a variety of per-chain metrics to the object meta.data. Information for each chain identified for the cell is separated by a semicolon. The separator used for storing and parsing per-chain V(D)J data can be specified using the `sep` argument included for most djvdj functions. `NA`s will be included for cells that lack V(D)J data. 
 
 ```{r}
-so_vdj@meta.data %>%
+# or use `so_vdj@meta.data`
+slot(so_vdj, 'meta.data') %>%
   head(2)
 ```
 
@@ -206,7 +207,8 @@ so_vdj <- so %>%
 The additional columns added to the meta.data will include the number of insertions, deletions, and mismatches (ending in 'ins', 'del', or 'mis') for each V(D)J segment (prefixed with 'v', 'd', 'j', or 'c'). Columns containing junction information will be prefixed with either 'vd' or 'dj'. Columns ending in 'freq' show the event frequency which is calculated as the number of events divided by the length of the region.
 
 ```{r}
-so_vdj@meta.data %>%
+# or use `so_vdj@meta.data`
+slot(so_vdj, 'meta.data') %>%
   head(2)
 ```
 
@@ -235,7 +237,8 @@ so_vdj <- so %>%
 This results in two sets of new columns being added to the meta.data. When performing downstream analysis using other djvdj functions, be sure to specify the correct columns, i.e. 'bcr_clonotype_id' or 'tcr_clonotype_id'.
 
 ```{r}
-so_vdj@meta.data %>%
+# or use `so_vdj@meta.data`
+slot(so_vdj, 'meta.data') %>%
   head(3)
 ```
 
diff --git a/vignettes/mutate.Rmd b/vignettes/mutate.Rmd
index a703dece..aeef4d0c 100644
--- a/vignettes/mutate.Rmd
+++ b/vignettes/mutate.Rmd
@@ -73,7 +73,8 @@ res <- so %>%
     all(c("IGH", "IGK", "IGL") %in% chains)
   )
 
-res@meta.data %>%
+# or use `res@meta.data`
+slot(res, 'meta.data') %>%
   filter(!is.na(clonotype_id)) %>%
   select(chains, cdr3) %>%
   head(3)
@@ -85,7 +86,8 @@ In this example we are removing V(D)J data for all chains except IGH.
 res <- so %>%
   filter_vdj(chains == "IGH")
 
-res@meta.data %>%
+# or use `res@meta.data`
+slot(res, 'meta.data') %>%
   filter(!is.na(clonotype_id)) %>%
   select(chains, cdr3) %>%
   head(3)
@@ -105,7 +107,8 @@ res <- so %>%
     col_names = "median_{.col}"
   )
 
-res@meta.data %>%
+# or use `res@meta.data`
+slot(res, 'meta.data') %>%
   select(all_del, all_ins, median_all_del, median_all_ins) %>%
   head(2)
 ```
@@ -120,7 +123,8 @@ res <- so %>%
     col_names = "unique_chains"
   )
 
-res@meta.data %>%
+# or use `res@meta.data`
+slot(res, 'meta.data') %>%
   filter(n_chains > 2) %>%
   select(chains, unique_chains) %>%
   head(2)
@@ -140,7 +144,8 @@ res <- so %>%
     total_indels = sum(all_ins, all_del)
   )
 
-res@meta.data %>%
+# or use `res@meta.data`
+slot(res, 'meta.data') %>%
   select(all_ins, all_del, total_indels) %>%
   head()
 ```

From f4d4b7a18394c3aa2cbb49127e4070fcac0a3a6c Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 15:34:00 -0600
Subject: [PATCH 2/9] more fixes for bioccheck

---
 .Rbuildignore         | 2 ++
 DESCRIPTION           | 8 ++++----
 R/calc-diversity.R    | 8 ++++----
 man/calc_diversity.Rd | 8 ++++----
 man/djvdj-package.Rd  | 8 ++++----
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index c6056136..ef18855e 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -10,3 +10,5 @@
 ^codecov\.yml$
 ^vignettes/$
 ^vignettes/avid\.Rmd$
+splenocytes.zip
+splenocytes
diff --git a/DESCRIPTION b/DESCRIPTION
index a01cf3db..c31d4ba2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,7 @@ License: MIT + file LICENSE
 URL: https://rnabioco.github.io/djvdj/, https://github.com/rnabioco/djvdj/
 BugReports: https://github.com/rnabioco/djvdj/issues
 Depends: 
-    R (>= 4.0.0)
+    R (>= 4.2.0)
 Imports: 
     abdiv,
     Biostrings,
@@ -61,9 +61,9 @@ Suggests:
     testthat
 VignetteBuilder: 
     knitr
-Remotes:
-    tidyverse/magrittr
-Config/Needs/website: pkgdown, rnabioco/rbitemplate
+Config/Needs/website:
+  pkgdown,
+  rnabioco/rbitemplate
 biocViews: DataImport, GeneExpression, SingleCell, RNASeq, ImmunoOncology
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
diff --git a/R/calc-diversity.R b/R/calc-diversity.R
index e2c26f35..a42c1d04 100644
--- a/R/calc-diversity.R
+++ b/R/calc-diversity.R
@@ -34,7 +34,7 @@
 #'   method   = abdiv::simpson
 #' )
 #'
-#' head(res@meta.data, 1)
+#' head(slot(res, 'meta.data'), 1)
 #'
 #' # Group cells based on meta.data column before calculating diversity
 #' res <- calc_diversity(
@@ -43,7 +43,7 @@
 #'   cluster_col = "orig.ident"
 #' )
 #'
-#' head(res@colData, 1)
+#' head(slot(res, 'colData'), 1)
 #'
 #' # Add a prefix to the new columns
 #' # this is useful if multiple diversity calculations are stored in the
@@ -54,7 +54,7 @@
 #'   prefix   = "bcr_"
 #' )
 #'
-#' head(res@meta.data, 1)
+#' head(slot(res, 'meta.data'), 1)
 #'
 #' # Calculate multiple metrics
 #' res <- calc_diversity(
@@ -66,7 +66,7 @@
 #'   )
 #' )
 #'
-#' head(res@colData, 1)
+#' head(slot(res, 'colData'), 1)
 #'
 #' # Return a data.frame instead of adding the results to the input object
 #' res <- calc_diversity(
diff --git a/man/calc_diversity.Rd b/man/calc_diversity.Rd
index 583abcbb..d01b7b66 100644
--- a/man/calc_diversity.Rd
+++ b/man/calc_diversity.Rd
@@ -64,7 +64,7 @@ res <- calc_diversity(
   method   = abdiv::simpson
 )
 
-head(res@meta.data, 1)
+head(slot(res, 'meta.data'), 1)
 
 # Group cells based on meta.data column before calculating diversity
 res <- calc_diversity(
@@ -73,7 +73,7 @@ res <- calc_diversity(
   cluster_col = "orig.ident"
 )
 
-head(res@colData, 1)
+head(slot(res, 'colData'), 1)
 
 # Add a prefix to the new columns
 # this is useful if multiple diversity calculations are stored in the
@@ -84,7 +84,7 @@ res <- calc_diversity(
   prefix   = "bcr_"
 )
 
-head(res@meta.data, 1)
+head(slot(res, 'meta.data'), 1)
 
 # Calculate multiple metrics
 res <- calc_diversity(
@@ -96,7 +96,7 @@ res <- calc_diversity(
   )
 )
 
-head(res@colData, 1)
+head(slot(res, 'colData'), 1)
 
 # Return a data.frame instead of adding the results to the input object
 res <- calc_diversity(
diff --git a/man/djvdj-package.Rd b/man/djvdj-package.Rd
index 6e45a80b..5ad85b94 100644
--- a/man/djvdj-package.Rd
+++ b/man/djvdj-package.Rd
@@ -8,7 +8,7 @@
 \description{
 \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
 
-djvdj provides a range of tools to analyze single-cell V(D)J data.
+djvdj provides import, manipulation, analysis, and plotting tools for analyzing single-cell V(D)J gene expression data, integrating with both Seurat and SingleCellExperiment-based workflows.
 }
 \seealso{
 Useful links:
@@ -24,10 +24,10 @@ Useful links:
 
 Other contributors:
 \itemize{
-  \item Jay Hesselberth \email{jay.hesselberth@gmail.com} [contributor]
+  \item Jay Hesselberth \email{jay.hesselberth@cuanschutz.edu} [contributor]
   \item Rui Fu \email{raysinensis@gmail.com} [contributor]
-  \item Kent Riemondy \email{kent.riemondy@ucdenver.edu} [contributor]
-  \item RNA Bioscience Initiative [funder]
+  \item Kent Riemondy \email{kent.riemondy@cuanschutz.edu} [contributor]
+  \item RNA Bioscience Initiative [funder, copyright holder]
 }
 
 }

From 1c4ce97ec4b4d6fb173791a58461cedc4e7712af Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 15:43:19 -0600
Subject: [PATCH 3/9] add collapsible session info to vignettes

---
 vignettes/abundance.Rmd  |  9 +++++++++
 vignettes/clustering.Rmd |  9 +++++++++
 vignettes/diversity.Rmd  |  9 +++++++++
 vignettes/gene-usage.Rmd |  9 +++++++++
 vignettes/import_vdj.Rmd |  9 +++++++++
 vignettes/mutate.Rmd     |  8 ++++++++
 vignettes/plotting.Rmd   | 15 ++++++++-------
 7 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/vignettes/abundance.Rmd b/vignettes/abundance.Rmd
index 776284ee..38ef3b3e 100644
--- a/vignettes/abundance.Rmd
+++ b/vignettes/abundance.Rmd
@@ -218,3 +218,12 @@ so %>%
     plot_colors = c(MD4 = "#fec44f", BL6 = "#3182bd")
   )
 ```
+
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>
diff --git a/vignettes/clustering.Rmd b/vignettes/clustering.Rmd
index c03482f3..1c3bc66b 100644
--- a/vignettes/clustering.Rmd
+++ b/vignettes/clustering.Rmd
@@ -213,3 +213,12 @@ so_vdj %>%
     axis.ticks.x = element_blank()
   )
 ```
+
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>
diff --git a/vignettes/diversity.Rmd b/vignettes/diversity.Rmd
index f4d2bcca..4ca3112a 100644
--- a/vignettes/diversity.Rmd
+++ b/vignettes/diversity.Rmd
@@ -194,3 +194,12 @@ so %>%
     n_boots     = 0
   )
 ```
+
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>
diff --git a/vignettes/gene-usage.Rmd b/vignettes/gene-usage.Rmd
index 29d8cee9..895caf07 100644
--- a/vignettes/gene-usage.Rmd
+++ b/vignettes/gene-usage.Rmd
@@ -183,3 +183,12 @@ so %>%
     scale       = TRUE
   )
 ```
+
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>
diff --git a/vignettes/import_vdj.Rmd b/vignettes/import_vdj.Rmd
index eca5950b..6b05e26c 100644
--- a/vignettes/import_vdj.Rmd
+++ b/vignettes/import_vdj.Rmd
@@ -257,3 +257,12 @@ vdj_dirs <- c(
 # This will load V(D)J data and return a data.frame
 df_vdj <- import_vdj(vdj_dir = vdj_dirs)
 ```
+
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>
diff --git a/vignettes/mutate.Rmd b/vignettes/mutate.Rmd
index aeef4d0c..b16a68c7 100644
--- a/vignettes/mutate.Rmd
+++ b/vignettes/mutate.Rmd
@@ -150,3 +150,11 @@ slot(res, 'meta.data') %>%
   head()
 ```
 
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>
diff --git a/vignettes/plotting.Rmd b/vignettes/plotting.Rmd
index 94f37446..f21fcaf9 100644
--- a/vignettes/plotting.Rmd
+++ b/vignettes/plotting.Rmd
@@ -148,10 +148,11 @@ so %>%
   )
 ```
 
-
-
-
-
-
-
-
+<details style="margin-bottom:10px;">
+<summary>
+    Session info
+</summary>
+```{r}
+sessionInfo()
+```
+</details>

From d8d3041ac3975cd65df06baaab4f9590f63bc15e Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 16:02:35 -0600
Subject: [PATCH 4/9] explicitly load data after LazyData: false

---
 DESCRIPTION                           | 3 ++-
 tests/testthat/test-calc-diversity.R  | 3 +++
 tests/testthat/test-calc-frequency.R  | 3 +++
 tests/testthat/test-calc-gene-usage.R | 3 +++
 tests/testthat/test-calc-similarity.R | 3 +++
 tests/testthat/test-cluster-seqs.R    | 2 ++
 tests/testthat/test-filter-vdj.R      | 3 +++
 tests/testthat/test-import-vdj.R      | 3 +++
 tests/testthat/test-utils.R           | 3 +++
 9 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c31d4ba2..3181f1ca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -58,7 +58,7 @@ Suggests:
     RColorBrewer,
     rmarkdown,
     roxygen2,
-    testthat
+    testthat (>= 3.0.0)
 VignetteBuilder: 
     knitr
 Config/Needs/website:
@@ -68,3 +68,4 @@ biocViews: DataImport, GeneExpression, SingleCell, RNASeq, ImmunoOncology
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.1
+Config/testthat/edition: 3
diff --git a/tests/testthat/test-calc-diversity.R b/tests/testthat/test-calc-diversity.R
index 33b84717..b2a83ccf 100644
--- a/tests/testthat/test-calc-diversity.R
+++ b/tests/testthat/test-calc-diversity.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Test data
 df_1 <- vdj_so@meta.data
 
diff --git a/tests/testthat/test-calc-frequency.R b/tests/testthat/test-calc-frequency.R
index 15c760b4..48d718aa 100644
--- a/tests/testthat/test-calc-frequency.R
+++ b/tests/testthat/test-calc-frequency.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Test data
 df_1 <- vdj_so@meta.data
 
diff --git a/tests/testthat/test-calc-gene-usage.R b/tests/testthat/test-calc-gene-usage.R
index 9dcc125d..566df5bf 100644
--- a/tests/testthat/test-calc-gene-usage.R
+++ b/tests/testthat/test-calc-gene-usage.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Test data
 df_1 <- vdj_so@meta.data
 
diff --git a/tests/testthat/test-calc-similarity.R b/tests/testthat/test-calc-similarity.R
index fccbb5e0..ecbff94e 100644
--- a/tests/testthat/test-calc-similarity.R
+++ b/tests/testthat/test-calc-similarity.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Test data
 df_1 <- vdj_so@meta.data
 
diff --git a/tests/testthat/test-cluster-seqs.R b/tests/testthat/test-cluster-seqs.R
index 2e4ea976..bc872a56 100644
--- a/tests/testthat/test-cluster-seqs.R
+++ b/tests/testthat/test-cluster-seqs.R
@@ -1,3 +1,5 @@
+data(vdj_so)
+data(vdj_sce)
 
 test_cols <- c(
   "#E69F00", "#56B4E9", "#009E73",
diff --git a/tests/testthat/test-filter-vdj.R b/tests/testthat/test-filter-vdj.R
index 3036e3af..3e476070 100644
--- a/tests/testthat/test-filter-vdj.R
+++ b/tests/testthat/test-filter-vdj.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Check NAs in result
 check_nas <- function(df_in) {
 
diff --git a/tests/testthat/test-import-vdj.R b/tests/testthat/test-import-vdj.R
index 9d97bb12..2f836f63 100644
--- a/tests/testthat/test-import-vdj.R
+++ b/tests/testthat/test-import-vdj.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Test data
 ctigs <- c(
   system.file("extdata/bcr_1/outs", package = "djvdj"),
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 951985e7..c8b48f89 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -1,3 +1,6 @@
+data(vdj_so)
+data(vdj_sce)
+
 # Test data
 df_1 <- vdj_so@meta.data
 

From 0cefeefb6ac79989b850486feae6288a151ce811 Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 16:06:12 -0600
Subject: [PATCH 5/9] move benchmarks to inst/bench

---
 .Rbuildignore           |  1 +
 inst/bench/large-data.R | 69 ++++++++++++++++++++++++++++++++++++
 tests/testthat.R        | 78 +++++------------------------------------
 3 files changed, 78 insertions(+), 70 deletions(-)
 create mode 100644 inst/bench/large-data.R

diff --git a/.Rbuildignore b/.Rbuildignore
index ef18855e..cb4457b1 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^vignettes/avid\.Rmd$
 splenocytes.zip
 splenocytes
+^inst/bench$
diff --git a/inst/bench/large-data.R b/inst/bench/large-data.R
new file mode 100644
index 00000000..c58919ea
--- /dev/null
+++ b/inst/bench/large-data.R
@@ -0,0 +1,69 @@
+library(dplyr)
+library(tidyr)
+library(purrr)
+library(tibble)
+library(stringr)
+library(SingleCellExperiment)
+library(djvdj)
+
+### TESTING LARGE DATA ###
+#
+# # generating test data
+# load("data/avid/so_avid.rda")
+#
+# test_vdj <- so_avid %>%
+#   import_vdj("data/avid/bcr/")
+#
+# test_vdj <- test_vdj@meta.data
+#
+# walk(1:5, ~ {
+#   test_vdj <<- bind_rows(test_vdj, test_vdj)
+# })
+#
+# test_vdj <- test_vdj %>%
+#   mutate(clonotype_id = str_c(clonotype_id, row_number(clonotype_id) %% 5))
+#
+# # testing functions
+# test_vdj %>%
+#   calc_abundance(cluster_col = "seurat_clusters")
+#
+# test_vdj %>%
+#   plot_abundance(
+#     cluster_col = "seurat_clusters",
+#     type = "line",
+#     n_clonotypes = 2
+#   )
+#
+# test_vdj %>%
+#   plot_similarity(
+#     cluster_col = "seurat_clusters"
+#   )
+#
+# test_vdj %>%
+#   plot_diversity(
+#     cluster_col = "seurat_clusters",
+#   )
+#
+# clmns <- c(
+#   "v_gene", "d_gene", "chains",
+#   "umis", "reads", "cdr3_length",
+#   "cdr3_nt_length", "productive",
+#   "full_length"
+# )
+#
+# tictoc::tic()
+# x <- fetch_vdj(test_vdj, clmns)
+# tictoc::toc()
+#
+# tictoc::tic()
+# y <- summarize_vdj(test_vdj, clmns)
+# tictoc::toc()
+#
+# tictoc::tic()
+# x <- fetch_vdj(test_vdj, clonotype_col = NULL)
+# tictoc::toc()
+#
+# tictoc::tic()
+# y <- fetch_vdj(test_vdj, clonotype_col = "clonotype_id")
+# tictoc::toc()
+
diff --git a/tests/testthat.R b/tests/testthat.R
index 39954cb3..cbdcfffc 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,74 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/tests.html
+# * https://testthat.r-lib.org/reference/test_package.html#special-files
+
 library(testthat)
-library(dplyr)
-library(tidyr)
-library(purrr)
-library(tibble)
-library(stringr)
-library(SingleCellExperiment)
 library(djvdj)
 
 test_check("djvdj")
-
-
-
-### TESTING LARGE DATA ###
-#
-# # generating test data
-# load("data/avid/so_avid.rda")
-#
-# test_vdj <- so_avid %>%
-#   import_vdj("data/avid/bcr/")
-#
-# test_vdj <- test_vdj@meta.data
-#
-# walk(1:5, ~ {
-#   test_vdj <<- bind_rows(test_vdj, test_vdj)
-# })
-#
-# test_vdj <- test_vdj %>%
-#   mutate(clonotype_id = str_c(clonotype_id, row_number(clonotype_id) %% 5))
-#
-# # testing functions
-# test_vdj %>%
-#   calc_abundance(cluster_col = "seurat_clusters")
-#
-# test_vdj %>%
-#   plot_abundance(
-#     cluster_col = "seurat_clusters",
-#     type = "line",
-#     n_clonotypes = 2
-#   )
-#
-# test_vdj %>%
-#   plot_similarity(
-#     cluster_col = "seurat_clusters"
-#   )
-#
-# test_vdj %>%
-#   plot_diversity(
-#     cluster_col = "seurat_clusters",
-#   )
-#
-# clmns <- c(
-#   "v_gene", "d_gene", "chains",
-#   "umis", "reads", "cdr3_length",
-#   "cdr3_nt_length", "productive",
-#   "full_length"
-# )
-#
-# tictoc::tic()
-# x <- fetch_vdj(test_vdj, clmns)
-# tictoc::toc()
-#
-# tictoc::tic()
-# y <- summarize_vdj(test_vdj, clmns)
-# tictoc::toc()
-#
-# tictoc::tic()
-# x <- fetch_vdj(test_vdj, clonotype_col = NULL)
-# tictoc::toc()
-#
-# tictoc::tic()
-# y <- fetch_vdj(test_vdj, clonotype_col = "clonotype_id")
-# tictoc::toc()
-

From 7d8bac2ee1d3bca9be665c2efdaa7d58b74f2fe0 Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 16:14:57 -0600
Subject: [PATCH 6/9] add return values

---
 R/calc-similarity.R    | 2 ++
 R/cluster-seqs.R       | 1 +
 man/plot_mds.Rd        | 3 +++
 man/plot_motifs.Rd     | 3 +++
 man/plot_similarity.Rd | 3 +++
 5 files changed, 12 insertions(+)

diff --git a/R/calc-similarity.R b/R/calc-similarity.R
index 8303695e..6aa2b4b3 100644
--- a/R/calc-similarity.R
+++ b/R/calc-similarity.R
@@ -241,6 +241,7 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar
 #' circos plot
 #' @importFrom abdiv jaccard
 #' @seealso [calc_similarity()], [calc_mds()], [plot_mds()]
+#' @return ggplot object
 #'
 #' @examples
 #' # Plot repertoire overlap
@@ -492,6 +493,7 @@ calc_mds <- function(input, data_col, cluster_col, method = abdiv::jaccard,
 #' @param sep Separator used for storing per-chain V(D)J data for each cell
 #' @param ... Additional arguments to pass to [ggplot2::geom_point()]
 #' @seealso [calc_mds()], [calc_similarity()], [plot_similarity()], [MASS::isoMDS()]
+#' @return ggplot object
 #' @export
 plot_mds <- function(input, data_col, cluster_col,
                      method = abdiv::jaccard, chain = NULL,
diff --git a/R/cluster-seqs.R b/R/cluster-seqs.R
index 5800c6de..5040c0ad 100644
--- a/R/cluster-seqs.R
+++ b/R/cluster-seqs.R
@@ -188,6 +188,7 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL,
 #' @param ... Additional parameters to pass to [ggseqlogo::geom_logo()]
 #' @importFrom stringr str_trunc
 #' @seealso [cluster_sequences()]
+#' @return ggplot object
 #' @export
 plot_motifs <- function(input, data_col = "cdr3", cluster_col = NULL,
                         chain, plot_colors = NULL,
diff --git a/man/plot_mds.Rd b/man/plot_mds.Rd
index 5cb0fc58..f5638b10 100644
--- a/man/plot_mds.Rd
+++ b/man/plot_mds.Rd
@@ -51,6 +51,9 @@ all chains.}
 
 \item{...}{Additional arguments to pass to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}}
 }
+\value{
+ggplot object
+}
 \description{
 Perform multidimensional scaling and plot results
 }
diff --git a/man/plot_motifs.Rd b/man/plot_motifs.Rd
index bb2cbd36..3e3f1c2f 100644
--- a/man/plot_motifs.Rd
+++ b/man/plot_motifs.Rd
@@ -52,6 +52,9 @@ align sequences at the 5' or 3' end when plotting.}
 
 \item{...}{Additional parameters to pass to \code{\link[ggseqlogo:geom_logo]{ggseqlogo::geom_logo()}}}
 }
+\value{
+ggplot object
+}
 \description{
 Create sequence logos for clusters
 }
diff --git a/man/plot_similarity.Rd b/man/plot_similarity.Rd
index 01772572..c42bbd6a 100644
--- a/man/plot_similarity.Rd
+++ b/man/plot_similarity.Rd
@@ -68,6 +68,9 @@ be shown.}
 \code{\link[ComplexHeatmap:Heatmap]{ComplexHeatmap::Heatmap()}} for heatmap, \code{\link[circlize:chordDiagram]{circlize::chordDiagram()}} for
 circos plot}
 }
+\value{
+ggplot object
+}
 \description{
 Plot cluster similarity
 }

From 86815d60f1b35fb87e316e8e6f7deb03d1e187f4 Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 16:15:28 -0600
Subject: [PATCH 7/9] skip as bioc requires r >= 4.2.0

---
 .github/workflows/check-standard.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml
index a3ac6182..b39f0e90 100644
--- a/.github/workflows/check-standard.yaml
+++ b/.github/workflows/check-standard.yaml
@@ -22,7 +22,7 @@ jobs:
           - {os: windows-latest, r: 'release'}
           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
           - {os: ubuntu-latest,   r: 'release'}
-          - {os: ubuntu-latest,   r: 'oldrel-1'}
+          # - {os: ubuntu-latest,   r: 'oldrel-1'}
 
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

From ea3730a0ed43abc070b7dbe9c948789708caf927 Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Mon, 31 Oct 2022 17:13:26 -0600
Subject: [PATCH 8/9] remove `.data$` to eliminate tidyselect warnings

---
 R/calc-abundance.R               |   8 +-
 R/calc-diversity.R               |  28 ++---
 R/calc-gene-usage.R              |  10 +-
 R/calc-similarity.R              |  12 +--
 R/cluster-seqs.R                 |   6 +-
 R/filter-vdj.R                   |   2 +-
 R/import-vdj.R                   | 176 +++++++++++++++----------------
 R/utils-plots.R                  |  12 +--
 tests/testthat/test-import-vdj.R |   1 +
 9 files changed, 128 insertions(+), 127 deletions(-)

diff --git a/R/calc-abundance.R b/R/calc-abundance.R
index 366cabff..9e9eff32 100644
--- a/R/calc-abundance.R
+++ b/R/calc-abundance.R
@@ -130,7 +130,7 @@ calc_frequency <- function(input, data_col, cluster_col = NULL, prefix = paste0
   res <- dplyr::mutate(
     res,
     .freq = dplyr::n_distinct(!!sym(cell_col)),
-    .pct  = (.data$.freq / .data$.n_cells) * 100
+    .pct  = (.freq / .n_cells) * 100
   )
 
   # Identify shared labels
@@ -188,7 +188,7 @@ calc_frequency <- function(input, data_col, cluster_col = NULL, prefix = paste0
   )
 
   # Format labels
-  labs <- dplyr::group_by(labs, .data$grp)
+  labs <- dplyr::group_by(labs, grp)
   labs <- dplyr::mutate(
     labs,
     lab = paste0(unique(range(x)), collapse = "-")
@@ -442,7 +442,7 @@ plot_clonal_abundance <- function(input, cluster_col = NULL,
   if (n_clones > 0) {
     res <- res +
       ggrepel::geom_text_repel(
-        ggplot2::aes(label = .data$.lab),
+        ggplot2::aes(label = .lab),
         data          = top_clones,
         nudge_x       = 500,
         direction     = "y",
@@ -524,7 +524,7 @@ plot_frequency <- function(input, data_col, cluster_col = NULL,
   plt_dat <- dplyr::group_by(plt_dat, !!sym(data_col))
 
   rnk <- dplyr::summarize(plt_dat, mn = mean(!!sym(abun_col)))
-  rnk <- dplyr::arrange(rnk, desc(.data$mn))
+  rnk <- dplyr::arrange(rnk, desc(mn))
   rnk <- pull(rnk, data_col)
 
   plt_dat <- dplyr::ungroup(plt_dat)
diff --git a/R/calc-diversity.R b/R/calc-diversity.R
index a42c1d04..f15b4631 100644
--- a/R/calc-diversity.R
+++ b/R/calc-diversity.R
@@ -186,18 +186,18 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
       sam,
       met       = .y,
       diversity = list(.calc_div(!!sym(data_col), met = .x)),
-      stderr    = purrr::map_dbl(.data$diversity, pull, "std.error"),
-      diversity = purrr::map_dbl(.data$diversity, pull, "statistic")
+      stderr    = purrr::map_dbl(diversity, pull, "std.error"),
+      diversity = purrr::map_dbl(diversity, pull, "statistic")
     )
   })
 
   div_cols <- "diversity"
 
   if (n_boots > 1) div_cols <- c(div_cols, "stderr")
-  else             div <- dplyr::select(div, -.data$stderr)
+  else             div <- dplyr::select(div, -stderr)
 
   div <- tidyr::pivot_longer(div, all_of(div_cols))
-  div <- tidyr::unite(div, "name", .data$met, .data$name)
+  div <- tidyr::unite(div, "name", met, name)
   div <- tidyr::pivot_wider(div)
 
   # Format results
@@ -372,12 +372,12 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
 
   re <- "^(.+)_(diversity|stderr)$"
 
-  plt_dat <- tidyr::extract(plt_dat, .data$name, into = c("met", "type"), re)
+  plt_dat <- tidyr::extract(plt_dat, name, into = c("met", "type"), re)
 
   plt_dat <- tidyr::pivot_wider(
     plt_dat,
-    names_from  = .data$type,
-    values_from = .data$value
+    names_from  = type,
+    values_from = value
   )
 
   # Set plot levels
@@ -439,8 +439,8 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
       ggplot2::geom_linerange(
         aes(
           !!sym(cluster_col),
-          ymin = .data$diversity - .data$stderr,
-          ymax = .data$diversity + .data$stderr
+          ymin = diversity - stderr,
+          ymax = diversity + stderr
         )
       )
   }
@@ -560,13 +560,13 @@ plot_rarefaction <- function(input, data_col, cluster_col = NULL,
 
   plt_dat <- dplyr::mutate(
     plt_dat,
-    method = dplyr::recode(.data$Method, "Observed" = "Rarefaction"),
+    method = dplyr::recode(Method, "Observed" = "Rarefaction"),
     method = stringr::str_to_lower(method),
-    Order.q = met_labs[as.character(.data$Order.q)]
+    Order.q = met_labs[as.character(Order.q)]
   )
 
   if (!is.null(cluster_col)) {
-    plt_dat <- dplyr::rename(plt_dat, !!sym(cluster_col) := .data$Assemblage)
+    plt_dat <- dplyr::rename(plt_dat, !!sym(cluster_col) := Assemblage)
   }
 
   plt_dat <- .set_lvls(plt_dat, cluster_col, plot_lvls)
@@ -576,13 +576,13 @@ plot_rarefaction <- function(input, data_col, cluster_col = NULL,
   # Plot standard error
   res <- ggplot2::ggplot(
     plt_dat,
-    ggplot2::aes(.data$m, .data$qD, linetype = method)
+    ggplot2::aes(m, qD, linetype = method)
   ) +
     ggplot2::guides(linetype = ggplot2::guide_legend(title = NULL))
 
   if (n_boots > 1) {
     gg_aes <- ggplot2::aes(
-      x = .data$m, ymin = .data$qD.LCL, ymax = .data$qD.UCL
+      x = m, ymin = qD.LCL, ymax = qD.UCL
     )
 
     if (!is.null(cluster_col))   gg_aes$fill <- sym(cluster_col)
diff --git a/R/calc-gene-usage.R b/R/calc-gene-usage.R
index 52eed287..5ce8b3ba 100644
--- a/R/calc-gene-usage.R
+++ b/R/calc-gene-usage.R
@@ -127,7 +127,7 @@ calc_gene_usage <- function(input, data_cols, cluster_col = NULL, chain = NULL,
     res <- tidyr::pivot_wider(
       res,
       names_from  = all_of(clst_nm),
-      values_from = .data$freq,
+      values_from = freq,
       values_fill = 0
     )
 
@@ -154,8 +154,8 @@ calc_gene_usage <- function(input, data_cols, cluster_col = NULL, chain = NULL,
     )
   }
 
-  res <- dplyr::mutate(res, pct = (.data$freq / .data$n_cells) * 100)
-  res <- dplyr::arrange(res, desc(.data$pct))
+  res <- dplyr::mutate(res, pct = (freq / n_cells) * 100)
+  res <- dplyr::arrange(res, desc(pct))
 
   res
 }
@@ -617,12 +617,12 @@ plot_gene_usage <- function(input, data_cols, cluster_col = NULL,
   res <- dplyr::mutate(res, rnk = row_number(desc(!!sym(dat_col))))
 
   if (is.null(clst_col)) {
-    res <- dplyr::filter(res, !!gn_2 %in% top_2 | .data$rnk == 1)
+    res <- dplyr::filter(res, !!gn_2 %in% top_2 | rnk == 1)
 
   } else {
     res <- dplyr::filter(
       res,
-      !!gn_2 %in% top_2[[dplyr::cur_group()[[clst_col]]]] | .data$rnk == 1
+      !!gn_2 %in% top_2[[dplyr::cur_group()[[clst_col]]]] | rnk == 1
     )
   }
 
diff --git a/R/calc-similarity.R b/R/calc-similarity.R
index 6aa2b4b3..16d522aa 100644
--- a/R/calc-similarity.R
+++ b/R/calc-similarity.R
@@ -128,7 +128,7 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar
   vdj <- tidyr::pivot_wider(
     vdj,
     names_from  = all_of(cluster_col),
-    values_from = .data$n,
+    values_from = n,
     values_fill = 0
   )
 
@@ -158,20 +158,20 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar
   })
 
   # Combine with inverse combinations
-  res_i <- dplyr::rename(res, Var1 = .data$Var2, Var2 = .data$Var1)
+  res_i <- dplyr::rename(res, Var1 = Var2, Var2 = Var1)
   res   <- dplyr::bind_rows(res, res_i, res_s)
 
   # Format data.frame
   clmns <- sort(unique(res$Var2))
-  res   <- dplyr::arrange(res, .data$Var2)
+  res   <- dplyr::arrange(res, Var2)
 
   res <- tidyr::pivot_wider(
     res,
-    names_from  = .data$Var1,
-    values_from = .data$sim
+    names_from  = Var1,
+    values_from = sim
   )
 
-  res <- dplyr::select(res, !!sym(cluster_col) := .data$Var2, all_of(clmns))
+  res <- dplyr::select(res, !!sym(cluster_col) := Var2, all_of(clmns))
 
   # Return matrix
   if (return_mat) {
diff --git a/R/cluster-seqs.R b/R/cluster-seqs.R
index 5040c0ad..419f8905 100644
--- a/R/cluster-seqs.R
+++ b/R/cluster-seqs.R
@@ -97,7 +97,7 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL,
 
   make_adj_df <- function(mat) {
     res <- tibble::as_tibble(mat, rownames = "Var1")
-    res <- tidyr::pivot_longer(res, -.data$Var1, values_to = "Var2")
+    res <- tidyr::pivot_longer(res, -Var1, values_to = "Var2")
     res
   }
 
@@ -105,8 +105,8 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL,
 
   adj_df <- dplyr::mutate(
     adj_df,
-    Var1 = seqs[as.integer(.data$Var1)],
-    Var2 = seqs[.data$Var2]
+    Var1 = seqs[as.integer(Var1)],
+    Var2 = seqs[Var2]
   )
 
   # Create adjacency graph
diff --git a/R/filter-vdj.R b/R/filter-vdj.R
index c761bf76..ab937af0 100644
--- a/R/filter-vdj.R
+++ b/R/filter-vdj.R
@@ -112,7 +112,7 @@ filter_vdj <- function(input, filt, data_cols = NULL,
   }
 
   keep_rows <- vdj$.KEEP
-  vdj       <- dplyr::select(vdj, -.data$.KEEP)
+  vdj       <- dplyr::select(vdj, -.KEEP)
 
   # If vectors in keep_rows are all length 1, filter cells
   if (length_one) {
diff --git a/R/import-vdj.R b/R/import-vdj.R
index f7fd745e..e7651890 100644
--- a/R/import-vdj.R
+++ b/R/import-vdj.R
@@ -193,7 +193,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
     bcs <- .get_meta(input)[[CELL_COL]]
 
     prfx_df <- .extract_cell_prefix(bcs, strip_bcs = FALSE)
-    prfx_df <- dplyr::distinct(prfx_df, .data$prfx, .data$sfx)
+    prfx_df <- dplyr::distinct(prfx_df, prfx, sfx)
 
     prfxs <- prfx_df$prfx
     sfxs  <- prfx_df$sfx
@@ -274,8 +274,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
     contigs <- purrr::map(
       contigs,
       mutate,
-      contig_sfx = unlist(.str_extract_all(.data$contig_id, "_contig_[0-9]+$")),
-      contig_id  = paste0(.data$barcode, .data$contig_sfx),
+      contig_sfx = unlist(.str_extract_all(contig_id, "_contig_[0-9]+$")),
+      contig_id  = paste0(barcode, contig_sfx),
       contig_sfx = NULL
     )
 
@@ -368,7 +368,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
       "these contigs will be removed."
     )
 
-    contigs <- dplyr::filter(contigs, !is.na(.data$clonotype_id))
+    contigs <- dplyr::filter(contigs, !is.na(clonotype_id))
   }
 
   # Select V(D)J columns to keep
@@ -402,7 +402,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   vdj_cols  <- c(vdj_cols, "paired")
 
   if (filter_paired) {
-    contigs <- dplyr::filter(contigs, .data$paired)
+    contigs <- dplyr::filter(contigs, paired)
   }
 
   # Order chains and CDR3 sequences
@@ -411,7 +411,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   # the clonotype ID
   contigs <- dplyr::arrange(
     contigs,
-    .data$barcode, .data$chains, .data$cdr3_nt
+    barcode, chains, cdr3_nt
   )
 
   # Extract isotypes from c_gene for IGH chain (for BCR data only)
@@ -438,13 +438,13 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   )
 
   # Reorder columns
-  meta <- dplyr::relocate(meta, .data$paired, .after = "full_length")
+  meta <- dplyr::relocate(meta, paired, .after = "full_length")
   meta <- dplyr::relocate(meta, all_of(len_cols), .after = last(cdr3_cols))
-  meta <- dplyr::relocate(meta, .data$n_chains, .after = "chains")
+  meta <- dplyr::relocate(meta, n_chains, .after = "chains")
   meta <- dplyr::relocate(meta, all_of(gene_cols), .after = last(len_cols))
 
   if (vdj_class %in% c("BCR", "Multi")) {
-    meta <- dplyr::relocate(meta, .data$isotype, .after = "c_gene")
+    meta <- dplyr::relocate(meta, isotype, .after = "c_gene")
   }
 
   # Check for duplicated cell barcodes
@@ -487,7 +487,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   # Filter to only include cells with valid clonotype_id
   # cells with missing clonotype have a clonotype_id of 'None'
-  res <- dplyr::filter(res, .data$clonotype_id != "None")
+  res <- dplyr::filter(res, clonotype_id != "None")
 
   if (nrow(res) == 0) {
     warning("No valid clonotypes present, check input data.")
@@ -539,8 +539,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
     d <- dplyr::rename(
       d,
-      chains       = .data$chain,
-      clonotype_id = .data$raw_clonotype_id
+      chains       = chain,
+      clonotype_id = raw_clonotype_id
     )
 
     d
@@ -595,15 +595,15 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   )
 
   # Filter for contigs in cells
-  res <- dplyr::filter(res, .data$is_cell)
+  res <- dplyr::filter(res, is_cell)
 
   # Replace 'None' with FALSE for QC columns
   res <- .replace_none(res, chk_none)
 
   res <- dplyr::rename(
     res,
-    chains       = .data$chain,
-    clonotype_id = .data$raw_clonotype_id
+    chains       = chain,
+    clonotype_id = raw_clonotype_id
   )
 
   # Format cell barcode prefixes
@@ -633,7 +633,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   prfx_df <- .extract_cell_prefix(bcs, strip_bcs = TRUE)
 
   # Match old and new prefixes
-  new <- dplyr::distinct(prfx_df, .data$prfx, .data$sfx)
+  new <- dplyr::distinct(prfx_df, prfx, sfx)
 
   if (nrow(new) != length(cell_prfxs)) {
     stop(
@@ -650,9 +650,9 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   # Format cell barcodes
   prfx_df <- dplyr::mutate(
     prfx_df,
-    prfx = ifelse(is.na(.data$new_prfx), .data$prfx, .data$new_prfx),
-    sfx  = ifelse(is.na(.data$new_sfx), .data$sfx, .data$new_sfx),
-    bc   = paste0(.data$prfx, .data$bc, .data$sfx)
+    prfx = ifelse(is.na(new_prfx), prfx, new_prfx),
+    sfx  = ifelse(is.na(new_sfx), sfx, new_sfx),
+    bc   = paste0(prfx, bc, sfx)
   )
 
   df_in[[bc_col]] <- prfx_df$bc
@@ -678,8 +678,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   if (strip_bcs) {
     res <- dplyr::mutate(
       res,
-      bc = stringr::str_remove(.data$bc, paste0("^", .data$prfx)),
-      bc = stringr::str_remove(.data$bc, paste0(.data$sfx, "$"))
+      bc = stringr::str_remove(bc, paste0("^", prfx)),
+      bc = stringr::str_remove(bc, paste0(sfx, "$"))
     )
   }
 
@@ -786,7 +786,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   res <- purrr::map(
     res,
     mutate,
-    barcode = unlist(.str_extract_all(.data$contig_id, id_re))
+    barcode = unlist(.str_extract_all(contig_id, id_re))
   )
 
   # Format cell barcode prefixes
@@ -801,8 +801,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   res <- purrr::map(
     res,
     mutate,
-    contig_sfx = unlist(.str_extract_all(.data$contig_id, "_contig_[0-9]+$")),
-    contig_id  = paste0(.data$barcode, .data$contig_sfx),
+    contig_sfx = unlist(.str_extract_all(contig_id, "_contig_[0-9]+$")),
+    contig_id  = paste0(barcode, contig_sfx),
     contig_sfx = NULL,
     barcode    = NULL
   )
@@ -824,30 +824,30 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   bam_info <- dplyr::filter(
     bam_info,
-    grepl("_contig_[0-9]+$", .data$contig_id)
+    grepl("_contig_[0-9]+$", contig_id)
   )
 
   # Get 0-based coordinates for mutations
   # set width of deletion coordinates as 0
   res <- dplyr::mutate(
     bam_info,
-    n    = .str_extract_all(.data$cigar, "[0-9]+(?=[^0-9])"),
-    type = .str_extract_all(.data$cigar, "(?<=[0-9])[^0-9]{1}")
+    n    = .str_extract_all(cigar, "[0-9]+(?=[^0-9])"),
+    type = .str_extract_all(cigar, "(?<=[0-9])[^0-9]{1}")
   )
 
-  res <- tidyr::unnest(res, c(.data$n, .data$type))
-  res <- dplyr::group_by(res, .data$contig_id)
+  res <- tidyr::unnest(res, c(n, type))
+  res <- dplyr::group_by(res, contig_id)
 
   res <- dplyr::mutate(
     res,
-    n     = as.numeric(.data$n),
-    idx   = ifelse(.data$type != "D", .data$n, 0),
-    end   = cumsum(.data$idx),
-    start = lag(.data$end, default = 0)
+    n     = as.numeric(n),
+    idx   = ifelse(type != "D", n, 0),
+    end   = cumsum(idx),
+    start = lag(end, default = 0)
   )
 
   res <- dplyr::ungroup(res)
-  res <- dplyr::filter(res, .data$type != "=")
+  res <- dplyr::filter(res, type != "=")
 
   res <- dplyr::select(
     res,
@@ -871,7 +871,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   res <- dplyr::select(
     airr,
-    contig_id = .data$sequence_id,
+    contig_id = sequence_id,
     dplyr::matches(coord_cols_re, perl = TRUE)
   )
 
@@ -879,15 +879,15 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
     stop("V(D)J coordinates not found, check ", airr_file)
   }
 
-  res <- tidyr::pivot_longer(res, -.data$contig_id)
-  res <- dplyr::filter(res, !is.na(.data$value))
-  res <- tidyr::extract(res, .data$name, c("seg", "pos"), coord_cols_re)
-  res <- tidyr::pivot_wider(res, names_from = .data$pos)
+  res <- tidyr::pivot_longer(res, -contig_id)
+  res <- dplyr::filter(res, !is.na(value))
+  res <- tidyr::extract(res, name, c("seg", "pos"), coord_cols_re)
+  res <- tidyr::pivot_wider(res, names_from = pos)
 
   res <- dplyr::mutate(
     res,
-    start = .data$start - 1,
-    len   = .data$end - .data$start
+    start = start - 1,
+    len   = end - start
   )
 
   res <- dplyr::select(
@@ -908,15 +908,15 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   mut_coords <- dplyr::mutate(
     mut_coords,
-    type = dplyr::recode(.data$type, !!!mut_key)
+    type = dplyr::recode(type, !!!mut_key)
   )
 
   # If no vdj_coords, return mutation totals
   if (identical(vdj_coords, NA)) {
     res <- all_muts %>%
       tidyr::pivot_wider(
-        names_from  = .data$type,
-        values_from = .data$n,
+        names_from  = type,
+        values_from = n,
         values_fill = 0
       )
 
@@ -924,7 +924,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
       res,
       across(
         starts_with("all_"),
-        ~ .x / .data$len,
+        ~ .x / len,
         .names = "{.col}_freq"
       )
     )
@@ -944,73 +944,73 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   vdj_muts <- dplyr::filter(
     vdj_muts,
-    .data$start < .data$end.seg &
-    .data$end   > .data$start.seg
+    start < end.seg &
+    end   > start.seg
   )
 
   vdj_muts <- dplyr::mutate(
     vdj_muts,
-    len = .data$len.seg,
+    len = len.seg,
 
     new_start = ifelse(
-      .data$start >= .data$start.seg, .data$start, .data$start.seg
+      start >= start.seg, start, start.seg
     ),
 
     new_end = ifelse(
-      .data$end <= .data$end.seg, .data$end, .data$end.seg
+      end <= end.seg, end, end.seg
     ),
 
     new_end = ifelse(
-      .data$type == mut_key[["D"]], .data$new_end + 1, .data$new_end
+      type == mut_key[["D"]], new_end + 1, new_end
     ),
 
     n = ifelse(
-      .data$type != mut_key[["D"]], .data$new_end - .data$new_start, .data$n
+      type != mut_key[["D"]], new_end - new_start, n
     )
   )
 
   # Identify junction indels
-  jxn_muts <- filter(vdj_muts, .data$type %in% unname(mut_key[c("I", "D")]))
+  jxn_muts <- filter(vdj_muts, type %in% unname(mut_key[c("I", "D")]))
 
   jxn_muts <- mutate(
     jxn_muts,
     seg = case_when(
-      .data$seg == "v" & .data$end.seg   == .data$new_end   ~ "vd",
-      .data$seg == "d" & .data$start.seg == .data$new_start ~ "vd",
-      .data$seg == "d" & .data$end.seg   == .data$new_end   ~ "dj",
-      .data$seg == "j" & .data$start.seg == .data$new_start ~ "dj",
+      seg == "v" & end.seg   == new_end   ~ "vd",
+      seg == "d" & start.seg == new_start ~ "vd",
+      seg == "d" & end.seg   == new_end   ~ "dj",
+      seg == "j" & start.seg == new_start ~ "dj",
       TRUE ~ as.character(NA)
     )
   )
 
-  jxn_muts <- dplyr::filter(jxn_muts, !is.na(.data$seg))
-  jxn_muts <- dplyr::select(jxn_muts, -.data$len)
+  jxn_muts <- dplyr::filter(jxn_muts, !is.na(seg))
+  jxn_muts <- dplyr::select(jxn_muts, -len)
 
   vdj_muts <- bind_rows(vdj_muts, jxn_muts)
 
   # Summarize mutation counts
   vdj_muts <- dplyr::group_by(
     vdj_muts,
-    .data$contig_id, .data$len, .data$type, .data$seg
+    contig_id, len, type, seg
   )
 
-  vdj_muts <- dplyr::summarize(vdj_muts, n = sum(.data$n), .groups = "drop")
+  vdj_muts <- dplyr::summarize(vdj_muts, n = sum(n), .groups = "drop")
 
   # Summarize total mutations and total length per contig
   # for each mutation type, sum total for v, d, j, and c segments, exclude jxns
-  all_muts <- dplyr::filter(vdj_muts, !.data$seg %in% c("vd", "dj"))
-  all_muts <- dplyr::group_by(all_muts, .data$contig_id, .data$type)
+  all_muts <- dplyr::filter(vdj_muts, !seg %in% c("vd", "dj"))
+  all_muts <- dplyr::group_by(all_muts, contig_id, type)
 
   all_muts <- dplyr::summarize(
     all_muts,
-    n       = sum(.data$n),
-    len     = sum(.data$len),
+    n       = sum(n),
+    len     = sum(len),
     seg     = "all",
     .groups = "drop"
   )
 
   vdj_muts <- dplyr::bind_rows(vdj_muts, all_muts)
-  res      <- tidyr::unite(vdj_muts, "type", .data$seg, .data$type, sep = "_")
+  res      <- tidyr::unite(vdj_muts, "type", seg, type, sep = "_")
 
   # Set final output columns
   freq_cols <- mut_cols <- c("v", "d", "j", "c", "all")
@@ -1026,22 +1026,22 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   freq_cols <- purrr::map_chr(freq_cols, paste0, "_", mut_key[["X"]])
 
   # Calculate mismatch frequency
-  freq <- dplyr::filter(res, .data$type %in% freq_cols)
+  freq <- dplyr::filter(res, type %in% freq_cols)
 
   freq <- dplyr::mutate(
     freq,
-    n    = round(.data$n / .data$len, 6),
-    type = paste0(.data$type, "_freq"),
+    n    = round(n / len, 6),
+    type = paste0(type, "_freq"),
     len  = NULL
   )
 
   res <- dplyr::bind_rows(res, freq)
-  res <- dplyr::select(res, -.data$len)
+  res <- dplyr::select(res, -len)
 
   res <- tidyr::pivot_wider(
     res,
-    names_from  = .data$type,
-    values_from = .data$n,
+    names_from  = type,
+    values_from = n,
     values_fill = 0
   )
 
@@ -1219,12 +1219,12 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 #' @noRd
 .identify_paired <- function(df_in) {
 
-  res <- dplyr::group_by(df_in, .data$barcode)
+  res <- dplyr::group_by(df_in, barcode)
 
   res <- dplyr::mutate(
     res,
-    paired = (all(c("TRA", "TRB") %in% .data$chains)) |
-      ("IGH" %in% .data$chains & any(c("IGL", "IGK") %in% .data$chains))
+    paired = (all(c("TRA", "TRB") %in% chains)) |
+      ("IGH" %in% chains & any(c("IGL", "IGK") %in% chains))
   )
 
   res <- dplyr::ungroup(res)
@@ -1258,7 +1258,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   iso_df[iso_col] <- isos
 
-  iso_df <- dplyr::distinct(iso_df, .data$barcode, .data$c_gene)
+  iso_df <- dplyr::distinct(iso_df, barcode, c_gene)
   iso_df <- stats::na.omit(iso_df)
 
   dups <- iso_df$barcode
@@ -1267,7 +1267,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
   # Add isotypes to meta.data
   iso_df <- mutate(
     iso_df,
-    isotype = ifelse(.data$barcode %in% dups, "Multi", !!sym(iso_col))
+    isotype = ifelse(barcode %in% dups, "Multi", !!sym(iso_col))
   )
 
   isos <- purrr::set_names(
@@ -1277,8 +1277,8 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains
 
   res <- mutate(
     df_in,
-    isotype = unname(isos[.data$barcode]),
-    isotype = tidyr::replace_na(.data$isotype, "None")
+    isotype = unname(isos[barcode]),
+    isotype = tidyr::replace_na(isotype, "None")
   )
 
   res
@@ -1378,7 +1378,7 @@ define_clonotypes <- function(input, data_cols, clonotype_col = "clonotype_id",
 
       dplyr::across(
         dplyr::all_of(data_cols),
-        ~ paste0(.x[.data$.clone_idx], collapse = ""),
+        ~ paste0(.x[.clone_idx], collapse = ""),
         .names = ".clone_{.col}"
       ),
 
@@ -1395,12 +1395,12 @@ define_clonotypes <- function(input, data_cols, clonotype_col = "clonotype_id",
   vdj <- dplyr::mutate(
     vdj,
     .new_clone = paste(!!!syms(data_cols), sep = ""),
-    .new_id    = rank(.data$.new_clone, ties.method = "min"),
+    .new_id    = rank(.new_clone, ties.method = "min"),
 
     !!sym(clonotype_col) := ifelse(
-      .data$.new_clone == "",
+      .new_clone == "",
       "None",
-      paste0("clonotype", .data$.new_id)
+      paste0("clonotype", .new_id)
     )
   )
 
@@ -1440,17 +1440,17 @@ define_clonotypes <- function(input, data_cols, clonotype_col = "clonotype_id",
 #       contig_id = bam_lst[[1]]$qname
 #     )
 #
-#     res <- dplyr::filter(res, grepl("_contig_[0-9]+$", .data$contig_id))
+#     res <- dplyr::filter(res, grepl("_contig_[0-9]+$", contig_id))
 #
 #     # Add indel columns
 #     res <- dplyr::mutate(
 #       res,
-#       n_insertion = .extract_pat(.data$cigar, "[0-9]+(?=I)"),
-#       n_deletion  = .extract_pat(.data$cigar, "[0-9]+(?=D)"),
-#       n_mismatch  = .extract_pat(.data$cigar, "[0-9]+(?=X)"),
+#       n_insertion = .extract_pat(cigar, "[0-9]+(?=I)"),
+#       n_deletion  = .extract_pat(cigar, "[0-9]+(?=D)"),
+#       n_mismatch  = .extract_pat(cigar, "[0-9]+(?=X)"),
 #     )
 #
-#     res <- dplyr::select(res, -.data$cigar)
+#     res <- dplyr::select(res, -cigar)
 #
 #     res
 #   }
diff --git a/R/utils-plots.R b/R/utils-plots.R
index b7e44dd0..e3f5702c 100644
--- a/R/utils-plots.R
+++ b/R/utils-plots.R
@@ -632,7 +632,7 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
 
   # Only plot percent for histogram
   if (identical(units, "percent") && identical(method, "histogram")) {
-    plt_aes <- ggplot2::aes(y = .data$..count.. / sum(.data$..count..) * 100)
+    plt_aes <- ggplot2::aes(y = ..count.. / sum(..count..) * 100)
   }
 
   plt_aes$x <- sym(x)
@@ -728,8 +728,8 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
 
     res <- dplyr::mutate(
       res,
-      !!ft := ifelse(.data$pct > mn, !!ft, NA),
-      !!ft := ifelse(.data$pct <= mn, min(!!ft, na.rm = TRUE), !!ft)
+      !!ft := ifelse(pct > mn, !!ft, NA),
+      !!ft := ifelse(pct <= mn, min(!!ft, na.rm = TRUE), !!ft)
     )
   }
 
@@ -738,12 +738,12 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
 
     res <- dplyr::mutate(
       res,
-      !!ft := ifelse(.data$pct < mx, !!ft, NA),
-      !!ft := ifelse(.data$pct >= mx, max(!!ft, na.rm = TRUE), !!ft)
+      !!ft := ifelse(pct < mx, !!ft, NA),
+      !!ft := ifelse(pct >= mx, max(!!ft, na.rm = TRUE), !!ft)
     )
   }
 
-  res <- dplyr::select(res, -.data$pct)
+  res <- dplyr::select(res, -pct)
 
   res
 }
diff --git a/tests/testthat/test-import-vdj.R b/tests/testthat/test-import-vdj.R
index 2f836f63..562120c9 100644
--- a/tests/testthat/test-import-vdj.R
+++ b/tests/testthat/test-import-vdj.R
@@ -1,5 +1,6 @@
 data(vdj_so)
 data(vdj_sce)
+data(tiny_so)
 
 # Test data
 ctigs <- c(

From 5433048a94822a5d556d2a44d59fcc3dc44d14d8 Mon Sep 17 00:00:00 2001
From: Jay Hesselberth <jay.hesselberth@gmail.com>
Date: Tue, 1 Nov 2022 06:22:46 -0600
Subject: [PATCH 9/9] fix import-vdj test

---
 tests/testthat/test-import-vdj.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/test-import-vdj.R b/tests/testthat/test-import-vdj.R
index 562120c9..99e52e9d 100644
--- a/tests/testthat/test-import-vdj.R
+++ b/tests/testthat/test-import-vdj.R
@@ -1,6 +1,7 @@
 data(vdj_so)
 data(vdj_sce)
 data(tiny_so)
+data(tiny_sce)
 
 # Test data
 ctigs <- c(