rnabioco · jayhesselberth · Oct 30, 2022 · Oct 31, 2022 · Oct 31, 2022 · Oct 31, 2022
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -10,3 +10,6 @@
 ^codecov\.yml$
 ^vignettes/$
 ^vignettes/avid\.Rmd$
+splenocytes.zip
+splenocytes
+^inst/bench$
diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml
@@ -22,7 +22,7 @@ jobs:
           - {os: windows-latest, r: 'release'}
           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
           - {os: ubuntu-latest,   r: 'release'}
-          - {os: ubuntu-latest,   r: 'oldrel-1'}
+          # - {os: ubuntu-latest,   r: 'oldrel-1'}
 
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,21 +1,22 @@
 Package: djvdj
 Title: A collection of single-cell V(D)J tools
-Version: 0.0.0.9000
+Version: 0.99.0
 Authors@R: c(
     person("Ryan", "Sheridan", , "ryan.sheridan@cuanschutz.edu", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-4012-3147")),
-    person("Jay", "Hesselberth", , "jay.hesselberth@gmail.com", role = "ctb"),
+    person("Jay", "Hesselberth", , "jay.hesselberth@cuanschutz.edu", role = "ctb"),
     person("Rui", "Fu", , "raysinensis@gmail.com", role = "ctb"),
-    person("Kent", "Riemondy", , "kent.riemondy@ucdenver.edu", role = "ctb"),
-    person("RNA Bioscience Initiative", role = "fnd")
+    person("Kent", "Riemondy", , "kent.riemondy@cuanschutz.edu", role = "ctb"),
+    person("RNA Bioscience Initiative", role = c("fnd", "cph"))
   )
-Description: djvdj provides a range of tools to analyze single-cell V(D)J
-    data.
+Description: djvdj provides import, manipulation, analysis, and plotting tools
+  for analyzing single-cell V(D)J gene expression data, integrating with both
+  Seurat and SingleCellExperiment-based workflows.
 License: MIT + file LICENSE
 URL: https://rnabioco.github.io/djvdj/, https://github.com/rnabioco/djvdj/
 BugReports: https://github.com/rnabioco/djvdj/issues
 Depends: 
-    R (>= 4.0.0)
+    R (>= 4.2.0)
 Imports: 
     abdiv,
     Biostrings,
@@ -29,6 +30,8 @@ Imports:
     ggrepel,
     ggseqlogo,
     glue,
+    graphics,
+    grid,
     igraph (>= 1.3.0),
     iNEXT,
     MASS,
@@ -44,6 +47,7 @@ Imports:
     stringr,
     tibble,
     tidyr,
+    utils,
     uwot
 Suggests: 
     covr,
@@ -54,13 +58,14 @@ Suggests:
     RColorBrewer,
     rmarkdown,
     roxygen2,
-    testthat
+    testthat (>= 3.0.0)
 VignetteBuilder: 
     knitr
-Remotes:
-    tidyverse/magrittr
-Config/Needs/website:pkgdown, rnabioco/rbitemplate
+Config/Needs/website:
+  pkgdown,
+  rnabioco/rbitemplate
+biocViews: DataImport, GeneExpression, SingleCell, RNASeq, ImmunoOncology
 Encoding: UTF-8
-LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.1
+Config/testthat/edition: 3
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,3 @@
-# djvdj 0.0.0.9000
+# djvdj 0.99.0
 
-* Added a `NEWS.md` file to track changes to the package.
+* Initial release.
diff --git a/R/calc-abundance.R b/R/calc-abundance.R
@@ -130,7 +130,7 @@ calc_frequency <- function(input, data_col, cluster_col = NULL, prefix = paste0
   res <- dplyr::mutate(
     res,
     .freq = dplyr::n_distinct(!!sym(cell_col)),
-    .pct  = (.data$.freq / .data$.n_cells) * 100
+    .pct  = (.freq / .n_cells) * 100
   )
 
   # Identify shared labels
@@ -188,7 +188,7 @@ calc_frequency <- function(input, data_col, cluster_col = NULL, prefix = paste0
   )
 
   # Format labels
-  labs <- dplyr::group_by(labs, .data$grp)
+  labs <- dplyr::group_by(labs, grp)
   labs <- dplyr::mutate(
     labs,
     lab = paste0(unique(range(x)), collapse = "-")
@@ -442,7 +442,7 @@ plot_clonal_abundance <- function(input, cluster_col = NULL,
   if (n_clones > 0) {
     res <- res +
       ggrepel::geom_text_repel(
-        ggplot2::aes(label = .data$.lab),
+        ggplot2::aes(label = .lab),
         data          = top_clones,
         nudge_x       = 500,
         direction     = "y",
@@ -524,7 +524,7 @@ plot_frequency <- function(input, data_col, cluster_col = NULL,
   plt_dat <- dplyr::group_by(plt_dat, !!sym(data_col))
 
   rnk <- dplyr::summarize(plt_dat, mn = mean(!!sym(abun_col)))
-  rnk <- dplyr::arrange(rnk, desc(.data$mn))
+  rnk <- dplyr::arrange(rnk, desc(mn))
   rnk <- pull(rnk, data_col)
 
   plt_dat <- dplyr::ungroup(plt_dat)

diff --git a/R/calc-diversity.R b/R/calc-diversity.R
@@ -34,7 +34,7 @@
 #'   method   = abdiv::simpson
 #' )
 #'
-#' head(res@meta.data, 1)
+#' head(slot(res, 'meta.data'), 1)
 #'
 #' # Group cells based on meta.data column before calculating diversity
 #' res <- calc_diversity(
@@ -43,7 +43,7 @@
 #'   cluster_col = "orig.ident"
 #' )
 #'
-#' head(res@colData, 1)
+#' head(slot(res, 'colData'), 1)
 #'
 #' # Add a prefix to the new columns
 #' # this is useful if multiple diversity calculations are stored in the
@@ -54,7 +54,7 @@
 #'   prefix   = "bcr_"
 #' )
 #'
-#' head(res@meta.data, 1)
+#' head(slot(res, 'meta.data'), 1)
 #'
 #' # Calculate multiple metrics
 #' res <- calc_diversity(
@@ -66,7 +66,7 @@
 #'   )
 #' )
 #'
-#' head(res@colData, 1)
+#' head(slot(res, 'colData'), 1)
 #'
 #' # Return a data.frame instead of adding the results to the input object
 #' res <- calc_diversity(
@@ -186,18 +186,18 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
       sam,
       met       = .y,
       diversity = list(.calc_div(!!sym(data_col), met = .x)),
-      stderr    = purrr::map_dbl(.data$diversity, pull, "std.error"),
-      diversity = purrr::map_dbl(.data$diversity, pull, "statistic")
+      stderr    = purrr::map_dbl(diversity, pull, "std.error"),
+      diversity = purrr::map_dbl(diversity, pull, "statistic")
     )
   })
 
   div_cols <- "diversity"
 
   if (n_boots > 1) div_cols <- c(div_cols, "stderr")
-  else             div <- dplyr::select(div, -.data$stderr)
+  else             div <- dplyr::select(div, -stderr)
 
   div <- tidyr::pivot_longer(div, all_of(div_cols))
-  div <- tidyr::unite(div, "name", .data$met, .data$name)
+  div <- tidyr::unite(div, "name", met, name)
   div <- tidyr::pivot_wider(div)
 
   # Format results
@@ -372,12 +372,12 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
 
   re <- "^(.+)_(diversity|stderr)$"
 
-  plt_dat <- tidyr::extract(plt_dat, .data$name, into = c("met", "type"), re)
+  plt_dat <- tidyr::extract(plt_dat, name, into = c("met", "type"), re)
 
   plt_dat <- tidyr::pivot_wider(
     plt_dat,
-    names_from  = .data$type,
-    values_from = .data$value
+    names_from  = type,
+    values_from = value
   )
 
   # Set plot levels
@@ -439,8 +439,8 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
       ggplot2::geom_linerange(
         aes(
           !!sym(cluster_col),
-          ymin = .data$diversity - .data$stderr,
-          ymax = .data$diversity + .data$stderr
+          ymin = diversity - stderr,
+          ymax = diversity + stderr
         )
       )
   }
@@ -560,13 +560,13 @@ plot_rarefaction <- function(input, data_col, cluster_col = NULL,
 
   plt_dat <- dplyr::mutate(
     plt_dat,
-    method = dplyr::recode(.data$Method, "Observed" = "Rarefaction"),
+    method = dplyr::recode(Method, "Observed" = "Rarefaction"),
     method = stringr::str_to_lower(method),
-    Order.q = met_labs[as.character(.data$Order.q)]
+    Order.q = met_labs[as.character(Order.q)]
   )
 
   if (!is.null(cluster_col)) {
-    plt_dat <- dplyr::rename(plt_dat, !!sym(cluster_col) := .data$Assemblage)
+    plt_dat <- dplyr::rename(plt_dat, !!sym(cluster_col) := Assemblage)
   }
 
   plt_dat <- .set_lvls(plt_dat, cluster_col, plot_lvls)
@@ -576,13 +576,13 @@ plot_rarefaction <- function(input, data_col, cluster_col = NULL,
   # Plot standard error
   res <- ggplot2::ggplot(
     plt_dat,
-    ggplot2::aes(.data$m, .data$qD, linetype = method)
+    ggplot2::aes(m, qD, linetype = method)
   ) +
     ggplot2::guides(linetype = ggplot2::guide_legend(title = NULL))
 
   if (n_boots > 1) {
     gg_aes <- ggplot2::aes(
-      x = .data$m, ymin = .data$qD.LCL, ymax = .data$qD.UCL
+      x = m, ymin = qD.LCL, ymax = qD.UCL
     )
 
     if (!is.null(cluster_col))   gg_aes$fill <- sym(cluster_col)

diff --git a/R/calc-gene-usage.R b/R/calc-gene-usage.R
@@ -127,7 +127,7 @@ calc_gene_usage <- function(input, data_cols, cluster_col = NULL, chain = NULL,
     res <- tidyr::pivot_wider(
       res,
       names_from  = all_of(clst_nm),
-      values_from = .data$freq,
+      values_from = freq,
       values_fill = 0
     )
 
@@ -154,8 +154,8 @@ calc_gene_usage <- function(input, data_cols, cluster_col = NULL, chain = NULL,
     )
   }
 
-  res <- dplyr::mutate(res, pct = (.data$freq / .data$n_cells) * 100)
-  res <- dplyr::arrange(res, desc(.data$pct))
+  res <- dplyr::mutate(res, pct = (freq / n_cells) * 100)
+  res <- dplyr::arrange(res, desc(pct))
 
   res
 }
@@ -617,12 +617,12 @@ plot_gene_usage <- function(input, data_cols, cluster_col = NULL,
   res <- dplyr::mutate(res, rnk = row_number(desc(!!sym(dat_col))))
 
   if (is.null(clst_col)) {
-    res <- dplyr::filter(res, !!gn_2 %in% top_2 | .data$rnk == 1)
+    res <- dplyr::filter(res, !!gn_2 %in% top_2 | rnk == 1)
 
   } else {
     res <- dplyr::filter(
       res,
-      !!gn_2 %in% top_2[[dplyr::cur_group()[[clst_col]]]] | .data$rnk == 1
+      !!gn_2 %in% top_2[[dplyr::cur_group()[[clst_col]]]] | rnk == 1
     )
   }
 

diff --git a/R/calc-similarity.R b/R/calc-similarity.R
@@ -128,7 +128,7 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar
   vdj <- tidyr::pivot_wider(
     vdj,
     names_from  = all_of(cluster_col),
-    values_from = .data$n,
+    values_from = n,
     values_fill = 0
   )
 
@@ -158,20 +158,20 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar
   })
 
   # Combine with inverse combinations
-  res_i <- dplyr::rename(res, Var1 = .data$Var2, Var2 = .data$Var1)
+  res_i <- dplyr::rename(res, Var1 = Var2, Var2 = Var1)
   res   <- dplyr::bind_rows(res, res_i, res_s)
 
   # Format data.frame
   clmns <- sort(unique(res$Var2))
-  res   <- dplyr::arrange(res, .data$Var2)
+  res   <- dplyr::arrange(res, Var2)
 
   res <- tidyr::pivot_wider(
     res,
-    names_from  = .data$Var1,
-    values_from = .data$sim
+    names_from  = Var1,
+    values_from = sim
   )
 
-  res <- dplyr::select(res, !!sym(cluster_col) := .data$Var2, all_of(clmns))
+  res <- dplyr::select(res, !!sym(cluster_col) := Var2, all_of(clmns))
 
   # Return matrix
   if (return_mat) {
@@ -241,6 +241,7 @@ calc_similarity <- function(input, data_col, cluster_col, method = abdiv::jaccar
 #' circos plot
 #' @importFrom abdiv jaccard
 #' @seealso [calc_similarity()], [calc_mds()], [plot_mds()]
+#' @return ggplot object
 #'
 #' @examples
 #' # Plot repertoire overlap
@@ -492,6 +493,7 @@ calc_mds <- function(input, data_col, cluster_col, method = abdiv::jaccard,
 #' @param sep Separator used for storing per-chain V(D)J data for each cell
 #' @param ... Additional arguments to pass to [ggplot2::geom_point()]
 #' @seealso [calc_mds()], [calc_similarity()], [plot_similarity()], [MASS::isoMDS()]
+#' @return ggplot object
 #' @export
 plot_mds <- function(input, data_col, cluster_col,
                      method = abdiv::jaccard, chain = NULL,

diff --git a/R/cluster-seqs.R b/R/cluster-seqs.R
@@ -97,16 +97,16 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL,
 
   make_adj_df <- function(mat) {
     res <- tibble::as_tibble(mat, rownames = "Var1")
-    res <- tidyr::pivot_longer(res, -.data$Var1, values_to = "Var2")
+    res <- tidyr::pivot_longer(res, -Var1, values_to = "Var2")
     res
   }
 
   adj_df <- make_adj_df(knn_res$id)
 
   adj_df <- dplyr::mutate(
     adj_df,
-    Var1 = seqs[as.integer(.data$Var1)],
-    Var2 = seqs[.data$Var2]
+    Var1 = seqs[as.integer(Var1)],
+    Var2 = seqs[Var2]
   )
 
   # Create adjacency graph
@@ -188,6 +188,7 @@ cluster_sequences <- function(input, data_col = "cdr3", chain = NULL,
 #' @param ... Additional parameters to pass to [ggseqlogo::geom_logo()]
 #' @importFrom stringr str_trunc
 #' @seealso [cluster_sequences()]
+#' @return ggplot object
 #' @export
 plot_motifs <- function(input, data_col = "cdr3", cluster_col = NULL,
                         chain, plot_colors = NULL,

diff --git a/R/filter-vdj.R b/R/filter-vdj.R
@@ -112,7 +112,7 @@ filter_vdj <- function(input, filt, data_cols = NULL,
   }
 
   keep_rows <- vdj$.KEEP
-  vdj       <- dplyr::select(vdj, -.data$.KEEP)
+  vdj       <- dplyr::select(vdj, -.KEEP)
 
   # If vectors in keep_rows are all length 1, filter cells
   if (length_one) {