Skip to content

Commit

Permalink
New plotting functions for QC steps in data processing
Browse files Browse the repository at this point in the history
  • Loading branch information
rfriedman22 committed Feb 28, 2024
1 parent 735bc90 commit 6cff262
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: false
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
LinkingTo:
Rcpp
Depends:
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,16 @@ export(pData)
export(partitions)
export(plot_cells)
export(plot_cells_3d)
export(plot_cells_per_sample_and_perturbation)
export(plot_genes_by_group)
export(plot_genes_hybrid)
export(plot_genes_in_pseudotime)
export(plot_genes_violin)
export(plot_mito_umi_per_cell)
export(plot_pc_variance_explained)
export(plot_percent_cells_positive)
export(plot_umi_per_cell)
export(plot_umi_per_cell_and_perturbation)
export(preprocess_cds)
export(preprocess_transform)
export(principal_graph)
Expand Down
241 changes: 240 additions & 1 deletion R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,7 @@ plot_genes_hybrid <- function (cds_subset,
point_interval = 'median_qi',
point_alpha=1.0,
outline_bars=TRUE,
slab_color='black',
slab_color='black',
slab_linewidth=0.1,
slab_fill='green',
slab_alpha=0.3)
Expand Down Expand Up @@ -1921,3 +1921,242 @@ plot_genes_by_group <- function(cds,
g
}

#' Plot a histogram of the number of UMIs per cell with a vertical line showing the cutoff for the minimum number of UMIs.
#' @param cds A cell_data_set for plotting.
#' @param min_rna_umi Cutoff for the minimum number of UMIs per cell.
#' @returns A ggplot2 object.
#' @import ggplot2
#' @export
plot_umi_per_cell <- function(cds,
min_rna_umi = 100) {
assertthat::assert_that(methods::is(cds, "cell_data_set"))
tryCatch(
assertthat::assert_that("log.n.umi" %in% colnames(colData(cds))),
error = function(e) {
colData(cds)$log.n.umi <- log10(colData(cds)$n.umi)
}
)

g <- colData(cds) %>%
as.data.frame() %>%
dplyr::select(cell, log.n.umi) %>%
dplyr::distinct() %>%
ggplot() +
geom_histogram(aes(x = log.n.umi),
fill = "grey80",
color = "black",
binwidth = 0.2,
linewidth = 0.1,
bins = 50) +
scale_x_continuous(name = "RNA UMIs",
breaks = c(0, 1, 2, 3, 4),
labels = as.character(c(0, 10, 100, 1000, 10000))) +
ylab("Number of Cells") +
geom_vline(xintercept = log10(min_rna_umi),
color = "red",
linewidth = 0.5) +
monocle_theme_opts()

return(g)
}

#' Plot a histogram of the percentage of UMIs mapping to mitochondria for each cell.
#' @param cds A cell_data_set for plotting.
#' @param max_mito_pct Cutoff for the maximum percentage of UMIs mapping to mitochondria.
#' @param pseudocount Value to add to the percentages before taking the log. Note that this is in terms of percent, not fractional.
#' @returns A ggplot2 object.
#' @import ggplot2
#' @export
plot_mito_umi_per_cell <- function(cds,
max_mito_pct = .max_mito_pct,
pseudocount = 1e-3) {
assertthat::assert_that(methods::is(cds, "cell_data_set"))
assertthat::assert_that(all(c("cell", "perc_mitochondrial_umis") %in% colnames(colData(cds))))

g <- colData(cds) %>%
as.data.frame() %>%
dplyr::select(cell, perc_mitochondrial_umis) %>%
dplyr::distinct() %>%
ggplot() +
geom_histogram(aes(x = log10(perc_mitochondrial_umis + pseudocount)),
fill = "grey80",
color = "black",
binwidth = 0.2,
linewidth = 0.1,
bins = 50) +
scale_x_continuous(name = "Percent Mitochondrial UMIs",
breaks = c(log10(pseudocount), -2, -1, 0, 1, 2),
labels = as.character(c(0, 0.001, 0.1, 1, 10, 100))) +
ylab("Number of Cells") +
geom_vline(xintercept = log10(max_mito_pct),
color = "red",
linewidth = 0.5) +
monocle_theme_opts()

return(g)
}

#' Make a boxplot of the number of UMIs per cell, with each perturbation as a separate box. Optionally facet by a second value.
#' @param cds A cell_data_set for plotting.
#' @param perturbation_col How to group the cells for the plot. Must be a column of the colData.
#' @param facet_by Facet the plot by this column of the colData. Each unique value is its own facet.
#' @param color_palette List of colors to color perturbation groups. Default is NULL. When NULL, use a default set.
#' @param yticks List of numeric values to put on the y-axis ticks.
#' @returns A ggplot2 object.
#' @import ggplot2
#' @export
plot_umi_per_cell_and_perturbation <- function(cds,
perturbation_col = "perturbation",
facet_by = NULL,
color_palette = NULL,
yticks = NULL) {
assertthat::assert_that(methods::is(cds, "cell_data_set"))
assertthat::assert_that(perturbation_col %in% colnames(colData(cds)),
msg = "perturbation_col is not in the colData of the CDS.")
assertthat::assert_that(is.null(facet_by) || (facet_by %in% colnames(colData(cds))),
msg = "facet_by is not in the colData of the CDS.")
assertthat::assert_that(is.null(yticks) || is.numeric(yticks),
msg = "yticks are not numeric.")

coldata_df <- colData(cds) %>%
as.data.frame() %>%
dplyr::filter(!is.na(perturbation_col))

if (is.null(color_palette)) {
N <- length(unique(coldata_df[[perturbation_col]]))
color_palette <- get_n_colors(N)
}

if (is.null(yticks)) {
yticks <- c(100, 250, 500, 1000, 2500, 5000, 10000)
}

g <- coldata_df %>%
ggplot() +
geom_boxplot(aes(x = reorder(!!sym(perturbation_col), log.n.umi),
y = log.n.umi,
fill = !!sym(perturbation_col)),
color = "black",
outlier.stroke = 0.1,
outlier.size = 0.1,
size = 0.2) +
scale_fill_manual(values = color_palette, name = stringr::str_to_title(perturbation_col)) +
scale_y_continuous(breaks = log10(yticks),
labels = as.character(yticks))

if (!is.null(facet_by)) {
g <- g +
facet_grid(cols = vars(!!sym(facet_by))) +
ggtitle(stringr::str_to_title(facet_by))
}

g <- g +
ylab("UMIs per Cell") +
monocle_theme_opts() +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.title = element_text(hjust = 0.5))

return(g)
}

#' Make a boxplot of the number of cells per sample, with each perturbation as a separate box. Optionally facet by a second value.
#' @param cds A cell_data_set for plotting.
#' @param perturbation_col How to group the cells for the plot. Must be a column of the colData.
#' @param count_per_sample_col Which column of the colData to put in the boxplots.
#' @param min_cells_per_sample The minimum number of cells per sample. Drawn on as a horizontal line.
#' @param facet_by Facet the plot by this column of the colData. Each unique value is its own facet.
#' @param color_palette List of colors to color perturbation groups. Default is NULL. When NULL, use a default set.
#' @param yticks List of numeric values to put on the y-axis ticks.
#' @returns A ggplot2 object.
#' @import ggplot2
#' @export
plot_cells_per_sample_and_perturbation <- function(cds,
perturbation_col = "perturbation",
count_per_sample_col = "count_per_embryo",
min_cells_per_sample = 1000,
facet_by = NULL,
color_palette = NULL,
yticks = NULL) {
assertthat::assert_that(methods::is(cds, "cell_data_set"))
assertthat::assert_that(all(c(perturbation_col, count_per_sample_col) %in% colnames(colData(cds))))
assertthat::assert_that(is.null(facet_by) || (facet_by %in% colnames(colData(cds))),
msg = "facet_by is not in the colData of the CDS.")
assertthat::assert_that(is.null(yticks) || is.numeric(yticks),
msg = "yticks are not numeric.")

counts_per_sample_df <- colData(cds) %>%
as.data.frame() %>%
dplyr::select(count_per_sample_col, perturbation_col, facet_by) %>%
dplyr::filter(!is.na(perturbation_col))

if (is.null(color_palette)) {
N <- length(unique(counts_per_sample_df[[perturbation_col]]))
color_palette <- get_n_colors(N)
}

if (is.null(yticks)) {
yticks <- c(100, 500, 1000, 2500, 5000, 10000, 20000, 40000)
}

g <- counts_per_sample_df %>%
ggplot() +
geom_boxplot(aes(x = reorder(!!sym(perturbation_col), !!sym(count_per_sample_col)),
y = log10(!!sym(count_per_sample_col)),
fill = !!sym(perturbation_col)),
color = "black",
outlier.stroke = 0.1,
outlier.size = 0.1,
size = 0.2) +
scale_fill_manual(values = color_palette, name = stringr::str_to_title(perturbation_col)) +
scale_y_continuous(breaks = log10(yticks),
labels = as.character(yticks)) +
geom_hline(yintercept = log10(min_cells_per_sample),
color = "red",
linewidth = 0.5)

if (!is.null(facet_by)) {
g <- g +
facet_grid(cols = vars(!!sym(facet_by))) +
ggtitle(stringr::str_to_title(facet_by))
}
g <- g +
ylab("Cells per Sample") +
monocle_theme_opts() +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.title = element_text(hjust = 0.5))

return(g)
}

#' Get a list of color hex codes from a custom palette.
#' @noRd
get_n_colors <- function(n) {
vibrant_colors <- c('#EE7733',
'#0077BB',
'#228833',
'#33BBEE',
'#EE3377',
'#CC3311',
'#AA3377',
'#009988',
'#004488',
'#DDAA33',
'#99CC66',
'#D590DD')

bright_colors <- c('#4477AA',
'#EE6677',
'#228833',
'#CCBB44',
'#66CCEE',
'#AA3377',
'#BBBBBB')

palette <- colorRampPalette(c(vibrant_colors, bright_colors))
return(palette(n))
}

37 changes: 37 additions & 0 deletions man/plot_cells_per_sample_and_perturbation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/plot_mito_umi_per_cell.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/plot_umi_per_cell.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/plot_umi_per_cell_and_perturbation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6cff262

Please sign in to comment.