diff --git a/workflow/rules/statistic/correlation.smk b/workflow/rules/statistic/correlation.smk index fed85b3..1973df2 100644 --- a/workflow/rules/statistic/correlation.smk +++ b/workflow/rules/statistic/correlation.smk @@ -311,6 +311,9 @@ rule statistic_correlation_calculate: if "label_file" in config["experiments"][wc.project] else "" ), + plot_width=29, + plot_height=17, + legend_nrow=6, log: temp( "results/logs/statistic/correlation/calculate.{project}.{condition}.{config}.{assignment}.log" @@ -323,6 +326,9 @@ rule statistic_correlation_calculate: --files {params.files} \ --replicates {params.replicates} \ --threshold {params.thresh} \ + --plot_width {params.plot_width} \ + --plot_height {params.plot_height} \ + --legend_nrow {params.legend_nrow} \ --outdir {params.outdir} &> {log} """ diff --git a/workflow/scripts/count/plot_perBCCounts_correlation.R b/workflow/scripts/count/plot_perBCCounts_correlation.R index 8f76b69..2a2d1d0 100644 --- a/workflow/scripts/count/plot_perBCCounts_correlation.R +++ b/workflow/scripts/count/plot_perBCCounts_correlation.R @@ -85,21 +85,21 @@ plot_correlations_dna <- function(data, plot_data, condition, r1, r2, name) { geom_point() + xlab(sprintf(paste("log2 Normalized DNA count per barcode,\n replicate", r1))) + ylab(sprintf(paste("log2 Normalized DNA count per barcode,\n replicate", r2))) + - geom_text(x = min + 0.5, y = max - 0.5, label = sprintf(" r = %.2f", cor(data$DNA_normalized_log2.x, data$DNA_normalized_log2.y, method = "pearson")), size = 10) + - geom_text(x = min + 0.5, y = max - 1.0, label = sprintf("rho = %.2f", cor(data$DNA_normalized.x, data$DNA_normalized.y, method = "spearman")), size = 10) + + geom_text(x = -Inf, y = Inf, hjust=0, vjust=1, label = sprintf(" r = %.2f", cor(data$DNA_normalized_log2.x, data$DNA_normalized_log2.y, method = "pearson")), size = 10) + + geom_text(x = -Inf, y = Inf, hjust=0, vjust=2.1, label = sprintf("rho = %.2f", cor(data$DNA_normalized.x, data$DNA_normalized.y, method = "spearman")), size = 10) + geom_abline(intercept = 0, slope = 1) + theme_classic(base_size = 30) return(dna_p) } plot_correlations_rna <- function(data, plot_data, condition, r1, r2, name) { - max <- max(data$`RNA_normalized.y_log2`) - min <- min(data$`RNA_normalized.x_log2`) + max <- max(data$`RNA_normalized_log2.y`) + min <- min(data$`RNA_normalized_log2.x`) rna_p <- ggplot(plot_data, aes(RNA_normalized_log2.x, RNA_normalized_log2.y)) + geom_point() + xlab(sprintf(paste("log2 Normalized RNA count per barcode,\n replicate", r1))) + ylab(sprintf(paste("log2 Normalized RNA count per barcode,\n replicate", r2))) + - geom_text(x = min + 0.5, y = max - 0.5, label = sprintf(" r = %.2f", cor(data$RNA_normalized_log2.x, data$RNA_normalized_log2.y, method = "pearson")), size = 10) + - geom_text(x = min + 0.5, y = max - 1.0, label = sprintf("rho = %.2f", cor(data$RNA_normalized.x, data$RNA_normalized.y, method = "spearman")), size = 10) + + geom_text(x = -Inf, y = Inf, hjust=0, vjust=1, label = sprintf(" r = %.2f", cor(data$RNA_normalized_log2.x, data$RNA_normalized_log2.y, method = "pearson")), size = 10) + + geom_text(x = -Inf, y = Inf, hjust=0, vjust=2.1, label = sprintf("rho = %.2f", cor(data$RNA_normalized.x, data$RNA_normalized.y, method = "spearman")), size = 10) + geom_abline(intercept = 0, slope = 1) + theme_classic(base_size = 30) return(rna_p) @@ -111,8 +111,8 @@ plot_correlations_ratio <- function(data, plot_data, condition, r1, r2, name) { geom_point() + xlab(sprintf(paste("log2 RNA/DNA per barcode,\n replicate", r1))) + ylab(sprintf(paste("log2 RNA/DNA per barcode,\n replicate", r2))) + - geom_text(x = min + 0.5, y = max - 0.5, label = sprintf(" r = %.2f", cor(data$Ratio_log2.x, res$Ratio_log2.y, method = "pearson")), size = 10) + - geom_text(x = min + 0.5, y = max - 1.0, label = sprintf("rho = %.2f", cor(data$Ratio.x, data$Ratio.y, method = "spearman")), size = 10) + + geom_text(x = -Inf, y = Inf, hjust=0, vjust=1, label = sprintf(" r = %.2f", cor(data$Ratio_log2.x, res$Ratio_log2.y, method = "pearson")), size = 10) + + geom_text(x = -Inf, y = Inf, hjust=0, vjust=2.1, label = sprintf("rho = %.2f", cor(data$Ratio.x, data$Ratio.y, method = "spearman")), size = 10) + geom_abline(intercept = 0, slope = 1) + theme_classic(base_size = 30) return(ratio_p) @@ -225,4 +225,5 @@ if (data %>% nrow() > 1) { writeCorrelationPlots(plots_correlations_rna, sprintf("%s_barcode_RNA_pairwise.png", outdir)) writeCorrelationPlots(plots_correlations_ratio, sprintf("%s_barcode_Ratio_pairwise.png", outdir)) -} \ No newline at end of file +} + diff --git a/workflow/scripts/count/plot_perInsertCounts_correlation.R b/workflow/scripts/count/plot_perInsertCounts_correlation.R index 7fd86b2..5693f98 100644 --- a/workflow/scripts/count/plot_perInsertCounts_correlation.R +++ b/workflow/scripts/count/plot_perInsertCounts_correlation.R @@ -31,6 +31,24 @@ option_list <- list( type = "integer", default = 10, help = "Number of required barcodes (default 10)" + ), + make_option( + c("-pw", "--plot_width"), + type = "integer", + default = 29, + help = "Width of the plots created by this script (default 29)" + ), + make_option( + c("-ph", "--plot_height"), + type = "integer", + default = 17, + help = "Height of the plots created by this script (default 17)" + ), + make_option( + c("-n", "--legend_nrow"), + type = "integer", + default = 6, + help = "Number of rows in the legend of the plots created by this script (default 6)" ), make_option(c("-o", "--outdir"), type = "character", @@ -72,7 +90,6 @@ if ("label" %in% names(opt)) { use_labels <- FALSE } - # replicates and count files files <- strsplit(opt$files, ",")[[1]] replicates <- strsplit(opt$replicates, ",")[[1]] @@ -85,13 +102,12 @@ data["Condition"] <- cond print(data) -# pairwise comparison only if more than one replicate thresh <- opt$threshold plot_correlations_dna <- function(data, condition, r1, r2, name) { dna_p <- ggplot(data, aes(dna_normalized_log2.x, dna_normalized_log2.y)) + - geom_point(aes(colour = label.x), show.legend = TRUE) + + geom_point(aes(colour = label.x)) + xlim(-5, 5) + ylim(-5, 5) + xlab(sprintf( @@ -126,13 +142,16 @@ plot_correlations_dna <- function(data, condition, r1, r2, name) { size = 10 ) + geom_abline(intercept = 0, slope = 1) + - theme_classic(base_size = 30) + theme_classic(base_size = 30) + + theme(legend.position="bottom") + # show legend below the plot + guides(fill=guide_legend(nrow=legend_nrow, byrow=TRUE)) + # show labels in rows + labs(color = "label\n") # legend name return(dna_p) } plot_correlations_rna <- function(data, condition, r1, r2, name) { rna_p <- ggplot(data, aes(rna_normalized_log2.x, rna_normalized_log2.y)) + - geom_point(aes(colour = label.x), show.legend = TRUE) + + geom_point(aes(colour = label.x)) + xlim(-5, 5) + ylim(-5, 5) + xlab(sprintf( @@ -165,12 +184,15 @@ plot_correlations_rna <- function(data, condition, r1, r2, name) { size = 10 ) + geom_abline(intercept = 0, slope = 1) + - theme_classic(base_size = 30) + theme_classic(base_size = 30) + + theme(legend.position="bottom") + # show legend below the plot + guides(fill=guide_legend(nrow=legend_nrow, byrow=TRUE)) + # show labels in rows + labs(color = "label\n") # legend name return(rna_p) } plot_correlations_ratio <- function(data, condition, r1, r2, name) { ratio_p <- ggplot(data, aes(ratio_log2.x, ratio_log2.y)) + - geom_point(aes(colour = label.x), show.legend = TRUE) + + geom_point(aes(colour = label.x)) + xlim(-5, 5) + ylim(-5, 5) + xlab(sprintf(paste( @@ -198,7 +220,10 @@ plot_correlations_ratio <- function(data, condition, r1, r2, name) { size = 10 ) + geom_abline(intercept = 0, slope = 1) + - theme_classic(base_size = 30) + theme_classic(base_size = 30) + + theme(legend.position="bottom") + # show legend below the plot + guides(fill=guide_legend(nrow=legend_nrow, byrow=TRUE)) + # show labels in rows + labs(color = "label\n") # legend name return(ratio_p) } @@ -268,8 +293,8 @@ write_correlation_plots <- function(plots, name) { ggplot2::ggsave(name, correlation_plots, - width = 15, - height = 10 * length(plots)) + width = plot_width, + height = plot_height * length(plots)) } write_correlation <- function(correlations, name) { @@ -324,7 +349,7 @@ if (use_labels) { all$label <- "NA" } } - +# pairwise comparison only if more than one replicate if (data %>% nrow() > 1 && nrow(all) > 1) { print("Pairwise comparisons") # make pairwise combinations