From c784771fa6d0cc3b7dd44ccc1bc2afb190b864d7 Mon Sep 17 00:00:00 2001 From: Ines Scheller Date: Thu, 11 May 2023 13:55:56 +0200 Subject: [PATCH 01/29] fix result script in case of no significant results --- .../FRASER/08_extract_results_FraseR.R | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R b/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R index 739c8839..65539a04 100644 --- a/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R +++ b/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R @@ -75,7 +75,7 @@ if(nrow(res_junc_dt) > 0){ res_junc_dt <- merge(res_junc_dt, as.data.table(colData(fds)), by = "sampleID") res_junc_dt[, c("bamFile", "pairedEnd", "STRAND", "RNA_BAM_FILE", "DNA_VCF_FILE", "COUNT_MODE", "COUNT_OVERLAPS") := NULL] } else{ - warning("The aberrant splicing pipeline gave 0 results for the ", dataset, " dataset.") + warning("The aberrant splicing pipeline gave 0 intron-level results for the ", dataset, " dataset.") } # Extract full results by gene @@ -102,7 +102,7 @@ res_genes_dt <- res_genes_dt[do.call(pmin, c(res_genes_dt[,padj_cols, with=FALSE abs(deltaPsi) >= snakemake@params$deltaPsiCutoff & totalCounts >= 5,] -if(length(res_gene) > 0){ +if(nrow(res_genes_dt) > 0){ res_genes_dt <- merge(res_genes_dt, as.data.table(colData(fds)), by = "sampleID") res_genes_dt[, c("bamFile", "pairedEnd", "STRAND", "RNA_BAM_FILE", "DNA_VCF_FILE", "COUNT_MODE", "COUNT_OVERLAPS") := NULL] @@ -115,7 +115,6 @@ if(length(res_gene) > 0){ } } } else{ - res_genes_dt <- data.table() warning("The aberrant splicing pipeline gave 0 gene-level results for the ", dataset, " dataset.") } @@ -125,8 +124,12 @@ library(AnnotationDbi) txdb <- loadDb(snakemake@input$txdb) # annotate the type of splice event and UTR overlap -res_junc_dt <- annotatePotentialImpact(result=res_junc_dt, txdb=txdb, fds=fds) 
-res_genes_dt <- annotatePotentialImpact(result=res_genes_dt, txdb=txdb, fds=fds) +if(nrow(res_junc_dt) > 0){ + res_junc_dt <- annotatePotentialImpact(result=res_junc_dt, txdb=txdb, fds=fds) +} +if(nrow(res_genes_dt) > 0){ + res_genes_dt <- annotatePotentialImpact(result=res_genes_dt, txdb=txdb, fds=fds) +} # set genome assembly version to load correct blacklist region BED file (hg19 or hg38) assemblyVersion <- snakemake@config$genomeAssembly @@ -139,10 +142,14 @@ if(grepl("grch38", assemblyVersion, ignore.case=TRUE)){ # annotate overlap with blacklist regions if(assemblyVersion %in% c("hg19", "hg38")){ - res_junc_dt <- flagBlacklistRegions(result=res_junc_dt, + if(nrow(res_junc_dt) > 0){ + res_junc_dt <- flagBlacklistRegions(result=res_junc_dt, assemblyVersion=assemblyVersion) - res_genes_dt <- flagBlacklistRegions(result=res_genes_dt, + } + if(nrow(res_genes_dt) > 0){ + res_genes_dt <- flagBlacklistRegions(result=res_genes_dt, assemblyVersion=assemblyVersion) + } } else{ message(date(), ": cannot annotate blacklist regions as no blacklist region\n", "BED file is available for genome assembly version ", assemblyVersion, From 0ade3c7f4f0b5f6dd92d291a294f281a15195a5e Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Wed, 17 May 2023 10:30:38 +0200 Subject: [PATCH 02/29] fix bug merge in wrong order coverage dt --- .../aberrant-expression-pipeline/Counting/Summary.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drop/modules/aberrant-expression-pipeline/Counting/Summary.R b/drop/modules/aberrant-expression-pipeline/Counting/Summary.R index ea198be4..d8f8a9fd 100644 --- a/drop/modules/aberrant-expression-pipeline/Counting/Summary.R +++ b/drop/modules/aberrant-expression-pipeline/Counting/Summary.R @@ -48,13 +48,14 @@ cnts_mtx <- counts(ods, normalized = F) #' Consider removing samples with too low or too high size factors. 
#' bam_coverage <- fread(snakemake@input$bam_cov) -bam_coverage[, sampleID := as.character(sampleID)] +bam_coverage[, RNA_ID := as.character(sampleID)] +bam_coverage[, sampleID := NULL] setnames(bam_coverage, 'record_count', 'total_count') -coverage_dt <- merge(bam_coverage, - data.table(sampleID = colnames(ods), +coverage_dt <- merge(data.table(RNA_ID = colnames(ods), read_count = colSums(cnts_mtx), isExternal = ods@colData$isExternal), - by = "sampleID", sort = FALSE) + bam_coverage, + by = "RNA_ID", sort = FALSE) # read counts coverage_dt[, count_rank := rank(read_count)] @@ -94,7 +95,7 @@ p_sf setnames(coverage_dt, old = c('total_count', 'read_count', 'size_factors'), new = c('Reads Mapped', 'Reads Counted', 'Size Factors')) -DT::datatable(coverage_dt[, .(sampleID, `Reads Mapped`, `Reads Counted`, `Size Factors`)][order(`Reads Mapped`)], +DT::datatable(coverage_dt[, .(RNA_ID, `Reads Mapped`, `Reads Counted`, `Size Factors`)][order(`Reads Mapped`)], caption = 'Reads summary statistics') #' # Filtering From c1824a0bcc23b468a0ab31355bed9fa5b13686a9 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Tue, 23 May 2023 13:26:24 +0200 Subject: [PATCH 03/29] Update prepare.rst --- docs/source/prepare.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/prepare.rst b/docs/source/prepare.rst index 156edcf3..0bb67663 100644 --- a/docs/source/prepare.rst +++ b/docs/source/prepare.rst @@ -314,9 +314,9 @@ Limiting FDR correction to subsets of genes of interest ------------------------------------ In addition to returning transcriptome-wide results, DROP provides the option to limit the FDR correction to user-provided genes of interest in the -``aberrantExpression`` and ``aberrantSplicing`` modules. These could e.g. be all +``aberrantExpression`` and ``aberrantSplicing`` modules. These could, for example, be all OMIM genes. 
It is also possible to provide sample-specific genes such as all -genes with a rare splice region variant for each sample. +genes with a rare splice-region variant for each sample. To use this feature, a YAML file containing the set(s) of genes to test (per sample or for all samples) needs to be specified in the ``genesToTest`` parameter of the ``aberrantExpression`` and ``aberrantSplicing`` modules in the config file. @@ -330,11 +330,14 @@ Creating the YAML file specifying subsets of genes to test The file containing the list of genes (HGNC symbols) to be tested must be a YAML file, where the variable names specify the name of each set of tested genes. In the output of DROP, this name will be used to identify the set in the results table. Each set -can either be a list of genes, in which case the set will be tested for all samples. Alternatively -(and additionally), sample-specific sets can be created by giving the RNA_ID of the sample +can either be: i) a list of genes, in which case the set will be tested for all samples, or ii) +sample-specific sets that can be created by giving the RNA_ID of the sample for which the set should be used as the name (see example below). This YAML file can be created in R using ``yaml::write_yaml(subsetList, filepath)``, where ``subsetList`` is a named list of named lists containing the sets of genes to test. +The gene names must match those from the provided gtf file. We currently do not support Ensembl ids as input. +The table with extracted gene names from the gtf file is located under: +``root/processed_data/preprocess/{geneAnnotation}/gene_name_mapping_{geneAnnotation}.tsv``. 
In the following example, the name of the global set of genes is ``Genes_to_test_on_all_samples`` and the name of the sample-specific set is ``Genes_with_rare_splice_variants``: From 87e0e2cf9e3af871c8829852fb818510aee55859 Mon Sep 17 00:00:00 2001 From: Ines Scheller Date: Wed, 24 May 2023 16:53:15 +0200 Subject: [PATCH 04/29] added saving of granges of split counts .as tsv.gz --- .../Counting/01_2_countRNA_splitReads_merge.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R index 3c3dec95..ee173f64 100644 --- a/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R +++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R @@ -54,6 +54,10 @@ splitCountRanges <- rowRanges(splitCounts) # Annotate granges from the split counts splitCountRanges <- FRASER:::annotateSpliceSite(splitCountRanges) saveRDS(splitCountRanges, snakemake@output$gRangesSplitCounts) +# additionally save as tsv.gz (for easier AbSplice input) +fwrite(as.data.table(splitCountRanges), + gsub(".Rds", ".tsv.gz", snakemake@output$gRangesSplitCounts, + ignore.case=TRUE)) # Create ranges for non split counts # Subset by minExpression From e82bfa5c9e6bb6b2cd6e9012bcdbb62a3ae64bbc Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Fri, 2 Jun 2023 19:07:29 +0200 Subject: [PATCH 05/29] Update DNA_RNA_matrix_plot.R --- drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R index 34298a11..771bfb19 100644 --- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R +++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R @@ -39,6 +39,8 @@ sa[, ANNOTATED_MATCH := TRUE] qc_mat <- 
readRDS(snakemake@input$mat_qc) melt_mat <- as.data.table(reshape2::melt(qc_mat)) colnames(melt_mat)[1:2] <- c('DNA_ID', 'RNA_ID') +melt_mat[, RNA_ID := as.character(RNA_ID)] +melt_mat[, DNA_ID := as.character(DNA_ID)] ggplot(melt_mat, aes(value)) + geom_histogram(fill = 'cadetblue4', binwidth = 0.05, center = .025) + theme_bw(base_size = 14) + From a837e4774fe17b9c4217c4f8d0f2ee5a439b4ce4 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 6 Jul 2023 14:07:12 -0700 Subject: [PATCH 06/29] change ln to softlink --- drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh b/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh index 3e97d2db..d61b11c7 100755 --- a/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh +++ b/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh @@ -35,8 +35,8 @@ while read header ; do echo "Internal Header $SM_internalHeader matches $sample" |tee $log echo "Internal Header is designated: $SM_internalHeader" |tee -a $log echo "SampleID is $sample" |tee -a $log - ln -f $input_bam $output_bam - ln -f $input_bai $output_bai + ln -s -f $input_bam $output_bam + ln -s -f $input_bai $output_bai echo "Done Linking files" samtools view -H $input_bam > $output_newHeader else From 61541d98408194dd9fb343c511ccb153902cd9d9 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 6 Jul 2023 14:09:22 -0700 Subject: [PATCH 07/29] add help to index --- docs/source/help.rst | 4 ++-- docs/source/index.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/help.rst b/docs/source/help.rst index 7c322d2c..5783ef33 100644 --- a/docs/source/help.rst +++ b/docs/source/help.rst @@ -8,11 +8,11 @@ Common errors The ``MAE:mae_allelicCounts`` step is susceptible to fail if: -1. the chromosomes styles of the reference genome and the BAM files do not match +1. 
The chromosomes styles of the reference genome and the BAM files do not match Solution: Identify the chromosomes style of the BAM file. Obtain an appropriate reference genome file and specify it in the config file. -2. the BAM file does not have the correct ``Read Groups`` documentation both the header and reads. You can often identify if the BAM file has any problems by using the command ``gatk ValidateSamFile -I path/to/bam_file.bam`` +2. The BAM file does not have the correct ``Read Groups`` documentation for both the header and reads. You can often identify if the BAM file has any problems by using the command ``gatk ValidateSamFile -I path/to/bam_file.bam`` Solution: To fix this is often dependent on the individual case, but some combination of the following tools is quite helpful: diff --git a/docs/source/index.rst b/docs/source/index.rst index 62e67717..8c7d3ca2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,7 +15,7 @@ Then, DROP can be executed in multiple ways (:doc:`pipeline`). 
pipeline output license - troubleshooting + help Quickstart ----------- From db611737e3c2cfd1048c07f220bf9de0eda36b67 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Sun, 1 Oct 2023 21:20:20 +0200 Subject: [PATCH 08/29] Update DNA_RNA_matrix_plot.R fix bug when multiple matching --- drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R index 771bfb19..f57d9cdc 100644 --- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R +++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R @@ -72,9 +72,9 @@ qc_dt[, PREDICTED_MATCH := value > identityCutoff] check_matches <- function(annot_col, pred_col){ if(sum(pred_col) == 0) return('no match') if(identical(annot_col,pred_col)) return('match') - if(sum(annot_col)==1 & sum(pred_col)==1) return('matches other') if(sum(annot_col)>1 & sum(pred_col)==1) return('matches less') if(sum(annot_col)==1 & sum(pred_col)>1) return('matches more') + return('matches other') } # check DNA and RNA matches (not necessarily the same) From 177c9a9598aba1021bc6dd910718a2e917a51c11 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Sun, 1 Oct 2023 21:39:59 +0200 Subject: [PATCH 09/29] Update create_matrix_dna_rna_cor.R Remove unnecessary step for the QC matrix --- drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R b/drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R index 16ad18e5..17ab6998 100644 --- a/drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R +++ b/drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R @@ -96,7 +96,7 @@ lp <- bplapply(1:N, function(i){ mat <- do.call(rbind, lp) row.names(mat) <- dna_samples colnames(mat) <- rna_samples -mat 
<- mat[sa[rows_in_group, DNA_ID], sa[rows_in_group, RNA_ID],drop=FALSE] +# mat <- mat[sa[rows_in_group, DNA_ID], sa[rows_in_group, RNA_ID],drop=FALSE] saveRDS(mat, snakemake@output$mat_qc) From e6520fd5047e0ff18fe3ce5e6da669727437a5a9 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Mon, 2 Oct 2023 08:02:57 +0200 Subject: [PATCH 10/29] Update DNA_RNA_matrix_plot.R fix bug in not annotated QC table --- drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R index f57d9cdc..2cdb6ca2 100644 --- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R +++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R @@ -138,5 +138,6 @@ DT::datatable(false_matches[value < identityCutoff]) #' ### Samples that were not annotated to match but actually do false_mismatches <- merge(melt_mat, sa, by = c('DNA_ID', 'RNA_ID'), sort = FALSE, all.x = TRUE) -DT::datatable(false_mismatches[is.na(ANNOTATED_MATCH) & value > identityCutoff]) +false_mismatches[is.na(ANNOTATED_MATCH), ANNOTATED_MATCH := FALSE] +DT::datatable(false_mismatches[ANNOTATED_MATCH == F & value > identityCutoff]) From e72f8ec77c9c7a8e21bc98d304fb370ce9ee9898 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Tue, 3 Oct 2023 08:14:17 +0200 Subject: [PATCH 11/29] Update DNA_RNA_matrix_plot.R fix bug in check matches function --- drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R index 2cdb6ca2..410dedd3 100644 --- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R +++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R @@ -72,9 +72,9 @@ qc_dt[, PREDICTED_MATCH := value > identityCutoff] 
check_matches <- function(annot_col, pred_col){ if(sum(pred_col) == 0) return('no match') if(identical(annot_col,pred_col)) return('match') - if(sum(annot_col)>1 & sum(pred_col)==1) return('matches less') - if(sum(annot_col)==1 & sum(pred_col)>1) return('matches more') - return('matches other') + if(all(rowSums(cbind(annot_col, pred_col)) < 2)) return('matches other') # the pred was never the same as the annot + if(sum(annot_col) > sum(pred_col)) return('matches less') + if(sum(annot_col) < sum(pred_col)) return('matches more') } # check DNA and RNA matches (not necessarily the same) From b87b11c9876a793ab10414a576561fcd9e28b672 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Mon, 16 Oct 2023 17:34:34 +0200 Subject: [PATCH 12/29] Update DNA_RNA_matrix_plot.R --- drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R index 410dedd3..4319f731 100644 --- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R +++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R @@ -75,6 +75,7 @@ check_matches <- function(annot_col, pred_col){ if(all(rowSums(cbind(annot_col, pred_col)) < 2)) return('matches other') # the pred was never the same as the annot if(sum(annot_col) > sum(pred_col)) return('matches less') if(sum(annot_col) < sum(pred_col)) return('matches more') + else return('matches other') } # check DNA and RNA matches (not necessarily the same) @@ -130,6 +131,8 @@ if(nrow(qc_mat) > 1 || ncol(qc_mat) > 1){ #' * Is the sample a relative of the other? 
#' +melt_mat[, value := round(value, 3)] + #' ### Samples that were annotated to match but do not false_matches <- merge(sa, melt_mat, by = c('DNA_ID', 'RNA_ID'), sort = FALSE, all.x = TRUE) From 95106ce3a20bc22f51cfbfeea024ed54f28ca5b7 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Tue, 12 Dec 2023 09:29:14 +0100 Subject: [PATCH 13/29] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f6ae7689..3941783d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Version](https://img.shields.io/github/v/release/gagneurlab/drop?include_prereleases)](https://github.com/gagneurlab/drop/releases) [![Version](https://readthedocs.org/projects/gagneurlab-drop/badge/?version=latest)](https://gagneurlab-drop.readthedocs.io/en/latest) -The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw sequencing files. +The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw short-read RNA-seq data. The manuscript is available in [Nature Protocols](https://www.nature.com/articles/s41596-020-00462-5). 
[SharedIt link.](https://rdcu.be/cdMmF) From d3dc0354aac7bdba2a2b0527dbc9c3bb92de1188 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Tue, 12 Dec 2023 09:47:45 +0100 Subject: [PATCH 14/29] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3941783d..88d29c19 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Version](https://img.shields.io/github/v/release/gagneurlab/drop?include_prereleases)](https://github.com/gagneurlab/drop/releases) [![Version](https://readthedocs.org/projects/gagneurlab-drop/badge/?version=latest)](https://gagneurlab-drop.readthedocs.io/en/latest) -The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw short-read RNA-seq data. +The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw sequencing data. The manuscript is available in [Nature Protocols](https://www.nature.com/articles/s41596-020-00462-5). [SharedIt link.](https://rdcu.be/cdMmF) From ae4e074b6e4661ab76dccbe90558dc9f44285699 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Tue, 12 Dec 2023 16:50:53 +0100 Subject: [PATCH 15/29] Update README.md --- README.md | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 88d29c19..d5f5ac2f 100644 --- a/README.md +++ b/README.md @@ -6,24 +6,11 @@ The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw sequencing data. The manuscript is available in [Nature Protocols](https://www.nature.com/articles/s41596-020-00462-5). 
[SharedIt link.](https://rdcu.be/cdMmF) +The html results of the Geuvadis demo dataset described in the paper can be found [here](https://cmm.in.tum.de/public/paper/drop_analysis/webDir/drop_analysis_index.html). drop logo -## What's new - -Versions 1.3.3, 1.3.2 and 1.3.1 fix some bugs. -Version 1.3.0 introduces the option to use FRASER 2.0 which is an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary in the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See the [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml) for more details. When switching between FRASER versions, we recommend running DROP in a -separate folder for each version. Moreover, DROP now allows users to provide lists of genes to focus on and do the multiple testing correction instead of the usual transcriptome-wide approach. Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest). - -`Snakemake v.7.8` introduced some changes in which changes in parameters can cause rules to be re-executed. More info [here](https://github.com/snakemake/snakemake/issues/1694). This affects DROP and causes certain rules in the AS and QC modules to be triggered even if they were already completed and there were no changes in the sample annotation or scripts. The workaround is to run DROP by adding the parameter `--rerun-triggers mtime`, e.g. `snakemake -n --rerun-triggers mtime` or `snakemake --cores 10 --rerun-triggers mtime`. We will investigate the rules in DROP to fix this. - -Version 1.2.3 simplifies the plots in the AE Summary Script. In addition, there's a new heatmap in the sampleQC Summary that allows to better identify DNA-RNA mismatches. 
- -As of version 1.2.1 DROP has a new module that performs RNA-seq variant calling. The input are BAM files and the output either a single-sample or a multi-sample VCF file (option specified by the user) annotated with allele frequencies from gnomAD (if specified by the user). The sample annotation table does not need to be changed, but several new parameters in the config file have to be added and tuned. For more info, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#rna-variant-calling-dictionary). - -Also, as of version 1.2.1 the integration of external split and non-split counts to detect aberrant splicing is now possible. Simply specify in a new column in the sample annotation the directory containing the counts. For more info, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#external-count-examples). - ## Quickstart DROP is available on [bioconda](https://anaconda.org/bioconda/drop). We recommend using a dedicated conda environment (`drop_env` in this example). Installation time: ~ 10min. @@ -55,6 +42,21 @@ Expected runtime: 25 min For more information on different installation options, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/installation.html) +## What's new + +Versions 1.3.3, 1.3.2 and 1.3.1 fix some bugs. +Version 1.3.0 introduces the option to use FRASER 2.0 which is an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary in the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See the [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml) for more details. When switching between FRASER versions, we recommend running DROP in a +separate folder for each version. 
Moreover, DROP now allows users to provide lists of genes to focus on and do the multiple testing correction instead of the usual transcriptome-wide approach. Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest). + +`Snakemake v.7.8` introduced some changes in which changes in parameters can cause rules to be re-executed. More info [here](https://github.com/snakemake/snakemake/issues/1694). This affects DROP and causes certain rules in the AS and QC modules to be triggered even if they were already completed and there were no changes in the sample annotation or scripts. The workaround is to run DROP by adding the parameter `--rerun-triggers mtime`, e.g. `snakemake -n --rerun-triggers mtime` or `snakemake --cores 10 --rerun-triggers mtime`. We will investigate the rules in DROP to fix this. + +Version 1.2.3 simplifies the plots in the AE Summary Script. In addition, there's a new heatmap in the sampleQC Summary that allows to better identify DNA-RNA mismatches. + +As of version 1.2.1 DROP has a new module that performs RNA-seq variant calling. The input are BAM files and the output either a single-sample or a multi-sample VCF file (option specified by the user) annotated with allele frequencies from gnomAD (if specified by the user). The sample annotation table does not need to be changed, but several new parameters in the config file have to be added and tuned. For more info, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#rna-variant-calling-dictionary). + +Also, as of version 1.2.1 the integration of external split and non-split counts to detect aberrant splicing is now possible. Simply specify in a new column in the sample annotation the directory containing the counts. For more info, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#external-count-examples). 
+ + ## Set up a custom project Install the drop module according to [installation](#installation) and initialize the project in a custom project directory. ### Prepare the input data @@ -100,7 +102,10 @@ If you want to contribute with your own count matrices, please contact us: yepez If you use DROP in research, please cite our [manuscript](https://www.nature.com/articles/s41596-020-00462-5). -Furthermore, if you use the aberrant expression module, also cite [OUTRIDER](https://doi.org/10.1016/j.ajhg.2018.10.025); if you use the aberrant splicing module, also cite [FRASER](https://www.nature.com/articles/s41467-020-20573-7); and if you use the MAE module, also cite the [Kremer, Bader et al study](https://www.nature.com/articles/ncomms15824) and [DESeq2](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8). +Furthermore, if you use: +* the aberrant expression module, also cite [OUTRIDER](https://doi.org/10.1016/j.ajhg.2018.10.025) +* the aberrant splicing module, also cite [FRASER](https://www.nature.com/articles/s41467-020-20573-7) or [FRASER2](https://www.sciencedirect.com/science/article/pii/S0002929723003671?dgcid=coauthor), depending on the version that you use +* the MAE module, also cite the [Kremer, Bader et al study](https://www.nature.com/articles/ncomms15824) and [DESeq2](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8). For the complete set of tools used by DROP (e.g. for counting), see the [manuscript](https://www.nature.com/articles/s41596-020-00462-5). 
From 1d2fd5ac18256aede0e2de84d4b09407134a5f12 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Tue, 12 Dec 2023 16:52:48 +0100 Subject: [PATCH 16/29] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d5f5ac2f..b3c9e976 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw sequencing data. The manuscript is available in [Nature Protocols](https://www.nature.com/articles/s41596-020-00462-5). [SharedIt link.](https://rdcu.be/cdMmF) -The html results of the Geuvadis demo dataset described in the paper can be found [here](https://cmm.in.tum.de/public/paper/drop_analysis/webDir/drop_analysis_index.html). + +The website containing the different reports of the Geuvadis demo dataset described in the paper can be found [here](https://cmm.in.tum.de/public/paper/drop_analysis/webDir/drop_analysis_index.html). 
drop logo From f16e45a437587d69d8e720277eea49a4b880ae9a Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:15:48 +0100 Subject: [PATCH 17/29] Update 08_extract_results_FraseR.R --- .../FRASER/08_extract_results_FraseR.R | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R b/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R index 65539a04..87dba6ca 100644 --- a/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R +++ b/drop/modules/aberrant-splicing-pipeline/FRASER/08_extract_results_FraseR.R @@ -119,8 +119,6 @@ if(nrow(res_genes_dt) > 0){ } # Annotate results with spliceEventType and blacklist region overlap -# load reference annotation -library(AnnotationDbi) txdb <- loadDb(snakemake@input$txdb) # annotate the type of splice event and UTR overlap @@ -133,13 +131,9 @@ if(nrow(res_genes_dt) > 0){ # set genome assembly version to load correct blacklist region BED file (hg19 or hg38) assemblyVersion <- snakemake@config$genomeAssembly -if(grepl("grch37", assemblyVersion, ignore.case=TRUE)){ - assemblyVersion <- "hg19" -} -if(grepl("grch38", assemblyVersion, ignore.case=TRUE)){ - assemblyVersion <- "hg38" -} - +if(grepl("grch37", assemblyVersion, ignore.case=TRUE)) assemblyVersion <- "hg19" +if(grepl("grch38", assemblyVersion, ignore.case=TRUE)) assemblyVersion <- "hg38" + # annotate overlap with blacklist regions if(assemblyVersion %in% c("hg19", "hg38")){ if(nrow(res_junc_dt) > 0){ From d57da3c36393f358ff68210fa1dd96669910def3 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Fri, 12 Apr 2024 14:02:56 +0200 Subject: [PATCH 18/29] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b3c9e976..394b7f54 100644 --- a/README.md +++ b/README.md 
@@ -5,10 +5,12 @@ The detection of RNA Outliers Pipeline (DROP) is an integrative workflow to detect aberrant expression, aberrant splicing, and mono-allelic expression from raw sequencing data. -The manuscript is available in [Nature Protocols](https://www.nature.com/articles/s41596-020-00462-5). [SharedIt link.](https://rdcu.be/cdMmF) +The manuscript is available in [Nature Protocols](https://www.nature.com/articles/s41596-020-00462-5). The website containing the different reports of the Geuvadis demo dataset described in the paper can be found [here](https://cmm.in.tum.de/public/paper/drop_analysis/webDir/drop_analysis_index.html). +This [video](https://www.youtube.com/watch?v=XvgjiFQClhM&t=2761s) can serve as an introduction to the tools used in DROP and their application to rare disease diagnostics. + drop logo From 3b34f53025357b64b1876dd06df79047ce1a69d8 Mon Sep 17 00:00:00 2001 From: Christian Mertes Date: Thu, 18 Apr 2024 01:02:41 +0200 Subject: [PATCH 19/29] update GHA with snakemake version range and activate PR checks --- .github/workflows/python-package-conda.yml | 2 +- environment.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 00437a34..9be16b75 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -1,6 +1,6 @@ name: Build -on: [push] +on: [push,pull_request] jobs: build-linux: diff --git a/environment.yml b/environment.yml index 53be3e1c..44e1a3a6 100644 --- a/environment.yml +++ b/environment.yml @@ -3,9 +3,10 @@ channels: - conda-forge - bioconda dependencies: - - python==3.8 + - python>=3.8 - pip - drop + - snakemake>=5,<8 - flake8 - bioconductor-bsgenome.hsapiens.ucsc.hg19 From ec9628f0ecfc1ec86b48cad6caf26d02456cc749 Mon Sep 17 00:00:00 2001 From: Christian Mertes Date: Thu, 18 Apr 2024 01:04:33 +0200 Subject: [PATCH 20/29] fix #532 knitr parsing --- 
drop/modules/mae-pipeline/MAE/Results.R | 2 +- drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R | 2 +- drop/template/Scripts/MonoallelicExpression/Overview.R | 4 ++-- drop/template/Scripts/Pipeline/SampleAnnotation.R | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drop/modules/mae-pipeline/MAE/Results.R b/drop/modules/mae-pipeline/MAE/Results.R index 4897e3b4..1be74698 100644 --- a/drop/modules/mae-pipeline/MAE/Results.R +++ b/drop/modules/mae-pipeline/MAE/Results.R @@ -107,7 +107,7 @@ res[, MAE_ALT := MAE == TRUE & altRatio >= allelicRatioCutoff] #' #' Number of samples with significant MAE for alternative events: `r uniqueN(res[MAE_ALT == TRUE, ID])` -#+echo=F +#+ echo=F # Save full results zipped res[, altRatio := round(altRatio, 3)] diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R index 4319f731..97bc612e 100644 --- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R +++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R @@ -12,7 +12,7 @@ #' type: noindex #'--- -#+echo=F +#+ echo=F saveRDS(snakemake, snakemake@log$snakemake) suppressPackageStartupMessages({ diff --git a/drop/template/Scripts/MonoallelicExpression/Overview.R b/drop/template/Scripts/MonoallelicExpression/Overview.R index b1ffe222..85d2c1c6 100644 --- a/drop/template/Scripts/MonoallelicExpression/Overview.R +++ b/drop/template/Scripts/MonoallelicExpression/Overview.R @@ -86,7 +86,7 @@ qc_links <- sapply(qc_groups, function(v) build_link_list( #' #' ## Analyze Individual Results -#+echo=FALSE +#+ echo=FALSE # Read the first results table res_sample <- readRDS(snakemake@input$results_sample[[1]]) sample <- unique(res_sample$ID) @@ -95,7 +95,7 @@ library(tMAE) library(ggplot2) rare_column <- 'rare' if(any(is.na(res_sample$rare))) rare_column <- NULL -#+echo=TRUE +#+ echo=TRUE #' ### MA plot: fold change vs RNA coverage plotMA4MAE(res_sample, rare_column = rare_column, diff --git 
a/drop/template/Scripts/Pipeline/SampleAnnotation.R b/drop/template/Scripts/Pipeline/SampleAnnotation.R index 7963bdc0..eea616ac 100644 --- a/drop/template/Scripts/Pipeline/SampleAnnotation.R +++ b/drop/template/Scripts/Pipeline/SampleAnnotation.R @@ -16,7 +16,7 @@ #' code_download: TRUE #'--- -#+echo=F +#+ echo=F saveRDS(snakemake, snakemake@log$snakemake) suppressPackageStartupMessages({ @@ -76,7 +76,7 @@ unique(sa[,.(RNA_ID, DROP_GROUP)])$DROP_GROUP %>% strsplit(',') %>% unlist %>% table %>% barplot(xlab = 'DROP groups', ylab = 'Number of samples') # Obtain genes that overlap with HPO terms -#+echo=F +#+ echo=F if(!is.null(sa$HPO_TERMS) & !all(is.na(sa$HPO_TERMS)) & ! all(sa$HPO_TERMS == '')){ sa2 <- sa[, .SD[1], by = RNA_ID] From eb218f34dfbcef4aacee1ee5fb11b7a249644b82 Mon Sep 17 00:00:00 2001 From: Christian Mertes Date: Thu, 18 Apr 2024 01:07:36 +0200 Subject: [PATCH 21/29] remove redundant rules to create dict and fai files --- drop/modules/mae-pipeline/Snakefile | 13 --------- drop/template/Snakefile | 43 +++++++++++++++++------------ 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/drop/modules/mae-pipeline/Snakefile b/drop/modules/mae-pipeline/Snakefile index 18505a5f..22ed3fdd 100644 --- a/drop/modules/mae-pipeline/Snakefile +++ b/drop/modules/mae-pipeline/Snakefile @@ -20,19 +20,6 @@ rule mae_dependency: rule sampleQC: input: cfg.getHtmlFromScript(MAE_WORKDIR / "QC" / "Datasets.R") -rule create_dict: - input: cfg.genome.getFastaList() - output: cfg.genome.getDictList() - shell: - """ - for fasta in {input} - do - if [ ! 
-f "${{fasta%.*}}.dict" ]; then - gatk CreateSequenceDictionary --REFERENCE $fasta - fi - done - """ - ## MAE rule mae_createSNVs: diff --git a/drop/template/Snakefile b/drop/template/Snakefile index 7ff1bd2c..74ed703e 100644 --- a/drop/template/Snakefile +++ b/drop/template/Snakefile @@ -72,28 +72,35 @@ rule dependencyGraph: rule publish_local: shell: "rsync -Ort {config[htmlOutputPath]} {config[webDir]}" -rule prepFasta_fa: - priority: 1 - input: - "{path_to_ref}.fa" - output: - dict = "{path_to_ref}.dict" , - fai = "{path_to_ref}.fa.fai" +rule index_fa: + input: "{path_to_ref}.fa" + output: "{path_to_ref}.fa.fai" shell: """ - samtools faidx {input} -o {output.fai} - gatk CreateSequenceDictionary -R {input} -O {output.dict} + samtools faidx {input} -o {output} """ -rule prepFasta_fasta: - priority: 1 - input: - "{path_to_ref}.fasta" - output: - dict = "{path_to_ref}.dict" , - fai = "{path_to_ref}.fasta.fai" +rule dict_fa: + input: "{path_to_ref}.fa" + output: "{path_to_ref}.dict" shell: """ - samtools faidx {input} -o {output.fai} - gatk CreateSequenceDictionary -R {input} -O {output.dict} + gatk CreateSequenceDictionary --REFERENCE {input} --OUTPUT {output} + """ + +rule index_fasta: + input: "{path_to_ref}.fasta" + output: "{path_to_ref}.fasta.fai" + shell: """ + samtools faidx {input} -o {output} + """ + +rule dict_fasta: + input: "{path_to_ref}.fasta" + output: "{path_to_ref}.dict" + shell: + """ + gatk CreateSequenceDictionary --REFERENCE {input} --OUTPUT {output} + """ + From f26dedf72177b181c4f73c4a109788086ddd1b7e Mon Sep 17 00:00:00 2001 From: AtaJadidAhari Date: Mon, 22 Apr 2024 19:40:38 +0200 Subject: [PATCH 22/29] fix last outrider version install --- drop/installRPackages.R | 10 ++++++---- drop/requirementsR.txt | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drop/installRPackages.R b/drop/installRPackages.R index 42148f58..6635ce61 100644 --- a/drop/installRPackages.R +++ b/drop/installRPackages.R @@ -23,18 +23,20 @@ if 
(file.exists(args[1])){ package=gsub("=.*", "", unlist(args)), version=gsub(".*=", "", unlist(args)), ref="") - packages[package == version, version:=NA] + packages[package == version, c("min_version", "max_version") := ""] } installed <- as.data.table(installed.packages()) for (pckg_name in packages$package) { package_dt <- packages[package == pckg_name] pckg_name <- gsub(".*/", "", pckg_name) - version <- package_dt$version + min_version <- package_dt$min_version + max_version <- package_dt$max_version branch <- package_dt$ref - if (!pckg_name %in% installed$Package || (!is.na(version) && compareVersion( - installed[Package == pckg_name, Version], version) < 0)) { + if (!pckg_name %in% installed$Package || (min_version != "" && (compareVersion( + installed[Package == pckg_name, Version], min_version) < 0 || compareVersion( + installed[Package == pckg_name, Version], max_version) > 0))) { package <- package_dt$package message(paste("install", package)) diff --git a/drop/requirementsR.txt b/drop/requirementsR.txt index eb65d037..08faa914 100644 --- a/drop/requirementsR.txt +++ b/drop/requirementsR.txt @@ -1,8 +1,8 @@ -package version ref +package min_version max_version ref devtools -gagneurlab/OUTRIDER 1.17.2 HEAD -gagneurlab/FRASER 1.99.1 HEAD -gagneurlab/tMAE 1.0.4 HEAD +gagneurlab/OUTRIDER 1.17.2 1.17.2 HEAD +gagneurlab/FRASER 1.99.1 1.99.3 HEAD +gagneurlab/tMAE 1.0.4 1.0.4 HEAD VariantAnnotation rmarkdown knitr From 752edd1bbc05324b2c656995b8526ed191aa9f59 Mon Sep 17 00:00:00 2001 From: AtaJadidAhari Date: Mon, 13 May 2024 16:52:04 +0200 Subject: [PATCH 23/29] Fix FRASER and OUTRIDER versions --- README.md | 2 +- docs/source/conf.py | 2 +- drop/__init__.py | 2 +- drop/cli.py | 2 +- drop/requirementsR.txt | 6 +++--- setup.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 394b7f54..6958a2dc 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ For more information on different installation options, refer to 
the ## What's new -Versions 1.3.3, 1.3.2 and 1.3.1 fix some bugs. +Version 1.3.4 freezes FRASER and OUTRIDER versions before major changes in new FRASER to ensure reproducibility. Versions 1.3.3, 1.3.2 and 1.3.1 fix some bugs. Version 1.3.0 introduces the option to use FRASER 2.0 which is an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary in the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See the [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml) for more details. When switching between FRASER versions, we recommend running DROP in a separate folder for each version. Moreover, DROP now allows users to provide lists of genes to focus on and do the multiple testing correction instead of the usual transcriptome-wide approach. Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest). diff --git a/docs/source/conf.py b/docs/source/conf.py index 63e4ce56..749be82b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,7 +23,7 @@ author = 'Michaela Müller' # The full version, including alpha/beta/rc tags -release_ = '1.3.3' +release_ = '1.3.4' diff --git a/drop/__init__.py b/drop/__init__.py index 4a5f75c9..95a02cb1 100644 --- a/drop/__init__.py +++ b/drop/__init__.py @@ -4,5 +4,5 @@ from . import utils from . 
import demo -__version__ = "1.3.3" +__version__ = "1.3.4" diff --git a/drop/cli.py b/drop/cli.py index f3778598..0795ea87 100644 --- a/drop/cli.py +++ b/drop/cli.py @@ -17,7 +17,7 @@ @click.group() @click_log.simple_verbosity_option(logger) -@click.version_option('1.3.3',prog_name='drop') +@click.version_option('1.3.4',prog_name='drop') def main(): diff --git a/drop/requirementsR.txt b/drop/requirementsR.txt index 08faa914..8dfbe2f6 100644 --- a/drop/requirementsR.txt +++ b/drop/requirementsR.txt @@ -1,8 +1,8 @@ package min_version max_version ref devtools -gagneurlab/OUTRIDER 1.17.2 1.17.2 HEAD -gagneurlab/FRASER 1.99.1 1.99.3 HEAD -gagneurlab/tMAE 1.0.4 1.0.4 HEAD +gagneurlab/OUTRIDER 1.17.2 1.17.2 1.17.2 +gagneurlab/FRASER 1.99.3 1.99.3 1.99.3 +gagneurlab/tMAE 1.0.4 1.0.4 1.0.4 VariantAnnotation rmarkdown knitr diff --git a/setup.py b/setup.py index 65b687c0..9499792c 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setuptools.setup( name="drop", - version="1.3.3", + version="1.3.4", author="Vicente A. Yépez, Michaela Müller, Nicholas H. 
Smith, Daniela Klaproth-Andrade, Luise Schuller, Ines Scheller, Christian Mertes , Julien Gagneur ", author_email="yepez@in.tum.de", From d5caf39b3a371a8b506caf553acfb9fdf727c7ac Mon Sep 17 00:00:00 2001 From: AtaJadidAhari Date: Tue, 14 May 2024 10:29:53 +0200 Subject: [PATCH 24/29] OUTRIDER 1.16.2 & FRASER 1.99.0 --- drop/requirementsR.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drop/requirementsR.txt b/drop/requirementsR.txt index 8dfbe2f6..c699dc02 100644 --- a/drop/requirementsR.txt +++ b/drop/requirementsR.txt @@ -1,7 +1,7 @@ package min_version max_version ref devtools -gagneurlab/OUTRIDER 1.17.2 1.17.2 1.17.2 -gagneurlab/FRASER 1.99.3 1.99.3 1.99.3 +gagneurlab/OUTRIDER 1.16.2 1.16.2 1.16.2 +gagneurlab/FRASER 1.99.0 1.99.0 1.99.0 gagneurlab/tMAE 1.0.4 1.0.4 1.0.4 VariantAnnotation rmarkdown From f9a6b8fb65d4ec8c629d7b21a7a2e1360e14e400 Mon Sep 17 00:00:00 2001 From: AtaJadidAhari Date: Tue, 14 May 2024 11:03:35 +0200 Subject: [PATCH 25/29] pin OUTRIDER to 1.20.1 --- drop/requirementsR.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop/requirementsR.txt b/drop/requirementsR.txt index c699dc02..cc83fbde 100644 --- a/drop/requirementsR.txt +++ b/drop/requirementsR.txt @@ -1,6 +1,6 @@ package min_version max_version ref devtools -gagneurlab/OUTRIDER 1.16.2 1.16.2 1.16.2 +gagneurlab/OUTRIDER 1.20.1 1.20.1 1.20.1 gagneurlab/FRASER 1.99.0 1.99.0 1.99.0 gagneurlab/tMAE 1.0.4 1.0.4 1.0.4 VariantAnnotation From 68ccefccf61545c156c419a5c048916408cde092 Mon Sep 17 00:00:00 2001 From: AtaJadidAhari Date: Tue, 14 May 2024 14:20:15 +0200 Subject: [PATCH 26/29] pin FRASER to commit #d6a422c --- drop/requirementsR.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop/requirementsR.txt b/drop/requirementsR.txt index cc83fbde..6461207f 100644 --- a/drop/requirementsR.txt +++ b/drop/requirementsR.txt @@ -1,7 +1,7 @@ package min_version max_version ref devtools gagneurlab/OUTRIDER 1.20.1 1.20.1 1.20.1 
-gagneurlab/FRASER 1.99.0 1.99.0 1.99.0 +gagneurlab/FRASER 1.99.3 1.99.3 d6a422c gagneurlab/tMAE 1.0.4 1.0.4 1.0.4 VariantAnnotation rmarkdown From 76f86076e03952498a72391d80f20045a04f0952 Mon Sep 17 00:00:00 2001 From: AtaJadidAhari Date: Thu, 16 May 2024 17:43:54 +0200 Subject: [PATCH 27/29] update version nr for bioconda --- README.md | 2 +- docs/source/installation.rst | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6958a2dc..b761061a 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ mamba create -n drop_env -c conda-forge -c bioconda drop --override-channels In the case of mamba/conda troubles we recommend using the fixed `DROP_.yaml` installation file we make available on our [public server](https://www.cmm.in.tum.de/public/paper/drop_analysis/). Install the current version and use the full path in the following command to install the conda environment `drop_env` ``` -mamba env create -f DROP_1.3.3.yaml +mamba env create -f DROP_1.3.4.yaml ``` Test installation with demo project diff --git a/docs/source/installation.rst b/docs/source/installation.rst index fea0d531..ad120274 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -20,7 +20,7 @@ Install the latest version and use the full path in the following command to ins .. 
code-block:: bash - mamba env create -f DROP_1.3.3.yaml + mamba env create -f DROP_1.3.4.yaml Installation time: ~ 10min diff --git a/setup.cfg b/setup.cfg index 09af7983..ae813b4e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.3.3 +current_version = 1.3.4 commit = True [bumpversion:file:setup.py] From 853a94b9f8ff09b5668dea69af6e9e6cd75ff3c8 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Thu, 16 May 2024 17:54:17 +0200 Subject: [PATCH 28/29] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b761061a..a9fb79ec 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ The manuscript is available in [Nature Protocols](https://www.nature.com/article The website containing the different reports of the Geuvadis demo dataset described in the paper can be found [here](https://cmm.in.tum.de/public/paper/drop_analysis/webDir/drop_analysis_index.html). -This [video](https://www.youtube.com/watch?v=XvgjiFQClhM&t=2761s) can serve as an introduction to the tools used in DROP and their application to rare disease diagnostics. +This [video](https://www.youtube.com/watch?v=XvgjiFQClhM&t=2761s) introduces the tools used in DROP and their application to rare disease diagnostics. drop logo @@ -21,7 +21,7 @@ We recommend using a dedicated conda environment (`drop_env` in this example). I mamba create -n drop_env -c conda-forge -c bioconda drop --override-channels ``` -In the case of mamba/conda troubles we recommend using the fixed `DROP_.yaml` installation file we make available on our [public server](https://www.cmm.in.tum.de/public/paper/drop_analysis/). 
Install the current version and use the full path in the following command to install the conda environment `drop_env` +In the case of mamba/conda troubles, we recommend using the fixed `DROP_.yaml` installation file we make available on our [public server](https://www.cmm.in.tum.de/public/paper/drop_analysis/). Install the current version and use the full path in the following command to install the conda environment `drop_env` ``` mamba env create -f DROP_1.3.4.yaml ``` @@ -47,14 +47,12 @@ For more information on different installation options, refer to the ## What's new -Version 1.3.4 freezes FRASER and OUTRIDER versions before major changes in new FRASER to ensure reproducibility. Versions 1.3.3, 1.3.2 and 1.3.1 fix some bugs. +Due to snakemake updates affecting wBuild and the way we installed FRASER, installing DROP 1.3.3 no longer works. Version 1.3.4 is the same as 1.3.3, but fixes the FRASER version to ensure reproducibility and fixes certain scripts affected by the snakemake update. Running 1.3.4 should provide the same results as 1.3.3. Version 1.3.0 introduces the option to use FRASER 2.0 which is an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary in the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See the [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml) for more details. When switching between FRASER versions, we recommend running DROP in a separate folder for each version. Moreover, DROP now allows users to provide lists of genes to focus on and do the multiple testing correction instead of the usual transcriptome-wide approach. 
Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest). `Snakemake v.7.8` introduced some changes in which changes in parameters can cause rules to be re-executed. More info [here](https://github.com/snakemake/snakemake/issues/1694). This affects DROP and causes certain rules in the AS and QC modules to be triggered even if they were already completed and there were no changes in the sample annotation or scripts. The workaround is to run DROP by adding the parameter `--rerun-triggers mtime`, e.g. `snakemake -n --rerun-triggers mtime` or `snakemake --cores 10 --rerun-triggers mtime`. We will investigate the rules in DROP to fix this. -Version 1.2.3 simplifies the plots in the AE Summary Script. In addition, there's a new heatmap in the sampleQC Summary that allows to better identify DNA-RNA mismatches. - As of version 1.2.1 DROP has a new module that performs RNA-seq variant calling. The input are BAM files and the output either a single-sample or a multi-sample VCF file (option specified by the user) annotated with allele frequencies from gnomAD (if specified by the user). The sample annotation table does not need to be changed, but several new parameters in the config file have to be added and tuned. For more info, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#rna-variant-calling-dictionary). Also, as of version 1.2.1 the integration of external split and non-split counts to detect aberrant splicing is now possible. Simply specify in a new column in the sample annotation the directory containing the counts. For more info, refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#external-count-examples). 
From d6bca12ad9a0052c5cbfa0bd0ddded7d34b2e846 Mon Sep 17 00:00:00 2001 From: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Date: Thu, 16 May 2024 17:58:13 +0200 Subject: [PATCH 29/29] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a9fb79ec..63f1acbb 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,8 @@ For more information on different installation options, refer to the ## What's new -Due to snakemake updates affecting wBuild and the way we installed FRASER, installing DROP 1.3.3 no longer works. Version 1.3.4 is the same as 1.3.3, but fixes the FRASER version to ensure reproducibility and fixes certain scripts affected by the snakemake update. Running 1.3.4 should provide the same results as 1.3.3. +Due to snakemake updates affecting wBuild and the way we installed FRASER, installing DROP 1.3.3 no longer works. Version 1.3.4 simply pins the FRASER version to ensure reproducibility and fixes certain scripts affected by the snakemake update. Running the pipeline with version 1.3.4 should provide the same outlier results as 1.3.3. + Version 1.3.0 introduces the option to use FRASER 2.0 which is an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary in the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See the [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml) for more details. When switching between FRASER versions, we recommend running DROP in a separate folder for each version. Moreover, DROP now allows users to provide lists of genes to focus on and do the multiple testing correction instead of the usual transcriptome-wide approach. 
Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest).