From 1f36a7a8517fec2cb7fe47195d97a731591e23f1 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 11 Feb 2021 16:18:37 +0100 Subject: [PATCH 001/202] Update qc stats compute from pycisTopic --- src/pycistopic/bin/compute_qc_stats.py | 8 ++++---- src/pycistopic/processes/compute_qc_stats.nf | 1 + src/pycistopic/pycistopic.config | 2 +- workflows/atac/qc_filtering.nf | 7 ++++++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index eee747a9..d29d9991 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -5,7 +5,7 @@ import pandas as pd import pickle -from pycisTopic.qc import computeQCStats +from pycisTopic.qc import compute_qc_stats ################################################################################ @@ -81,17 +81,17 @@ args.sampleId: args.regions } -metadata_bc_dict, profile_data_dict = computeQCStats( +metadata_bc_dict, profile_data_dict = compute_qc_stats( fragments_dict= fragments_dict, tss_annotation = annot, - stats=['barcodeRankPlot', 'insertSizeDistribution', 'profileTSS', 'FRIP'], + stats=['barcode_rank_plot', 'duplicate_rate', 'insert_size_distribution', 'profile_tss', 'frip'], label_list = None, path_to_regions = path_to_regions, n_cpu = args.threads, valid_bc = None, n_frag = args.n_frag, n_bc = None, - tss_flank_window = 2000, + tss_flank_window = 1000, tss_window = 50, tss_minimum_signal_window = 100, tss_rolling_window = 10, diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index 4777901e..e8319b80 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -7,6 +7,7 @@ processParams = params.tools.pycistopic.compute_qc_stats process SC__PYCISTOPIC__COMPUTE_QC_STATS { + publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' container toolParams.container label 'compute_resources__cpu' diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 4bd37a94..d1e2c31c 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -1,7 +1,7 @@ params { tools { pycistopic { - container = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/Pipeline_Dev/containers/aertslab-pycistopic-0.1-dev05.sif' + container = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/Pipeline_Dev/containers/aertslab-pycistopic-2021-02-02-2b4cc98.sif' macs2_call_peaks { gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. 
'2.7e9' qvalue = 0.01 diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index be356745..931a3063 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -7,10 +7,12 @@ include { SC__ARCHR__CELL_CALLING; } from './../../src/archr/processes/cell_call include { SC__PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' params(params) include { SC__PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' params(params) +include { SC__PYCISTOPIC__PLOT_QC_STATS; } from './../../src/pycistopic/processes/plot_qc_stats.nf' params(params) include { - PUBLISH as PUBLISH_METADATA; PUBLISH as PUBLISH_PEAKS; + PUBLISH as PUBLISH_METADATA; + PUBLISH as PUBLISH_QC_SAMPLE_METRICS; } from "../../src/utils/workflows/utils.nf" params(params) ////////////////////////////////////////////////////// @@ -39,6 +41,9 @@ workflow ATAC_QC_PREFILTER { qc_stats = SC__PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks) PUBLISH_METADATA(qc_stats.map { it -> tuple(it[0], it[1]) }, 'metadata.tsv', 'gz', 'pycistopic', false) + qc_stats_plot = SC__PYCISTOPIC__PLOT_QC_STATS(qc_stats) + PUBLISH_QC_SAMPLE_METRICS(qc_stats_plot, 'qc_sample_metrics.pdf', 'pdf', 'pycistopic', false) + } From 88b4531026bdd241aefccfea9e63d3914614b77b Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 11 Feb 2021 16:19:07 +0100 Subject: [PATCH 002/202] Add qc stats plotting --- src/pycistopic/bin/plot_qc_stats.py | 46 +++++++++++++++++++++++ src/pycistopic/processes/plot_qc_stats.nf | 36 ++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100755 src/pycistopic/bin/plot_qc_stats.py create mode 100644 src/pycistopic/processes/plot_qc_stats.nf diff --git a/src/pycistopic/bin/plot_qc_stats.py b/src/pycistopic/bin/plot_qc_stats.py new file mode 100755 index 00000000..4a581618 --- /dev/null +++ b/src/pycistopic/bin/plot_qc_stats.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +import argparse +import pickle + +from pycisTopic.qc import plot_sample_metrics + +################################################################################ + +parser = argparse.ArgumentParser(description='Compute QC stats') + +parser.add_argument( + "--sampleId", + type=str, + required=True, + help='Sample ID.' +) +parser.add_argument( + "--profile_data_pkl", + type=str, + help='Profile data, pickle format.' +) +parser.add_argument( + "--output_pdf", + type=str, + help='Output plots, pdf format.' +) + +args = parser.parse_args() + +################################################################################ + +# Load sample metrics +infile = open(args.profile_data_pkl, 'rb') +profile_data_dict = pickle.load(infile) +infile.close() + + +# plot: +plot_sample_metrics(profile_data_dict, + insert_size_distriubtion_xlim=[0,600], + ncol=5, + cmap='viridis', + save=args.sampleId + "_qc_sample_metrics.pdf" + ) + diff --git a/src/pycistopic/processes/plot_qc_stats.nf b/src/pycistopic/processes/plot_qc_stats.nf new file mode 100644 index 00000000..3406d2a1 --- /dev/null +++ b/src/pycistopic/processes/plot_qc_stats.nf @@ -0,0 +1,36 @@ +nextflow.preview.dsl=2 + +binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pycistopic/bin/" : "" + +toolParams = params.tools.pycistopic +processParams = params.tools.pycistopic.compute_qc_stats + +process SC__PYCISTOPIC__PLOT_QC_STATS { + + container toolParams.container + label 'compute_resources__cpu' + + input: + tuple val(sampleId), + path(output_metadata), + path(output_metadata_pkl), + path(output_profile_data_pkl) + + output: + tuple val(sampleId), + path(output_pdf) + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + output_metadata = "${sampleId}_metadata.tsv.gz" + output_pdf = "${sampleId}_qc_sample_metrics.pdf" + output_metadata_pkl = "${sampleId}_metadata.pickle" + output_profile_data_pkl = "${sampleId}_profile_data.pickle" + """ + ${binDir}plot_qc_stats.py \ + --sampleId ${sampleId} \ + --profile_data_pkl ${output_profile_data_pkl} \ + --output_pdf ${output_pdf} + """ +} + From d531a93d385dcd63f3f2dc45d1799b840a73abfa Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 11 Feb 2021 21:42:50 +0100 Subject: [PATCH 003/202] Add quiet mode - If --quiet has been passed on the command line, suppress printing of any additional messages beyond those that come from Nextflow - Detect if params.quiet exists during channels input and in the INIT function - Fixes #283 --- src/channels/channels.nf | 28 +++++++++++++++------------- src/utils/workflows/utils.nf | 8 ++++++-- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/channels/channels.nf b/src/channels/channels.nf index 99b589d8..59352da6 100644 --- a/src/channels/channels.nf +++ b/src/channels/channels.nf @@ -49,7 +49,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "10x_cellranger_mex_outs", outputFileFormat) } - ).view() + ) } else { data = data.concat( getTenXCellRangerMEXChannel( @@ -57,7 +57,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "10x_cellranger_mex", outputFileFormat) } - ).view() + ) } } if(params.data.containsKey("tenx_atac") && params.data.tenx_atac.containsKey("cellranger_mex")) { @@ -68,7 +68,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "10x_atac_cellranger_mex_outs", outputFileFormat) } - ).view() + ) } else { data = data.concat( getTenXCellRangerMEXChannel( @@ -76,7 +76,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "10x_atac_cellranger_mex", outputFileFormat) } - ).view() + ) } } if(params.data.containsKey("tenx") && params.data.tenx.containsKey("cellranger_h5")) { @@ -87,7 +87,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "10x_cellranger_h5_outs", outputFileFormat) } - ).view() + ) } else { data = data.concat( getTenXCellRangerH5Channel( @@ -95,7 +95,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "10x_cellranger_h5", outputFileFormat) } - ).view() + ) } } if(params.data.containsKey("h5ad")) { @@ -116,7 +116,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "h5ad", outputFileFormat) } - ).view() + ) } if(params.data.containsKey("loom")) { data = data.concat( @@ -126,7 +126,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "loom", outputFileFormat) } - ).view() + ) } if(params.data.containsKey("tsv")) { data = data.concat( @@ -136,7 +136,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "tsv", outputFileFormat) } - ).view() + ) } if(params.data.containsKey("csv")) { data = data.concat( @@ -146,7 +146,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "csv", outputFileFormat) } - ).view() + ) } if(params.data.containsKey("seurat_rds")) { 
data = data.concat( @@ -156,7 +156,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], "seurat_rds", outputFileFormat) } - ).view() + ) } if(params.data.containsKey("fragments")) { data = data.concat( @@ -167,7 +167,7 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], it[2], "fragments") } - ).view() + ) } if(params.data.containsKey("bam")) { data = data.concat( @@ -178,10 +178,12 @@ workflow getDataChannel { ).map { it -> tuple(it[0], it[1], it[2], "bam") } - ).view() + ) } data.ifEmpty { exit 1, "Pipeline cannot run: no data provided." } + if(!params.containsKey('quiet')) data.view() + emit: data diff --git a/src/utils/workflows/utils.nf b/src/utils/workflows/utils.nf index 0993899a..c4b544a4 100644 --- a/src/utils/workflows/utils.nf +++ b/src/utils/workflows/utils.nf @@ -95,22 +95,26 @@ def setSeed(params) { if(!params.global.containsKey('seed')) { params.global.seed = workflow.manifest.version.replaceAll("\\.","").toInteger() - Channel.from('').view { + if(!params.containsKey('quiet')) { + Channel.from('').view { """ ------------------------------------------------------------------ \u001B[32m No seed detected in the config \u001B[0m \u001B[32m To ensure reproducibility the seed has been set to ${params.global.seed} \u001B[0m ------------------------------------------------------------------ """ + } } } else { - Channel.from('').view { + if(!params.containsKey('quiet')) { + Channel.from('').view { """ ------------------------------------------------------------------ \u001B[32m Custom seed detected in the config \u001B[0m \u001B[32m Seed is set to ${params.global.seed} \u001B[0m ------------------------------------------------------------------ """ + } } // If seed is of type String, it should be converted to an Integer because R doesn't not allow to have seeds of type character (see set.seed) if (params.global.seed instanceof String) { From 30fe2662ae9bbb8ec86360946ee86a052aebc075 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 12 Feb 2021 14:33:47 +0100 Subject: [PATCH 004/202] Change compute resources for qc processes - These are currently single-threaded --- src/pycistopic/processes/compute_qc_stats.nf | 2 +- src/pycistopic/processes/macs2_call_peaks.nf | 4 ++-- src/pycistopic/processes/plot_qc_stats.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index e8319b80..7f7d99c6 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -9,7 +9,7 @@ process SC__PYCISTOPIC__COMPUTE_QC_STATS { publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' container toolParams.container - label 'compute_resources__cpu' + label 'compute_resources__default' input: tuple val(sampleId), diff --git a/src/pycistopic/processes/macs2_call_peaks.nf b/src/pycistopic/processes/macs2_call_peaks.nf index fad7ff85..ef0d9421 100644 --- a/src/pycistopic/processes/macs2_call_peaks.nf +++ b/src/pycistopic/processes/macs2_call_peaks.nf @@ -8,7 +8,7 @@ processParams = params.tools.pycistopic.macs2_call_peaks process SC__PYCISTOPIC__MACS2_CALL_PEAKS { container toolParams.container - label 'compute_resources__cpu' + label 'compute_resources__default' input: tuple val(sampleId), @@ -22,7 +22,7 @@ process SC__PYCISTOPIC__MACS2_CALL_PEAKS { path("${sampleId}_summits.bed") script: - def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + //def sampleParams = 
params.parseConfig(sampleId, params.global, toolParams) """ macs2 callpeak \ --treatment ${bam} \ diff --git a/src/pycistopic/processes/plot_qc_stats.nf b/src/pycistopic/processes/plot_qc_stats.nf index 3406d2a1..66d1df1d 100644 --- a/src/pycistopic/processes/plot_qc_stats.nf +++ b/src/pycistopic/processes/plot_qc_stats.nf @@ -8,7 +8,7 @@ processParams = params.tools.pycistopic.compute_qc_stats process SC__PYCISTOPIC__PLOT_QC_STATS { container toolParams.container - label 'compute_resources__cpu' + label 'compute_resources__default' input: tuple val(sampleId), From c449d755ce6d23092ae93f4ea17254623fdb6c10 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 19 Feb 2021 16:36:16 +0100 Subject: [PATCH 005/202] QC filtering updates: - Move biomart step outside of the compute_qc_stats function (avoid querying the server too frequently) --- src/pycistopic/bin/biomart_annot.py | 31 ++++++++++++++++++++ src/pycistopic/bin/compute_qc_stats.py | 17 ++++++----- src/pycistopic/processes/biomart_annot.nf | 23 +++++++++++++++ src/pycistopic/processes/compute_qc_stats.nf | 6 ++-- src/pycistopic/pycistopic.config | 3 ++ workflows/atac/qc_filtering.nf | 8 +++-- 6 files changed, 76 insertions(+), 12 deletions(-) create mode 100755 src/pycistopic/bin/biomart_annot.py create mode 100644 src/pycistopic/processes/biomart_annot.nf diff --git a/src/pycistopic/bin/biomart_annot.py b/src/pycistopic/bin/biomart_annot.py new file mode 100755 index 00000000..4a667069 --- /dev/null +++ b/src/pycistopic/bin/biomart_annot.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import pybiomart as pbm +import argparse +import pickle + + +parser = argparse.ArgumentParser(description='Biomart annotation download') + +parser.add_argument( + "--biomart_dataset_name", + type=str, + required=True, + help='Biomart dataset name, e.g. "hsapiens_gene_ensembl".' +) + +args = parser.parse_args() + +################################################################################ + +dataset = pbm.Dataset(name=args.biomart_dataset_name, host='http://www.ensembl.org') +annot = dataset.query(attributes=['chromosome_name', 'transcription_start_site', 'strand', 'external_gene_name', 'transcript_biotype']) +filter = annot['Chromosome/scaffold name'].str.contains('CHR|GL|JH|MT', na=False) +annot = annot[~filter] +annot['Chromosome/scaffold name'] = annot['Chromosome/scaffold name'].str.replace(r'(\b\S)', r'chr\1') +annot.columns=['Chromosome', 'Start', 'Strand', 'Gene', 'Transcript_type'] +annot = annot[annot.Transcript_type == 'protein_coding'] + +with open('biomart_annot.pickle', 'wb') as f: + pickle.dump(annot, f) + diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index d29d9991..6b685c26 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -36,6 +36,11 @@ default=50, help='Threshold on the number of fragments to keep for a barcode.' ) +parser.add_argument( + "--biomart_annot_pkl", + type=str, + help='Biomart annotations, pickle format.' 
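+    # expected: the dataframe pickled by biomart_annot.py, i.e. TSS
+    # annotations with columns Chromosome / Start / Strand / Gene /
+    # Transcript_type, pre-filtered to protein-coding transcripts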
+) parser.add_argument( "--output_metadata", type=str, @@ -63,15 +68,11 @@ ################################################################################ -dataset = pbm.Dataset(name='hsapiens_gene_ensembl', host='http://www.ensembl.org') -annot = dataset.query(attributes=['chromosome_name', 'transcription_start_site', 'strand', 'external_gene_name', 'transcript_biotype']) -filter = annot['Chromosome/scaffold name'].str.contains('CHR|GL|JH|MT', na=False) -annot = annot[~filter] -annot['Chromosome/scaffold name'] = annot['Chromosome/scaffold name'].str.replace(r'(\b\S)', r'chr\1') -annot.columns=['Chromosome', 'Start', 'Strand', 'Gene', 'Transcript_type'] -annot = annot[annot.Transcript_type == 'protein_coding'] +# Load biomart annotations: +infile = open(args.biomart_annot_pkl, 'rb') +annot = pickle.load(infile) +infile.close() -################################################## fragments_dict = { diff --git a/src/pycistopic/processes/biomart_annot.nf b/src/pycistopic/processes/biomart_annot.nf new file mode 100644 index 00000000..3e896485 --- /dev/null +++ b/src/pycistopic/processes/biomart_annot.nf @@ -0,0 +1,23 @@ +nextflow.preview.dsl=2 + +binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" + +toolParams = params.tools.pycistopic +processParams = params.tools.pycistopic.biomart_annot + +process SC__PYCISTOPIC__BIOMART_ANNOT { + + publishDir "${params.global.outdir}/intermediate/pycistopic/biomart/", mode: 'symlink' + container toolParams.container + label 'compute_resources__default' + + output: + path("biomart_annot.pickle") + + script: + """ + ${binDir}biomart_annot.py \ + --biomart_dataset_name ${processParams.biomart_dataset_name} + """ +} + diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index 7f7d99c6..80f6c6a8 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -9,13 +9,14 @@ process SC__PYCISTOPIC__COMPUTE_QC_STATS { publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' container toolParams.container - label 'compute_resources__default' + label 'compute_resources__default','compute_resources__24hqueue' input: tuple val(sampleId), path(fragments), path(fragments_index), - val(peaks) + path(peaks) + path(biomart_annot) output: tuple val(sampleId), @@ -36,6 +37,7 @@ process SC__PYCISTOPIC__COMPUTE_QC_STATS { --regions ${peaks} \ --n_frag ${processParams.n_frag} \ --threads ${task.cpus} \ + --biomart_annot_pkl ${biomart_annot} \ --output_metadata ${output_metadata} \ --output_metadata_pkl ${output_metadata_pkl} \ --output_profile_data_pkl ${output_profile_data_pkl} diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index d1e2c31c..38408dfb 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -2,6 +2,9 @@ params { tools { pycistopic { container = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/Pipeline_Dev/containers/aertslab-pycistopic-2021-02-02-2b4cc98.sif' + biomart_annot { + biomart_dataset_name = 'hsapiens_gene_ensembl' + } macs2_call_peaks { gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. 
'2.7e9'
             qvalue = 0.01
diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf
index 931a3063..772b4ad2 100644
--- a/workflows/atac/qc_filtering.nf
+++ b/workflows/atac/qc_filtering.nf
@@ -5,6 +5,7 @@ nextflow.enable.dsl=2
 
 include { SC__ARCHR__CREATE_ARROW_UNFILTERED; } from './../../src/archr/processes/createArrow_unfiltered.nf' params(params)
 include { SC__ARCHR__CELL_CALLING; } from './../../src/archr/processes/cell_calling.nf' params(params)
+include { SC__PYCISTOPIC__BIOMART_ANNOT; } from './../../src/pycistopic/processes/biomart_annot.nf' params(params)
 include { SC__PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' params(params)
 include { SC__PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' params(params)
 include { SC__PYCISTOPIC__PLOT_QC_STATS; } from './../../src/pycistopic/processes/plot_qc_stats.nf' params(params)
@@ -31,6 +32,9 @@ workflow ATAC_QC_PREFILTER {
         }
         .set{ data_split }
 
+    biomart = SC__PYCISTOPIC__BIOMART_ANNOT()
+    biomart.view()
+
     peaks = SC__PYCISTOPIC__MACS2_CALL_PEAKS(data_split.bam)
     PUBLISH_PEAKS(peaks.map { it -> tuple(it[0], it[1]) }, 'peaks', 'narrowPeak', 'macs2', false)
 
@@ -38,11 +42,11 @@ workflow ATAC_QC_PREFILTER {
         .map { it -> tuple(it[0], it[1], it[2], it[4]) }
         .set{ fragpeaks }
 
-    qc_stats = SC__PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks)
+    qc_stats = SC__PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, biomart)
     PUBLISH_METADATA(qc_stats.map { it -> tuple(it[0], it[1]) }, 'metadata.tsv', 'gz', 'pycistopic', false)
 
     qc_stats_plot = SC__PYCISTOPIC__PLOT_QC_STATS(qc_stats)
-    PUBLISH_QC_SAMPLE_METRICS(qc_stats_plot, 'qc_sample_metrics.pdf', 'pdf', 'pycistopic', false)
+    PUBLISH_QC_SAMPLE_METRICS(qc_stats_plot, 'qc_sample_metrics', 'pdf', 'pycistopic', false)
 
 }

From b0f0952f9f652b50d5ac6d989a643f52fed9fe22 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Fri, 19 Feb 2021 20:38:45 +0100
Subject: [PATCH 006/202] Add barcode level statistics py script

---
 .../bin/barcode_level_statistics.py           | 163 ++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100755 src/pycistopic/bin/barcode_level_statistics.py

diff --git a/src/pycistopic/bin/barcode_level_statistics.py b/src/pycistopic/bin/barcode_level_statistics.py
new file mode 100755
index 00000000..4d828ebc
--- /dev/null
+++ b/src/pycistopic/bin/barcode_level_statistics.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+
+import argparse
+#import pybiomart as pbm
+import pandas as pd
+import pickle
+
+#from pycisTopic.qc import compute_qc_stats
+
+################################################################################
+
+parser = argparse.ArgumentParser(description='Barcode level statistics')
+
+parser.add_argument(
+    "--sampleId",
+    type=str,
+    required=True,
+    help='Sample ID.'
+)
+parser.add_argument(
+    "--metadata_pkl",
+    type=str,
+    help='Metadata, pickle format.'
+)
+
+### fragments filters:
+parser.add_argument(
+    "--filter_frags_lower",
+    type=float,
+    required=False,
+    default=3,
+    help='Lower threshold on the number of fragments for keeping a barcode. Log10-scaled.'
+)
+parser.add_argument(
+    "--filter_frags_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on the number of fragments for keeping a barcode. Log10-scaled.'
+)
+
+### TSS Enrichment filters:
+parser.add_argument(
+    "--filter_tss_lower",
+    type=float,
+    required=False,
+    default=8,
+    help='Lower threshold on the TSS Enrichment for keeping a barcode.'
+)
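+# The thresholds collected here are applied through pycisTopic's
+# plot_barcode_metrics, which draws a QC scatter plot and returns the
+# barcodes passing the limits; a minimal sketch of the call pattern used
+# further down in this script:
+#
+#   fig, passing_bc = plot_barcode_metrics(
+#       metadata_bc[sample_id],
+#       var_x='Log_unique_nr_frag', var_y='TSS_enrichment',
+#       min_x=args.filter_frags_lower, min_y=args.filter_tss_lower,
+#       return_cells=True, return_fig=True, plot=False)
+#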
+parser.add_argument(
+    "--filter_tss_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on the TSS Enrichment for keeping a barcode.'
+)
+
+### FRIP filters:
+parser.add_argument(
+    "--filter_frip_lower",
+    type=float,
+    required=False,
+    default=None,
+    help='Lower threshold on FRIP for keeping a barcode.'
+)
+parser.add_argument(
+    "--filter_frip_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on FRIP for keeping a barcode.'
+)
+
+### Duplication rate filters:
+parser.add_argument(
+    "--filter_dup_rate_lower",
+    type=float,
+    required=False,
+    default=None,
+    help='Lower threshold on duplication rate for keeping a barcode.'
+)
+parser.add_argument(
+    "--filter_dup_rate_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on duplication rate for keeping a barcode.'
+)
+
+parser <- add_option(
+    parser,
+    c("--filter_tss"),
+    action = "store",
+    default = 4,
+    help = "Filter on TSS Enrichment [%default]"
+)
+parser <- add_option(
+    parser,
+    c("--filter_frags"),
+    action = "store",
+    default = 1000,
+    help = "Filter on fragments/cell [%default]"
+)
+
+
+
+
+args = parser.parse_args()
+
+################################################################################
+
+
+# Load barcode metrics
+infile = open(args.metadata_pkl, 'rb')
+metadata_bc = pickle.load(infile)
+infile.close()
+
+
+
+# Return figure to plot together with other metrics, and cells passing filters. Figure will be saved as pdf.
+FRIP_NR_FRAG_fig, FRIP_NR_FRAG_filter=plot_barcode_metrics(metadata_bc[args.sampleId],
+        var_x='Log_unique_nr_frag',
+        var_y='FRIP',
+        min_x=args.filter_frags_lower,
+        max_x=args.filter_frags_upper,
+        min_y=args.filter_frip_lower,
+        max_y=args.filter_frip_upper,
+        cmap='viridis',
+        return_cells=True,
+        return_fig=True,
+        plot=False,
+        save='FRIP-VS-NRFRAG.pdf')
+
+# Return figure to plot together with other metrics, and cells passing filters
+TSS_NR_FRAG_fig, TSS_NR_FRAG_filter=plot_barcode_metrics(metadata_bc[args.sampleId],
+        var_x='Log_unique_nr_frag',
+        var_y='TSS_enrichment',
+        min_x=args.filter_frags_lower,
+        max_x=args.filter_frags_upper,
+        min_y=args.filter_tss_lower,
+        max_y=args.filter_tss_upper,
+        cmap='viridis',
+        return_cells=True,
+        return_fig=True,
+        plot=False,
+        save='TSS-VS-NRFRAG.pdf')
+
+# Return figure to plot together with other metrics, but not returning cells (no filter applied for the duplication rate per barcode)
+DR_NR_FRAG_fig=plot_barcode_metrics(metadata_bc[args.sampleId],
+        var_x='Log_unique_nr_frag',
+        var_y='Dupl_rate',
+        min_x=args.filter_frags_lower,
+        max_x=args.filter_frags_upper,
+        min_y=args.filter_dup_rate_lower,
+        max_y=args.filter_dup_rate_upper,
+        cmap='viridis',
+        return_cells=False,
+        return_fig=True,
+        plot=False)
+
+# intersection of barcodes to keep:
+bc_passing_filters = {'10x_multiome_brain':[]}
+bc_passing_filters['10x_multiome_brain'] = list((set(FRIP_NR_FRAG_filter) & set(TSS_NR_FRAG_filter)) ^ set(SCRUBLET_doublets))

From c07057905c27a6ef99a3099528c11f9f8e42ae58 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Thu, 25 Feb 2021 10:43:45 +0100
Subject: [PATCH 007/202] Add bwa-mem2 to bwamaptools image

---
 src/bwamaptools/Dockerfile         | 9 ++++++++-
 src/bwamaptools/bwamaptools.config | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/bwamaptools/Dockerfile b/src/bwamaptools/Dockerfile
index af9dc8b5..334ceda7 100644
--- a/src/bwamaptools/Dockerfile
+++ b/src/bwamaptools/Dockerfile
@@ -9,6 +9,7 @@ RUN BUILDPKGS="build-essential \
libssl-dev \ zlib1g-dev \ liblzma-dev \ + curl \ libncurses5-dev"&& \ apt-get update && \ apt-get install -y --no-install-recommends apt-utils debconf locales && dpkg-reconfigure locales && \ @@ -16,7 +17,7 @@ RUN BUILDPKGS="build-essential \ apt-get install -y --no-install-recommends $BUILDPKGS # Install htslib -RUN git clone https://github.com/samtools/htslib.git && \ +RUN git clone --recurse-submodules https://github.com/samtools/htslib.git && \ cd htslib && \ autoheader && \ autoconf && \ @@ -39,6 +40,12 @@ RUN git clone https://github.com/lh3/bwa.git && \ make && \ mv /bwa/bwa /usr/local/bin/ +# install bwa-mem2 +ENV BWAMEM2_VER 2.1 +RUN cd /tmp && \ + curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v${BWAMEM2_VER}/bwa-mem2-${BWAMEM2_VER}_x64-linux.tar.bz2 | tar jxf - && \ + mv bwa-mem2-${BWAMEM2_VER}_x64-linux/bwa-mem2* /usr/local/bin + RUN apt-get -y update && \ apt-get -y --no-install-recommends install \ # Need to run ps diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index e6cb08ae..38232fd2 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,7 +1,7 @@ params { tools { bwamaptools { - container = 'vibsinglecellnf/bwamaptools:2020-07-02-13b5637' + container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.1' } } } From e28202a46992b3363cc556d002f1ca8cd7b210c1 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 25 Feb 2021 21:50:58 +0100 Subject: [PATCH 008/202] Update bwa steps to use bwa-mem2 (optional) --- src/bwamaptools/conf/bwa_mapping.config | 1 + src/bwamaptools/main.nf | 2 +- src/bwamaptools/processes/mapping.nf | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bwamaptools/conf/bwa_mapping.config b/src/bwamaptools/conf/bwa_mapping.config index 55762d9c..69192b93 100644 --- a/src/bwamaptools/conf/bwa_mapping.config +++ b/src/bwamaptools/conf/bwa_mapping.config @@ -2,6 +2,7 @@ params { tools { bwamaptools { bwa_fasta = '/path/to/bwa_index/hg38.fa' + bwa_version = 'bwa-mem2' // or 'bwa' // to do: add bwa mem params //mem { //} diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index 5dad0edb..3f9444ca 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -29,7 +29,7 @@ workflow get_bwa_index { bwa_index_path = Paths.get( Paths.get(fasta_path).getParent().toString(), - "*.{amb,ann,bwt,fai,flat,gdx,pac,sa}" + "*.{amb,ann,bwt,fai,flat,gdx,pac,sa,0123,bwt.2bit.64}" ) bwa_index = Channel.fromPath(bwa_index_path, glob: true, diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index c6372ada..da4dd2f9 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -26,7 +26,7 @@ process SC__BWAMAPTOOLS__BWA_MEM_PE { def samtools_cpus = (task.cpus > 6) ? 
6 : task.cpus """ set -euo pipefail - bwa mem \ + ${toolParams.bwa_version} mem \ -t ${task.cpus} \ ${bwa_fasta} \ ${fastq_PE1} \ From 22339b7c7b4d42affd2ebe0415ae1e4f61658e77 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 25 Feb 2021 21:56:32 +0100 Subject: [PATCH 009/202] Integrate singlecelltoolkit updates - Rename debarcode_10x_scatac_fastqs to barcode_10x_scatac_fastqs - Additional options fixed - Keep the barcodes in the fastq name for now --- ..._10x_scatac_fastqs.nf => barcode_10x_scatac_fastqs.nf} | 8 +++++--- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- workflows/atac/preprocess.nf | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) rename src/singlecelltoolkit/processes/{debarcode_10x_scatac_fastqs.nf => barcode_10x_scatac_fastqs.nf} (84%) diff --git a/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf similarity index 84% rename from src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf rename to src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf index faf59338..aa3f8f4a 100644 --- a/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf +++ b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.singlecelltoolkit -process SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ { +process SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ { container toolParams.container label 'compute_resources__cpu' @@ -24,12 +24,14 @@ process SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ { def sampleParams = params.parseConfig(sampleId, params.global, toolParams) processParams = sampleParams.local """ - debarcode_10x_scatac_fastqs.sh \ + barcode_10x_scatac_fastqs.sh \ ${fastq_PE1} \ ${fastq_bc} \ ${fastq_PE2} \ ${sampleId}_dex \ - _ + false \ + false \ + "_" """ } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 94275472..a677df87 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:0.5-dev' + container = 'vibsinglecellnf/singlecelltoolkit:2021-02-25-f35b286' barcode_correction { whitelist { standard = '' diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index dc2beaf6..ae0272f4 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl=2 ////////////////////////////////////////////////////// // process imports: include { SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) -include { SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ; } from './../../src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf' params(params) +include { SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) include { SC__TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) // workflow imports: @@ -70,7 +70,7 @@ workflow ATAC_PREPROCESS_WITH_METADATA { println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. 
Skipping barcode correction for standard-type samples.") } // run barcode demultiplexing on each read+barcode: - fastq_dex = SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ( + fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ( data.standard.map { it -> tuple(it[0], it[2], it[3], it[4]) } ) } else { @@ -88,7 +88,7 @@ workflow ATAC_PREPROCESS_WITH_METADATA { // run barcode demultiplexing on each read+barcode: - fastq_dex = SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ( + fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ( data.standard.join(fastq_bc_corrected).map { it -> tuple(it[0], it[2], it[5], it[4]) } ) From c181ad6651ffb0be99b7cac54bb0d90c435cdb30 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 26 Feb 2021 12:42:25 +0100 Subject: [PATCH 010/202] Simplify bwa mapping subworkflow - Bwa mem uses the -C option to add barcodes from the fastq comments (now added by the barcode_10x_scatac_fastqs.sh script in singlecelltoolkit. - Index process now passes through the input bam as output - Simplify bwa main.nf with pipes --- src/bwamaptools/main.nf | 21 ++++++--------------- src/bwamaptools/processes/index.nf | 1 + src/bwamaptools/processes/mapping.nf | 2 +- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index 3f9444ca..f5665a1e 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -7,7 +7,6 @@ import java.nio.file.Paths include { SC__BWAMAPTOOLS__BWA_MEM_PE; } from './processes/mapping.nf' params(params) include { SC__BWAMAPTOOLS__INDEX_BAM; } from './processes/index.nf' params(params) -include { SC__BWAMAPTOOLS__ADD_BARCODE_TAG; } from './processes/add_barcode_as_tag.nf' params(params) include { SC__BWAMAPTOOLS__MAPPING_SUMMARY } from './processes/mapping_summary.nf' params(params) include { PUBLISH as PUBLISH_BAM; @@ -59,25 +58,17 @@ workflow BWA_MAPPING_PE { */ bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) - bam = SC__BWAMAPTOOLS__BWA_MEM_PE(bwa_inputs) - - bam_with_tag = SC__BWAMAPTOOLS__ADD_BARCODE_TAG(bam) - - bam_index = SC__BWAMAPTOOLS__INDEX_BAM(bam_with_tag) - - // join bam index into the bam channel: - bamout = bam_with_tag.join(bam_index) - - // get mapping summary stats - SC__BWAMAPTOOLS__MAPPING_SUMMARY(bamout) + SC__BWAMAPTOOLS__BWA_MEM_PE(bwa_inputs) | + SC__BWAMAPTOOLS__INDEX_BAM | + SC__BWAMAPTOOLS__MAPPING_SUMMARY // publish output: - PUBLISH_BAM(bam_with_tag, 'bwa.out.possorted', 'bam', 'bam', false) - PUBLISH_BAM_INDEX(bam_index, 'bwa.out.possorted.bam', 'bai', 'bam', false) + PUBLISH_BAM(SC__BWAMAPTOOLS__INDEX_BAM.out, 'bwa.out.possorted', 'bam', 'bam', false) + PUBLISH_BAM_INDEX(SC__BWAMAPTOOLS__INDEX_BAM.out.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) PUBLISH_MAPPING_SUMMARY(SC__BWAMAPTOOLS__MAPPING_SUMMARY.out, 'mapping_stats', 'tsv', 'bam', false) emit: - bamout + SC__BWAMAPTOOLS__INDEX_BAM.out } diff --git a/src/bwamaptools/processes/index.nf b/src/bwamaptools/processes/index.nf index 9ae964e9..2616ef2f 100644 --- a/src/bwamaptools/processes/index.nf +++ b/src/bwamaptools/processes/index.nf @@ -15,6 +15,7 @@ process SC__BWAMAPTOOLS__INDEX_BAM { output: tuple val(sampleId), + path(bam), path("*.bai") script: diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index da4dd2f9..2cf3f62e 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -28,6 +28,7 @@ process SC__BWAMAPTOOLS__BWA_MEM_PE { set -euo pipefail ${toolParams.bwa_version} mem \ -t 
${task.cpus} \ + -C \ ${bwa_fasta} \ ${fastq_PE1} \ ${fastq_PE2} \ @@ -35,6 +36,5 @@ process SC__BWAMAPTOOLS__BWA_MEM_PE { | samtools sort -@ ${samtools_cpus} -u -O bam - \ | samtools markdup -@ ${samtools_cpus} -f ${sampleId}.markdup.log - ${sampleId}.bwa.out.possorted.bam """ - } From f80ace674664bab9e39458e2c0e65a11a33938c2 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 26 Feb 2021 13:03:58 +0100 Subject: [PATCH 011/202] Update barcode_10x_scatac_fastqs - Barcode, quality and corrected barcodes are now added into the fastq comments field --- .../processes/barcode_10x_scatac_fastqs.nf | 8 ++++---- src/singlecelltoolkit/singlecelltoolkit.config | 4 ++++ workflows/atac/preprocess.nf | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf index aa3f8f4a..6a0c9cd2 100644 --- a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf +++ b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.singlecelltoolkit -process SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ { +process SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ { container toolParams.container label 'compute_resources__cpu' @@ -21,7 +21,7 @@ process SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ { path("${sampleId}_dex_R2.fastq.gz") script: - def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_10x_scatac_fastqs) processParams = sampleParams.local """ barcode_10x_scatac_fastqs.sh \ @@ -30,8 +30,8 @@ process SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ { ${fastq_PE2} \ ${sampleId}_dex \ false \ - false \ - "_" + true \ + ${processParams.uncorrected_bc_tag}_${processParams.barcode_quality_tag} """ } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index a677df87..568b2f1b 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -8,6 +8,10 @@ params { multiome = '' } } + barcode_10x_scatac_fastqs { + uncorrected_bc_tag = 'CR' + corrected_bc_tag = 'CB' + } } } } diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index ae0272f4..e0d7cdcb 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl=2 ////////////////////////////////////////////////////// // process imports: include { SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) -include { SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) +include { SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) include { SC__TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) // workflow imports: @@ -70,7 +70,7 @@ workflow ATAC_PREPROCESS_WITH_METADATA { println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. 
Skipping barcode correction for standard-type samples.") } // run barcode demultiplexing on each read+barcode: - fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ( + fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ( data.standard.map { it -> tuple(it[0], it[2], it[3], it[4]) } ) } else { @@ -88,7 +88,7 @@ workflow ATAC_PREPROCESS_WITH_METADATA { // run barcode demultiplexing on each read+barcode: - fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_FASTQ( + fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ( data.standard.join(fastq_bc_corrected).map { it -> tuple(it[0], it[2], it[5], it[4]) } ) From 8f86cbace75bcc37a44842d143b8d1d7aa701290 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 26 Feb 2021 13:11:41 +0100 Subject: [PATCH 012/202] Simplify naming in bwa subworkflow --- src/bwamaptools/main.nf | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index f5665a1e..cd35ebb8 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -5,9 +5,15 @@ import java.nio.file.Paths ////////////////////////////////////////////////////// // Import sub-workflows from the modules: -include { SC__BWAMAPTOOLS__BWA_MEM_PE; } from './processes/mapping.nf' params(params) -include { SC__BWAMAPTOOLS__INDEX_BAM; } from './processes/index.nf' params(params) -include { SC__BWAMAPTOOLS__MAPPING_SUMMARY } from './processes/mapping_summary.nf' params(params) +include { + SC__BWAMAPTOOLS__BWA_MEM_PE as BWA_MEM_PE; +} from './processes/mapping.nf' params(params) +include { + SC__BWAMAPTOOLS__INDEX_BAM as INDEX_BAM; +} from './processes/index.nf' params(params) +include { + SC__BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; +} from './processes/mapping_summary.nf' params(params) include { PUBLISH as PUBLISH_BAM; PUBLISH as PUBLISH_BAM_INDEX; @@ -58,17 +64,17 @@ workflow BWA_MAPPING_PE { */ bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) - SC__BWAMAPTOOLS__BWA_MEM_PE(bwa_inputs) | - SC__BWAMAPTOOLS__INDEX_BAM | - SC__BWAMAPTOOLS__MAPPING_SUMMARY + BWA_MEM_PE(bwa_inputs) | + INDEX_BAM | + MAPPING_SUMMARY // publish output: - PUBLISH_BAM(SC__BWAMAPTOOLS__INDEX_BAM.out, 'bwa.out.possorted', 'bam', 'bam', false) - PUBLISH_BAM_INDEX(SC__BWAMAPTOOLS__INDEX_BAM.out.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) - PUBLISH_MAPPING_SUMMARY(SC__BWAMAPTOOLS__MAPPING_SUMMARY.out, 'mapping_stats', 'tsv', 'bam', false) + PUBLISH_BAM(INDEX_BAM.out, 'bwa.out.possorted', 'bam', 'bam', false) + PUBLISH_BAM_INDEX(INDEX_BAM.out.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) + PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, 'mapping_stats', 'tsv', 'bam', false) emit: - SC__BWAMAPTOOLS__INDEX_BAM.out + INDEX_BAM.out } From 9311538dc084e400438d6beb842d240217865797 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sun, 28 Feb 2021 21:23:58 +0100 Subject: [PATCH 013/202] Add barcode_level_statistics to pycistopic qc --- .../bin/barcode_level_statistics.py | 50 +++++++++---------- .../processes/barcode_level_statistics.nf | 50 +++++++++++++++++++ src/pycistopic/pycistopic.config | 10 ++++ workflows/atac/qc_filtering.nf | 3 ++ 4 files changed, 87 insertions(+), 26 deletions(-) create mode 100644 src/pycistopic/processes/barcode_level_statistics.nf diff --git a/src/pycistopic/bin/barcode_level_statistics.py b/src/pycistopic/bin/barcode_level_statistics.py index 4d828ebc..b3b476ae 100755 --- 
a/src/pycistopic/bin/barcode_level_statistics.py
+++ b/src/pycistopic/bin/barcode_level_statistics.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 
 import argparse
-#import pybiomart as pbm
 import pandas as pd
 import pickle
+import numpy as np
 
-#from pycisTopic.qc import compute_qc_stats
+from pycisTopic.qc import plot_barcode_metrics
 
 ################################################################################
 
@@ -22,6 +22,11 @@ parser.add_argument(
     type=str,
     help='Metadata, pickle format.'
 )
+parser.add_argument(
+    "--selected_barcodes",
+    type=str,
+    help='Output file containing selected barcodes.'
+)
 
 ### fragments filters:
 parser.add_argument(
@@ -87,28 +92,16 @@ parser.add_argument(
     help='Upper threshold on duplication rate for keeping a barcode.'
 )
 
-parser <- add_option(
-    parser,
-    c("--filter_tss"),
-    action = "store",
-    default = 4,
-    help = "Filter on TSS Enrichment [%default]"
-)
-parser <- add_option(
-    parser,
-    c("--filter_frags"),
-    action = "store",
-    default = 1000,
-    help = "Filter on fragments/cell [%default]"
-)
-
-
-
-
 args = parser.parse_args()
 
 ################################################################################
 
+# the pycisTopic filter setting is in log10 scale
+if args.filter_frags_lower is not None:
+    args.filter_frags_lower = np.log10(args.filter_frags_lower)
+if args.filter_frags_upper is not None:
+    args.filter_frags_upper = np.log10(args.filter_frags_upper)
+
 
 # Load barcode metrics
 infile = open(args.metadata_pkl, 'rb')
 metadata_bc = pickle.load(infile)
 infile.close()
-
-
 
 # Return figure to plot together with other metrics, and cells passing filters. Figure will be saved as pdf.
 FRIP_NR_FRAG_fig, FRIP_NR_FRAG_filter=plot_barcode_metrics(metadata_bc[args.sampleId],
         var_x='Log_unique_nr_frag',
         var_y='FRIP',
         min_x=args.filter_frags_lower,
         max_x=args.filter_frags_upper,
         min_y=args.filter_frip_lower,
         max_y=args.filter_frip_upper,
         cmap='viridis',
         return_cells=True,
         return_fig=True,
         plot=False,
-        save='FRIP-VS-NRFRAG.pdf')
+        save='./'+args.sampleId+'__FRIP-vs-nFrag.pdf')
 
 # Return figure to plot together with other metrics, and cells passing filters
 TSS_NR_FRAG_fig, TSS_NR_FRAG_filter=plot_barcode_metrics(metadata_bc[args.sampleId],
         var_x='Log_unique_nr_frag',
         var_y='TSS_enrichment',
         min_x=args.filter_frags_lower,
         max_x=args.filter_frags_upper,
         min_y=args.filter_tss_lower,
         max_y=args.filter_tss_upper,
         cmap='viridis',
         return_cells=True,
         return_fig=True,
         plot=False,
-        save='TSS-VS-NRFRAG.pdf')
+        save='./'+args.sampleId+'__TSS-vs-nFrag.pdf')
 
 # Return figure to plot together with other metrics, but not returning cells (no filter applied for the duplication rate per barcode)
 DR_NR_FRAG_fig=plot_barcode_metrics(metadata_bc[args.sampleId],
         var_x='Log_unique_nr_frag',
         var_y='Dupl_rate',
         min_x=args.filter_frags_lower,
         max_x=args.filter_frags_upper,
         min_y=args.filter_dup_rate_lower,
         max_y=args.filter_dup_rate_upper,
         cmap='viridis',
         return_cells=False,
         return_fig=True,
-        plot=False)
+        plot=False,
+        save='./'+args.sampleId+'__duprate-vs-nFrag.pdf')
 
 # intersection of barcodes to keep:
-bc_passing_filters = {'10x_multiome_brain':[]}
-bc_passing_filters['10x_multiome_brain'] = list((set(FRIP_NR_FRAG_filter) & set(TSS_NR_FRAG_filter)) ^ set(SCRUBLET_doublets))
+bc_passing_filters = list(set(FRIP_NR_FRAG_filter) & set(TSS_NR_FRAG_filter))
+
+
+### outputs:
+
+pd.DataFrame(bc_passing_filters).to_csv(args.selected_barcodes, sep='\t', index=False, header=False)
+
diff --git a/src/pycistopic/processes/barcode_level_statistics.nf b/src/pycistopic/processes/barcode_level_statistics.nf
new file mode 100644
index 00000000..a5f654ac
--- /dev/null
+++ b/src/pycistopic/processes/barcode_level_statistics.nf
@@ -0,0 +1,50 @@
+nextflow.preview.dsl=2
+
+binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : ""
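+// A note on the pattern used in the script block below (sketch): optional
+// thresholds from the config are forwarded with Groovy's safe-navigation
+// operator plus a ternary, e.g.
+//   ${processParams?.filter_tss_lower ? '--filter_tss_lower ' + processParams?.filter_tss_lower : ''}
+// so leaving a value empty in pycistopic.config simply omits that flag
+// from the barcode_level_statistics.py command line.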
"${workflow.projectDir}/src/pycistopic/bin/" : "" + +toolParams = params.tools.pycistopic +//processParams = params.tools.pycistopic.barcode_level_statistics + +process SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS { + + publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' + container toolParams.container + label 'compute_resources__default','compute_resources__24hqueue' + + input: + tuple val(sampleId), + path(metadata), + path(metadata_pkl), + path(profile_data_pkl) + + output: + tuple val(sampleId), + path(selected_barcodes), + path(output_pdf_ff), + path(output_pdf_tf), + path(output_pdf_df) + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_level_statistics) + processParams = sampleParams.local + selected_barcodes = "${sampleId}__selected_barcodes.txt" + output_pdf_ff = "${sampleId}__FRIP-vs-nFrag.pdf" + output_pdf_tf = "${sampleId}__TSS-vs-nFrag.pdf" + output_pdf_df = "${sampleId}__duprate-vs-nFrag.pdf" + """ + export NUMEXPR_MAX_THREADS=${task.cpus} + ${binDir}barcode_level_statistics.py \ + --sampleId ${sampleId} \ + --metadata_pkl ${metadata_pkl} \ + --selected_barcodes ${selected_barcodes} \ + ${processParams?.filter_frags_lower ? '--filter_frags_lower ' + processParams?.filter_frags_lower : ''} \ + ${processParams?.filter_frags_upper ? '--filter_frags_upper ' + processParams?.filter_frags_upper : ''} \ + ${processParams?.filter_tss_lower ? '--filter_tss_lower ' + processParams?.filter_tss_lower : ''} \ + ${processParams?.filter_tss_upper ? '--filter_tss_upper ' + processParams?.filter_tss_upper : ''} \ + ${processParams?.filter_frip_lower ? '--filter_frip_lower ' + processParams?.filter_frip_lower : ''} \ + ${processParams?.filter_frip_upper ? '--filter_frip_upper ' + processParams?.filter_frip_upper : ''} \ + ${processParams?.filter_dup_rate_lower ? '--filter_dup_rate_lower ' + processParams?.filter_dup_rate_lower : ''} \ + ${processParams?.filter_dup_rate_upper ? 
'--filter_dup_rate_upper ' + processParams?.filter_dup_rate_upper : ''} + """ +} + diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 38408dfb..141ca2d5 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -15,6 +15,16 @@ params { compute_qc_stats { n_frag = 50 } + barcode_level_statistics { + filter_frags_lower = '1000' + filter_frags_upper = '' + filter_tss_lower = '8' + filter_tss_upper = '' + filter_frip_lower = '' + filter_frip_upper = '' + filter_dup_rate_lower = '' + filter_dup_rate_upper = '' + } } } } diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 772b4ad2..6b8fe2b9 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -9,6 +9,7 @@ include { SC__PYCISTOPIC__BIOMART_ANNOT; } from './../../src/pycistopic/processe include { SC__PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' params(params) include { SC__PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' params(params) include { SC__PYCISTOPIC__PLOT_QC_STATS; } from './../../src/pycistopic/processes/plot_qc_stats.nf' params(params) +include { SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS; } from './../../src/pycistopic/processes/barcode_level_statistics.nf' params(params) include { PUBLISH as PUBLISH_PEAKS; @@ -48,6 +49,8 @@ workflow ATAC_QC_PREFILTER { qc_stats_plot = SC__PYCISTOPIC__PLOT_QC_STATS(qc_stats) PUBLISH_QC_SAMPLE_METRICS(qc_stats_plot, 'qc_sample_metrics', 'pdf', 'pycistopic', false) + SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS(qc_stats) + } From c07057905c27a6ef99a3099528c11f9f8e42ae58 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 14:38:39 +0100 Subject: [PATCH 014/202] Add biorad processing step from singlecelltoolkit - Extracts and corrects barcode in one step --- .../extract_and_correct_biorad_barcode.nf | 33 +++++++++++++++++++ workflows/atac/preprocess.nf | 12 +++++-- 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf diff --git a/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf b/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf new file mode 100644 index 00000000..2a6c19a0 --- /dev/null +++ b/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf @@ -0,0 +1,33 @@ +nextflow.enable.dsl=2 + +//binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" + +toolParams = params.tools.singlecelltoolkit + +process SC__SINGLECELLTOOLKIT__EXTRACT_AND_CORRECT_BIORAD_BARCODE { + + container toolParams.container + label 'compute_resources__default' + + input: + tuple val(sampleId), + path(fastq_PE1), + path(fastq_PE2) + + output: + tuple val(sampleId), + path("${sampleId}_dex_R1.fastq.gz"), + path("${sampleId}_dex_R2.fastq.gz"), + path("${sampleId}_dex.corrected_bc_stats.tsv") + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + //processParams = sampleParams.local + """ + extract_and_correct_biorad_barcode_in_fastq.sh \ + ${fastq_PE1} \ + ${fastq_PE2} \ + ${sampleId}_dex + """ +} + diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index e0d7cdcb..5b5e17c0 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -4,6 +4,9 @@ nextflow.enable.dsl=2 // process imports: include { SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) include { SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) +include { + SC__SINGLECELLTOOLKIT__EXTRACT_AND_CORRECT_BIORAD_BARCODE as SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE; +} from './../../src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf' params(params) include { SC__TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) // workflow imports: @@ -51,8 +54,13 @@ workflow ATAC_PREPROCESS_WITH_METADATA { standard: true // capture all other technology types here } - // run biorad barcode correction and debarcoding separately: - fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + /* run biorad barcode correction and debarcoding separately: */ + // using BAP: + //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + + // using singlecelltoolkit: + fastq_dex_br = SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + /* Barcode correction */ // gather barcode whitelists from params into a channel: From d1498bb1df4f3b6d3c8e3eea77019095bfb1cc87 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 14:40:54 +0100 Subject: [PATCH 015/202] Move biorad block after standard bc correction --- workflows/atac/preprocess.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 5b5e17c0..1e4825af 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -54,13 +54,6 @@ workflow ATAC_PREPROCESS_WITH_METADATA { standard: true // capture all other technology types here } - /* run biorad barcode correction and debarcoding separately: */ - // using BAP: - //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - - // using singlecelltoolkit: - fastq_dex_br = SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - /* Barcode correction */ // gather barcode whitelists from params into a channel: @@ -102,6 +95,13 @@ workflow ATAC_PREPROCESS_WITH_METADATA { } + /* run BioRad barcode correction and debarcoding separately: */ + // using BAP: + //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + // using singlecelltoolkit: + fastq_dex_br = 
SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + + // concatenate the read channels: fastq_dex = fastq_dex.concat(fastq_dex_br) From 643485334f9191f1aa64a78187a9e29b8db04543 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 15:06:29 +0100 Subject: [PATCH 016/202] Do not publish trimmed fastqs - cleanup br input tuple to trimming --- workflows/atac/preprocess.nf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 1e4825af..ac915564 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -16,8 +16,7 @@ include { BAP__BIORAD_DEBARCODE; } from './../../src/bap/workflows/bap_debarcode include { PUBLISH as PUBLISH_BC_STATS; - PUBLISH as PUBLISH_FASTQS_PE1; - PUBLISH as PUBLISH_FASTQS_PE2; + PUBLISH as PUBLISH_BR_BC_STATS; PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE1; PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE2; PUBLISH as PUBLISH_FRAGMENTS; @@ -100,16 +99,15 @@ workflow ATAC_PREPROCESS_WITH_METADATA { //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) // using singlecelltoolkit: fastq_dex_br = SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'fastq', false) // concatenate the read channels: - fastq_dex = fastq_dex.concat(fastq_dex_br) + fastq_dex = fastq_dex.concat(fastq_dex_br.map{ it -> tuple(it[0], it[1],it[2])}) // run adapter trimming: fastq_dex_trim = SC__TRIMGALORE__TRIM(fastq_dex) // publish fastq output: - PUBLISH_FASTQS_PE1(fastq_dex_trim, 'R1.fastq', 'gz', 'fastq', false) - PUBLISH_FASTQS_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[2]) }, 'R2.fastq', 'gz', 'fastq', false) PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'R1.trimming_report', 'txt', 'fastq', false) PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, 'R2.trimming_report', 'txt', 'fastq', false) From fd58b8b48dcfcaf22d4916a0d527e251ed0c88ed Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 16:29:50 +0100 Subject: [PATCH 017/202] Cleanup and simplification of process names and imports --- main_atac.nf | 27 +++++----------- src/bap/main.nf | 8 +++-- src/bap/processes/barcode_multiplet.nf | 2 +- src/bap/processes/biorad_debarcode.nf | 4 +-- src/bap/workflows/bap_debarcode.nf | 12 +++---- src/bwamaptools/main.nf | 6 ++-- .../processes/add_barcode_as_tag.nf | 2 +- src/bwamaptools/processes/index.nf | 4 +-- src/bwamaptools/processes/mapping.nf | 2 +- src/bwamaptools/processes/mapping_summary.nf | 3 +- .../processes/barcode_10x_scatac_fastqs.nf | 2 +- .../processes/barcode_correction.nf | 2 +- .../extract_and_correct_biorad_barcode.nf | 2 +- src/sinto/main.nf | 12 +++---- src/sinto/processes/fragments.nf | 4 +-- src/trimgalore/processes/trim.nf | 2 +- workflows/atac/preprocess.nf | 31 +++++++++---------- 17 files changed, 56 insertions(+), 69 deletions(-) diff --git a/main_atac.nf b/main_atac.nf index b0215e66..7f57d918 100644 --- a/main_atac.nf +++ b/main_atac.nf @@ -37,25 +37,14 @@ workflow cellranger_atac { } -workflow cistopic { - - include { - cistopic as CISTOPIC - } from './src/cistopic/main' params(params) - - getDataChannel | CISTOPIC - -} - - workflow atac_preprocess { // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation include { - ATAC_PREPROCESS_WITH_METADATA; + 
ATAC_PREPROCESS; } from './workflows/atac/preprocess.nf' params(params) - ATAC_PREPROCESS_WITH_METADATA(file(params.data.atac_preprocess.metadata)) + ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) } @@ -63,16 +52,16 @@ workflow atac_preprocess { workflow atac_preprocess_bap { include { - ATAC_PREPROCESS_WITH_METADATA; + ATAC_PREPROCESS; } from './workflows/atac/preprocess.nf' params(params) include { get_bam; - BAP__BARCODE_MULTIPLET_PIPELINE; + BAP__BARCODE_MULTIPLET_WF; } from './src/bap/main.nf' params(params) - ATAC_PREPROCESS_WITH_METADATA(file(params.data.atac_preprocess.metadata)) | + ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) | get_bam | - BAP__BARCODE_MULTIPLET_PIPELINE + BAP__BARCODE_MULTIPLET_WF } @@ -80,10 +69,10 @@ workflow atac_preprocess_bap { workflow bap { include { - BAP__BARCODE_MULTIPLET_PIPELINE; + BAP__BARCODE_MULTIPLET_WF; } from './src/bap/main.nf' params(params) - getDataChannel | BAP__BARCODE_MULTIPLET_PIPELINE + getDataChannel | BAP__BARCODE_MULTIPLET_WF } diff --git a/src/bap/main.nf b/src/bap/main.nf index 3c65dc2e..f0190d2c 100644 --- a/src/bap/main.nf +++ b/src/bap/main.nf @@ -3,7 +3,9 @@ nextflow.enable.dsl=2 ////////////////////////////////////////////////////// // Import sub-workflows from the modules: -include { SC__BAP__BARCODE_MULTIPLET_PIPELINE; } from './processes/barcode_multiplet.nf' params(params) +include { + BAP__BARCODE_MULTIPLET_PIPELINE as BARCODE_MULTIPLET_PIPELINE; +} from './processes/barcode_multiplet.nf' params(params) include { BAM_TO_FRAGMENTS as BAP_BAM_TO_FRAGMENTS; } from './../../src/sinto/main.nf' addParams(tools_sinto_fragments_barcodetag: params.tools.bap.barcode_multiplet.drop_tag) @@ -27,14 +29,14 @@ workflow get_bam { } -workflow BAP__BARCODE_MULTIPLET_PIPELINE { +workflow BAP__BARCODE_MULTIPLET_WF { take: bam main: - bap = SC__BAP__BARCODE_MULTIPLET_PIPELINE(bam.map { it -> tuple(it[0], it[1], it[2]) }) + bap = BARCODE_MULTIPLET_PIPELINE(bam.map { it -> tuple(it[0], it[1], it[2]) }) // generate a fragments file: fragments = BAP_BAM_TO_FRAGMENTS(bap.map {it -> tuple(it[0], it[1], it[2])}) diff --git a/src/bap/processes/barcode_multiplet.nf b/src/bap/processes/barcode_multiplet.nf index 0f83a180..d78ca34a 100644 --- a/src/bap/processes/barcode_multiplet.nf +++ b/src/bap/processes/barcode_multiplet.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/ toolParams = params.tools.bap -process SC__BAP__BARCODE_MULTIPLET_PIPELINE { +process BAP__BARCODE_MULTIPLET_PIPELINE { container toolParams.container publishDir "${params.global.outdir}/data/bap", mode: params.utils.publish.mode diff --git a/src/bap/processes/biorad_debarcode.nf b/src/bap/processes/biorad_debarcode.nf index 8fc438b2..dee189c4 100644 --- a/src/bap/processes/biorad_debarcode.nf +++ b/src/bap/processes/biorad_debarcode.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/ toolParams = params.tools.bap -process SC__BAP__BIORAD_DEBARCODE { +process BAP__BIORAD_DEBARCODE { container toolParams.container label 'compute_resources__cpu','compute_resources__24hqueue' @@ -34,7 +34,7 @@ process SC__BAP__BIORAD_DEBARCODE { } -process SC__BAP__MERGE_FASTQS { +process BAP__MERGE_FASTQS { container toolParams.container label 'compute_resources__default' diff --git a/src/bap/workflows/bap_debarcode.nf b/src/bap/workflows/bap_debarcode.nf index 00934604..aaf54368 100644 --- a/src/bap/workflows/bap_debarcode.nf +++ b/src/bap/workflows/bap_debarcode.nf @@ -3,8 +3,8 @@ nextflow.enable.dsl=2 ////////////////////////////////////////////////////// // Import sub-workflows from the modules: include { - SC__BAP__BIORAD_DEBARCODE; - SC__BAP__MERGE_FASTQS; + BAP__BIORAD_DEBARCODE as BIORAD_DEBARCODE; + BAP__MERGE_FASTQS as MERGE_FASTQS; } from './../processes/biorad_debarcode.nf' params(params) include { @@ -22,13 +22,13 @@ workflow BAP__BIORAD_DEBARCODE { main: - SC__BAP__BIORAD_DEBARCODE(data) | - SC__BAP__MERGE_FASTQS + BIORAD_DEBARCODE(data) | + MERGE_FASTQS - PUBLISH_BC_STATS_BR(SC__BAP__BIORAD_DEBARCODE.out.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'fastq', false) + PUBLISH_BC_STATS_BR(BIORAD_DEBARCODE.out.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'fastq', false) emit: - SC__BAP__MERGE_FASTQS.out + MERGE_FASTQS.out } diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index cd35ebb8..dbcbcb88 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -6,13 +6,13 @@ import java.nio.file.Paths // Import sub-workflows from the modules: include { - SC__BWAMAPTOOLS__BWA_MEM_PE as BWA_MEM_PE; + BWAMAPTOOLS__BWA_MEM_PE as BWA_MEM_PE; } from './processes/mapping.nf' params(params) include { - SC__BWAMAPTOOLS__INDEX_BAM as INDEX_BAM; + BWAMAPTOOLS__INDEX_BAM as INDEX_BAM; } from './processes/index.nf' params(params) include { - SC__BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; + BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; } from './processes/mapping_summary.nf' params(params) include { PUBLISH as PUBLISH_BAM; diff --git a/src/bwamaptools/processes/add_barcode_as_tag.nf b/src/bwamaptools/processes/add_barcode_as_tag.nf index d6b41864..393a6f64 100644 --- a/src/bwamaptools/processes/add_barcode_as_tag.nf +++ b/src/bwamaptools/processes/add_barcode_as_tag.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.bwamaptools -process SC__BWAMAPTOOLS__ADD_BARCODE_TAG { +process BWAMAPTOOLS__ADD_BARCODE_TAG { container toolParams.container label 'compute_resources__default','compute_resources__24hqueue' diff --git a/src/bwamaptools/processes/index.nf b/src/bwamaptools/processes/index.nf index 2616ef2f..0100e36d 100644 --- a/src/bwamaptools/processes/index.nf +++ b/src/bwamaptools/processes/index.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.bwamaptools -process SC__BWAMAPTOOLS__INDEX_BAM { +process BWAMAPTOOLS__INDEX_BAM { container toolParams.container label 'compute_resources__default' @@ -26,7 +26,7 @@ process SC__BWAMAPTOOLS__INDEX_BAM { """ } -process SC__BWAMAPTOOLS__INDEX_BED { +process BWAMAPTOOLS__INDEX_BED { container toolParams.container label 'compute_resources__default' diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index 2cf3f62e..aabc4b0f 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = 
params.tools.bwamaptools -process SC__BWAMAPTOOLS__BWA_MEM_PE { +process BWAMAPTOOLS__BWA_MEM_PE { container toolParams.container label 'compute_resources__bwa_mem' diff --git a/src/bwamaptools/processes/mapping_summary.nf b/src/bwamaptools/processes/mapping_summary.nf index 3517efad..9f76af6c 100644 --- a/src/bwamaptools/processes/mapping_summary.nf +++ b/src/bwamaptools/processes/mapping_summary.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/bwamaptools/b toolParams = params.tools.bwamaptools -process SC__BWAMAPTOOLS__MAPPING_SUMMARY { +process BWAMAPTOOLS__MAPPING_SUMMARY { container toolParams.container label 'compute_resources__default','compute_resources__24hqueue' @@ -28,4 +28,3 @@ process SC__BWAMAPTOOLS__MAPPING_SUMMARY { """ } - diff --git a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf index 6a0c9cd2..57675bcc 100644 --- a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf +++ b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.singlecelltoolkit -process SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ { +process SCTK__BARCODE_10X_SCATAC_FASTQ { container toolParams.container label 'compute_resources__cpu' diff --git a/src/singlecelltoolkit/processes/barcode_correction.nf b/src/singlecelltoolkit/processes/barcode_correction.nf index 4fe9d9ac..6524c6a3 100644 --- a/src/singlecelltoolkit/processes/barcode_correction.nf +++ b/src/singlecelltoolkit/processes/barcode_correction.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.singlecelltoolkit -process SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION { +process SCTK__BARCODE_CORRECTION { container toolParams.container label 'compute_resources__default' diff --git a/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf b/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf index 2a6c19a0..f8e18d09 100644 --- a/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf +++ b/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.singlecelltoolkit -process SC__SINGLECELLTOOLKIT__EXTRACT_AND_CORRECT_BIORAD_BARCODE { +process SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE { container toolParams.container label 'compute_resources__default' diff --git a/src/sinto/main.nf b/src/sinto/main.nf index fa039cfe..b92ae4cd 100644 --- a/src/sinto/main.nf +++ b/src/sinto/main.nf @@ -4,10 +4,10 @@ nextflow.enable.dsl=2 // Import sub-workflows from the modules: include { - SC__SINTO__FRAGMENTS; - SC__SINTO__SORT_FRAGMENTS; + SINTO__FRAGMENTS; + SINTO__SORT_FRAGMENTS; } from './processes/fragments.nf' params(params) -include { SC__BWAMAPTOOLS__INDEX_BED; } from './../../src/bwamaptools/processes/index.nf' params(params) +include { BWAMAPTOOLS__INDEX_BED; } from './../../src/bwamaptools/processes/index.nf' params(params) include { PUBLISH as PUBLISH_FRAGMENTS; PUBLISH as PUBLISH_FRAGMENTS_INDEX; @@ -25,9 +25,9 @@ workflow BAM_TO_FRAGMENTS { main: //println("${params.tools.sinto.fragments.barcodetag}") - fragments = SC__SINTO__FRAGMENTS(bam) - fragments_sort = SC__SINTO__SORT_FRAGMENTS(fragments) - index = SC__BWAMAPTOOLS__INDEX_BED(fragments_sort) + fragments = SINTO__FRAGMENTS(bam) + fragments_sort = SINTO__SORT_FRAGMENTS(fragments) + index = BWAMAPTOOLS__INDEX_BED(fragments_sort) // join bed index into the fragments channel: 
fragments_out = fragments_sort.join(index) diff --git a/src/sinto/processes/fragments.nf b/src/sinto/processes/fragments.nf index 7950ca3a..d8038829 100644 --- a/src/sinto/processes/fragments.nf +++ b/src/sinto/processes/fragments.nf @@ -6,7 +6,7 @@ toolParams = params.tools.sinto barcode_tag = params.containsKey('tools_sinto_fragments_barcodetag') ? params.tools_sinto_fragments_barcodetag : toolParams.fragments.barcodetag -process SC__SINTO__FRAGMENTS { +process SINTO__FRAGMENTS { container toolParams.container label 'compute_resources__cpu','compute_resources__24hqueue' @@ -40,7 +40,7 @@ process SC__SINTO__FRAGMENTS { } -process SC__SINTO__SORT_FRAGMENTS { +process SINTO__SORT_FRAGMENTS { container toolParams.container label 'compute_resources__mem' diff --git a/src/trimgalore/processes/trim.nf b/src/trimgalore/processes/trim.nf index 02e9a9f9..ed58a24a 100644 --- a/src/trimgalore/processes/trim.nf +++ b/src/trimgalore/processes/trim.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.trimgalore -process SC__TRIMGALORE__TRIM { +process TRIMGALORE__TRIM { container toolParams.container label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index ac915564..34481aa7 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -2,12 +2,10 @@ nextflow.enable.dsl=2 ////////////////////////////////////////////////////// // process imports: -include { SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) -include { SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) -include { - SC__SINGLECELLTOOLKIT__EXTRACT_AND_CORRECT_BIORAD_BARCODE as SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE; -} from './../../src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf' params(params) -include { SC__TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) +include { SCTK__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) +include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) +include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './../../src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf' params(params) +include { TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) // workflow imports: include { BWA_MAPPING_PE; } from './../../src/bwamaptools/main.nf' params(params) @@ -27,7 +25,7 @@ include { ////////////////////////////////////////////////////// // Define the workflow -workflow ATAC_PREPROCESS_WITH_METADATA { +workflow ATAC_PREPROCESS { take: metadata @@ -70,25 +68,24 @@ workflow ATAC_PREPROCESS_WITH_METADATA { println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. 
Skipping barcode correction for standard-type samples.") } // run barcode demultiplexing on each read+barcode: - fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ( + fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( data.standard.map { it -> tuple(it[0], it[2], it[3], it[4]) } ) } else { - // join wl to the data channel: data_wl = wl.cross( data.standard.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 - .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology - it[1][2], it[1][3], it[1][4], // R1, R2, R3 - it[0][1] // whitelist - ) } + .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology + it[1][2], it[1][3], it[1][4], // R1, R2, R3 + it[0][1] // whitelist + ) } // run barcode correction against a whitelist: - fastq_bc_corrected = SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION(data_wl.map{ it -> tuple(it[0], it[3], it[5]) } ) + fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl.map{ it -> tuple(it[0], it[3], it[5]) } ) PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[2]) }, 'corrected.bc_stats', 'log', 'fastq', false) // run barcode demultiplexing on each read+barcode: - fastq_dex = SC__SINGLECELLTOOLKIT__BARCODE_10X_SCATAC_FASTQ( + fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( data.standard.join(fastq_bc_corrected).map { it -> tuple(it[0], it[2], it[5], it[4]) } ) @@ -98,7 +95,7 @@ workflow ATAC_PREPROCESS_WITH_METADATA { // using BAP: //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) // using singlecelltoolkit: - fastq_dex_br = SCTX__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'fastq', false) @@ -106,7 +103,7 @@ workflow ATAC_PREPROCESS_WITH_METADATA { fastq_dex = fastq_dex.concat(fastq_dex_br.map{ it -> tuple(it[0], it[1],it[2])}) // run adapter trimming: - fastq_dex_trim = SC__TRIMGALORE__TRIM(fastq_dex) + fastq_dex_trim = TRIMGALORE__TRIM(fastq_dex) // publish fastq output: PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'R1.trimming_report', 'txt', 'fastq', false) PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, 'R2.trimming_report', 'txt', 'fastq', false) From 94c40fa4d8f902b7c4d886d42e5e192de04a9b52 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 16:32:40 +0100 Subject: [PATCH 018/202] Update bap docker image to include levenshtein pkg --- src/bap/Dockerfile | 2 +- src/bap/bap.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bap/Dockerfile b/src/bap/Dockerfile index 8182f5e9..c92cfd0b 100644 --- a/src/bap/Dockerfile +++ b/src/bap/Dockerfile @@ -31,7 +31,7 @@ RUN mkdir /py_build && cd /py_build && \ # install bap fork (original is https://github.com/caleblareau/bap): RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir biopython rapidfuzz fuzzysearch && \ + pip install --no-cache-dir biopython rapidfuzz fuzzysearch python-Levenshtein && \ pip install --no-cache-dir git+https://github.com/cflerin/bap.git diff --git a/src/bap/bap.config b/src/bap/bap.config index 042e739d..3283907e 100644 --- a/src/bap/bap.config +++ b/src/bap/bap.config @@ -1,7 +1,7 @@ params { tools { bap { - container = 'vibsinglecellnf/bap:2021-02-03-1d32827' + container = 'vibsinglecellnf/bap:2021-02-03-1d32827-b' } } } 
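
The python-Levenshtein package added to the bap image above is presumably there for fast edit-distance comparisons when bap collapses near-identical bead barcodes. A minimal sketch of that kind of check, with illustrative barcode values and an assumed merge threshold (not a bap default):

#!/usr/bin/env python3
from Levenshtein import distance

# Two bead barcodes differing by a single base (illustrative values):
bc1 = "ACGTACGTAC"
bc2 = "ACGTACGTAT"

# merge_dist is an assumed threshold for this sketch, not a bap setting:
merge_dist = 1
if distance(bc1, bc2) <= merge_dist:
    print(bc1, "and", bc2, "would collapse to a single bead barcode")
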
From 8b38853a09dfbd30b89c8b0586cf743a525a4bb4 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 16:46:55 +0100 Subject: [PATCH 019/202] Update singlecelltoolkit Docker image - Includes scripts for processing BioRad data --- src/singlecelltoolkit/Dockerfile | 7 +------ src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile index feba2821..f5b2d519 100644 --- a/src/singlecelltoolkit/Dockerfile +++ b/src/singlecelltoolkit/Dockerfile @@ -1,10 +1,5 @@ -*** System restart required *** -Last login: Wed Nov 25 10:25:17 2020 from 10.118.229.21 - u0125489 @ gbw-s-seq10 : ~ -$ tmux a FROM ubuntu:20.04 - ENV DEBIAN_FRONTEND=noninteractive RUN BUILDPKGS="autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev \ git \ @@ -12,7 +7,7 @@ RUN BUILDPKGS="autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev curl \ wget" && \ apt-get update && \ - apt-get install -y apt-utils debconf locales ca-certificates && dpkg-reconfigure locales && \ + apt-get install -y apt-utils debconf locales locales-all ca-certificates && dpkg-reconfigure locales && \ apt-get install -y --no-install-recommends $BUILDPKGS && \ apt-get install -y --no-install-recommends clang python3 llvm-6.0 pigz diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 568b2f1b..66e096ff 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-02-25-f35b286' + container = 'vibsinglecellnf/singlecelltoolkit:2021-03-03-21d4f50' barcode_correction { whitelist { standard = '' From 3c3e2e6371f789080379cbcdc26365d10972d47c Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 16:47:43 +0100 Subject: [PATCH 020/202] Revert Sinto image to the standard release - Remove option to use an alternate temp directory (instead use NXF_TEMP env variable, or map the /tmp volume in the container elsewhere) --- src/sinto/processes/fragments.nf | 1 - src/sinto/sinto.config | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/sinto/processes/fragments.nf b/src/sinto/processes/fragments.nf index d8038829..2f2b6208 100644 --- a/src/sinto/processes/fragments.nf +++ b/src/sinto/processes/fragments.nf @@ -33,7 +33,6 @@ process SINTO__FRAGMENTS { ${processParams.containsKey('min_distance') && processParams.min_distance ? '--min_distance ' + processParams.min_distance: ''} \ ${processParams.containsKey('max_distance') && processParams.max_distance ? '--max_distance ' + processParams.max_distance: ''} \ ${processParams.containsKey('chunksize') && processParams.chunksize ? '--chunksize ' + processParams.chunksize: ''} \ - ${processParams.containsKey('temp_dir') && processParams.temp_dir ? 
'--temp_dir ' + processParams.temp_dir: ''} \ -p ${task.cpus} \ -f ${sampleId}.fragments.bed """ diff --git a/src/sinto/sinto.config b/src/sinto/sinto.config index 7ba9b70c..39d942d8 100644 --- a/src/sinto/sinto.config +++ b/src/sinto/sinto.config @@ -1,7 +1,7 @@ params { tools { sinto { - container = 'vibsinglecellnf/sinto:0.7.2-dev' + container = 'vibsinglecellnf/sinto:0.7.2' fragments { min_mapq = 30 barcodetag = 'CB' @@ -10,7 +10,6 @@ params { min_distance = 10 max_distance = 5000 chunksize = 5000000 - temp_dir = '' } } } From 2c670af7ea6decf2c3e451e40bd641e76ceefb8c Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 16:49:55 +0100 Subject: [PATCH 021/202] Remove bap barcode entry from the config - This may become a selectable option later on --- conf/atac/preprocess.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index e73c16e1..1e745169 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -12,5 +12,5 @@ includeConfig './../../src/bwamaptools/bwamaptools.config' includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' includeConfig './../../src/sinto/sinto.config' includeConfig './../../src/bap/bap.config' -includeConfig './../../src/bap/conf/bap_biorad_debarcode.config' +//includeConfig './../../src/bap/conf/bap_biorad_debarcode.config' From adab0d31c01b2edc56f35e7487d5c23e8e25d44c Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 5 Mar 2021 21:09:02 +0100 Subject: [PATCH 022/202] Fix default barcode tags in config --- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 66e096ff..05338e58 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -10,7 +10,7 @@ params { } barcode_10x_scatac_fastqs { uncorrected_bc_tag = 'CR' - corrected_bc_tag = 'CB' + barcode_quality_tag = 'CY' } } } From dcbbfe9dd694c5eef3f46e154acf44e21c0afeea Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 11 Mar 2021 11:05:49 +0100 Subject: [PATCH 023/202] Add call_cells to pycistopic qc --- src/pycistopic/bin/call_cells.py | 209 +++++++++++++++++++ src/pycistopic/bin/compute_qc_stats.py | 14 +- src/pycistopic/processes/call_cells.nf | 44 ++++ src/pycistopic/processes/compute_qc_stats.nf | 5 +- src/pycistopic/pycistopic.config | 12 +- workflows/atac/qc_filtering.nf | 5 +- 6 files changed, 281 insertions(+), 8 deletions(-) create mode 100755 src/pycistopic/bin/call_cells.py create mode 100644 src/pycistopic/processes/call_cells.nf diff --git a/src/pycistopic/bin/call_cells.py b/src/pycistopic/bin/call_cells.py new file mode 100755 index 00000000..f8dbe737 --- /dev/null +++ b/src/pycistopic/bin/call_cells.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 + +import argparse +import pandas as pd +import pickle +import numpy as np + +from scipy.stats import gaussian_kde +import matplotlib.pyplot as plt + +################################################################################ + +parser = argparse.ArgumentParser(description='Call cells from barcodes') + +parser.add_argument( + "--sampleId", + type=str, + required=True, + help='Sample ID.' +) +parser.add_argument( + "--metadata_pkl", + type=str, + help='Metadata, pickle format.' 
+)
+
+### fragments filters:
+parser.add_argument(
+    "--filter_frags_lower",
+    type=float,
+    required=False,
+    default=3,
+    help='Lower threshold on the number of fragments for keeping a barcode.'
+)
+parser.add_argument(
+    "--filter_frags_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on the number of fragments for keeping a barcode.'
+)
+
+### TSS Enrichment filters:
+parser.add_argument(
+    "--filter_tss_lower",
+    type=float,
+    required=False,
+    default=8,
+    help='Lower threshold on the TSS Enrichment for keeping a barcode.'
+)
+parser.add_argument(
+    "--filter_tss_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on the TSS Enrichment for keeping a barcode.'
+)
+
+### FRIP filters:
+parser.add_argument(
+    "--filter_frip_lower",
+    type=float,
+    required=False,
+    default=None,
+    help='Lower threshold on FRIP for keeping a barcode.'
+)
+parser.add_argument(
+    "--filter_frip_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on FRIP for keeping a barcode.'
+)
+
+### Duplication rate filters:
+parser.add_argument(
+    "--filter_dup_rate_lower",
+    type=float,
+    required=False,
+    default=None,
+    help='Lower threshold on duplication rate for keeping a barcode.'
+)
+parser.add_argument(
+    "--filter_dup_rate_upper",
+    type=float,
+    required=False,
+    default=None,
+    help='Upper threshold on duplication rate for keeping a barcode.'
+)
+
+args = parser.parse_args()
+
+################################################################################
+
+
+def plot_frag_qc(x, y,
+                 ax,
+                 x_thr_min=None, x_thr_max=None,
+                 y_thr_min=None, y_thr_max=None,
+                 ylab=None,
+                 xlab="Number of (unique) fragments",
+                 cmap='viridis',
+                 density_overlay=False,
+                 s=10,
+                 marker='+',
+                 c='#343434',
+                 xlim=None,
+                 ylim=None,
+                 **kwargs
+                 ):
+    assert all(x.index == y.index)
+    barcodes = x.index.values
+    if density_overlay:
+        #pdf,axes = fastKDE.pdf(x.to_numpy(),y.to_numpy())
+        xy = np.vstack([np.log(x),y])
+        z = gaussian_kde(xy)(xy)
+        idx = z.argsort()
+        x, y, z, barcodes = x[idx], y[idx], z[idx], barcodes[idx]
+    else:
+        z=c
+
+    barcodes_to_keep=[]
+    sp=ax.scatter(x, y, c=z, s=s, edgecolors=None, marker=marker, cmap=cmap, **kwargs)
+    #fig.colorbar(sp)
+    if ylim is not None:
+        ax.set_ylim(ylim[0], ylim[1])
+    if xlim is not None:
+        ax.set_xlim(xlim[0], xlim[1])
+    # thresholds:
+    if x_thr_min is not None:
+        ax.axvline(x=x_thr_min, color='r', linestyle='--')
+        barcodes_to_keep.append(barcodes[x>x_thr_min])
+    if x_thr_max is not None:
+        ax.axvline(x=x_thr_max, color='r', linestyle='--')
+        barcodes_to_keep.append(barcodes[x<x_thr_max])
+    if y_thr_min is not None:
+        ax.axhline(y=y_thr_min, color='r', linestyle='--')
+        barcodes_to_keep.append(barcodes[y>y_thr_min])
+    if y_thr_max is not None:
+        ax.axhline(y=y_thr_max, color='r', linestyle='--')
+        barcodes_to_keep.append(barcodes[y<y_thr_max])
+    if len(barcodes_to_keep)>0:
+        return list(set.intersection(*map(set, barcodes_to_keep)))
+    else:
+        return barcodes
+
+
+# Load barcode metrics
+infile = open(args.metadata_pkl, 'rb')
+metadata_bc_dict = pickle.load(infile)
+infile.close()
+
+fig, (ax1,ax2,ax3) = plt.subplots(1,3, figsize=(15,5), dpi=150 )
+p1_cells = plot_frag_qc(
+    x = metadata_bc_dict[args.sampleId]['Unique_nr_frag'],
+    y = metadata_bc_dict[args.sampleId]['TSS_enrichment'],
+    ylab = 'TSS Enrichment',
+    x_thr_min=args.filter_frags_lower,
+    x_thr_max=args.filter_frags_upper,
+    y_thr_min=args.filter_tss_lower,
+    y_thr_max=args.filter_tss_upper,
+    density_overlay=True,
+    ax=ax1
+)
+p2_cells = plot_frag_qc(
+    x = metadata_bc_dict[args.sampleId]['Unique_nr_frag'],
+    y = metadata_bc_dict[args.sampleId]['FRIP'],
+    ylab = 'FRIP',
+    ylim=[0,1],
+    
x_thr_min=args.filter_frags_lower, + x_thr_max=args.filter_frags_upper, + y_thr_min=args.filter_frip_lower, + y_thr_max=args.filter_frip_upper, + density_overlay=True, + ax=ax2 +) +p3_cells = plot_frag_qc( + x = metadata_bc_dict[args.sampleId]['Unique_nr_frag'], + y = metadata_bc_dict[args.sampleId]['Dupl_rate'], + ylab = 'Duplicate rate per cell', + ylim=[0,1], + x_thr_min=args.filter_frags_lower, + x_thr_max=args.filter_frags_upper, + y_thr_min=args.filter_dup_rate_lower, + y_thr_max=args.filter_dup_rate_upper, + density_overlay=True, + ax=ax3 +) +fig.suptitle(args.sampleId) +plt.tight_layout() +plt.savefig(args.sampleId + '__fragments_qc.pdf', dpi=300, bbox_inches = 'tight') + +# intersection of barcodes to keep: +bc_passing_filters = list(set(p1_cells) & set(p2_cells) & set(p3_cells)) +metadata_bc_dict[args.sampleId]['Keep'] = [ 1 if x in bc_passing_filters else 0 for x in metadata_bc_dict[args.sampleId].index ] + + +### outputs: +pd.DataFrame(bc_passing_filters).to_csv(args.sampleId + '__selected_barcodes.txt', sep='\t', index=False, header=False) + +with open(args.sampleId + '__metadata_with_calls.pickle', 'wb') as f: + pickle.dump(metadata_bc_dict, f) + diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index 6b685c26..d2df19df 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -4,6 +4,7 @@ import pybiomart as pbm import pandas as pd import pickle +import os from pycisTopic.qc import compute_qc_stats @@ -73,8 +74,6 @@ annot = pickle.load(infile) infile.close() - - fragments_dict = { args.sampleId: args.fragments } @@ -96,8 +95,17 @@ tss_window = 50, tss_minimum_signal_window = 100, tss_rolling_window = 10, - remove_duplicates = True) + remove_duplicates = True, + ### ray init args: + include_dashboard=False, + ) + +# load bap results to use for duplicate rate (if we are using bap output): +f_bap_qc = os.path.join(os.path.dirname(args.fragments),args.sampleId+'.QCstats.csv') +if os.path.isfile(f_bap_qc) and all(metadata_bc_dict[args.sampleId]['Dupl_rate_bap'] == 0): + bapqc = pd.read_csv(f_bap_qc, index_col=0) + metadata_bc_dict[args.sampleId]['Dupl_rate'] = bapqc['duplicateProportion'] ### outputs: diff --git a/src/pycistopic/processes/call_cells.nf b/src/pycistopic/processes/call_cells.nf new file mode 100644 index 00000000..01cf88e9 --- /dev/null +++ b/src/pycistopic/processes/call_cells.nf @@ -0,0 +1,44 @@ +nextflow.preview.dsl=2 + +binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" + +toolParams = params.tools.pycistopic + +process SC__PYCISTOPIC__CALL_CELLS { + + publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' + container toolParams.container + label 'compute_resources__default','compute_resources__24hqueue' + + input: + tuple val(sampleId), + path(metadata), + path(metadata_pkl), + path(profile_data_pkl) + + output: + tuple val(sampleId), + path(selected_barcodes), + path(output_pdf) + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams.call_cells) + processParams = sampleParams.local + selected_barcodes = "${sampleId}__selected_barcodes.txt" + output_metadata_pkl = "${sampleId}__metadata_with_calls.pickle" + output_pdf = "${sampleId}__fragments_qc.pdf" + """ + ${binDir}call_cells.py \ + --sampleId ${sampleId} \ + --metadata_pkl ${metadata_pkl} \ + ${processParams?.filter_frags_lower ? 
'--filter_frags_lower ' + processParams?.filter_frags_lower : ''} \ + ${processParams?.filter_frags_upper ? '--filter_frags_upper ' + processParams?.filter_frags_upper : ''} \ + ${processParams?.filter_tss_lower ? '--filter_tss_lower ' + processParams?.filter_tss_lower : ''} \ + ${processParams?.filter_tss_upper ? '--filter_tss_upper ' + processParams?.filter_tss_upper : ''} \ + ${processParams?.filter_frip_lower ? '--filter_frip_lower ' + processParams?.filter_frip_lower : ''} \ + ${processParams?.filter_frip_upper ? '--filter_frip_upper ' + processParams?.filter_frip_upper : ''} \ + ${processParams?.filter_dup_rate_lower ? '--filter_dup_rate_lower ' + processParams?.filter_dup_rate_lower : ''} \ + ${processParams?.filter_dup_rate_upper ? '--filter_dup_rate_upper ' + processParams?.filter_dup_rate_upper : ''} + """ +} + diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index 80f6c6a8..4c505233 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -30,13 +30,14 @@ process SC__PYCISTOPIC__COMPUTE_QC_STATS { output_metadata_pkl = "${sampleId}_metadata.pickle" output_profile_data_pkl = "${sampleId}_profile_data.pickle" """ - export NUMEXPR_MAX_THREADS=${task.cpus} + export NUMEXPR_MAX_THREADS=1 + export OMP_NUM_THREADS=1 ${binDir}compute_qc_stats.py \ --sampleId ${sampleId} \ --fragments ${fragments} \ --regions ${peaks} \ --n_frag ${processParams.n_frag} \ - --threads ${task.cpus} \ + --threads 1 \ --biomart_annot_pkl ${biomart_annot} \ --output_metadata ${output_metadata} \ --output_metadata_pkl ${output_metadata_pkl} \ diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 141ca2d5..a2b8cdfa 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -1,7 +1,7 @@ params { tools { pycistopic { - container = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/Pipeline_Dev/containers/aertslab-pycistopic-2021-02-02-2b4cc98.sif' + container = '/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/containers/aertslab-pycistopic-latest.sif' biomart_annot { biomart_dataset_name = 'hsapiens_gene_ensembl' } @@ -15,6 +15,16 @@ params { compute_qc_stats { n_frag = 50 } + call_cells { + filter_frags_lower = '1000' + filter_frags_upper = '' + filter_tss_lower = '8' + filter_tss_upper = '' + filter_frip_lower = '' + filter_frip_upper = '' + filter_dup_rate_lower = '' + filter_dup_rate_upper = '' + } barcode_level_statistics { filter_frags_lower = '1000' filter_frags_upper = '' diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 6b8fe2b9..9a29b30d 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -10,6 +10,7 @@ include { SC__PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/proce include { SC__PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' params(params) include { SC__PYCISTOPIC__PLOT_QC_STATS; } from './../../src/pycistopic/processes/plot_qc_stats.nf' params(params) include { SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS; } from './../../src/pycistopic/processes/barcode_level_statistics.nf' params(params) +include { SC__PYCISTOPIC__CALL_CELLS; } from './../../src/pycistopic/processes/call_cells.nf' params(params) include { PUBLISH as PUBLISH_PEAKS; @@ -34,7 +35,6 @@ workflow ATAC_QC_PREFILTER { .set{ data_split } biomart = SC__PYCISTOPIC__BIOMART_ANNOT() - biomart.view() peaks = 
SC__PYCISTOPIC__MACS2_CALL_PEAKS(data_split.bam) PUBLISH_PEAKS(peaks.map { it -> tuple(it[0], it[1]) }, 'peaks', 'narrowPeak', 'macs2', false) @@ -49,7 +49,8 @@ workflow ATAC_QC_PREFILTER { qc_stats_plot = SC__PYCISTOPIC__PLOT_QC_STATS(qc_stats) PUBLISH_QC_SAMPLE_METRICS(qc_stats_plot, 'qc_sample_metrics', 'pdf', 'pycistopic', false) - SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS(qc_stats) + //SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS(qc_stats) + SC__PYCISTOPIC__CALL_CELLS(qc_stats) } From e2d7036fc5b6b752fa511b1380e0611af963b8f8 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 15 Mar 2021 12:09:34 +0100 Subject: [PATCH 024/202] Remove cpus param from samtools fixmate --- src/bwamaptools/processes/mapping.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index aabc4b0f..b56f0b85 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -32,7 +32,7 @@ process BWAMAPTOOLS__BWA_MEM_PE { ${bwa_fasta} \ ${fastq_PE1} \ ${fastq_PE2} \ - | samtools fixmate -@ ${samtools_cpus} -m -u -O bam - - \ + | samtools fixmate -m -u -O bam - - \ | samtools sort -@ ${samtools_cpus} -u -O bam - \ | samtools markdup -@ ${samtools_cpus} -f ${sampleId}.markdup.log - ${sampleId}.bwa.out.possorted.bam """ From b50fc36032c625fd87da663ecaee72003e60539c Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 22 Mar 2021 22:01:43 +0100 Subject: [PATCH 025/202] Update picard config - Load base config, and profile-specific settings separately --- conf/atac/preprocess.config | 1 + nextflow.config | 1 + src/picard/conf/nemesh.config | 14 ++++++++++++++ src/picard/picard.config | 14 ++++---------- 4 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 src/picard/conf/nemesh.config diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index 1e745169..391faf50 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -9,6 +9,7 @@ params { includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' includeConfig './../../src/trimgalore/trimgalore.config' includeConfig './../../src/bwamaptools/bwamaptools.config' +includeConfig './../../src/picard/picard.config' includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' includeConfig './../../src/sinto/sinto.config' includeConfig './../../src/bap/bap.config' diff --git a/nextflow.config b/nextflow.config index 9194468a..11cab164 100644 --- a/nextflow.config +++ b/nextflow.config @@ -171,6 +171,7 @@ profiles { includeConfig 'conf/nemesh.config' includeConfig 'src/fastp/fastp.config' includeConfig 'src/picard/picard.config' + includeConfig 'src/picard/conf/nemesh.config' includeConfig 'src/dropseqtools/dropseqtools.config' includeConfig 'src/star/star.config' includeConfig 'src/dropletutils/dropletutils.config' diff --git a/src/picard/conf/nemesh.config b/src/picard/conf/nemesh.config new file mode 100644 index 00000000..dba6dfe1 --- /dev/null +++ b/src/picard/conf/nemesh.config @@ -0,0 +1,14 @@ +params { + picard { + + merge_bam_alignment { + includeSecondaryAlignments = false + pairedRun = false + } + + sort_sam { + so = "coordinate" + } + } +} + diff --git a/src/picard/picard.config b/src/picard/picard.config index b8b5a0e9..7742c4c5 100644 --- a/src/picard/picard.config +++ b/src/picard/picard.config @@ -1,14 +1,8 @@ params { - picard { - container = 'vibsinglecellnf/picard:2.21.1' - - merge_bam_alignment { - includeSecondaryAlignments = false - pairedRun = false - } - - sort_sam 
{ - so = "coordinate" + tools { + picard { + container = 'vibsinglecellnf/picard:2.21.1' } } } + From 6ff7dddde3e004ed74da3b92df065fe7f3feedda Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 22 Mar 2021 22:04:47 +0100 Subject: [PATCH 026/202] Add MarkDuplicates to Picard - Run MarkDuplicates, pipe the output to SortSam --- src/picard/processes/mark_duplicates.nf | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/picard/processes/mark_duplicates.nf diff --git a/src/picard/processes/mark_duplicates.nf b/src/picard/processes/mark_duplicates.nf new file mode 100644 index 00000000..e67d98db --- /dev/null +++ b/src/picard/processes/mark_duplicates.nf @@ -0,0 +1,41 @@ +nextflow.enable.dsl=2 + +// binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" + +toolParams = params.picard + +process MARK_DUPLICATES { + + container toolParams.container + label 'compute_resources__default','compute_resources__24hqueue' + + input: + tuple val(sampleId), + path(bam) + + output: + tuple val(sampleId), + path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam"), + path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bai") + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + processParams = sampleParams.local + """ + set -euo pipefail + java -jar /picard.jar MarkDuplicates \ + I=${bam} \ + O=/dev/stdout \ + BARCODE_TAG=CB \ + COMPRESSION_LEVEL=0 \ + QUIET=true \ + M=${sampleId}.picard_markdup_metrics.txt \ + ASSUME_SORT_ORDER=queryname \ + | java -jar /picard.jar SortSam \ + I=/dev/stdin \ + O=${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam \ + SORT_ORDER=coordinate \ + CREATE_INDEX=true + """ +} + From c748922988ec1a712fc5fd5a7b6cf0c56faf9d0a Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 22 Mar 2021 22:09:23 +0100 Subject: [PATCH 027/202] Add GATK tool - Use existing docker image - Use MarkDuplicatesSpark process --- conf/atac/preprocess.config | 1 + src/gatk/LICENSE | 674 ++++++++++++++++++++ src/gatk/gatk.config | 8 + src/gatk/processes/mark_duplicates_spark.nf | 35 + 4 files changed, 718 insertions(+) create mode 100644 src/gatk/LICENSE create mode 100644 src/gatk/gatk.config create mode 100644 src/gatk/processes/mark_duplicates_spark.nf diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index 391faf50..e679f14f 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -10,6 +10,7 @@ includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' includeConfig './../../src/trimgalore/trimgalore.config' includeConfig './../../src/bwamaptools/bwamaptools.config' includeConfig './../../src/picard/picard.config' +includeConfig './../../src/gatk/gatk.config' includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' includeConfig './../../src/sinto/sinto.config' includeConfig './../../src/bap/bap.config' diff --git a/src/gatk/LICENSE b/src/gatk/LICENSE new file mode 100644 index 00000000..f288702d --- /dev/null +++ b/src/gatk/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. 
+ + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. 
Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. 
You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. 
+ + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. 
+ + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. 

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/src/gatk/gatk.config b/src/gatk/gatk.config
new file mode 100644
index 00000000..c55c1207
--- /dev/null
+++ b/src/gatk/gatk.config
@@ -0,0 +1,8 @@
+params {
+    tools {
+        gatk {
+            container = 'broadinstitute/gatk:4.2.0.0'
+        }
+    }
+}
+
diff --git a/src/gatk/processes/mark_duplicates_spark.nf b/src/gatk/processes/mark_duplicates_spark.nf
new file mode 100644
index 00000000..be218ece
--- /dev/null
+++ b/src/gatk/processes/mark_duplicates_spark.nf
@@ -0,0 +1,35 @@
+nextflow.enable.dsl=2
+
+//binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : ""
+
+toolParams = params.tools.gatk
+
+process MARK_DUPLICATES_SPARK {
+
+    container toolParams.container
+    label 'compute_resources__cpu','compute_resources__24hqueue'
+
+    input:
+        tuple val(sampleId),
+              path(bam)
+
+    output:
+        tuple val(sampleId),
+              path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam"),
+              path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam.bai"),
+              path("${sampleId}.picard_markdup_metrics.txt")
+
+    script:
+        def sampleParams = params.parseConfig(sampleId, params.global, toolParams)
+        //processParams = sampleParams.local
+        """
+        gatk MarkDuplicatesSpark \
+            -I ${bam} \
+            -O ${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam \
+            -M ${sampleId}.picard_markdup_metrics.txt \
+            -- \
+            --spark-runner LOCAL \
+            --spark-master local[${task.cpus}]
+        """
+}
+

From c0f543f8a8211960e4aeb1dc421b59c3f7f49d64 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Mon, 22 Mar 2021 22:10:30 +0100
Subject: [PATCH 028/202] Add GATK directory framework

---
 src/gatk/bin/.gitkeep       | 0
 src/gatk/conf/.gitkeep      | 0
 src/gatk/workflows/.gitkeep | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/gatk/bin/.gitkeep
 create mode 100644 src/gatk/conf/.gitkeep
 create mode 100644 src/gatk/workflows/.gitkeep

diff --git a/src/gatk/bin/.gitkeep b/src/gatk/bin/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/src/gatk/conf/.gitkeep b/src/gatk/conf/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/src/gatk/workflows/.gitkeep b/src/gatk/workflows/.gitkeep
new file mode 100644
index 00000000..e69de29b

From 61f314d050279846c5358a059c7b37934ce32f06 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Mon, 22 Mar 2021 22:15:06 +0100
Subject: [PATCH 029/202] Update ATAC preprocessing workflow (mapping)

- Mapping now writes a bam to disk after fixmate
- Marking duplicates is handled with Picard/GATK, also outputting a
  coordinate sorted and indexed bam
- Derive readgroup from fastq prior to mapping; add this to the bam file
  with bwa (used in Picard/GATK)
---
 src/bwamaptools/main.nf              | 20 +++++++++++++-------
 src/bwamaptools/processes/mapping.nf | 10 ++++------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf
index dbcbcb88..f21b64db 100644
--- a/src/bwamaptools/main.nf
+++ b/src/bwamaptools/main.nf
@@ -8,16 +8,20 
@@ import java.nio.file.Paths include { BWAMAPTOOLS__BWA_MEM_PE as BWA_MEM_PE; } from './processes/mapping.nf' params(params) -include { - BWAMAPTOOLS__INDEX_BAM as INDEX_BAM; -} from './processes/index.nf' params(params) include { BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; } from './processes/mapping_summary.nf' params(params) +include { + MARK_DUPLICATES; +} from './../../src/picard/processes/mark_duplicates.nf' params(params) +include { + MARK_DUPLICATES_SPARK; +} from './../../src/gatk/processes/mark_duplicates_spark.nf' params(params) include { PUBLISH as PUBLISH_BAM; PUBLISH as PUBLISH_BAM_INDEX; PUBLISH as PUBLISH_MAPPING_SUMMARY; + PUBLISH as PUBLISH_MARK_DUPLICATES_SUMMARY; } from "../utils/workflows/utils.nf" params(params) ////////////////////////////////////////////////////// @@ -65,16 +69,18 @@ workflow BWA_MAPPING_PE { bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) BWA_MEM_PE(bwa_inputs) | - INDEX_BAM | + MARK_DUPLICATES_SPARK | + map{it -> tuple(it[0],it[1],it[2])} | MAPPING_SUMMARY // publish output: - PUBLISH_BAM(INDEX_BAM.out, 'bwa.out.possorted', 'bam', 'bam', false) - PUBLISH_BAM_INDEX(INDEX_BAM.out.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) + PUBLISH_BAM(MARK_DUPLICATES_SPARK.out, 'bwa.out.possorted', 'bam', 'bam', false) + PUBLISH_BAM_INDEX(MARK_DUPLICATES_SPARK.out.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) + PUBLISH_MARK_DUPLICATES_SUMMARY(MARK_DUPLICATES_SPARK.out.map{it -> tuple(it[0], it[3])}, 'mark_duplicates', 'txt', 'bam', false) PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, 'mapping_stats', 'tsv', 'bam', false) emit: - INDEX_BAM.out + MARK_DUPLICATES_SPARK.out.map{it -> tuple(it[0],it[1],it[2])} } diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index b56f0b85..ec82e722 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -18,23 +18,21 @@ process BWAMAPTOOLS__BWA_MEM_PE { output: tuple val(sampleId), - path("${sampleId}.bwa.out.possorted.bam") + path("${sampleId}.bwa.out.fixmate.bam") script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams) processParams = sampleParams.local - def samtools_cpus = (task.cpus > 6) ? 
6 : task.cpus """ - set -euo pipefail + id=\$(zcat ${fastq_PE1} | head -n 1 | cut -f 1-4 -d':' | sed 's/@//') ${toolParams.bwa_version} mem \ -t ${task.cpus} \ -C \ + -R "@RG\\tID:\${id}\\tSM:${sampleId}\\tLB:\${id}"__"${sampleId}\\tPL:ILLUMINA" \ ${bwa_fasta} \ ${fastq_PE1} \ ${fastq_PE2} \ - | samtools fixmate -m -u -O bam - - \ - | samtools sort -@ ${samtools_cpus} -u -O bam - \ - | samtools markdup -@ ${samtools_cpus} -f ${sampleId}.markdup.log - ${sampleId}.bwa.out.possorted.bam + | samtools fixmate -m -O bam - ${sampleId}.bwa.out.fixmate.bam """ } From 74e1579b2caefaafb58f83898137a504a7d09d7e Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 22 Mar 2021 22:21:06 +0100 Subject: [PATCH 030/202] Cleanup main Sinto workflow --- src/sinto/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/sinto/main.nf b/src/sinto/main.nf index b92ae4cd..eb80a6eb 100644 --- a/src/sinto/main.nf +++ b/src/sinto/main.nf @@ -7,7 +7,9 @@ include { SINTO__FRAGMENTS; SINTO__SORT_FRAGMENTS; } from './processes/fragments.nf' params(params) -include { BWAMAPTOOLS__INDEX_BED; } from './../../src/bwamaptools/processes/index.nf' params(params) +include { + BWAMAPTOOLS__INDEX_BED; +} from './../../src/bwamaptools/processes/index.nf' params(params) include { PUBLISH as PUBLISH_FRAGMENTS; PUBLISH as PUBLISH_FRAGMENTS_INDEX; @@ -24,7 +26,6 @@ workflow BAM_TO_FRAGMENTS { main: - //println("${params.tools.sinto.fragments.barcodetag}") fragments = SINTO__FRAGMENTS(bam) fragments_sort = SINTO__SORT_FRAGMENTS(fragments) index = BWAMAPTOOLS__INDEX_BED(fragments_sort) From 2fab55c714771988ab5d3de6c0c42c2ad80a2fa2 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 22 Mar 2021 22:30:19 +0100 Subject: [PATCH 031/202] Cleanup bap workflow - No longer generate a new fragments file from the bap bam output --- src/bap/main.nf | 18 +----------------- src/sinto/processes/fragments.nf | 4 +--- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/src/bap/main.nf b/src/bap/main.nf index f0190d2c..e8e33e47 100644 --- a/src/bap/main.nf +++ b/src/bap/main.nf @@ -6,14 +6,6 @@ nextflow.enable.dsl=2 include { BAP__BARCODE_MULTIPLET_PIPELINE as BARCODE_MULTIPLET_PIPELINE; } from './processes/barcode_multiplet.nf' params(params) -include { - BAM_TO_FRAGMENTS as BAP_BAM_TO_FRAGMENTS; -} from './../../src/sinto/main.nf' addParams(tools_sinto_fragments_barcodetag: params.tools.bap.barcode_multiplet.drop_tag) - -include { - PUBLISH as PUBLISH_BAP_FRAGMENTS; - PUBLISH as PUBLISH_BAP_FRAGMENTS_INDEX; -} from "../../src/utils/workflows/utils.nf" params(params) ////////////////////////////////////////////////////// // Define the workflow @@ -28,7 +20,6 @@ workflow get_bam { bam } - workflow BAP__BARCODE_MULTIPLET_WF { take: @@ -38,15 +29,8 @@ workflow BAP__BARCODE_MULTIPLET_WF { bap = BARCODE_MULTIPLET_PIPELINE(bam.map { it -> tuple(it[0], it[1], it[2]) }) - // generate a fragments file: - fragments = BAP_BAM_TO_FRAGMENTS(bap.map {it -> tuple(it[0], it[1], it[2])}) - - // publish fragments output: - PUBLISH_BAP_FRAGMENTS(fragments, 'bap.sinto.fragments.tsv', 'gz', 'bap/fragments_sinto', false) - PUBLISH_BAP_FRAGMENTS_INDEX(fragments.map{ it -> tuple(it[0], it[2]) }, 'bap.sinto.fragments.tsv.gz', 'tbi', 'bap/fragments_sinto', false) - emit: - fragments + bap } diff --git a/src/sinto/processes/fragments.nf b/src/sinto/processes/fragments.nf index 2f2b6208..a71f80b8 100644 --- a/src/sinto/processes/fragments.nf +++ b/src/sinto/processes/fragments.nf @@ -4,8 +4,6 @@ nextflow.enable.dsl=2 toolParams = 
params.tools.sinto -barcode_tag = params.containsKey('tools_sinto_fragments_barcodetag') ? params.tools_sinto_fragments_barcodetag : toolParams.fragments.barcodetag - process SINTO__FRAGMENTS { container toolParams.container @@ -27,7 +25,7 @@ process SINTO__FRAGMENTS { sinto fragments \ -b ${bam} \ -m ${processParams.min_mapq} \ - ${barcode_tag=='' ? '' : '--barcodetag ' + barcode_tag} \ + ${processParams.containsKey('barcodetag') && processParams.barcodetag ? '--barcodetag ' + processParams.barcodetag: ''} \ ${processParams.containsKey('barcode_regex') && processParams.barcode_regex ? '--barcode_regex ' + processParams.barcode_regex: ''} \ ${processParams.containsKey('use_chrom') && processParams.use_chrom ? '--use_chrom ' + processParams.use_chrom: ''} \ ${processParams.containsKey('min_distance') && processParams.min_distance ? '--min_distance ' + processParams.min_distance: ''} \ From 043e9e7fbabf80ad260006b5e4ddcd303bb74035 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sun, 21 Feb 2021 23:04:00 +0100 Subject: [PATCH 032/202] Avoid hard link in publish process - Use stageAs in place of hard link - Resolves #317 --- src/utils/processes/utils.nf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index d59162ed..513c18ee 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -426,7 +426,7 @@ process SC__PUBLISH { input: tuple \ val(tag), \ - path(f), \ + path(f, stageAs: 'input_file'), \ val(stashedParams) val(fileOutputSuffix) val(toolName) @@ -447,13 +447,9 @@ process SC__PUBLISH { isParameterExplorationModeOn, stashedParams ) - /* avoid cases where the input and output files have identical names: - Move the input file to a unique name, then create a link to - the input file */ """ - mv $f tmp if [ ! 
-f ${outputFileName} ]; then
-        ln -L tmp "${outputFileName}"
+        ln -s input_file "${outputFileName}"
     fi
     """
 }

From 5e104cc176bd4d7e9372a13bdede929ffd24caa6 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Thu, 1 Apr 2021 15:53:02 +0200
Subject: [PATCH 033/202] Don't output metrics alongside MarkDuplicatesSpark

- This reduces the run time by ~2.5-3x
- Instead, run EstimateLibraryComplexity separately
---
 src/gatk/processes/mark_duplicates_spark.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gatk/processes/mark_duplicates_spark.nf b/src/gatk/processes/mark_duplicates_spark.nf
index be218ece..e65768d1 100644
--- a/src/gatk/processes/mark_duplicates_spark.nf
+++ b/src/gatk/processes/mark_duplicates_spark.nf
@@ -16,8 +16,7 @@ process MARK_DUPLICATES_SPARK {
     output:
         tuple val(sampleId),
               path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam"),
-              path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam.bai"),
-              path("${sampleId}.picard_markdup_metrics.txt")
+              path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam.bai")
 
     script:
         def sampleParams = params.parseConfig(sampleId, params.global, toolParams)
@@ -26,7 +25,6 @@ process MARK_DUPLICATES_SPARK {
         gatk MarkDuplicatesSpark \
             -I ${bam} \
             -O ${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam \
-            -M ${sampleId}.picard_markdup_metrics.txt \
             -- \
             --spark-runner LOCAL \
             --spark-master local[${task.cpus}]
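
Taken together with the next patch, duplicate marking and library-complexity
metrics become two independent steps per sample. As a rough sketch of the
resulting commands (illustrative only; "sample.bam" and the CPU count stand
in for the actual process inputs):

    gatk MarkDuplicatesSpark \
        -I sample.bam \
        -O sample.bwa.out.fixmate.picard_markdup.possorted.bam \
        -- \
        --spark-runner LOCAL \
        --spark-master local[8]

    java -jar /picard.jar EstimateLibraryComplexity \
        I=sample.bam \
        O=sample.picard_library_complexity_metrics.txt \
        BARCODE_TAG=CB

Dropping -M from the Spark step is what avoids the expensive inline metrics
collection; EstimateLibraryComplexity recovers those metrics in a separate,
parallel task.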
"${workflow.projectDir}/src/template/bin/" : "" + +toolParams = params.picard + +process PICARD__ESTIMATE_LIBRARY_COMPLEXITY { + + container toolParams.container + label 'compute_resources__default','compute_resources__24hqueue' + + input: + tuple val(sampleId), + path(bam) + + output: + tuple val(sampleId), + path("${sampleId}.picard_library_complexity_metrics.txt") + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams.estimate_library_complexity) + processParams = sampleParams.local + """ + java -jar /picard.jar EstimateLibraryComplexity \ + I=${bam} \ + O=${sampleId}.picard_library_complexity_metrics.txt \ + BARCODE_TAG=${processParams.barcode_tag} \ + """ +} + diff --git a/src/picard/processes/mark_duplicates.nf b/src/picard/processes/mark_duplicates.nf index e67d98db..11c71f06 100644 --- a/src/picard/processes/mark_duplicates.nf +++ b/src/picard/processes/mark_duplicates.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.picard -process MARK_DUPLICATES { +process PICARD__MARK_DUPLICATES_AND_SORT { container toolParams.container label 'compute_resources__default','compute_resources__24hqueue' @@ -29,7 +29,6 @@ process MARK_DUPLICATES { BARCODE_TAG=CB \ COMPRESSION_LEVEL=0 \ QUIET=true \ - M=${sampleId}.picard_markdup_metrics.txt \ ASSUME_SORT_ORDER=queryname \ | java -jar /picard.jar SortSam \ I=/dev/stdin \ From 02de8658c8f600a30f875598d5e3b46963590781 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 1 Apr 2021 16:01:32 +0200 Subject: [PATCH 035/202] Rename MarkDuplicatesSpark --- src/gatk/processes/mark_duplicates_spark.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gatk/processes/mark_duplicates_spark.nf b/src/gatk/processes/mark_duplicates_spark.nf index e65768d1..2a67209c 100644 --- a/src/gatk/processes/mark_duplicates_spark.nf +++ b/src/gatk/processes/mark_duplicates_spark.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.gatk -process MARK_DUPLICATES_SPARK { +process GATK__MARK_DUPLICATES_SPARK { container toolParams.container label 'compute_resources__cpu','compute_resources__24hqueue' From ba0ecfc71366adb9b511cab29868c8f443556eb9 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 1 Apr 2021 16:51:53 +0200 Subject: [PATCH 036/202] Add fastp adapter trimming - Resolves #326 --- src/fastp/processes/adapter_trimming.nf | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 src/fastp/processes/adapter_trimming.nf diff --git a/src/fastp/processes/adapter_trimming.nf b/src/fastp/processes/adapter_trimming.nf new file mode 100644 index 00000000..3a997ddc --- /dev/null +++ b/src/fastp/processes/adapter_trimming.nf @@ -0,0 +1,40 @@ +nextflow.enable.dsl=2 + +// binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" + +toolParams = params.tools.fastp + +process FASTP__ADAPTER_TRIMMING { + + container toolParams.container + label 'compute_resources__cpu','compute_resources__24hqueue' + + when: + params.atac_preprocess_tools.adapter_trimming_method == 'fastp' + + input: + tuple val(sampleId), + path(fastq_PE1), + path(fastq_PE2) + + output: + tuple val(sampleId), + path("${sampleId}_dex_R1_val_1.fq.gz"), + path("${sampleId}_dex_R2_val_2.fq.gz"), + path("${sampleId}_fastp.html") + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + processParams = sampleParams.local + """ + fastp \ + --in1 ${fastq_PE1} \ + --in2 ${fastq_PE2} \ + --out1 ${sampleId}_dex_R1_val_1.fq.gz \ + --out2 ${sampleId}_dex_R2_val_2.fq.gz \ + --detect_adapter_for_pe \ + --html ${sampleId}_fastp.html \ + --thread ${task.cpus} + """ +} + From b09e91643bafa2d35ae625980ceb145b8cdcf8e4 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 1 Apr 2021 16:59:42 +0200 Subject: [PATCH 037/202] BWA mapping updates - Include two mark duplicates methods, and ability to choose between them. mark_duplicates_method is now a required input to the BWA_MAPPING_PE workflow - Change publishing output directory to 'reports' (from 'fastq'). --- src/bwamaptools/main.nf | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index f21b64db..d96a75c5 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -12,16 +12,19 @@ include { BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; } from './processes/mapping_summary.nf' params(params) include { - MARK_DUPLICATES; + PICARD__MARK_DUPLICATES_AND_SORT; } from './../../src/picard/processes/mark_duplicates.nf' params(params) include { - MARK_DUPLICATES_SPARK; + PICARD__ESTIMATE_LIBRARY_COMPLEXITY; +} from './../../src/picard/processes/estimate_library_complexity.nf' params(params) +include { + GATK__MARK_DUPLICATES_SPARK; } from './../../src/gatk/processes/mark_duplicates_spark.nf' params(params) include { PUBLISH as PUBLISH_BAM; PUBLISH as PUBLISH_BAM_INDEX; PUBLISH as PUBLISH_MAPPING_SUMMARY; - PUBLISH as PUBLISH_MARK_DUPLICATES_SUMMARY; + PUBLISH as PUBLISH_LIBRARY_METRICS; } from "../utils/workflows/utils.nf" params(params) ////////////////////////////////////////////////////// @@ -60,6 +63,7 @@ workflow BWA_MAPPING_PE { take: data // a channel of [val(sampleId), path(fastq_PE1), path(fastq_PE2)] + mark_duplicates_method main: /* @@ -68,19 +72,28 @@ workflow BWA_MAPPING_PE { */ bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) - BWA_MEM_PE(bwa_inputs) | - MARK_DUPLICATES_SPARK | - map{it -> tuple(it[0],it[1],it[2])} | - MAPPING_SUMMARY + BWA_MEM_PE(bwa_inputs) + + switch(mark_duplicates_method) { + case 'MarkDuplicates': + dup_marked_bam = PICARD__MARK_DUPLICATES_AND_SORT(BWA_MEM_PE.out) + break + case 'MarkDuplicatesSpark': + dup_marked_bam = GATK__MARK_DUPLICATES_SPARK(BWA_MEM_PE.out) + break + } + + MAPPING_SUMMARY(dup_marked_bam) + PICARD__ESTIMATE_LIBRARY_COMPLEXITY(BWA_MEM_PE.out) // publish output: - PUBLISH_BAM(MARK_DUPLICATES_SPARK.out, 'bwa.out.possorted', 'bam', 'bam', false) - PUBLISH_BAM_INDEX(MARK_DUPLICATES_SPARK.out.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) - PUBLISH_MARK_DUPLICATES_SUMMARY(MARK_DUPLICATES_SPARK.out.map{it -> tuple(it[0], it[3])}, 'mark_duplicates', 'txt', 'bam', false) + PUBLISH_BAM(dup_marked_bam, 'bwa.out.possorted', 
'bam', 'bam', false) + PUBLISH_BAM_INDEX(dup_marked_bam.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) + PUBLISH_LIBRARY_METRICS(PICARD__ESTIMATE_LIBRARY_COMPLEXITY.out, 'library_complexity_metrics', 'txt', 'reports', false) PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, 'mapping_stats', 'tsv', 'bam', false) emit: - MARK_DUPLICATES_SPARK.out.map{it -> tuple(it[0],it[1],it[2])} + dup_marked_bam } From ae9305a57ee480e4e09343b146ebeb08175282be Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 1 Apr 2021 17:01:27 +0200 Subject: [PATCH 038/202] ATAC preprocess updates - New param 'atac_preprocess_tools' that allows selection of tools for mark duplicate and adapter trimming steps - Include fastp adapter trimming - Pass mark_duplicates_method to BWA_MAPPING_PE --- conf/atac/preprocess.config | 4 ++++ workflows/atac/preprocess.nf | 24 ++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index e679f14f..d7bffce5 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -1,4 +1,8 @@ params { + atac_preprocess_tools { + mark_duplicates_method = 'MarkDuplicatesSpark' + adapter_trimming_method = 'Trim_Galore' + } data { atac_preprocess { metadata = 'metadata.tsv' diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 34481aa7..1929a3c5 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -6,6 +6,9 @@ include { SCTK__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/proces include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './../../src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf' params(params) include { TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) +include { + FASTP__ADAPTER_TRIMMING as FASTP__TRIM; +} from './../../src/fastp/processes/adapter_trimming.nf' params(params) // workflow imports: include { BWA_MAPPING_PE; } from './../../src/bwamaptools/main.nf' params(params) @@ -17,6 +20,7 @@ include { PUBLISH as PUBLISH_BR_BC_STATS; PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE1; PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE2; + PUBLISH as PUBLISH_FASTQS_TRIMLOG_FASTP; PUBLISH as PUBLISH_FRAGMENTS; PUBLISH as PUBLISH_FRAGMENTS_INDEX; } from "../../src/utils/workflows/utils.nf" params(params) @@ -96,20 +100,28 @@ workflow ATAC_PREPROCESS { //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) // using singlecelltoolkit: fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'fastq', false) + PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'reports', false) // concatenate the read channels: fastq_dex = fastq_dex.concat(fastq_dex_br.map{ it -> tuple(it[0], it[1],it[2])}) // run adapter trimming: - fastq_dex_trim = TRIMGALORE__TRIM(fastq_dex) - // publish fastq output: - PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'R1.trimming_report', 'txt', 'fastq', false) - PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, 'R2.trimming_report', 'txt', 'fastq', false) + switch(params.atac_preprocess_tools.adapter_trimming_method) { + case 
'Trim_Galore': + fastq_dex_trim = TRIMGALORE__TRIM(fastq_dex); + PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'R1.trimming_report', 'txt', 'reports', false); + PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, 'R2.trimming_report', 'txt', 'reports', false); + break; + case 'fastp': + fastq_dex_trim = FASTP__TRIM(fastq_dex); + PUBLISH_FASTQS_TRIMLOG_FASTP(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'fastp.trimming_report', '.html', 'reports', false); + break; + } // map with bwa mem: - bam = BWA_MAPPING_PE(fastq_dex_trim.map { it -> tuple(it[0..2]) }) + bam = BWA_MAPPING_PE(fastq_dex_trim.map { it -> tuple(it[0..2]) }, + params.atac_preprocess_tools.mark_duplicates_method) // generate a fragments file: fragments = BAM_TO_FRAGMENTS(bam) From 1282878f0f3b6d6fd306139751453ddd6166cba3 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 1 Apr 2021 17:04:21 +0200 Subject: [PATCH 039/202] Update single_cell_toolkit Docker image - Use zlib-ng base image (#325) - Include saturation script (#332), and python 3.7 install --- src/singlecelltoolkit/Dockerfile | 63 +++++++++++--------------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile index f5b2d519..d709a38b 100644 --- a/src/singlecelltoolkit/Dockerfile +++ b/src/singlecelltoolkit/Dockerfile @@ -1,39 +1,26 @@ -FROM ubuntu:20.04 +FROM vibsinglecellnf/samtools:1.12 ENV DEBIAN_FRONTEND=noninteractive -RUN BUILDPKGS="autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev \ - git \ - libhts-dev \ - curl \ - wget" && \ - apt-get update && \ - apt-get install -y apt-utils debconf locales locales-all ca-certificates && dpkg-reconfigure locales && \ - apt-get install -y --no-install-recommends $BUILDPKGS && \ - apt-get install -y --no-install-recommends clang python3 llvm-6.0 pigz +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3 \ + python3-venv \ + wget && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.7 100 -# install htslib -ENV HTSLIB_VERSION 1.11 -RUN curl -L -o /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 \ - https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 && \ - mkdir -p /tmp/htslib-${HTSLIB_VERSION} && \ - tar jxvf /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 -C /tmp/htslib-${HTSLIB_VERSION} --strip-components 1 && \ - cd /tmp/htslib-${HTSLIB_VERSION} && \ - ./configure && \ - make && \ - make install && \ - cd .. && rm -rf htslib-${HTSLIB_VERSION} +RUN python -m venv /opt/venv +# Make sure we use the virtualenv: +ENV PATH="/opt/venv/bin:$PATH" -# install samtools -ENV SAMTOOLS_VERSION 1.11 -RUN curl -L -o /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ - https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 && \ - mkdir -p /tmp/samtools-${SAMTOOLS_VERSION} && \ - tar jxvf /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 -C /tmp/samtools-${SAMTOOLS_VERSION} --strip-components 1 && \ - cd /tmp/samtools-${SAMTOOLS_VERSION} && \ - ./configure && \ - make && \ - make install && \ - cd .. 
&& rm -rf samtools-${SAMTOOLS_VERSION} +RUN pip install --no-cache-dir --upgrade pip wheel && \ + pip install --no-cache-dir \ + pandas \ + scipy \ + uncertainties \ + typing \ + pathlib \ + matplotlib \ + numpy # install seq (https://github.com/seq-lang/seq/): ENV SEQ_VERSION=0.9.11 @@ -51,13 +38,7 @@ RUN git clone --depth=1 https://github.com/aertslab/single_cell_toolkit.git /opt ENV seq_root_dir=/opt/seq ENV PATH="/opt/single_cell_toolkit:${PATH}" -# final set of packages for usability -RUN apt-get -y update && \ - apt-get -y --no-install-recommends install \ - procps \ - bash-completion \ - less && \ - apt-get remove --purge -y $BUILDPKGS && \ - rm -rf /var/cache/apt/* && \ - rm -rf /var/lib/apt/lists/* +RUN rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* && \ + ldconfig From d90db03e5ed5fece5d2299871924c22177378982 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 2 Apr 2021 10:55:37 +0200 Subject: [PATCH 040/202] Set default OMP_NUM_THREADS in single_cell_toolkit Docker image --- src/singlecelltoolkit/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile index d709a38b..1934ae3b 100644 --- a/src/singlecelltoolkit/Dockerfile +++ b/src/singlecelltoolkit/Dockerfile @@ -29,7 +29,7 @@ RUN mkdir -p /opt/seq && \ tar xzf seq-linux-x86_64.tar.gz --strip-components 1 -C /opt/seq && \ rm seq-linux-x86_64.tar.gz ENV PATH="/opt/seq/bin:${PATH}" -# ENV OMP_NUM_THREADS=1 +ENV OMP_NUM_THREADS=4 ENV SEQ_PYTHON=/usr/lib/x86_64-linux-gnu/libpython3.8.so.1 # install single_cell_toolkit From e2a1f032fde5c0dc24a15250ae23e659ff64b150 Mon Sep 17 00:00:00 2001 From: cflerin Date: Fri, 30 Apr 2021 15:42:32 +0200 Subject: [PATCH 041/202] Add a SIMPLE_PUBLISH process in utils --- src/utils/processes/utils.nf | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index bbab099f..e7af1bf8 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -470,6 +470,36 @@ process SC__PUBLISH { """ } +process SIMPLE_PUBLISH { + + publishDir \ + "${getPublishDir(params.global.outdir,toolName)}", \ + mode: "${params.utils.publish?.mode ? params.utils.publish.mode: 'link'}", \ + saveAs: { filename -> "${outputFileName}" } + + label 'compute_resources__minimal' + + input: + tuple \ + val(sampleId), \ + path(f, stageAs: 'input_file') + val(outputFileSuffix) + val(toolName) + + output: + tuple \ + val(sampleId), \ + path(outputFileName) + + script: + outputFileName = "${sampleId}${outputFileSuffix}" + """ + if [ ! 
-f ${outputFileName} ]; then + ln -s input_file "${outputFileName}" + fi + """ +} + process COMPRESS_HDF5() { From 44c0ef706e8f825e3799de1f62940f503c357b63 Mon Sep 17 00:00:00 2001 From: cflerin Date: Fri, 30 Apr 2021 15:42:55 +0200 Subject: [PATCH 042/202] Update atac_preprocess to use simple publish --- workflows/atac/preprocess.nf | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 1929a3c5..2ee3d644 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -16,14 +16,14 @@ include { BAM_TO_FRAGMENTS; } from './../../src/sinto/main.nf' params(params) include { BAP__BIORAD_DEBARCODE; } from './../../src/bap/workflows/bap_debarcode.nf' params(params) include { - PUBLISH as PUBLISH_BC_STATS; - PUBLISH as PUBLISH_BR_BC_STATS; - PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE1; - PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE2; - PUBLISH as PUBLISH_FASTQS_TRIMLOG_FASTP; - PUBLISH as PUBLISH_FRAGMENTS; - PUBLISH as PUBLISH_FRAGMENTS_INDEX; -} from "../../src/utils/workflows/utils.nf" params(params) + SIMPLE_PUBLISH as PUBLISH_BC_STATS; + SIMPLE_PUBLISH as PUBLISH_BR_BC_STATS; + SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE1; + SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE2; + SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_FASTP; + SIMPLE_PUBLISH as PUBLISH_FRAGMENTS; + SIMPLE_PUBLISH as PUBLISH_FRAGMENTS_INDEX; +} from "../../src/utils/processes/utils.nf" params(params) ////////////////////////////////////////////////////// @@ -85,7 +85,7 @@ workflow ATAC_PREPROCESS { // run barcode correction against a whitelist: fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl.map{ it -> tuple(it[0], it[3], it[5]) } ) - PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[2]) }, 'corrected.bc_stats', 'log', 'fastq', false) + PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[2]) }, '.corrected.bc_stats.log', 'fastq') // run barcode demultiplexing on each read+barcode: @@ -100,7 +100,7 @@ workflow ATAC_PREPROCESS { //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) // using singlecelltoolkit: fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, 'corrected.bc_stats', 'log', 'reports', false) + PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports') // concatenate the read channels: @@ -110,12 +110,12 @@ workflow ATAC_PREPROCESS { switch(params.atac_preprocess_tools.adapter_trimming_method) { case 'Trim_Galore': fastq_dex_trim = TRIMGALORE__TRIM(fastq_dex); - PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'R1.trimming_report', 'txt', 'reports', false); - PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, 'R2.trimming_report', 'txt', 'reports', false); + PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, '.R1.trimming_report.txt', 'reports'); + PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, '.R2.trimming_report.txt', 'reports'); break; case 'fastp': fastq_dex_trim = FASTP__TRIM(fastq_dex); - PUBLISH_FASTQS_TRIMLOG_FASTP(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, 'fastp.trimming_report', '.html', 'reports', false); + PUBLISH_FASTQS_TRIMLOG_FASTP(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, '.fastp.trimming_report.html', 'reports'); break; } @@ -126,8 
+126,8 @@ workflow ATAC_PREPROCESS { // generate a fragments file: fragments = BAM_TO_FRAGMENTS(bam) // publish fragments output: - PUBLISH_FRAGMENTS(fragments, 'sinto.fragments.tsv', 'gz', 'fragments', false) - PUBLISH_FRAGMENTS_INDEX(fragments.map{ it -> tuple(it[0], it[2]) }, 'sinto.fragments.tsv.gz', 'tbi', 'fragments', false) + PUBLISH_FRAGMENTS(fragments.map{ it -> tuple(it[0..1]) }, '.sinto.fragments.tsv.gz', 'fragments') + PUBLISH_FRAGMENTS_INDEX(fragments.map{ it -> tuple(it[0],it[2]) }, '.sinto.fragments.tsv.gz.tbi', 'fragments') emit: bam From 21da3bc7415387861742b2b9c8e1e55eae84614e Mon Sep 17 00:00:00 2001 From: cflerin Date: Fri, 30 Apr 2021 15:22:39 +0200 Subject: [PATCH 043/202] BAP: update image and parameters --- src/bap/bap.config | 2 +- src/bap/conf/bap_barcode_multiplet.config | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bap/bap.config b/src/bap/bap.config index 3283907e..fc763f72 100644 --- a/src/bap/bap.config +++ b/src/bap/bap.config @@ -1,7 +1,7 @@ params { tools { bap { - container = 'vibsinglecellnf/bap:2021-02-03-1d32827-b' + container = 'vibsinglecellnf/bap:2021-04-27-3b48f4b' } } } diff --git a/src/bap/conf/bap_barcode_multiplet.config b/src/bap/conf/bap_barcode_multiplet.config index ece81b4b..1d537411 100644 --- a/src/bap/conf/bap_barcode_multiplet.config +++ b/src/bap/conf/bap_barcode_multiplet.config @@ -2,10 +2,11 @@ params { tools { bap { barcode_multiplet { + report_ipynb = './src/bap/bin/bap_barcode_multiplet_report.ipynb' bead_tag = 'CB' // existing tag in bam file drop_tag = 'DB' // new tag with merged barcodes // filters: - minimum_barcode_fragments = 10 // if set to 0, this is determined automatically via knee-calling + minimum_barcode_fragments = 1000 // if set to 0, this is determined automatically via knee-calling barcode_whitelist = '' // use in place of the 'minimum_barcode_fragments' threshold minimum_jaccard_index = 0.0 nc_threshold = 6 From 9a653b3c050fad71eb2e9c7dab108473dcce74dc Mon Sep 17 00:00:00 2001 From: cflerin Date: Fri, 30 Apr 2021 15:23:33 +0200 Subject: [PATCH 044/202] BAP: add report ipynb and processes to run it --- .../bin/bap_barcode_multiplet_report.ipynb | 363 ++++++++++++++++++ src/bap/main.nf | 12 + src/bap/processes/barcode_multiplet.nf | 2 +- src/bap/processes/report.nf | 56 +++ 4 files changed, 432 insertions(+), 1 deletion(-) create mode 100644 src/bap/bin/bap_barcode_multiplet_report.ipynb create mode 100644 src/bap/processes/report.nf diff --git a/src/bap/bin/bap_barcode_multiplet_report.ipynb b/src/bap/bin/bap_barcode_multiplet_report.ipynb new file mode 100644 index 00000000..8cef0742 --- /dev/null +++ b/src/bap/bin/bap_barcode_multiplet_report.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "78d9bdeb-b1c0-4347-bc27-fda80a0a63ec", + "metadata": {}, + "source": [ + "# VSN Pipelines: BAP multiplet pipeline report\n", + "\n", + "Additional analyses and plots from the output of BAP (https://github.com/caleblareau/bap)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b62c5ef-9f22-42c4-bee6-b0aaff4db0bc", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "from collections import Counter\n", + "import itertools\n", + "import json\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2b6cccb-d9b9-4862-9ece-022907f119f6", + "metadata": {}, + 
"outputs": [], + "source": [ + "params = json.loads(WORKFLOW_PARAMETERS)\n", + "\n", + "print(f\"SAMPLE: {SAMPLE}\")\n", + "print(f\"BAP Parameters: {json.dumps(params, indent=4)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d4ea79a-0459-4830-846c-a0ca048fcc40", + "metadata": {}, + "outputs": [], + "source": [ + "# read in data\n", + "f_bap_qc = SAMPLE+'.implicatedBarcodes.csv.gz'\n", + "f_bap_bct = SAMPLE+'.barcodeTranslate.tsv'\n", + "f_bap_nc = SAMPLE+'.NCsumstats.tsv'\n", + "\n", + "bap_ib = pd.read_csv(f_bap_qc)\n", + "bap_bct = pd.read_csv(f_bap_bct, sep='\\t', header=None, names=['barcode','bap_barcode'])\n", + "bap_nc = pd.read_csv(f_bap_nc, sep='\\t')" + ] + }, + { + "cell_type": "markdown", + "id": "1d142706-698a-40cd-9dc1-aeac375940c4", + "metadata": {}, + "source": [ + "## Overlap index\n", + "\n", + "Plot the overlap scores, representing the pairwise similarity between each barcode." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf430147-661b-4107-b232-1f07e8689ee0", + "metadata": {}, + "outputs": [], + "source": [ + "# overlap index rank plot\n", + "def plot_overlap_rankplot(dat, label, ax):\n", + " y = dat['jaccard_frag'].sort_values(ascending=False)\n", + " ix = dat['jaccard_frag'][ dat['merged']==True ].idxmin()\n", + " thr = dat.iloc[ix]['jaccard_frag']\n", + " thrx = sum(dat['merged']==True)\n", + " \n", + " ax.scatter(x=range(y.shape[0]),\n", + " y=y.values,\n", + " s=4,\n", + " edgecolors=None)\n", + " ax.set_xscale('log')\n", + " ax.set_yscale('log')\n", + " ax.set_ylabel('Jaccard index per barcode pair')\n", + " ax.set_xlabel('Barcode pair rank')\n", + " ax.axhline(thr, color='grey', linestyle='--')\n", + " ax.text(0.01, 0.01, f\" thr={round(thr,4)} ({thrx} barcode pairs)\", ha='left', va='bottom' , transform=ax.transAxes, fontsize='x-small')\n", + " ax.set_title(label)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae73615d-e2bd-4ff8-8296-f85abdf17f2d", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(1,1, figsize=(5,4), dpi=150 )\n", + "\n", + "plot_overlap_rankplot(bap_ib, label=SAMPLE, ax=ax)\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "id": "b4f5a666-cc70-4394-9d77-2aaccbe36223", + "metadata": {}, + "source": [ + "## Calculate restricted longest common subsequence (rLCS)\n", + "\n", + "This approach follows the methods in:\n", + "* Lareau, C. A., Ma, S., Duarte, F. M. & Buenrostro, J. D. Inference and effects of barcode multiplets in droplet-based single-cell assays. Nature Communications 11, 1–9 (2020). 
https://doi.org/10.1038/s41467-020-14667-5\n",
+    "\n",
+    "This is calculated by taking each set of barcodes identified as a multiplet and computing the mean pairwise rLCS across the barcode sequences in that set. This separates two groups of droplet multiplets:\n",
+    "* **Physical multiplets** (multiple beads that are physically grouped together in a droplet)\n",
+    "  * Each bead carries its own uniform barcode, so the barcodes in the droplet are unrelated (mean rLCS below the cutoff threshold)\n",
+    "* **Complex beads** (assumed to be due to bead barcode synthesis errors)\n",
+    "  * A single bead carries a heterogeneous mix of closely related barcodes (mean rLCS at or above the cutoff threshold)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d4ef782-c47b-4c83-9907-544de71dbc0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calculate rLCS match score\n",
+    "def barcode_rle(a, b):\n",
+    "    assert len(a) == len(b)\n",
+    "    match_cnt = 0\n",
+    "    match_list = []\n",
+    "    for i in range(len(a)):\n",
+    "        if(a[i] == b[i]):\n",
+    "            match_cnt += 1\n",
+    "        else:\n",
+    "            match_list.append(match_cnt)\n",
+    "            match_cnt = 0\n",
+    "    match_list.append(match_cnt)\n",
+    "    return max(match_list)"
+   ]
+  },
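+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e0e0e0e-0000-4000-8000-0e0e0e0e0e0e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative check of the rLCS score above (hypothetical barcodes, not\n",
+    "# sample data): two barcodes that differ at a single position share a\n",
+    "# longest exactly-matching run of 4 bases; fully dissimilar barcodes score 0.\n",
+    "assert barcode_rle('AAACCC', 'AAACGC') == 4\n",
+    "assert barcode_rle('AAACCC', 'TTTGGG') == 0"
+   ]
+  },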
"fig, ax = plt.subplots(1,1, figsize=(5,4), dpi=150)\n", + "\n", + "sns.histplot(\n", + " multiplets['mean_rLCS'],\n", + " ax=ax,\n", + " discrete=True)\n", + "ax.set_xlabel('Mean rLCS per multiplet')\n", + "ax.set_ylabel('Count')\n", + "ax.axvline(x=rLCS_thr-0.5, color='grey', linestyle='--')\n", + "\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "id": "212e4dae-a0c6-4b19-bcdd-152a88c1918e", + "metadata": {}, + "source": [ + "## Barcode multiplets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fee9465-f161-4346-81df-79684ced6d58", + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1,ax2) = plt.subplots(1,2, figsize=(10,4), dpi=150, sharex=True, sharey=True )\n", + "\n", + "sns.histplot(\n", + " multiplets['bap_N'][(multiplets['bap_N']!='N01') & (multiplets['multiplets_passing_rLCS_thr']==True)].sort_values(),\n", + " ax=ax1,\n", + " discrete=True)\n", + "ax1.set_title(f\"Physical multiplets (non-barcode similarity; rLCS<{rLCS_thr})\")\n", + "ax1.set_xlabel('# beads / droplet')\n", + "ax1.set_ylabel('Count')\n", + "\n", + "sns.histplot(\n", + " multiplets['bap_N'][(multiplets['bap_N']!='N01') & (multiplets['multiplets_passing_rLCS_thr']==False)].sort_values(),\n", + " ax=ax2,\n", + " discrete=True)\n", + "#ax1.set_title(SAMPLE)\n", + "ax2.set_title(f\"Complex beads (barcode similarity; rLCS>={rLCS_thr})\")\n", + "ax2.set_xlabel('# barcodes / droplet')\n", + "ax2.set_ylabel('Count')\n", + "\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "id": "4a480494-d0f1-4cbb-a5f1-c6e88d926a7e", + "metadata": {}, + "source": [ + "## NC values\n", + "\n", + "Look at the number of occurrences of a particular read across the experiment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b09d13c-d641-402e-97a5-3f3aa9f4cfd7", + "metadata": {}, + "outputs": [], + "source": [ + "# plot NC values hist\n", + "\n", + "def plot_bap_NC(dat, label, ax):\n", + " p_ = sns.barplot(x='NC_value', y='NumberOfFragments', data=dat, ax=ax, color='grey')\n", + " #ax.set_xscale('log')\n", + " ax.set_yscale('log')\n", + " ax.set_xlabel('NC value')\n", + " ax.set_ylabel('Number of fragments')\n", + " ax.set_title(SAMPLE)\n", + " ax.set_xbound(lower=-1)\n", + " for ind, label in enumerate(p_.get_xticklabels()):\n", + " if ind % 20 == 0: # every 20th label is kept\n", + " label.set_visible(True)\n", + " else:\n", + " label.set_visible(False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902a94f7-0f97-413a-84a5-0e2afc7c0153", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(1,1, figsize=(8,4), dpi=150 )\n", + "\n", + "plot_bap_NC(bap_nc, label=SAMPLE, ax=ax)\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "785588ea-429d-4f82-9ce7-2e300993af9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/bap/main.nf b/src/bap/main.nf index e8e33e47..540fc70c 100644 --- a/src/bap/main.nf +++ b/src/bap/main.nf @@ -7,6 +7,11 @@ include { BAP__BARCODE_MULTIPLET_PIPELINE as 
BARCODE_MULTIPLET_PIPELINE; } from './processes/barcode_multiplet.nf' params(params) +include { + GENERATE_REPORT; + REPORT_TO_HTML; +} from './processes/report.nf' params(params) + ////////////////////////////////////////////////////// // Define the workflow @@ -29,6 +34,13 @@ workflow BAP__BARCODE_MULTIPLET_WF { bap = BARCODE_MULTIPLET_PIPELINE(bam.map { it -> tuple(it[0], it[1], it[2]) }) + GENERATE_REPORT( + file(workflow.projectDir + params.tools.bap.barcode_multiplet.report_ipynb), + bap.map { it -> tuple(it[0], it[3]) }, + "BAP_multiplet_report" + ) | + REPORT_TO_HTML + emit: bap diff --git a/src/bap/processes/barcode_multiplet.nf b/src/bap/processes/barcode_multiplet.nf index d78ca34a..ec149efe 100644 --- a/src/bap/processes/barcode_multiplet.nf +++ b/src/bap/processes/barcode_multiplet.nf @@ -25,7 +25,7 @@ process BAP__BARCODE_MULTIPLET_PIPELINE { script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_multiplet) - processParams = sampleParams.local + processParams = sampleParams.local """ bap2 bam \ --input ${bam} \ diff --git a/src/bap/processes/report.nf b/src/bap/processes/report.nf new file mode 100644 index 00000000..9da2f2bd --- /dev/null +++ b/src/bap/processes/report.nf @@ -0,0 +1,56 @@ +nextflow.enable.dsl=2 + +import java.nio.file.Paths +import static groovy.json.JsonOutput.* + +toolParams = params.tools.bap + +process GENERATE_REPORT { + + container toolParams.container + publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode + label 'compute_resources__report' + + input: + path(ipynb) + tuple val(sampleId), + path(bap_final) + val(reportTitle) + + output: + tuple val(sampleId), + path("${sampleId}.${reportTitle}.ipynb") + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_multiplet) + processParams = sampleParams.local + bap_params = toJson(processParams) + """ + papermill ${ipynb} \ + --report-mode \ + ${sampleId}.${reportTitle}.ipynb \ + -p SAMPLE ${sampleId} \ + -p WORKFLOW_PARAMETERS '${bap_params}' \ + """ +} + + +process REPORT_TO_HTML { + + container toolParams.container + publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode + label 'compute_resources__report' + + input: + tuple val(sampleId), + path(ipynb) + + output: + file("*.html") + + script: + """ + jupyter nbconvert ${ipynb} --to html + """ +} + From f062491e698428a5d777a6255d638d263f645937 Mon Sep 17 00:00:00 2001 From: cflerin Date: Fri, 30 Apr 2021 15:37:19 +0200 Subject: [PATCH 045/202] BAP: update Dockerfile --- src/bap/Dockerfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/bap/Dockerfile b/src/bap/Dockerfile index c92cfd0b..6fff7acc 100644 --- a/src/bap/Dockerfile +++ b/src/bap/Dockerfile @@ -34,6 +34,15 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir biopython rapidfuzz fuzzysearch python-Levenshtein && \ pip install --no-cache-dir git+https://github.com/cflerin/bap.git +RUN pip install --no-cache-dir \ + pandas \ + matplotlib \ + seaborn \ + nbconvert \ + papermill \ + ipython \ + ipykernel \ + jupyter-client ################################################## # final set of packages for usability From 13f5a141e554a56d8d95fa420962a9aa1dadf171 Mon Sep 17 00:00:00 2001 From: cflerin Date: Sun, 2 May 2021 22:19:21 +0200 Subject: [PATCH 046/202] BAP: remove sinto from config --- nextflow.config | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index f7d06aec..f8deafe1 100644 --- 
a/nextflow.config +++ b/nextflow.config @@ -195,7 +195,6 @@ profiles { bap { includeConfig 'src/bap/bap.config' includeConfig 'src/bap/conf/bap_barcode_multiplet.config' - includeConfig 'src/sinto/sinto.config' includeConfig 'src/bwamaptools/bwamaptools.config' } From 3bbcd06a18d5767ef3d0f357982023d53ce227c9 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sat, 22 May 2021 00:40:48 +0200 Subject: [PATCH 047/202] Remove add_barcode_as_tag and config entries --- src/bwamaptools/conf/bwa_mapping.config | 11 ----- .../processes/add_barcode_as_tag.nf | 46 ------------------- src/bwamaptools/processes/mapping_summary.nf | 2 +- 3 files changed, 1 insertion(+), 58 deletions(-) delete mode 100644 src/bwamaptools/processes/add_barcode_as_tag.nf diff --git a/src/bwamaptools/conf/bwa_mapping.config b/src/bwamaptools/conf/bwa_mapping.config index 69192b93..8e2c328e 100644 --- a/src/bwamaptools/conf/bwa_mapping.config +++ b/src/bwamaptools/conf/bwa_mapping.config @@ -3,17 +3,6 @@ params { bwamaptools { bwa_fasta = '/path/to/bwa_index/hg38.fa' bwa_version = 'bwa-mem2' // or 'bwa' - // to do: add bwa mem params - //mem { - //} - add_barcode_as_tag { - uncorrected_bc_tag = 'CR' - corrected_bc_tag = 'CB' - // delimiter used to get barcode block from the qname. ('_'): - delimiter_to_get_barcode_block = '_' - // delimiter used to split the {uncorrected_bc}-{corrected_bc} block ('-'): - delimiter_to_split_barcodes = '-' - } } } } diff --git a/src/bwamaptools/processes/add_barcode_as_tag.nf b/src/bwamaptools/processes/add_barcode_as_tag.nf deleted file mode 100644 index 393a6f64..00000000 --- a/src/bwamaptools/processes/add_barcode_as_tag.nf +++ /dev/null @@ -1,46 +0,0 @@ -nextflow.enable.dsl=2 - -// binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" - -toolParams = params.tools.bwamaptools - -process BWAMAPTOOLS__ADD_BARCODE_TAG { - - container toolParams.container - label 'compute_resources__default','compute_resources__24hqueue' - // todo: add storeDir instead of publishDir - - input: - tuple val(sampleId), - path(bam) - - output: - tuple val(sampleId), - path("${sampleId}.bwa.possorted.bam") - - script: - def sampleParams = params.parseConfig(sampleId, params.global, toolParams.add_barcode_as_tag) - processParams = sampleParams.local - """ - samtools view -h ${bam} \ - | mawk '/^@/ {print;next} { - N=split(\$1,n,"${processParams.delimiter_to_get_barcode_block}"); - NN=split(n[1],nbc,"${processParams.delimiter_to_split_barcodes}"); - ucorr_bc=""; - corr_bc=""; - if(NN==1){ # BioRad data with format {corrected_bc}_qname - corr_bc="\t${processParams.corrected_bc_tag}:Z:" nbc[1]; - } - if(NN==2){ # standard format with {uncorrected_bc}-{corrected_bc}_qname - ucorr_bc="\t${processParams.uncorrected_bc_tag}:Z:" nbc[1]; - if(length(nbc[2])>0){ # skip if corrected bc is empty: {uncorrected_bc}-_qname - corr_bc="\t${processParams.corrected_bc_tag}:Z:" nbc[2]; - } - } - sub(/^[^${processParams.delimiter_to_get_barcode_block}]+${processParams.delimiter_to_get_barcode_block}/,"",\$0); - print \$0 ucorr_bc corr_bc; - }' \ - | samtools view -bS - -o ${sampleId}.bwa.possorted.bam - """ -} - diff --git a/src/bwamaptools/processes/mapping_summary.nf b/src/bwamaptools/processes/mapping_summary.nf index 9f76af6c..233b0ebe 100644 --- a/src/bwamaptools/processes/mapping_summary.nf +++ b/src/bwamaptools/processes/mapping_summary.nf @@ -19,7 +19,7 @@ process BWAMAPTOOLS__MAPPING_SUMMARY { path("${sampleId}.mapping_stats.tsv") script: - def sampleParams = params.parseConfig(sampleId, 
params.global, toolParams.add_barcode_as_tag) + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) processParams = sampleParams.local """ ${binDir}mapping_summary.sh \ From bb785a6ad232c6196f8f05e699dda3064a82ef44 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sat, 22 May 2021 01:09:43 +0200 Subject: [PATCH 048/202] Fix picard parameter scoping --- src/picard/picard.config | 6 +++--- src/picard/processes/estimate_library_complexity.nf | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/picard/picard.config b/src/picard/picard.config index 16c02436..ea7d3958 100644 --- a/src/picard/picard.config +++ b/src/picard/picard.config @@ -2,9 +2,9 @@ params { tools { picard { container = 'vibsinglecellnf/picard:2.21.1' - } - estimate_library_complexity { - barcode_tag = 'CB' + estimate_library_complexity { + barcode_tag = 'CB' + } } } } diff --git a/src/picard/processes/estimate_library_complexity.nf b/src/picard/processes/estimate_library_complexity.nf index 07df6701..3829f1e4 100644 --- a/src/picard/processes/estimate_library_complexity.nf +++ b/src/picard/processes/estimate_library_complexity.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.picard +toolParams = params.tools.picard process PICARD__ESTIMATE_LIBRARY_COMPLEXITY { From 4efc576bd7c5bdc2c9330d7f3f5e48bed806bd42 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sat, 22 May 2021 01:13:59 +0200 Subject: [PATCH 049/202] Use copy as default publish method --- src/utils/conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/conf/base.config b/src/utils/conf/base.config index 7f872648..bc34081a 100644 --- a/src/utils/conf/base.config +++ b/src/utils/conf/base.config @@ -5,7 +5,7 @@ params { // pipelineOutputSuffix = '' compressionLevel = 6 annotateWithBatchVariableName = false - mode = 'link' + mode = 'copy' } } sc { From 319fea454e58f52dc2ea3414fdf37f67de1016c6 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sat, 22 May 2021 01:48:49 +0200 Subject: [PATCH 050/202] Change markdups default to Picard MarkDuplicates --- conf/atac/preprocess.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index d7bffce5..d41e3f36 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -1,6 +1,6 @@ params { atac_preprocess_tools { - mark_duplicates_method = 'MarkDuplicatesSpark' + mark_duplicates_method = 'MarkDuplicates' adapter_trimming_method = 'Trim_Galore' } data { From e436311a10a643a84e28b209f21a55d281ee1270 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 25 May 2021 20:44:37 +0200 Subject: [PATCH 051/202] Fix picard MarkDuplicates - update toolParams scope - add metrics file parameter --- src/picard/processes/mark_duplicates.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/picard/processes/mark_duplicates.nf b/src/picard/processes/mark_duplicates.nf index 11c71f06..3f0667bb 100644 --- a/src/picard/processes/mark_duplicates.nf +++ b/src/picard/processes/mark_duplicates.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.picard +toolParams = params.tools.picard process PICARD__MARK_DUPLICATES_AND_SORT { @@ -26,6 +26,7 @@ process PICARD__MARK_DUPLICATES_AND_SORT { java -jar /picard.jar MarkDuplicates \ I=${bam} \ O=/dev/stdout \ + METRICS_FILE=${sampleId}.bwa.out.fixmate.picard_markdup.metrics.txt \ BARCODE_TAG=CB \ COMPRESSION_LEVEL=0 \ QUIET=true \ From 2aebdf3a084c4db615b68f2166ef95c99270e14e Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 25 May 2021 22:35:44 +0200 Subject: [PATCH 052/202] Process multiple fastqs per sample - Split bwa mapping step to allow processing of multiple fastqs per sample - Add merge bams step to merge multiple bams per sample - Switch to simple_publish method --- src/bwamaptools/main.nf | 46 +++++++++++++++++-------- src/picard/processes/merge_sam_files.nf | 33 ++++++++++++++++++ 2 files changed, 64 insertions(+), 15 deletions(-) create mode 100644 src/picard/processes/merge_sam_files.nf diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index d96a75c5..8dd54662 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -21,11 +21,12 @@ include { GATK__MARK_DUPLICATES_SPARK; } from './../../src/gatk/processes/mark_duplicates_spark.nf' params(params) include { - PUBLISH as PUBLISH_BAM; - PUBLISH as PUBLISH_BAM_INDEX; - PUBLISH as PUBLISH_MAPPING_SUMMARY; - PUBLISH as PUBLISH_LIBRARY_METRICS; -} from "../utils/workflows/utils.nf" params(params) + SIMPLE_PUBLISH as PUBLISH_BAM; + SIMPLE_PUBLISH as PUBLISH_BAM_INDEX; + SIMPLE_PUBLISH as PUBLISH_MAPPING_SUMMARY; + SIMPLE_PUBLISH as PUBLISH_LIBRARY_METRICS; +} from "../utils/processes/utils.nf" params(params) +//} from "../utils/workflows/utils.nf" params(params) ////////////////////////////////////////////////////// // Define the workflow @@ -62,8 +63,9 @@ workflow get_bwa_index { workflow BWA_MAPPING_PE { take: - data // a channel of [val(sampleId), path(fastq_PE1), path(fastq_PE2)] - mark_duplicates_method + data // a channel of [val(unique_sampleId), val(sampleId), path(fastq_PE1), path(fastq_PE2)] + // unique_sampleId is used to label the read group field "SM" and (part of) "LB", + // while sampleId represents each split fastq file for a unique sample. 
main: /* @@ -72,25 +74,39 @@ workflow BWA_MAPPING_PE { */ bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) - BWA_MEM_PE(bwa_inputs) + aligned_bam = BWA_MEM_PE(bwa_inputs) + + emit: + aligned_bam + +} + + +workflow MARK_DUPLICATES { + + take: + data // a channel of [val(sampleId), path(bam) ] + mark_duplicates_method + + main: switch(mark_duplicates_method) { case 'MarkDuplicates': - dup_marked_bam = PICARD__MARK_DUPLICATES_AND_SORT(BWA_MEM_PE.out) + dup_marked_bam = PICARD__MARK_DUPLICATES_AND_SORT(data) break case 'MarkDuplicatesSpark': - dup_marked_bam = GATK__MARK_DUPLICATES_SPARK(BWA_MEM_PE.out) + dup_marked_bam = GATK__MARK_DUPLICATES_SPARK(data) break } MAPPING_SUMMARY(dup_marked_bam) - PICARD__ESTIMATE_LIBRARY_COMPLEXITY(BWA_MEM_PE.out) + PICARD__ESTIMATE_LIBRARY_COMPLEXITY(data) // publish output: - PUBLISH_BAM(dup_marked_bam, 'bwa.out.possorted', 'bam', 'bam', false) - PUBLISH_BAM_INDEX(dup_marked_bam.map{it -> tuple(it[0], it[2])}, 'bwa.out.possorted.bam', 'bai', 'bam', false) - PUBLISH_LIBRARY_METRICS(PICARD__ESTIMATE_LIBRARY_COMPLEXITY.out, 'library_complexity_metrics', 'txt', 'reports', false) - PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, 'mapping_stats', 'tsv', 'bam', false) + PUBLISH_BAM(dup_marked_bam.map{it -> tuple(it[0], it[1])}, '.bwa.out.possorted.bam', 'bam') + PUBLISH_BAM_INDEX(dup_marked_bam.map{it -> tuple(it[0], it[2])}, '.bwa.out.possorted.bam.bai', 'bam') + PUBLISH_LIBRARY_METRICS(PICARD__ESTIMATE_LIBRARY_COMPLEXITY.out, '.library_complexity_metrics.txt', 'reports') + PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, '.mapping_stats.tsv', 'bam') emit: dup_marked_bam diff --git a/src/picard/processes/merge_sam_files.nf b/src/picard/processes/merge_sam_files.nf new file mode 100644 index 00000000..1a52dd3e --- /dev/null +++ b/src/picard/processes/merge_sam_files.nf @@ -0,0 +1,33 @@ +nextflow.enable.dsl=2 + +// binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" + +toolParams = params.tools.picard + +process PICARD__MERGE_SAM_FILES { + + container toolParams.container + label 'compute_resources__default','compute_resources__24hqueue' + + input: + tuple val(sampleId), + path(bams) + + output: + tuple val(sampleId), + path("${sampleId}.bwa.out.fixmate.merged.bam") + + script: + //def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + //processParams = sampleParams.local + """ + java -jar /picard.jar MergeSamFiles \ + ${"I="+bams.join(" I=")} \ + O=/dev/stdout \ + | java -jar /picard.jar SortSam \ + I=/dev/stdin \ + O=${sampleId}.bwa.out.fixmate.merged.bam \ + SORT_ORDER=queryname + """ +} + From f3cb25f25c31ab55d3045ddbed234f642806d2ea Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 25 May 2021 22:51:22 +0200 Subject: [PATCH 053/202] Add capability to run multiple fastqs per sample - Create unique sample ID based on each fq name and sampleId - Propagate changes to the rest of the pipeline: - Use unique sampleId for RG sample ID (SM) - Merge files per-sample after mapping and prior to MarkDuplicates --- src/bwamaptools/processes/mapping.nf | 5 +-- workflows/atac/preprocess.nf | 49 +++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index ec82e722..e95d1401 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -12,6 +12,7 @@ process BWAMAPTOOLS__BWA_MEM_PE { input: tuple path(bwa_fasta), path(bwa_index), + val(unique_sampleId), val(sampleId), path(fastq_PE1), path(fastq_PE2) @@ -21,14 +22,14 @@ process BWAMAPTOOLS__BWA_MEM_PE { path("${sampleId}.bwa.out.fixmate.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + def sampleParams = params.parseConfig(unique_sampleId, params.global, toolParams) processParams = sampleParams.local """ id=\$(zcat ${fastq_PE1} | head -n 1 | cut -f 1-4 -d':' | sed 's/@//') ${toolParams.bwa_version} mem \ -t ${task.cpus} \ -C \ - -R "@RG\\tID:\${id}\\tSM:${sampleId}\\tLB:\${id}"__"${sampleId}\\tPL:ILLUMINA" \ + -R "@RG\\tID:\${id}\\tSM:${unique_sampleId}\\tLB:\${id}"__"${unique_sampleId}\\tPL:ILLUMINA" \ ${bwa_fasta} \ ${fastq_PE1} \ ${fastq_PE2} \ diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 2ee3d644..c1b5c284 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -11,10 +11,17 @@ include { } from './../../src/fastp/processes/adapter_trimming.nf' params(params) // workflow imports: -include { BWA_MAPPING_PE; } from './../../src/bwamaptools/main.nf' params(params) +include { + BWA_MAPPING_PE; + MARK_DUPLICATES; +} from './../../src/bwamaptools/main.nf' params(params) include { BAM_TO_FRAGMENTS; } from './../../src/sinto/main.nf' params(params) include { BAP__BIORAD_DEBARCODE; } from './../../src/bap/workflows/bap_debarcode.nf' params(params) +include { + PICARD__MERGE_SAM_FILES; +} from './../../src/picard/processes/merge_sam_files.nf' params(params) + include { SIMPLE_PUBLISH as PUBLISH_BC_STATS; SIMPLE_PUBLISH as PUBLISH_BR_BC_STATS; @@ -43,11 +50,13 @@ workflow ATAC_PREPROCESS { ) .map { row -> tuple( - row.sample_name, + row.sample_name + "___" + file(row.fastq_PE1_path) + .getSimpleName() + .replaceAll(row.sample_name,""), row.technology, - row.fastq_PE1_path, - row.fastq_barcode_path, - row.fastq_PE2_path + file(row.fastq_PE1_path, checkIfExists: true), + file(row.fastq_barcode_path, 
checkIfExists: true), + file(row.fastq_PE2_path, checkIfExists: true) ) } .branch { @@ -55,7 +64,6 @@ workflow ATAC_PREPROCESS { standard: true // capture all other technology types here } - /* Barcode correction */ // gather barcode whitelists from params into a channel: wl = Channel.empty() @@ -120,8 +128,32 @@ workflow ATAC_PREPROCESS { } // map with bwa mem: - bam = BWA_MAPPING_PE(fastq_dex_trim.map { it -> tuple(it[0..2]) }, - params.atac_preprocess_tools.mark_duplicates_method) + aligned_bam = BWA_MAPPING_PE( + fastq_dex_trim.map { it -> tuple(it[0].split("___")[0], // [val(unique_sampleId), + *it[0..2] ) // val(sampleId), path(fastq_PE1), path(fastq_PE2)] + }) + + + // split by sample: + aligned_bam.map{ it -> tuple(it[0].split("___")[0], it[1]) } // [ sampleId, bam ] + .groupTuple() + .branch { + to_merge: it[1].size() > 1 + no_merge: it[1].size() == 1 + } + .set { aligned_bam_size_split } + + // merge samples with multiple files: + merged_bam = PICARD__MERGE_SAM_FILES(aligned_bam_size_split.to_merge) + + // re-combine with single files: + merged_bam.mix(aligned_bam_size_split.no_merge + .map { it -> tuple(it[0], *it[1]) } + ) + .set { aligned_bam_sample_merged } + + bam = MARK_DUPLICATES(aligned_bam_sample_merged, + params.atac_preprocess_tools.mark_duplicates_method) // generate a fragments file: fragments = BAM_TO_FRAGMENTS(bam) @@ -132,6 +164,5 @@ workflow ATAC_PREPROCESS { emit: bam fragments - } From 2a0f0dd41d8e65efb2023b82d297e76560e60e62 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 25 May 2021 23:20:48 +0200 Subject: [PATCH 054/202] Update report structure in out directory - Add MarkDuplicates metrics file - Rename reports/ directories --- src/bwamaptools/main.nf | 10 ++++++---- src/picard/processes/mark_duplicates.nf | 3 ++- workflows/atac/preprocess.nf | 10 +++++----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index 8dd54662..3c79ff4e 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -24,6 +24,7 @@ include { SIMPLE_PUBLISH as PUBLISH_BAM; SIMPLE_PUBLISH as PUBLISH_BAM_INDEX; SIMPLE_PUBLISH as PUBLISH_MAPPING_SUMMARY; + SIMPLE_PUBLISH as PUBLISH_MARKDUPS_METRICS; SIMPLE_PUBLISH as PUBLISH_LIBRARY_METRICS; } from "../utils/processes/utils.nf" params(params) //} from "../utils/workflows/utils.nf" params(params) @@ -93,23 +94,24 @@ workflow MARK_DUPLICATES { switch(mark_duplicates_method) { case 'MarkDuplicates': dup_marked_bam = PICARD__MARK_DUPLICATES_AND_SORT(data) + PUBLISH_MARKDUPS_METRICS(dup_marked_bam.map{it -> tuple(it[0], it[3])}, '.mark_duplicates_metrics.txt', 'reports/mark_duplicates') break case 'MarkDuplicatesSpark': dup_marked_bam = GATK__MARK_DUPLICATES_SPARK(data) break } - MAPPING_SUMMARY(dup_marked_bam) + MAPPING_SUMMARY(dup_marked_bam.map { it -> tuple(it[0..2]) }) PICARD__ESTIMATE_LIBRARY_COMPLEXITY(data) // publish output: PUBLISH_BAM(dup_marked_bam.map{it -> tuple(it[0], it[1])}, '.bwa.out.possorted.bam', 'bam') PUBLISH_BAM_INDEX(dup_marked_bam.map{it -> tuple(it[0], it[2])}, '.bwa.out.possorted.bam.bai', 'bam') - PUBLISH_LIBRARY_METRICS(PICARD__ESTIMATE_LIBRARY_COMPLEXITY.out, '.library_complexity_metrics.txt', 'reports') - PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, '.mapping_stats.tsv', 'bam') + PUBLISH_LIBRARY_METRICS(PICARD__ESTIMATE_LIBRARY_COMPLEXITY.out, '.library_complexity_metrics.txt', 'reports/mark_duplicates') + PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, '.mapping_stats.tsv', 'reports/mapping_stats') emit: - dup_marked_bam + 
dup_marked_bam.map { it -> tuple(it[0..2]) } } diff --git a/src/picard/processes/mark_duplicates.nf b/src/picard/processes/mark_duplicates.nf index 3f0667bb..766dd4c6 100644 --- a/src/picard/processes/mark_duplicates.nf +++ b/src/picard/processes/mark_duplicates.nf @@ -16,7 +16,8 @@ process PICARD__MARK_DUPLICATES_AND_SORT { output: tuple val(sampleId), path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam"), - path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bai") + path("${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bai"), + path("${sampleId}.bwa.out.fixmate.picard_markdup.metrics.txt") script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams) diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index c1b5c284..6f29635a 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -93,7 +93,7 @@ workflow ATAC_PREPROCESS { // run barcode correction against a whitelist: fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl.map{ it -> tuple(it[0], it[3], it[5]) } ) - PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[2]) }, '.corrected.bc_stats.log', 'fastq') + PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[2]) }, '.corrected.bc_stats.log', 'reports/barcode') // run barcode demultiplexing on each read+barcode: @@ -108,7 +108,7 @@ workflow ATAC_PREPROCESS { //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) // using singlecelltoolkit: fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports') + PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports/barcode') // concatenate the read channels: @@ -118,12 +118,12 @@ workflow ATAC_PREPROCESS { switch(params.atac_preprocess_tools.adapter_trimming_method) { case 'Trim_Galore': fastq_dex_trim = TRIMGALORE__TRIM(fastq_dex); - PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, '.R1.trimming_report.txt', 'reports'); - PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, '.R2.trimming_report.txt', 'reports'); + PUBLISH_FASTQS_TRIMLOG_PE1(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, '.R1.trimming_report.txt', 'reports/trim'); + PUBLISH_FASTQS_TRIMLOG_PE2(fastq_dex_trim.map{ it -> tuple(it[0], it[4]) }, '.R2.trimming_report.txt', 'reports/trim'); break; case 'fastp': fastq_dex_trim = FASTP__TRIM(fastq_dex); - PUBLISH_FASTQS_TRIMLOG_FASTP(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, '.fastp.trimming_report.html', 'reports'); + PUBLISH_FASTQS_TRIMLOG_FASTP(fastq_dex_trim.map{ it -> tuple(it[0], it[3]) }, '.fastp.trimming_report.html', 'reports/trim'); break; } From cd319603e7a33480f53d5fa42d0eb2f4e130a49c Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 26 May 2021 11:44:04 +0200 Subject: [PATCH 055/202] Update documentation for scATAC-seq preprocessing --- docs/scatac-seq.rst | 247 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 201 insertions(+), 46 deletions(-) diff --git a/docs/scatac-seq.rst b/docs/scatac-seq.rst index efe75894..a2b418de 100644 --- a/docs/scatac-seq.rst +++ b/docs/scatac-seq.rst @@ -3,27 +3,41 @@ scATAC-seq Preprocessing This pipeline takes fastq files from paired end single cell ATAC-seq, and applies preprocessing steps to align the reads to a reference genome, and produce a bam file and scATAC-seq 
fragments file.
+
+Pipeline Steps
+**************
+
The full steps are:

- Barcode correction:
-  * For 'standard' and 'multiome' samples (e.g. 10x Genomics) correction is performed against a whitelist by `this script `_.
+  * For 'standard' and 'multiome' samples (e.g. 10x Genomics or similar) correction is performed against a whitelist by
+    `this method `_
+    from `aertslab/single_cell_toolkit `_.
  * For 'biorad' samples, barcode correction is performed by `BAP `_.
-- Debarcoding: Add the barcode sequence to the beginning of the fastq sequence identifier
-- Read/adapter trimming
+- Fastq barcoding: Add the barcode sequence to the comment field of the fastq sequence identifier.
+  Uses methods from `aertslab/single_cell_toolkit `_.
+- Read/adapter trimming
+  (`Trim_Galore `_
+  or `fastp `_).
- Mapping to a reference genome:
-  * ``bwa mem`` is used with default parameters.
-  * Duplicates are marked with ``samtools markdup``.
-  * Droplet barcodes are included in the BAM file with the ``CR`` tag (by default). No barcode correction is performed.
-
+  * ``bwa mem`` is used with default parameters, with a choice of the original
+    `bwa mem `_, or `bwa-mem2 `_.
+- Mark PCR and optical duplicates (`MarkDuplicates (Picard) `_
+  or `MarkDuplicatesSpark (GATK) `_).
+- Estimate library complexity with
+  `EstimateLibraryComplexity (Picard) `_.
- A fragments file is created using `Sinto `_.

-Input
-*****
+----
+
+Pipeline Input Metadata
+***********************

-The input to this pipeline is a (tab-delimited) metadata table with the sample ID, sequencing technology, and locations of the fastq files:
+The input to this pipeline is a (tab-delimited) metadata table with the sample ID, sequencing technology, and locations of the fastq files.
+Note that the fastq file fields must be full paths; this is not shown here for clarity:

.. list-table:: Metadata Table
   :widths: 10 10 10 10 10
@@ -49,27 +63,139 @@ The input to this pipeline is a (tab-delimited) metadata table with the sample I
    - sample_3_R1.fastq.gz
    -
    - sample_3_R3.fastq.gz
+  * - sample_4
+    - revcomp_wl
+    - sample_4_R1.fastq.gz
+    - sample_4_R2.fastq.gz
+    - sample_4_R3.fastq.gz

The columns represent:

- ``sample_name`` Sample name for labeling the sample in the pipeline and output files. This can be any arbitrary string.
-- ``technology``: This described the barcode correction and processing methods to use for the fastq files. Current options are ``standard``, ``multiome``, or ``biorad``. See below for additional details.
+- ``technology``: This controls the barcode correction and processing methods to use for the fastq files. Currently only the ``biorad`` option involves different processing steps. Otherwise, the value in this field (e.g. ``standard``, ``multiome``) controls which barcode whitelist is used for correction. See below for additional details.
- ``fastq_PE1_path``: The full path to the fastq file for the first read in a pair.
-- ``fastq_barcode_path``: The full path to the fastq file containing the barcodes. This column can be blank/empty depending on the technology setting.
+- ``fastq_barcode_path``: The full path to the fastq file containing the barcodes. This column can be blank/empty depending on the technology setting (e.g. ``biorad``).
- ``fastq_PE2_path``: The full path to the fastq file for the second read in a pair.
+
+Fastq input
+-----------
+
+Fastq input for each sample can be given as a single set of files (R1, R2, R3), or it can be multiple files, in the case of samples which have been split across multiple sequencing lanes. 
+A combination of these cases can be processed together in one metadata file.
+
+Within the pipeline, all of the reads from each set of files will be considered and labeled as one read group.
+Read group names are taken from the first read in the fastq::
+
+    @A01044:19:HLYKFDRXX:1:2101:4291:1000 1:N:0:ACTCAGAC
+
+will produce an RG in the bam::
+
+    @RG	ID:A01044:19:HLYKFDRXX:1	SM:sample_1	LB:A01044:19:HLYKFDRXX:1__sample_1	PL:ILLUMINA
+
+
+Single fastq input
+__________________
+
+In this situation, there is only one set of reads per sample, and the metadata file will look very similar to one of the rows from above.
+There will be one read group in the final bam file.
+
+
+Split fastq input
+_________________
+
+In this case, multiple fastq files (in rows) for each sample can be given.
+In this example, there are two sets of fastqs for ``sample_1`` that were run on two separate lanes.
+Note that the sample ID is the same for both rows:
+
+.. list-table:: Metadata Table with split fastqs
+   :widths: 10 10 10 10 10
+   :header-rows: 1
+
+   * - sample_name
+     - technology
+     - fastq_PE1_path
+     - fastq_barcode_path
+     - fastq_PE2_path
+   * - sample_1
+     - standard
+     - sample_1_S1_L001_R1_001.fastq.gz
+     - sample_1_S1_L001_R2_001.fastq.gz
+     - sample_1_S1_L001_R3_001.fastq.gz
+   * - sample_1
+     - standard
+     - sample_1_S1_L002_R1_001.fastq.gz
+     - sample_1_S1_L002_R2_001.fastq.gz
+     - sample_1_S1_L002_R3_001.fastq.gz
+
+In this situation, each set of fastqs will be processed separately for the barcode correction, barcode addition to the fastq comment field, adapter trimming, and mapping steps.
+Following mapping, each mapped bam is merged and duplicates are marked using the full data.
+Downstream steps are done with the merged data.
+
+
+Generating the metadata file
+----------------------------
+
+Note that there is an easy way to create the metadata from the file paths for each sample by using the following bash command:
+
+.. raw:: html
+
+   <details>
+   <summary>metadata generator</summary>
+
+.. code-block:: none
+
+    create_atac_metadata() {
+        local sample="${1}"
+        local technology="${2}"
+        local file_prefix="${3}"
+        local read_labels="${4}"
+        if [ "${sample}" == "header" ]; then
+            echo -e "sample_name\ttechnology\tfastq_PE1_path\tfastq_barcode_path\tfastq_PE2_path"
+            return 0
+        fi
+        read_labels_arr=(${read_labels//,/ })
+        R1=(${file_prefix}*${read_labels_arr[0]}*)
+        R2=(${file_prefix}*${read_labels_arr[1]}*)
+        R3=(${file_prefix}*${read_labels_arr[2]}*)
+        for i in "${!R1[@]}"; do
+            echo -e "${sample}\t${technology}\t${R1[i]}\t${R2[i]}\t${R3[i]}";
+        done
+    }
+
+To run, use the options:
+
+#. Sample ID (if this parameter is "header", it will print the metadata header and stop)
+#. Technology (e.g. "standard")
+#. The "file prefix" full path to your fastq files, matching the common portions of the file names (without any glob ``*`` expansions)
+#. The "read labels" to indicate how the files are named and match the remainder of the file names (e.g. "R1,R2,R3", "R1,UMI,R2", etc.)
+
+.. code-block:: none
+
+    create_atac_metadata header > auto_metadata.tsv
+    create_atac_metadata sample_1 standard /path/to/sample_1_subset_S R1,R2,R3 >> auto_metadata.tsv
+    create_atac_metadata sample_2 standard /path/to/sample_2_subset_S R1,R2,R3 >> auto_metadata.tsv
+
+.. raw:: html
+
+   </details>
+
+----
+
Technology
----------

The "technology" field in the metadata table controls how technology-specific pipeline steps are applied, as well as which whitelist is used for barcode correction.
Currently only the ``biorad`` setting uses alternate pipeline processes (to extract and correct the barcode sequence from the two input fastqs).
Except for the ``biorad`` setting, the samples will be processed in the standard pipeline (barcode correction against a whitelist).

The "technology" field can be set to any string (e.g. ``standard``), but note that the entry in this field must match the barcode label given in the ``params.tools.singlecelltoolkit.barcode_correction.whitelist`` parameter.
Commonly used default settings are:

``standard``
____________

The ``standard`` setting assumes a typical 10x Genomics style format with two read pair fastqs and a barcode fastq (note here that the barcode correction has already been performed, writing the ``CB`` into the comment of the barcode fastq)::

    $ zcat sample_1_R1.fastq.gz | head -n 4
    @A00311:74:HMLK5DMXX:1:1101:2013:1000 1:N:0:ACTCAGAC
    NTTGTCTCAGCACCCCCCGACATGGATTCAGGCTGTCTCTTATACACATC
    +
    #FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

    $ zcat sample_1_R2.fastq.gz | head -n 4
    @A00311:74:HMLK5DMXX:1:1101:2013:1000 2:N:0:ACTCAGAC CB:Z:CTGTTCGCAAAGCATA
    CTGTTCGCAAAGCATA
    +
    F:FFFFFFFFFFFFFF

    $ zcat sample_1_R3.fastq.gz | head -n 4
    @A00311:74:HMLK5DMXX:1:1101:2013:1000 3:N:0:ACTCAGAC
    CCTGAATCCATGTCGGGGGGTGCTGAGACAAGCTGTCTCTTATACACAT
    +
    FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

-The debarcoding step here uses a
-`helper script `_
-which transforms this input into two paired fastq files with the barcode integrated into the read name:
-
-.. code:: none
+The barcoding step here uses a
+`helper script `_
+from `aertslab/single_cell_toolkit `_
+which transforms this input into two paired fastq files with the barcode information embedded in the fastq comment field::

    $ zcat sample_1_dex_R1.fastq.gz | head -n 4
-    @CTGTTCGCAAAGCATA:A00311:74:HMLK5DMXX:1:1101:2013:1000 1:N:0:ACTCAGAC
+    @A00311:74:HMLK5DMXX:1:1101:2013:1000 CR:Z:CTGTTCGCAAAGCATA CY:Z:F:FFFFFFFFFFFFFF CB:Z:CTGTTCGCAAAGCATA
    NTTGTCTCAGCACCCCCCGACATGGATTCAGGCTGTCTCTTATACACATC
    +
    #FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

    $ zcat sample_1_dex_R2.fastq.gz | head -n 4
-    @CTGTTCGCAAAGCATA:A00311:74:HMLK5DMXX:1:1101:2013:1000 3:N:0:ACTCAGAC
+    @A00311:74:HMLK5DMXX:1:1101:2013:1000 CR:Z:CTGTTCGCAAAGCATA CY:Z:F:FFFFFFFFFFFFFF CB:Z:CTGTTCGCAAAGCATA
    CCTGAATCCATGTCGGGGGGTGCTGAGACAAGCTGTCTCTTATACACAT
    +
    FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

-``multiome``
-------------
+``multiome``/alternate
+______________________

-The ``multiome`` setting works the same as ``standard`` with the exception of the whitelist used for barcode correction.
+The ``multiome`` or alternately-named settings work with the same pipeline steps as ``standard`` with the exception of the whitelist used for barcode correction.
The whitelists are supplied in the params file (``params.tools.singlecelltoolkit.barcode_correction.whitelist``). 
+This can be used to supply alternate whitelists for certain samples, for example if you need to supply a reverse complemented whitelist for samples run in certain sequencing machines. ``biorad`` ----------- +__________ The ``biorad`` setting processes BioRad data using `BAP `_. -This takes input data: - -.. code:: none +This takes input data:: $ zcat sample_2_R1.fastq.gz | head -n 4 @NB551608:167:HNYFJBGXC:1:11101:11281:1033 1:N:0:TAAGGCGA @@ -136,9 +260,7 @@ This takes input data: ##A#################################### -And produces paired fastq files with the barcode integrated into the read name (with a ``_`` delimiter): - -.. code:: none +And produces paired fastq files with the barcode integrated into the read name (with a ``_`` delimiter):: $ zcat sample_2_dex_R1.fastq.gz | head -n 4 @GCGTAGAGGAAGTTTCAGCAA_NB551608:167:HNYFJBGXC:1:11101:11281:1033 1:N:0:TAAGGCGA @@ -156,6 +278,9 @@ And produces paired fastq files with the barcode integrated into the read name ( Running the workflow ******************** +Configuration +------------- + To generate a config file, use the ``atac_preprocess`` profile along with ``docker`` or ``singularity``. Note that the full path to ``vib-singlecell-nf/vsn-pipelines/main_atac.nf`` must be used: @@ -166,27 +291,43 @@ Note that the full path to ``vib-singlecell-nf/vsn-pipelines/main_atac.nf`` must -profile atac_preprocess,singularity \ > atac_preprocess.config + +Parameters +---------- + The ATAC-specific parameters are described here. -The important parameters to change are: +The important parameters to verify are: - ``params.data.atac_preprocess.metadata``: the path to the metadata file. -- ``params.tools.bwamaptools.bwa_fasta``: the path to the bwa reference fasta file. This should be already indexed with ``bwa index``, and the index files located in the same directory as the fasta file. -- ``params.tools.singlecelltoolkit.barcode_correction.whitelist``: Whitelists for barcode correction are supplied here. The whitelists are matched to samples based on the parameter key here ('standard', 'multiome') and the technology field listed for each sample in the metadata file. +- ``params.tools.bwamaptools.bwa_fasta``: the path to the bwa reference fasta file. This should be already indexed with ``bwa index``, and the index files located in the same directory as the fasta file. Note that ``bwa`` and ``bwa-mem2`` use different indexes that are not interchangeable. +- ``params.tools.singlecelltoolkit.barcode_correction.whitelist``: Whitelists for barcode correction are supplied here. The whitelists are matched to samples based on the parameter key here ('standard', 'multiome', etc.) and the technology field listed for each sample in the metadata file. + +Choice of tools +_______________ + +Several steps have options for the choice of method to use. +These options are controlled within ``params.atac_preprocess_tools``. + +- Adapter trimming (``adapter_trimming_method``): Can be either of ``Trim_Galore`` (default), or ``fastp``. +- Duplicate marking (``mark_duplicates_method``): Can be either of ``MarkDuplicates`` (Picard tools, default) or ``MarkDuplicatesSpark`` (GATK). We currently recommend Picard MarkDuplicates because it has the capability to perform barcode-aware marking of PCR duplicates. MarkDuplicatesSpark has the advantage of parallelization, however it requires a large SSD to use for temporary files. 
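+
+For reference, the barcode-aware duplicate marking that makes Picard the recommended choice boils down to the following command, condensed from the ``PICARD__MARK_DUPLICATES_AND_SORT`` process in this patch set (the sample file names here are illustrative only):
+
+.. code-block:: bash
+
+    # Mark duplicates per cell barcode (CB tag), stream uncompressed SAM to
+    # SortSam, then coordinate-sort and index the final bam:
+    gatk MarkDuplicates \
+        -I sample_1.bwa.out.fixmate.bam \
+        -O /dev/stdout \
+        --METRICS_FILE sample_1.markdup.metrics.txt \
+        --BARCODE_TAG CB \
+        --COMPRESSION_LEVEL 0 \
+        --QUIET true \
+        --ASSUME_SORT_ORDER queryname \
+    | gatk SortSam \
+        -I /dev/stdin \
+        -O sample_1.bwa.out.possorted.bam \
+        --SORT_ORDER coordinate \
+        --CREATE_INDEX true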
-Optional parameters to change: +Additionally: -- Within ``params.tools.bwamaptools.add_barcode_as_tag``: +- Mapping: Use parameter ``params.tools.bwamaptools.bwa_version`` to select either ``bwa`` or ``bwa-mem2``. These should give virtually identical results, however ``bwa-mem2``, while faster, has used more memory in our tests. Note that the index (``bwa_index``) is not interchangeable between the versions. - - ``tag``: controls the naming of the barcode tag added to the bam (``CR`` by default). - - ``delimiter_to_split_qname``: Controls which delimiter to split the bam read name field to get the barcode. By default it uses the regex ``'[:|_]'`` to split on both ``:`` and ``|``. +Optional parameters +___________________ - Within ``params.tools.sinto.fragments``: - - One of (but not both) ``barcodetag`` or ``barcode_regex`` needs to be set to tell Sinto where to find the barcodes in the bam file. The default is to use ``barcodetag`` of ``CR``. + - One of (but not both) ``barcodetag`` or ``barcode_regex`` needs to be set to tell Sinto where to find the barcodes in the bam file. The default is to use ``barcodetag`` of ``CB``. - ``mapq``: Controls quality filtering settings for generating the fragments file. Discards reads with quality score lower than this number (default 30). - - ``temp_dir``: Controls where temp files are stored during fragments processing. For large BAM files, the system default temp location may become full. An alternate temp path can be specified here. Be sure to also include this temp path in the global volume mounts for Docker/Singularity in the config file. + +Execution +--------- + After configuring, the workflow can be run with: .. code:: bash @@ -196,3 +337,17 @@ After configuring, the workflow can be run with: -entry atac_preprocess -resume ---- + +Other considerations +-------------------- + +Temporary directory mapping +___________________________ + +For large BAM files, the system default temp location may become full. +A workaround is to map ``/tmp`` to an alternate path using the volume mount options in Docker or Singularity. 
For example, in the container engine options (note the bind syntax is ``host_path:container_path``, so the roomier alternate path is the source and ``/tmp`` is the target):
+    - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/path/to/tmp:/tmp'``
+    - Docker run options: ``runOptions = '-i -v /data:/data -v /path/to/tmp:/tmp'``
+
+
+
From 2c4c47dc6664b3c50b2378c31ebb4a10844524ad Mon Sep 17 00:00:00 2001
From: Chris Flerin 
Date: Wed, 26 May 2021 12:19:49 +0200
Subject: [PATCH 056/202] Move picard processes to use GATK image

- Move relevant process file to /src/gatk
- Update picard command parameters
- Cleanup configs
---
 conf/atac/preprocess.config                   |  1 -
 src/bwamaptools/main.nf                       |  5 ++--
 src/gatk/gatk.config                          |  3 ++
 .../processes/estimate_library_complexity.nf  | 10 +++----
 .../processes/mark_duplicates.nf              | 28 +++++++++----------
 .../processes/merge_sam_files.nf              | 18 ++++++------
 src/picard/picard.config                      |  9 ++----
 workflows/atac/preprocess.nf                  |  6 ++--
 8 files changed, 38 insertions(+), 42 deletions(-)
 rename src/{picard => gatk}/processes/estimate_library_complexity.nf (73%)
 rename src/{picard => gatk}/processes/mark_duplicates.nf (59%)
 rename src/{picard => gatk}/processes/merge_sam_files.nf (62%)

diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config
index d41e3f36..18ef667e 100644
--- a/conf/atac/preprocess.config
+++ b/conf/atac/preprocess.config
@@ -13,7 +13,6 @@ params {
includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config'
includeConfig './../../src/trimgalore/trimgalore.config'
includeConfig './../../src/bwamaptools/bwamaptools.config'
-includeConfig './../../src/picard/picard.config'
includeConfig './../../src/gatk/gatk.config'
includeConfig './../../src/bwamaptools/conf/bwa_mapping.config'
includeConfig './../../src/sinto/sinto.config'
diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf
index 3c79ff4e..5189aa65 100644
--- a/src/bwamaptools/main.nf
+++ b/src/bwamaptools/main.nf
@@ -13,10 +13,10 @@ include {
} from './processes/mapping_summary.nf' params(params)
include {
    PICARD__MARK_DUPLICATES_AND_SORT;
-} from './../../src/picard/processes/mark_duplicates.nf' params(params)
+} from './../../src/gatk/processes/mark_duplicates.nf' params(params)
include {
    PICARD__ESTIMATE_LIBRARY_COMPLEXITY;
-} from './../../src/picard/processes/estimate_library_complexity.nf' params(params)
+} from './../../src/gatk/processes/estimate_library_complexity.nf' params(params)
include {
    GATK__MARK_DUPLICATES_SPARK;
} from './../../src/gatk/processes/mark_duplicates_spark.nf' params(params)
@@ -27,7 +27,6 @@
    SIMPLE_PUBLISH as PUBLISH_MARKDUPS_METRICS;
    SIMPLE_PUBLISH as PUBLISH_LIBRARY_METRICS;
} from "../utils/processes/utils.nf" params(params)
-//} from "../utils/workflows/utils.nf" params(params)

//////////////////////////////////////////////////////
// Define the workflow
diff --git a/src/gatk/gatk.config b/src/gatk/gatk.config
index c55c1207..5f963bf1 100644
--- a/src/gatk/gatk.config
+++ b/src/gatk/gatk.config
@@ -3,6 +3,9 @@ params {
        gatk {
            container = 'broadinstitute/gatk:4.2.0.0'
        }
+        estimate_library_complexity {
+            barcode_tag = 'CB'
+        }
    }
}

diff --git a/src/picard/processes/estimate_library_complexity.nf b/src/gatk/processes/estimate_library_complexity.nf
similarity index 73%
rename from src/picard/processes/estimate_library_complexity.nf
rename to src/gatk/processes/estimate_library_complexity.nf
index 3829f1e4..4cd6419e 100644
--- a/src/picard/processes/estimate_library_complexity.nf
+++ b/src/gatk/processes/estimate_library_complexity.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

// binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.picard +toolParams = params.tools.gatk process PICARD__ESTIMATE_LIBRARY_COMPLEXITY { @@ -21,10 +21,10 @@ process PICARD__ESTIMATE_LIBRARY_COMPLEXITY { def sampleParams = params.parseConfig(sampleId, params.global, toolParams.estimate_library_complexity) processParams = sampleParams.local """ - java -jar /picard.jar EstimateLibraryComplexity \ - I=${bam} \ - O=${sampleId}.picard_library_complexity_metrics.txt \ - BARCODE_TAG=${processParams.barcode_tag} \ + gatk EstimateLibraryComplexity \ + -I ${bam} \ + -O ${sampleId}.picard_library_complexity_metrics.txt \ + --BARCODE_TAG ${processParams.barcode_tag} \ """ } diff --git a/src/picard/processes/mark_duplicates.nf b/src/gatk/processes/mark_duplicates.nf similarity index 59% rename from src/picard/processes/mark_duplicates.nf rename to src/gatk/processes/mark_duplicates.nf index 766dd4c6..939a0e35 100644 --- a/src/picard/processes/mark_duplicates.nf +++ b/src/gatk/processes/mark_duplicates.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.picard +toolParams = params.tools.gatk process PICARD__MARK_DUPLICATES_AND_SORT { @@ -24,19 +24,19 @@ process PICARD__MARK_DUPLICATES_AND_SORT { processParams = sampleParams.local """ set -euo pipefail - java -jar /picard.jar MarkDuplicates \ - I=${bam} \ - O=/dev/stdout \ - METRICS_FILE=${sampleId}.bwa.out.fixmate.picard_markdup.metrics.txt \ - BARCODE_TAG=CB \ - COMPRESSION_LEVEL=0 \ - QUIET=true \ - ASSUME_SORT_ORDER=queryname \ - | java -jar /picard.jar SortSam \ - I=/dev/stdin \ - O=${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam \ - SORT_ORDER=coordinate \ - CREATE_INDEX=true + gatk MarkDuplicates \ + -I ${bam} \ + -O /dev/stdout \ + --METRICS_FILE ${sampleId}.bwa.out.fixmate.picard_markdup.metrics.txt \ + --BARCODE_TAG CB \ + --COMPRESSION_LEVEL 0 \ + --QUIET true \ + --ASSUME_SORT_ORDER queryname \ + | gatk SortSam \ + -I /dev/stdin \ + -O ${sampleId}.bwa.out.fixmate.picard_markdup.possorted.bam \ + --SORT_ORDER coordinate \ + --CREATE_INDEX true """ } diff --git a/src/picard/processes/merge_sam_files.nf b/src/gatk/processes/merge_sam_files.nf similarity index 62% rename from src/picard/processes/merge_sam_files.nf rename to src/gatk/processes/merge_sam_files.nf index 1a52dd3e..3725db3b 100644 --- a/src/picard/processes/merge_sam_files.nf +++ b/src/gatk/processes/merge_sam_files.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.picard +toolParams = params.tools.gatk -process PICARD__MERGE_SAM_FILES { +process PICARD__MERGE_SAM_FILES_AND_SORT { container toolParams.container label 'compute_resources__default','compute_resources__24hqueue' @@ -21,13 +21,13 @@ process PICARD__MERGE_SAM_FILES { //def sampleParams = params.parseConfig(sampleId, params.global, toolParams) //processParams = sampleParams.local """ - java -jar /picard.jar MergeSamFiles \ - ${"I="+bams.join(" I=")} \ - O=/dev/stdout \ - | java -jar /picard.jar SortSam \ - I=/dev/stdin \ - O=${sampleId}.bwa.out.fixmate.merged.bam \ - SORT_ORDER=queryname + gatk MergeSamFiles \ + ${"-I "+bams.join(" -I ")} \ + -O /dev/stdout \ + | gatk SortSam \ + -I /dev/stdin \ + -O ${sampleId}.bwa.out.fixmate.merged.bam \ + --SORT_ORDER queryname """ } diff --git a/src/picard/picard.config b/src/picard/picard.config index ea7d3958..0213144f 100644 --- a/src/picard/picard.config +++ b/src/picard/picard.config @@ -1,11 +1,6 @@ params { - tools { - picard { - container = 'vibsinglecellnf/picard:2.21.1' - estimate_library_complexity { - barcode_tag = 'CB' - } - } + picard { + container = 'vibsinglecellnf/picard:2.21.1' } } diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 6f29635a..38986495 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -19,8 +19,8 @@ include { BAM_TO_FRAGMENTS; } from './../../src/sinto/main.nf' params(params) include { BAP__BIORAD_DEBARCODE; } from './../../src/bap/workflows/bap_debarcode.nf' params(params) include { - PICARD__MERGE_SAM_FILES; -} from './../../src/picard/processes/merge_sam_files.nf' params(params) + PICARD__MERGE_SAM_FILES_AND_SORT; +} from './../../src/gatk/processes/merge_sam_files.nf' params(params) include { SIMPLE_PUBLISH as PUBLISH_BC_STATS; @@ -144,7 +144,7 @@ workflow ATAC_PREPROCESS { .set { aligned_bam_size_split } // merge samples with multiple files: - merged_bam = PICARD__MERGE_SAM_FILES(aligned_bam_size_split.to_merge) + merged_bam = PICARD__MERGE_SAM_FILES_AND_SORT(aligned_bam_size_split.to_merge) // re-combine with single files: merged_bam.mix(aligned_bam_size_split.no_merge From 8b7c04f4602324888b6ef657d11cd6bb73cab373 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 27 May 2021 10:08:54 +0200 Subject: [PATCH 057/202] Fix gatk config scoping --- src/gatk/gatk.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gatk/gatk.config b/src/gatk/gatk.config index 5f963bf1..339ae2a4 100644 --- a/src/gatk/gatk.config +++ b/src/gatk/gatk.config @@ -2,9 +2,9 @@ params { tools { gatk { container = 'broadinstitute/gatk:4.2.0.0' - } - estimate_library_complexity { - barcode_tag = 'CB' + estimate_library_complexity { + barcode_tag = 'CB' + } } } } From 04f0389fb482683edd0362ddddca3814cd5050d2 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 27 May 2021 10:13:08 +0200 Subject: [PATCH 058/202] qc workflow updates for combined samples in a single process --- src/pycistopic/bin/compute_qc_stats.py | 14 ++++++++++++++ workflows/atac/qc_filtering.nf | 3 +++ 2 files changed, 17 insertions(+) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index d2df19df..7de38ca4 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -12,22 +12,32 @@ parser = argparse.ArgumentParser(description='Compute QC stats') +parser.add_argument( + "--input_files", + type=str, + required=True, + nargs='+', + 
help='Input files in the form of [SampleId, path_to_fragments, path_to_peaks]. Multiple inputs are possible.' +) parser.add_argument( "--sampleId", type=str, required=True, + nargs='+', help='Sample ID.' ) parser.add_argument( "--fragments", type=str, required=True, + nargs='+', help='Input fragments file.' ) parser.add_argument( "--regions", type=str, required=True, + nargs='+', help='Path to regions file.' ) parser.add_argument( @@ -74,6 +84,10 @@ annot = pickle.load(infile) infile.close() + +print(args.input_files) + + fragments_dict = { args.sampleId: args.fragments } diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 9a29b30d..2eadc62d 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -42,6 +42,9 @@ workflow ATAC_QC_PREFILTER { data_split.fragments.join(peaks) .map { it -> tuple(it[0], it[1], it[2], it[4]) } .set{ fragpeaks } + fragpeaks.map { it -> tuple(it[0], it[1], it[3]) } + .collectFile(name: 'input_files.txt') + .view() qc_stats = SC__PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, biomart) PUBLISH_METADATA(qc_stats.map { it -> tuple(it[0], it[1]) }, 'metadata.tsv', 'gz', 'pycistopic', false) From 5d4c4f66ca875aa1175b3228f4a5650e954aeea3 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 27 May 2021 11:13:57 +0200 Subject: [PATCH 059/202] Update Dockerfile to use samtools zlibng image - Add bwa-mem2 - #325 --- src/bwamaptools/Dockerfile | 46 ++++------------------------- src/bwamaptools/Dockerfile.samtools | 32 -------------------- 2 files changed, 5 insertions(+), 73 deletions(-) delete mode 100644 src/bwamaptools/Dockerfile.samtools diff --git a/src/bwamaptools/Dockerfile b/src/bwamaptools/Dockerfile index 334ceda7..0d4327e7 100644 --- a/src/bwamaptools/Dockerfile +++ b/src/bwamaptools/Dockerfile @@ -1,38 +1,6 @@ -FROM debian:buster-slim +FROM vibsinglecellnf/samtools:1.12 ENV DEBIAN_FRONTEND=noninteractive -RUN BUILDPKGS="build-essential \ - autoconf cmake git \ - automake make gcc perl bedtools \ - libbz2-dev \ - libcurl4-openssl-dev \ - libssl-dev \ - zlib1g-dev \ - liblzma-dev \ - curl \ - libncurses5-dev"&& \ - apt-get update && \ - apt-get install -y --no-install-recommends apt-utils debconf locales && dpkg-reconfigure locales && \ - apt-get install -y --reinstall ca-certificates && \ - apt-get install -y --no-install-recommends $BUILDPKGS - -# Install htslib -RUN git clone --recurse-submodules https://github.com/samtools/htslib.git && \ - cd htslib && \ - autoheader && \ - autoconf && \ - ./configure --prefix=/usr/local/ && \ - make && \ - make install - -# Install SAMtools -RUN git clone https://github.com/samtools/samtools.git && \ - cd samtools && \ - autoheader && \ - autoconf -Wno-syntax && \ - ./configure --prefix=/usr/local/ && \ - make && \ - make install # install bwa RUN git clone https://github.com/lh3/bwa.git && \ @@ -41,16 +9,12 @@ RUN git clone https://github.com/lh3/bwa.git && \ mv /bwa/bwa /usr/local/bin/ # install bwa-mem2 -ENV BWAMEM2_VER 2.1 +ENV BWAMEM2_VER 2.2.1 RUN cd /tmp && \ curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v${BWAMEM2_VER}/bwa-mem2-${BWAMEM2_VER}_x64-linux.tar.bz2 | tar jxf - && \ mv bwa-mem2-${BWAMEM2_VER}_x64-linux/bwa-mem2* /usr/local/bin -RUN apt-get -y update && \ - apt-get -y --no-install-recommends install \ - # Need to run ps - procps \ - less && \ - rm -rf /var/cache/apt/* && \ - rm -rf /var/lib/apt/lists/* +RUN rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* && \ + ldconfig diff --git a/src/bwamaptools/Dockerfile.samtools 
b/src/bwamaptools/Dockerfile.samtools deleted file mode 100644 index 0f707642..00000000 --- a/src/bwamaptools/Dockerfile.samtools +++ /dev/null @@ -1,32 +0,0 @@ -FROM alpine:3.12.1 - -ENV SAMTOOLS_VERSION 1.11 -ENV HTSLIB_VERSION 1.11 - -RUN apk update && \ - apk add --update autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev ncurses-dev procps mawk && \ - apk add --virtual build-dependencies curl git - -RUN curl -L -o /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 \ - https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 && \ - mkdir -p /tmp/htslib-${HTSLIB_VERSION} && \ - tar jxvf /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 -C /tmp/htslib-${HTSLIB_VERSION} --strip-components 1 && \ - cd /tmp/htslib-${HTSLIB_VERSION} && \ - ./configure && \ - make && \ - make install && \ - cd .. && rm -rf htslib-${HTSLIB_VERSION} - -RUN curl -L -o /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ - https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 && \ - mkdir -p /tmp/samtools-${SAMTOOLS_VERSION} && \ - tar jxvf /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 -C /tmp/samtools-${SAMTOOLS_VERSION} --strip-components 1 && \ - cd /tmp/samtools-${SAMTOOLS_VERSION} && \ - ./configure && \ - make && \ - make install && \ - cd .. && rm -rf samtools-${SAMTOOLS_VERSION} - -RUN apk del build-dependencies && \ - rm -rf /var/cache/apk/* - From 8fdcefd0be566e8559f5bcd581d465b7d73a0bc4 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 27 May 2021 11:25:57 +0200 Subject: [PATCH 060/202] Update default container - bwamaptools - singlecelltoolkit --- src/bwamaptools/bwamaptools.config | 2 +- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index 38232fd2..6053476d 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,7 +1,7 @@ params { tools { bwamaptools { - container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.1' + container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.1-zlibng' } } } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 05338e58..81b6a0a6 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-03-03-21d4f50' + container = 'vibsinglecellnf/singlecelltoolkit:2021-04-01-183902e' barcode_correction { whitelist { standard = '' From a6cbf42659c7b96e72b596f181c4200553dffc06 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 4 Jun 2021 16:02:16 +0200 Subject: [PATCH 061/202] Add fastp settings to the preprocess config --- conf/atac/preprocess.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index 18ef667e..2acc38b7 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -12,6 +12,7 @@ params { includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' includeConfig './../../src/trimgalore/trimgalore.config' +includeConfig './../../src/fastp/fastp.config' includeConfig './../../src/bwamaptools/bwamaptools.config' includeConfig './../../src/gatk/gatk.config' includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' From 37c9ea3465a25e03dc20b440308f00da013b245c Mon Sep 17 00:00:00 
2001 From: Chris Flerin Date: Fri, 4 Jun 2021 16:02:40 +0200 Subject: [PATCH 062/202] Update bwamaptools docker image setting --- src/bwamaptools/bwamaptools.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index 6053476d..e51480fc 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,7 +1,7 @@ params { tools { bwamaptools { - container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.1-zlibng' + container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.1.1-zlibng' } } } From b69c51a858ea9923eb2e106e3bc134171e7a3144 Mon Sep 17 00:00:00 2001 From: Gert Hulselmans Date: Mon, 7 Jun 2021 14:44:46 +0200 Subject: [PATCH 063/202] Cleanup bash code of mapping_summary.sh script. Cleanup bash code of mapping_summary.sh script: - Use double quotes around variable expansions. - Add Usage information if wrong number of arguments is given. - Cleanup comments. --- src/bwamaptools/bin/mapping_summary.sh | 31 +++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/bwamaptools/bin/mapping_summary.sh b/src/bwamaptools/bin/mapping_summary.sh index 08f17d72..6ea92af9 100755 --- a/src/bwamaptools/bin/mapping_summary.sh +++ b/src/bwamaptools/bin/mapping_summary.sh @@ -1,22 +1,27 @@ #!/usr/bin/env bash -sampleId="$1" -bam="$2" +sampleId="${1}"; +bam="${2}"; +if [ ${#@} -ne 2 ] ; then + printf 'Usage: mapping_summary.sh sampleId bam_file\n' >&2; + exit 1; +fi -# run samtools stat: -samtools stat ${bam} > ${sampleId}.stat -# uniquely mapped reads (BWA): -UMR=$(samtools view -F 0x4 -F 0x100 -F 0x800 ${bam} | grep -v -e 'XA:Z:' -e 'SA:Z:' | wc -l) +# Run samtools stat: +samtools stat "${bam}" > "${sampleId}.stat" -# Fraction of total read pairs mapped confidently to genome (>30 mapq) -CMR=$(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 ${bam}) +# Uniquely mapped reads (BWA): +UMR=$(samtools view -F 0x4 -F 0x100 -F 0x800 "${bam}" | grep -v -e 'XA:Z:' -e 'SA:Z:' | wc -l); +# Fraction of total read pairs mapped confidently to genome (>30 mapq): +CMR=$(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}"); -# output file: -printf "\t${sampleId}\n" > ${sampleId}.mapping_stats.tsv -grep ^SN ${sampleId}.stat | cut -f 2,3 >> ${sampleId}.mapping_stats.tsv -printf "Uniquely mapped reads:\t${UMR}\n" >> ${sampleId}.mapping_stats.tsv -printf "Reads mapped with MAPQ>30:\t${CMR}\n" >> ${sampleId}.mapping_stats.tsv + +# Output file: +printf "\t${sampleId}\n" > "${sampleId}.mapping_stats.tsv"; +grep '^SN' "${sampleId}.stat" | cut -f 2,3 >> "${sampleId}.mapping_stats.tsv"; +printf "Uniquely mapped reads:\t${UMR}\n" >> "${sampleId}.mapping_stats.tsv"; +printf "Reads mapped with MAPQ>30:\t${CMR}\n" >> "${sampleId}.mapping_stats.tsv"; From 67600f64c2f9b427967578e34f1afe026e7cecb7 Mon Sep 17 00:00:00 2001 From: Gert Hulselmans Date: Mon, 7 Jun 2021 14:52:34 +0200 Subject: [PATCH 064/202] Use 2 threads for samtools view when calculating statistics in mapping_summary.sh. Use 2 threads for samtools view when calculating statistics in mapping_summary.sh. 
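For reference, a minimal sketch decoding the SAM flag filters these
statistics rely on (flag values from the SAM spec; the BAM path is a
placeholder):

    #!/usr/bin/env bash
    # -F drops reads that have any of the given FLAG bits set:
    #   0x4   read unmapped
    #   0x100 secondary alignment
    #   0x800 supplementary alignment
    # so both counts below only consider primary, mapped reads.
    bam="sample.bam";  # placeholder input

    # Primary mapped reads:
    samtools view -@ 2 -c -F 0x4 -F 0x100 -F 0x800 "${bam}";

    # Primary mapped reads with MAPQ >= 30 (-q skips lower-quality alignments):
    samtools view -@ 2 -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}";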
---
 src/bwamaptools/bin/mapping_summary.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/bwamaptools/bin/mapping_summary.sh b/src/bwamaptools/bin/mapping_summary.sh
index 08f17d72..9fb567d7 100755
--- a/src/bwamaptools/bin/mapping_summary.sh
+++ b/src/bwamaptools/bin/mapping_summary.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+nbr_threads=2;
+
 sampleId="${1}";
 bam="${2}";
 
@@ -10,13 +12,13 @@ fi
 
 
 # Run samtools stat:
-samtools stat "${bam}" > "${sampleId}.stat"
+samtools stat -@ "${nbr_threads}" "${bam}" > "${sampleId}.stat"
 
 # Uniquely mapped reads (BWA):
-UMR=$(samtools view -F 0x4 -F 0x100 -F 0x800 "${bam}" | grep -v -e 'XA:Z:' -e 'SA:Z:' | wc -l);
+UMR=$(samtools view -@ "${nbr_threads}" -F 0x4 -F 0x100 -F 0x800 "${bam}" | grep -v -e 'XA:Z:' -e 'SA:Z:' | wc -l);
 
 # Fraction of total read pairs mapped confidently to genome (>30 mapq):
-CMR=$(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}");
+CMR=$(samtools view -@ "${nbr_threads}" -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}");
 
 
 # Output file:

From 2528381ac840dbf603deee4b1d980a72a689e4ee Mon Sep 17 00:00:00 2001
From: Gert Hulselmans
Date: Mon, 7 Jun 2021 15:24:22 +0200
Subject: [PATCH 065/202] Calculate number of "Uniquely mapped reads" directly with SAMtools view.

Calculate the number of "Uniquely mapped reads" directly with SAMtools view,
as SAMtools v1.12 supports filter expression syntax (-e option). This allows
counting reads with SAMtools view alone, since grep filtering and counting of
reads afterwards are no longer needed. This gives a small speedup, but more
importantly brings CPU usage down a bit.
---
 src/bwamaptools/bin/mapping_summary.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bwamaptools/bin/mapping_summary.sh b/src/bwamaptools/bin/mapping_summary.sh
index 9fb567d7..f5716d8b 100755
--- a/src/bwamaptools/bin/mapping_summary.sh
+++ b/src/bwamaptools/bin/mapping_summary.sh
@@ -15,7 +15,7 @@ fi
 samtools stat -@ "${nbr_threads}" "${bam}" > "${sampleId}.stat"
 
 # Uniquely mapped reads (BWA):
-UMR=$(samtools view -@ "${nbr_threads}" -F 0x4 -F 0x100 -F 0x800 "${bam}" | grep -v -e 'XA:Z:' -e 'SA:Z:' | wc -l);
+UMR=$(samtools view -@ "${nbr_threads}" -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${bam}");
 
 # Fraction of total read pairs mapped confidently to genome (>30 mapq):
 CMR=$(samtools view -@ "${nbr_threads}" -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}");

From 0f83400a4ed6eedd7648cbd2142883e305b3c375 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Mon, 7 Jun 2021 15:42:29 +0200
Subject: [PATCH 066/202] Fix fastp parameter scope

---
 src/fastp/fastp.config                  | 14 ++++++++------
 src/fastp/processes/clean_and_fastqc.nf |  4 ++--
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/fastp/fastp.config b/src/fastp/fastp.config
index 1b518930..aac86685 100644
--- a/src/fastp/fastp.config
+++ b/src/fastp/fastp.config
@@ -1,11 +1,13 @@
 params {
-    fastp {
-        container = 'vibsinglecellnf/fastp:0.20.0'
-        thread = 1
+    tools {
+        fastp {
+            container = 'vibsinglecellnf/fastp:0.20.0'
+            thread = 1
 
-        clean_and_fastqc {
-            length_required = 20
-            adapter_fasta = "$baseDir/src/fastp/assets/fastp.adapters"
+            clean_and_fastqc {
+                length_required = 20
+                adapter_fasta = "$baseDir/src/fastp/assets/fastp.adapters"
+            }
         }
     }
 }
diff --git a/src/fastp/processes/clean_and_fastqc.nf b/src/fastp/processes/clean_and_fastqc.nf
index 3550b84c..045c1d19 100644
--- a/src/fastp/processes/clean_and_fastqc.nf
+++ b/src/fastp/processes/clean_and_fastqc.nf
@@ -5,7 +5,7 @@ nextflow.enable.dsl=2
  */
 process FASTP__CLEAN_AND_FASTQC {
 
-    container params.fastp.container
+    container params.tools.fastp.container
     publishDir "${params.global.outdir}/01.clean", mode: 'symlink'
     label 'compute_resources__cpu','compute_resources__24hqueue'
 
@@ -17,7 +17,7 @@ process FASTP__CLEAN_AND_FASTQC {
         tuple file('*_fastp.{json,html}'), emit: report
 
     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.fastp)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.tools.fastp)
         processParams = sampleParams.local
         """
         fastp --thread ${processParams.thread} \

From 26a52aba4c64732e853757dd16862aef6eb1b6f9 Mon Sep 17 00:00:00 2001
From: Gert Hulselmans
Date: Mon, 7 Jun 2021 17:15:03 +0200
Subject: [PATCH 067/202] Uncompress BAM file only once when calculating all mapping statistics.

By uncompressing the BAM file only once and piping the uncompressed output
to tee, which pipes it to 3 samtools commands that calculate some mapping
statistics, the running time goes down by 40% (while the previous version
was already a 40% improvement over the original implementation).

Timing:
- original version: 453 seconds
- previous version (with samtools commands running with 2 threads): 279 seconds
- current version: 172 seconds
---
 src/bwamaptools/bin/mapping_summary.sh | 37 +++++++++++++++++---------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/bwamaptools/bin/mapping_summary.sh b/src/bwamaptools/bin/mapping_summary.sh
index f5716d8b..59be0cf0 100755
--- a/src/bwamaptools/bin/mapping_summary.sh
+++ b/src/bwamaptools/bin/mapping_summary.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env bash
 
-nbr_threads=2;
-
 sampleId="${1}";
 bam="${2}";
 
@@ -11,19 +9,34 @@ fi
 
 
-# Run samtools stat:
-samtools stat -@ "${nbr_threads}" "${bam}" > "${sampleId}.stat"
-
-# Uniquely mapped reads (BWA):
-UMR=$(samtools view -@ "${nbr_threads}" -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${bam}");
-
-# Fraction of total read pairs mapped confidently to genome (>30 mapq):
-CMR=$(samtools view -@ "${nbr_threads}" -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}");
+# Get mapping statistics from BAM file:
+# - Read BAM file and write uncompressed BAM.
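+#   (`samtools view -u` emits uncompressed BAM, so the input is inflated only
+#    once; each `>(...)` process substitution hands tee a pipe that feeds one
+#    of the samtools commands below.)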
+# - Uncompressed BAM file is written to each samtools command with tee (writes to each specified file and stdout). +# - samtools commands: +# - Get samtools statistics with: +# samtools stat "${bam}" > "${sampleId}.stat" +# - Uniquely mapped reads (BWA): +# samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${bam}" +# - Fraction of total read pairs mapped confidently to genome (>30 mapq): +# samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}" +# - Only use threads for "samtools stat". Using it with any of the other samtools commands +# makes everything slower than not using any threads at all. +samtools view -u "${bam}" \ + | tee \ + >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' - > "${sampleId}.uniquely_mapped_reads.txt") \ + >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 - > "${sampleId}.fraction_total_read_pairs.txt") \ + | samtools stat -@ 2 - > "${sampleId}.stat" # Output file: printf "\t${sampleId}\n" > "${sampleId}.mapping_stats.tsv"; + grep '^SN' "${sampleId}.stat" | cut -f 2,3 >> "${sampleId}.mapping_stats.tsv"; -printf "Uniquely mapped reads:\t${UMR}\n" >> "${sampleId}.mapping_stats.tsv"; -printf "Reads mapped with MAPQ>30:\t${CMR}\n" >> "${sampleId}.mapping_stats.tsv"; +printf "Uniquely mapped reads:\t" >> "${sampleId}.mapping_stats.tsv"; +cat "${sampleId}.uniquely_mapped_reads.txt" >> "${sampleId}.mapping_stats.tsv"; + +printf "Reads mapped with MAPQ>30:\t" >> "${sampleId}.mapping_stats.tsv"; +cat "${sampleId}.fraction_total_read_pairs.txt" >> "${sampleId}.mapping_stats.tsv"; + +rm "${sampleId}.uniquely_mapped_reads.txt" "${sampleId}.fraction_total_read_pairs.txt"; From 3e02f5c5b679b1274c911ab8a05719f817abca7a Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 8 Jun 2021 13:43:14 +0200 Subject: [PATCH 068/202] Remove when condition from fastp process --- src/fastp/processes/adapter_trimming.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/fastp/processes/adapter_trimming.nf b/src/fastp/processes/adapter_trimming.nf index 3a997ddc..a65191d9 100644 --- a/src/fastp/processes/adapter_trimming.nf +++ b/src/fastp/processes/adapter_trimming.nf @@ -9,9 +9,6 @@ process FASTP__ADAPTER_TRIMMING { container toolParams.container label 'compute_resources__cpu','compute_resources__24hqueue' - when: - params.atac_preprocess_tools.adapter_trimming_method == 'fastp' - input: tuple val(sampleId), path(fastq_PE1), From 8376131ea5ccd603a35005fe95d2e9a60b87ea62 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 8 Jun 2021 13:43:35 +0200 Subject: [PATCH 069/202] Update sinto container --- src/sinto/sinto.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sinto/sinto.config b/src/sinto/sinto.config index 39d942d8..f4ca0302 100644 --- a/src/sinto/sinto.config +++ b/src/sinto/sinto.config @@ -1,7 +1,7 @@ params { tools { sinto { - container = 'vibsinglecellnf/sinto:0.7.2' + container = 'vibsinglecellnf/sinto:0.7.3.1' fragments { min_mapq = 30 barcodetag = 'CB' From 13724c23d011467fc7cfc9b7a43aa9027446482e Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 8 Jun 2021 13:46:51 +0200 Subject: [PATCH 070/202] Fix bwamaptools container link --- src/bwamaptools/bwamaptools.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index e51480fc..1bd0d2cb 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,7 +1,7 @@ params { tools { bwamaptools { - container = 
'vibsinglecellnf/bwamaptools:bwa-mem2-2.1.1-zlibng' + container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng' } } } From 9b6ae125de01a359a9c3ba1a19182cc5c3b41f50 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 9 Jun 2021 11:30:51 +0200 Subject: [PATCH 071/202] Update singlecelltoolkit: - New Docker image includes seqc 0.10.1, and igzip for faster file decompression - New version of singlecelltoolkit with rewrites for new seqc and HyDrop barcode extraction script --- src/singlecelltoolkit/Dockerfile | 16 +++++++++++++++- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile index 1934ae3b..7aed81ab 100644 --- a/src/singlecelltoolkit/Dockerfile +++ b/src/singlecelltoolkit/Dockerfile @@ -5,9 +5,22 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ python3 \ python3-venv \ + nasm \ + libtool \ wget && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.7 100 +# install igzip (https://github.com/intel/isa-l) +RUN git clone --depth=1 https://github.com/intel/isa-l.git /tmp/isa-l && \ + cd /tmp/isa-l && \ + ./autogen.sh && \ + ./configure && \ + make && \ + make install && \ + cd .. && \ + rm -r isa-l + + RUN python -m venv /opt/venv # Make sure we use the virtualenv: ENV PATH="/opt/venv/bin:$PATH" @@ -22,8 +35,9 @@ RUN pip install --no-cache-dir --upgrade pip wheel && \ matplotlib \ numpy + # install seq (https://github.com/seq-lang/seq/): -ENV SEQ_VERSION=0.9.11 +ENV SEQ_VERSION=0.10.1 RUN mkdir -p /opt/seq && \ wget https://github.com/seq-lang/seq/releases/download/v${SEQ_VERSION}/seq-linux-x86_64.tar.gz && \ tar xzf seq-linux-x86_64.tar.gz --strip-components 1 -C /opt/seq && \ diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 81b6a0a6..f8c50f19 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-04-01-183902e' + container = 'vibsinglecellnf/singlecelltoolkit:2021-06-07-959e326' barcode_correction { whitelist { standard = '' From b3a6e74173206f48ae66738368835226d6f6eefc Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 9 Jun 2021 11:32:51 +0200 Subject: [PATCH 072/202] Add hydrop mode for atac_preprocess - Barcode extraction for HyDrop ATAC is now performed automatically (using singlecelltoolkit scripts) when the technology field is set to 'hydrop' in the metadata --- .../processes/extract_hydrop_atac_barcode.nf | 36 +++++++++++++++++++ workflows/atac/preprocess.nf | 13 +++++-- 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf diff --git a/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf b/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf new file mode 100644 index 00000000..794a2e63 --- /dev/null +++ b/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf @@ -0,0 +1,36 @@ +nextflow.enable.dsl=2 + +//binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" + +toolParams = params.tools.singlecelltoolkit + +process SCTK__EXTRACT_HYDROP_ATAC_BARCODE { + + container toolParams.container + label 'compute_resources__default' + + input: + tuple val(sampleId), + val(technology), + path(fastq_R1), + path(fastq_R2), + path(fastq_R3) + + output: + tuple val(sampleId), + val(technology), + path(fastq_R1), + path("${sampleId}_hydrop_barcode_R2.fastq.gz"), + path(fastq_R3) + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + //processParams = sampleParams.local + """ + extract_hydrop_atac_barcode_from_R2_fastq.sh \ + ${fastq_R2} \ + ${sampleId}_hydrop_barcode_R2.fastq.gz \ + igzip + """ +} + diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 38986495..f539d65b 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -5,6 +5,7 @@ nextflow.enable.dsl=2 include { SCTK__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './../../src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf' params(params) +include { SCTK__EXTRACT_HYDROP_ATAC_BARCODE; } from './../../src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf' params(params) include { TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) include { FASTP__ADAPTER_TRIMMING as FASTP__TRIM; @@ -61,9 +62,15 @@ workflow ATAC_PREPROCESS { } .branch { biorad: it[1] == 'biorad' + hydrop: it[1] == 'hydrop' standard: true // capture all other technology types here } + /* extract HyDrop ATAC barcode & combine with data.standard */ + data_combined = data.standard.mix( + SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop) + ) + /* Barcode correction */ // gather barcode whitelists from params into a channel: wl = Channel.empty() @@ -81,11 +88,11 @@ workflow ATAC_PREPROCESS { } // run barcode demultiplexing on each read+barcode: fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( - data.standard.map { it -> tuple(it[0], it[2], it[3], it[4]) } + data_combined.map { it -> tuple(it[0], it[2], it[3], it[4]) } ) } else { // join wl to the data channel: - data_wl = wl.cross( data.standard.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 + data_wl = wl.cross( data_combined.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology it[1][2], it[1][3], it[1][4], // R1, R2, R3 it[0][1] // whitelist @@ -98,7 +105,7 @@ workflow ATAC_PREPROCESS { // run barcode demultiplexing on each read+barcode: fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( - data.standard.join(fastq_bc_corrected).map { it -> tuple(it[0], it[2], it[5], it[4]) } + data_combined.join(fastq_bc_corrected).map { it -> tuple(it[0], it[2], it[5], it[4]) } ) } From 9f4919d5665195d31ea96a27282ea4a6b04d74b9 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 9 Jun 2021 14:45:28 +0200 Subject: [PATCH 073/202] Rename extract_hydrop_atac_barcode in/out - use pigz for compression of output --- .../processes/extract_hydrop_atac_barcode.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf 
b/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf index 794a2e63..fdad0d78 100644 --- a/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf +++ b/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf @@ -12,25 +12,25 @@ process SCTK__EXTRACT_HYDROP_ATAC_BARCODE { input: tuple val(sampleId), val(technology), - path(fastq_R1), - path(fastq_R2), - path(fastq_R3) + path(fastq_PE1), + path(fastq_bc), + path(fastq_PE2) output: tuple val(sampleId), val(technology), - path(fastq_R1), + path(fastq_PE1), path("${sampleId}_hydrop_barcode_R2.fastq.gz"), - path(fastq_R3) + path(fastq_PE2) script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams) //processParams = sampleParams.local """ extract_hydrop_atac_barcode_from_R2_fastq.sh \ - ${fastq_R2} \ + ${fastq_bc} \ ${sampleId}_hydrop_barcode_R2.fastq.gz \ - igzip + pigz """ } From ddacfcf7c9ef412d9ab7c5b59338eae7c24a828e Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 9 Jun 2021 21:25:24 +0200 Subject: [PATCH 074/202] Refactor singlecelltoolkit barcode correction: - move subworkflows to sctk/main.nf - standardize input/output from barcode correction processes (use [sampleId, technology PE1, bc, PE2]; reduces remapping) --- src/singlecelltoolkit/main.nf | 85 +++++++++++++++++++ .../processes/barcode_10x_scatac_fastqs.nf | 1 + .../processes/barcode_correction.nf | 6 ++ .../extract_and_correct_biorad_barcode.nf | 1 + 4 files changed, 93 insertions(+) create mode 100644 src/singlecelltoolkit/main.nf diff --git a/src/singlecelltoolkit/main.nf b/src/singlecelltoolkit/main.nf new file mode 100644 index 00000000..97877cd0 --- /dev/null +++ b/src/singlecelltoolkit/main.nf @@ -0,0 +1,85 @@ +nextflow.enable.dsl=2 + +////////////////////////////////////////////////////// +// process imports: +include { SCTK__BARCODE_CORRECTION; } from './processes/barcode_correction.nf' +include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './processes/barcode_10x_scatac_fastqs.nf' +include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './processes/extract_and_correct_biorad_barcode.nf' +include { BAP__BIORAD_DEBARCODE; } from './../bap/workflows/bap_debarcode.nf' + +include { + SIMPLE_PUBLISH as PUBLISH_BC_STATS; + SIMPLE_PUBLISH as PUBLISH_BR_BC_STATS; +} from '../../src/utils/processes/utils.nf' + +////////////////////////////////////////////////////// +// Define the workflow + + +/* Barcode correction */ +workflow barcode_correction { + take: + data + + main: + + // gather barcode whitelists from params into a channel: + wl = Channel.empty() + wl_cnt = 0 + params.tools.singlecelltoolkit.barcode_correction.whitelist.each { k, v -> + if(v != '') { + wl = wl.mix( Channel.of(tuple(k, file(v)) )) + wl_cnt = wl_cnt + 1 + } + } + + /* TO DO: fix ability to skip barcode correction */ + if(wl_cnt == 0) { + if(!params.containsKey('quiet')) { + println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. 
Skipping barcode correction for standard-type samples.") + } + // run barcode demultiplexing on each read+barcode: + fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ(data) + } else { + // join wl to the data channel: + data_wl = wl.cross( data.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 + .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology + it[1][2], it[1][3], it[1][4], // R1, R2, R3 + it[0][1] // whitelist + ) } + + // run barcode correction against a whitelist: + fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl) + PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[5]) }, '.corrected.bc_stats.log', 'reports/barcode') + + // run barcode demultiplexing on each read+barcode: + fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( + fastq_bc_corrected.map { it -> tuple(*it[0..4]) } + ) + } + + emit: + fastq_dex +} + + +workflow biorad_bc { + + take: + data_biorad + + main: + + /* run BioRad barcode correction and debarcoding separately: */ + // using BAP: + //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) + + // using singlecelltoolkit: + fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data_biorad.map{ it -> tuple(it[0], it[1], it[2], it[4]) }) + PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports/barcode') + + emit: + fastq_dex_br.map { it -> tuple(*it[0..2]) } + +} + diff --git a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf index 57675bcc..150b244f 100644 --- a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf +++ b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf @@ -11,6 +11,7 @@ process SCTK__BARCODE_10X_SCATAC_FASTQ { input: tuple val(sampleId), + val(technology), path(fastq_PE1), path(fastq_bc), path(fastq_PE2) diff --git a/src/singlecelltoolkit/processes/barcode_correction.nf b/src/singlecelltoolkit/processes/barcode_correction.nf index 6524c6a3..518ad8dd 100644 --- a/src/singlecelltoolkit/processes/barcode_correction.nf +++ b/src/singlecelltoolkit/processes/barcode_correction.nf @@ -11,12 +11,18 @@ process SCTK__BARCODE_CORRECTION { input: tuple val(sampleId), + val(technology), + path(fastq_PE1), path(fastq_bc), + path(fastq_PE2), path(bc_whitelist) output: tuple val(sampleId), + val(technology), + path(fastq_PE1), path("${sampleId}_bc_corrected.fastq.gz"), + path(fastq_PE2), path("${sampleId}_bc_corrected.fastq.gz.corrected.bc_stats.tsv") script: diff --git a/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf b/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf index f8e18d09..89ea1075 100644 --- a/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf +++ b/src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf @@ -11,6 +11,7 @@ process SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE { input: tuple val(sampleId), + val(technology), path(fastq_PE1), path(fastq_PE2) From 9248a23b37e2ab913d326d78c96c0cdfd01f2547 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 9 Jun 2021 21:28:24 +0200 Subject: [PATCH 075/202] Refactor atac_preprocess: - Use barcode correction subworkflows from singlecelltoolkit - barcode correction for each method (hydrop, standard, biorad) is run separately and mixed into the remainder of the steps --- workflows/atac/preprocess.nf | 151 +++++++++++++++++------------------ 1 file changed, 74 insertions(+), 77 deletions(-) diff --git 
a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index f539d65b..d555f50e 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -1,37 +1,42 @@ nextflow.enable.dsl=2 -////////////////////////////////////////////////////// -// process imports: -include { SCTK__BARCODE_CORRECTION; } from './../../src/singlecelltoolkit/processes/barcode_correction.nf' params(params) -include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './../../src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf' params(params) -include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './../../src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf' params(params) -include { SCTK__EXTRACT_HYDROP_ATAC_BARCODE; } from './../../src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf' params(params) -include { TRIMGALORE__TRIM; } from './../../src/trimgalore/processes/trim.nf' params(params) +// process imports +include { + SCTK__EXTRACT_HYDROP_ATAC_BARCODE; +} from './../../src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf' +include { + TRIMGALORE__TRIM; +} from './../../src/trimgalore/processes/trim.nf' include { FASTP__ADAPTER_TRIMMING as FASTP__TRIM; -} from './../../src/fastp/processes/adapter_trimming.nf' params(params) +} from './../../src/fastp/processes/adapter_trimming.nf' + +include { + SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE1; + SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE2; + SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_FASTP; + SIMPLE_PUBLISH as PUBLISH_FRAGMENTS; + SIMPLE_PUBLISH as PUBLISH_FRAGMENTS_INDEX; +} from '../../src/utils/processes/utils.nf' // workflow imports: include { BWA_MAPPING_PE; MARK_DUPLICATES; -} from './../../src/bwamaptools/main.nf' params(params) -include { BAM_TO_FRAGMENTS; } from './../../src/sinto/main.nf' params(params) -include { BAP__BIORAD_DEBARCODE; } from './../../src/bap/workflows/bap_debarcode.nf' params(params) - +} from './../../src/bwamaptools/main.nf' include { PICARD__MERGE_SAM_FILES_AND_SORT; -} from './../../src/gatk/processes/merge_sam_files.nf' params(params) +} from './../../src/gatk/processes/merge_sam_files.nf' +include { + BAM_TO_FRAGMENTS; +} from './../../src/sinto/main.nf' + include { - SIMPLE_PUBLISH as PUBLISH_BC_STATS; - SIMPLE_PUBLISH as PUBLISH_BR_BC_STATS; - SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE1; - SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_PE2; - SIMPLE_PUBLISH as PUBLISH_FASTQS_TRIMLOG_FASTP; - SIMPLE_PUBLISH as PUBLISH_FRAGMENTS; - SIMPLE_PUBLISH as PUBLISH_FRAGMENTS_INDEX; -} from "../../src/utils/processes/utils.nf" params(params) + barcode_correction as bc_corr_std; + barcode_correction as bc_corr_hyd; + biorad_bc; +} from './../../src/singlecelltoolkit/main.nf' ////////////////////////////////////////////////////// @@ -43,6 +48,7 @@ workflow ATAC_PREPROCESS { metadata main: + // import metadata data = Channel.from(metadata) .splitCsv( @@ -56,7 +62,7 @@ workflow ATAC_PREPROCESS { .replaceAll(row.sample_name,""), row.technology, file(row.fastq_PE1_path, checkIfExists: true), - file(row.fastq_barcode_path, checkIfExists: true), + row.fastq_barcode_path, file(row.fastq_PE2_path, checkIfExists: true) ) } @@ -66,60 +72,40 @@ workflow ATAC_PREPROCESS { standard: true // capture all other technology types here } - /* extract HyDrop ATAC barcode & combine with data.standard */ - data_combined = data.standard.mix( - SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop) - ) - - /* Barcode correction */ - // gather barcode whitelists from params into a channel: - wl = Channel.empty() - 
wl_cnt = 0 - params.tools.singlecelltoolkit.barcode_correction.whitelist.each { k, v -> - if(v != '') { - wl = wl.mix( Channel.of(tuple(k, file(v)) )) - wl_cnt = wl_cnt + 1 - } - } + /* standard data + barcode correction */ + bc_corr_std(data.standard) - if(wl_cnt == 0) { - if(!params.containsKey('quiet')) { - println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. Skipping barcode correction for standard-type samples.") - } - // run barcode demultiplexing on each read+barcode: - fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( - data_combined.map { it -> tuple(it[0], it[2], it[3], it[4]) } - ) - } else { - // join wl to the data channel: - data_wl = wl.cross( data_combined.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 - .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology - it[1][2], it[1][3], it[1][4], // R1, R2, R3 - it[0][1] // whitelist - ) } - - // run barcode correction against a whitelist: - fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl.map{ it -> tuple(it[0], it[3], it[5]) } ) - PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[2]) }, '.corrected.bc_stats.log', 'reports/barcode') - - - // run barcode demultiplexing on each read+barcode: - fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( - data_combined.join(fastq_bc_corrected).map { it -> tuple(it[0], it[2], it[5], it[4]) } - ) + /* HyDrop ATAC + extract barcode and correct */ + SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop) \ + | bc_corr_hyd - } + /* BioRad data + extract barcode and correct */ + biorad_bc(data.biorad) - /* run BioRad barcode correction and debarcoding separately: */ - // using BAP: - //fastq_dex_br = BAP__BIORAD_DEBARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - // using singlecelltoolkit: - fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data.biorad.map{ it -> tuple(it[0], it[2], it[4]) }) - PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports/barcode') + /* downstream steps */ + bc_corr_std.out + .mix(bc_corr_hyd.out) + .mix(biorad_bc.out) \ + | adapter_trimming \ + | mapping + emit: + bam = mapping.out.bam + fragments = mapping.out.fragments +} + + +/* sub-workflows used above */ + +workflow adapter_trimming { - // concatenate the read channels: - fastq_dex = fastq_dex.concat(fastq_dex_br.map{ it -> tuple(it[0], it[1],it[2])}) + take: + fastq_dex + + main: // run adapter trimming: switch(params.atac_preprocess_tools.adapter_trimming_method) { @@ -134,14 +120,26 @@ workflow ATAC_PREPROCESS { break; } + emit: + fastq_dex_trim + +} + + +workflow mapping { + + take: + fastq_dex_trim + + main: + // map with bwa mem: aligned_bam = BWA_MAPPING_PE( fastq_dex_trim.map { it -> tuple(it[0].split("___")[0], // [val(unique_sampleId), *it[0..2] ) // val(sampleId), path(fastq_PE1), path(fastq_PE2)] }) - - // split by sample: + // split by sample size: aligned_bam.map{ it -> tuple(it[0].split("___")[0], it[1]) } // [ sampleId, bam ] .groupTuple() .branch { @@ -154,10 +152,8 @@ workflow ATAC_PREPROCESS { merged_bam = PICARD__MERGE_SAM_FILES_AND_SORT(aligned_bam_size_split.to_merge) // re-combine with single files: - merged_bam.mix(aligned_bam_size_split.no_merge - .map { it -> tuple(it[0], *it[1]) } - ) - .set { aligned_bam_sample_merged } + merged_bam.mix(aligned_bam_size_split.no_merge.map { it -> tuple(it[0], *it[1]) }) + .set { aligned_bam_sample_merged } bam = MARK_DUPLICATES(aligned_bam_sample_merged, 
params.atac_preprocess_tools.mark_duplicates_method) @@ -171,5 +167,6 @@ workflow ATAC_PREPROCESS { emit: bam fragments + } From 74fcdd3bbb795001a02aff1fdc81c958dc0ef062 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 14 Jun 2021 16:41:50 +0200 Subject: [PATCH 076/202] Update pyscisTopic config --- src/pycistopic/pycistopic.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index a2b8cdfa..88ce0f11 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -1,7 +1,7 @@ params { tools { pycistopic { - container = '/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/containers/aertslab-pycistopic-latest.sif' + container = '/staging/leuven/stg_00002/lcb/cflerin/containers/aertslab-pycistopic-latest.sif' biomart_annot { biomart_dataset_name = 'hsapiens_gene_ensembl' } @@ -13,7 +13,7 @@ params { keepdup = 'all' } compute_qc_stats { - n_frag = 50 + n_frag = 100 } call_cells { filter_frags_lower = '1000' From dd7d8a6fdbb41825ae44c4e3a2aac7578d0228ef Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 16 Jun 2021 15:18:00 +0200 Subject: [PATCH 077/202] Update pycistopic config - Add compute resources label - Add ipynb report - remove barcode selection params --- src/pycistopic/pycistopic.config | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 88ce0f11..9e133473 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -16,16 +16,7 @@ params { n_frag = 100 } call_cells { - filter_frags_lower = '1000' - filter_frags_upper = '' - filter_tss_lower = '8' - filter_tss_upper = '' - filter_frip_lower = '' - filter_frip_upper = '' - filter_dup_rate_lower = '' - filter_dup_rate_upper = '' - } - barcode_level_statistics { + report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' filter_frags_lower = '1000' filter_frags_upper = '' filter_tss_lower = '8' @@ -39,3 +30,13 @@ params { } } +// define computing resources via process labels +process { + withLabel: 'compute_resources__pycisTopic' { + executor = 'local' // or 'pbs' + cpus = 8 + memory = '120 GB' + time = '24h' + } +} + From 99fe0a62c64cc6023425dc2dd28b6dcb371fc0fb Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 16 Jun 2021 15:20:30 +0200 Subject: [PATCH 078/202] pycistopic compute_qc_stats update: - Load all input fragments/peak files at once with mulitple calls to --input_files, then parse this within the script - samples are parallelized using pycistopic/ray - remove metadata text output --- src/pycistopic/bin/compute_qc_stats.py | 76 ++++++++------------------ 1 file changed, 22 insertions(+), 54 deletions(-) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index 7de38ca4..c1c39490 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -17,29 +17,9 @@ type=str, required=True, nargs='+', + action='append', help='Input files in the form of [SampleId, path_to_fragments, path_to_peaks]. Multiple inputs are possible.' ) -parser.add_argument( - "--sampleId", - type=str, - required=True, - nargs='+', - help='Sample ID.' -) -parser.add_argument( - "--fragments", - type=str, - required=True, - nargs='+', - help='Input fragments file.' -) -parser.add_argument( - "--regions", - type=str, - required=True, - nargs='+', - help='Path to regions file.' 
-) parser.add_argument( "--n_frag", type=int, @@ -52,11 +32,6 @@ type=str, help='Biomart annotations, pickle format.' ) -parser.add_argument( - "--output_metadata", - type=str, - help='Output file, tsv format.' -) parser.add_argument( "--output_metadata_pkl", type=str, @@ -79,52 +54,45 @@ ################################################################################ + +fragments_dict = { x[0].split(',')[0]: x[0].split(',')[1] for x in args.input_files } +path_to_regions = { x[0].split(',')[0]: x[0].split(',')[2] for x in args.input_files } + # Load biomart annotations: infile = open(args.biomart_annot_pkl, 'rb') annot = pickle.load(infile) infile.close() -print(args.input_files) - - -fragments_dict = { - args.sampleId: args.fragments - } -path_to_regions = { - args.sampleId: args.regions - } metadata_bc_dict, profile_data_dict = compute_qc_stats( - fragments_dict= fragments_dict, - tss_annotation = annot, + fragments_dict=fragments_dict, + tss_annotation=annot, stats=['barcode_rank_plot', 'duplicate_rate', 'insert_size_distribution', 'profile_tss', 'frip'], - label_list = None, - path_to_regions = path_to_regions, - n_cpu = args.threads, - valid_bc = None, - n_frag = args.n_frag, - n_bc = None, - tss_flank_window = 1000, - tss_window = 50, + label_list=None, + path_to_regions=path_to_regions, + n_cpu=args.threads, + valid_bc=None, + n_frag=args.n_frag, + n_bc=None, + tss_flank_window=1000, + tss_window=50, tss_minimum_signal_window = 100, tss_rolling_window = 10, + min_norm=0.1, remove_duplicates = True, - ### ray init args: - include_dashboard=False, + #_temp_dir= ) -# load bap results to use for duplicate rate (if we are using bap output): -f_bap_qc = os.path.join(os.path.dirname(args.fragments),args.sampleId+'.QCstats.csv') -if os.path.isfile(f_bap_qc) and all(metadata_bc_dict[args.sampleId]['Dupl_rate_bap'] == 0): - bapqc = pd.read_csv(f_bap_qc, index_col=0) - metadata_bc_dict[args.sampleId]['Dupl_rate'] = bapqc['duplicateProportion'] +## load bap results to use for duplicate rate (if we are using bap output): +#f_bap_qc = os.path.join(os.path.dirname(args.fragments),args.sampleId+'.QCstats.csv') +#if os.path.isfile(f_bap_qc) and all(metadata_bc_dict[args.sampleId]['Dupl_rate_bap'] == 0): +# bapqc = pd.read_csv(f_bap_qc, index_col=0) +# metadata_bc_dict[args.sampleId]['Dupl_rate'] = bapqc['duplicateProportion'] ### outputs: -metadata_bc_dict[args.sampleId].to_csv(args.output_metadata, sep='\t', index_label='barcode') - with open(args.output_metadata_pkl, 'wb') as f: pickle.dump(metadata_bc_dict, f) From cbb9e0d40144bb1c919a23b0e5862eb1a3ac1960 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 16 Jun 2021 15:24:12 +0200 Subject: [PATCH 079/202] pycistopic: rename processes - remove SC__ prefix --- src/pycistopic/processes/barcode_level_statistics.nf | 2 +- src/pycistopic/processes/biomart_annot.nf | 2 +- src/pycistopic/processes/macs2_call_peaks.nf | 5 ++--- src/pycistopic/processes/plot_qc_stats.nf | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/pycistopic/processes/barcode_level_statistics.nf b/src/pycistopic/processes/barcode_level_statistics.nf index a5f654ac..63691baa 100644 --- a/src/pycistopic/processes/barcode_level_statistics.nf +++ b/src/pycistopic/processes/barcode_level_statistics.nf @@ -5,7 +5,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pycistopic/bi toolParams = params.tools.pycistopic //processParams = params.tools.pycistopic.barcode_level_statistics -process SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS { +process PYCISTOPIC__BARCODE_LEVEL_STATISTICS { publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' container toolParams.container diff --git a/src/pycistopic/processes/biomart_annot.nf b/src/pycistopic/processes/biomart_annot.nf index 3e896485..24d38645 100644 --- a/src/pycistopic/processes/biomart_annot.nf +++ b/src/pycistopic/processes/biomart_annot.nf @@ -5,7 +5,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bi toolParams = params.tools.pycistopic processParams = params.tools.pycistopic.biomart_annot -process SC__PYCISTOPIC__BIOMART_ANNOT { +process PYCISTOPIC__BIOMART_ANNOT { publishDir "${params.global.outdir}/intermediate/pycistopic/biomart/", mode: 'symlink' container toolParams.container diff --git a/src/pycistopic/processes/macs2_call_peaks.nf b/src/pycistopic/processes/macs2_call_peaks.nf index ef0d9421..dd1398c7 100644 --- a/src/pycistopic/processes/macs2_call_peaks.nf +++ b/src/pycistopic/processes/macs2_call_peaks.nf @@ -5,7 +5,7 @@ nextflow.enable.dsl=2 toolParams = params.tools.pycistopic processParams = params.tools.pycistopic.macs2_call_peaks -process SC__PYCISTOPIC__MACS2_CALL_PEAKS { +process PYCISTOPIC__MACS2_CALL_PEAKS { container toolParams.container label 'compute_resources__default' @@ -13,8 +13,7 @@ process SC__PYCISTOPIC__MACS2_CALL_PEAKS { input: tuple val(sampleId), path(bam), - path(bam_index), - val(filetype) + path(bam_index) output: tuple val(sampleId), diff --git a/src/pycistopic/processes/plot_qc_stats.nf b/src/pycistopic/processes/plot_qc_stats.nf index 66d1df1d..87ce2431 100644 --- a/src/pycistopic/processes/plot_qc_stats.nf +++ b/src/pycistopic/processes/plot_qc_stats.nf @@ -5,7 +5,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bi toolParams = params.tools.pycistopic processParams = params.tools.pycistopic.compute_qc_stats -process SC__PYCISTOPIC__PLOT_QC_STATS { +process PYCISTOPIC__PLOT_QC_STATS { container toolParams.container label 'compute_resources__default' From 88b1e82d37c6dc0bb10b150e0e04e428daf8247b Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 16 Jun 2021 15:34:58 +0200 Subject: [PATCH 080/202] pycistopic compute_qc_stats: - Use single input with all samples/fragments/peaks - rework input and outputs to handle new single-input strategy --- src/pycistopic/processes/compute_qc_stats.nf | 32 ++++++-------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index 4c505233..7d970fe7 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -5,43 +5,31 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pycistopic/bi toolParams = params.tools.pycistopic processParams = params.tools.pycistopic.compute_qc_stats -process SC__PYCISTOPIC__COMPUTE_QC_STATS { +process PYCISTOPIC__COMPUTE_QC_STATS { - publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' + //publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' container toolParams.container - label 'compute_resources__default','compute_resources__24hqueue' + label 'compute_resources__pycisTopic' input: - tuple val(sampleId), - path(fragments), - path(fragments_index), - path(peaks) + val(input) path(biomart_annot) output: - tuple val(sampleId), - path(output_metadata), - path(output_metadata_pkl), - path(output_profile_data_pkl) + tuple path("project_metadata.pickle"), + path("project_profile_data.pickle") script: - def sampleParams = params.parseConfig(sampleId, params.global, toolParams) - output_metadata = "${sampleId}_metadata.tsv.gz" - output_metadata_pkl = "${sampleId}_metadata.pickle" - output_profile_data_pkl = "${sampleId}_profile_data.pickle" """ export NUMEXPR_MAX_THREADS=1 export OMP_NUM_THREADS=1 ${binDir}compute_qc_stats.py \ - --sampleId ${sampleId} \ - --fragments ${fragments} \ - --regions ${peaks} \ + ${"--input_files "+input.join(" --input_files ")} \ --n_frag ${processParams.n_frag} \ - --threads 1 \ + --threads ${task.cpus} \ --biomart_annot_pkl ${biomart_annot} \ - --output_metadata ${output_metadata} \ - --output_metadata_pkl ${output_metadata_pkl} \ - --output_profile_data_pkl ${output_profile_data_pkl} + --output_metadata_pkl project_metadata.pickle \ + --output_profile_data_pkl project_profile_data.pickle """ } From 62bac4ce3b3f91065012ce16dedb13b6fb7349c7 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 16 Jun 2021 15:40:07 +0200 Subject: [PATCH 081/202] pycistopic call_cells: - Add ipynb report to plot qc metrics and call cells based on parameters in the config file --- src/pycistopic/processes/call_cells.nf | 54 +++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/pycistopic/processes/call_cells.nf b/src/pycistopic/processes/call_cells.nf index 01cf88e9..f52aaf15 100644 --- a/src/pycistopic/processes/call_cells.nf +++ b/src/pycistopic/processes/call_cells.nf @@ -1,10 +1,13 @@ -nextflow.preview.dsl=2 +nextflow.enable.dsl=2 + +import java.nio.file.Paths +import static groovy.json.JsonOutput.* binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pycistopic/bin/" : "" toolParams = params.tools.pycistopic -process SC__PYCISTOPIC__CALL_CELLS { +process PYCISTOPIC__CALL_CELLS { publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' container toolParams.container @@ -42,3 +45,50 @@ process SC__PYCISTOPIC__CALL_CELLS { """ } + +process PYCISTOPIC__QC_REPORT { + + container toolParams.container + publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode + label 'compute_resources__report' + + input: + path(ipynb) + val(sampleId) + tuple path(metadata_pickle), + path(profile_data_pickle) + val(reportTitle) + + output: + path("${reportTitle}.ipynb") + + script: + pycistopic_params = toJson(toolParams) + """ + papermill ${ipynb} \ + --report-mode \ + ${reportTitle}.ipynb \ + -p SAMPLE "${sampleId.join(",")}" \ + -p WORKFLOW_PARAMETERS '${pycistopic_params}' \ + """ +} + + +process REPORT_TO_HTML { + + container toolParams.container + publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode + label 'compute_resources__report' + + input: + path(ipynb) + + output: + file("*.html") + + script: + """ + jupyter nbconvert ${ipynb} --to html + """ +} + From c95be5c9acec36f57a227d583de80ce12367bd66 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 16 Jun 2021 15:41:21 +0200 Subject: [PATCH 082/202] QC Filtering workflow updates - Rework channels to combine all samples into a single input to pycistopic qc metrics step - Clean up imports, remove extra processes --- workflows/atac/qc_filtering.nf | 58 ++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 2eadc62d..e913bf27 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -2,21 +2,22 @@ nextflow.enable.dsl=2 ////////////////////////////////////////////////////// // process imports: -include { SC__ARCHR__CREATE_ARROW_UNFILTERED; } from './../../src/archr/processes/createArrow_unfiltered.nf' params(params) -include { SC__ARCHR__CELL_CALLING; } from './../../src/archr/processes/cell_calling.nf' params(params) +include { SC__ARCHR__CREATE_ARROW_UNFILTERED; } from './../../src/archr/processes/createArrow_unfiltered.nf' +include { SC__ARCHR__CELL_CALLING; } from './../../src/archr/processes/cell_calling.nf' -include { SC__PYCISTOPIC__BIOMART_ANNOT; } from './../../src/pycistopic/processes/biomart_annot.nf' params(params) -include { SC__PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' params(params) -include { SC__PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' params(params) -include { SC__PYCISTOPIC__PLOT_QC_STATS; } from './../../src/pycistopic/processes/plot_qc_stats.nf' params(params) -include { SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS; } from './../../src/pycistopic/processes/barcode_level_statistics.nf' params(params) -include { SC__PYCISTOPIC__CALL_CELLS; } from './../../src/pycistopic/processes/call_cells.nf' params(params) +include { PYCISTOPIC__BIOMART_ANNOT; } from './../../src/pycistopic/processes/biomart_annot.nf' +include { PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' +include { PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' +include { + PYCISTOPIC__QC_REPORT; + REPORT_TO_HTML; +} from './../../src/pycistopic/processes/call_cells.nf' include { PUBLISH as PUBLISH_PEAKS; 
PUBLISH as PUBLISH_METADATA; PUBLISH as PUBLISH_QC_SAMPLE_METRICS; -} from "../../src/utils/workflows/utils.nf" params(params) +} from "../../src/utils/workflows/utils.nf" ////////////////////////////////////////////////////// // Define the workflow @@ -29,31 +30,32 @@ workflow ATAC_QC_PREFILTER { main: data.branch { - fragments: it[3] == 'fragments' - bam: it[3] == 'bam' + fragments: it[2] == 'fragments' + bam: it[2] == 'bam' } .set{ data_split } - biomart = SC__PYCISTOPIC__BIOMART_ANNOT() + biomart = PYCISTOPIC__BIOMART_ANNOT() - peaks = SC__PYCISTOPIC__MACS2_CALL_PEAKS(data_split.bam) + peaks = PYCISTOPIC__MACS2_CALL_PEAKS(data_split.bam.map { it -> tuple(it[0], it[1][0], it[1][1] ) } ) PUBLISH_PEAKS(peaks.map { it -> tuple(it[0], it[1]) }, 'peaks', 'narrowPeak', 'macs2', false) - data_split.fragments.join(peaks) - .map { it -> tuple(it[0], it[1], it[2], it[4]) } - .set{ fragpeaks } - fragpeaks.map { it -> tuple(it[0], it[1], it[3]) } - .collectFile(name: 'input_files.txt') - .view() - - qc_stats = SC__PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, biomart) - PUBLISH_METADATA(qc_stats.map { it -> tuple(it[0], it[1]) }, 'metadata.tsv', 'gz', 'pycistopic', false) - - qc_stats_plot = SC__PYCISTOPIC__PLOT_QC_STATS(qc_stats) - PUBLISH_QC_SAMPLE_METRICS(qc_stats_plot, 'qc_sample_metrics', 'pdf', 'pycistopic', false) - - //SC__PYCISTOPIC__BARCODE_LEVEL_STATISTICS(qc_stats) - SC__PYCISTOPIC__CALL_CELLS(qc_stats) + data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) } + .join(peaks) + //.map { it -> [ tuple(*it[0..1], it[3]) ] } + .map { it -> ["${it[0]},${it[1]},${it[3]}"] } + .collect() + .set { fragpeaks } + + qc_stats = PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, biomart) + + PYCISTOPIC__QC_REPORT( + file(workflow.projectDir + params.tools.pycistopic.call_cells.report_ipynb), + data_split.fragments.map { it -> it[0] }.collect(), // all sampleIds + qc_stats, + "pycisTopic_QC_report" + ) | + REPORT_TO_HTML } From 549b22173bebf8063b28fb788e1bf178dc40d8ab Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 09:28:03 +0200 Subject: [PATCH 083/202] Update params for pycistopic call_calls report --- src/pycistopic/processes/call_cells.nf | 2 +- src/pycistopic/pycistopic.config | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pycistopic/processes/call_cells.nf b/src/pycistopic/processes/call_cells.nf index f52aaf15..001d0530 100644 --- a/src/pycistopic/processes/call_cells.nf +++ b/src/pycistopic/processes/call_cells.nf @@ -68,7 +68,7 @@ process PYCISTOPIC__QC_REPORT { papermill ${ipynb} \ --report-mode \ ${reportTitle}.ipynb \ - -p SAMPLE "${sampleId.join(",")}" \ + -p SAMPLES "${sampleId.join(",")}" \ -p WORKFLOW_PARAMETERS '${pycistopic_params}' \ """ } diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 9e133473..2743bd77 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -17,6 +17,8 @@ params { } call_cells { report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' + use_density_coloring_on_scatterplot = True + use_detailed_title_on_scatterplot = True filter_frags_lower = '1000' filter_frags_upper = '' filter_tss_lower = '8' From 701538beff317d29c3c0eef49e67f68a698c3fac Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 09:28:23 +0200 Subject: [PATCH 084/202] Add pycistopic call_cells notebook --- .../bin/pycisTopic_qc_report_template.ipynb | 593 ++++++++++++++++++ 1 file changed, 593 insertions(+) create mode 100644 
src/pycistopic/bin/pycisTopic_qc_report_template.ipynb diff --git a/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb b/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb new file mode 100644 index 00000000..01674eac --- /dev/null +++ b/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb @@ -0,0 +1,593 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VSN Pipelines: pycisTopic QC report\n", + "\n", + "scATAC-seq quality control and cell calling from pycisTopic (https://github.com/aertslab/pycisTopic)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pycisTopic\n", + "pycisTopic.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "warnings.simplefilter('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pybiomart as pbm\n", + "import pandas as pd\n", + "import pickle\n", + "import re\n", + "import os\n", + "import json\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = json.loads(WORKFLOW_PARAMETERS)\n", + "\n", + "sample_ids = SAMPLES.split(',')\n", + "\n", + "print(f\"SAMPLES: {sample_ids}\")\n", + "print(f\"pycisTopic parameters: {json.dumps(params, indent=4)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load metadata\n", + "import pickle\n", + "infile = open('project_metadata.pickle', 'rb')\n", + "metadata_bc_dict = pickle.load(infile)\n", + "infile.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load profile data\n", + "infile = open('project_profile_data.pickle', 'rb')\n", + "profile_data_dict = pickle.load(infile)\n", + "infile.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## QC summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pycisTopic.qc import plot_sample_metrics\n", + "from scipy.stats import gaussian_kde" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Per-sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sample_id in profile_data_dict:\n", + " plot_sample_metrics({sample_id: profile_data_dict[sample_id]},\n", + " profile_list=['barcode_rank_plot', 'insert_size_distribution', 'profile_tss', 'frip'],\n", + " insert_size_distriubtion_xlim=[0,600],\n", + " legend_outside=False,\n", + " ncol=4,\n", + " cmap='tab20',\n", + " plot=True)#,\n", + " #save='quality_control/sample_metrics_all.pdf')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combined" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_sample_metrics(profile_data_dict,\n", + " profile_list=['barcode_rank_plot', 'insert_size_distribution', 'profile_tss', 'frip'],\n", + " insert_size_distriubtion_xlim=[0,600],\n", + " #legend_outside=True,\n", + " ncol=4,\n", + " cmap='tab20',\n", + " 
plot=True)#,\n", + " #save='quality_control/sample_metrics_all.pdf')\n", + "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cell calling" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_frag_qc(x, y, \n", + " ax,\n", + " x_thr_min=None, x_thr_max=None,\n", + " y_thr_min=None, y_thr_max=None,\n", + " ylab=None,\n", + " xlab=\"Number of (unique) fragments\",\n", + " cmap='viridis',\n", + " density_overlay=False,\n", + " s=10,\n", + " marker='+',\n", + " c='#343434',\n", + " xlim=None,\n", + " ylim=None,\n", + " **kwargs\n", + " ):\n", + " assert all(x.index == y.index)\n", + " barcodes = x.index.values\n", + " if density_overlay:\n", + " xy = np.vstack([np.log(x),y])\n", + " z = gaussian_kde(xy)(xy)\n", + " idx = z.argsort()\n", + " x, y, z, barcodes = x[idx], y[idx], z[idx], barcodes[idx]\n", + " else:\n", + " z=c\n", + " barcodes_to_keep=[]\n", + " sp=ax.scatter(x, y, c=z, s=s, edgecolors=None, marker=marker, cmap=cmap, **kwargs)\n", + " if ylim is not None:\n", + " ax.set_ylim(ylim[0], ylim[1])\n", + " if xlim is not None:\n", + " ax.set_xlim(xlim[0], xlim[1])\n", + " # thresholds:\n", + " if x_thr_min is not None: \n", + " ax.axvline(x=x_thr_min, color='r', linestyle='--')\n", + " barcodes_to_keep.append(barcodes[x>x_thr_min])\n", + " if x_thr_max is not None: \n", + " ax.axvline(x=x_thr_max, color='r', linestyle='--')\n", + " barcodes_to_keep.append(barcodes[x<x_thr_max])\n", + " if y_thr_min is not None: \n", + " ax.axhline(y=y_thr_min, color='r', linestyle='--')\n", + " barcodes_to_keep.append(barcodes[y>y_thr_min])\n", + " if y_thr_max is not None: \n", + " ax.axhline(y=y_thr_max, color='r', linestyle='--')\n", + " barcodes_to_keep.append(barcodes[y<y_thr_max])\n", + " if len(barcodes_to_keep)>0:\n", + " return list(set.intersection(*map(set, barcodes_to_keep)))\n", + " else:\n", + " return barcodes\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# extract filter parameters\n", + "filter_frags_lower = {}\n", + "filter_frags_upper = {}\n", + "filter_tss_lower = {}\n", + "filter_tss_upper = {}\n", + "filter_frip_lower = {}\n", + "filter_frip_upper = {}\n", + "filter_dup_rate_lower = {}\n", + "filter_dup_rate_upper = {}\n", + "\n", + "def float_or_none(x):\n", + " try:\n", + " return float(x)\n", + " except ValueError:\n", + " return None\n", + " \n", + "def extract_sample_specific_param(filter_dict, sample_id, param_key):\n", + " if type(params['call_cells'][param_key]) is dict:\n", + " if sample_id in params['call_cells'][param_key]:\n", + " filter_dict[s] = float_or_none(params['call_cells'][param_key][sample_id])\n", + " else:\n", + " try:\n", + " filter_dict[s] = float_or_none(params['call_cells'][param_key]['default'])\n", + " except KeyError:\n", + " print(f\"WARNING: Missing 'default' key in the sample parameters list. 
Filter for '{param_key}' will be missing for sample '{sample_id}'.\")\n", + " filter_dict[s] = None\n", + " else:\n", + " filter_dict[s] = float_or_none(params['call_cells'][param_key])\n", + " return filter_dict\n", + "\n", + "for s in sample_ids:\n", + " filter_frags_lower = extract_sample_specific_param(filter_frags_lower, s, 'filter_frags_lower')\n", + " filter_frags_upper = extract_sample_specific_param(filter_frags_upper, s, 'filter_frags_upper')\n", + " #\n", + " filter_tss_lower = extract_sample_specific_param(filter_tss_lower, s, 'filter_tss_lower')\n", + " filter_tss_upper = extract_sample_specific_param(filter_tss_upper, s, 'filter_tss_upper')\n", + " #\n", + " filter_frip_lower = extract_sample_specific_param(filter_frip_lower, s, 'filter_frip_lower')\n", + " filter_frip_upper = extract_sample_specific_param(filter_frip_upper, s, 'filter_frip_upper')\n", + " #\n", + " filter_dup_rate_lower = extract_sample_specific_param(filter_dup_rate_lower, s, 'filter_dup_rate_lower')\n", + " filter_dup_rate_upper = extract_sample_specific_param(filter_dup_rate_upper, s, 'filter_dup_rate_upper')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# show cell filters:\n", + "print(f\"Filter parameters:\")\n", + "print(f\"filter_frags_lower: {json.dumps(filter_frags_lower, indent=4)}\")\n", + "print(f\"filter_frags_upper: {json.dumps(filter_frags_upper, indent=4)}\")\n", + "print(f\"filter_tss_lower: {json.dumps(filter_tss_lower, indent=4)}\")\n", + "print(f\"filter_tss_upper: {json.dumps(filter_tss_upper, indent=4)}\")\n", + "print(f\"filter_frip_lower: {json.dumps(filter_frip_lower, indent=4)}\")\n", + "print(f\"filter_frip_upper: {json.dumps(filter_frip_upper, indent=4)}\")\n", + "print(f\"filter_dup_rate_lower: {json.dumps(filter_dup_rate_lower, indent=4)}\")\n", + "print(f\"filter_dup_rate_upper: {json.dumps(filter_dup_rate_upper, indent=4)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "include_kde = params['call_cells']['use_density_coloring_on_scatterplot']\n", + "detailed_title=params['call_cells']['use_detailed_title_on_scatterplot']\n", + "s=4\n", + "bc_passing_filters = {}\n", + "for k,v in metadata_bc_dict.items():\n", + "\n", + " fig, (ax1,ax2,ax3) = plt.subplots(1,3, figsize=(12,4), dpi=150 )\n", + " p1_cells = plot_frag_qc(\n", + " x = metadata_bc_dict[k]['Unique_nr_frag'],\n", + " y = metadata_bc_dict[k]['TSS_enrichment'],\n", + " ylab = 'TSS Enrichment',\n", + " s=s,\n", + " x_thr_min=filter_frags_lower[k],\n", + " y_thr_min=filter_tss_lower[k],\n", + " density_overlay=include_kde,\n", + " ax=ax1\n", + " )\n", + " p2_cells = plot_frag_qc(\n", + " x = metadata_bc_dict[k]['Unique_nr_frag'],\n", + " y = metadata_bc_dict[k]['FRIP'],\n", + " x_thr_min=filter_frags_lower[k],\n", + " ylab = 'FRIP',\n", + " s=s,\n", + " ylim=[0,1],\n", + " density_overlay=include_kde,\n", + " ax=ax2\n", + " )\n", + " p3_cells = plot_frag_qc(\n", + " x = metadata_bc_dict[k]['Unique_nr_frag'],\n", + " y = metadata_bc_dict[k]['Dupl_rate'],\n", + " x_thr_min=filter_frags_lower[k],\n", + " ylab = 'Duplicate rate per cell',\n", + " s=s,\n", + " ylim=[0,1],\n", + " density_overlay=include_kde,\n", + " ax=ax3\n", + " )\n", + " bc_passing_filters[k] = list(set(p1_cells) & set(p2_cells) & set(p3_cells))\n", + " if detailed_title:\n", + " med_nf = metadata_bc_dict[k].loc[bc_passing_filters[k],'Unique_nr_frag'].median()\n", + " med_tss = 
metadata_bc_dict[k].loc[bc_passing_filters[k],'TSS_enrichment'].median()\n", + " title = f\"{k}: Kept {len(bc_passing_filters[k])} cells. Median(fragments): {med_nf:.0f}. Median(TSS Enrichment): {med_tss:.2f})\"\n", + " else:\n", + " title = k\n", + " fig.suptitle(title, x=0.5, y=0.95, fontsize=10)\n", + " plt.tight_layout()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('bc_passing_filters.pkl', 'wb') as f:\n", + " pickle.dump(bc_passing_filters, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# write all barcodes selected in compute_qc_stats (with nFrag>x)\n", + "if not os.path.exists('selected_barcodes_nFrag'):\n", + " os.makedirs('selected_barcodes_nFrag')\n", + " \n", + "for k,v in metadata_bc_dict.items():\n", + " pd.DataFrame(v.index).to_csv('selected_barcodes_nFrag/'+k+'.barcodes_nFrag_thr.txt', sep='\\t', header=False, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# write \n", + "if not os.path.exists('selected_barcodes'):\n", + " os.makedirs('selected_barcodes')\n", + " \n", + "for k,v in bc_passing_filters.items():\n", + " pd.DataFrame(v).to_csv('selected_barcodes/'+k+'.cell_barcodes.txt', sep='\\t', header=False, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tp = pd.DataFrame([\n", + " list(bc_passing_filters),\n", + " [ len(bc_passing_filters[x]) for x in bc_passing_filters ]\n", + "\n", + "]).T\n", + "\n", + "fig, ax = plt.subplots(1,1, figsize=(6,4), dpi=150 )\n", + "g=sns.barplot(x=1, y=0, data=tp,\n", + " palette='tab20')\n", + "for p in ax.patches:\n", + " width = p.get_width() # get bar length\n", + " ax.text(width, # set the text at 1 unit right of the bar\n", + " p.get_y() + p.get_height() / 2, # get Y coordinate + X coordinate / 2\n", + " '{:1.0f}'.format(width), # set variable to display, 2 decimals\n", + " ha = 'right', # horizontal alignment\n", + " va = 'center') # vertical alignment\n", + "ax.set_xlabel(\"Number of cells\",fontsize=10)\n", + "ax.set_ylabel(\"\",fontsize=10)\n", + "plt.show()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import ticker as mticker\n", + "import matplotlib.gridspec as gridspec\n", + "import math" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_stats_violin(\n", + " data,\n", + " ax,\n", + " var = 'Unique_nr_frag',\n", + " ylab='Number of (unique) fragments',\n", + " xlab='',\n", + " logscale=True,\n", + " **kwargs\n", + " ):\n", + "\n", + " tp = []\n", + " for k,v in data.items():\n", + " tmp = pd.DataFrame(np.log10(data[k][var])) if logscale else pd.DataFrame(data[k][var])\n", + " tmp['Sample'] = k\n", + " tp.append(tmp)\n", + " tp = pd.concat(tp, join='outer', axis=0)\n", + "\n", + " yrange = [ int(math.floor(tp[var].min())), int(math.ceil(tp[var].max())) ]\n", + " \n", + " g = sns.violinplot(data=tp, x='Sample', y=var,\n", + " dodge=True,\n", + " linewidth=0.5,\n", + " inner='quartiles',\n", + " ax=ax, kind='kde',\n", 
+ " **kwargs)\n", + " \n", + " ax.set_xlabel(xlab,fontsize=10)\n", + " ax.set_ylabel(ylab,fontsize=10)\n", + " g.set_xticklabels(g.get_xticklabels(), rotation=15, ha='right', fontsize=8)\n", + "\n", + " #if logscale:\n", + " # ax.yaxis.set_major_formatter(mticker.StrMethodFormatter(\"$10^{{{x:.0f}}}$\"))\n", + " # ax.yaxis.set_ticks([np.log10(x) for p in range(yrange[0],yrange[1]) for x in np.linspace(10**p, 10**(p+1), 10)], minor=True)\n", + "\n", + " plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax1 = plt.subplots(1,1, figsize=(8,4), dpi=150 )\n", + "\n", + "plot_stats_violin(\n", + " metadata_bc_dict,\n", + " ax=ax1,\n", + " split=False,\n", + " #palette='tab20',\n", + " color=\"#a6a6a6\",\n", + " ylab='log10 Number of (unique) fragments',\n", + " var='Unique_nr_frag')\n", + "\n", + "plt.show()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax1 = plt.subplots(1,1, figsize=(8,4), dpi=150 )\n", + "\n", + "plot_stats_violin(\n", + " metadata_bc_dict,\n", + " ax=ax1,\n", + " split=False,\n", + " #palette='tab20',\n", + " color=\"#a6a6a6\",\n", + " logscale=False,\n", + " ylab='TSS enrichment',\n", + " var='TSS_enrichment')\n", + "\n", + "plt.show()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax1 = plt.subplots(1,1, figsize=(8,4), dpi=150 )\n", + "\n", + "plot_stats_violin(\n", + " metadata_bc_dict,\n", + " ax=ax1,\n", + " split=False,\n", + " #palette='tab20',\n", + " color=\"#a6a6a6\",\n", + " logscale=False,\n", + " ylab='Duplicate rate',\n", + " var='Dupl_rate')\n", + "\n", + "plt.show()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax1 = plt.subplots(1,1, figsize=(8,4), dpi=150 )\n", + "\n", + "plot_stats_violin(\n", + " metadata_bc_dict,\n", + " ax=ax1,\n", + " split=False,\n", + " #alette='tab20',\n", + " color=\"#a6a6a6\",\n", + " logscale=False,\n", + " ylab='FRIP',\n", + " var='FRIP')\n", + "\n", + "plt.show()\n", + "plt.tight_layout()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 3c4cb274a24117fd62abd50e0041599ae1392bef Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 09:58:48 +0200 Subject: [PATCH 085/202] pycistopic / qc_filtering config updates - Cleanup old configs - Include pycistopic config presets for human, mouse, fly (affects biomart annotations, macs2 peak calling settings) --- conf/atac/qc_filtering.config | 1 - nextflow.config | 7 ++++--- src/pycistopic/conf/pycistopic_dmel.config | 14 ++++++++++++++ src/pycistopic/conf/pycistopic_hg38.config | 14 ++++++++++++++ src/pycistopic/conf/pycistopic_mm10.config | 14 ++++++++++++++ src/pycistopic/pycistopic.config | 5 +++-- 6 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 src/pycistopic/conf/pycistopic_dmel.config create mode 100644 src/pycistopic/conf/pycistopic_hg38.config create mode 100644 
src/pycistopic/conf/pycistopic_mm10.config diff --git a/conf/atac/qc_filtering.config b/conf/atac/qc_filtering.config index 7f84fc8d..b9ea8e96 100644 --- a/conf/atac/qc_filtering.config +++ b/conf/atac/qc_filtering.config @@ -1,3 +1,2 @@ -includeConfig './../../src/archr/archr.config' includeConfig './../../src/pycistopic/pycistopic.config' diff --git a/nextflow.config b/nextflow.config index f8deafe1..1dae337c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -466,9 +466,6 @@ profiles { includeConfig 'src/cellranger-atac/conf/count.config' includeConfig 'src/cellranger-atac/conf/count_metadata.config' } - cistopic { - includeConfig 'src/cistopic/cistopic.config' - } atac_preprocess { includeConfig 'conf/atac/preprocess.config' } @@ -483,6 +480,10 @@ profiles { includeConfig 'conf/atac/preprocess.config' includeConfig 'src/popscle/popscle.config' } + /* pycistopic (biomart/macs2) profiles (load after pycistopic config) */ + pycistopic_hg38 { includeConfig 'src/pycistopic/conf/pycistopic_hg38.config' } + pycistopic_mm10 { includeConfig 'src/pycistopic/conf/pycistopic_mm10.config' } + pycistopic_dmel { includeConfig 'src/pycistopic/conf/pycistopic_dmel.config' } /* diff --git a/src/pycistopic/conf/pycistopic_dmel.config b/src/pycistopic/conf/pycistopic_dmel.config new file mode 100644 index 00000000..d2d6dd98 --- /dev/null +++ b/src/pycistopic/conf/pycistopic_dmel.config @@ -0,0 +1,14 @@ +params { + tools { + pycistopic { + biomart_annot { + biomart_dataset_name = 'dmelanogaster_gene_ensembl' + biomart_host = 'http://www.ensembl.org' + } + macs2_call_peaks { + gsize = 'dm' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' + } + } + } +} + diff --git a/src/pycistopic/conf/pycistopic_hg38.config b/src/pycistopic/conf/pycistopic_hg38.config new file mode 100644 index 00000000..4d07bd72 --- /dev/null +++ b/src/pycistopic/conf/pycistopic_hg38.config @@ -0,0 +1,14 @@ +params { + tools { + pycistopic { + biomart_annot { + biomart_dataset_name = 'hsapiens_gene_ensembl' + biomart_host = 'http://www.ensembl.org' + } + macs2_call_peaks { + gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' + } + } + } +} + diff --git a/src/pycistopic/conf/pycistopic_mm10.config b/src/pycistopic/conf/pycistopic_mm10.config new file mode 100644 index 00000000..a181e89e --- /dev/null +++ b/src/pycistopic/conf/pycistopic_mm10.config @@ -0,0 +1,14 @@ +params { + tools { + pycistopic { + biomart_annot { + biomart_dataset_name = 'mmusculus_gene_ensembl' + biomart_host = 'http://nov2020.archive.ensembl.org/' + } + macs2_call_peaks { + gsize = 'mm' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' + } + } + } +} + diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 2743bd77..897bad67 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -4,6 +4,7 @@ params { container = '/staging/leuven/stg_00002/lcb/cflerin/containers/aertslab-pycistopic-latest.sif' biomart_annot { biomart_dataset_name = 'hsapiens_gene_ensembl' + biomart_host = 'http://www.ensembl.org' } macs2_call_peaks { gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. 
'2.7e9' @@ -17,8 +18,8 @@ params { } call_cells { report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' - use_density_coloring_on_scatterplot = True - use_detailed_title_on_scatterplot = True + use_density_coloring_on_scatterplot = true + use_detailed_title_on_scatterplot = true filter_frags_lower = '1000' filter_frags_upper = '' filter_tss_lower = '8' From 83dca9887e238cf537eaadab5b8996b8714c5baf Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 09:59:52 +0200 Subject: [PATCH 086/202] Update biomart annotation script --- src/pycistopic/bin/biomart_annot.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/pycistopic/bin/biomart_annot.py b/src/pycistopic/bin/biomart_annot.py index 4a667069..148794f0 100755 --- a/src/pycistopic/bin/biomart_annot.py +++ b/src/pycistopic/bin/biomart_annot.py @@ -13,18 +13,34 @@ required=True, help='Biomart dataset name, e.g. "hsapiens_gene_ensembl".' ) +parser.add_argument( + "--biomart_host", + type=str, + required=True, + help='Biomart host address, e.g. "http://www.ensembl.org".' +) args = parser.parse_args() ################################################################################ -dataset = pbm.Dataset(name=args.biomart_dataset_name, host='http://www.ensembl.org') +dataset = pbm.Dataset(name=args.biomart_dataset_name, host=args.biomart_host) annot = dataset.query(attributes=['chromosome_name', 'transcription_start_site', 'strand', 'external_gene_name', 'transcript_biotype']) -filter = annot['Chromosome/scaffold name'].str.contains('CHR|GL|JH|MT', na=False) -annot = annot[~filter] -annot['Chromosome/scaffold name'] = annot['Chromosome/scaffold name'].str.replace(r'(\b\S)', r'chr\1') + +# Rename columns. annot.columns=['Chromosome', 'Start', 'Strand', 'Gene', 'Transcript_type'] +# Convert objects in chromosome column to strings. +annot['Chromosome'] = annot['Chromosome'].astype(str) +# Calculate number of TSS per chromosome and number of unique genes per chromosome for unfiltered data. +annotation_unfiltered_counts = annot.groupby('Chromosome').agg({'Gene': ['count', 'nunique']}) +# Only keep protein coding genes. annot = annot[annot.Transcript_type == 'protein_coding'] +# Only keep genes on normal chromosomes: (1-99, X, Y, 2L, 2R, 3L, 3R). +filter_chroms = annot['Chromosome'].str.contains('^[0-9]{1,2}$|^[XY]$|^[23][LR]$') +annot = annot[(filter_chroms)] +# Add "chr" to the beginning of the chromosome names to make them UCSC compatible. 
+annot['Chromosome'] = annot['Chromosome'].str.replace(r'(\b\S)', r'chr\1') + with open('biomart_annot.pickle', 'wb') as f: pickle.dump(annot, f) From 471a40a161261dd1bd3d132a2ab8b2b805b7be29 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 10:10:27 +0200 Subject: [PATCH 087/202] Add host parameter to biomart process --- src/pycistopic/processes/biomart_annot.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pycistopic/processes/biomart_annot.nf b/src/pycistopic/processes/biomart_annot.nf index 24d38645..18032f2c 100644 --- a/src/pycistopic/processes/biomart_annot.nf +++ b/src/pycistopic/processes/biomart_annot.nf @@ -17,7 +17,8 @@ process PYCISTOPIC__BIOMART_ANNOT { script: """ ${binDir}biomart_annot.py \ - --biomart_dataset_name ${processParams.biomart_dataset_name} + --biomart_dataset_name ${processParams.biomart_dataset_name} \ + --biomart_host ${processParams.biomart_host} """ } From 468f340fbf8c470f2fbd94d31947803c2ad8bac7 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 14:25:16 +0200 Subject: [PATCH 088/202] pycistopic QC notebook cleanup --- .../bin/pycisTopic_qc_report_template.ipynb | 34 +++++-------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb b/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb index 01674eac..15cf74d1 100644 --- a/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb +++ b/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb @@ -71,7 +71,6 @@ "outputs": [], "source": [ "# Load metadata\n", - "import pickle\n", "infile = open('project_metadata.pickle', 'rb')\n", "metadata_bc_dict = pickle.load(infile)\n", "infile.close()" @@ -123,11 +122,9 @@ " plot_sample_metrics({sample_id: profile_data_dict[sample_id]},\n", " profile_list=['barcode_rank_plot', 'insert_size_distribution', 'profile_tss', 'frip'],\n", " insert_size_distriubtion_xlim=[0,600],\n", - " legend_outside=False,\n", " ncol=4,\n", " cmap='tab20',\n", - " plot=True)#,\n", - " #save='quality_control/sample_metrics_all.pdf')\n", + " plot=True)\n", "plt.show()" ] }, @@ -147,11 +144,9 @@ "plot_sample_metrics(profile_data_dict,\n", " profile_list=['barcode_rank_plot', 'insert_size_distribution', 'profile_tss', 'frip'],\n", " insert_size_distriubtion_xlim=[0,600],\n", - " #legend_outside=True,\n", " ncol=4,\n", " cmap='tab20',\n", - " plot=True)#,\n", - " #save='quality_control/sample_metrics_all.pdf')\n", + " plot=True)\n", "plt.show()" ] }, @@ -220,7 +215,7 @@ " if len(barcodes_to_keep)>0:\n", " return list(set.intersection(*map(set, barcodes_to_keep)))\n", " else:\n", - " return barcodes\n" + " return barcodes" ] }, { @@ -229,7 +224,7 @@ "metadata": {}, "outputs": [], "source": [ - "# extract filter parameters\n", + "# extract filter thresholds from Nextflow parameters\n", "filter_frags_lower = {}\n", "filter_frags_upper = {}\n", "filter_tss_lower = {}\n", @@ -248,15 +243,15 @@ "def extract_sample_specific_param(filter_dict, sample_id, param_key):\n", " if type(params['call_cells'][param_key]) is dict:\n", " if sample_id in params['call_cells'][param_key]:\n", - " filter_dict[s] = float_or_none(params['call_cells'][param_key][sample_id])\n", + " filter_dict[sample_id] = float_or_none(params['call_cells'][param_key][sample_id])\n", " else:\n", " try:\n", - " filter_dict[s] = float_or_none(params['call_cells'][param_key]['default'])\n", + " filter_dict[sample_id] = float_or_none(params['call_cells'][param_key]['default'])\n", " except KeyError:\n", " 
print(f\"WARNING: Missing 'default' key in the sample parameters list. Filter for '{param_key}' will be missing for sample '{sample_id}'.\")\n", - " filter_dict[s] = None\n", + " filter_dict[sample_id] = None\n", " else:\n", - " filter_dict[s] = float_or_none(params['call_cells'][param_key])\n", + " filter_dict[sample_id] = float_or_none(params['call_cells'][param_key])\n", " return filter_dict\n", "\n", "for s in sample_ids:\n", @@ -377,7 +372,7 @@ "metadata": {}, "outputs": [], "source": [ - "# write \n", + "# write all cell barcodes selected by filtering\n", "if not os.path.exists('selected_barcodes'):\n", " os.makedirs('selected_barcodes')\n", " \n", @@ -385,13 +380,6 @@ " pd.DataFrame(v).to_csv('selected_barcodes/'+k+'.cell_barcodes.txt', sep='\\t', header=False, index=False)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -474,10 +462,6 @@ " ax.set_ylabel(ylab,fontsize=10)\n", " g.set_xticklabels(g.get_xticklabels(), rotation=15, ha='right', fontsize=8)\n", "\n", - " #if logscale:\n", - " # ax.yaxis.set_major_formatter(mticker.StrMethodFormatter(\"$10^{{{x:.0f}}}$\"))\n", - " # ax.yaxis.set_ticks([np.log10(x) for p in range(yrange[0],yrange[1]) for x in np.linspace(10**p, 10**(p+1), 10)], minor=True)\n", - "\n", " plt.tight_layout()" ] }, From f5315cf8266dc2433fc1bcc9e90fb1846fa8aa53 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 14:46:10 +0200 Subject: [PATCH 089/202] Add additional qc parameters to compute_qc_stats --- src/pycistopic/bin/compute_qc_stats.py | 46 +++++++++++++++++--- src/pycistopic/processes/compute_qc_stats.nf | 5 +++ src/pycistopic/pycistopic.config | 5 +++ 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index c1c39490..965e8e29 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -27,6 +27,42 @@ default=50, help='Threshold on the number of fragments to keep for a barcode.' ) +parser.add_argument( + "--tss_flank_window", + type=int, + required=True, + default=2000, + help='Flanking window around the TSS.' +) +parser.add_argument( + "--tss_window", + type=int, + required=True, + default=50, + help='Window around the TSS used to count fragments in the TSS when calculating the TSS enrichment per barcode.' +) +parser.add_argument( + "--tss_minimum_signal_window", + type=int, + required=True, + default=100, + help='Tail window use to normalize the TSS enrichment (average signal in the X bp in the extremes of the TSS window).' +) +parser.add_argument( + "--tss_rolling_window", + type=int, + required=True, + default=10, + help='Rolling window used to smooth signal.' +) +parser.add_argument( + "--min_norm", + type=int, + required=True, + default=0.1, + help='Minimum normalization score. If the average minimum signal value is below this value, this number is used to normalize the TSS signal. This approach penalizes cells with fewer reads.' 
+) + parser.add_argument( "--biomart_annot_pkl", type=str, @@ -75,11 +111,11 @@ valid_bc=None, n_frag=args.n_frag, n_bc=None, - tss_flank_window=1000, - tss_window=50, - tss_minimum_signal_window = 100, - tss_rolling_window = 10, - min_norm=0.1, + tss_flank_window=args.tss_flank_window, + tss_window=args.tss_window, + tss_minimum_signal_window=args.tss_minimum_signal_window, + tss_rolling_window=args.tss_rolling_window, + min_norm=args.min_norm, remove_duplicates = True, #_temp_dir= ) diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index 7d970fe7..e020b916 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -26,6 +26,11 @@ process PYCISTOPIC__COMPUTE_QC_STATS { ${binDir}compute_qc_stats.py \ ${"--input_files "+input.join(" --input_files ")} \ --n_frag ${processParams.n_frag} \ + --tss_flank_window ${processParams.tss_flank_window} \ + --tss_window ${processParams.tss_window} \ + --tss_minimum_signal_window ${processParams.tss_minimum_signal_window} \ + --tss_rolling_window ${processParams.tss_rolling_window} \ + --min_norm ${processParams.tss_rolling_window} \ --threads ${task.cpus} \ --biomart_annot_pkl ${biomart_annot} \ --output_metadata_pkl project_metadata.pickle \ diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 897bad67..a140f448 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -15,6 +15,11 @@ params { } compute_qc_stats { n_frag = 100 + tss_flank_window = 2000 + tss_window = 50 + tss_minimum_signal_window = 100 + tss_rolling_window = 10 + min_norm = 0.1 } call_cells { report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' From 8ab5652524424a9f50661d2a875dd911c8d2980c Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 22:13:01 +0200 Subject: [PATCH 090/202] More consistent naming in atac_preprocess steps --- workflows/atac/preprocess.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index d555f50e..8fb3d73f 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -33,9 +33,9 @@ include { include { - barcode_correction as bc_corr_std; - barcode_correction as bc_corr_hyd; - biorad_bc; + barcode_correction as bc_correct_standard; + barcode_correction as bc_correct_hydrop; + biorad_bc as bc_correct_biorad; } from './../../src/singlecelltoolkit/main.nf' @@ -74,21 +74,21 @@ workflow ATAC_PREPROCESS { /* standard data barcode correction */ - bc_corr_std(data.standard) + bc_correct_standard(data.standard) /* HyDrop ATAC extract barcode and correct */ SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop) \ - | bc_corr_hyd + | bc_correct_hydrop /* BioRad data extract barcode and correct */ - biorad_bc(data.biorad) + bc_correct_biorad(data.biorad) /* downstream steps */ - bc_corr_std.out - .mix(bc_corr_hyd.out) - .mix(biorad_bc.out) \ + bc_correct_standard.out + .mix(bc_correct_hydrop.out) + .mix(bc_correct_biorad.out) \ | adapter_trimming \ | mapping From e3b21415a413c95a01683ce9edfe6eb43a7b7abf Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 22:13:39 +0200 Subject: [PATCH 091/202] Update bwamaptools Docker image and config - include new zlib 2.0.4 --- src/bwamaptools/Dockerfile | 2 +- src/bwamaptools/bwamaptools.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bwamaptools/Dockerfile 
b/src/bwamaptools/Dockerfile index 0d4327e7..c5192d61 100644 --- a/src/bwamaptools/Dockerfile +++ b/src/bwamaptools/Dockerfile @@ -1,4 +1,4 @@ -FROM vibsinglecellnf/samtools:1.12 +FROM vibsinglecellnf/samtools:0.2-1.12 ENV DEBIAN_FRONTEND=noninteractive diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index 1bd0d2cb..fad4f744 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,7 +1,7 @@ params { tools { bwamaptools { - container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng' + container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng2' } } } From 42038545af193e884d7f0fafe054da291422b734 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 17 Jun 2021 22:33:11 +0200 Subject: [PATCH 092/202] Updates for singlecelltoolkit - Docker image with new zlibng 2.0.4 - Additional fixes and include saturation script --- src/singlecelltoolkit/Dockerfile | 2 +- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile index 7aed81ab..4010cc87 100644 --- a/src/singlecelltoolkit/Dockerfile +++ b/src/singlecelltoolkit/Dockerfile @@ -1,4 +1,4 @@ -FROM vibsinglecellnf/samtools:1.12 +FROM vibsinglecellnf/samtools:0.2-1.12 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index f8c50f19..cf61a5f5 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-06-07-959e326' + container = 'vibsinglecellnf/singlecelltoolkit:2021-06-17-bcf4653' barcode_correction { whitelist { standard = '' From b9b9d61f04a9544f996f3f0122a8dcce12119824 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 09:38:07 +0200 Subject: [PATCH 093/202] Fix incorrect parameter passed to min_norm in compute_qc_stats --- src/pycistopic/processes/compute_qc_stats.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index e020b916..da5a00f5 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -30,7 +30,7 @@ process PYCISTOPIC__COMPUTE_QC_STATS { --tss_window ${processParams.tss_window} \ --tss_minimum_signal_window ${processParams.tss_minimum_signal_window} \ --tss_rolling_window ${processParams.tss_rolling_window} \ - --min_norm ${processParams.tss_rolling_window} \ + --min_norm ${processParams.min_norm} \ --threads ${task.cpus} \ --biomart_annot_pkl ${biomart_annot} \ --output_metadata_pkl project_metadata.pickle \ From d29a3ba5331fc0b0f86f5754e93528c2531376af Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 09:42:18 +0200 Subject: [PATCH 094/202] Fix argparse parameter type in compute_qc_stats.py --- src/pycistopic/bin/compute_qc_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index 965e8e29..61871e67 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -57,7 +57,7 @@ ) parser.add_argument( "--min_norm", - type=int, + type=float, required=True, default=0.1, help='Minimum normalization score. 
If the average minimum signal value is below this value, this number is used to normalize the TSS signal. This approach penalizes cells with fewer reads.' From 952c06b315ff459d07e5719905139629750e9a3e Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 10:41:55 +0200 Subject: [PATCH 095/202] Revise publishing steps for atac QC --- src/pycistopic/processes/call_cells.nf | 8 ++++++-- src/pycistopic/processes/compute_qc_stats.nf | 2 +- workflows/atac/qc_filtering.nf | 15 ++++++++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/pycistopic/processes/call_cells.nf b/src/pycistopic/processes/call_cells.nf index 001d0530..1cbf8d55 100644 --- a/src/pycistopic/processes/call_cells.nf +++ b/src/pycistopic/processes/call_cells.nf @@ -49,7 +49,9 @@ process PYCISTOPIC__CALL_CELLS { process PYCISTOPIC__QC_REPORT { container toolParams.container - publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode + publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode, pattern: '*ipynb' + publishDir "${params.global.outdir}/data/pycistopic/qc/", mode: params.utils.publish.mode, pattern: 'selected_barcodes' + publishDir "${params.global.outdir}/data/pycistopic/qc/", mode: params.utils.publish.mode, pattern: 'selected_barcodes_nFrag' label 'compute_resources__report' input: @@ -60,7 +62,9 @@ process PYCISTOPIC__QC_REPORT { val(reportTitle) output: - path("${reportTitle}.ipynb") + tuple path("${reportTitle}.ipynb"), + path("selected_barcodes/"), + path("selected_barcodes_nFrag/") script: pycistopic_params = toJson(toolParams) diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index da5a00f5..df2745d3 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -7,7 +7,7 @@ processParams = params.tools.pycistopic.compute_qc_stats process PYCISTOPIC__COMPUTE_QC_STATS { - //publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' + publishDir "${params.global.outdir}/data/pycistopic/qc/", mode: params.utils.publish.mode container toolParams.container label 'compute_resources__pycisTopic' diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index e913bf27..7361abaf 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -14,10 +14,9 @@ include { } from './../../src/pycistopic/processes/call_cells.nf' include { - PUBLISH as PUBLISH_PEAKS; - PUBLISH as PUBLISH_METADATA; - PUBLISH as PUBLISH_QC_SAMPLE_METRICS; -} from "../../src/utils/workflows/utils.nf" + SIMPLE_PUBLISH as PUBLISH_PEAKS; + SIMPLE_PUBLISH as PUBLISH_SUMMITS; +} from '../../src/utils/processes/utils.nf' ////////////////////////////////////////////////////// // Define the workflow @@ -38,7 +37,8 @@ workflow ATAC_QC_PREFILTER { biomart = PYCISTOPIC__BIOMART_ANNOT() peaks = PYCISTOPIC__MACS2_CALL_PEAKS(data_split.bam.map { it -> tuple(it[0], it[1][0], it[1][1] ) } ) - PUBLISH_PEAKS(peaks.map { it -> tuple(it[0], it[1]) }, 'peaks', 'narrowPeak', 'macs2', false) + PUBLISH_PEAKS(peaks.map { it -> tuple(it[0], it[1]) }, '.peaks.narrowPeak', 'macs2') + PUBLISH_SUMMITS(peaks.map { it -> tuple(it[0], it[2]) }, '.summits.bed', 'macs2') data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) } .join(peaks) @@ -54,8 +54,9 @@ workflow ATAC_QC_PREFILTER { data_split.fragments.map { it -> it[0] }.collect(), // all sampleIds qc_stats, "pycisTopic_QC_report" - ) | - REPORT_TO_HTML + ) \ + | map { 
it -> it[0] } + | REPORT_TO_HTML } From cc691fa6a9c40eb865e672153099ed08de1a4184 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 12:04:46 +0200 Subject: [PATCH 096/202] Use project name in output file naming --- src/pycistopic/processes/call_cells.nf | 2 ++ src/pycistopic/processes/compute_qc_stats.nf | 8 ++++---- workflows/atac/qc_filtering.nf | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pycistopic/processes/call_cells.nf b/src/pycistopic/processes/call_cells.nf index 1cbf8d55..b205566d 100644 --- a/src/pycistopic/processes/call_cells.nf +++ b/src/pycistopic/processes/call_cells.nf @@ -73,6 +73,8 @@ process PYCISTOPIC__QC_REPORT { --report-mode \ ${reportTitle}.ipynb \ -p SAMPLES "${sampleId.join(",")}" \ + -p METADATAPKL "${metadata_pickle}" \ + -p PROFDATAPKL "${profile_data_pickle}" \ -p WORKFLOW_PARAMETERS '${pycistopic_params}' \ """ } diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index df2745d3..f5dc1b9d 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -16,8 +16,8 @@ process PYCISTOPIC__COMPUTE_QC_STATS { path(biomart_annot) output: - tuple path("project_metadata.pickle"), - path("project_profile_data.pickle") + tuple path("${params.global.project_name}__metadata.pickle"), + path("${params.global.project_name}__profile_data.pickle") script: """ @@ -33,8 +33,8 @@ process PYCISTOPIC__COMPUTE_QC_STATS { --min_norm ${processParams.min_norm} \ --threads ${task.cpus} \ --biomart_annot_pkl ${biomart_annot} \ - --output_metadata_pkl project_metadata.pickle \ - --output_profile_data_pkl project_profile_data.pickle + --output_metadata_pkl ${params.global.project_name}__metadata.pickle \ + --output_profile_data_pkl ${params.global.project_name}__profile_data.pickle """ } diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 7361abaf..43544a55 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -53,7 +53,7 @@ workflow ATAC_QC_PREFILTER { file(workflow.projectDir + params.tools.pycistopic.call_cells.report_ipynb), data_split.fragments.map { it -> it[0] }.collect(), // all sampleIds qc_stats, - "pycisTopic_QC_report" + params.global.project_name + "pycisTopic_QC_report" ) \ | map { it -> it[0] } | REPORT_TO_HTML From 136636f548704a39301977db15c5f1d653267cc8 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 12:39:38 +0200 Subject: [PATCH 097/202] Update atac_qc notebook - Additional plots (pycisTopic default barcode metric plots) - Fixed naming scheme for variables/report --- .../bin/pycisTopic_qc_report_template.ipynb | 88 ++++++++++++++++++- 1 file changed, 84 insertions(+), 4 deletions(-) diff --git a/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb b/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb index 15cf74d1..3178f697 100644 --- a/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb +++ b/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb @@ -71,7 +71,7 @@ "outputs": [], "source": [ "# Load metadata\n", - "infile = open('project_metadata.pickle', 'rb')\n", + "infile = open(METADATAPKL, 'rb')\n", "metadata_bc_dict = pickle.load(infile)\n", "infile.close()" ] @@ -83,7 +83,7 @@ "outputs": [], "source": [ "# Load profile data\n", - "infile = open('project_profile_data.pickle', 'rb')\n", + "infile = open(PROFDATAPKL, 'rb')\n", "profile_data_dict = pickle.load(infile)\n", "infile.close()" ] @@ -109,7 +109,7 @@ "cell_type": "markdown", 
"metadata": {}, "source": [ - "### Per-sample" + "### Per-sample metrics" ] }, { @@ -132,7 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Combined" + "### Combined sample metrics" ] }, { @@ -380,6 +380,86 @@ " pd.DataFrame(v).to_csv('selected_barcodes/'+k+'.cell_barcodes.txt', sep='\\t', header=False, index=False)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### pycisTopic barcode metrics plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pycisTopic.qc import plot_barcode_metrics\n", + "from pycisTopic.utils import fig2img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def none_or_log10(x):\n", + " return None if x is None else np.log10(x)\n", + "\n", + "for k,v in metadata_bc_dict.items():\n", + "\n", + " FRIP_NR_FRAG_fig = plot_barcode_metrics(metadata_bc_dict[k],\n", + " var_x='Log_unique_nr_frag',\n", + " var_y='FRIP',\n", + " min_x=none_or_log10(filter_frags_lower[k]),\n", + " max_x=none_or_log10(filter_frags_upper[k]),\n", + " min_y=filter_frip_lower[k],\n", + " max_y=filter_frip_upper[k],\n", + " return_cells=False,\n", + " return_fig=True,\n", + " plot=False,\n", + " )\n", + "\n", + " TSS_NR_FRAG_fig = plot_barcode_metrics(metadata_bc_dict[k],\n", + " var_x='Log_unique_nr_frag',\n", + " var_y='TSS_enrichment',\n", + " min_x=none_or_log10(filter_frags_lower[k]),\n", + " max_x=none_or_log10(filter_frags_upper[k]),\n", + " min_y=filter_tss_lower[k],\n", + " max_y=filter_tss_upper[k],\n", + " return_cells=False,\n", + " return_fig=True,\n", + " plot=False\n", + " )\n", + "\n", + " DR_NR_FRAG_fig = plot_barcode_metrics(metadata_bc_dict[k],\n", + " var_x='Log_unique_nr_frag',\n", + " var_y='Dupl_rate',\n", + " min_x=none_or_log10(filter_frags_lower[k]),\n", + " max_x=none_or_log10(filter_frags_upper[k]),\n", + " min_y=filter_dup_rate_lower[k],\n", + " max_y=filter_dup_rate_upper[k],\n", + " return_cells=False,\n", + " return_fig=True,\n", + " plot=False\n", + " )\n", + " fig=plt.figure(figsize=(30,7.5))\n", + " plt.subplot(1, 3, 1)\n", + " img = fig2img(TSS_NR_FRAG_fig)\n", + " plt.imshow(img)\n", + " plt.axis('off')\n", + " plt.subplot(1, 3, 2)\n", + " img = fig2img(FRIP_NR_FRAG_fig)\n", + " plt.imshow(img)\n", + " plt.axis('off')\n", + " plt.subplot(1, 3, 3)\n", + " img = fig2img(DR_NR_FRAG_fig)\n", + " plt.imshow(img)\n", + " plt.axis('off')\n", + " fig.suptitle(k, fontsize=16)\n", + " plt.show()" + ] + }, { "cell_type": "markdown", "metadata": {}, From a3ccfe19704a4f47f099fbececf62dd9e9756ff9 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 12:47:24 +0200 Subject: [PATCH 098/202] Fix ipynb report naming --- workflows/atac/qc_filtering.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 43544a55..2de80bf5 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -53,7 +53,7 @@ workflow ATAC_QC_PREFILTER { file(workflow.projectDir + params.tools.pycistopic.call_cells.report_ipynb), data_split.fragments.map { it -> it[0] }.collect(), // all sampleIds qc_stats, - params.global.project_name + "pycisTopic_QC_report" + params.global.project_name + "__pycisTopic_QC_report" ) \ | map { it -> it[0] } | REPORT_TO_HTML From 1ef921a93679f91999ac194e7e942ad1aa1321e6 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 15:13:42 +0200 Subject: [PATCH 099/202] Add 
saturation script to atac QC workflow - Split up config for singlecelltoolkit to keep saturation params separate - Run saturation script in two modes: with and without a barcode whitelist - Publish steps for saturation output --- conf/atac/preprocess.config | 1 + conf/atac/qc_filtering.config | 2 + .../conf/sctk_mapping.config | 18 +++++++++ .../conf/sctk_saturation.config | 12 ++++++ src/singlecelltoolkit/processes/saturation.nf | 38 +++++++++++++++++++ .../singlecelltoolkit.config | 10 ----- workflows/atac/qc_filtering.nf | 18 +++++++++ 7 files changed, 89 insertions(+), 10 deletions(-) create mode 100644 src/singlecelltoolkit/conf/sctk_mapping.config create mode 100644 src/singlecelltoolkit/conf/sctk_saturation.config create mode 100644 src/singlecelltoolkit/processes/saturation.nf diff --git a/conf/atac/preprocess.config b/conf/atac/preprocess.config index 2acc38b7..e49fc14d 100644 --- a/conf/atac/preprocess.config +++ b/conf/atac/preprocess.config @@ -11,6 +11,7 @@ params { } includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' +includeConfig './../../src/singlecelltoolkit/conf/sctk_mapping.config' includeConfig './../../src/trimgalore/trimgalore.config' includeConfig './../../src/fastp/fastp.config' includeConfig './../../src/bwamaptools/bwamaptools.config' diff --git a/conf/atac/qc_filtering.config b/conf/atac/qc_filtering.config index b9ea8e96..089ee7d4 100644 --- a/conf/atac/qc_filtering.config +++ b/conf/atac/qc_filtering.config @@ -1,2 +1,4 @@ includeConfig './../../src/pycistopic/pycistopic.config' +includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' +includeConfig './../../src/singlecelltoolkit/conf/sctk_saturation.config' diff --git a/src/singlecelltoolkit/conf/sctk_mapping.config b/src/singlecelltoolkit/conf/sctk_mapping.config new file mode 100644 index 00000000..599df060 --- /dev/null +++ b/src/singlecelltoolkit/conf/sctk_mapping.config @@ -0,0 +1,18 @@ +params { + tools { + singlecelltoolkit { + barcode_correction { + whitelist { + standard = '' + multiome = '' + } + } + barcode_10x_scatac_fastqs { + uncorrected_bc_tag = 'CR' + barcode_quality_tag = 'CY' + } + } + } +} + + diff --git a/src/singlecelltoolkit/conf/sctk_saturation.config b/src/singlecelltoolkit/conf/sctk_saturation.config new file mode 100644 index 00000000..d3f13e92 --- /dev/null +++ b/src/singlecelltoolkit/conf/sctk_saturation.config @@ -0,0 +1,12 @@ +params { + tools { + singlecelltoolkit { + saturation { + percentages = '0.3,0.6,0.9' + min_frags_per_cb = 200 + subsamplings = 10 + } + } + } +} + diff --git a/src/singlecelltoolkit/processes/saturation.nf b/src/singlecelltoolkit/processes/saturation.nf new file mode 100644 index 00000000..044e8088 --- /dev/null +++ b/src/singlecelltoolkit/processes/saturation.nf @@ -0,0 +1,38 @@ +nextflow.enable.dsl=2 + +//binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" + +toolParams = params.tools.singlecelltoolkit + +process SCTK__SATURATION { + + container toolParams.container + label 'compute_resources__default' + + input: + tuple val(sampleId), + path(fragments), + path(fragments_index) + file(bc_whitelists) + val(optional) + + output: + tuple val(sampleId), + path("${sampleId}.sampling_stats.tsv"), + path("${sampleId}.saturation.png") + + script: + def sampleParams = params.parseConfig(sampleId, params.global, toolParams) + //processParams = sampleParams.local + def bc_wl_param = optional == 'RUN' ? 
'-w selected_barcodes/' + sampleId + '.cell_barcodes.txt' : '' + """ + calculate_saturation_from_fragments.py \ + -i ${fragments} \ + -o ${sampleId} \ + -p ${toolParams.saturation.percentages} \ + -m ${toolParams.saturation.min_frags_per_cb} \ + -s ${toolParams.saturation.subsamplings} \ + ${bc_wl_param} + """ +} + diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index f8c50f19..30836ed9 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -2,16 +2,6 @@ params { tools { singlecelltoolkit { container = 'vibsinglecellnf/singlecelltoolkit:2021-06-07-959e326' - barcode_correction { - whitelist { - standard = '' - multiome = '' - } - } - barcode_10x_scatac_fastqs { - uncorrected_bc_tag = 'CR' - barcode_quality_tag = 'CY' - } } } } diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 2de80bf5..202a0b3a 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -8,6 +8,10 @@ include { SC__ARCHR__CELL_CALLING; } from './../../src/archr/processes/cell_call include { PYCISTOPIC__BIOMART_ANNOT; } from './../../src/pycistopic/processes/biomart_annot.nf' include { PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' include { PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' +include { + SCTK__SATURATION; + SCTK__SATURATION as SCTK__SATURATION_BC_WL; +} from './../../src/singlecelltoolkit/processes/saturation.nf' include { PYCISTOPIC__QC_REPORT; REPORT_TO_HTML; @@ -16,6 +20,10 @@ include { include { SIMPLE_PUBLISH as PUBLISH_PEAKS; SIMPLE_PUBLISH as PUBLISH_SUMMITS; + SIMPLE_PUBLISH as PUBLISH_SATURATION_TSV; + SIMPLE_PUBLISH as PUBLISH_SATURATION_PNG; + SIMPLE_PUBLISH as PUBLISH_SATURATION_BC_WL_TSV; + SIMPLE_PUBLISH as PUBLISH_SATURATION_BC_WL_PNG; } from '../../src/utils/processes/utils.nf' ////////////////////////////////////////////////////// @@ -58,6 +66,16 @@ workflow ATAC_QC_PREFILTER { | map { it -> it[0] } | REPORT_TO_HTML + /* saturation */ + SCTK__SATURATION(data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) }, '', '') + SCTK__SATURATION_BC_WL(data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) }, + PYCISTOPIC__QC_REPORT.out, 'RUN') + /* publish saturation outputs */ + PUBLISH_SATURATION_TSV(SCTK__SATURATION.out.map { it -> tuple(it[0], it[1]) }, '.sampling_stats.tsv', 'singlecelltoolkit/saturation') + PUBLISH_SATURATION_PNG(SCTK__SATURATION.out.map { it -> tuple(it[0], it[2]) }, '.saturation.png', 'singlecelltoolkit/saturation') + // + PUBLISH_SATURATION_BC_WL_TSV(SCTK__SATURATION_BC_WL.out.map { it -> tuple(it[0], it[1]) }, '.sampling_stats.tsv', 'singlecelltoolkit/saturation_bc_wl') + PUBLISH_SATURATION_BC_WL_PNG(SCTK__SATURATION_BC_WL.out.map { it -> tuple(it[0], it[2]) }, '.saturation.png', 'singlecelltoolkit/saturation_bc_wl') } From 47634737d9fef68ce161bebc8bd14f55a9795d2e Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 18 Jun 2021 15:47:28 +0200 Subject: [PATCH 100/202] Update sctk docker image --- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 30836ed9..0447facd 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { 
- container = 'vibsinglecellnf/singlecelltoolkit:2021-06-07-959e326' + container = 'vibsinglecellnf/singlecelltoolkit:2021-06-17-bcf4653' } } } From 859916297a280c800f2d6c1ce9062f186b9fe61f Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Sun, 20 Jun 2021 13:59:12 +0200 Subject: [PATCH 101/202] Rename bap inputs to correctly identify bam index - bap requires the bam index to end with '.bam.bai', while Picard/GATK produces .bam and .bai files. --- src/bap/processes/barcode_multiplet.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bap/processes/barcode_multiplet.nf b/src/bap/processes/barcode_multiplet.nf index ec149efe..ee8d4463 100644 --- a/src/bap/processes/barcode_multiplet.nf +++ b/src/bap/processes/barcode_multiplet.nf @@ -12,8 +12,8 @@ process BAP__BARCODE_MULTIPLET_PIPELINE { input: tuple val(sampleId), - path(bam), - path(bai) + path("input.bam"), + path("input.bam.bai") output: tuple val(sampleId), @@ -28,7 +28,7 @@ process BAP__BARCODE_MULTIPLET_PIPELINE { processParams = sampleParams.local """ bap2 bam \ - --input ${bam} \ + --input input.bam \ --output ${sampleId} \ --name ${sampleId} \ --ncores ${task.cpus} \ From c292f7d6df0d395d08c55f64e32ab1c9a84b72e9 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 21 Jun 2021 14:27:49 +0200 Subject: [PATCH 102/202] Update regex to select chromosomes for fragments file - Major change: now exclude chrM by default - Selection is now compatible with other species (e.g. fly) --- src/sinto/sinto.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sinto/sinto.config b/src/sinto/sinto.config index f4ca0302..7057ef5b 100644 --- a/src/sinto/sinto.config +++ b/src/sinto/sinto.config @@ -6,7 +6,7 @@ params { min_mapq = 30 barcodetag = 'CB' barcode_regex = '' - use_chrom = '"(?i)^chr"' + use_chrom = '"^(chr|)([0-9]{1,2}|[XY]|[23][LR])$"' min_distance = 10 max_distance = 5000 chunksize = 5000000 From d5bc8c42bfc9613745213233ba77f924351cae13 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 21 Jun 2021 14:33:10 +0200 Subject: [PATCH 103/202] ATAC_QC: Take cellranger path as input - Extract bam / fragments, compatible with Cell Ranger ATAC+ARC formats --- workflows/atac/qc_filtering.nf | 72 ++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 202a0b3a..56b332eb 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -26,9 +26,43 @@ include { SIMPLE_PUBLISH as PUBLISH_SATURATION_BC_WL_PNG; } from '../../src/utils/processes/utils.nf' + ////////////////////////////////////////////////////// // Define the workflow + +workflow cellranger_output_to_bam_fragments { + /* + Cell Ranger ATAC:: + possorted_bam.bam, fragments.tsv.gz + Cell Ranger ARC:: + atac_possorted_bam.bam, atac_fragments.tsv.gz + */ + + take: + data // standard data channel [ sampleId, path, type, format] + + main: + + bam = data.map{ it -> tuple(it[0], [ + file(it[1]+"/*possorted*bam.bam")[0], + file(it[1]+"/*possorted*bam.bam.bai")[0], + ]) } + fragments = data.map{ it -> tuple(it[0], [ + *file(it[1]+"/*fragments.tsv.gz"), + *file(it[1]+"/*fragments.tsv.gz.tbi"), + ]) } + + if(!params.containsKey('quiet')) bam.view() + if(!params.containsKey('quiet')) fragments.view() + + emit: + bam + fragments + +} + + workflow ATAC_QC_PREFILTER { take: data // standard data channel [ sampleId, path, type, format] main: data.branch { - fragments: it[2] == 
'fragments' + bam: it[2] == 'bam' + cellranger: it[2] == '10x_atac_cellranger_mex_outs' } .set{ data_split } + // get cellranger data & merge + data_split.cellranger \ + | cellranger_output_to_bam_fragments + | set { data_cr } + + bam = data_split.bam.mix(data_cr.bam) + fragments = data_split.fragments.mix(data_cr.fragments) + + biomart = PYCISTOPIC__BIOMART_ANNOT() - peaks = PYCISTOPIC__MACS2_CALL_PEAKS(data_split.bam.map { it -> tuple(it[0], it[1][0], it[1][1] ) } ) + peaks = PYCISTOPIC__MACS2_CALL_PEAKS(bam.map { it -> tuple(it[0], it[1][0], it[1][1] ) } ) PUBLISH_PEAKS(peaks.map { it -> tuple(it[0], it[1]) }, '.peaks.narrowPeak', 'macs2') PUBLISH_SUMMITS(peaks.map { it -> tuple(it[0], it[2]) }, '.summits.bed', 'macs2') - data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) } - .join(peaks) - //.map { it -> [ tuple(*it[0..1], it[3]) ] } - .map { it -> ["${it[0]},${it[1]},${it[3]}"] } - .collect() - .set { fragpeaks } + /* pycisTopic qc: pass every fragment/peak file into a single process + together. These will be formatted as a string "sampleId,fragments,peak", + which is parsed in the python script. + */ + fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) } // [sampleId, fragments, fragments.tbi] + .join(peaks) // combine with peaks for each sample + .map { it -> ["${it[0]},${it[1]},${it[3]}"] } // join as string + .collect() // collapse to a single channel element + .set { fragpeaks } qc_stats = PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, biomart) PYCISTOPIC__QC_REPORT( file(workflow.projectDir + params.tools.pycistopic.call_cells.report_ipynb), - data_split.fragments.map { it -> it[0] }.collect(), // all sampleIds + fragments.map { it -> it[0] }.collect(), // all sampleIds qc_stats, params.global.project_name + "__pycisTopic_QC_report" ) \ @@ -67,8 +114,8 @@ workflow ATAC_QC_PREFILTER { | REPORT_TO_HTML /* saturation */ - SCTK__SATURATION(data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) }, '', '') - SCTK__SATURATION_BC_WL(data_split.fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) }, + SCTK__SATURATION(fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) }, '', '') + SCTK__SATURATION_BC_WL(fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) }, PYCISTOPIC__QC_REPORT.out, 'RUN') /* publish saturation outputs */ PUBLISH_SATURATION_TSV(SCTK__SATURATION.out.map { it -> tuple(it[0], it[1]) }, '.sampling_stats.tsv', 'singlecelltoolkit/saturation') @@ -76,6 +123,7 @@ workflow ATAC_QC_PREFILTER { // PUBLISH_SATURATION_BC_WL_TSV(SCTK__SATURATION_BC_WL.out.map { it -> tuple(it[0], it[1]) }, '.sampling_stats.tsv', 'singlecelltoolkit/saturation_bc_wl') PUBLISH_SATURATION_BC_WL_PNG(SCTK__SATURATION_BC_WL.out.map { it -> tuple(it[0], it[2]) }, '.saturation.png', 'singlecelltoolkit/saturation_bc_wl') + } From 2402cd41a070306c11056c9ae993ca9a225a9331 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 21 Jun 2021 14:34:39 +0200 Subject: [PATCH 104/202] Long queue for saturation script --- src/singlecelltoolkit/processes/saturation.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/singlecelltoolkit/processes/saturation.nf b/src/singlecelltoolkit/processes/saturation.nf index 044e8088..ff7abd7b 100644 --- a/src/singlecelltoolkit/processes/saturation.nf +++ b/src/singlecelltoolkit/processes/saturation.nf @@ -7,7 +7,7 @@ toolParams = params.tools.singlecelltoolkit process SCTK__SATURATION { container toolParams.container - label 'compute_resources__default' + label 'compute_resources__default','compute_resources__24hqueue' input: 
tuple val(sampleId), From 2c00383c262aa1b8b5b770672113e91d3474c43d Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 24 Jun 2021 16:05:40 +0200 Subject: [PATCH 105/202] Properly stage input files for compute_qc_stats - collect() fragments and peaks files into a single channel to stage them in the working directory - Shorten the input tuple to only have the base filenames so that they are read from the current working directory --- src/pycistopic/bin/compute_qc_stats.py | 2 +- src/pycistopic/processes/compute_qc_stats.nf | 2 ++ workflows/atac/qc_filtering.nf | 10 +++++++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index 61871e67..ef7559f3 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -18,7 +18,7 @@ required=True, nargs='+', action='append', - help='Input files in the form of [SampleId, path_to_fragments, path_to_peaks]. Multiple inputs are possible.' + help='Input files in the form of "SampleId,fragments_filename,peaks_filename". Multiple inputs are possible.' ) parser.add_argument( "--n_frag", diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index f5dc1b9d..a83386f5 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -14,6 +14,8 @@ process PYCISTOPIC__COMPUTE_QC_STATS { input: val(input) path(biomart_annot) + path(fragments) + path(peaks) output: tuple path("${params.global.project_name}__metadata.pickle"), diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index 56b332eb..bdea9ef2 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -96,13 +96,17 @@ workflow ATAC_QC_PREFILTER { together. These will be formatted as a string "sampleId,fragments,peak", which is parsed in the python script. 
*/ - fragments.map { it -> tuple(it[0], it[1][0], it[1][1] ) } // [sampleId, fragments, fragments.tbi] - .join(peaks) // combine with peaks for each sample + fragments.map { it -> tuple(it[0], it[1][0].getName(), it[1][1].getName() ) } // [sampleId, fragments, fragments.tbi] + .join(peaks.map{ it -> tuple(it[0], it[1].getName()) }) // combine with peaks for each sample .map { it -> ["${it[0]},${it[1]},${it[3]}"] } // join as string .collect() // collapse to a single channel element .set { fragpeaks } - qc_stats = PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, biomart) + qc_stats = PYCISTOPIC__COMPUTE_QC_STATS(fragpeaks, + biomart, + fragments.map { it -> it[1][0] }.collect(), + peaks.map { it -> it[1]}.collect() + ) PYCISTOPIC__QC_REPORT( file(workflow.projectDir + params.tools.pycistopic.call_cells.report_ipynb), From e7b531010d7d5baae496bd19cd7d8d505d843414 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 24 Jun 2021 16:16:42 +0200 Subject: [PATCH 106/202] Add try/except to catch /tmp/ray errors and give suggestions how to avoid --- src/pycistopic/bin/compute_qc_stats.py | 41 +++++++++++++++----------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/pycistopic/bin/compute_qc_stats.py b/src/pycistopic/bin/compute_qc_stats.py index ef7559f3..3ccd8a8c 100755 --- a/src/pycistopic/bin/compute_qc_stats.py +++ b/src/pycistopic/bin/compute_qc_stats.py @@ -101,24 +101,29 @@ -metadata_bc_dict, profile_data_dict = compute_qc_stats( - fragments_dict=fragments_dict, - tss_annotation=annot, - stats=['barcode_rank_plot', 'duplicate_rate', 'insert_size_distribution', 'profile_tss', 'frip'], - label_list=None, - path_to_regions=path_to_regions, - n_cpu=args.threads, - valid_bc=None, - n_frag=args.n_frag, - n_bc=None, - tss_flank_window=args.tss_flank_window, - tss_window=args.tss_window, - tss_minimum_signal_window=args.tss_minimum_signal_window, - tss_rolling_window=args.tss_rolling_window, - min_norm=args.min_norm, - remove_duplicates = True, - #_temp_dir= - ) +try: + metadata_bc_dict, profile_data_dict = compute_qc_stats( + fragments_dict=fragments_dict, + tss_annotation=annot, + stats=['barcode_rank_plot', 'duplicate_rate', 'insert_size_distribution', 'profile_tss', 'frip'], + label_list=None, + path_to_regions=path_to_regions, + n_cpu=args.threads, + valid_bc=None, + n_frag=args.n_frag, + n_bc=None, + tss_flank_window=args.tss_flank_window, + tss_window=args.tss_window, + tss_minimum_signal_window=args.tss_minimum_signal_window, + tss_rolling_window=args.tss_rolling_window, + min_norm=args.min_norm, + remove_duplicates = True, + #_temp_dir= + ) +except FileExistsError as e: + print(e) + print("For errors with /tmp/ray, use an alternate temporary directory and map this to '/tmp' within the container. For example in Singularity: '-B /alt/tmp/path:/tmp', or with Docker: '-v /alt/tmp/path:/tmp'. 
Set these mappings in the Singularity/Docker 'runOptions' parameter within your config file.") + exit(1) ## load bap results to use for duplicate rate (if we are using bap output): From 9d327ad0fa84d0fa25402881af0b1f1557ad10a8 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 28 Jun 2021 09:55:35 +0200 Subject: [PATCH 107/202] Update sctk to include new saturation script - New docker image - Updated process parameters, limit max polars threads to 6 - Enable sample-specific parameters --- src/singlecelltoolkit/conf/sctk_saturation.config | 2 +- src/singlecelltoolkit/processes/saturation.nf | 9 +++++++-- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/singlecelltoolkit/conf/sctk_saturation.config b/src/singlecelltoolkit/conf/sctk_saturation.config index d3f13e92..de26c3da 100644 --- a/src/singlecelltoolkit/conf/sctk_saturation.config +++ b/src/singlecelltoolkit/conf/sctk_saturation.config @@ -3,8 +3,8 @@ params { singlecelltoolkit { saturation { percentages = '0.3,0.6,0.9' + sampling_fractions = '0.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0' min_frags_per_cb = 200 - subsamplings = 10 } } } diff --git a/src/singlecelltoolkit/processes/saturation.nf b/src/singlecelltoolkit/processes/saturation.nf index ff7abd7b..f249074c 100644 --- a/src/singlecelltoolkit/processes/saturation.nf +++ b/src/singlecelltoolkit/processes/saturation.nf @@ -23,15 +23,20 @@ process SCTK__SATURATION { script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams) - //processParams = sampleParams.local + processParams = sampleParams.local def bc_wl_param = optional == 'RUN' ? '-w selected_barcodes/' + sampleId + '.cell_barcodes.txt' : '' + def polars_max_threads = (task.cpus > 6) ? 6 : task.cpus """ + # Max threads polars is allowed to use (else will uses all cores). + export POLARS_MAX_THREADS=${polars_max_threads}; + # Max threads pyarrow is allowed to use (else will uses all cores) (used to read the fragments file in the beginning). + export OMP_NUM_THREADS=${polars_max_threads}; calculate_saturation_from_fragments.py \ -i ${fragments} \ -o ${sampleId} \ -p ${toolParams.saturation.percentages} \ -m ${toolParams.saturation.min_frags_per_cb} \ - -s ${toolParams.saturation.subsamplings} \ + -s ${toolParams.saturation.sampling_fractions} \ ${bc_wl_param} """ } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 0447facd..9ad57650 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-06-17-bcf4653' + container = 'vibsinglecellnf/singlecelltoolkit:2021-06-24-9dac781' } } } From 903e173c111f54551802555e2bd8aaa6b6baadff Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 29 Jun 2021 15:52:17 +0200 Subject: [PATCH 108/202] Documentation updates [scATAC-seq] - Add qc documentation - Minor updates to preprocessing docs --- docs/index.rst | 1 + docs/scatac-seq.rst | 21 ++++- docs/scatac-seq_qc.rst | 185 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 docs/scatac-seq_qc.rst diff --git a/docs/index.rst b/docs/index.rst index 34efaead..3bdc205a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,6 +20,7 @@ :caption: scATAC-seq scatac-seq + scatac-seq_qc .. 
toctree:: :maxdepth: 2 diff --git a/docs/scatac-seq.rst b/docs/scatac-seq.rst index a4580791..57721528 100644 --- a/docs/scatac-seq.rst +++ b/docs/scatac-seq.rst @@ -4,6 +4,10 @@ scATAC-seq Preprocessing This pipeline takes fastq files from paired end single cell ATAC-seq, and applies preprocessing steps to align the reads to a reference genome, and produce a bam file and scATAC-seq fragments file. +This workflow is currently available in the ``develop_atac`` branch (use the ``-r develop_atac`` option when running ``nextflow pull`` and ``nextflow run``). + +---- + Pipeline Steps ************** @@ -275,6 +279,8 @@ And produces paired fastq files with the barcode integrated into the read name ( ##A#################################### +---- + Running the workflow ******************** @@ -345,8 +351,17 @@ Temporary directory mapping ___________________________ For large BAM files, the system default temp location may become full. -A workaround is to map ``/tmp`` to an alternate path using the volume mount options in Docker or Singularity. For example in the container engine options: - - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/tmp:/path/to/tmp'`` - - Docker run options: ``runOptions = '-i -v /data:/data -v /tmp:/path/to/tmp'`` + A workaround is to include a volume mapping to the alternate ``/tmp`` ``-B /alternate/path/to/tmp:/tmp`` using the volume mount options in Docker or Singularity. + For example in the container engine options: + - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/alternate/path/to/tmp:/tmp'`` + - Docker run options: ``runOptions = '-i -v /data:/data -v /alternate/path/to/tmp:/tmp'`` + +Alternate Nextflow work location +________________________________ + +Direct the Nextflow work directory to an alternate path (e.g. a scratch drive) using the ``NXF_WORK`` environmental variable:: + nwork=/path/to/scratch/example_project + mkdir $nwork + export NXF_WORK=$nwork diff --git a/docs/scatac-seq_qc.rst b/docs/scatac-seq_qc.rst new file mode 100644 index 00000000..88b72890 --- /dev/null +++ b/docs/scatac-seq_qc.rst @@ -0,0 +1,185 @@ +scATAC-seq QC and Cell Calling +============================== + +This workflow uses the Python implementation of `cisTopic `_ (pycisTopic) to perform quality control and cell calling. +The inputs here are a fragments and bam file for each sample. + +This workflow is currently available in the ``develop_atac`` branch (use the ``-r develop_atac`` option when running ``nextflow pull`` and ``nextflow run``). + +---- + +Optional Steps +************** + +1. Direct the Nextflow work directory to an alternate path (e.g. a scratch drive) using the ``NXF_WORK`` environmental variable:: + + nwork=/path/to/scratch/example_project + mkdir $nwork + export NXF_WORK=$nwork + + +2. Important for pycisTopic Ray issues: the system default temp location may become full. + A workaround is to include a volume mapping to the alternate ``/tmp`` ``-B /alternate/path/to/tmp:/tmp`` using the volume mount options in Docker or Singularity. + For example in the container engine options: + - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/alternate/path/to/tmp:/tmp'`` + - Docker run options: ``runOptions = '-i -v /data:/data -v /alternate/path/to/tmp:/tmp'`` + + +3. Use the ``--quiet`` flag with ``nextflow run`` to suppress the printing of each file that is detected by the pipeline. + +---- + +Configuration +************* + +For each sample, this pipeline take a bam and a fragments file. 
+These can be specified separately, or from a Cell Ranger ATAC/ARC ``outs/`` path. + +Input with independent bam and fragments files +---------------------------------------------- + +Use the profiles ``bam`` and ``fragments``:: + + nextflow config vib-singlecell-nf/vsn-pipelines/main_atac.nf \ + -profile atac_qc_filtering,bam,fragments,vsc,pycistopic_hg38 \ + > atac_qc.config + +Preset profiles are available for human (``pycistopic_hg38``), mouse (``pycistopic_mm10``), and fly (``pycistopic_dmel``). +Or, these profiles can be omitted and set manually in the config (biomart, macs2). + + +Input data (bam and fragments files) are specified in the data section:: + + data { + fragments { + file_paths = '/staging/leuven/stg_00002/lcb/cflerin/analysis/asap/20210527_hydrop-atac_asabr/atac_preprocess/out_run1/data/fragments/ASA__*tsv.gz' + suffix = '.sinto.fragments.tsv.gz' + index_extension = '.tbi' + } + bam { + file_paths = '/staging/leuven/stg_00002/lcb/cflerin/analysis/asap/20210527_hydrop-atac_asabr/atac_preprocess/out_run1/data/bam/ASA*bam' + suffix = '.bwa.out.possorted.bam' + index_extension = '.bai' + } + } + + +Multiple files can be specified with ``*`` in ``file_paths`` or by separating the paths with a comma. + +.. warning:: + + The ``suffix`` for both bam and fragments will be removed from the filename to get sample IDs. + The sample names obtained must match between bam and fragments for the files to be paired properly in the workflow. + + +Input with Cell Ranger ATAC data +-------------------------------- + +Use the ``tenx_atac`` profile:: + + nextflow config vib-singlecell-nf/vsn-pipelines/main_atac.nf \ + -profile atac_qc_filtering,tenx_atac,vsc,pycistopic_hg38 \ + > atac_qc.config + +Input data (the Cell Ranger ``outs/`` path) are specified in the data section:: + + data { + tenx_atac { + cellranger_mex = '/data/cellranger_atac_2.0/*/outs,/data/processed/cellranger_arc_2.0.0/*/outs' + } + } + +Multiple files can be specified with ``*`` in ``tenx_atac`` or by separating the paths with a comma. + +---- + +Execution +********* + +After configuring, the workflow can be run with: + +.. code:: bash + + nextflow -C atac_qc.config run \ + vib-singlecell-nf/vsn-pipelines/main_atac.nf \ + -entry atac_qc_filtering --quiet -resume + +After completing, view the report in ``out/notebooks/__pycisTopic_QC_report.html``. To change the filtering settings, use the ``params.tools.pycistopic.call_cells`` section. + +Adjusting the filter settings +----------------------------- + +In the pycisTopic parameters, filter settings can be applied in this section:: + + pycistopic { + call_cells { + report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' + use_density_coloring_on_scatterplot = true + use_detailed_title_on_scatterplot = true + filter_frags_lower = '1000' + filter_frags_upper = '' + filter_tss_lower = '8' + filter_tss_upper = '' + filter_frip_lower = '' + filter_frip_upper = '' + filter_dup_rate_lower = '' + filter_dup_rate_upper = '' + } + } + +If a setting is empty (``''``), this filter will not be applied. +If set to a single value (i.e. ``filter_frags_lower=1000``), this will apply this filter value to all samples. +To use sample-specific filters, this can be written as:: + + filter_frags_lower = [ + 'default': 1000, + 'Sample_1': 1500, + 'Sample_2': 2000, + ] + +The ``default`` setting (optional) is applied to all samples not listed in array. +If this default setting is missing, no filter will be applied to samples not listed in the array (all barcodes kept). 
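+
+As an illustration only, the lookup described above behaves like the following
+sketch (the helper name is hypothetical; the actual resolution happens inside
+the pycisTopic QC report notebook)::
+
+    # Hypothetical helper mirroring the 'default'-key semantics described above;
+    # None means "do not apply this bound".
+    def resolve_threshold(setting, sample_id):
+        if isinstance(setting, dict):
+            # sample-specific value first, then the optional 'default' entry
+            return setting.get(sample_id, setting.get('default'))
+        return None if setting in (None, '') else float(setting)
+
+    resolve_threshold({'default': 1000, 'Sample_1': 1500}, 'Sample_2')  # -> 1000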
+ +After setting the filters, the pipeline can be re-run to apply the new filters (use ``-resume``). + +The additional settings control the output of the scatter plots in the report: +* ``use_density_coloring_on_scatterplot``: Slower when turned on; it can be helpful to set this to ``false`` until the proper thresholds are determined. +* ``use_detailed_title_on_scatterplot``: Adds the cell count and median values after filtering to the title of each plot. + +---- + +Output +****** + +An example output tree is shown here. + +.. code:: bash + + out/ + ├── data + │   ├── macs2 + │   │   ├── sample_1.peaks.narrowPeak + │   │   ├── sample_1.summits.bed + │   │   ├── sample_2.peaks.narrowPeak + │   │   └── sample_2.summits.bed + │   └── pycistopic + │   └── qc + │   ├── benchmark_library_downsampled__metadata.pickle + │   ├── benchmark_library_downsampled__profile_data.pickle + │   ├── selected_barcodes + │   │   ├── sample_1.cell_barcodes.txt + │   │   └── sample_2.cell_barcodes.txt + │   └── selected_barcodes_nFrag + │   ├── sample_1.barcodes_nFrag_thr.txt + │   └── sample_2.barcodes_nFrag_thr.txt + └── notebooks + ├── example_project__pycisTopic_QC_report.html + └── example_project__pycisTopic_QC_report.ipynb + + +* ``macs2``: contains the narrowPeak and bed file for each sample. +* ``pycistopic``: + * ``qc``: contains Python objects (in pickle format) for the metadata and profile data computed by pycisTopic. + * ``selected_barcodes``: contains a text file with selected cell barcodes (one per line) based on the thresholds set in the config file. + * ``selected_barcodes_nFrag``: contains a text file with barcodes (one per line) that have unique fragment counts greater than the ``params.tools.pycistopic.compute_qc_stats.n_frag`` setting in the pycisTopic parameters. 
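+
+The pickled objects can be inspected directly in Python, for example to browse
+the per-barcode metadata outside of the report notebook. A minimal sketch,
+assuming the example output tree above (adjust the project and sample names to
+your run; the values are assumed to be pycisTopic's usual per-sample pandas
+DataFrames)::
+
+    import pickle
+
+    # dictionaries keyed by sample ID, as produced by compute_qc_stats
+    with open('out/data/pycistopic/qc/example_project__metadata.pickle', 'rb') as fh:
+        metadata_bc_dict = pickle.load(fh)
+    with open('out/data/pycistopic/qc/example_project__profile_data.pickle', 'rb') as fh:
+        profile_data_dict = pickle.load(fh)
+
+    print(metadata_bc_dict.keys())              # one entry per sample
+    print(metadata_bc_dict['sample_1'].head())  # per-barcode QC metrics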
+

From 2bc0d9fd00325334fc9b4e7b54fe94e51cb459f9 Mon Sep 17 00:00:00 2001
From: Chris Flerin 
Date: Wed, 30 Jun 2021 12:02:57 +0200
Subject: [PATCH 109/202] Add hydrop entry to sctk whitelist params

---
 src/singlecelltoolkit/conf/sctk_mapping.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/singlecelltoolkit/conf/sctk_mapping.config b/src/singlecelltoolkit/conf/sctk_mapping.config
index 599df060..a87bffc8 100644
--- a/src/singlecelltoolkit/conf/sctk_mapping.config
+++ b/src/singlecelltoolkit/conf/sctk_mapping.config
@@ -5,6 +5,7 @@ params {
             whitelist {
                 standard = ''
                 multiome = ''
+                hydrop = ''
             }
         }
         barcode_10x_scatac_fastqs {

From 6d9afb69e0073964f79681b5ab8d0b095d94fcc5 Mon Sep 17 00:00:00 2001
From: Chris Flerin 
Date: Thu, 1 Jul 2021 12:03:22 +0200
Subject: [PATCH 110/202] Update singlecelltoolkit

- Fix detection of gzipped whitelists
- Update docker container
---
 src/singlecelltoolkit/Dockerfile               | 2 ++
 src/singlecelltoolkit/singlecelltoolkit.config | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile
index 4010cc87..7851e6a4 100644
--- a/src/singlecelltoolkit/Dockerfile
+++ b/src/singlecelltoolkit/Dockerfile
@@ -32,11 +32,13 @@ RUN pip install --no-cache-dir --upgrade pip wheel && \
         uncertainties \
         typing \
         pathlib \
+        polars>=0.8.7 \
         matplotlib \
         numpy


 # install seq (https://github.com/seq-lang/seq/):
+#0.9.11
 ENV SEQ_VERSION=0.10.1
 RUN mkdir -p /opt/seq && \
     wget https://github.com/seq-lang/seq/releases/download/v${SEQ_VERSION}/seq-linux-x86_64.tar.gz && \

diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config
index 9ad57650..73ef1318 100644
--- a/src/singlecelltoolkit/singlecelltoolkit.config
+++ b/src/singlecelltoolkit/singlecelltoolkit.config
@@ -1,7 +1,7 @@
 params {
     tools {
         singlecelltoolkit {
-            container = 'vibsinglecellnf/singlecelltoolkit:2021-06-24-9dac781'
+            container = 'vibsinglecellnf/singlecelltoolkit:2021-06-29-a0ad3d6'
         }
     }
 }

From 5c26db6e067467fe7ee222e5295c1e7108fe4627 Mon Sep 17 00:00:00 2001
From: dweemx 
Date: Wed, 7 Jul 2021 17:05:46 +0200
Subject: [PATCH 111/202] Closes #348

Split the download and compression into two NXF processes:
one NXF process downloads the SRA file and extracts the FASTQ
files, and another NXF process fixes (renames the read sequence
names in) the FASTQ files and compresses them.
Final fixed and renamed FASTQ files are put in out/data/sra
---
 main.nf                                       | 10 ++---
 nextflow.config                               |  1 +
 src/sratoolkit/processes/downloadFastQ.nf     | 14 +++++--
 .../processes/fixAndCompressFastQ.nf          | 40 +++++++++++++++++++
 src/sratoolkit/sratoolkit.config              |  5 ++-
 src/sratoolkit/workflows/downloadFastQ.nf     | 30 ++++++++++++++
 src/utils/conf/sra_normalize_fastqs.config    |  8 ++++
 src/utils/processes/sra.nf                    | 15 ++++---
 src/utils/workflows/downloadFromSRA.nf        |  8 ++--
 9 files changed, 113 insertions(+), 18 deletions(-)
 create mode 100644 src/sratoolkit/processes/fixAndCompressFastQ.nf
 create mode 100644 src/sratoolkit/workflows/downloadFastQ.nf
 create mode 100644 src/utils/conf/sra_normalize_fastqs.config

diff --git a/main.nf b/main.nf
index 139a42e2..9c30a3de 100644
--- a/main.nf
+++ b/main.nf
@@ -1042,10 +1042,10 @@ workflow sra {
     DOWNLOAD_FROM_SRA( getSRAChannel( params.data.sra ) )
     if(params.utils?.publish) {
         PUBLISH(
-            DOWNLOAD_FROM_SRA.out,
+            DOWNLOAD_FROM_SRA.out.transpose(),
             null,
-            "fastqs",
             null,
+            "sra",
             false
         )
     }
@@ -1059,7 +1059,7 @@ workflow sra_cellranger_bbknn {
     include {
         getChannel as getSRAChannel;
     } from
'./src/channels/sra' params(params) include { - DOWNLOAD_FROM_SRA; + SRATOOLKIT__DOWNLOAD_FASTQS; } from './src/utils/workflows/downloadFromSRA' params(params) include { SC__CELLRANGER__PREPARE_FOLDER; @@ -1070,8 +1070,8 @@ workflow sra_cellranger_bbknn { } from './workflows/bbknn' params(params) // Run - DOWNLOAD_FROM_SRA( getSRAChannel( params.data.sra ) ) - SC__CELLRANGER__PREPARE_FOLDER( DOWNLOAD_FROM_SRA.out.groupTuple() ) + SRATOOLKIT__DOWNLOAD_FASTQS( getSRAChannel( params.data.sra ) ) + SC__CELLRANGER__PREPARE_FOLDER( SRATOOLKIT__DOWNLOAD_FASTQS.out.groupTuple() ) SC__CELLRANGER__COUNT( file(params.sc.cellranger.count.transcriptome), SC__CELLRANGER__PREPARE_FOLDER.out diff --git a/nextflow.config b/nextflow.config index cd0d9e09..1ad01120 100644 --- a/nextflow.config +++ b/nextflow.config @@ -294,6 +294,7 @@ profiles { sra { includeConfig 'src/channels/conf/sra.config' includeConfig 'src/utils/conf/sra_metadata.config' + includeConfig 'src/utils/conf/sra_normalize_fastqs.config' includeConfig 'src/sratoolkit/sratoolkit.config' } seurat_rds { diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index 4768e388..538f252f 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -18,7 +18,7 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { tuple val(sraId), val(sampleId) output: - tuple val(sraId), file("${sraId}*.fastq.gz") + tuple val(sraId), file("${sraId}*.fastq") script: if(sampleId == null || sampleId.length() < 1) { @@ -30,9 +30,17 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { echo "SRA file lock found for ${sraId}. Removing file lock..." rm \${SRA_FILE_LOCK} fi + # Fetch SRA file prefetch -v -p 1 ${sraId} - fasterq-dump -S -v -p -e ${task.cpus} -O . ${sraId} - pigz -p ${task.cpus} *.fastq + # Convert SRA file to FASTQ files + fasterq-dump \ + -S \ + -v \ + -p \ + -e ${task.cpus} \ + ${params.sratoolkit?.includeTechnicalReads ? '--include-technical' : ''} \ + -O . \ + ${sraId} """ } diff --git a/src/sratoolkit/processes/fixAndCompressFastQ.nf b/src/sratoolkit/processes/fixAndCompressFastQ.nf new file mode 100644 index 00000000..0dcfd8ec --- /dev/null +++ b/src/sratoolkit/processes/fixAndCompressFastQ.nf @@ -0,0 +1,40 @@ +nextflow.enable.dsl=2 + +if(!params.containsKey("test")) { + binDir = "${workflow.projectDir}/src/sratoolkit/bin/" +} else { + binDir = "" +} + +toolParams = params.sratoolkit + +process FIX_AND_COMPRESS_SRA_FASTQ { + + container toolParams.container + publishDir "${params.global.outdir}/data/raw/fastqs_fixed_and_compressed", mode: 'symlink', overwrite: true + label 'compute_resources__sratoolkit' + maxRetries 0 + + input: + tuple val(sraId), file("${sraId}_*.fastq") + + output: + tuple val(sraId), file("${sraId}_*.fastq.gz") + + script: + """ + # Fetch script to fix SRA FASTQ (fasterq-dump does not have the -F option as fastq-dump do to keep original sequence names). + # Fixing the FASTQ files is required for future pre-processing (e.g.: scATAC-seq pipelines) + # We cannot source the script directly: + # - 1) by default it generates help text to stdout + # - 2) if redirecting the stdout of to the trash i.e. 
/dev/null, Nextflow will think no files have been generated + curl -fsSL https://raw.githubusercontent.com/aertslab/single_cell_toolkit/master/fix_sra_fastq.sh -o fix_sra_fastq.sh + chmod a+x ./fix_sra_fastq.sh + # Fix the FASTQ files and compress them + export compress_fastq_threads="${task.cpus}" + NUM_FASTQ_FILES=\$(ls ./*.fastq | wc -l) + echo "Fixing and compressing \${NUM_FASTQ_FILES} FASTQ files in parallel with \${compress_fastq_threads} compression threads for each task..." + echo *.fastq | tr ' ' '\n' | xargs -P "\${NUM_FASTQ_FILES}" -n 1 -I {} ./fix_sra_fastq.sh "{}" "{}.gz" pigz + """ + +} diff --git a/src/sratoolkit/sratoolkit.config b/src/sratoolkit/sratoolkit.config index 1ac9b143..486b8365 100644 --- a/src/sratoolkit/sratoolkit.config +++ b/src/sratoolkit/sratoolkit.config @@ -1,6 +1,9 @@ params { sratoolkit { container = 'vibsinglecellnf/sratoolkit:2.9.4-1.1.0' + // --include-technical option (fasterq-dump) + // This option should be set to 'true' if data you're downloading is e.g.: 10x Genomics scATAC-seq + includeTechnicalReads = false } } @@ -8,7 +11,7 @@ params { process { withLabel: 'compute_resources__sratoolkit' { executor = 'local' - cpus = 20 + cpus = 4 memory = '40 GB' time = '24h' maxForks = 1 diff --git a/src/sratoolkit/workflows/downloadFastQ.nf b/src/sratoolkit/workflows/downloadFastQ.nf new file mode 100644 index 00000000..8aa4ce14 --- /dev/null +++ b/src/sratoolkit/workflows/downloadFastQ.nf @@ -0,0 +1,30 @@ +nextflow.enable.dsl=2 + +//////////////////////////////////////////////////////// +// Import sub-workflows/processes from the utils module: +//////////////////////////////////////////////////////// +// Import sub-workflows/processes from the tool module: +include { + DOWNLOAD_FASTQS_FROM_SRA_ACC_ID; +} from "../processes/downloadFastQ" params(params) +include { + FIX_AND_COMPRESS_SRA_FASTQ; +} from "../processes/fixAndCompressFastQ" params(params) + + +workflow SRATOOLKIT__DOWNLOAD_FASTQS { + + take: + // Expects (sraId, sampleId) + data + + main: + out = data | \ + DOWNLOAD_FASTQS_FROM_SRA_ACC_ID | \ + FIX_AND_COMPRESS_SRA_FASTQ + + emit: + // Returns (sraId, *.fastq.gz) + out + +} \ No newline at end of file diff --git a/src/utils/conf/sra_normalize_fastqs.config b/src/utils/conf/sra_normalize_fastqs.config new file mode 100644 index 00000000..63cdbf39 --- /dev/null +++ b/src/utils/conf/sra_normalize_fastqs.config @@ -0,0 +1,8 @@ +params { + utils { + sra_normalize_fastqs { + // Downloading FASTQ from SRA will give FASTQ in the following format SRRXXXXXX_[1-9].fastq. 
This index minus one will be used to retrieve the FASTQ read suffix from the array of suffixes defined hereunder + fastq_read_suffixes = ["R1","R2"] // ["R1","R2","I1","I2"] would be used for SRR11442498 (this requires params.sratoolkit.includeTechnicalReads = true) + } + } +} diff --git a/src/utils/processes/sra.nf b/src/utils/processes/sra.nf index 2fc2afc5..21020de0 100644 --- a/src/utils/processes/sra.nf +++ b/src/utils/processes/sra.nf @@ -65,13 +65,18 @@ process SRA_TO_METADATA { } -def normalizeSRAFastQ(fastQPath, sampleName) { +def normalizeSRAFastQ(fastQPath, sampleName, fastqReadSuffixes) { /* - * Rename samples SRRXXXXXX_[1|2].fastq.gz to more comprehensive file name ${sampleName}_S1_L001_R[1|2]_001.fastq.gz + * Rename samples SRRXXXXXX_[1-9].fastq.gz to more comprehensive file name ${sampleName}_S1_L001_${fastqReadSuffixes[\1-1]}_001.fastq.gz * Here we follow 10xGenomics file naming convention */ - (full, srrId, readType) = (fastQPath =~ /(SRR[0-9]*)_([1-2]).fastq.gz/)[0] - normalizedFastQName = "${sampleName}_S1_L001_R${readType}_001.fastq.gz" + (full, srrId, readType) = (fastQPath =~ /(SRR[0-9]*)_([1-9]).fastq.gz/)[0] + + if(readType.toInteger()-1 >= fastqReadSuffixes.size()) { + throw new Exception("Read suffix for the current FASTQ file from "+ srrId +"SRA ID with index "+ readType + " cannot be extracted from params.utils.sra_normalize_fastqs.fastq_read_suffixes.") + } + readSuffix = fastqReadSuffixes[readType.toInteger()-1] + normalizedFastQName = "${sampleName}_S1_L001_${readSuffix}_001.fastq.gz" return [fastQPath, normalizedFastQName] } @@ -89,7 +94,7 @@ process NORMALIZE_SRA_FASTQS { script: def normalizedFastqs = fastqs .collect { - fastq -> normalizeSRAFastQ(fastq, sampleId) + fastq -> normalizeSRAFastQ(fastq, sampleId, params.utils.sra_normalize_fastqs.fastq_read_suffixes) } def cmd = '' for(int i = 0; i < normalizedFastqs.size(); i++) diff --git a/src/utils/workflows/downloadFromSRA.nf b/src/utils/workflows/downloadFromSRA.nf index 990e4371..3c73c4c4 100644 --- a/src/utils/workflows/downloadFromSRA.nf +++ b/src/utils/workflows/downloadFromSRA.nf @@ -6,15 +6,15 @@ import java.nio.file.Paths ////////////////////////////////////////////////////// // process imports: +include { + SRATOOLKIT__DOWNLOAD_FASTQS; +} from './../../sratoolkit/workflows/downloadFastQ' params(params) include { GET_SRA_DB; } from './../processes/sra' params(params) include { SRA_TO_METADATA; } from './../processes/sra' params(params) -include { - DOWNLOAD_FASTQS_FROM_SRA_ACC_ID; -} from './../../sratoolkit/processes/downloadFastQ' params(params) include { NORMALIZE_SRA_FASTQS; } from './../processes/sra' params(params) @@ -73,7 +73,7 @@ workflow DOWNLOAD_FROM_SRA { } if(!params.containsKey('quiet')) metadata.view() // Download and compress all the SRA runs defined in the metadata - data = DOWNLOAD_FASTQS_FROM_SRA_ACC_ID( + data = SRATOOLKIT__DOWNLOAD_FASTQS( metadata ).join( metadata From fade105e46d22fa553a537fe07259d9627a552e4 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 7 Jul 2021 17:06:46 +0200 Subject: [PATCH 112/202] Update and fix docs related to sra workflow --- docs/pipelines.rst | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index 81876d85..c3bffc21 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -604,12 +604,10 @@ Now we can run it with the following command: -entry cell_filter -**sra** |sra| -------------- +**sra** +------- -.. 
|sra| image:: https://github.com/vib-singlecell-nf/vsn-pipelines/workflows/cell_annotate_filter/badge.svg - -Runs the ``sra`` workflow which will download all (or user-defined selected) FASTQ files from a particular SRA project ID and format with properly and humand friendly names. +Runs the ``sra`` workflow which will download all (or user-defined selected) FASTQ files from a particular SRA project and format those with properly and human readable names. First, generate the config : @@ -620,7 +618,11 @@ First, generate the config : -profile sra,singularity \ > nextflow.config -NOTE: If you're a VSC user, you might want to add the ``vsc`` profile. +NOTES: + +- If you're a VSC user, you might want to add the ``vsc`` profile. +- The final output (FASTQ files) will available in ``out/data/sra`` +- If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``. Now we can run it with the following command: @@ -629,7 +631,7 @@ Now we can run it with the following command: nextflow -C nextflow.config \ run ~/vib-singlecell-nf/vsn-pipelines \ - -entry cell_filter + -entry sra $ nextflow -C nextflow.config run ~/vib-singlecell-nf/vsn-pipelines -entry sra N E X T F L O W ~ version 20.11.0-edge From 227b5788d7ccb97f0158a728c4df474f5e37b0ce Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 7 Jul 2021 19:25:59 +0200 Subject: [PATCH 113/202] Remove uncompressed FASTQ files after having fixed and compressed them --- src/sratoolkit/processes/fixAndCompressFastQ.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sratoolkit/processes/fixAndCompressFastQ.nf b/src/sratoolkit/processes/fixAndCompressFastQ.nf index 0dcfd8ec..94eab8cb 100644 --- a/src/sratoolkit/processes/fixAndCompressFastQ.nf +++ b/src/sratoolkit/processes/fixAndCompressFastQ.nf @@ -27,7 +27,8 @@ process FIX_AND_COMPRESS_SRA_FASTQ { # Fixing the FASTQ files is required for future pre-processing (e.g.: scATAC-seq pipelines) # We cannot source the script directly: # - 1) by default it generates help text to stdout - # - 2) if redirecting the stdout of to the trash i.e. /dev/null, Nextflow will think no files have been generated + # - 2) if redirecting the stdout to the trash i.e. /dev/null, Nextflow will think no files have been generated + # So we need to save the file before executing the script curl -fsSL https://raw.githubusercontent.com/aertslab/single_cell_toolkit/master/fix_sra_fastq.sh -o fix_sra_fastq.sh chmod a+x ./fix_sra_fastq.sh # Fix the FASTQ files and compress them @@ -35,6 +36,12 @@ process FIX_AND_COMPRESS_SRA_FASTQ { NUM_FASTQ_FILES=\$(ls ./*.fastq | wc -l) echo "Fixing and compressing \${NUM_FASTQ_FILES} FASTQ files in parallel with \${compress_fastq_threads} compression threads for each task..." echo *.fastq | tr ' ' '\n' | xargs -P "\${NUM_FASTQ_FILES}" -n 1 -I {} ./fix_sra_fastq.sh "{}" "{}.gz" pigz + echo "Removing all uncompressed FASTQ files" + for FASTQ in *.fastq; do + echo "Removing uncompressed FASTQ file \${FASTQ}..." + rm "$(readlink -f \${FASTQ})" + done + echo "Done." 
""" } From b5c7baecb93ea2d36dfec5db79d4a18f75600899 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 7 Jul 2021 19:34:27 +0200 Subject: [PATCH 114/202] [sratoolkit] Add maxSize option to set -X option of prefetch in case files to download exceed 20 Gb (default limit) --- docs/pipelines.rst | 2 +- src/sratoolkit/processes/downloadFastQ.nf | 6 +++++- src/sratoolkit/processes/fixAndCompressFastQ.nf | 2 +- src/sratoolkit/sratoolkit.config | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index c3bffc21..3e430729 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -623,7 +623,7 @@ NOTES: - If you're a VSC user, you might want to add the ``vsc`` profile. - The final output (FASTQ files) will available in ``out/data/sra`` - If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``. - +- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased please set ``params.sratoolkit.maxSize`` accordingly. This limit can be removed by setting the parameter to arbitrarily high number (e.g.: 9999999999999). Now we can run it with the following command: diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index 538f252f..cd34e7c5 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -31,7 +31,11 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { rm \${SRA_FILE_LOCK} fi # Fetch SRA file - prefetch -v -p 1 ${sraId} + prefetch \ + -v \ + -p 1 \ + ${params.sratoolkit?.maxSize ? '--max-size '+ params.sratoolkit.maxSize: ''} \ + ${sraId} # Convert SRA file to FASTQ files fasterq-dump \ -S \ diff --git a/src/sratoolkit/processes/fixAndCompressFastQ.nf b/src/sratoolkit/processes/fixAndCompressFastQ.nf index 94eab8cb..33becf72 100644 --- a/src/sratoolkit/processes/fixAndCompressFastQ.nf +++ b/src/sratoolkit/processes/fixAndCompressFastQ.nf @@ -39,7 +39,7 @@ process FIX_AND_COMPRESS_SRA_FASTQ { echo "Removing all uncompressed FASTQ files" for FASTQ in *.fastq; do echo "Removing uncompressed FASTQ file \${FASTQ}..." - rm "$(readlink -f \${FASTQ})" + rm "\$(readlink -f \${FASTQ})" done echo "Done." """ diff --git a/src/sratoolkit/sratoolkit.config b/src/sratoolkit/sratoolkit.config index 486b8365..b2d3e6e4 100644 --- a/src/sratoolkit/sratoolkit.config +++ b/src/sratoolkit/sratoolkit.config @@ -4,6 +4,8 @@ params { // --include-technical option (fasterq-dump) // This option should be set to 'true' if data you're downloading is e.g.: 10x Genomics scATAC-seq includeTechnicalReads = false + // --max-size (prefetch), maximum file size to download in KB (exclusive). 
Default: 20G + maxSize = 20000000 } } From fbec125d81d590bb55dc796d0f2734e2a51ad340 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 7 Jul 2021 23:36:09 +0200 Subject: [PATCH 115/202] Reuse sra NXF workflow in main.nf --- main.nf | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index 9c30a3de..a140d373 100644 --- a/main.nf +++ b/main.nf @@ -1039,28 +1039,24 @@ workflow sra { } from "./src/utils/workflows/utils" params(params) // Run - DOWNLOAD_FROM_SRA( getSRAChannel( params.data.sra ) ) + out = DOWNLOAD_FROM_SRA( getSRAChannel( params.data.sra ) ) if(params.utils?.publish) { PUBLISH( - DOWNLOAD_FROM_SRA.out.transpose(), + out.transpose(), null, null, "sra", false ) - } + } + emit: + out } workflow sra_cellranger_bbknn { main: - include { - getChannel as getSRAChannel; - } from './src/channels/sra' params(params) - include { - SRATOOLKIT__DOWNLOAD_FASTQS; - } from './src/utils/workflows/downloadFromSRA' params(params) include { SC__CELLRANGER__PREPARE_FOLDER; SC__CELLRANGER__COUNT; @@ -1070,8 +1066,8 @@ workflow sra_cellranger_bbknn { } from './workflows/bbknn' params(params) // Run - SRATOOLKIT__DOWNLOAD_FASTQS( getSRAChannel( params.data.sra ) ) - SC__CELLRANGER__PREPARE_FOLDER( SRATOOLKIT__DOWNLOAD_FASTQS.out.groupTuple() ) + out = sra() + SC__CELLRANGER__PREPARE_FOLDER( out.groupTuple() ) SC__CELLRANGER__COUNT( file(params.sc.cellranger.count.transcriptome), SC__CELLRANGER__PREPARE_FOLDER.out From b0540d30ae9812287ac1ca01e606d71c890df6d0 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 7 Jul 2021 23:42:42 +0200 Subject: [PATCH 116/202] Update docs for sra NXF workflow --- docs/pipelines.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index 3e430729..9ddd5b61 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -615,15 +615,15 @@ First, generate the config : nextflow config \ ~/vib-singlecell-nf/vsn-pipelines \ - -profile sra,singularity \ - > nextflow.config + -profile sra,singularity \ + > nextflow.config NOTES: - If you're a VSC user, you might want to add the ``vsc`` profile. - The final output (FASTQ files) will available in ``out/data/sra`` - If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``. -- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased please set ``params.sratoolkit.maxSize`` accordingly. This limit can be removed by setting the parameter to arbitrarily high number (e.g.: 9999999999999). +- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased please set ``params.sratoolkit.maxSize`` accordingly. This limit can be 'removed' by setting the parameter to an arbitrarily high number (e.g.: 9999999999999). 
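+
+Pulling the notes above together, a hedged configuration sketch for a 10x
+Genomics scATAC-seq download (the values shown are illustrative, not defaults)
+could look like::
+
+    params {
+        sratoolkit {
+            // needed to also fetch barcode/index reads for scATAC-seq data
+            includeTechnicalReads = true
+            // prefetch limit in KB (exclusive); raised above the 20G default
+            maxSize = 50000000
+        }
+        utils {
+            sra_normalize_fastqs {
+                fastq_read_suffixes = ["R1", "R2", "I1", "I2"]
+            }
+        }
+    }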
Now we can run it with the following command:

.. code:: bash

    nextflow -C nextflow.config \
       run ~/vib-singlecell-nf/vsn-pipelines \
       -entry sra

    $ nextflow -C nextflow.config run ~/vib-singlecell-nf/vsn-pipelines -entry sra
    N E X T F L O W ~ version 20.11.0-edge
    Launching `~/vib-singlecell-nf/vsn-pipelines/main.nf` [sleepy_goldstine] - revision: 7527661b07
    executor > local (23)
    [12/25b9d4] process > sra:DOWNLOAD_FROM_SRA:SRA_TO_METADATA (1) [100%] 1 of 1 _
    [e2/d5a429] process > sra:DOWNLOAD_FROM_SRA:SRATOOLKIT__DOWNLOAD_FASTQS:DOWNLOAD_FASTQS_FROM_SRA_ACC_ID (4) [ 33%] 3 of 9
    [30/cba7a0] process > sra:DOWNLOAD_FROM_SRA:SRATOOLKIT__DOWNLOAD_FASTQS:FIX_AND_COMPRESS_SRA_FASTQ (3) [100%] 3 of 3
    [76/97ce6e] process > sra:DOWNLOAD_FROM_SRA:NORMALIZE_SRA_FASTQS (3) [100%] 3 of 3
    [8c/3125c4] process > sra:PUBLISH:SC__PUBLISH (11) [100%] 12 of 12
    ...

From a5217550e2ead017dd442d95445c96576d5cb21a Mon Sep 17 00:00:00 2001
From: dweemx 
Date: Thu, 8 Jul 2021 00:11:13 +0200
Subject: [PATCH 117/202] SRA lock file is located relative to the current
 process work dir

---
 src/sratoolkit/processes/downloadFastQ.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf
index cd34e7c5..bb09a3a8 100644
--- a/src/sratoolkit/processes/downloadFastQ.nf
+++ b/src/sratoolkit/processes/downloadFastQ.nf
@@ -25,7 +25,7 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID {
         throw new Exception("DOWNLOAD_FASTQS_FROM_SRA_ACC_ID: Sample ID is empty.")
     }
     """
-    SRA_FILE_LOCK=~/ncbi/public/sra/${sraId}.sra.lock
+    SRA_FILE_LOCK=./ncbi/public/sra/${sraId}.sra.lock
     if [[ -f "\${SRA_FILE_LOCK}" ]]; then
         echo "SRA file lock found for ${sraId}. Removing file lock..."
rm \${SRA_FILE_LOCK} From 7cc8c189b2571d7e0a1669d4e1225ed1c404dc44 Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 18 Feb 2021 11:55:37 +0100 Subject: [PATCH 118/202] Update Scanpy Dockerfile and package requirements - Scanpy version 1.7.0 - Closes #285 - Closes #299 --- src/scanpy/Dockerfile | 2 +- src/scanpy/requirements.txt | 189 ++++++++++++++++++++---------------- 2 files changed, 106 insertions(+), 85 deletions(-) diff --git a/src/scanpy/Dockerfile b/src/scanpy/Dockerfile index 50944f6c..3e6191d1 100644 --- a/src/scanpy/Dockerfile +++ b/src/scanpy/Dockerfile @@ -18,7 +18,7 @@ COPY requirements.txt /tmp/ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /tmp/requirements.txt -RUN pip install --no-cache-dir scanpy==1.4.4.post1 +RUN pip install --no-cache-dir scanpy==1.7.0 FROM python:3.7.4-slim AS build-image diff --git a/src/scanpy/requirements.txt b/src/scanpy/requirements.txt index 2b812492..b2539068 100644 --- a/src/scanpy/requirements.txt +++ b/src/scanpy/requirements.txt @@ -1,103 +1,124 @@ -anndata==0.6.22.post1 -annoy==1.15.2 +anndata==0.7.5 +annoy==1.17.0 ansiwrap==0.8.4 -appdirs==1.4.3 -attrs==19.3.0 -backcall==0.1.0 -bbknn==1.3.6 -beautifulsoup4==4.8.2 -bioservices==1.6.0 -bleach==3.1.4 -certifi==2019.9.11 -cffi==1.12.3 -chardet==3.0.4 -Click==7.0 -colorama==0.4.3 -colorlog==4.1.0 +appdirs==1.4.4 +async-generator==1.10 +attrs==20.3.0 +backcall==0.2.0 +bbknn==1.4.0 +beautifulsoup4==4.9.3 +bioservices==1.7.11 +black==20.8b1 +bleach==3.3.0 +cached-property==1.5.2 +certifi==2020.12.5 +cffi==1.14.5 +chardet==4.0.0 +click==7.1.2 +colorama==0.4.4 +colorlog==4.7.2 cycler==0.10.0 -Cython==0.29.13 -decorator==4.4.0 +Cython==0.29.21 +decorator==4.4.2 defusedxml==0.6.0 -easydev==0.9.38 +easydev==0.11.0 entrypoints==0.3 -future==0.18.0 -gevent==1.4.0 -greenlet==0.4.15 -grequests==0.4.0 -h5py==2.9.0 -idna==2.8 -importlib-metadata==0.23 -ipykernel==5.3.4 -ipython==7.8.0 +get-version==2.1 +gevent==21.1.2 +greenlet==1.0.0 +grequests==0.6.0 +h5py==3.1.0 +idna==2.10 +importlib-metadata==3.4.0 +ipykernel==5.4.3 +ipython==7.20.0 ipython-genutils==0.2.0 -jedi==0.15.1 -Jinja2==2.10.3 -joblib==0.14.0 -jsonschema==3.1.1 -jupyter-client==6.1.5 -jupyter-core==4.6.0 -kiwisolver==1.1.0 -leidenalg==0.7.0 -llvmlite==0.31.0 +jedi==0.18.0 +Jinja2==2.11.3 +joblib==1.0.1 +jsonschema==3.2.0 +jupyter-client==6.1.11 +jupyter-core==4.7.1 +jupyterlab-pygments==0.1.2 +kiwisolver==1.3.1 +legacy-api-wrap==1.2 +leidenalg==0.8.3 +llvmlite==0.35.0 loompy==3.0.6 -louvain==0.6.1 -lxml==4.5.0 +louvain==0.7.0 +lxml==4.6.2 MarkupSafe==1.1.1 -matplotlib==3.0.3 +matplotlib==3.3.4 mistune==0.8.4 mnnpy==0.1.9.5 -mock==3.0.5 -more-itertools==7.2.0 +mock==4.0.3 +more-itertools==8.7.0 MulticoreTSNE==0.1 -natsort==6.0.0 -nbconvert==5.6.0 -nbformat==4.4.0 +mypy-extensions==0.4.3 +natsort==7.1.1 +nbclient==0.5.2 +nbconvert==6.0.7 +nbformat==5.1.2 nbmerge==0.0.4 -networkx==2.3 -numba==0.48.0 -numexpr==2.7.0 -numpy==1.17.2 -numpy-groupies==0.9.9 -pandas==0.25.1 -pandocfilters==1.4.2 -papermill==2.1.3 -parso==0.5.1 +nest-asyncio==1.5.1 +networkx==2.5 +numba==0.52.0 +numexpr==2.7.2 +numpy==1.20.1 +numpy-groupies==0.9.13 +packaging==20.9 +pandas==1.2.2 +pandocfilters==1.4.3 +papermill==2.3.2 +parso==0.8.1 +pathspec==0.8.1 patsy==0.5.1 -pexpect==4.7.0 +pexpect==4.8.0 pickleshare==0.7.5 -prompt-toolkit==2.0.10 -ptyprocess==0.6.0 -pycparser==2.19 -Pygments==2.4.2 -pyparsing==2.4.2 -pyrsistent==0.15.4 -python-dateutil==2.8.0 -python-igraph==0.7.1.post6 -pytz==2019.3 -PyYAML==5.1.2 -pyzmq==18.1.0 
-requests==2.22.0 +Pillow==8.1.0 +prompt-toolkit==3.0.16 +ptyprocess==0.7.0 +pycparser==2.20 +Pygments==2.8.0 +pynndescent==0.5.2 +pyparsing==2.4.7 +pyrsistent==0.17.3 +python-dateutil==2.8.1 +python-igraph==0.8.3 +pytz==2021.1 +PyYAML==5.4.1 +pyzmq==22.0.3 +regex==2020.11.13 +requests==2.25.1 requests-cache==0.5.2 -scanpy==1.4.4.post1 -scikit-learn==0.21.3 -scipy==1.3.1 -seaborn==0.9.0 -six==1.12.0 +scikit-learn==0.24.1 +scipy==1.6.1 +seaborn==0.11.1 +sinfo==0.3.1 +six==1.15.0 sklearn==0.0 -soupsieve==2.0 -statsmodels==0.10.1 +soupsieve==2.2 +statsmodels==0.12.2 +stdlib-list==0.8.0 suds-jurko==0.6 -tables==3.5.2 -tenacity==5.1.1 -testpath==0.4.2 +tables==3.6.1 +tenacity==6.3.1 +testpath==0.4.4 +texttable==1.6.3 textwrap3==0.9.2 -tornado==6.0.3 -tqdm==4.36.1 -traitlets==4.3.3 -umap-learn==0.3.10 -urllib3==1.25.6 -wcwidth==0.1.7 +threadpoolctl==2.1.0 +toml==0.10.2 +tornado==6.1 +tqdm==4.57.0 +traitlets==5.0.5 +typed-ast==1.4.2 +typing-extensions==3.7.4.3 +umap-learn==0.5.1 +urllib3==1.26.3 +wcwidth==0.2.5 webencodings==0.5.1 -wrapt==1.12.0 +wrapt==1.12.1 xmltodict==0.12.0 -zipp==0.6.0 \ No newline at end of file +zipp==3.4.0 +zope.event==4.5.0 +zope.interface==5.2.0 From 2fd236163696dcb1ff21bd336bbddc625be5a2b5 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 22 Mar 2021 11:04:43 +0100 Subject: [PATCH 119/202] [scanpy] Latest versions returns by default all genes when running rank_genes_groups --- src/scanpy/bin/aggregate/sc_batch_effect_correction.py | 6 ++++-- src/scanpy/bin/cluster/sc_marker_genes.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/scanpy/bin/aggregate/sc_batch_effect_correction.py b/src/scanpy/bin/aggregate/sc_batch_effect_correction.py index f645725e..bb29cbe5 100755 --- a/src/scanpy/bin/aggregate/sc_batch_effect_correction.py +++ b/src/scanpy/bin/aggregate/sc_batch_effect_correction.py @@ -145,7 +145,8 @@ if args.method == 'combat': sc.pp.combat( adatas[0], - key=args.key) + key=args.key + ) elif args.method == 'bbknn': # Expects: # - the PCA to have been computed and stored in adata.obsm['X_pca'] @@ -157,7 +158,8 @@ batch_key=args.batch_key, n_pcs=args.n_pcs, neighbors_within_batch=args.neighbors_within_batch, - trim=args.trim) + trim=args.trim + ) elif args.method == 'mnncorrect': # Run MNN_CORRECT (mnnpy) # GitHub: https://github.com/chriscainx/mnnpy/tree/master diff --git a/src/scanpy/bin/cluster/sc_marker_genes.py b/src/scanpy/bin/cluster/sc_marker_genes.py index a5c21928..dddbc536 100755 --- a/src/scanpy/bin/cluster/sc_marker_genes.py +++ b/src/scanpy/bin/cluster/sc_marker_genes.py @@ -50,8 +50,8 @@ type=int, action="store", dest="ngenes", - default=0, - help="The number of genes that appear in the returned tables. Value of 0 will report all genes." + default=None, + help="The number of genes that appear in the returned tables. Defaults to all genes." 
) args = parser.parse_args() From e3b256ab609d1c1d464f352f47a0c9492c576c36 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 22 Mar 2021 11:16:39 +0100 Subject: [PATCH 120/202] [scanpy] Allow to set perplexity parameter when computing t-SNE --- src/scanpy/bin/dim_reduction/sc_dim_reduction.py | 16 +++++++++++++--- src/scanpy/processes/dim_reduction.nf | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/scanpy/bin/dim_reduction/sc_dim_reduction.py b/src/scanpy/bin/dim_reduction/sc_dim_reduction.py index a579cff4..184e183d 100755 --- a/src/scanpy/bin/dim_reduction/sc_dim_reduction.py +++ b/src/scanpy/bin/dim_reduction/sc_dim_reduction.py @@ -40,6 +40,15 @@ def str2bool(v): help="Reduce the dimensionality of the data. Choose one of : PCA, UMAP, t-SNE" ) +parser.add_argument( + "-p", "--perplexity", + type=int, + action="store", + dest="perplexity", + default=30, + help="[t-SNE], The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms." +) + parser.add_argument( "-c", "--n-comps", type=int, @@ -140,10 +149,11 @@ def str2bool(v): # Use the indicated representation. 'X' or any key for .obsm is valid. If None, the representation is chosen automatically: For .n_vars < 50, .X is used, otherwise ‘X_pca’ is used. If ‘X_pca’ is not present, it’s computed with default parameters. sc.tl.tsne( adata=adata, - n_jobs=args.n_jobs, - use_fast_tsne=args.use_fast_tsne, + perplexity=args.perplexity, n_pcs=args.n_pcs, - random_state=args.seed + random_state=args.seed, + use_fast_tsne=args.use_fast_tsne, + n_jobs=args.n_jobs ) else: raise Exception("VSN ERROR: The dimensionality reduction method {} does not exist.".format(args.method)) diff --git a/src/scanpy/processes/dim_reduction.nf b/src/scanpy/processes/dim_reduction.nf index 722430c9..5ae965bf 100644 --- a/src/scanpy/processes/dim_reduction.nf +++ b/src/scanpy/processes/dim_reduction.nf @@ -125,6 +125,7 @@ process SC__SCANPY__DIM_REDUCTION { --seed ${params.global.seed} \ --method ${processParams.method} \ ${(processParams.containsKey('svdSolver')) ? '--svd-solver ' + processParams.svdSolver : ''} \ + ${(processParams.containsKey('perplexity')) ? '--perplexity ' + processParams.perplexity : ''} \ ${(processParams.containsKey('nNeighbors')) ? '--n-neighbors ' + processParams.nNeighbors : ''} \ ${_processParams.getNCompsAsArgument(nComps)} \ ${(processParams.containsKey('nPcs')) ? '--n-pcs ' + processParams.nPcs : ''} \ From ed77df6eff651f2f5f3f10408456a8beaea7988b Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 22 Mar 2021 11:38:42 +0100 Subject: [PATCH 121/202] [scanpy] Rename method to factor (as it's a parameter in latest version) when running find highly variable genes --- .../sc_find_variable_genes.py | 15 ++++++------ src/scanpy/conf/base.config | 24 +++++++++---------- src/scanpy/processes/feature_selection.nf | 2 +- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py index 548823c8..3deefb8b 100755 --- a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py +++ b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py @@ -21,12 +21,12 @@ ) parser.add_argument( - "-x", "--method", + "-f", "--flavor", type=str, action="store", - dest="method", - default="mean_disp_plot", - help="Method to choose top variable features. Choose one of : mean_disp_plot" + dest="flavor", + default="flavor", + help="Flavor to choose top variable features. 
Choose one of : seurat" ) parser.add_argument( @@ -82,7 +82,7 @@ # Feature selection # -if args.method == "mean_disp_plot": +if args.flavor == "seurat": # Identify highly variable genes. # Expects logarithmized data: https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.api.pp.highly_variable_genes.html#scanpy.api.pp.highly_variable_genes sc.pp.highly_variable_genes( @@ -90,7 +90,8 @@ min_mean=args.min_mean, max_mean=np.inf if args.max_mean is None else args.max_mean, min_disp=args.min_disp, - max_disp=args.max_disp + max_disp=args.max_disp, + flavor=args.flavor ) num_variable_genes = sum(adata.var["highly_variable"]) if num_variable_genes == 0: @@ -100,7 +101,7 @@ "Low number of variables genes found. Make sure the following options (minMean, maxMean, minDisp, maxDisp) are in the right range of your data." ) else: - raise Exception("VSN ERROR: Method does not exist.") + raise Exception("VSN ERROR: Flavor does not exist.") # I/O adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) diff --git a/src/scanpy/conf/base.config b/src/scanpy/conf/base.config index 0abfc0fe..7ed83e1b 100644 --- a/src/scanpy/conf/base.config +++ b/src/scanpy/conf/base.config @@ -7,30 +7,30 @@ params { } feature_selection { report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/scanpy/bin/reports/sc_select_variable_genes_report.ipynb" - method = 'mean_disp_plot' + flavor = 'seurat' minMean = 0.0125 // 0.125 maxMean = 3 // 2.5 minDisp = 0.5 //0.7 - off = 'h5ad' + off = 'h5ad' } feature_scaling { - method = 'zscore_scale' - maxSD = 10 - off = 'h5ad' + method = 'zscore_scale' + maxSD = 10 + off = 'h5ad' } neighborhood_graph { nPcs = 50 - off = 'h5ad' + off = 'h5ad' } dim_reduction { report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/scanpy/bin/reports/sc_dim_reduction_report.ipynb" pca { - method = 'pca' + method = 'pca' nComps = 50 - off = 'h5ad' + off = 'h5ad' } umap { - method = 'umap' + method = 'umap' off = 'h5ad' } tsne { @@ -41,9 +41,9 @@ params { clustering { preflight_checks = true report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/scanpy/bin/reports/sc_clustering_report.ipynb" - method = 'louvain' - resolution = 0.8 - off = 'h5ad' + method = 'louvain' + resolution = 0.8 + off = 'h5ad' } marker_genes { method = 'wilcoxon' diff --git a/src/scanpy/processes/feature_selection.nf b/src/scanpy/processes/feature_selection.nf index 8124bb0a..d15a92b4 100644 --- a/src/scanpy/processes/feature_selection.nf +++ b/src/scanpy/processes/feature_selection.nf @@ -21,7 +21,7 @@ process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES { processParams = sampleParams.local """ ${binDir}/feature_selection/sc_find_variable_genes.py \ - --method ${processParams.method} \ + ${(processParams.containsKey('flavor')) ? '--flavor ' + processParams.flavor : ''} \ ${(processParams.containsKey('minMean')) ? '--min-mean ' + processParams.minMean : ''} \ ${(processParams.containsKey('maxMean')) ? '--max-mean ' + processParams.maxMean : ''} \ ${(processParams.containsKey('minDisp')) ? 
'--min-disp ' + processParams.minDisp : ''} \

From 57d47ac43b75e6a501f6fbad3a02b46f849acdcc Mon Sep 17 00:00:00 2001
From: dweemx 
Date: Mon, 22 Mar 2021 11:49:14 +0100
Subject: [PATCH 122/202] [scanpy] Add flavors (cellranger and seurat_v3) to
 find highly variable genes

---
 .../sc_find_variable_genes.py                 | 41 ++++++++++++++-----
 src/scanpy/processes/feature_selection.nf     |  1 +
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py
index 3deefb8b..c376c778 100755
--- a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py
+++ b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py
@@ -26,7 +26,16 @@
     action="store",
     dest="flavor",
     default="flavor",
-    help="Flavor to choose top variable features. Choose one of : seurat"
+    help="Flavor to choose top variable features. Choose one of : 'seurat', 'cell_ranger', 'seurat_v3'"
+)
+
+parser.add_argument(
+    "-n", "--n-top-genes",
+    type=int,
+    action="store",
+    dest="n_top_genes",
+    default=None,
+    help="[cell_ranger, seurat_v3] Number of highly-variable genes to keep. Mandatory if flavor is 'seurat_v3'."
 )

 parser.add_argument(
@@ -81,10 +90,10 @@
 #
 # Feature selection
 #
+# Identify highly variable genes.
+# Expects logarithmized data: https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.api.pp.highly_variable_genes.html#scanpy.api.pp.highly_variable_genes

 if args.flavor == "seurat":
-
     sc.pp.highly_variable_genes(
         adata,
         min_mean=args.min_mean,
@@ -93,15 +102,27 @@
         max_disp=args.max_disp,
         flavor=args.flavor
     )
-    num_variable_genes = sum(adata.var["highly_variable"])
-    if num_variable_genes == 0:
-        raise Exception("No variable genes found. Make sure the following options (minMean, maxMean, minDisp, maxDisp) are in the right range of your data.")
-    if num_variable_genes < 100:
-        warnings.warn(
-            "Low number of variables genes found. Make sure the following options (minMean, maxMean, minDisp, maxDisp) are in the right range of your data."
-        )
+elif args.flavor == "cell_ranger" or args.flavor == "seurat_v3":
+
+    if args.flavor == "seurat_v3" and args.n_top_genes is None:
+        raise Exception("VSN ERROR: --n-top-genes (nTopGenes in config) is required when flavor is 'seurat_v3'.")
+
+    sc.pp.highly_variable_genes(
+        adata,
+        n_top_genes=args.n_top_genes,
+        flavor=args.flavor
+    )
 else:
     raise Exception("VSN ERROR: Flavor does not exist.")

+num_variable_genes = sum(adata.var["highly_variable"])
+if num_variable_genes == 0:
+    raise Exception("No variable genes found. Make sure the following options (minMean, maxMean, minDisp, maxDisp) are in the right range of your data.")
+if num_variable_genes < 100:
+    warnings.warn(
+        "Low number of variables genes found. Make sure the following options (minMean, maxMean, minDisp, maxDisp) are in the right range of your data."
+    )
+
+
 # I/O
 adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME))

diff --git a/src/scanpy/processes/feature_selection.nf b/src/scanpy/processes/feature_selection.nf
index d15a92b4..d09cbb14 100644
--- a/src/scanpy/processes/feature_selection.nf
+++ b/src/scanpy/processes/feature_selection.nf
@@ -22,6 +22,7 @@ process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES {
     """
     ${binDir}/feature_selection/sc_find_variable_genes.py \
         ${(processParams.containsKey('flavor')) ?
From c406f0070d150cfcfb66b4f49b8dcf3db4afc669 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Mon, 22 Mar 2021 12:21:00 +0100
Subject: [PATCH 123/202] [scanpy] Update normalization function from
 normalize_per_cell to normalize_total

---
 src/scanpy/bin/transform/sc_normalization.py | 13 ++++++++-----
 src/scanpy/conf/normalization.config         |  2 +-
 src/scanpy/processes/transform.nf            |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/scanpy/bin/transform/sc_normalization.py b/src/scanpy/bin/transform/sc_normalization.py
index fabec801..ff7fc86c 100755
--- a/src/scanpy/bin/transform/sc_normalization.py
+++ b/src/scanpy/bin/transform/sc_normalization.py
@@ -24,16 +24,16 @@
     action="store",
     dest="method",
     default="cpx",
-    help="Normalize the data. Choose one of : cpx, regress_out"
+    help="Normalize the data. Choose one of : cpx"
 )
 
 parser.add_argument(
-    "-f", "--counts-per-cell-after",
+    "-f", "--target-sum",
     type=int,
     action="store",
-    dest="counts_per_cell_after",
+    dest="target_sum",
     default=1e4,
-    help="Multiplying factor used when running 'cpx' method."
+    help="Multiplying factor used when running 'cpx' method. If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization."
 )
 
 args = parser.parse_args()
@@ -57,7 +57,10 @@
 
 if args.method == 'cpx':
     # Total-count normalize (library-size correct) to '-r' reads/cell
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=args.counts_per_cell_after)
+    sc.pp.normalize_total(
+        adata,
+        target_sum=args.target_sum
+    )
 else:
     raise Exception("VSN ERROR: Method does not exist.")
 
diff --git a/src/scanpy/conf/normalization.config b/src/scanpy/conf/normalization.config
index d260dd0d..3dd9fe4b 100644
--- a/src/scanpy/conf/normalization.config
+++ b/src/scanpy/conf/normalization.config
@@ -3,7 +3,7 @@ params {
         scanpy {
             normalization {
                 method = 'cpx'
-                countsPerCellAfter = 10000
+                targetSum = 10000
                 off = 'h5ad'
             }
         }
diff --git a/src/scanpy/processes/transform.nf b/src/scanpy/processes/transform.nf
index 7b1017e1..83f62dca 100644
--- a/src/scanpy/processes/transform.nf
+++ b/src/scanpy/processes/transform.nf
@@ -22,7 +22,7 @@ process SC__SCANPY__NORMALIZATION {
         """
         ${binDir}/transform/sc_normalization.py \
             ${(processParams.containsKey('method')) ? '--method ' + processParams.method : ''} \
-            ${(processParams.containsKey('countsPerCellAfter')) ? '--counts-per-cell-after ' + processParams.countsPerCellAfter : ''} \
+            ${(processParams.containsKey('targetSum')) ? 
'--target-sum ' + processParams.targetSum : ''} \ $f \ "${sampleId}.SC__SCANPY__NORMALIZATION.${processParams.off}" """ From 46c35f1cc00ebd8e6074de397d099ae2dea0d479 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 22 Mar 2021 12:52:39 +0100 Subject: [PATCH 124/202] [scanpy] Allow to not produce the QC reports --- src/scanpy/workflows/qc_filter.nf | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/scanpy/workflows/qc_filter.nf b/src/scanpy/workflows/qc_filter.nf index b3c07494..aefc3e68 100644 --- a/src/scanpy/workflows/qc_filter.nf +++ b/src/scanpy/workflows/qc_filter.nf @@ -29,15 +29,16 @@ workflow QC_FILTER { SC__SCANPY__COMPUTE_QC_STATS | \ SC__SCANPY__CELL_FILTER | \ SC__SCANPY__GENE_FILTER - - report = GENERATE_DUAL_INPUT_REPORT( - SC__SCANPY__COMPUTE_QC_STATS.out.join(filtered).map { - it -> tuple(*it[0..(it.size()-1)], null) - }, - file(workflow.projectDir + params.sc.scanpy.filter.report_ipynb), - 'SC_QC_filtering_report', - false - ) + + report = !params.sc.scanpy.filter?.report_ipynb ? Channel.empty() : + GENERATE_DUAL_INPUT_REPORT( + SC__SCANPY__COMPUTE_QC_STATS.out.join(filtered).map { + it -> tuple(*it[0..(it.size()-1)], null) + }, + file(workflow.projectDir + params.sc.scanpy.filter.report_ipynb), + 'SC_QC_filtering_report', + false + ) emit: filtered From 7f7c630dd8c4b2ef6eba5553fed099460340c7a1 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 22 Mar 2021 12:53:41 +0100 Subject: [PATCH 125/202] [scanpy] Add multi-sample QC workflow --- main.nf | 29 +++++++++++++++++++++++--- src/scanpy/main.nf | 52 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 139a42e2..b822f8e4 100644 --- a/main.nf +++ b/main.nf @@ -273,15 +273,15 @@ workflow single_sample_qc { include { single_sample_qc as SINGLE_SAMPLE_QC; - } from './main' params(params) + } from './src/scanpy/main' params(params) include { - PUBLISH as PUBLISH_SINGLE_SAMPLE_SCOPE; + PUBLISH; } from "./src/utils/workflows/utils" params(params) getDataChannel | SINGLE_SAMPLE_QC if(params.utils?.publish) { - PUBLISH_SINGLE_SAMPLE_SCOPE( + PUBLISH( SINGLE_SAMPLE_QC.out.filtered, "SINGLE_SAMPLE_QC", "h5ad", @@ -292,6 +292,29 @@ workflow single_sample_qc { } +workflow multi_sample_qc { + + include { + multi_sample_qc as MULTI_SAMPLE_QC; + } from './src/scanpy/main' params(params) + include { + PUBLISH; + } from "./src/utils/workflows/utils" params(params) + + getDataChannel | MULTI_SAMPLE_QC + + if(params.utils?.publish) { + PUBLISH( + MULTI_SAMPLE_QC.out.filtered, + "MULTI_SAMPLE_QC", + "h5ad", + null, + false + ) + } + +} + workflow multi_sample { include { diff --git a/src/scanpy/main.nf b/src/scanpy/main.nf index e0e10ff9..2d93a789 100644 --- a/src/scanpy/main.nf +++ b/src/scanpy/main.nf @@ -6,14 +6,19 @@ import static groovy.json.JsonOutput.* ////////////////////////////////////////////////////// // Import sub-workflows from the modules: - +include { + getBaseName; +} from '../utils/processes/files.nf' +include { + FILTER_AND_ANNOTATE_AND_CLEAN; +} from '../utils/workflows/filterAnnotateClean.nf' params(params) include { INIT; - getDataChannel; } from '../utils/workflows/utils' params(params) INIT(params) include { - SC__FILE_CONVERTER + SC__FILE_CONVERTER; + SC__FILE_CONCATENATOR; } from '../utils/processes/utils' params(params) include { getDataChannel @@ -25,7 +30,6 @@ include { SINGLE_SAMPLE } from './workflows/single_sample.nf' params(params) - workflow single_sample { main: @@ -38,9 +42,43 @@ workflow 
single_sample {

workflow single_sample_qc {

+    take:
+        data
+
     main:
-        getDataChannel | \
-            SC__FILE_CONVERTER | \
-            QC_FILTER
+        out = SC__FILE_CONVERTER( data )
+        out = FILTER_AND_ANNOTATE_AND_CLEAN( out )
+        filtered = QC_FILTER( out ).filtered
+
+    emit:
+        filtered
 
 }
 
+
+workflow multi_sample_qc {
+
+    take:
+        data
+
+    main:
+        if(!params?.sc?.scanpy?.filter) {
+            throw new Exception("VSN ERROR: Missing params.sc.scanpy.filter config.")
+        }
+        if(!params?.sc?.file_concatenator) {
+            throw new Exception("VSN ERROR: Missing params.sc.file_concatenator config.")
+        }
+
+        out = data | \
+            SC__FILE_CONVERTER | \
+            FILTER_AND_ANNOTATE_AND_CLEAN
+
+        out = QC_FILTER( out ).filtered
+        out = SC__FILE_CONCATENATOR(
+            out.map {
+                it -> it[1]
+            }.toSortedList(
+                { a, b -> getBaseName(a, "SC") <=> getBaseName(b, "SC") }
+            )
+        )
+
+    emit:
+        filtered = out
+
+}
\ No newline at end of file

From 8d11a5bc6b5bba06abd20bb240cf8b6ad50a2d8d Mon Sep 17 00:00:00 2001
From: dweemx
Date: Mon, 22 Mar 2021 13:15:48 +0100
Subject: [PATCH 126/202] [scanpy] Use np.inf if max_disp is None

Should fix TypeError: '<' not supported between instances of 'float' and
 'NoneType'
---
 src/scanpy/bin/feature_selection/sc_find_variable_genes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py
index c376c778..c00c228d 100755
--- a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py
+++ b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py
@@ -94,12 +94,13 @@
 # Expects logarithmized data: https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.api.pp.highly_variable_genes.html#scanpy.api.pp.highly_variable_genes
 
 if args.flavor == "seurat":
+    max_disp = np.inf if args.max_disp is None else args.max_disp
     sc.pp.highly_variable_genes(
         adata,
         min_mean=args.min_mean,
         max_mean=np.inf if args.max_mean is None else args.max_mean,
         min_disp=args.min_disp,
-        max_disp=args.max_disp,
+        max_disp=max_disp,
         flavor=args.flavor
     )
 elif args.flavor == "cell_ranger" or args.flavor == "seurat_v3":

From 0ca1fc319db020ee1d36a6d165744e9f3834147b Mon Sep 17 00:00:00 2001
From: dweemx
Date: Mon, 22 Mar 2021 13:22:03 +0100
Subject: [PATCH 127/202] [scanpy] Fix conflicting option strings in
 dimensionality reduction script sc_dim_reduction.py

---
 src/scanpy/bin/dim_reduction/sc_dim_reduction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scanpy/bin/dim_reduction/sc_dim_reduction.py b/src/scanpy/bin/dim_reduction/sc_dim_reduction.py
index 184e183d..7f41e78b 100755
--- a/src/scanpy/bin/dim_reduction/sc_dim_reduction.py
+++ b/src/scanpy/bin/dim_reduction/sc_dim_reduction.py
@@ -68,7 +68,7 @@ def str2bool(v):
 )
 
 parser.add_argument(
-    "-p", "--n-pcs",
+    "-n", "--n-pcs",
     type=int,
     action="store",
     dest="n_pcs",

From 909a9550507895fe783c1da60252d3ccd2a58a92 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Mon, 22 Mar 2021 14:53:22 +0100
Subject: [PATCH 128/202] [scanpy] Switch from missing sc.pl.palettes.default_64
 palette in latest version to sc.pl.palettes.godsnot_102

---
 src/harmony/bin/reports/sc_harmony_report.ipynb   | 8 ++++----
 src/scanpy/bin/reports/sc_bbknn_report.ipynb      | 4 ++--
 src/scanpy/bin/reports/sc_clustering_report.ipynb | 6 +++---
 src/scanpy/bin/reports/sc_mnncorrect_report.ipynb | 8 ++++----
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/harmony/bin/reports/sc_harmony_report.ipynb b/src/harmony/bin/reports/sc_harmony_report.ipynb
index 2e61674a..09ca4d42 100644
--- 
a/src/harmony/bin/reports/sc_harmony_report.ipynb +++ b/src/harmony/bin/reports/sc_harmony_report.ipynb @@ -223,8 +223,8 @@ "ax2 = sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", - "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", - "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax4, show=False, wspace=0.5)\n", "\n", "ax1.set_title('Pre-batch correction (batch)')\n", "ax2.set_title('Post-batch correction (batch)')\n", @@ -273,8 +273,8 @@ "ax2 = sc.pl.umap(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", - "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", - "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax4, show=False, wspace=0.5)\n", "\n", "ax1.set_title('Pre-batch correction (batch)')\n", "ax2.set_title('Post-batch correction (batch)')\n", diff --git a/src/scanpy/bin/reports/sc_bbknn_report.ipynb b/src/scanpy/bin/reports/sc_bbknn_report.ipynb index 302d33b9..b8c26cfe 100644 --- a/src/scanpy/bin/reports/sc_bbknn_report.ipynb +++ b/src/scanpy/bin/reports/sc_bbknn_report.ipynb @@ -216,8 +216,8 @@ "ax2 = sc.pl.umap(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", - "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", - "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax4, show=False, wspace=0.5)\n", "\n", "ax1.set_title('Pre-batch correction (batch)')\n", "ax2.set_title('Post-batch correction (batch)')\n", diff --git a/src/scanpy/bin/reports/sc_clustering_report.ipynb b/src/scanpy/bin/reports/sc_clustering_report.ipynb index 1a451311..dcca707b 100644 --- a/src/scanpy/bin/reports/sc_clustering_report.ipynb +++ b/src/scanpy/bin/reports/sc_clustering_report.ipynb @@ -72,21 +72,21 @@ " sc.pl.pca(\n", " adata,\n", " color=get_features(additional_features=annotations_to_plot + [algorithm]),\n", - " palette=sc.pl.palettes.default_64,\n", 
+ " palette=sc.pl.palettes.godsnot_102,\n", " wspace=0.5\n", " )\n", " if('X_tsne' in adata.obsm):\n", " sc.pl.tsne(\n", " adata,\n", " color=get_features(additional_features=annotations_to_plot + [algorithm]),\n", - " palette=sc.pl.palettes.default_64,\n", + " palette=sc.pl.palettes.godsnot_102,\n", " wspace=0.5\n", " )\n", " if('X_umap' in adata.obsm):\n", " sc.pl.umap(\n", " adata,\n", " color=get_features(additional_features=annotations_to_plot + [algorithm]),\n", - " palette=sc.pl.palettes.default_64,\n", + " palette=sc.pl.palettes.godsnot_102,\n", " wspace=0.5\n", " )" ] diff --git a/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb b/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb index a6579adf..64d1f7f1 100644 --- a/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb +++ b/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb @@ -226,8 +226,8 @@ "ax2 = sc.pl.tsne(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", - "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", - "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "sc.pl.tsne(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.tsne(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax4, show=False, wspace=0.5)\n", "\n", "ax1.set_title('Pre-batch correction (batch)')\n", "ax2.set_title('Post-batch correction (batch)')\n", @@ -279,8 +279,8 @@ "ax2 = sc.pl.umap(adata2, color=batch, alpha=a, ax=ax2, show=False, wspace=0.5, title='batch')\n", "ax1.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", "ax2.legend(fancybox=True, framealpha=0.5, loc='right', bbox_to_anchor=(1.15, 0.5))\n", - "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax3, show=False, wspace=0.5)\n", - "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.default_64, ax=ax4, show=False, wspace=0.5)\n", + "sc.pl.umap(adata1, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax3, show=False, wspace=0.5)\n", + "sc.pl.umap(adata2, color=clustering_algorithm, alpha=a, palette=sc.pl.palettes.godsnot_102, ax=ax4, show=False, wspace=0.5)\n", "\n", "ax1.set_title('Pre-batch correction (batch)')\n", "ax2.set_title('Post-batch correction (batch)')\n", From 8dc000669715b9f37c9907786387df4494dcf5af Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 22 Mar 2021 14:54:47 +0100 Subject: [PATCH 129/202] [scanpy] Fix type in sklearn version in requirements.txt --- src/scanpy/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/requirements.txt b/src/scanpy/requirements.txt index b2539068..b7b4b6ac 100644 --- a/src/scanpy/requirements.txt +++ b/src/scanpy/requirements.txt @@ -96,7 +96,7 @@ scipy==1.6.1 seaborn==0.11.1 sinfo==0.3.1 six==1.15.0 -sklearn==0.0 +sklearn==0.24.1 soupsieve==2.2 statsmodels==0.12.2 stdlib-list==0.8.0 From 96a97d76ed2fc8df518d741f415c55d9876aa938 Mon Sep 17 00:00:00 2001 From: cflerin Date: Mon, 22 Mar 2021 16:18:13 +0100 Subject: [PATCH 130/202] Update Scrublet Dockerfile - simplify docker image (Scanpy base) - tag 0.2.1 --- 
src/scrublet/Dockerfile | 26 ++------------------------ src/scrublet/conf/base.config | 2 +- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/src/scrublet/Dockerfile b/src/scrublet/Dockerfile index a11bcbc7..c1edd24b 100644 --- a/src/scrublet/Dockerfile +++ b/src/scrublet/Dockerfile @@ -1,26 +1,4 @@ -FROM vibsinglecellnf/scanpy:0.5.2 AS compile-image +FROM vibsinglecellnf/scanpy:0.6.1 -ENV DEBIAN_FRONTEND=noninteractive -RUN BUILDPKGS="build-essential apt-utils \ - python3-dev libhdf5-dev libfreetype6-dev libtool \ - m4 autoconf automake patch bison flex libpng-dev libopenblas-dev \ - tcl-dev tk-dev libxml2-dev zlib1g-dev libffi-dev cmake" && \ - apt-get update && \ - apt-get install -y --no-install-recommends apt-utils debconf locales && dpkg-reconfigure locales && \ - apt-get install -y --no-install-recommends $BUILDPKGS +RUN pip install --no-cache-dir scrublet==0.2.3 -RUN python -m venv /opt/venv -# Make sure we use the virtualenv: -ENV PATH="/opt/venv/bin:$PATH" - -# install dependencies: -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir scrublet==0.2.1 - -FROM python:3.7.4-slim AS build-image - -ENV DEBIAN_FRONTEND=noninteractive - -COPY --from=compile-image /opt/venv /opt/venv -# Make sure we use the virtualenv: -ENV PATH="/opt/venv/bin:$PATH" diff --git a/src/scrublet/conf/base.config b/src/scrublet/conf/base.config index 3ddf9314..6a51207f 100644 --- a/src/scrublet/conf/base.config +++ b/src/scrublet/conf/base.config @@ -1,7 +1,7 @@ params { sc { scrublet { - container = 'vibsinglecellnf/scrublet:0.1.4' + container = 'vibsinglecellnf/scrublet:0.2.1' doublet_detection { report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb" useVariableFeatures = 'False' From e0868ebf58c81ab46a9722db956ec464cfbe92c3 Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 8 Jul 2021 13:47:11 +0200 Subject: [PATCH 131/202] [harmony] Use dynverse/anndata to read h5ad instead of hdf5r Update Docker from 1.0-1 to 1.0-2 --- src/harmony/Dockerfile | 5 +++-- src/harmony/bin/run_harmony.R | 20 ++++++++------------ src/harmony/environment.yml | 3 ++- src/harmony/harmony.config | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/harmony/Dockerfile b/src/harmony/Dockerfile index ee273d3a..9bca142d 100644 --- a/src/harmony/Dockerfile +++ b/src/harmony/Dockerfile @@ -13,8 +13,9 @@ RUN conda init bash # Activate the environment, and make sure it's activated: RUN echo "conda activate $(cat /tmp/version)" >> ~/.bashrc && \ - conda activate $(cat /tmp/version) && \ - R -e "devtools::install_github(repo = 'aertslab/SCopeLoomR')" + conda activate $(cat /tmp/version) && \ + R -e "devtools::install_github(repo = 'dynverse/anndata', ref = '0.7.5.2')" && \ + R -e "devtools::install_github(repo = 'aertslab/SCopeLoomR')" RUN apt-get -y update \ # Need to run ps diff --git a/src/harmony/bin/run_harmony.R b/src/harmony/bin/run_harmony.R index fb1d6fcf..9b3f27c7 100755 --- a/src/harmony/bin/run_harmony.R +++ b/src/harmony/bin/run_harmony.R @@ -8,6 +8,8 @@ print("#############################################################") # Loading dependencies scripts library("argparse") +library("reticulate") +library("anndata") parser <- ArgumentParser(description='Scalable integration of single cell RNAseq data for batch correction and meta analysis') parser$add_argument( @@ -106,22 +108,16 @@ args <- lapply(X = args, FUN = function(arg) { input_ext <- tools::file_ext(args$input) if(input_ext == "h5ad") { - # 
Current fix until https://github.com/satijalab/seurat/issues/2485 is fixed - file <- hdf5r::h5file(filename = args$input, mode = 'r') - if(!("X_pca" %in% names(x = file[["obsm"]]))) { + adata <- anndata::read_h5ad(filename = args$input) + + if(!("X_pca" %in% names(x = adata$obsm))) { stop("X_pca slot is not found in the AnnData (h5ad).") } - obs <- file[['obs']][] - pca_embeddings <- t(x = file[["obsm"]][["X_pca"]][,]) - row.names(x = pca_embeddings) <- obs$index + obs <- adata$obs + pca_embeddings <- adata$obsm[["X_pca"]] + row.names(x = pca_embeddings) <- row.names(x = obs) colnames(x = pca_embeddings) <- paste0("PCA_", seq(from = 1, to = ncol(x = pca_embeddings))) metadata <- obs - # seurat <- Seurat::ReadH5AD(file = args$input) - # if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca)) - # stop("Expects a PCA embeddings data matrix but it does not exist.") - # data <- seurat@reductions$pca - # pca_embeddings <- data@cell.embeddings - # metadata <- seurat@meta.data } else { stop(paste0("Unrecognized input file format: ", input_ext, ".")) } diff --git a/src/harmony/environment.yml b/src/harmony/environment.yml index 8b2603e6..085595fa 100644 --- a/src/harmony/environment.yml +++ b/src/harmony/environment.yml @@ -1,4 +1,4 @@ -name: harmony-v1.0 +name: harmony-v1.0-2 channels: - r - conda-forge @@ -8,5 +8,6 @@ dependencies: - r-base=4.0.2 - r-argparse=2.0.1 - r-devtools + - r-reticulate=1.20 - r-hdf5r - r-harmony diff --git a/src/harmony/harmony.config b/src/harmony/harmony.config index e7396d8a..b4a45c73 100644 --- a/src/harmony/harmony.config +++ b/src/harmony/harmony.config @@ -1,7 +1,7 @@ params { sc { harmony { - container = 'vibsinglecellnf/harmony:1.0-1' + container = 'vibsinglecellnf/harmony:1.0-2' report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/harmony/bin/reports/sc_harmony_report.ipynb" varsUse = ['batch'] // theta = '' From 74371bfa808fd7c2d55fe33eaad7af24cfcb8b3e Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 8 Jul 2021 14:20:47 +0200 Subject: [PATCH 132/202] [harmony] Update Docker from 1.0-2 to 1.0-3 --- src/harmony/bin/run_harmony.R | 4 ++++ src/harmony/environment.yml | 3 ++- src/harmony/harmony.config | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/harmony/bin/run_harmony.R b/src/harmony/bin/run_harmony.R index 9b3f27c7..5880a903 100755 --- a/src/harmony/bin/run_harmony.R +++ b/src/harmony/bin/run_harmony.R @@ -11,6 +11,10 @@ library("argparse") library("reticulate") library("anndata") +# Link Python to this R session +use_python("/opt/conda/envs/harmony-v1.0-3/bin") +Sys.setenv(RETICULATE_PYTHON = "/opt/conda/envs/harmony-v1.0-3/bin") + parser <- ArgumentParser(description='Scalable integration of single cell RNAseq data for batch correction and meta analysis') parser$add_argument( 'input', diff --git a/src/harmony/environment.yml b/src/harmony/environment.yml index 085595fa..38de24f8 100644 --- a/src/harmony/environment.yml +++ b/src/harmony/environment.yml @@ -1,10 +1,11 @@ -name: harmony-v1.0-2 +name: harmony-v1.0-3 channels: - r - conda-forge - bioconda dependencies: - python=3.7 + - anndata=0.7.6 - r-base=4.0.2 - r-argparse=2.0.1 - r-devtools diff --git a/src/harmony/harmony.config b/src/harmony/harmony.config index b4a45c73..f7781f77 100644 --- a/src/harmony/harmony.config +++ b/src/harmony/harmony.config @@ -1,7 +1,7 @@ params { sc { harmony { - container = 'vibsinglecellnf/harmony:1.0-2' + container = 'vibsinglecellnf/harmony:1.0-3' report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/harmony/bin/reports/sc_harmony_report.ipynb" varsUse = ['batch'] // theta = '' From 4a68bd97850e8eb5ad45bd9a0b5b728e99310125 Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 8 Jul 2021 17:00:38 +0200 Subject: [PATCH 133/202] [scrublet] Write index_label (was done by default in previous pandas version) --- src/scrublet/bin/sc_doublet_detection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scrublet/bin/sc_doublet_detection.py b/src/scrublet/bin/sc_doublet_detection.py index 3a373d29..57d94ff6 100755 --- a/src/scrublet/bin/sc_doublet_detection.py +++ b/src/scrublet/bin/sc_doublet_detection.py @@ -280,6 +280,7 @@ def save_histograms(out_basename, scrublet): path_or_buf=f"{FILE_PATH_OUT_BASENAME}.ScrubletDoubletTable.tsv", sep="\t", index=True, + index_label='index', header=True ) f = gzip.open(f"{FILE_PATH_OUT_BASENAME}.ScrubletObject.pklz", 'wb') From 3d3ff11197d630037a71b7ef85bd8af097cacc7f Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 8 Jul 2021 17:00:52 +0200 Subject: [PATCH 134/202] getChannel expects 3 inputs --- src/utils/workflows/annotateByCellMetadata.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index 3fceb5e7..7c616067 100644 --- a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -49,7 +49,8 @@ workflow ANNOTATE_BY_CELL_METADATA { if(metadata == null) { metadata = getChannel( workflowParams.cellMetaDataFilePath, - workflowParams.sampleSuffixWithExtension + workflowParams.sampleSuffixWithExtension, + 'NULL' ) } out = SC__ANNOTATE_BY_CELL_METADATA( From ce7ca6535efb8542331051d51dc43a984daec520 Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 8 Jul 2021 18:02:59 +0200 Subject: [PATCH 135/202] [scrublet] Fix bug coloring by boolean variables in report --- .../bin/reports/sc_doublet_detection_report.ipynb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb b/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb index d88be0f5..af7c250b 100644 --- a/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb +++ b/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb @@ -222,11 +222,12 @@ "if scrub.predicted_doublets_ is None:\n", " print(\"No doublets predicted using default method.\")\n", "else:\n", + " adata_dr.obs['scrublet__predicted_doublets_cat'] = adata_dr.obs['scrublet__predicted_doublets'].astype(str).astype('category')\n", " a = 0.6 # alpha setting\n", " fig, (axs) = plt.subplots(1,2, figsize=(10,5), dpi=150 )\n", - " ax1 = sc.pl.tsne(adata_dr, color='scrublet__predicted_doublets',size=10,color_map='plasma_r',alpha=a, ax=axs[0], show=False, wspace=0.5)\n", + " ax1 = sc.pl.tsne(adata_dr, color='scrublet__predicted_doublets_cat',size=10, color_map='plasma_r', alpha=a, ax=axs[0], show=False, wspace=0.5)\n", " ax1.set_title(f\"Scrublet - Predicted Doublet\")\n", - " ax2 = sc.pl.umap(adata_dr, color='scrublet__predicted_doublets',size=10,color_map='plasma_r',alpha=a, ax=axs[1], show=False, wspace=0.5)\n", + " ax2 = sc.pl.umap(adata_dr, color='scrublet__predicted_doublets_cat',size=10, color_map='plasma_r', alpha=a, ax=axs[1], show=False, wspace=0.5)\n", " ax2.set_title(f\"Scrublet - Predicted Doublet\")\n", " plt.tight_layout()" ] @@ -254,11 +255,12 @@ "metadata": {}, "outputs": [], "source": [ + "adata_dr.obs['scrublet__predicted_doublets_based_on_10x_chromium_spec_cat'] = 
adata_dr.obs['scrublet__predicted_doublets_based_on_10x_chromium_spec'].astype(str).astype('category')\n", "a = 0.6 # alpha setting\n", "fig, (axs) = plt.subplots(1,2, figsize=(10,5), dpi=150 )\n", - "ax1 = sc.pl.tsne(adata_dr, color='scrublet__predicted_doublets_based_on_10x_chromium_spec',size=10,color_map='plasma_r',alpha=a, ax=axs[0], show=False, wspace=0.5)\n", + "ax1 = sc.pl.tsne(adata_dr, color='scrublet__predicted_doublets_based_on_10x_chromium_spec_cat',size=10, color_map='plasma_r',alpha=a, ax=axs[0], show=False, wspace=0.5)\n", "ax1.set_title(f\"Scrublet - Predicted Doublet\")\n", - "ax2 = sc.pl.umap(adata_dr, color='scrublet__predicted_doublets_based_on_10x_chromium_spec',size=10,color_map='plasma_r',alpha=a, ax=axs[1], show=False, wspace=0.5)\n", + "ax2 = sc.pl.umap(adata_dr, color='scrublet__predicted_doublets_based_on_10x_chromium_spec_cat',size=10, color_map='plasma_r',alpha=a, ax=axs[1], show=False, wspace=0.5)\n", "ax2.set_title(f\"Scrublet - Predicted Doublet\")\n", "plt.tight_layout()" ] From 8370443bc09e2ffda10d0bf5f091ca81ae8692b7 Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 8 Jul 2021 10:46:22 +0200 Subject: [PATCH 136/202] [scanpy, scrublet] Update scanpy docker image to 1.8.1 and scrublet docker image to 0.2.3 Closes #340 Closes #331 --- conf/test__cell_annotate_filter.config | 2 +- ...gle_sample_decontx_correct_scrublet.config | 2 +- conf/test__single_sample_scrublet.config | 2 +- docs/development.rst | 2 +- docs/features.rst | 4 ++-- docs/pipelines.rst | 4 ++-- src/celda/nextflow.config | 2 +- src/scanpy/Dockerfile | 20 ++++++++--------- src/scanpy/conf/base.config | 2 +- src/scanpy/conf/min.config | 2 +- src/scanpy/requirements.txt | 22 +++++++++---------- src/scrublet/Dockerfile | 3 +-- src/scrublet/conf/base.config | 2 +- src/soupx/nextflow.config | 2 +- src/utils/conf/test.config | 2 +- 15 files changed, 36 insertions(+), 37 deletions(-) diff --git a/conf/test__cell_annotate_filter.config b/conf/test__cell_annotate_filter.config index 6690c16b..fb9b85f1 100644 --- a/conf/test__cell_annotate_filter.config +++ b/conf/test__cell_annotate_filter.config @@ -16,7 +16,7 @@ params { makeVarIndexUnique = false } scanpy { - container = 'vibsinglecellnf/scanpy:0.5.0' + container = 'vibsinglecellnf/scanpy:1.8.1' } cell_annotate { off = 'h5ad' diff --git a/conf/test__single_sample_decontx_correct_scrublet.config b/conf/test__single_sample_decontx_correct_scrublet.config index 478b0882..26a5b412 100644 --- a/conf/test__single_sample_decontx_correct_scrublet.config +++ b/conf/test__single_sample_decontx_correct_scrublet.config @@ -33,7 +33,7 @@ params { } } scrublet { - container = 'vibsinglecellnf/scrublet:0.1.4' + container = 'vibsinglecellnf/scrublet:0.2.3' labels { processExecutor = 'local' } diff --git a/conf/test__single_sample_scrublet.config b/conf/test__single_sample_scrublet.config index eb169d30..596722e0 100644 --- a/conf/test__single_sample_scrublet.config +++ b/conf/test__single_sample_scrublet.config @@ -27,7 +27,7 @@ params { } } scrublet { - container = 'vibsinglecellnf/scrublet:0.1.4' + container = 'vibsinglecellnf/scrublet:0.2.3' labels { processExecutor = 'local' } diff --git a/docs/development.rst b/docs/development.rst index 3ed80745..64398e7b 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -788,7 +788,7 @@ The parameter structure internally (post-merge) is: } } scanpy { - container = 'docker://vib-singlecell-nf/scanpy:0.5.0' + container = 'docker://vib-singlecell-nf/scanpy:1.8.1' filter { ... 
} diff --git a/docs/features.rst b/docs/features.rst index 5e604c4a..9ddbfae3 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -333,7 +333,7 @@ You'll just have to repeat the following structure for the parameters which you params { sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:1.8.1' filter { report_ipynb = '/src/scanpy/bin/reports/sc_filter_qc_report.ipynb' // Here we enable the multi-sample feature for the cellFilterMinNgenes parameter @@ -364,7 +364,7 @@ If you want to apply custom parameters for some specific samples and have a "gen params { sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:1.8.1' filter { report_ipynb = '/src/scanpy/bin/reports/sc_filter_qc_report.ipynb' // Here we enable the multi-sample feature for the cellFilterMinNgenes parameter diff --git a/docs/pipelines.rst b/docs/pipelines.rst index 81876d85..5e075477 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -514,7 +514,7 @@ Make sure the following parts of the generated config are properly set: } sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:1.8.1' } cell_annotate { off = 'h5ad' @@ -569,7 +569,7 @@ Make sure the following parts of the generated config are properly set: } sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:1.8.1' } cell_annotate { off = 'h5ad' diff --git a/src/celda/nextflow.config b/src/celda/nextflow.config index 4e1920b1..b0b0c05a 100644 --- a/src/celda/nextflow.config +++ b/src/celda/nextflow.config @@ -1,7 +1,7 @@ params { sc { template { - container = 'vibsinglecellnf/scanpy:0.5.0' + container = 'vibsinglecellnf/scanpy:1.8.1' process1 { param1 = '' } diff --git a/src/scanpy/Dockerfile b/src/scanpy/Dockerfile index 3e6191d1..11d4ef4c 100644 --- a/src/scanpy/Dockerfile +++ b/src/scanpy/Dockerfile @@ -2,9 +2,9 @@ FROM python:3.7.4-slim AS compile-image ENV DEBIAN_FRONTEND=noninteractive RUN BUILDPKGS="build-essential apt-utils \ - python3-dev libhdf5-dev libfreetype6-dev libtool \ - m4 autoconf automake patch bison flex libpng-dev libopenblas-dev \ - tcl-dev tk-dev libxml2-dev zlib1g-dev libffi-dev cmake" && \ + python3-dev libhdf5-dev libfreetype6-dev libtool \ + m4 autoconf automake patch bison flex libpng-dev libopenblas-dev \ + tcl-dev tk-dev libxml2-dev zlib1g-dev libffi-dev cmake" && \ apt-get update && \ apt-get install -y --no-install-recommends apt-utils debconf locales && dpkg-reconfigure locales && \ apt-get install -y --no-install-recommends $BUILDPKGS @@ -14,11 +14,11 @@ RUN python -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" # install dependencies: -COPY requirements.txt /tmp/ +COPY requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /tmp/requirements.txt -RUN pip install --no-cache-dir scanpy==1.7.0 +RUN pip install --no-cache-dir scanpy==1.8.1 FROM python:3.7.4-slim AS build-image @@ -26,11 +26,11 @@ FROM python:3.7.4-slim AS build-image ENV DEBIAN_FRONTEND=noninteractive RUN apt-get -y update && \ apt-get -y --no-install-recommends install \ - # Need to run ps - procps \ - libxml2 \ - # Need to run MulticoreTSNE - libgomp1 && \ + # Need to run ps + procps \ + libxml2 \ + # Need to run MulticoreTSNE + libgomp1 && \ rm -rf /var/cache/apt/* && \ rm -rf /var/lib/apt/lists/* diff --git a/src/scanpy/conf/base.config b/src/scanpy/conf/base.config index 7ed83e1b..d2544cc3 100644 --- 
a/src/scanpy/conf/base.config +++ b/src/scanpy/conf/base.config @@ -1,7 +1,7 @@ params { sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:1.8.1' report { annotations_to_plot = [] } diff --git a/src/scanpy/conf/min.config b/src/scanpy/conf/min.config index a4e4eb32..a39b30ad 100644 --- a/src/scanpy/conf/min.config +++ b/src/scanpy/conf/min.config @@ -1,7 +1,7 @@ params { sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:1.8.1' report { annotations_to_plot = [] } diff --git a/src/scanpy/requirements.txt b/src/scanpy/requirements.txt index b7b4b6ac..75b6c2af 100644 --- a/src/scanpy/requirements.txt +++ b/src/scanpy/requirements.txt @@ -1,11 +1,11 @@ -anndata==0.7.5 -annoy==1.17.0 +anndata==0.7.6 +annoy==1.15.2 ansiwrap==0.8.4 appdirs==1.4.4 async-generator==1.10 attrs==20.3.0 backcall==0.2.0 -bbknn==1.4.0 +bbknn==1.5.1 beautifulsoup4==4.9.3 bioservices==1.7.11 black==20.8b1 @@ -27,7 +27,7 @@ get-version==2.1 gevent==21.1.2 greenlet==1.0.0 grequests==0.6.0 -h5py==3.1.0 +h5py==2.10.0 idna==2.10 importlib-metadata==3.4.0 ipykernel==5.4.3 @@ -46,7 +46,7 @@ leidenalg==0.8.3 llvmlite==0.35.0 loompy==3.0.6 louvain==0.7.0 -lxml==4.6.2 +lxml==4.6.3 MarkupSafe==1.1.1 matplotlib==3.3.4 mistune==0.8.4 @@ -64,10 +64,10 @@ nest-asyncio==1.5.1 networkx==2.5 numba==0.52.0 numexpr==2.7.2 -numpy==1.20.1 +numpy==1.21.0 numpy-groupies==0.9.13 packaging==20.9 -pandas==1.2.2 +pandas==1.3.0 pandocfilters==1.4.3 papermill==2.3.2 parso==0.8.1 @@ -91,7 +91,7 @@ pyzmq==22.0.3 regex==2020.11.13 requests==2.25.1 requests-cache==0.5.2 -scikit-learn==0.24.1 +scikit-learn==0.22.0 scipy==1.6.1 seaborn==0.11.1 sinfo==0.3.1 @@ -113,12 +113,12 @@ tqdm==4.57.0 traitlets==5.0.5 typed-ast==1.4.2 typing-extensions==3.7.4.3 -umap-learn==0.5.1 -urllib3==1.26.3 +umap-learn==0.3.10 +urllib3==1.26.5 wcwidth==0.2.5 webencodings==0.5.1 wrapt==1.12.1 xmltodict==0.12.0 zipp==3.4.0 zope.event==4.5.0 -zope.interface==5.2.0 +zope.interface==5.2.0 \ No newline at end of file diff --git a/src/scrublet/Dockerfile b/src/scrublet/Dockerfile index c1edd24b..7c127636 100644 --- a/src/scrublet/Dockerfile +++ b/src/scrublet/Dockerfile @@ -1,4 +1,3 @@ -FROM vibsinglecellnf/scanpy:0.6.1 +FROM vibsinglecellnf/scanpy:1.8.1 RUN pip install --no-cache-dir scrublet==0.2.3 - diff --git a/src/scrublet/conf/base.config b/src/scrublet/conf/base.config index 6a51207f..74299626 100644 --- a/src/scrublet/conf/base.config +++ b/src/scrublet/conf/base.config @@ -1,7 +1,7 @@ params { sc { scrublet { - container = 'vibsinglecellnf/scrublet:0.2.1' + container = 'vibsinglecellnf/scrublet:0.2.3' doublet_detection { report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb" useVariableFeatures = 'False' diff --git a/src/soupx/nextflow.config b/src/soupx/nextflow.config index 4e1920b1..b0b0c05a 100644 --- a/src/soupx/nextflow.config +++ b/src/soupx/nextflow.config @@ -1,7 +1,7 @@ params { sc { template { - container = 'vibsinglecellnf/scanpy:0.5.0' + container = 'vibsinglecellnf/scanpy:1.8.1' process1 { param1 = '' } diff --git a/src/utils/conf/test.config b/src/utils/conf/test.config index 9789212b..1cb11c8e 100644 --- a/src/utils/conf/test.config +++ b/src/utils/conf/test.config @@ -1,7 +1,7 @@ params { sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.5.2' + container = 'vibsinglecellnf/scanpy:0.7.0' } file_converter { iff = '10x_cellranger_mex' From 8264649f56d1792c10190e8fff07cc98c1fbb986 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 9 Jul 2021 00:45:33 +0200 Subject: [PATCH 137/202] [utils] Use Scanpy docker image 1.8.1 in test.config --- src/utils/conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/conf/test.config b/src/utils/conf/test.config index 1cb11c8e..5b2ac5a5 100644 --- a/src/utils/conf/test.config +++ b/src/utils/conf/test.config @@ -1,7 +1,7 @@ params { sc { scanpy { - container = 'vibsinglecellnf/scanpy:0.7.0' + container = 'vibsinglecellnf/scanpy:1.8.1' } file_converter { iff = '10x_cellranger_mex' From efc96e8e05120247d7b8f33dde202c73821616f6 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 9 Jul 2021 10:37:44 +0200 Subject: [PATCH 138/202] [sratoolkit] Move FIX_AND_COMPRESS_SRA_FASTQS to singlecelltoolkit tool Update singlecelltoolkit from 0.6-dev to 2021-07-06-ea48b36 Rename fixAndCompressFastQ to fix_and_compress_fastqs --- src/singlecelltoolkit/processes/.gitkeep | 0 .../processes/fix_and_compress_fastqs.nf} | 16 ++++------------ src/singlecelltoolkit/singlecelltoolkit.config | 2 +- src/sratoolkit/workflows/downloadFastQ.nf | 4 ++-- 4 files changed, 7 insertions(+), 15 deletions(-) delete mode 100644 src/singlecelltoolkit/processes/.gitkeep rename src/{sratoolkit/processes/fixAndCompressFastQ.nf => singlecelltoolkit/processes/fix_and_compress_fastqs.nf} (59%) diff --git a/src/singlecelltoolkit/processes/.gitkeep b/src/singlecelltoolkit/processes/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/sratoolkit/processes/fixAndCompressFastQ.nf b/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf similarity index 59% rename from src/sratoolkit/processes/fixAndCompressFastQ.nf rename to src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf index 33becf72..4a2bfcf4 100644 --- a/src/sratoolkit/processes/fixAndCompressFastQ.nf +++ b/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf @@ -8,12 +8,11 @@ if(!params.containsKey("test")) { toolParams = params.sratoolkit -process FIX_AND_COMPRESS_SRA_FASTQ { +process FIX_AND_COMPRESS_SRA_FASTQS { container toolParams.container publishDir "${params.global.outdir}/data/raw/fastqs_fixed_and_compressed", mode: 'symlink', overwrite: true - label 'compute_resources__sratoolkit' - maxRetries 0 + label 'compute_resources__cpu' input: tuple val(sraId), file("${sraId}_*.fastq") @@ -23,19 +22,12 @@ process FIX_AND_COMPRESS_SRA_FASTQ { script: """ - # Fetch script to fix SRA FASTQ (fasterq-dump does not have the -F option as fastq-dump do to keep original sequence names). 
-    # Fixing the FASTQ files is required for future pre-processing (e.g.: scATAC-seq pipelines)
-    # We cannot source the script directly:
-    # - 1) by default it generates help text to stdout
-    # - 2) if redirecting the stdout to the trash i.e. /dev/null, Nextflow will think no files have been generated
-    # So we need to save the file before executing the script
-    curl -fsSL https://raw.githubusercontent.com/aertslab/single_cell_toolkit/master/fix_sra_fastq.sh -o fix_sra_fastq.sh
-    chmod a+x ./fix_sra_fastq.sh
+    # Fixing the FASTQ files is required for future pre-processing (e.g.: scATAC-seq pipelines) because fasterq-dump does not have the -F option as fastq-dump does to keep original sequence names.
     # Fix the FASTQ files and compress them
     export compress_fastq_threads="${task.cpus}"
     NUM_FASTQ_FILES=\$(ls ./*.fastq | wc -l)
     echo "Fixing and compressing \${NUM_FASTQ_FILES} FASTQ files in parallel with \${compress_fastq_threads} compression threads for each task..."
-    echo *.fastq | tr ' ' '\n' | xargs -P "\${NUM_FASTQ_FILES}" -n 1 -I {} ./fix_sra_fastq.sh "{}" "{}.gz" pigz
+    echo *.fastq | tr ' ' '\n' | xargs -P "\${NUM_FASTQ_FILES}" -n 1 -I {} fix_sra_fastq.sh "{}" "{}.gz" pigz
     echo "Removing all uncompressed FASTQ files"
     for FASTQ in *.fastq; do
         echo "Removing uncompressed FASTQ file \${FASTQ}..."
diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config
index 7607d525..447802c6 100644
--- a/src/singlecelltoolkit/singlecelltoolkit.config
+++ b/src/singlecelltoolkit/singlecelltoolkit.config
@@ -1,7 +1,7 @@
 params {
     tools {
         singlecelltoolkit {
-            container = 'vibsinglecellnf/singlecelltoolkit:0.6-dev'
+            container = 'vibsinglecellnf/singlecelltoolkit:2021-07-06-ea48b36'
             barcode_correction {
                 whitelist {
                     standard = ''
diff --git a/src/sratoolkit/workflows/downloadFastQ.nf b/src/sratoolkit/workflows/downloadFastQ.nf
index 8aa4ce14..ebc88a2f 100644
--- a/src/sratoolkit/workflows/downloadFastQ.nf
+++ b/src/sratoolkit/workflows/downloadFastQ.nf
@@ -8,8 +8,8 @@ include {
     DOWNLOAD_FASTQS_FROM_SRA_ACC_ID;
 } from "../processes/downloadFastQ" params(params)
 include {
-    FIX_AND_COMPRESS_SRA_FASTQ;
-} from "../processes/fixAndCompressFastQ" params(params)
+    FIX_AND_COMPRESS_SRA_FASTQS;
+} from "../../singlecelltoolkit/processes/fix_and_compress_fastqs" params(params)
 
 workflow SRATOOLKIT__DOWNLOAD_FASTQS {
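Note: the parallel fix-and-compress step in the patch above is a generic xargs fan-out. A standalone bash sketch (assuming fix_sra_fastq.sh is on the PATH and takes <input> <output> <compressor> arguments, as the patched script invokes it; the thread count is an assumed value):

#!/usr/bin/env bash
set -euo pipefail

# Fan out one task per FASTQ file; each task rewrites read names and compresses with pigz.
export compress_fastq_threads=4
NUM_FASTQ_FILES=$(ls ./*.fastq | wc -l)
echo *.fastq | tr ' ' '\n' \
    | xargs -P "${NUM_FASTQ_FILES}" -n 1 -I {} \
        fix_sra_fastq.sh "{}" "{}.gz" pigz   # -P caps parallel tasks; -n 1 -I {} passes one file per task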
From 742f3bd44f5161865b30470a7a4c15a179d9a308 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Fri, 9 Jul 2021 10:41:19 +0200
Subject: [PATCH 139/202] [pcacv] Write PRESS errors before failing when no
 optimal number of PCs is found

---
 src/pcacv/bin/run_pca_cv.R | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/pcacv/bin/run_pca_cv.R b/src/pcacv/bin/run_pca_cv.R
index 89efd83a..e16d8a9e 100755
--- a/src/pcacv/bin/run_pca_cv.R
+++ b/src/pcacv/bin/run_pca_cv.R
@@ -392,7 +392,15 @@ if(optimum_npcs == 1) {
         stop(paste0("Invalid value for --n-pc-fallback parameter: value should be > 0."))
     }
     if(args$`n-pc-fallback` == 0) {
-        stop(paste0("Could not find an optimal number of PCs. You can set --n-pc-fallback parameter (> 0) in order to return a minimum number of PCs."))
+        write.table(
+            x = out,
+            file = paste0(args$`output-prefix`, ".PRESS_ERRORS.tsv"),
+            quote = F,
+            sep = "\t",
+            row.names = F,
+            col.names = T
+        )
+        stop(paste0("Could not find an optimal number of PCs. You can either set the --n-pc-fallback parameter (> 0) in order to return a minimum number of PCs or adapt the following parameters: --k-fold, --from-n-pc, --to-n-pc, --by-n-pc."))
     }
     msg <- paste0("No optimal number of PCs found. The number of PCs returned is defined by --n-pc-fallback: ", args$`n-pc-fallback`)
     warning(msg)

From 44cdfaf941a9df29d6a92de3a235a239f18ac4f3 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Fri, 12 Feb 2021 20:48:50 +0100
Subject: [PATCH 140/202] [utils] Add new generic function to extract params of
 given tools. This new function replaces getToolParams from
 utils/processes/utils.nf, so it is removed

---
 conf/generic.config          | 17 +++++++++++++++++
 src/utils/processes/utils.nf | 15 +++------------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/conf/generic.config b/conf/generic.config
index 504b87ae..3f26e539 100644
--- a/conf/generic.config
+++ b/conf/generic.config
@@ -19,4 +19,21 @@ params {
         }
         return [global: paramsGlobal, local: pL]
     }
+    getToolParams = { toolKey ->
+        def _get = { p ->
+            if(p?.tools && p.tools.containsKey(toolKey)) {
+                return p.tools[toolKey]
+            }
+            if(p?.sc && p.sc.containsKey(toolKey)) {
+                return p.sc[toolKey]
+            }
+            throw new Exception("VSN ERROR: Cannot get tool params from NXF params.")
+        }
+        if(!toolKey.contains(".")) {
+            return _get(params)
+        }
+        def entry = params
+        toolKey.split('\\.').each { entry = entry?.get(it) }
+        return entry
+    }
 }
diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf
index 2186ef09..2cb0f4d0 100644
--- a/src/utils/processes/utils.nf
+++ b/src/utils/processes/utils.nf
@@ -6,15 +6,6 @@ import static groovy.json.JsonOutput.*
 
 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin")
 
-def getToolParams(params, toolKey) {
-    if(!toolKey.contains(".")) {
-        return params[toolKey]
-    }
-    def entry = params
-    toolKey.split('\\.').each { entry = entry?.get(it) }
-    return entry
-}
-
 def boolean isCollectionOrArray(object) {
     [Collection, Object[]].any { it.isAssignableFrom(object.getClass()) }
 }
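Note: a usage sketch for the accessor defined in the patch above (editorial illustration written in Nextflow config Groovy; the config values are hypothetical). A plain key resolves against params.tools first and then falls back to params.sc, while a dotted key is walked from the root of params:

// Assuming a config such as: params { tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' } } }
def scanpyParams = params.getToolParams("scanpy")
println scanpyParams.container               // found under params.tools.scanpy (or params.sc.scanpy as fallback)

// Dotted keys bypass the tools/sc lookup and are resolved from the params root:
def clusteringParams = params.getToolParams("tools.scanpy.clustering")
println clusteringParams?.method             // safe navigation in case the path is absent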
From 1ae30c06b9b7ac918926b8ebf1b66b7aa3296c2b Mon Sep 17 00:00:00 2001
From: dweemx
Date: Fri, 12 Feb 2021 22:36:47 +0100
Subject: [PATCH 141/202] Migrate to params.tools for scanpy and file_converter
 tools

---
 conf/generic.config                            |  8 ++++++++
 docs/development.rst                           |  8 ++++----
 src/channels/channels.nf                       |  4 ++--
 src/harmony/workflows/bec_harmony.nf           |  2 +-
 src/scanpy/conf/base.config                    |  2 +-
 src/scanpy/conf/bbknn.config                   |  2 +-
 src/scanpy/conf/data_transformation.config     |  2 +-
 src/scanpy/conf/filter.config                  |  2 +-
 src/scanpy/conf/min.config                     |  2 +-
 src/scanpy/conf/mnncorrect.config              |  2 +-
 src/scanpy/conf/normalization.config           |  2 +-
 src/scanpy/conf/regress_out.config             |  2 +-
 src/scanpy/processes/batch_effect_correct.nf   |  4 ++--
 src/scanpy/processes/cluster.nf                | 12 ++++++------
 src/scanpy/processes/dim_reduction.nf          |  4 ++--
 src/scanpy/processes/feature_selection.nf      |  8 ++++----
 src/scanpy/processes/filter.nf                 | 12 ++++++------
 src/scanpy/processes/marker_genes.nf           |  8 ++++----
 src/scanpy/processes/neighborhood_graph.nf     |  4 ++--
 src/scanpy/processes/regress_out.nf            |  4 ++--
 src/scanpy/processes/reports.nf                | 16 ++++++++--------
 src/scanpy/processes/transform.nf              | 12 ++++++------
 src/scanpy/workflows/bec_bbknn.nf              |  6 +++---
 src/scanpy/workflows/bec_mnncorrect.nf         |  6 +++---
 src/scanpy/workflows/cluster_identification.nf |  6 +++---
 src/scanpy/workflows/combine_reports.nf        |  2 +-
 src/scanpy/workflows/dim_reduction.nf          |  4 ++--
 src/scanpy/workflows/hvg_selection.nf          |  4 ++--
 src/scanpy/workflows/single_sample.nf          | 14 ++++++++------
 src/utils/processes/h5adAnnotate.nf            |  7 +++----
 src/utils/processes/h5adExtractMetadata.nf     |  2 +-
 src/utils/processes/h5adMerge.nf               |  2 +-
 src/utils/processes/h5adSubset.nf              |  9 ++++-----
 src/utils/processes/h5adToLoom.nf              |  4 ++--
 src/utils/processes/h5adUpdate.nf              |  6 +++---
 src/utils/processes/h5adUpdateMetadata.nf      |  2 +-
 src/utils/processes/utils.nf                   |  2 +-
 src/utils/workflows/annotateByCellMetadata.nf  |  3 +--
 src/utils/workflows/filterAnnotateClean.nf     |  8 ++++----
 src/utils/workflows/filterByCellMetadata.nf    |  3 +--
 workflows/bbknn.nf                             |  6 +++---
 workflows/harmony.nf                           |  6 +++---
 workflows/mnncorrect.nf                        |  6 +++---
 workflows/multi_sample.nf                      |  6 +++---
 workflows/single_sample.nf                     |  2 +-
 workflows/single_sample_star.nf                |  6 +++---
 46 files changed, 125 insertions(+), 119 deletions(-)

diff --git a/conf/generic.config b/conf/generic.config
index 3f26e539..0d8a5d87 100644
--- a/conf/generic.config
+++ b/conf/generic.config
@@ -36,4 +36,12 @@ params {
         toolKey.split('\\.').each { entry = entry?.get(it) }
         return entry
     }
+    hasToolParams = { toolKey ->
+        if(params?.tools && params.tools.containsKey(toolKey)) {
+            return true
+        }
+        if(params?.sc && params.sc.containsKey(toolKey)) {
+            return true
+        }
+    }
 }
diff --git a/docs/development.rst b/docs/development.rst
index 64398e7b..8355f5e1 100644
--- a/docs/development.rst
+++ b/docs/development.rst
@@ -364,7 +364,7 @@ Steps:
 
         // Run clustering
         // Define the parameters for clustering
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
         CLUSTER_IDENTIFICATION(
             normalizedTransformedData,
             DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
@@ -490,7 +490,7 @@ Steps:
             SC__FILE_CONVERTER | \
             FILTER_AND_ANNOTATE_AND_CLEAN
 
-        if(params.sc.scanpy.containsKey("filter")) {
+        if(params.getToolParams("scanpy").containsKey("filter")) {
             out = QC_FILTER( out ).filtered // Remove concat
         }
         if(params.sc.containsKey("file_concatenator")) {
@@ -502,7 +502,7 @@ Steps:
                 )
             )
         }
-        if(params.sc.scanpy.containsKey("data_transformation") && params.sc.scanpy.containsKey("normalization")) {
+        if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) {
             out = NORMALIZE_TRANSFORM( out )
         }
         out = HVG_SELECTION( out )
@@ -552,7 +552,7 @@ Steps:
 
     // Collect the reports:
     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
     // Pairing clustering reports with bec reports 
if(!clusteringParams.isParameterExplorationModeOn()) { clusteringBECReports = BEC_HARMONY.out.cluster_report.map { diff --git a/src/channels/channels.nf b/src/channels/channels.nf index 0797fe5b..311f73db 100644 --- a/src/channels/channels.nf +++ b/src/channels/channels.nf @@ -36,8 +36,8 @@ workflow getDataChannel { } else { // If not dynamically set, we use h5ad by default outputFileFormat = "h5ad" - if(params.sc.file_converter.containsKey("off")) { - outputFileFormat = params.sc.file_converter.off + if(params.getToolParams("file_converter").containsKey("off")) { + outputFileFormat = params.getToolParams("file_converter").off } } diff --git a/src/harmony/workflows/bec_harmony.nf b/src/harmony/workflows/bec_harmony.nf index 44345a0f..44e9f50a 100644 --- a/src/harmony/workflows/bec_harmony.nf +++ b/src/harmony/workflows/bec_harmony.nf @@ -95,7 +95,7 @@ workflow BEC_HARMONY { // Run clustering // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) CLUSTER_IDENTIFICATION( normalizedTransformedData, DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, diff --git a/src/scanpy/conf/base.config b/src/scanpy/conf/base.config index d2544cc3..d1135308 100644 --- a/src/scanpy/conf/base.config +++ b/src/scanpy/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' report { diff --git a/src/scanpy/conf/bbknn.config b/src/scanpy/conf/bbknn.config index 5a957800..e68636c7 100644 --- a/src/scanpy/conf/bbknn.config +++ b/src/scanpy/conf/bbknn.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { batch_effect_correct { method = 'bbknn' diff --git a/src/scanpy/conf/data_transformation.config b/src/scanpy/conf/data_transformation.config index 8610b720..f23e2b59 100644 --- a/src/scanpy/conf/data_transformation.config +++ b/src/scanpy/conf/data_transformation.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { data_transformation { method = 'log1p' diff --git a/src/scanpy/conf/filter.config b/src/scanpy/conf/filter.config index e21b736e..ca06cd90 100644 --- a/src/scanpy/conf/filter.config +++ b/src/scanpy/conf/filter.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { filter { report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/scanpy/bin/reports/sc_filter_qc_report.ipynb" diff --git a/src/scanpy/conf/min.config b/src/scanpy/conf/min.config index a39b30ad..c0188bbb 100644 --- a/src/scanpy/conf/min.config +++ b/src/scanpy/conf/min.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' report { diff --git a/src/scanpy/conf/mnncorrect.config b/src/scanpy/conf/mnncorrect.config index 1e1e1634..746a5761 100644 --- a/src/scanpy/conf/mnncorrect.config +++ b/src/scanpy/conf/mnncorrect.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { batch_effect_correct { method = 'mnncorrect' diff --git a/src/scanpy/conf/normalization.config b/src/scanpy/conf/normalization.config index 3dd9fe4b..b79bb7e0 100644 --- a/src/scanpy/conf/normalization.config +++ b/src/scanpy/conf/normalization.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { normalization { method = 'cpx' diff --git a/src/scanpy/conf/regress_out.config b/src/scanpy/conf/regress_out.config index 04e9999d..0db5c768 100644 --- a/src/scanpy/conf/regress_out.config +++ b/src/scanpy/conf/regress_out.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { regress_out { variablesToRegressOut = ['n_counts', 'percent_mito'] diff --git a/src/scanpy/processes/batch_effect_correct.nf b/src/scanpy/processes/batch_effect_correct.nf index a1822719..a884ecba 100644 --- a/src/scanpy/processes/batch_effect_correct.nf +++ b/src/scanpy/processes/batch_effect_correct.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin/" process SC__SCANPY__BATCH_EFFECT_CORRECTION { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -21,7 +21,7 @@ process SC__SCANPY__BATCH_EFFECT_CORRECTION { val(stashedParams) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.batch_effect_correct) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").batch_effect_correct) processParams = sampleParams.local """ ${binDir}aggregate/sc_batch_effect_correction.py \ diff --git a/src/scanpy/processes/cluster.nf b/src/scanpy/processes/cluster.nf index 47128522..6c74cc7f 100644 --- a/src/scanpy/processes/cluster.nf +++ b/src/scanpy/processes/cluster.nf @@ -97,7 +97,7 @@ class SC__SCANPY__CLUSTERING_PARAMS { process SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container label 'compute_resources__mem' input: @@ -111,7 +111,7 @@ process SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS { path(f) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.clustering) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering) processParams = sampleParams.local methodAsArguments = processParams?.methods ? processParams.methods.collect({ '--method' + ' ' + it }).join(' ') : '--method ' + processParams.method resolutionAsArguments = processParams?.resolutions ? 
diff --git a/src/scanpy/processes/batch_effect_correct.nf b/src/scanpy/processes/batch_effect_correct.nf
index a1822719..a884ecba 100644
--- a/src/scanpy/processes/batch_effect_correct.nf
+++ b/src/scanpy/processes/batch_effect_correct.nf
@@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin/"

 process SC__SCANPY__BATCH_EFFECT_CORRECTION {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -21,7 +21,7 @@ process SC__SCANPY__BATCH_EFFECT_CORRECTION {
         val(stashedParams)

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.batch_effect_correct)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").batch_effect_correct)
         processParams = sampleParams.local
         """
         ${binDir}aggregate/sc_batch_effect_correction.py \
diff --git a/src/scanpy/processes/cluster.nf b/src/scanpy/processes/cluster.nf
index 47128522..6c74cc7f 100644
--- a/src/scanpy/processes/cluster.nf
+++ b/src/scanpy/processes/cluster.nf
@@ -97,7 +97,7 @@ class SC__SCANPY__CLUSTERING_PARAMS {

 process SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     label 'compute_resources__mem'

     input:
@@ -111,7 +111,7 @@ process SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS {
         path(f)

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.clustering)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering)
         processParams = sampleParams.local
         methodAsArguments = processParams?.methods ? processParams.methods.collect({ '--method' + ' ' + it }).join(' ') : '--method ' + processParams.method
         resolutionAsArguments = processParams?.resolutions ? processParams?.resolutions.collect({ '--resolution' + ' ' + it }).join(' ') : '--resolution ' + processParams.resolution
@@ -134,7 +134,7 @@ def SC__SCANPY__CLUSTERING_PARAMS(params) {
  */
 process SC__SCANPY__CLUSTERING {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -145,7 +145,7 @@ process SC__SCANPY__CLUSTERING {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__CLUSTERING.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.clustering)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering)
         processParams = sampleParams.local
         """
         ${binDir}/cluster/sc_clustering.py \
@@ -163,7 +163,7 @@ process SC__SCANPY__CLUSTERING {
  */
 process SC__SCANPY__PARAM_EXPLORE_CLUSTERING {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate/clustering/${isParamNull(method) ? "default": method.toLowerCase()}/${isParamNull(resolution) ? "default" : "res_" + resolution}", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -183,7 +183,7 @@ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING {
         val(resolution)

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.clustering)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering)
         processParams = sampleParams.local
         def _processParams = new SC__SCANPY__CLUSTERING_PARAMS()
         _processParams.setEnv(this)
diff --git a/src/scanpy/processes/dim_reduction.nf b/src/scanpy/processes/dim_reduction.nf
index 5ae965bf..6a8f6452 100644
--- a/src/scanpy/processes/dim_reduction.nf
+++ b/src/scanpy/processes/dim_reduction.nf
@@ -89,7 +89,7 @@ def SC__SCANPY__DIM_REDUCTION_PARAMS(params) {

 process SC__SCANPY__DIM_REDUCTION {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__cpu'

@@ -108,7 +108,7 @@ process SC__SCANPY__DIM_REDUCTION {
         val(nComps)

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.dim_reduction.get(params.method))
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").dim_reduction.get(params.method))
         processParams = sampleParams.local
         // In parameter exploration mode, file output needs to be tagged with a unique identifier because of:
         // - https://github.com/nextflow-io/nextflow/issues/470
diff --git a/src/scanpy/processes/feature_selection.nf b/src/scanpy/processes/feature_selection.nf
index d09cbb14..067e31eb 100644
--- a/src/scanpy/processes/feature_selection.nf
+++ b/src/scanpy/processes/feature_selection.nf
@@ -6,7 +6,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin" :

 process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -17,7 +17,7 @@ process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.feature_selection)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").feature_selection)
         processParams = sampleParams.local
         """
         ${binDir}/feature_selection/sc_find_variable_genes.py \
@@ -35,7 +35,7 @@ process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES {

 process SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -46,7 +46,7 @@ process SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.feature_selection)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").feature_selection)
         processParams = sampleParams.local
         """
         ${binDir}/feature_selection/sc_subset_variable_genes.py \
diff --git a/src/scanpy/processes/filter.nf b/src/scanpy/processes/filter.nf
index dffdf9b2..321d1706 100644
--- a/src/scanpy/processes/filter.nf
+++ b/src/scanpy/processes/filter.nf
@@ -6,7 +6,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin" :

 process SC__SCANPY__COMPUTE_QC_STATS {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -17,7 +17,7 @@ process SC__SCANPY__COMPUTE_QC_STATS {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__COMPUTE_QC_STATS.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.filter)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").filter)
         processParams = sampleParams.local
         """
         ${binDir}/filter/sc_cell_gene_filtering.py \
@@ -38,7 +38,7 @@ process SC__SCANPY__COMPUTE_QC_STATS {

 process SC__SCANPY__GENE_FILTER {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -49,7 +49,7 @@ process SC__SCANPY__GENE_FILTER {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__GENE_FILTER.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.filter)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").filter)
         processParams = sampleParams.local
         """
         ${binDir}/filter/sc_cell_gene_filtering.py \
@@ -64,7 +64,7 @@ process SC__SCANPY__GENE_FILTER {

 process SC__SCANPY__CELL_FILTER {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -75,7 +75,7 @@ process SC__SCANPY__CELL_FILTER {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__CELL_FILTER.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.filter)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").filter)
         processParams = sampleParams.local
         """
         ${binDir}/filter/sc_cell_gene_filtering.py \
diff --git a/src/scanpy/processes/marker_genes.nf b/src/scanpy/processes/marker_genes.nf
index da900fa8..341017e6 100644
--- a/src/scanpy/processes/marker_genes.nf
+++ b/src/scanpy/processes/marker_genes.nf
@@ -13,7 +13,7 @@ include {
  */
 process SC__SCANPY__MARKER_GENES {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -29,7 +29,7 @@ process SC__SCANPY__MARKER_GENES {
         tuple val(sampleId), path("${sampleId}.SC__SCANPY__MARKER_GENES.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.marker_genes)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").marker_genes)
         processParams = sampleParams.local
         """
         ${binDir}/cluster/sc_marker_genes.py \
@@ -48,7 +48,7 @@ process SC__SCANPY__MARKER_GENES {
  */
 process SC__SCANPY__PARAM_EXPLORE_MARKER_GENES {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate/markers/${isParamNull(clusteringMethod) ? "default": clusteringMethod.toLowerCase()}/${isParamNull(clusteringResolution) ? "res_": clusteringResolution}", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -70,7 +70,7 @@ process SC__SCANPY__PARAM_EXPLORE_MARKER_GENES {
         val(clusteringResolution)

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.marker_genes)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").marker_genes)
         processParams = sampleParams.local
         // In parameter exploration mode, file output needs to be tagged with a unique identifier because of:
         // - https://github.com/nextflow-io/nextflow/issues/470
diff --git a/src/scanpy/processes/neighborhood_graph.nf b/src/scanpy/processes/neighborhood_graph.nf
index 24255062..3b4c359f 100644
--- a/src/scanpy/processes/neighborhood_graph.nf
+++ b/src/scanpy/processes/neighborhood_graph.nf
@@ -85,7 +85,7 @@ class SC__SCANPY__NEIGHBORHOOD_GRAPH_PARAMS {

 process SC__SCANPY__NEIGHBORHOOD_GRAPH {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     label 'compute_resources__mem'

     input:
@@ -106,7 +106,7 @@ process SC__SCANPY__NEIGHBORHOOD_GRAPH {
         def sampleParams = params.parseConfig(
             sampleId,
             params.global,
-            params.sc.scanpy.neighborhood_graph
+            params.getToolParams("scanpy").neighborhood_graph
         )
         processParams = sampleParams.local
         // In parameter exploration mode, file output needs to be tagged with a unique identifier because of:
diff --git a/src/scanpy/processes/regress_out.nf b/src/scanpy/processes/regress_out.nf
index 93e6d3f9..ce701b2e 100644
--- a/src/scanpy/processes/regress_out.nf
+++ b/src/scanpy/processes/regress_out.nf
@@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin/"

 process SC__SCANPY__REGRESS_OUT {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__cpu'

@@ -19,7 +19,7 @@ process SC__SCANPY__REGRESS_OUT {
         path("${sampleId}.SC__SCANPY__REGRESS_OUT.${processParams.off}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.regress_out)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").regress_out)
         processParams = sampleParams.local
         variablesToRegressOutAsArguments = processParams.variablesToRegressOut.collect({ '--variable-to-regress-out' + ' ' + it }).join(' ')
         """
"${params.global.outdir}/notebooks/intermediate/clustering/${isParamNull(method) ? "default": method.toLowerCase()}/${isParamNull(resolution) ? "res_": resolution}", mode: 'symlink', overwrite: true label 'compute_resources__report' @@ -77,7 +77,7 @@ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING_GENERATE_REPORT { stashedParams = [method, resolution] if(!isParamNull(stashedParams)) uuid = stashedParams.findAll { it != 'NULL' }.join('_') - def reportParams = new Yaml().dump(annotations_to_plot: params.sc.scanpy.report.annotations_to_plot) + def reportParams = new Yaml().dump(annotations_to_plot: params.getToolParams("scanpy").report.annotations_to_plot) """ papermill ${ipynb} \ --report-mode \ @@ -93,7 +93,7 @@ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING_GENERATE_REPORT { // QC report takes two inputs, so needs it own process process SC__SCANPY__GENERATE_DUAL_INPUT_REPORT { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true label 'compute_resources__report' @@ -116,7 +116,7 @@ process SC__SCANPY__GENERATE_DUAL_INPUT_REPORT { script: if(!isParamNull(stashedParams)) uuid = stashedParams.findAll { it != 'NULL' }.join('_') - def reportParams = new Yaml().dump(annotations_to_plot: params.sc.scanpy.report.annotations_to_plot) + def reportParams = new Yaml().dump(annotations_to_plot: params.getToolParams("scanpy").report.annotations_to_plot) """ papermill ${ipynb} \ --report-mode \ @@ -131,7 +131,7 @@ process SC__SCANPY__GENERATE_DUAL_INPUT_REPORT { process SC__SCANPY__REPORT_TO_HTML { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true // copy final "merged_report" to notbooks root: publishDir "${params.global.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true @@ -152,7 +152,7 @@ process SC__SCANPY__REPORT_TO_HTML { process SC__SCANPY__MERGE_REPORTS { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true // copy final "merged_report" to notebooks root: publishDir "${params.global.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true diff --git a/src/scanpy/processes/transform.nf b/src/scanpy/processes/transform.nf index 83f62dca..a4181072 100644 --- a/src/scanpy/processes/transform.nf +++ b/src/scanpy/processes/transform.nf @@ -6,7 +6,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scanpy/bin" : process SC__SCANPY__NORMALIZATION { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -17,7 +17,7 @@ process SC__SCANPY__NORMALIZATION { tuple val(sampleId), path("${sampleId}.SC__SCANPY__NORMALIZATION.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.normalization) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").normalization) processParams = sampleParams.local """ ${binDir}/transform/sc_normalization.py \ @@ -31,7 +31,7 @@ process SC__SCANPY__NORMALIZATION { process SC__SCANPY__DATA_TRANSFORMATION { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -42,7 +42,7 @@ process SC__SCANPY__DATA_TRANSFORMATION { tuple val(sampleId), path("${sampleId}.SC__SCANPY__DATA_TRANSFORMATION.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.data_transformation) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").data_transformation) processParams = sampleParams.local """ ${binDir}/transform/sc_data_transformation.py \ @@ -55,7 +55,7 @@ process SC__SCANPY__DATA_TRANSFORMATION { process SC__SCANPY__FEATURE_SCALING { - container params.sc.scanpy.container + container params.getToolParams("scanpy").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -70,7 +70,7 @@ process SC__SCANPY__FEATURE_SCALING { path("${sampleId}.SC__SCANPY__FEATURE_SCALING.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.feature_scaling) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").feature_scaling) processParams = sampleParams.local """ ${binDir}/transform/sc_feature_scaling.py \ diff --git a/src/scanpy/workflows/bec_bbknn.nf b/src/scanpy/workflows/bec_bbknn.nf index f37ab71e..68eb7dbc 100644 --- a/src/scanpy/workflows/bec_bbknn.nf +++ b/src/scanpy/workflows/bec_bbknn.nf @@ -71,7 +71,7 @@ workflow BEC_BBKNN { ) // Define the parameters for dimensionality reduction - def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.sc.scanpy.dim_reduction.umap) ) + def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.getToolParams("scanpy").dim_reduction.umap) ) SC__SCANPY__DIM_REDUCTION__UMAP( SC__SCANPY__BATCH_EFFECT_CORRECTION.out.combine( dimRedParams.$() @@ -87,7 +87,7 @@ workflow BEC_BBKNN { ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) CLUSTER_IDENTIFICATION( normalizedTransformedData, SC__SCANPY__DIM_REDUCTION__UMAP.out, @@ -122,7 +122,7 @@ workflow BEC_BBKNN { bbknn_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.sc.scanpy.batch_effect_correct.report_ipynb), + file(workflow.projectDir + params.getToolParams("scanpy").batch_effect_correct.report_ipynb), "SC_BEC_BBKNN_report", 
diff --git a/src/scanpy/workflows/bec_bbknn.nf b/src/scanpy/workflows/bec_bbknn.nf
index f37ab71e..68eb7dbc 100644
--- a/src/scanpy/workflows/bec_bbknn.nf
+++ b/src/scanpy/workflows/bec_bbknn.nf
@@ -71,7 +71,7 @@ workflow BEC_BBKNN {
         )

         // Define the parameters for dimensionality reduction
-        def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.sc.scanpy.dim_reduction.umap) )
+        def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.getToolParams("scanpy").dim_reduction.umap) )
         SC__SCANPY__DIM_REDUCTION__UMAP(
             SC__SCANPY__BATCH_EFFECT_CORRECTION.out.combine(
                 dimRedParams.$()
@@ -87,7 +87,7 @@ workflow BEC_BBKNN {
         )

         // Define the parameters for clustering
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
         CLUSTER_IDENTIFICATION(
             normalizedTransformedData,
             SC__SCANPY__DIM_REDUCTION__UMAP.out,
@@ -122,7 +122,7 @@ workflow BEC_BBKNN {

         bbknn_report = GENERATE_DUAL_INPUT_REPORT(
             becDualDataPrePost,
-            file(workflow.projectDir + params.sc.scanpy.batch_effect_correct.report_ipynb),
+            file(workflow.projectDir + params.getToolParams("scanpy").batch_effect_correct.report_ipynb),
             "SC_BEC_BBKNN_report",
             clusteringParams.isParameterExplorationModeOn()
         )
diff --git a/src/scanpy/workflows/bec_mnncorrect.nf b/src/scanpy/workflows/bec_mnncorrect.nf
index 5dd8161d..88338a1b 100644
--- a/src/scanpy/workflows/bec_mnncorrect.nf
+++ b/src/scanpy/workflows/bec_mnncorrect.nf
@@ -63,7 +63,7 @@ workflow BEC_MNNCORRECT {
         clusterIdentificationPreBatchEffectCorrection

     main:
-        out = params.sc.scanpy.containsKey("regress_out")
+        out = params.getToolParams("scanpy").containsKey("regress_out")
             ? SC__SCANPY__REGRESS_OUT( hvg ) : data
         SC__SCANPY__BATCH_EFFECT_CORRECTION( out.map {
@@ -99,7 +99,7 @@ workflow BEC_MNNCORRECT {
         )

         // Define the parameters for clustering
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
         CLUSTER_IDENTIFICATION(
             normalizedTransformedData,
             DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
@@ -136,7 +136,7 @@ workflow BEC_MNNCORRECT {

         mnncorrect_report = GENERATE_DUAL_INPUT_REPORT(
             becDualDataPrePost,
-            file(workflow.projectDir + params.sc.scanpy.batch_effect_correct.report_ipynb),
+            file(workflow.projectDir + params.getToolParams("scanpy").batch_effect_correct.report_ipynb),
             "SC_BEC_MNNCORRECT_report",
             clusteringParams.isParameterExplorationModeOn()
         )
diff --git a/src/scanpy/workflows/cluster_identification.nf b/src/scanpy/workflows/cluster_identification.nf
index 62563549..7a8e9434 100644
--- a/src/scanpy/workflows/cluster_identification.nf
+++ b/src/scanpy/workflows/cluster_identification.nf
@@ -32,9 +32,9 @@ workflow CLUSTER_IDENTIFICATION {

     main:
         // To run multiple clustering, we need at least 1 argument that is a list
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
         // Run sanity checks
-        if(params.sc.scanpy.clustering?.preflight_checks) {
+        if(params.getToolParams("scanpy").clustering?.preflight_checks) {
             $data = SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS( data.map { it -> tuple(it[0], it[1]) } )
         } else {
             $data = data
@@ -60,7 +60,7 @@ workflow CLUSTER_IDENTIFICATION {
         report = GENERATE_REPORT(
             "CLUSTERING",
             out,
-            file(workflow.projectDir + params.sc.scanpy.clustering.report_ipynb),
+            file(workflow.projectDir + params.getToolParams("scanpy").clustering.report_ipynb),
             clusteringParams.isParameterExplorationModeOn()
         )
diff --git a/src/scanpy/workflows/combine_reports.nf b/src/scanpy/workflows/combine_reports.nf
index 45c2c54c..d4d7838b 100644
--- a/src/scanpy/workflows/combine_reports.nf
+++ b/src/scanpy/workflows/combine_reports.nf
@@ -18,7 +18,7 @@ workflow COMBINE_REPORTS {
         cluster_report

     main:
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
         ipynbs = qc_filter_report.map {
             it -> tuple(it[0], it[1])
         }.mix(
diff --git a/src/scanpy/workflows/dim_reduction.nf b/src/scanpy/workflows/dim_reduction.nf
index e9d3a1a3..af1bc185 100644
--- a/src/scanpy/workflows/dim_reduction.nf
+++ b/src/scanpy/workflows/dim_reduction.nf
@@ -33,7 +33,7 @@ workflow DIM_REDUCTION {
         report = GENERATE_REPORT(
             "DIMENSIONALITY_REDUCTION",
             DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap.map { it -> tuple(it[0], it[1]) },
-            file(workflow.projectDir + params.sc.scanpy.dim_reduction.report_ipynb),
+            file(workflow.projectDir + params.getToolParams("scanpy").dim_reduction.report_ipynb),
             false
         )
@@ -57,7 +57,7 @@ workflow DIM_REDUCTION_TSNE_UMAP {
         report = GENERATE_REPORT(
             "DIMENSIONALITY_REDUCTION",
             dimred_tsne_umap.map { it -> tuple(it[0], it[1]) },
-            file(workflow.projectDir + params.sc.scanpy.dim_reduction.report_ipynb),
+            file(workflow.projectDir + params.getToolParams("scanpy").dim_reduction.report_ipynb),
             false
         )
diff --git a/src/scanpy/workflows/hvg_selection.nf b/src/scanpy/workflows/hvg_selection.nf
index 6381b91a..fcbcd3f4 100644
--- a/src/scanpy/workflows/hvg_selection.nf
+++ b/src/scanpy/workflows/hvg_selection.nf
@@ -36,7 +36,7 @@ workflow HVG_SELECTION {
         hvg = data \
             | SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES \
             | SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES
-        out = params.sc.scanpy.containsKey("regress_out")
+        out = params.getToolParams("scanpy").containsKey("regress_out")
             ? SC__SCANPY__REGRESS_OUT( hvg ) : hvg
         scaled = SC__SCANPY__FEATURE_SCALING( out )
         PUBLISH_H5AD_HVG_SCALED(
@@ -52,7 +52,7 @@ workflow HVG_SELECTION {
         report = GENERATE_REPORT(
             "HVG",
             SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES.out,
-            file(workflow.projectDir + params.sc.scanpy.feature_selection.report_ipynb),
+            file(workflow.projectDir + params.getToolParams("scanpy").feature_selection.report_ipynb),
             false
         )
diff --git a/src/scanpy/workflows/single_sample.nf b/src/scanpy/workflows/single_sample.nf
index 2de848cb..0d1910ba 100644
--- a/src/scanpy/workflows/single_sample.nf
+++ b/src/scanpy/workflows/single_sample.nf
@@ -73,8 +73,10 @@ workflow SINGLE_SAMPLE {
         // Prefilter the data
         out = FILTER_AND_ANNOTATE_AND_CLEAN( data )

-        filtered = params.sc.scanpy?.filter ? QC_FILTER( out ).filtered : out
-        transformed_normalized = params.sc.scanpy?.data_transformation && params.sc.scanpy?.normalization
+        def scanpyParams = params.getToolParams("scanpy")
+
+        filtered = scanpyParams?.filter ? QC_FILTER( out ).filtered : out
+        transformed_normalized = scanpyParams?.data_transformation && scanpyParams?.normalization
             ? NORMALIZE_TRANSFORM( filtered ) : filtered
         out = HVG_SELECTION( transformed_normalized )
         DIM_REDUCTION_PCA( out.scaled )
@@ -96,13 +98,13 @@ workflow SINGLE_SAMPLE {
         ipynbs = COMBINE_REPORTS(
             samples,
             UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out,
-            params.sc.scanpy?.filter ? QC_FILTER.out.report : Channel.empty(),
+            scanpyParams?.filter ? QC_FILTER.out.report : Channel.empty(),
             HVG_SELECTION.out.report,
             DIM_REDUCTION_TSNE_UMAP.out.report,
             CLUSTER_IDENTIFICATION.out.report
         )

-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) )
         merged_report = SC__SCANPY__MERGE_REPORTS(
             ipynbs,
             "merged_report",
@@ -114,7 +116,7 @@ workflow SINGLE_SAMPLE {

         // Finalize
         FINALIZE(
-            params.sc.scanpy?.filter ? QC_FILTER.out.filtered : data,
+            scanpyParams?.filter ? QC_FILTER.out.filtered : data,
             CLUSTER_IDENTIFICATION.out.marker_genes,
             'SINGLE_SAMPLE.final_output'
         )
@@ -133,7 +135,7 @@ workflow SINGLE_SAMPLE {

     emit:
-        filtered_data = params.sc.scanpy?.filter ? QC_FILTER.out.filtered : Channel.empty()
+        filtered_data = scanpyParams?.filter ? QC_FILTER.out.filtered : Channel.empty()
         filtered_loom = FINALIZE.out.filteredloom
         hvg_data = HVG_SELECTION.out.hvg
         dr_pca_data = DIM_REDUCTION_PCA.out
diff --git a/src/utils/processes/h5adAnnotate.nf b/src/utils/processes/h5adAnnotate.nf
index bdab3c1b..324f8661 100644
--- a/src/utils/processes/h5adAnnotate.nf
+++ b/src/utils/processes/h5adAnnotate.nf
@@ -6,7 +6,6 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 include {
     isParamNull;
-    getToolParams;
 } from './utils.nf' params(params)

 def getPublishDir = { outDir, toolName ->
@@ -24,7 +23,7 @@ def getMode = { toolName ->

 process SC__ANNOTATE_BY_CELL_METADATA {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${getPublishDir(params.global.outdir,tool)}", mode: "${getMode(tool)}", overwrite: true
     label 'compute_resources__default'

@@ -45,7 +44,7 @@ process SC__ANNOTATE_BY_CELL_METADATA {
         def sampleParams = params.parseConfig(
             sampleId,
             params.global,
-            isParamNull(tool) ? params.sc.cell_annotate : getToolParams(params.sc, tool)["cell_annotate"]
+            isParamNull(tool) ? params.sc.cell_annotate : params.getToolParams(tool)["cell_annotate"]
         )
         processParams = sampleParams.local
         toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.'
@@ -83,7 +82,7 @@ def hasMetadataFilePath(processParams) {

 process SC__ANNOTATE_BY_SAMPLE_METADATA {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true
     label 'compute_resources__default'
diff --git a/src/utils/processes/h5adExtractMetadata.nf b/src/utils/processes/h5adExtractMetadata.nf
index f25d3430..cab96a84 100644
--- a/src/utils/processes/h5adExtractMetadata.nf
+++ b/src/utils/processes/h5adExtractMetadata.nf
@@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 process SC__UTILS__EXTRACT_FEATURE_METADATA {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true
     label 'compute_resources__default'
diff --git a/src/utils/processes/h5adMerge.nf b/src/utils/processes/h5adMerge.nf
index 43acb06b..1724fd15 100644
--- a/src/utils/processes/h5adMerge.nf
+++ b/src/utils/processes/h5adMerge.nf
@@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 process SC__H5AD_MERGE {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'
diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf
index 680b3e15..f9f0e09e 100644
--- a/src/utils/processes/h5adSubset.nf
+++ b/src/utils/processes/h5adSubset.nf
@@ -6,13 +6,12 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 include {
     isParamNull;
-    getToolParams;
     isCollectionOrArray;
 } from './utils' params(params)

 process SC__PREPARE_OBS_FILTER {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true
     label 'compute_resources__default'

@@ -34,7 +33,7 @@ process SC__PREPARE_OBS_FILTER {
         def sampleParams = params.parseConfig(
             sampleId,
             params.global,
-            isParamNull(tool) ? params.sc.cell_filter : getToolParams(params.sc, tool)["cell_filter"]
+            isParamNull(tool) ? params.sc.cell_filter : params.getToolParams(tool)["cell_filter"]
         )
         processParams = sampleParams.local
         toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.'
@@ -71,7 +70,7 @@ process SC__PREPARE_OBS_FILTER {

 process SC__APPLY_OBS_FILTER {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true
     label 'compute_resources__default'

@@ -92,7 +91,7 @@ process SC__APPLY_OBS_FILTER {
         def sampleParams = params.parseConfig(
             sampleId,
             params.global,
-            isParamNull(tool) ? params.sc.cell_filter : getToolParams(params.sc, tool)["cell_filter"]
+            isParamNull(tool) ? params.sc.cell_filter : params.getToolParams(tool)["cell_filter"]
         )
         processParams = sampleParams.local
         toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.'
diff --git a/src/utils/processes/h5adToLoom.nf b/src/utils/processes/h5adToLoom.nf
index d1d6cf5c..2e9a8bb4 100644
--- a/src/utils/processes/h5adToLoom.nf
+++ b/src/utils/processes/h5adToLoom.nf
@@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 process SC__H5AD_TO_LOOM {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/loom", mode: 'link', overwrite: true, saveAs: { filename -> "${sampleId}.SCope_output.loom" }
     label 'compute_resources__mem'

@@ -42,7 +42,7 @@ process SC__H5AD_TO_LOOM {

 process SC__H5AD_TO_FILTERED_LOOM {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'
diff --git a/src/utils/processes/h5adUpdate.nf b/src/utils/processes/h5adUpdate.nf
index 5cb7506a..dd437b15 100644
--- a/src/utils/processes/h5adUpdate.nf
+++ b/src/utils/processes/h5adUpdate.nf
@@ -8,7 +8,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 process SC__H5AD_UPDATE_X_PCA {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     label 'compute_resources__mem'

     input:
@@ -34,7 +34,7 @@ process SC__H5AD_UPDATE_X_PCA {

 process SC__H5AD_CLEAN {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     label 'compute_resources__mem'

     input:
@@ -61,7 +61,7 @@ process SC__H5AD_CLEAN {

 process SC__H5AD_BEAUTIFY {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'
diff --git a/src/utils/processes/h5adUpdateMetadata.nf b/src/utils/processes/h5adUpdateMetadata.nf
index fb829160..09dc2b0c 100644
--- a/src/utils/processes/h5adUpdateMetadata.nf
+++ b/src/utils/processes/h5adUpdateMetadata.nf
@@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" :

 process SC__UTILS__UPDATE_FEATURE_METADATA_INDEX {

-    container params.sc.scanpy.container
+    container params.getToolParams("scanpy").container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true
     label 'compute_resources__default'
diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf
index 2cb0f4d0..ed4402c4 100644
--- a/src/utils/processes/utils.nf
+++ b/src/utils/processes/utils.nf
@@ -197,7 +197,7 @@ process SC__FILE_CONVERTER {
         path("${sampleId}.SC__FILE_CONVERTER.${outputExtension}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.file_converter)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("file_converter"))
         processParams = sampleParams.local

         switch(inputDataType) {
diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf
index 7c616067..9231a3ee 100644
--- a/src/utils/workflows/annotateByCellMetadata.nf
+++ b/src/utils/workflows/annotateByCellMetadata.nf
@@ -4,7 +4,6 @@ nextflow.enable.dsl=2
 // Process imports:
 include {
     isParamNull;
-    getToolParams;
 } from './../processes/utils.nf' params(params)
 include {
     getChannel;
@@ -36,7 +35,7 @@ workflow ANNOTATE_BY_CELL_METADATA {

     main:
         def workflowParams = isParamNull(tool) ?
             params.sc.cell_annotate :
-            getToolParams(params.sc, tool)["cell_annotate"]
+            params.getToolParams(tool)["cell_annotate"]
         def method = workflowParams.method
         if(method == 'aio') {
             out = SC__ANNOTATE_BY_CELL_METADATA(
diff --git a/src/utils/workflows/filterAnnotateClean.nf b/src/utils/workflows/filterAnnotateClean.nf
index 5daea70c..1411d383 100644
--- a/src/utils/workflows/filterAnnotateClean.nf
+++ b/src/utils/workflows/filterAnnotateClean.nf
@@ -34,18 +34,18 @@ workflow FILTER_AND_ANNOTATE_AND_CLEAN {
             out = UPDATE_FEATURE_NOMENCLATURE( data )
         }
         // Filter cells based on an indexed cell-based metadata table
-        if(params.sc.containsKey("cell_filter")) {
+        if(params.hasToolParams("cell_filter")) {
             out = FILTER_BY_CELL_METADATA( out, 'NULL' )
         }
         // Annotate cells based on an indexed cell-based metadata table
-        if(params.sc.containsKey("cell_annotate")) {
+        if(params.hasToolParams("cell_annotate")) {
             out = STATIC__ANNOTATE_BY_CELL_METADATA( out, null )
         }
         // Annotate cells based on an indexed sample-based metadata table
-        if(params.sc.containsKey("sample_annotate")) {
+        if(params.hasToolParams("sample_annotate")) {
             if (!hasMetadataFilePath(params.sc.sample_annotate)) {
                 throw new Exception("The metadataFilePath param is missing in sample_annotate.")
             }
@@ -54,7 +54,7 @@ workflow FILTER_AND_ANNOTATE_AND_CLEAN {
         // Clean
         // e.g.:
         // - h5ad: rename adata.obs values, remove adata.obs columns
-        if(params.sc.containsKey("file_cleaner")) {
+        if(params.hasToolParams("file_cleaner")) {
             out = SC__H5AD_BEAUTIFY( out )
         }
diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf
index 4e6b26e6..98a9ed95 100644
--- a/src/utils/workflows/filterByCellMetadata.nf
+++ b/src/utils/workflows/filterByCellMetadata.nf
@@ -4,7 +4,6 @@ nextflow.enable.dsl=2
 // Process imports:
 include {
     isParamNull;
-    getToolParams;
 } from './../processes/utils.nf' params(params)
 include {
     SC__PREPARE_OBS_FILTER;
@@ -34,7 +33,7 @@ workflow FILTER_BY_CELL_METADATA {

     main:
         def workflowParams = isParamNull(tool) ?
             params.sc.cell_filter :
-            getToolParams(params.sc, tool)["cell_filter"]
+            params.getToolParams(tool)["cell_filter"]

         Channel
             .from(workflowParams.filters)
diff --git a/workflows/bbknn.nf b/workflows/bbknn.nf
index 816667f8..5116a48a 100644
--- a/workflows/bbknn.nf
+++ b/workflows/bbknn.nf
@@ -76,7 +76,7 @@ workflow bbknn {
         SC__FILE_CONVERTER | \
         FILTER_AND_ANNOTATE_AND_CLEAN

-    if(params.sc.scanpy.containsKey("filter")) {
+    if(params.getToolParams("scanpy").containsKey("filter")) {
        out = QC_FILTER( out ).filtered // Remove concat
     }
     if(params.sc.containsKey("file_concatenator")) {
@@ -88,7 +88,7 @@ workflow bbknn {
             )
         )
     }
-    if(params.sc.scanpy.containsKey("data_transformation") && params.sc.scanpy.containsKey("normalization")) {
+    if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) {
         out = NORMALIZE_TRANSFORM( out )
     }
     out = HVG_SELECTION( out )
@@ -119,7 +119,7 @@ workflow bbknn {
     )

     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )

     // Select a default clustering when in parameter exploration mode
     if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) {
diff --git a/workflows/harmony.nf b/workflows/harmony.nf
index 5743c008..4d296727 100644
--- a/workflows/harmony.nf
+++ b/workflows/harmony.nf
@@ -78,7 +78,7 @@ workflow harmony {
         SC__FILE_CONVERTER | \
         FILTER_AND_ANNOTATE_AND_CLEAN

-    if(params.sc.scanpy.containsKey("filter")) {
+    if(params.getToolParams("scanpy").containsKey("filter")) {
         out = QC_FILTER( out ).filtered // Remove concat
     }
     if(params.sc.containsKey("file_concatenator")) {
@@ -90,7 +90,7 @@ workflow harmony {
             )
         )
     }
-    if(params.sc.scanpy.containsKey("data_transformation") && params.sc.scanpy.containsKey("normalization")) {
+    if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) {
         out = NORMALIZE_TRANSFORM( out )
     }
     out = HVG_SELECTION( out )
@@ -121,7 +121,7 @@ workflow harmony {
     )

     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )

     // Select a default clustering when in parameter exploration mode
     if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) {
diff --git a/workflows/mnncorrect.nf b/workflows/mnncorrect.nf
index 584621be..9d15ccaf 100644
--- a/workflows/mnncorrect.nf
+++ b/workflows/mnncorrect.nf
@@ -87,7 +87,7 @@ workflow mnncorrect {
         SC__FILE_CONVERTER | \
         FILTER_AND_ANNOTATE_AND_CLEAN

-    if(params.sc.scanpy.containsKey("filter")) {
+    if(params.getToolParams("scanpy").containsKey("filter")) {
         out = QC_FILTER( out ).filtered // Remove concat
     }
     if(params.sc.containsKey("file_concatenator")) {
@@ -99,7 +99,7 @@ workflow mnncorrect {
             )
         )
     }
-    if(params.sc.scanpy.containsKey("data_transformation") && params.sc.scanpy.containsKey("normalization")) {
+    if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) {
         out = NORMALIZE_TRANSFORM( out )
     }
     out = HVG_SELECTION( out )
@@ -128,7 +128,7 @@ workflow mnncorrect {
     )

     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )

     // Select a default clustering when in parameter exploration mode
     if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) {
diff --git a/workflows/multi_sample.nf b/workflows/multi_sample.nf
index ff7f5368..a1dc9d34 100644
--- a/workflows/multi_sample.nf
+++ b/workflows/multi_sample.nf
@@ -84,7 +84,7 @@ workflow multi_sample {
         SC__FILE_CONVERTER | \
         FILTER_AND_ANNOTATE_AND_CLEAN

-    if(params.sc.scanpy.containsKey("filter")) {
+    if(params.getToolParams("scanpy").containsKey("filter")) {
         out = QC_FILTER( out ).filtered // Remove concat
     }
     if(params.sc.containsKey("file_concatenator")) {
@@ -96,7 +96,7 @@ workflow multi_sample {
             )
         )
     }
-    if(params.sc.scanpy.containsKey("data_transformation") && params.sc.scanpy.containsKey("normalization")) {
+    if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) {
         out = NORMALIZE_TRANSFORM( out )
     }
     out = HVG_SELECTION( out )
@@ -117,7 +117,7 @@ workflow multi_sample {
     )

     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )

     // Select a default clustering when in parameter exploration mode
     if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) {
diff --git a/workflows/single_sample.nf b/workflows/single_sample.nf
index 1100c25e..446b30a1 100644
--- a/workflows/single_sample.nf
+++ b/workflows/single_sample.nf
@@ -30,7 +30,7 @@ workflow single_sample {
     SCANPY__SINGLE_SAMPLE( SC__FILE_CONVERTER.out )

     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )

     // Select a default clustering when in parameter exploration mode
     if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) {
diff --git a/workflows/single_sample_star.nf b/workflows/single_sample_star.nf
index 893aba35..8844f835 100644
--- a/workflows/single_sample_star.nf
+++ b/workflows/single_sample_star.nf
@@ -76,12 +76,12 @@ workflow single_sample_star {
     )
     out = FILTER_AND_ANNOTATE_AND_CLEAN( data )

-    if(params.sc.scanpy.containsKey("filter")) {
+    if(params.getToolParams("scanpy").containsKey("filter")) {
         out = QC_FILTER( out ).filtered // Remove concat
     }
     NORMALIZE_TRANSFORM( out )
     HVG_SELECTION( NORMALIZE_TRANSFORM.out )
-    if(params.sc.scanpy.containsKey("regress_out")) {
+    if(params.getToolParams("scanpy").containsKey("regress_out")) {
         preprocessed_data = SC__SCANPY__REGRESS_OUT( HVG_SELECTION.out.scaled )
     } else {
         preprocessed_data = HVG_SELECTION.out.scaled
@@ -106,7 +106,7 @@ workflow single_sample_star {
     )

     // Define the parameters for clustering
-    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+    def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )

     // Select a default clustering when in parameter exploration mode
     if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) {
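
Patch 141 above already calls two helpers, params.getToolParams and params.hasToolParams, that do not exist yet; the next patch defines them in conf/generic.config. A sketch of the intended call sites, assuming those definitions:

    // Presence test: is a `cell_filter` block configured anywhere
    // (under params.tools, or under the legacy params.sc)?
    if(params.hasToolParams("cell_filter")) {
        out = FILTER_BY_CELL_METADATA( out, 'NULL' )
    }

    // Lookup: fetch a tool block by key; dotted keys walk nested blocks,
    // e.g. "celda.decontx" resolves params.tools.celda.decontx.
    def decontxParams = params.getToolParams("celda.decontx")
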
From b0f5c4e9b4e4d179d41a7deb10fd043d6e5d8aa7 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Fri, 12 Feb 2021 23:48:46 +0100
Subject: [PATCH 142/202] Migrate to params.tools for celda tool

---
 conf/generic.config                           | 19 ++++++++++++++-----
 main.nf                                       |  6 +++---
 src/celda/conf/base.config                    |  2 +-
 src/celda/conf/decontx_correct.config         |  2 +-
 src/celda/conf/decontx_filter.config          | 10 +++++-----
 src/celda/conf/decontx_filter_defaults.config |  2 +-
 src/celda/main.nf                             |  8 ++++----
 src/celda/nextflow.config                     | 14 --------------
 src/celda/processes/runDecontX.nf             |  4 ++--
 src/scrublet/scrublet.config                  |  4 ++--
 src/utils/processes/h5adSubset.nf             |  4 ++--
 src/utils/processes/utils.nf                  |  2 +-
 src/utils/workflows/filterByCellMetadata.nf   |  2 +-
 13 files changed, 37 insertions(+), 42 deletions(-)
 delete mode 100644 src/celda/nextflow.config

diff --git a/conf/generic.config b/conf/generic.config
index 0d8a5d87..e83c879e 100644
--- a/conf/generic.config
+++ b/conf/generic.config
@@ -21,10 +21,10 @@ params {
     }
     getToolParams = { toolKey ->
         def _get = { p ->
-            if(p?.tools) {
+            if(p.containsKey("tools")) { // weirdly, p?.tools gives a WARN (same for the other if statements)
                 return p.tools[toolKey]
             }
-            if(p?.sc) {
+            if(p.containsKey("sc")) {
                 return p.sc[toolKey]
             }
             throw new Exception("VSN ERROR: Cannot get tool params from NXF params.")
@@ -32,16 +32,25 @@ params {
         if(!toolKey.contains(".")) {
             return _get(params)
         }
-        def entry = params
+        def entry = null
+        if(params.containsKey("tools")) {
+            entry = params.tools
+        } else if(params.containsKey("sc")) {
+            entry = params.sc
+        } else {
+            throw new Exception("VSN ERROR: Missing params.tools (or legacy params.sc).")
+        }
+
         toolKey.split('\\.').each { entry = entry?.get(it) }
         return entry
     }
     hasToolParams = { toolKey ->
-        if(params?.tools) {
+        if(params.containsKey("tools")) {
             return params.tools.containsKey(toolKey)
         }
-        if(params?.sc) {
+        if(params.containsKey("sc")) {
             return params.sc.containsKey(toolKey)
         }
+        return false
     }
 }
diff --git a/main.nf b/main.nf
index b6fc5726..5a370c97 100644
--- a/main.nf
+++ b/main.nf
@@ -506,7 +506,7 @@ workflow single_sample_decontx {
         if(params.utils?.publish) {
             PUBLISH(
                 SC__H5AD_TO_LOOM.out,
-                "SINGLE_SAMPLE_CELDA_DECONTX_"+ params.sc.celda.decontx.strategy.toUpperCase(),
+                "SINGLE_SAMPLE_CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase(),
                 "loom",
                 null,
                 false
@@ -582,7 +582,7 @@ workflow single_sample_decontx_scrublet {
         // - potential doublets removed by Scrublet
         PUBLISH_CELDA_DECONTX_SCRUBLET(
             SCRUBLET__DOUBLET_REMOVAL.out.data_doublets_removed,
-            "CELDA_DECONTX_"+ params.sc.celda.decontx.strategy.toUpperCase() +"_SCRUBLET",
+            "CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase() +"_SCRUBLET",
             "h5ad",
             null,
             false
@@ -1219,7 +1219,7 @@ workflow _cell_annotate_filter {
             null
         )

-        if(params.sc.cell_filter.containsKey("publish") && params.sc.cell_filter.publish) {
+        if(params.getToolParams("cell_filter").containsKey("publish") && params.getToolParams("cell_filter").publish) {
             PUBLISH_H5AD_CELL_FILTERED(
                 FILTER_BY_CELL_METADATA.out,
                 "FILTER_BY_CELL_METADATA",
diff --git a/src/celda/conf/base.config b/src/celda/conf/base.config
index b88f86d7..c7dae8ef 100644
--- a/src/celda/conf/base.config
+++ b/src/celda/conf/base.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         celda {
             container = 'vibsinglecellnf/celda:1.4.5'
             decontx {
diff --git a/src/celda/conf/decontx_correct.config b/src/celda/conf/decontx_correct.config
index ded00be5..e4614494 100644
--- a/src/celda/conf/decontx_correct.config
+++ b/src/celda/conf/decontx_correct.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         celda {
             decontx {
                 strategy = "correct"
diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config
index 68df40e8..55e07ecd 100644
--- a/src/celda/conf/decontx_filter.config
+++ b/src/celda/conf/decontx_filter.config
@@ -1,13 +1,13 @@
 includeConfig '../../utils/conf/cell_annotate.config'
-params.sc.celda.decontx.cell_annotate = params.sc.cell_annotate
-params.sc.remove('cell_annotate')
+params.tools.celda.decontx.cell_annotate = params.tools.cell_annotate
+params.tools.remove('cell_annotate')
 includeConfig '../../utils/conf/cell_filter.config'
-params.sc.celda.decontx.cell_filter = params.sc.cell_filter
-params.sc.remove('cell_filter')
+params.tools.celda.decontx.cell_filter = params.tools.cell_filter
+params.tools.remove('cell_filter')
 includeConfig './decontx_filter_defaults.config'

 params {
-    sc {
+    tools {
         celda {
             decontx {
                 strategy = "filter" // choices: 'filter' (default), 'correct'
diff --git a/src/celda/conf/decontx_filter_defaults.config b/src/celda/conf/decontx_filter_defaults.config
index 93c0feb7..13800fb9 100644
--- a/src/celda/conf/decontx_filter_defaults.config
+++ b/src/celda/conf/decontx_filter_defaults.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         celda {
             decontx {
                 strategy = "filter"
diff --git a/src/celda/main.nf b/src/celda/main.nf
index 7eb95c0a..bc29a224 100644
--- a/src/celda/main.nf
+++ b/src/celda/main.nf
@@ -30,20 +30,20 @@ workflow decontx {
         data = getDataChannel \
             | SC__FILE_CONVERTER

-        if(params.sc.celda.decontx.strategy == "filter") {
+        if(params.getToolParams("celda").decontx.strategy == "filter") {
             out = DECONTX_FILTER ( data )
             processed = out.decontx_filtered
-        } else if (params.sc.celda.decontx.strategy == "correct") {
+        } else if (params.getToolParams("celda").decontx.strategy == "correct") {
             out = DECONTX_CORRECT ( data )
             processed = out.decontx_corrected
         } else {
-            throw new Exception("VSN ERROR: The given strategy in params.sc.celda.decontx is not valid. Choose: filter or correct.")
+            throw new Exception("VSN ERROR: The given strategy in params.tools.celda.decontx is not valid. Choose: filter or correct.")
         }

         if(params.utils.containsKey("publish")) {
             PUBLISH(
                 processed,
-                "CELDA_DECONTX_"+ params.sc.celda.decontx.strategy.toUpperCase(),
+                "CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase(),
                 "h5ad",
                 null,
                 false
diff --git a/src/celda/nextflow.config b/src/celda/nextflow.config
deleted file mode 100644
index b0b0c05a..00000000
--- a/src/celda/nextflow.config
+++ /dev/null
@@ -1,14 +0,0 @@
-params {
-    sc {
-        template {
-            container = 'vibsinglecellnf/scanpy:1.8.1'
-            process1 {
-                param1 = ''
-            }
-            process2 {
-                param2 = ''
-            }
-        }
-    }
-}
-
diff --git a/src/celda/processes/runDecontX.nf b/src/celda/processes/runDecontX.nf
index 0c749555..77f4e81e 100644
--- a/src/celda/processes/runDecontX.nf
+++ b/src/celda/processes/runDecontX.nf
@@ -8,7 +8,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/${moduleName}

 process SC__CELDA__DECONTX {

-    container params.sc.celda.container
+    container params.getToolParams("celda").container
     publishDir "${params.global.outdir}/data/${moduleName}", mode: 'link'
     label 'compute_resources__default'

@@ -32,7 +32,7 @@ process SC__CELDA__DECONTX {
         emit: other

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.celda.decontx)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("celda").decontx)
         processParams = sampleParams.local

         def filterNumMadsThresholdsAsArguments = ''
diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config
index 69d574fb..fb632fc1 100644
--- a/src/scrublet/scrublet.config
+++ b/src/scrublet/scrublet.config
@@ -1,9 +1,9 @@
 includeConfig './conf/base.config'
 includeConfig '../utils/conf/cell_annotate.config'
-params.sc.scrublet.cell_annotate = params.sc.cell_annotate
+params.sc.scrublet.cell_annotate = params.tools.cell_annotate
 params.sc.remove('cell_annotate')
 includeConfig '../utils/conf/cell_filter.config'
-params.sc.scrublet.cell_filter = params.sc.cell_filter
+params.sc.scrublet.cell_filter = params.tools.cell_filter
 params.sc.remove('cell_filter')
 includeConfig './conf/scrublet_defaults.conf'
diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf
index f9f0e09e..5edaabdd 100644
--- a/src/utils/processes/h5adSubset.nf
+++ b/src/utils/processes/h5adSubset.nf
@@ -33,7 +33,7 @@ process SC__PREPARE_OBS_FILTER {
         def sampleParams = params.parseConfig(
             sampleId,
             params.global,
-            isParamNull(tool) ? params.sc.cell_filter : params.getToolParams(tool)["cell_filter"]
+            isParamNull(tool) ? params.getToolParams("cell_filter") : params.getToolParams(tool)["cell_filter"]
         )
         processParams = sampleParams.local
         toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.'
@@ -91,7 +91,7 @@ process SC__APPLY_OBS_FILTER {
         def sampleParams = params.parseConfig(
             sampleId,
             params.global,
-            isParamNull(tool) ? params.sc.cell_filter : params.getToolParams(tool)["cell_filter"]
+            isParamNull(tool) ? params.getToolParams("cell_filter") : params.getToolParams(tool)["cell_filter"]
         )
         processParams = sampleParams.local
         toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.'
diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf
index ed4402c4..594ca308 100644
--- a/src/utils/processes/utils.nf
+++ b/src/utils/processes/utils.nf
@@ -305,7 +305,7 @@ process SC__FILE_CONVERTER_FROM_SCE {
         path("${sampleId}.SC__FILE_CONVERTER_FROM_SCE.${outputDataType}")

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.sc.file_converter)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("file_converter"))
         processParams = sampleParams.local
         def _outputDataType = outputDataType
         converterToUse = getConverter(
diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf
index 98a9ed95..6a7cdb2e 100644
--- a/src/utils/workflows/filterByCellMetadata.nf
+++ b/src/utils/workflows/filterByCellMetadata.nf
@@ -32,7 +32,7 @@ workflow FILTER_BY_CELL_METADATA {

     main:
         def workflowParams = isParamNull(tool) ?
-            params.sc.cell_filter :
+            params.getToolParams("cell_filter") :
             params.getToolParams(tool)["cell_filter"]

         Channel
- params.sc.cell_filter : + params.getToolParams("cell_filter") : params.getToolParams(tool)["cell_filter"] Channel From 047eb44eb87811eec01a8097928803a0b360ff3f Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 12 Feb 2021 23:55:06 +0100 Subject: [PATCH 143/202] Migrate to params.tools for cellranger tool --- docs/development.rst | 4 +-- main.nf | 32 +++++++++---------- src/cellranger/conf/base.config | 2 +- .../conf/cellranger_libraries.config | 2 +- src/cellranger/conf/count.config | 2 +- src/cellranger/conf/count_libraries.config | 2 +- src/cellranger/conf/count_metadata.config | 2 +- src/cellranger/conf/mkfastq.config | 2 +- src/cellranger/main.nf | 4 +-- src/cellranger/processes/count.nf | 4 +-- .../workflows/cellranger_libraries.nf | 4 +-- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 8355f5e1..3ef27fa6 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -722,8 +722,8 @@ Workflows import multiple processes and define the workflow by name: workflow CELLRANGER { main: - SC__CELLRANGER__MKFASTQ(file(params.sc.cellranger.mkfastq.csv), path(params.sc.cellranger.mkfastq.runFolder)) - SC__CELLRANGER__COUNT(file(params.sc.cellranger.count.transcriptome), SC__CELLRANGER__MKFASTQ.out.flatten()) + SC__CELLRANGER__MKFASTQ(file(params.getToolParams("cellranger").mkfastq.csv), path(params.getToolParams("cellranger").mkfastq.runFolder)) + SC__CELLRANGER__COUNT(file(params.getToolParams("cellranger").count.transcriptome), SC__CELLRANGER__MKFASTQ.out.flatten()) emit: SC__CELLRANGER__COUNT.out diff --git a/main.nf b/main.nf index 5a370c97..6ebf6598 100644 --- a/main.nf +++ b/main.nf @@ -784,9 +784,9 @@ workflow cellranger { } from './src/cellranger/main' params(params) CELLRANGER( - file(params.sc.cellranger.mkfastq.csv), - file(params.sc.cellranger.mkfastq.runFolder), - file(params.sc.cellranger.count.transcriptome) + file(params.getToolParams("cellranger").mkfastq.csv), + file(params.getToolParams("cellranger").mkfastq.runFolder), + file(params.getToolParams("cellranger").count.transcriptome) ) emit: @@ -800,10 +800,10 @@ workflow cellranger_libraries { } from './src/cellranger/workflows/cellranger_libraries' params(params) CELLRANGER_LIBRARIES( - file(params.sc.cellranger.mkfastq.csv), - file(params.sc.cellranger.mkfastq.runFolder), - file(params.sc.cellranger.count.transcriptome), - file(params.sc.cellranger.count.featureRef) + file(params.getToolParams("cellranger").mkfastq.csv), + file(params.getToolParams("cellranger").mkfastq.runFolder), + file(params.getToolParams("cellranger").count.transcriptome), + file(params.getToolParams("cellranger").count.featureRef) ) emit: @@ -818,8 +818,8 @@ workflow cellranger_count_metadata { } from './src/cellranger/workflows/cellRangerCountWithMetadata' params(params) CELLRANGER_COUNT_WITH_METADATA( - file(params.sc.cellranger.count.transcriptome), - file(params.sc.cellranger.count.metadata) + file(params.getToolParams("cellranger").count.transcriptome), + file(params.getToolParams("cellranger").count.metadata) ) emit: CELLRANGER_COUNT_WITH_METADATA.out @@ -843,9 +843,9 @@ workflow cellranger_count_libraries { } from './src/cellranger/workflows/cellRangerCountWithLibraries' params(params) CELLRANGER_COUNT_WITH_LIBRARIES( - file(params.sc.cellranger.count.transcriptome), - file(params.sc.cellranger.count.featureRef), - params.sc.cellranger.count.libraries + file(params.getToolParams("cellranger").count.transcriptome), + file(params.getToolParams("cellranger").count.featureRef), + 
params.getToolParams("cellranger").count.libraries ) emit: @@ -861,9 +861,9 @@ workflow cellranger_count_demuxlet { include { SC__CELLRANGER__COUNT as CELLRANGER_COUNT; } from './src/cellranger/processes/count' - if (params.sc.cellranger.count.fastqs instanceof Map) { + if (params.getToolParams("cellranger").count.fastqs instanceof Map) { // Remove default key - Channel.from(params.sc.cellranger.count.fastqs.findAll { + Channel.from(params.getToolParams("cellranger").count.fastqs.findAll { it.key != 'default' }.collect { k, v -> // Split possible multiple file paths @@ -882,7 +882,7 @@ workflow cellranger_count_demuxlet { .set { fastq_data } } data = CELLRANGER_COUNT( - params.sc.cellranger.count.transcriptome, + params.getToolParams("cellranger").count.transcriptome, fastq_data ) get_bam_barcodes_from_cellranger_rna(data) | @@ -1092,7 +1092,7 @@ workflow sra_cellranger_bbknn { out = sra() SC__CELLRANGER__PREPARE_FOLDER( out.groupTuple() ) SC__CELLRANGER__COUNT( - file(params.sc.cellranger.count.transcriptome), + file(params.getToolParams("cellranger").count.transcriptome), SC__CELLRANGER__PREPARE_FOLDER.out ) BBKNN( diff --git a/src/cellranger/conf/base.config b/src/cellranger/conf/base.config index 11e5b757..1c3382f3 100644 --- a/src/cellranger/conf/base.config +++ b/src/cellranger/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { container = '/path/to/cellranger/cellranger' } diff --git a/src/cellranger/conf/cellranger_libraries.config b/src/cellranger/conf/cellranger_libraries.config index eceddc6c..077f4825 100644 --- a/src/cellranger/conf/cellranger_libraries.config +++ b/src/cellranger/conf/cellranger_libraries.config @@ -1,7 +1,7 @@ includeConfig("mkfastq.config") params { - sc { + tools { cellranger { librariesMap = [ "sample1": [ diff --git a/src/cellranger/conf/count.config b/src/cellranger/conf/count.config index 8efa03f6..122f1eca 100644 --- a/src/cellranger/conf/count.config +++ b/src/cellranger/conf/count.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { transcriptome = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/indexes/CellRanger/3.0.2/hg38_iGenomes' diff --git a/src/cellranger/conf/count_libraries.config b/src/cellranger/conf/count_libraries.config index 360ee80d..987871aa 100644 --- a/src/cellranger/conf/count_libraries.config +++ b/src/cellranger/conf/count_libraries.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { featureRef = '' diff --git a/src/cellranger/conf/count_metadata.config b/src/cellranger/conf/count_metadata.config index b137cc12..a55c1ba9 100644 --- a/src/cellranger/conf/count_metadata.config +++ b/src/cellranger/conf/count_metadata.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { metadata = '' diff --git a/src/cellranger/conf/mkfastq.config b/src/cellranger/conf/mkfastq.config index e4bd4786..b83a8a58 100644 --- a/src/cellranger/conf/mkfastq.config +++ b/src/cellranger/conf/mkfastq.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { mkfastq { // https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq diff --git a/src/cellranger/main.nf b/src/cellranger/main.nf index 3f499b36..f824d421 100644 --- a/src/cellranger/main.nf +++ b/src/cellranger/main.nf @@ -31,9 +31,9 @@ workflow CELLRANGER { data = MKFASTQ(mkfastq_csv, runFolder) // Allow to combine old demultiplexed data with new data - if (params.sc.cellranger.count.fastqs instanceof Map) { + if (params.getToolParams("cellranger").count.fastqs 
instanceof Map) { // Remove default key - Channel.from(params.sc.cellranger.count.fastqs.findAll { + Channel.from(params.getToolParams("cellranger").count.fastqs.findAll { it.key != 'default' }.collect { k, v -> // Split possible multiple file paths diff --git a/src/cellranger/processes/count.nf b/src/cellranger/processes/count.nf index 7491d2ab..c30b22d1 100644 --- a/src/cellranger/processes/count.nf +++ b/src/cellranger/processes/count.nf @@ -126,7 +126,7 @@ process SC__CELLRANGER__COUNT { def sampleParams = params.parseConfig(sampleId, params.global, toolParams.count) processParams = sampleParams.local if(processParams.sample == '') { - throw new Exception("Regards params.sc.cellranger.count: sample parameter cannot be empty") + throw new Exception("Regards params.getToolParams(\"cellranger\").count: sample parameter cannot be empty") } // Check if the current sample has multiple sequencing runs fastqs = fastqs instanceof List ? fastqs.join(',') : fastqs @@ -165,7 +165,7 @@ process SC__CELLRANGER__COUNT_WITH_LIBRARIES { processParams = sampleParams.local if(processParams.sample == '') { - throw new Exception("Regards params.sc.cellranger.count: sample parameter cannot be empty") + throw new Exception("Regards params.getToolParams(\"cellranger\").count: sample parameter cannot be empty") } // We need to create the libraries.csv file here because it needs absolute paths diff --git a/src/cellranger/workflows/cellranger_libraries.nf b/src/cellranger/workflows/cellranger_libraries.nf index 077d6648..dc949bb1 100644 --- a/src/cellranger/workflows/cellranger_libraries.nf +++ b/src/cellranger/workflows/cellranger_libraries.nf @@ -28,12 +28,12 @@ workflow CELLRANGER_LIBRARIES { main: // Sanity Checking - libMap = params.sc.cellranger.librariesMap + libMap = params.getToolParams("cellranger").librariesMap if (! (libMap instanceof Map)) { throw new Exception("When running the full cellranger pipeline with libraries, you must specify the librariesMap (see docs).") } - librariesFiles = params.sc.cellranger.count.libraries + librariesFiles = params.getToolParams("cellranger").count.libraries if (!(librariesFiles instanceof Map) && librariesFiles) { poolName = params.global.containsKey('project_name') ?
params.global.project_name : '' From 51d4f1ddc7dfe84bd2c6889b7053a6e3a9ee6f94 Mon Sep 17 00:00:00 2001 From: dweemx Date: Sat, 13 Feb 2021 00:16:39 +0100 Subject: [PATCH 144/202] Migrate to params.tools for directs tool --- src/directs/conf/base.config | 2 +- src/directs/conf/test__select_default_clustering.config | 2 +- src/directs/main.test.nf | 2 +- src/directs/processes/selectDefaultClustering.nf | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/directs/conf/base.config b/src/directs/conf/base.config index facaacce..364dfbba 100644 --- a/src/directs/conf/base.config +++ b/src/directs/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { directs { container = 'vibsinglecellnf/directs:0.1.0' select_default_clustering { diff --git a/src/directs/conf/test__select_default_clustering.config b/src/directs/conf/test__select_default_clustering.config index 2bdaa179..3ddd6a73 100644 --- a/src/directs/conf/test__select_default_clustering.config +++ b/src/directs/conf/test__select_default_clustering.config @@ -2,7 +2,7 @@ includeConfig '../../../conf/global.config' includeConfig '../../../conf/singularity.config' params { - sc { + tools { directs { inputLoom = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/fca/analysis/in-house/20200520_000__all__4b9e9810-8600-11ea-867e-a0000220fe80/out/data/20200520_FCA_BioHub_B1_B2_All.HARMONY_SCENIC.loom' } diff --git a/src/directs/main.test.nf b/src/directs/main.test.nf index 677b76a3..35e9334a 100644 --- a/src/directs/main.test.nf +++ b/src/directs/main.test.nf @@ -16,7 +16,7 @@ workflow { main: switch(params.test) { case "SC__DIRECTS__SELECT_DEFAULT_CLUSTERING": - test = Channel.of(tuple('TEST', params.sc.directs.inputLoom, null)) + test = Channel.of(tuple('TEST', params.getToolParams("directs").inputLoom, null)) SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( test ) break; default: diff --git a/src/directs/processes/selectDefaultClustering.nf b/src/directs/processes/selectDefaultClustering.nf index ea600809..a74e8460 100644 --- a/src/directs/processes/selectDefaultClustering.nf +++ b/src/directs/processes/selectDefaultClustering.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/directs/bin/" process SC__DIRECTS__SELECT_DEFAULT_CLUSTERING { - container params.sc.directs.container + container params.getToolParams("directs").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -21,7 +21,7 @@ process SC__DIRECTS__SELECT_DEFAULT_CLUSTERING { val(stashedParams) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.directs.select_default_clustering) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("directs").select_default_clustering) processParams = sampleParams.local """ ${binDir}select_default_clustering.py \ From bce499de2852ecdf5266bac83e6ae4d4ea34fb98 Mon Sep 17 00:00:00 2001 From: dweemx Date: Sat, 13 Feb 2021 00:17:57 +0100 Subject: [PATCH 145/202] Migrate to params.tools for dropletutils tool --- src/dropletutils/dropletutils.config | 2 +- src/dropletutils/processes/barcode_selection.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dropletutils/dropletutils.config b/src/dropletutils/dropletutils.config index 00bc0d1e..5a90a97c 100644 --- a/src/dropletutils/dropletutils.config +++ b/src/dropletutils/dropletutils.config @@ -1,5 +1,5 @@ params { - sc { + tools { dropletutils { container = 'vibsinglecellnf/dropletutils:1.4.3' } diff --git a/src/dropletutils/processes/barcode_selection.nf b/src/dropletutils/processes/barcode_selection.nf index c4f10d03..cb701702 100644 --- a/src/dropletutils/processes/barcode_selection.nf +++ b/src/dropletutils/processes/barcode_selection.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROPLET_UTILS__BARCODE_SELECTION { - container params.sc.dropletutils.container + container params.getToolParams("dropletutils")..container publishDir "03.count", mode: 'symlink' label 'compute_resources__default' From 75c591d9b9716c7113fbe93813d37e3f1320b437 Mon Sep 17 00:00:00 2001 From: dweemx Date: Sat, 13 Feb 2021 00:21:08 +0100 Subject: [PATCH 146/202] Migrate to params.tools for dropseqtools tool --- src/dropletutils/processes/barcode_selection.nf | 2 +- src/dropseqtools/dropseqtools.config | 2 +- src/dropseqtools/processes/bam_tag_histogram.nf | 4 ++-- src/dropseqtools/processes/convert_to_ref_flat.nf | 2 +- src/dropseqtools/processes/detect_bead_synthesis_errors.nf | 4 ++-- src/dropseqtools/processes/digital_expression.nf | 2 +- src/dropseqtools/processes/filter_bam.nf | 4 ++-- src/dropseqtools/processes/gzip.nf | 2 +- src/dropseqtools/processes/polya_trimmer.nf | 4 ++-- .../processes/tag_bam_with_read_sequence_extended.nf | 6 +++--- src/dropseqtools/processes/tag_read_with_gene_exon.nf | 2 +- src/dropseqtools/processes/trim_starting_sequence.nf | 4 ++-- 12 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/dropletutils/processes/barcode_selection.nf b/src/dropletutils/processes/barcode_selection.nf index cb701702..add4090d 100644 --- a/src/dropletutils/processes/barcode_selection.nf +++ b/src/dropletutils/processes/barcode_selection.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROPLET_UTILS__BARCODE_SELECTION { - container params.getToolParams("dropletutils")..container + container params.getToolParams("dropletutils").container publishDir "03.count", mode: 'symlink' label 'compute_resources__default' diff --git a/src/dropseqtools/dropseqtools.config b/src/dropseqtools/dropseqtools.config index dde97f03..6ede9afa 100644 --- a/src/dropseqtools/dropseqtools.config +++ b/src/dropseqtools/dropseqtools.config @@ -1,5 +1,5 @@ params { - 
sc { + tools { dropseqtools { container = 'humancellatlas/dropseqtools:1.12' diff --git a/src/dropseqtools/processes/bam_tag_histogram.nf b/src/dropseqtools/processes/bam_tag_histogram.nf index 38e8489e..0a0c6a7d 100644 --- a/src/dropseqtools/processes/bam_tag_histogram.nf +++ b/src/dropseqtools/processes/bam_tag_histogram.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__BAM_TAG_HISTOGRAM { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/03.count", mode: 'symlink' label 'compute_resources__default' @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__BAM_TAG_HISTOGRAM { tuple val(sample), path("*.cell_readcounts.txt.gz") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.bam_tag_histogram) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").bam_tag_histogram) processParams = sampleParams.local """ BAMTagHistogram \ diff --git a/src/dropseqtools/processes/convert_to_ref_flat.nf b/src/dropseqtools/processes/convert_to_ref_flat.nf index ee91ad93..24a61263 100644 --- a/src/dropseqtools/processes/convert_to_ref_flat.nf +++ b/src/dropseqtools/processes/convert_to_ref_flat.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__CONVERT_TO_REFFLAT { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' label 'compute_resources__default' diff --git a/src/dropseqtools/processes/detect_bead_synthesis_errors.nf b/src/dropseqtools/processes/detect_bead_synthesis_errors.nf index 22d875bc..f38f040e 100644 --- a/src/dropseqtools/processes/detect_bead_synthesis_errors.nf +++ b/src/dropseqtools/processes/detect_bead_synthesis_errors.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__DETECT_REPAIR_BARCODE_SYNTHESIS_ERRORS { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -15,7 +15,7 @@ process SC__DROP_SEQ_TOOLS__DETECT_REPAIR_BARCODE_SYNTHESIS_ERRORS { // tuple file("*.synthesis_stats.summary.txt"), emit: statsSummary script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.detect_repair_barcode_synthesis_errors) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").detect_repair_barcode_synthesis_errors) processParams = sampleParams.local """ DetectBeadSynthesisErrors \ diff --git a/src/dropseqtools/processes/digital_expression.nf b/src/dropseqtools/processes/digital_expression.nf index 755fd548..f1d865f3 100644 --- a/src/dropseqtools/processes/digital_expression.nf +++ b/src/dropseqtools/processes/digital_expression.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__DIGITAL_EXPRESSION { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "03.count", mode: 'symlink' label 'compute_resources__default' diff --git a/src/dropseqtools/processes/filter_bam.nf b/src/dropseqtools/processes/filter_bam.nf index d99b02e4..100057b0 100644 --- a/src/dropseqtools/processes/filter_bam.nf +++ b/src/dropseqtools/processes/filter_bam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__FILTER_UNALIGNED_TAGGED_BAM { - container 
params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__FILTER_UNALIGNED_TAGGED_BAM { tuple val(sample), path('*.unaligned_tagged_filtered.bam'), emit: bam script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.filter_unaligned_tagged_bam) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").filter_unaligned_tagged_bam) processParams = sampleParams.local """ FilterBAM \ diff --git a/src/dropseqtools/processes/gzip.nf b/src/dropseqtools/processes/gzip.nf index d17e149a..62790e6d 100644 --- a/src/dropseqtools/processes/gzip.nf +++ b/src/dropseqtools/processes/gzip.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process GZIP { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/dropseqtools/processes/polya_trimmer.nf b/src/dropseqtools/processes/polya_trimmer.nf index dd6cdef7..1529cb03 100644 --- a/src/dropseqtools/processes/polya_trimmer.nf +++ b/src/dropseqtools/processes/polya_trimmer.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__TRIM_POLYA_UNALIGNED_TAGGED_TRIMMED_SMART { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process SC__DROP_SEQ_TOOLS__TRIM_POLYA_UNALIGNED_TAGGED_TRIMMED_SMART { tuple file('*.polyA_trimming_report.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.trim_polya_unaligned_tagged_trimmed_smart) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").trim_polya_unaligned_tagged_trimmed_smart) processParams = sampleParams.local """ PolyATrimmer \ diff --git a/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf b/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf index de3e0f68..edd7cd6a 100644 --- a/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf +++ b/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf @@ -1,7 +1,7 @@ process SC__DROP_SEQ_TOOLS__TAG_UNALIGNED_BAM_WITH_CELLBARCODE { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__TAG_UNALIGNED_BAM_WITH_CELLBARCODE { tuple file('*.unaligned_tagged_Cellular.bam_summary.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.tag_unaligned_bam_with_cellbarcode) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").tag_unaligned_bam_with_cellbarcode) processParams = sampleParams.local """ TagBamWithReadSequenceExtended \ @@ -43,7 +43,7 @@ process SC__DROP_SEQ_TOOLS__TAG_UNALIGNED_BAM_WITH_CELLMOLECULAR { tuple file('*.unaligned_tagged_Molecular.bam_summary.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, 
params.global, params.sc.dropseqtools.tag_unaligned_bam_with_cellmolecular) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").tag_unaligned_bam_with_cellmolecular) processParams = sampleParams.local """ source $DWMAX/documents/aertslab/scripts/src_dwmax/bash-utils/utils.sh diff --git a/src/dropseqtools/processes/tag_read_with_gene_exon.nf b/src/dropseqtools/processes/tag_read_with_gene_exon.nf index 20a227bc..02ebd6db 100644 --- a/src/dropseqtools/processes/tag_read_with_gene_exon.nf +++ b/src/dropseqtools/processes/tag_read_with_gene_exon.nf @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__TAG_READ_WITH_GENE_EXON { tuple val(sample), path("*.merged_gene-exon-tagged.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.tag_read_with_gene_exon) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").tag_read_with_gene_exon) processParams = sampleParams.local """ source $DWMAX/documents/aertslab/scripts/src_dwmax/bash-utils/utils.sh diff --git a/src/dropseqtools/processes/trim_starting_sequence.nf b/src/dropseqtools/processes/trim_starting_sequence.nf index 58436846..32985481 100644 --- a/src/dropseqtools/processes/trim_starting_sequence.nf +++ b/src/dropseqtools/processes/trim_starting_sequence.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__TRIM_SMART_UNALIGNED_TAGGED_FILTERED_BAM { - container params.sc.dropseqtools.container + container params.getToolParams("dropseqtools").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process SC__DROP_SEQ_TOOLS__TRIM_SMART_UNALIGNED_TAGGED_FILTERED_BAM { tuple file('*.adapter_trimming_report.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.dropseqtools.trim_smart_unaligned_tagged_filtered_bam) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").trim_smart_unaligned_tagged_filtered_bam) processParams = sampleParams.local """ TrimStartingSequence \ From 2a54dcb9e556cc3b0528a6c879c6e48eb674ebeb Mon Sep 17 00:00:00 2001 From: dweemx Date: Sat, 13 Feb 2021 00:26:34 +0100 Subject: [PATCH 147/202] Migrate to params.tools for edirect tool --- src/edirect/edirect.config | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/edirect/edirect.config b/src/edirect/edirect.config index f5e066a3..f49a11c0 100644 --- a/src/edirect/edirect.config +++ b/src/edirect/edirect.config @@ -1,5 +1,7 @@ params { - edirect { - container = 'ncbi/edirect:latest' + tools { + edirect { + container = 'ncbi/edirect:latest' + } } } \ No newline at end of file From 039dd82fc137b913dc5f79c7d025b2c7858fdac0 Mon Sep 17 00:00:00 2001 From: dweemx Date: Sat, 13 Feb 2021 00:38:32 +0100 Subject: [PATCH 148/202] Migrate to params.tools for fastp tool --- conf/generic.config | 8 ++++++++ src/fastp/fastp.config | 14 ++++++++------ src/fastp/processes/clean_and_fastqc.nf | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/conf/generic.config b/conf/generic.config index e83c879e..a8e2d188 100644 --- a/conf/generic.config +++ b/conf/generic.config @@ -27,6 +27,9 @@ params { if(p.containsKey("sc")) { return p.sc[toolKey] } + if(p.containsKey(toolKey)) { + return p[toolKey] + } throw new Exception("VSN ERROR: Cannot get tool params from NXF params.") } if(!toolKey.contains(".")) { @@ -37,6 +40,8
@@ params { entry = params.tools } else if(params.containsKey("sc")) { entry = params.sc + } else if(params.containsKey(toolKey)) { + entry = params } else { throw new Exception("VSN ERROR: Missing params..") } @@ -51,6 +56,9 @@ params { if(params.containsKey("sc")) { return params.sc.containsKey(toolKey) } + if(params.containsKey(toolKey)) { + return params.containsKey(toolKey) + } return false } } diff --git a/src/fastp/fastp.config b/src/fastp/fastp.config index 1b518930..aac86685 100644 --- a/src/fastp/fastp.config +++ b/src/fastp/fastp.config @@ -1,11 +1,13 @@ params { - fastp { - container = 'vibsinglecellnf/fastp:0.20.0' - thread = 1 + tools { + fastp { + container = 'vibsinglecellnf/fastp:0.20.0' + thread = 1 - clean_and_fastqc { - length_required = 20 - adapter_fasta = "$baseDir/src/fastp/assets/fastp.adapters" + clean_and_fastqc { + length_required = 20 + adapter_fasta = "$baseDir/src/fastp/assets/fastp.adapters" + } } } } diff --git a/src/fastp/processes/clean_and_fastqc.nf b/src/fastp/processes/clean_and_fastqc.nf index 3550b84c..b0f0ee6c 100644 --- a/src/fastp/processes/clean_and_fastqc.nf +++ b/src/fastp/processes/clean_and_fastqc.nf @@ -5,7 +5,7 @@ nextflow.enable.dsl=2 */ process FASTP__CLEAN_AND_FASTQC { - container params.fastp.container + container params.getToolParams("fastp").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' From e55385f2bc27c26885d4d81b915f35c4ca3cbbf2 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:05:56 +0100 Subject: [PATCH 149/202] Migrate to params.tools for flybaser tool --- src/flybaser/flybaser.config | 12 +++++++----- src/flybaser/processes/convertNomenclature.nf | 12 ++++++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/flybaser/flybaser.config b/src/flybaser/flybaser.config index 54554f5e..6dbcccb8 100644 --- a/src/flybaser/flybaser.config +++ b/src/flybaser/flybaser.config @@ -1,9 +1,11 @@ params { - flybaser { - container = 'vibsinglecellnf/flybaser:0.2.1' - - convert_fbgn_to_gene_symbol { - columnName = '' + tools { + flybaser { + container = 'vibsinglecellnf/flybaser:0.2.1' + + convert_fbgn_to_gene_symbol { + columnName = '' + } } } } diff --git a/src/flybaser/processes/convertNomenclature.nf b/src/flybaser/processes/convertNomenclature.nf index 7db1134c..1ec3e7c3 100644 --- a/src/flybaser/processes/convertNomenclature.nf +++ b/src/flybaser/processes/convertNomenclature.nf @@ -8,18 +8,22 @@ if(!params.containsKey("test")) { process FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL { - container params.flybaser.container + container params.tools.flybaser.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' input: - tuple val(sampleId), path(f) + tuple \ + val(sampleId), \ + path(f) output: - tuple val(sampleId), path("${sampleId}.FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL.tsv") + tuple \ + val(sampleId), \ + path("${sampleId}.FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.flybaser.convert_fbgn_to_gene_symbol) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.flybaser.convert_fbgn_to_gene_symbol) processParams = sampleParams.local """ ${binDir}convertFBgnToGeneSymbol.R \ From fbc446b2c33c50dae81a2f8a96ff715bb20eb297 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:08:16 +0100 Subject: [PATCH 150/202] Migrate to params.tools for harmony tool ---
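Notes: this patch moves the harmony settings under the params.tools scope. A minimal sketch of the migrated layout (the varsUse value is illustrative, not a shipped default; the container tag is the one carried in src/harmony/harmony.config):

    params {
        tools {
            harmony {
                container = 'vibsinglecellnf/harmony:1.0-3'
                varsUse = ['batch']
            }
        }
    }

Tools that still resolve their settings generically can keep using the params.getToolParams helper, which after PATCH 148/202 checks params.tools first and then falls back to the legacy params.sc scope.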
docs/development.rst | 6 +++--- main.nf | 5 ++++- src/harmony/harmony.config | 2 +- src/harmony/processes/runHarmony.nf | 9 ++++----- src/harmony/workflows/bec_harmony.nf | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 3ef27fa6..8a30e11d 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -239,7 +239,7 @@ Steps: process SC__HARMONY__HARMONY_MATRIX { - container params.sc.harmony.container + container params.tools.harmony.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' clusterOptions "-l nodes=1:ppn=${params.global.threads} -l walltime=1:00:00 -A ${params.global.qsubaccount}" @@ -250,7 +250,7 @@ Steps: tuple val(sampleId), path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.harmony) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony) processParams = sampleParams.local varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') """ @@ -400,7 +400,7 @@ Steps: ) harmony_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.sc.harmony.report_ipynb), + file(workflow.projectDir + params.tools.harmony.report_ipynb), "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) diff --git a/main.nf b/main.nf index 6ebf6598..2aebc069 100644 --- a/main.nf +++ b/main.nf @@ -80,7 +80,10 @@ workflow mnncorrect { } def getHarmonyBatchVariables = { params -> - batchVariables = params.sc.harmony.varsUse + batchVariables = params.tools.harmony.varsUse + if(batchVariables.size() > 1) { + throw new Exception("Currently it is not supported to run with multiple batch variables.") + } return batchVariables } diff --git a/src/harmony/harmony.config b/src/harmony/harmony.config index f7781f77..3af25635 100644 --- a/src/harmony/harmony.config +++ b/src/harmony/harmony.config @@ -1,5 +1,5 @@ params { - sc { + tools { harmony { container = 'vibsinglecellnf/harmony:1.0-3' report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/harmony/bin/reports/sc_harmony_report.ipynb" diff --git a/src/harmony/processes/runHarmony.nf b/src/harmony/processes/runHarmony.nf index 3c02dce9..517fd1d8 100644 --- a/src/harmony/processes/runHarmony.nf +++ b/src/harmony/processes/runHarmony.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/harmony/bin/" process SC__HARMONY__HARMONY_MATRIX { - container params.sc.harmony.container + container params.tools.harmony.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -19,10 +19,9 @@ process SC__HARMONY__HARMONY_MATRIX { path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.harmony) - def processParams = sampleParams.local - // Arguments - def varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony) + processParams = sampleParams.local + varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') """ ${binDir}run_harmony.R \ ${f} \ diff --git a/src/harmony/workflows/bec_harmony.nf b/src/harmony/workflows/bec_harmony.nf index 44e9f50a..f4de29ff 100644 --- a/src/harmony/workflows/bec_harmony.nf +++ b/src/harmony/workflows/bec_harmony.nf @@ -131,7 +131,7 @@ workflow BEC_HARMONY { ) harmony_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.sc.harmony.report_ipynb), + file(workflow.projectDir + params.tools.harmony.report_ipynb), "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) From f8d2f073040844fcde87d7d61de846e4616d45f4 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:09:29 +0100 Subject: [PATCH 151/202] Migrate to params.tools for pcacv tool --- src/pcacv/pcacv.config | 31 ++++++++++++++++--------------- src/pcacv/processes/runPCACV.nf | 4 ++-- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/pcacv/pcacv.config b/src/pcacv/pcacv.config index 5b2867c9..1dfb8a79 100644 --- a/src/pcacv/pcacv.config +++ b/src/pcacv/pcacv.config @@ -1,19 +1,20 @@ params { - - pcacv { - container = "vibsinglecellnf/pcacv:0.2.0" - find_optimal_npcs { - accessor = '@assays$RNA@scale.data' - // useVariableFeatures = true // or false - // kFold = '' - // fromNPC = '' - // toNPC = '' - // byNPC = '' - // maxIters = '' - // seed = '' - // verbose = '' - // devaultSVD = '' - // nPCFallback = 0 + tools { + pcacv { + container = "vibsinglecellnf/pcacv:0.2.0" + find_optimal_npcs { + accessor = '@assays$RNA@scale.data' + // useVariableFeatures = true // or false + // kFold = '' + // fromNPC = '' + // toNPC = '' + // byNPC = '' + // maxIters = '' + // seed = '' + // verbose = '' + // devaultSVD = '' + // nPCFallback = 0 + } } } diff --git a/src/pcacv/processes/runPCACV.nf b/src/pcacv/processes/runPCACV.nf index 6a7dc94e..06141188 100644 --- a/src/pcacv/processes/runPCACV.nf +++ b/src/pcacv/processes/runPCACV.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pcacv/bin" : process PCACV__FIND_OPTIMAL_NPCS { - container params.pcacv.container + container params.tools.pcacv.container publishDir "${params.global.outdir}/data/pcacv", mode: 'link' label 'compute_resources__pcacv' @@ -27,7 +27,7 @@ process PCACV__FIND_OPTIMAL_NPCS { emit: files script: - def sampleParams = params.parseConfig(sampleId, params.global, params.pcacv.find_optimal_npcs) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.pcacv.find_optimal_npcs) processParams = sampleParams.local """ export OPENBLAS_NUM_THREADS=1 From b6ea3d32e56d33e2f8ae74257c81eb06ee7cc31e Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:11:19 +0100 Subject: [PATCH 152/202] Migrate to params.tools for picard tool --- src/picard/picard.config | 18 ++++++++++-------- .../processes/create_sequence_dictionary.nf | 2 +- src/picard/processes/fastq_to_bam.nf | 2 +- src/picard/processes/merge_bam_alignment.nf | 4 ++-- src/picard/processes/sam_to_fastq.nf | 2 +- src/picard/processes/sort_sam.nf | 4 ++-- 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/picard/picard.config b/src/picard/picard.config index b8b5a0e9..22f3196e 100644 --- a/src/picard/picard.config +++ b/src/picard/picard.config @@ -1,14 +1,16 @@ params { - picard { - container = 'vibsinglecellnf/picard:2.21.1' + tools{ + picard { + container = 'vibsinglecellnf/picard:2.21.1' - merge_bam_alignment { - includeSecondaryAlignments = false - pairedRun = false - } + merge_bam_alignment { + includeSecondaryAlignments = false + pairedRun = false + } - sort_sam { - so = "coordinate" + sort_sam { + so = "coordinate" + } } } } diff --git a/src/picard/processes/create_sequence_dictionary.nf b/src/picard/processes/create_sequence_dictionary.nf index 4b638ead..69fe92ee 100644 --- a/src/picard/processes/create_sequence_dictionary.nf +++ b/src/picard/processes/create_sequence_dictionary.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__CREATE_SEQUENCE_DICTIONARY { - container params.picard.container + container params.tools.picard.container publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' label 'compute_resources__default' diff --git a/src/picard/processes/fastq_to_bam.nf b/src/picard/processes/fastq_to_bam.nf index e97dcd99..1e958eb1 100644 --- a/src/picard/processes/fastq_to_bam.nf +++ b/src/picard/processes/fastq_to_bam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__FASTQ_TO_BAM { - container params.picard.container + container params.tools.picard.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/picard/processes/merge_bam_alignment.nf b/src/picard/processes/merge_bam_alignment.nf index b19a9b27..8f43b6f8 100644 --- a/src/picard/processes/merge_bam_alignment.nf +++ b/src/picard/processes/merge_bam_alignment.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__MERGE_BAM_ALIGNMENT { - container params.picard.container + container params.tools.picard.container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -17,7 +17,7 @@ process PICARD__MERGE_BAM_ALIGNMENT { tuple val(sample), path("*.merged.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.picard.merge_bam_alignment) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.picard.merge_bam_alignment) processParams = sampleParams.local """ java 
-Djava.io.tmpdir=$tmpDir -jar \ diff --git a/src/picard/processes/sam_to_fastq.nf b/src/picard/processes/sam_to_fastq.nf index ee87f02d..98a5448f 100644 --- a/src/picard/processes/sam_to_fastq.nf +++ b/src/picard/processes/sam_to_fastq.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__BAM_TO_FASTQ { - container params.picard.container + container params.tools.picard.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/picard/processes/sort_sam.nf b/src/picard/processes/sort_sam.nf index 32acc056..f10c3c2c 100644 --- a/src/picard/processes/sort_sam.nf +++ b/src/picard/processes/sort_sam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__SORT_SAM { - container params.picard.container + container params.tools.picard.container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process PICARD__SORT_SAM { tuple val(sample), path("*.STAR_aligned_sorted.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.picard.sort_sam) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.picard.sort_sam) processParams = sampleParams.local """ java -Djava.io.tmpdir=$tmpDir -jar \ From 2bc48cb52106a36dc2167da7c4c132456898807d Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:13:04 +0100 Subject: [PATCH 153/202] Migrate to params.tools for popscle tool --- docs/pipelines.rst | 6 +++--- src/popscle/popscle.config | 2 +- src/popscle/processes/demuxlet.nf | 12 ++++++------ src/popscle/processes/dsc_pileup.nf | 4 ++-- src/popscle/workflows/demuxlet.nf | 2 +- src/popscle/workflows/dsc_pileup.nf | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index ad1cacf4..b065780c 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -326,9 +326,9 @@ NOTES: Runs the ``demuxlet`` or ``freemuxlet`` workflows (``dsc-pileup`` [with prefiltering], then ``freemuxlet`` or ``demuxlet``) Input parameters are specified within the config file: -* ``params.sc.popscle.vcf``: path to the VCF file for demultiplexing -* ``params.sc.popscle.freemuxlet.nSamples``: Number of clusters to extract (should match the number of samples pooled) -* ``params.sc.popscle.demuxlet.field``: Field in the VCF with genotype information +* ``params.tools.popscle.vcf``: path to the VCF file for demultiplexing +* ``params.tools.popscle.freemuxlet.nSamples``: Number of clusters to extract (should match the number of samples pooled) +* ``params.tools.popscle.demuxlet.field``: Field in the VCF with genotype information ---- diff --git a/src/popscle/popscle.config b/src/popscle/popscle.config index 4ab2db38..607d102a 100644 --- a/src/popscle/popscle.config +++ b/src/popscle/popscle.config @@ -1,5 +1,5 @@ params { - sc { + tools { popscle { container = 'vibsinglecellnf/popscle:2021-05-05-da70fc7' vcf = '/path/to/vcf_file' diff --git a/src/popscle/processes/demuxlet.nf b/src/popscle/processes/demuxlet.nf index a394f2c6..51060956 100644 --- a/src/popscle/processes/demuxlet.nf +++ b/src/popscle/processes/demuxlet.nf @@ -4,8 +4,8 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/popscle/bin/" process SC__POPSCLE__DEMUXLET { - container params.sc.popscle.container - publishDir "${params.global.outdir}/data", mode: params.utils.publish.mode + container params.tools.popscle.container + publishDir "${params.global.outdir}/data", mode: 'symlink' label 'compute_resources__cpu' input: @@ -16,7 +16,7 @@ process SC__POPSCLE__DEMUXLET { tuple val(sampleId), path("${sampleId}_demuxlet*") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.popscle.demuxlet) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.demuxlet) processParams = sampleParams.local """ @@ -30,8 +30,8 @@ process SC__POPSCLE__DEMUXLET { process SC__POPSCLE__FREEMUXLET { - container params.sc.popscle.container - publishDir "${params.global.outdir}/data", mode: params.utils.publish.mode + container params.tools.popscle.container + publishDir "${params.global.outdir}/data", mode: 'symlink' label 'compute_resources__cpu' input: @@ -41,7 +41,7 @@ process SC__POPSCLE__FREEMUXLET { tuple val(sampleId), path("${sampleId}_freemuxlet*") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.popscle.freemuxlet) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.freemuxlet) processParams = sampleParams.local """ diff --git a/src/popscle/processes/dsc_pileup.nf b/src/popscle/processes/dsc_pileup.nf index d8d837cf..129837b7 100644 --- a/src/popscle/processes/dsc_pileup.nf +++ b/src/popscle/processes/dsc_pileup.nf @@ -6,7 +6,7 @@ toolParams = params.sc.popscle process SC__POPSCLE__DSC_PILEUP { - container params.sc.popscle.container + container params.tools.popscle.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -29,7 +29,7 @@ process SC__POPSCLE__DSC_PILEUP { process SC__POPSCLE__PREFILTER_DSC_PILEUP { - container params.sc.popscle.container + container params.tools.popscle.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__cpu' diff --git a/src/popscle/workflows/demuxlet.nf b/src/popscle/workflows/demuxlet.nf index f9450307..d4f4dcc7 100644 --- a/src/popscle/workflows/demuxlet.nf +++ b/src/popscle/workflows/demuxlet.nf @@ -113,7 +113,7 @@ workflow DEMUXLET { data main: - vcf = file(params.sc.popscle.vcf) + vcf = file(params.tools.popscle.vcf) DSC_PILEUP_FILTERED(data) SC__POPSCLE__DEMUXLET(DSC_PILEUP_FILTERED.out, vcf) diff --git a/src/popscle/workflows/dsc_pileup.nf b/src/popscle/workflows/dsc_pileup.nf index d360ac14..49eb6319 100644 --- a/src/popscle/workflows/dsc_pileup.nf +++ b/src/popscle/workflows/dsc_pileup.nf @@ -22,7 +22,7 @@ workflow DSC_PILEUP_FILTERED { data main: - vcf = file(params.sc.popscle.vcf) + vcf = file(params.tools.popscle.vcf) SC__POPSCLE__PREFILTER_DSC_PILEUP(data, vcf) SC__POPSCLE__DSC_PILEUP(SC__POPSCLE__PREFILTER_DSC_PILEUP.out, vcf) From 625c4e66c39c3cf7288281c7f12d81c2c280d9c3 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:20:40 +0100 Subject: [PATCH 154/202] Migrate to params.tools for scenic tool --- docs/pipelines.rst | 4 +- main.nf | 2 +- src/scenic/README.md | 14 ++-- src/scenic/bin/aucell_from_folder.py | 2 +- src/scenic/conf/append.config | 2 +- src/scenic/conf/min/aucell.config | 2 +- src/scenic/conf/min/base/v0.0.1.config | 10 +-- src/scenic/conf/min/cistarget.config | 8 +- .../cistarget-motifs-fly-dm6-v0.0.1.config | 2 +- 
.../cistarget-motifs-human-hg19-v0.0.1.config | 2 +- .../cistarget-motifs-human-hg38-v0.0.1.config | 2 +- .../cistarget-motifs-mouse-mm10-v0.0.1.config | 2 +- .../cistarget-tracks-fly-dm6-v0.0.1.config | 2 +- .../cistarget-tracks-human-hg19-v0.0.1.config | 2 +- .../cistarget-tracks-human-hg38-v0.0.1.config | 2 +- src/scenic/conf/min/grn.config | 8 +- src/scenic/conf/min/labels.config | 4 +- src/scenic/conf/min/scenic.config | 2 +- src/scenic/conf/min/scope.config | 2 +- src/scenic/conf/min/tfs/fly-v0.0.1.config | 2 +- src/scenic/conf/min/tfs/human-v0.0.1.config | 2 +- src/scenic/conf/min/tfs/mouse-v0.0.1.config | 2 +- src/scenic/conf/multi_runs.config | 2 +- src/scenic/conf/test.config | 2 +- src/scenic/conf/test_multi_runs.config | 2 +- src/scenic/main.nf | 42 +++++----- src/scenic/main.test.nf | 76 +++++++++---------- src/scenic/processes/add_correlation.nf | 4 +- .../arboreto_with_multiprocessing.nf | 4 +- src/scenic/processes/aucell.nf | 4 +- src/scenic/processes/cistarget.nf | 4 +- src/scenic/processes/loomHandler.nf | 6 +- .../processes/multiruns/aggregateFeatures.nf | 4 +- .../processes/multiruns/aggregateRegulons.nf | 2 +- .../processes/multiruns/aucellFromFolder.nf | 4 +- .../multiruns/convertMotifsToRegulons.nf | 2 +- src/scenic/processes/multiruns/saveToLoom.nf | 2 +- src/scenic/processes/reports.nf | 2 +- src/scenic/scenic.config | 2 +- 39 files changed, 122 insertions(+), 122 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index b065780c..3d04991a 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -179,7 +179,7 @@ See ``single_sample``, ``decontx`` and ``scrublet`` to know more about the files .. |scenic| image:: https://github.com/vib-singlecell-nf/vsn-pipelines/workflows/scenic/badge.svg Runs the ``scenic`` workflow alone, generating a loom file with only the SCENIC results. -Currently, the required input is a loom file (set by `params.sc.scenic.filteredLoom`). +Currently, the required input is a loom file (set by `params.tools.scenic.filteredLoom`). |SCENIC Workflow| @@ -194,7 +194,7 @@ Currently, the required input is a loom file (set by `params.sc.scenic.filteredL .. |scenic_multiruns| image:: https://github.com/vib-singlecell-nf/vsn-pipelines/workflows/scenic_multiruns/badge.svg .. |single_sample_scenic_multiruns| image:: https://github.com/vib-singlecell-nf/vsn-pipelines/workflows/single_sample_scenic_multiruns/badge.svg -Runs the ``scenic`` workflow multiple times (set by ``params.sc.scenic.numRuns``), generating a loom file with the aggregated results from the multiple SCENIC runs. +Runs the ``scenic`` workflow multiple times (set by ``params.tools.scenic.numRuns``), generating a loom file with the aggregated results from the multiple SCENIC runs. Note that this is not a complete entry-point itself, but a configuration option for the `scenic` module. Simply adding `-profile scenic_multiruns` during the config step will activate this analysis option for any of the standard entrypoints. 
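For example, the multi-run mode described above is now configured under the tools scope; a minimal sketch (the loom path is a placeholder, not a shipped default):

    params {
        tools {
            scenic {
                filteredLoom = '/path/to/expr_mat.loom'
                numRuns = 2
            }
        }
    }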
diff --git a/main.nf b/main.nf index 2aebc069..6f262028 100644 --- a/main.nf +++ b/main.nf @@ -763,7 +763,7 @@ workflow scenic { } from "./src/utils/workflows/utils" params(params) SCENIC( - Channel.of( tuple(params.global.project_name, file(params.sc.scenic.filteredLoom))) + Channel.of( tuple(params.global.project_name, file(params.tools.scenic.filteredLoom))) ) if(params.utils?.publish) { diff --git a/src/scenic/README.md b/src/scenic/README.md index 54856b8d..b51cda1b 100644 --- a/src/scenic/README.md +++ b/src/scenic/README.md @@ -27,13 +27,13 @@ nextflow config \ Make sure the following parameters are correctly set: - `params.global.project_name` - `params.global.qsubaccount` if running on a cluster (SGE cluster) -- `params.sc.scenic.filteredLoom` -- `params.sc.scenic.grn.tfs` -- `params.sc.scenic.cistarget.motifsDb` -- `params.sc.scenic.cistarget.motifAnnotation` -- `params.sc.scenic.cistarget.tracksDb` if commented, track-based cisTarget won't run -- `params.sc.scenic.cistarget.tracksAnnotation` if commented, track-based cisTarget won't run -- `params.sc.scenic.numRuns` if running SCENIC in multi-runs mode +- `params.tools.scenic.filteredLoom` +- `params.tools.scenic.grn.tfs` +- `params.tools.scenic.cistarget.motifsDb` +- `params.tools.scenic.cistarget.motifAnnotation` +- `params.tools.scenic.cistarget.tracksDb` if commented, track-based cisTarget won't run +- `params.tools.scenic.cistarget.tracksAnnotation` if commented, track-based cisTarget won't run +- `params.tools.scenic.numRuns` if running SCENIC in multi-runs mode - `singularity.runOptions` Specify the paths to mount - `params.sc.scope.tree` diff --git a/src/scenic/bin/aucell_from_folder.py b/src/scenic/bin/aucell_from_folder.py index a8be9b2a..43c53c4c 100755 --- a/src/scenic/bin/aucell_from_folder.py +++ b/src/scenic/bin/aucell_from_folder.py @@ -95,7 +95,7 @@ ) if len(signatures) == 0: - raise Exception(f"No signature passing filtering. Please consider to adapt 'min_genes_regulon = {args.min_genes_regulon}' and 'min_regulon_gene_occurrence = {args.min_regulon_gene_occurrence}' (see params.sc.scenic.aucell). Make sure these settings are smaller than numRuns (params.sc.scenic).") + raise Exception(f"No signature passing filtering. Please consider to adapt 'min_genes_regulon = {args.min_genes_regulon}' and 'min_regulon_gene_occurrence = {args.min_regulon_gene_occurrence}' (see params.tools.scenic.aucell). 
Make sure these settings are smaller than numRuns (params.tools.scenic).") auc_threshold = args.auc_threshold diff --git a/src/scenic/conf/append.config b/src/scenic/conf/append.config index dd0af3e5..2a32607b 100644 --- a/src/scenic/conf/append.config +++ b/src/scenic/conf/append.config @@ -1,6 +1,6 @@ params { - sc { + tools { scenic { report_ipynb = '/src/scenic/bin/reports/scenic_report.ipynb' existingScenicLoom = '' diff --git a/src/scenic/conf/min/aucell.config b/src/scenic/conf/min/aucell.config index 1a427a08..64ec0b57 100644 --- a/src/scenic/conf/min/aucell.config +++ b/src/scenic/conf/min/aucell.config @@ -1,6 +1,6 @@ params { - sc { + tools { scenic { aucell { output = 'aucell_output.loom' diff --git a/src/scenic/conf/min/base/v0.0.1.config b/src/scenic/conf/min/base/v0.0.1.config index ac6c4b81..b2531630 100644 --- a/src/scenic/conf/min/base/v0.0.1.config +++ b/src/scenic/conf/min/base/v0.0.1.config @@ -1,5 +1,5 @@ // Define local variable otherwise it's going to be kept in the final config -def _ = params.sc.scenic +def _ = params.tools.scenic // Sanity checks if(!params.global.containsKey("species")) throw new Exception("The params.global.species parameter is required.") @@ -10,7 +10,7 @@ if(!params.global.containsKey("outdir")) params { global = params.global - sc { + tools { scenic { // Container settings container = 'aertslab/pyscenic:0.10.0' @@ -39,9 +39,9 @@ params { // Databases versions // PUBLIC -params.sc.scenic.tfsVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tfsVersion") ? _.tfsVersion : "${params.global.species}-v0.0.1" -params.sc.scenic.motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion") ? _.motifsDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" -params.sc.scenic.tracksDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tracksDbVersion") ? _.tracksDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" +params.tools.scenic.tfsVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tfsVersion") ? _.tfsVersion : "${params.global.species}-v0.0.1" +params.tools.scenic.motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion") ? _.motifsDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" +params.tools.scenic.tracksDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tracksDbVersion") ? _.tracksDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" includeConfig '../labels.config' diff --git a/src/scenic/conf/min/cistarget.config b/src/scenic/conf/min/cistarget.config index 45c09862..30f9546c 100644 --- a/src/scenic/conf/min/cistarget.config +++ b/src/scenic/conf/min/cistarget.config @@ -1,4 +1,4 @@ -def _ = params.sc.scenic +def _ = params.tools.scenic // Sanity checks if(!(params.global.species in ["human", "mouse", "fly"])) throw new Exception("No cisTarget databases found for the given species: "+ params.global.species) @@ -6,7 +6,7 @@ if(params.global.species == "human" && !(params.global.genome.assembly in ["hg38 throw new Exception("No cisTarget databases found for the given genome: "+ params.global.genome.assembly) params { - sc { + tools { scenic { cistarget { adj = "adj.tsv" @@ -49,6 +49,6 @@ def useMotifs = _.containsKey("cistarget") && _.cistarget.containsKey("useMotifs def useTracks = _.containsKey("cistarget") && _.cistarget.containsKey("useTracks") ? 
_.cistarget.useTracks: false if(useMotifs) - includeConfig "dbs/cistarget-motifs-${params.sc.scenic.motifsDbVersion}.config" + includeConfig "dbs/cistarget-motifs-${params.tools.scenic.motifsDbVersion}.config" if(useTracks) - includeConfig "dbs/cistarget-tracks-${params.sc.scenic.tracksDbVersion}.config" + includeConfig "dbs/cistarget-tracks-${params.tools.scenic.tracksDbVersion}.config" diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config index a0346c58..d170c3ca 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config index d805f8b7..ec4fb229 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // Motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config index 6ea5fcf3..f1cf7671 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // Motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config index 57a56695..56e01ef5 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // Motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config index 1e6fe1a5..ab3b5919 100644 --- a/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // track feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config index 09ba7c20..b48ddeab 100644 --- a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // Track feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config index e74060a2..12552632 100644 --- a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { cistarget { // Track feather format databases diff --git a/src/scenic/conf/min/grn.config b/src/scenic/conf/min/grn.config index ccaa21a3..3178cf92 100644 --- a/src/scenic/conf/min/grn.config +++ b/src/scenic/conf/min/grn.config @@ -1,10 +1,10 @@ -def _ = params.sc.scenic +def _ = 
params.tools.scenic // Sanity checks if(!(params.global.species in ["human", "mouse", "fly"])) throw new Exception("No TFs found for the given species: "+ params.global.species) params { - sc { + tools { scenic { grn { // seed = 617 @@ -12,7 +12,7 @@ params { // PUBLIC // maxForks = _.containsKey("grn") && _.grn.containsKey("maxForks") ? _.grn.maxForks : 1 // numWorkers = _.containsKey("grn") && _.grn.containsKey("numWorkers") ? _.grn.numWorkers : 2 - // // Following parameters are not used except params.sc.scenic.labels.processExecutor = 'qsub' + // // Following parameters are not used except params.tools.scenic.labels.processExecutor = 'qsub' // pmem = _.containsKey("grn") && _.grn.containsKey("pmem") ? _.grn.pmem : '2gb' // walltime = '24:00:00' } @@ -20,4 +20,4 @@ params { } } -includeConfig "tfs/${params.sc.scenic.tfsVersion}.config" +includeConfig "tfs/${params.tools.scenic.tfsVersion}.config" diff --git a/src/scenic/conf/min/labels.config b/src/scenic/conf/min/labels.config index c9bc3c66..b2072584 100644 --- a/src/scenic/conf/min/labels.config +++ b/src/scenic/conf/min/labels.config @@ -1,7 +1,7 @@ -def _ = params.sc.scenic +def _ = params.tools.scenic params { - sc { + tools { scenic { labels { // Resources settings: diff --git a/src/scenic/conf/min/scenic.config b/src/scenic/conf/min/scenic.config index 6ec3d049..fe51e982 100644 --- a/src/scenic/conf/min/scenic.config +++ b/src/scenic/conf/min/scenic.config @@ -10,7 +10,7 @@ params { outdir = "out" } - sc { + tools { scenic { configVersion = "v0.0.1" } diff --git a/src/scenic/conf/min/scope.config b/src/scenic/conf/min/scope.config index b8d0a8d9..80b8dc9f 100644 --- a/src/scenic/conf/min/scope.config +++ b/src/scenic/conf/min/scope.config @@ -1,5 +1,5 @@ params { - sc { + tools { scope { genome = "" tree { diff --git a/src/scenic/conf/min/tfs/fly-v0.0.1.config b/src/scenic/conf/min/tfs/fly-v0.0.1.config index 0606757b..996f3227 100644 --- a/src/scenic/conf/min/tfs/fly-v0.0.1.config +++ b/src/scenic/conf/min/tfs/fly-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { grn { tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_dmel.txt" diff --git a/src/scenic/conf/min/tfs/human-v0.0.1.config b/src/scenic/conf/min/tfs/human-v0.0.1.config index b7f2b1d1..062477dd 100644 --- a/src/scenic/conf/min/tfs/human-v0.0.1.config +++ b/src/scenic/conf/min/tfs/human-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { grn { tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_hg38.txt" diff --git a/src/scenic/conf/min/tfs/mouse-v0.0.1.config b/src/scenic/conf/min/tfs/mouse-v0.0.1.config index 1d10fe65..903ced9f 100644 --- a/src/scenic/conf/min/tfs/mouse-v0.0.1.config +++ b/src/scenic/conf/min/tfs/mouse-v0.0.1.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { grn { tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt" diff --git a/src/scenic/conf/multi_runs.config b/src/scenic/conf/multi_runs.config index 349c2a20..9433a60a 100644 --- a/src/scenic/conf/multi_runs.config +++ b/src/scenic/conf/multi_runs.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { numRuns = 2 // AUCell parameters diff --git a/src/scenic/conf/test.config b/src/scenic/conf/test.config index 234d2034..198e2dd9 100644 --- a/src/scenic/conf/test.config +++ b/src/scenic/conf/test.config @@ -3,7 +3,7 @@ params { project_name = 'Test' } - sc { + tools { scenic { filteredLoom = '/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/testruns/scenic-nf_testing/expr_mat.loom' // for 
testing diff --git a/src/scenic/conf/test_multi_runs.config b/src/scenic/conf/test_multi_runs.config index 016d282c..b9fabfe6 100644 --- a/src/scenic/conf/test_multi_runs.config +++ b/src/scenic/conf/test_multi_runs.config @@ -1,5 +1,5 @@ params { - sc { + tools { scenic { numRuns = 2 // AUCell parameters diff --git a/src/scenic/main.nf b/src/scenic/main.nf index 78f21893..70489ce1 100644 --- a/src/scenic/main.nf +++ b/src/scenic/main.nf @@ -6,7 +6,7 @@ include { resolveParams(params, true) -def isAppendOnlyMode = params.sc.scenic.containsKey("existingScenicLoom") +def isAppendOnlyMode = params.tools.scenic.containsKey("existingScenicLoom") def ALLOWED_GENOME_ASSEMBLIES = ['dm6','hg19','hg38', 'mm10'] ////////////////////////////////////////////////////// @@ -69,8 +69,8 @@ include { */ // Create channel for the different runs -if(params.sc.scenic.containsKey("numRuns")) { - runs = Channel.from( 1..params.sc.scenic.numRuns ) +if(params.tools.scenic.containsKey("numRuns")) { + runs = Channel.from( 1..params.tools.scenic.numRuns ) } else { runs = Channel.from( 1..1 ) } @@ -83,44 +83,44 @@ workflow scenic { main: /* GRN */ - tfs = file(params.sc.scenic.grn.tfs) + tfs = file(params.tools.scenic.grn.tfs) grn = ARBORETO_WITH_MULTIPROCESSING( filteredLoom.combine(runs), tfs ) grn_with_correlation = ADD_PEARSON_CORRELATION(grn) /* cisTarget motif analysis */ // channel for SCENIC databases resources: motifsDb = Channel - .fromPath( params.sc.scenic.cistarget.motifsDb ) + .fromPath( params.tools.scenic.cistarget.motifsDb ) .collect() // use all files together in the ctx command - motifsAnnotation = file(params.sc.scenic.cistarget.motifsAnnotation) + motifsAnnotation = file(params.tools.scenic.cistarget.motifsAnnotation) ctx_mtf = CISTARGET__MOTIF( grn_with_correlation, motifsDb, motifsAnnotation, 'mtf' ) /* cisTarget track analysis */ - if(params.sc.scenic.cistarget.tracksDb) { + if(params.tools.scenic.cistarget.tracksDb) { tracksDb = Channel - .fromPath( params.sc.scenic.cistarget.tracksDb ) + .fromPath( params.tools.scenic.cistarget.tracksDb ) .collect() // use all files together in the ctx command - tracksAnnotation = file(params.sc.scenic.cistarget.tracksAnnotation) + tracksAnnotation = file(params.tools.scenic.cistarget.tracksAnnotation) ctx_trk = CISTARGET__TRACK( grn_with_correlation, tracksDb, tracksAnnotation, 'trk' ) } /* AUCell, motif regulons */ auc_mtf = AUCELL__MOTIF( ctx_mtf, 'mtf' ) - if(params.sc.scenic.cistarget.tracksDb) { + if(params.tools.scenic.cistarget.tracksDb) { /* AUCell, track regulons */ auc_trk = AUCELL__TRACK( ctx_trk, 'trk' ) } // multi-runs aggregation: - if(params.sc.scenic.containsKey("numRuns") && params.sc.scenic.numRuns > 1) { + if(params.tools.scenic.containsKey("numRuns") && params.tools.scenic.numRuns > 1) { scenic_loom_mtf = MULTI_RUNS_TO_LOOM__MOTIF( filteredLoom, ctx_mtf, auc_mtf, 'mtf' ) - if(params.sc.scenic.cistarget.tracksDb) { + if(params.tools.scenic.cistarget.tracksDb) { scenic_loom_trk = MULTI_RUNS_TO_LOOM__TRACK( filteredLoom, ctx_trk, @@ -135,7 +135,7 @@ workflow scenic { out = VISUALIZE(scenic_loom_mtf) } } else { - if(params.sc.scenic.cistarget.tracksDb) { + if(params.tools.scenic.cistarget.tracksDb) { out = VISUALIZE( MERGE_MOTIF_TRACK_LOOMS( auc_mtf @@ -163,10 +163,10 @@ workflow scenic_append { scopeLoom main: - if(params.sc.scenic.containsKey("existingScenicLoom")) { + if(params.tools.scenic.containsKey("existingScenicLoom")) { scenicLoom = getChannelFromFilePath( - params.sc.scenic.existingScenicLoom, - 
params.sc.scenic.sampleSuffixWithExtension + params.tools.scenic.existingScenicLoom, + params.tools.scenic.sampleSuffixWithExtension ) if(!params.containsKey('quiet')) { Channel.from('').view { @@ -194,9 +194,9 @@ workflow scenic_append { throw new Exception("Cannot append SCENIC loom to SCope loom because the IDs do not match.") } ) - if(!params.sc.scenic.skipReports) { + if(!params.tools.scenic.skipReports) { report_notebook = GENERATE_REPORT( - file(workflow.projectDir + params.sc.scenic.report_ipynb), + file(workflow.projectDir + params.tools.scenic.report_ipynb), APPEND_SCENIC_LOOM.out, "SCENIC_report" ) @@ -213,8 +213,8 @@ workflow scenic_append { workflow { main: - if(!("filteredLoom" in params.sc.scenic)) - throw new Exception("The given filteredLoom required parameter does not exist in the params.sc.scenic scope.") - scenic( Channel.of( tuple(params.global.project_name, file(params.sc.scenic.filteredLoom)) ) ) + if(!("filteredLoom" in params.tools.scenic)) + throw new Exception("The given filteredLoom required parameter does not exist in the params.tools.scenic scope.") + scenic( Channel.of( tuple(params.global.project_name, file(params.tools.scenic.filteredLoom)) ) ) } diff --git a/src/scenic/main.test.nf b/src/scenic/main.test.nf index 9ceef3d2..f29aa1c2 100644 --- a/src/scenic/main.test.nf +++ b/src/scenic/main.test.nf @@ -79,7 +79,7 @@ include { } from './processes/loomHandler' params(params) // Create channel for the different runs -runs = Channel.from( 1..params.sc.scenic.numRuns ) +runs = Channel.from( 1..params.tools.scenic.numRuns ) // Make the test workflow workflow test_GRNBOOST2WITHOUTDASK { @@ -88,7 +88,7 @@ workflow test_GRNBOOST2WITHOUTDASK { loom main: - tfs = file(params.sc.scenic.grn.TFs) + tfs = file(params.tools.scenic.grn.TFs) GRNBOOST2WITHOUTDASK( runs, loom, tfs ) emit: @@ -106,18 +106,18 @@ workflow test_CISTARGET { main: // channel for SCENIC databases resources: motifDB = Channel - .fromPath( params.sc.scenic.cistarget.mtfDB ) + .fromPath( params.tools.scenic.cistarget.mtfDB ) .collect() // use all files together in the ctx command - motifANN = file(params.sc.scenic.cistarget.mtfANN) + motifANN = file(params.tools.scenic.cistarget.mtfANN) ctx_mtf = CISTARGET__MOTIF( runs, filteredloom, grn, motifDB, motifANN, 'mtf' ) /* cisTarget track analysis */ trackDB = Channel - .fromPath( params.sc.scenic.cistarget.trkDB ) + .fromPath( params.tools.scenic.cistarget.trkDB ) .collect() // use all files together in the ctx command - trackANN = file(params.sc.scenic.cistarget.trkANN) + trackANN = file(params.tools.scenic.cistarget.trkANN) ctx_trk = CISTARGET__TRACK( runs, filteredloom, grn, trackDB, trackANN, 'trk' ) emit: @@ -154,15 +154,15 @@ workflow test_SINGLE_RUN_BY_ID { runId main: - filteredloom = file( params.sc.scenic.filteredloom ) - tfs = file(params.sc.scenic.grn.TFs) + filteredloom = file( params.tools.scenic.filteredloom ) + tfs = file(params.tools.scenic.grn.TFs) run = Channel.from( runId..runId ) grn = GRNBOOST2WITHOUTDASK( run, filteredloom, tfs ) // channel for SCENIC databases resources: motifDB = Channel - .fromPath( params.sc.scenic.cistarget.mtfDB ) + .fromPath( params.tools.scenic.cistarget.mtfDB ) .collect() // use all files together in the ctx command - motifANN = file(params.sc.scenic.cistarget.mtfANN) + motifANN = file(params.tools.scenic.cistarget.mtfANN) ctx_mtf = CISTARGET__MOTIF( run, filteredloom, grn, motifDB, motifANN, 'mtf' ) /* AUCell, motif regulons */ auc_mtf = AUCELL__MOTIF( run, filteredloom, ctx_mtf, 'mtf' ) @@ -201,52 +201,52 
@@ workflow { test_SINGLE_RUN_BY_ID( params.runId ) break; case "GRNBOOST2WITHOUTDASK": - test_GRNBOOST2WITHOUTDASK( file( params.sc.scenic.filteredloom ) ) + test_GRNBOOST2WITHOUTDASK( file( params.tools.scenic.filteredloom ) ) break; case "CISTARGET": - grn = Channel.fromPath(params.sc.scenic.scenicoutdir + "/grnboost2withoutDask/run_*/run_*__adj.tsv") - test_CISTARGET( file( params.sc.scenic.filteredloom ), grn ) + grn = Channel.fromPath(params.tools.scenic.scenicoutdir + "/grnboost2withoutDask/run_*/run_*__adj.tsv") + test_CISTARGET( file( params.tools.scenic.filteredloom ), grn ) break; case "AUCELL": - ctx_mtf = Channel.fromPath(params.sc.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") - ctx_trk = Channel.fromPath(params.sc.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") - test_AUCELL( file( params.sc.scenic.filteredloom ), ctx_mtf, ctx_trk ) + ctx_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") + ctx_trk = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") + test_AUCELL( file( params.tools.scenic.filteredloom ), ctx_mtf, ctx_trk ) break; case "AGGR_MULTI_RUNS_FEATURES": /* Aggregate motifs from multiple runs */ - reg_mtf = Channel.fromPath(params.sc.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") + reg_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") AGGR_MULTI_RUNS_FEATURES__MOTIF( reg_mtf.collect(), 'mtf' ) - if(params.sc.scenic.cistarget.trkDB) { + if(params.tools.scenic.cistarget.trkDB) { /* Aggregate tracks from multiple runs */ - reg_trk = Channel.fromPath(params.sc.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") + reg_trk = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") AGGR_MULTI_RUNS_FEATURES__TRACK( reg_trk.collect(), 'trk' ) } break; case "AGGR_MULTI_RUNS_REGULONS": /* Aggregate motif regulons from multiple runs */ - auc_mtf_looms = Channel.fromPath(params.sc.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_mtf.loom") + auc_mtf_looms = Channel.fromPath(params.tools.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_mtf.loom") AGGR_MULTI_RUNS_REGULONS__MOTIF( auc_mtf_looms.collect(), 'mtf' ) - if(params.sc.scenic.cistarget.trkDB) { + if(params.tools.scenic.cistarget.trkDB) { /* Aggregate track regulons from multiple runs */ - auc_trk_looms = Channel.fromPath(params.sc.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_trk.loom") + auc_trk_looms = Channel.fromPath(params.tools.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_trk.loom") AGGR_MULTI_RUNS_REGULONS__TRACK( auc_trk_looms.collect(), 'trk' ) } break; case "AUCELL_FROM_FOLDER": /* Aggregate motif regulons from multiple runs */ - regulons_folder_mtf = file(params.sc.scenic.scenicoutdir + "/multi_runs_regulons_mtf") - AUCELL_FROM_FOLDER__MOTIF( file(params.sc.scenic.filteredloom), regulons_folder_mtf, 'mtf' ) - if(params.sc.scenic.cistarget.trkDB) { + regulons_folder_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_mtf") + AUCELL_FROM_FOLDER__MOTIF( file(params.tools.scenic.filteredloom), regulons_folder_mtf, 'mtf' ) + if(params.tools.scenic.cistarget.trkDB) { /* Aggregate track regulons from multiple runs */ - regulons_folder_trk = file(params.sc.scenic.scenicoutdir + "/multi_runs_regulons_trk") - AUCELL_FROM_FOLDER__TRACK( file(params.sc.scenic.filteredloom), regulons_folder_trk, 'trk' ) + regulons_folder_trk = file(params.tools.scenic.scenicoutdir + 
"/multi_runs_regulons_trk") + AUCELL_FROM_FOLDER__TRACK( file(params.tools.scenic.filteredloom), regulons_folder_trk, 'trk' ) } break; case "SAVE_SCENIC_MULTI_RUNS_TO_LOOM_MOTIF": - filteredloom = file(params.sc.scenic.filteredloom) - aggr_features_mtf = file(params.sc.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_mtf.csv.gz") - regulons_folder_mtf = file(params.sc.scenic.scenicoutdir + "/multi_runs_regulons_mtf") - regulons_auc_mtf = file(params.sc.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_mtf.tsv") + filteredloom = file(params.tools.scenic.filteredloom) + aggr_features_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_mtf.csv.gz") + regulons_folder_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_mtf") + regulons_auc_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_mtf.tsv") /* Save multiple motif SCENIC runs to loom*/ SAVE_SCENIC_MULTI_RUNS_TO_LOOM_MOTIF( @@ -258,10 +258,10 @@ workflow { ) break; case "SAVE_SCENIC_MULTI_RUNS_TO_LOOM_TRACK": - filteredloom = file(params.sc.scenic.filteredloom) - regulons_folder_trk = file(params.sc.scenic.scenicoutdir + "/multi_runs_regulons_trk") - aggr_features_trk = file(params.sc.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_trk.csv.gz") - regulons_auc_trk = file(params.sc.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_trk.tsv") + filteredloom = file(params.tools.scenic.filteredloom) + regulons_folder_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_trk") + aggr_features_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_trk.csv.gz") + regulons_auc_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_trk.tsv") /* Save multiple track SCENIC runs to loom*/ SAVE_SCENIC_MULTI_RUNS_TO_LOOM_TRACK( filteredloom, @@ -272,8 +272,8 @@ workflow { ) break; case "MERGE_MOTIF_TRACK_LOOMS": - scenic_loom_mtf = file( params.sc.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_mtf.loom" ) - scenic_loom_trk = file( params.sc.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_trk.loom" ) + scenic_loom_mtf = file( params.tools.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_mtf.loom" ) + scenic_loom_trk = file( params.tools.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_trk.loom" ) MERGE_MOTIF_TRACK_LOOMS( scenic_loom_mtf, scenic_loom_trk @@ -281,7 +281,7 @@ workflow { break; case "VISUALIZE_PUBLISH": /* Aggregate motif regulons from multiple runs */ - scenic_loom = file( params.sc.scenic.scenicoutdir + "/" + params.sc.scenic.scenicOutputLoom ) + scenic_loom = file( params.tools.scenic.scenicoutdir + "/" + params.tools.scenic.scenicOutputLoom ) PUBLISH_LOOM( VISUALIZE( scenic_loom ) ) break; default: diff --git a/src/scenic/processes/add_correlation.nf b/src/scenic/processes/add_correlation.nf index 52b2c44c..af7d9b2a 100644 --- a/src/scenic/processes/add_correlation.nf +++ b/src/scenic/processes/add_correlation.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic -processParams = params.sc.scenic.grn +toolParams = params.tools.scenic +processParams = params.tools.scenic.grn process ADD_PEARSON_CORRELATION { diff --git a/src/scenic/processes/arboreto_with_multiprocessing.nf b/src/scenic/processes/arboreto_with_multiprocessing.nf index 08ce726f..fe4f5f8b 100644 --- a/src/scenic/processes/arboreto_with_multiprocessing.nf +++ b/src/scenic/processes/arboreto_with_multiprocessing.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic -processParams = params.sc.scenic.grn +toolParams = params.tools.scenic +processParams = params.tools.scenic.grn process ARBORETO_WITH_MULTIPROCESSING { diff --git a/src/scenic/processes/aucell.nf b/src/scenic/processes/aucell.nf index 5c476c62..e110b651 100644 --- a/src/scenic/processes/aucell.nf +++ b/src/scenic/processes/aucell.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic -processParams = params.sc.scenic.aucell +toolParams = params.tools.scenic +processParams = params.tools.scenic.aucell process AUCELL { diff --git a/src/scenic/processes/cistarget.nf b/src/scenic/processes/cistarget.nf index b8a6a08d..b5a86ecd 100644 --- a/src/scenic/processes/cistarget.nf +++ b/src/scenic/processes/cistarget.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic -processParams = params.sc.scenic.cistarget +toolParams = params.tools.scenic +processParams = params.tools.scenic.cistarget process CISTARGET { diff --git a/src/scenic/processes/loomHandler.nf b/src/scenic/processes/loomHandler.nf index cebf52c2..4bf17556 100644 --- a/src/scenic/processes/loomHandler.nf +++ b/src/scenic/processes/loomHandler.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic +toolParams = params.tools.scenic process PUBLISH_LOOM { @@ -57,7 +57,7 @@ process MERGE_MOTIF_TRACK_LOOMS { tuple val(sampleId), path(toolParams.scenicOutputLoom) script: - toolParams = params.sc.scenic + toolParams = params.tools.scenic """ ${binDir}merge_motif_track_loom.py \ --loom_motif ${motifLoom} \ @@ -83,7 +83,7 @@ process APPEND_SCENIC_LOOM { tuple val(sampleId), path("${sampleId}.${toolParams.scenicScopeOutputLoom}") script: - toolParams = params.sc.scenic + toolParams = params.tools.scenic """ ${binDir}append_results_to_existing_loom.py \ --loom_scope ${scopeLoom} \ diff --git a/src/scenic/processes/multiruns/aggregateFeatures.nf b/src/scenic/processes/multiruns/aggregateFeatures.nf index a7da8ece..13f0f2e5 100644 --- a/src/scenic/processes/multiruns/aggregateFeatures.nf +++ b/src/scenic/processes/multiruns/aggregateFeatures.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic -processParams = params.sc.scenic.aggregate_features +toolParams = params.tools.scenic +processParams = params.tools.scenic.aggregate_features process AGGR_MULTI_RUNS_FEATURES { diff --git a/src/scenic/processes/multiruns/aggregateRegulons.nf b/src/scenic/processes/multiruns/aggregateRegulons.nf index da1546a2..e6a1572e 100644 --- a/src/scenic/processes/multiruns/aggregateRegulons.nf +++ b/src/scenic/processes/multiruns/aggregateRegulons.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic +toolParams = params.tools.scenic process AGGR_MULTI_RUNS_REGULONS { diff --git a/src/scenic/processes/multiruns/aucellFromFolder.nf b/src/scenic/processes/multiruns/aucellFromFolder.nf index 2a3c88f9..23e16498 100644 --- a/src/scenic/processes/multiruns/aucellFromFolder.nf +++ b/src/scenic/processes/multiruns/aucellFromFolder.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic -processParams = params.sc.scenic.aucell +toolParams = params.tools.scenic +processParams = params.tools.scenic.aucell process AUCELL_FROM_FOLDER { diff --git a/src/scenic/processes/multiruns/convertMotifsToRegulons.nf b/src/scenic/processes/multiruns/convertMotifsToRegulons.nf index f735446f..355f447b 100644 --- a/src/scenic/processes/multiruns/convertMotifsToRegulons.nf +++ b/src/scenic/processes/multiruns/convertMotifsToRegulons.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic +toolParams = params.tools.scenic process CONVERT_MULTI_RUNS_FEATURES_TO_REGULONS { diff --git a/src/scenic/processes/multiruns/saveToLoom.nf b/src/scenic/processes/multiruns/saveToLoom.nf index d2d8b6a7..9bb4f976 100644 --- a/src/scenic/processes/multiruns/saveToLoom.nf +++ b/src/scenic/processes/multiruns/saveToLoom.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.sc.scenic +toolParams = params.tools.scenic process SAVE_MULTI_RUNS_TO_LOOM { diff --git a/src/scenic/processes/reports.nf b/src/scenic/processes/reports.nf index b8552dc0..2d79beac 100644 --- a/src/scenic/processes/reports.nf +++ b/src/scenic/processes/reports.nf @@ -7,7 +7,7 @@ takes a template ipynb and adata as input, outputs ipynb named by the value in ${reportTitle} */ -toolParams = params.sc.scenic +toolParams = params.tools.scenic process GENERATE_REPORT { diff --git a/src/scenic/scenic.config b/src/scenic/scenic.config index 34a2287c..5eee79bc 100644 --- a/src/scenic/scenic.config +++ b/src/scenic/scenic.config @@ -1,6 +1,6 @@ params { - sc { + tools { scenic { // Label for the processes container = 'aertslab/pyscenic:0.10.4' From bf1e64cf58c97c9286d024d4cc99d7da56cd9a0f Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:23:27 +0100 Subject: [PATCH 155/202] Migrate to params.tools for scrublet tool --- src/scrublet/bin/sc_doublet_detection.py | 2 +- src/scrublet/conf/base.config | 2 +- src/scrublet/conf/scrublet_defaults.conf | 2 +- src/scrublet/processes/doublet_detection.nf | 4 ++-- src/scrublet/processes/reports.nf | 2 +- src/scrublet/scrublet.config | 4 ++-- src/scrublet/workflows/doublet_removal.nf | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/scrublet/bin/sc_doublet_detection.py b/src/scrublet/bin/sc_doublet_detection.py index 57d94ff6..956eff24 100755 --- a/src/scrublet/bin/sc_doublet_detection.py +++ b/src/scrublet/bin/sc_doublet_detection.py @@ -217,7 +217,7 @@ def save_histograms(out_basename, scrublet): ) raise Exception(f""" VSN ERROR: Scrublet failed to automatically identify a doublet score threshold for {SAMPLE_NAME}. -A manual doublet score threshold can be set using the --threshold (params.sc.scrublet.threshold) argument. +A manual doublet score threshold can be set using the --threshold (params.tools.scrublet.threshold) argument. Consider to use sample-based parameter setting as described at https://vsn-pipelines.readthedocs.io/en/develop/features.html#multi-sample-parameters. 
E.g.: params {{ sc {{ diff --git a/src/scrublet/conf/base.config b/src/scrublet/conf/base.config index 74299626..bb711e07 100644 --- a/src/scrublet/conf/base.config +++ b/src/scrublet/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { scrublet { container = 'vibsinglecellnf/scrublet:0.2.3' doublet_detection { diff --git a/src/scrublet/conf/scrublet_defaults.conf b/src/scrublet/conf/scrublet_defaults.conf index 800dd6dc..e82b41a6 100644 --- a/src/scrublet/conf/scrublet_defaults.conf +++ b/src/scrublet/conf/scrublet_defaults.conf @@ -1,5 +1,5 @@ params { - sc { + tools { scrublet { // add sensible default parameters for Scrublet: cell_annotate { diff --git a/src/scrublet/processes/doublet_detection.nf b/src/scrublet/processes/doublet_detection.nf index 9f289694..4dc683a8 100644 --- a/src/scrublet/processes/doublet_detection.nf +++ b/src/scrublet/processes/doublet_detection.nf @@ -47,7 +47,7 @@ def SC__SCRUBLET__DOUBLET_DETECTION_PARAMS(params) { process SC__SCRUBLET__DOUBLET_DETECTION { - container params.sc.scrublet.container + container params.tools.scrublet.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -68,7 +68,7 @@ process SC__SCRUBLET__DOUBLET_DETECTION { val(nPrinComps) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scrublet.doublet_detection) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scrublet.doublet_detection) processParams = sampleParams.local def _processParams = new SC__SCRUBLET__DOUBLET_DETECTION_PARAMS() _processParams.setEnv(this) diff --git a/src/scrublet/processes/reports.nf b/src/scrublet/processes/reports.nf index 86c96b5d..67ca9b9d 100644 --- a/src/scrublet/processes/reports.nf +++ b/src/scrublet/processes/reports.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__SCRUBLET__DOUBLET_DETECTION_REPORT { - container params.sc.scrublet.container + container params.tools.scrublet.container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true label 'compute_resources__report' diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config index fb632fc1..da749425 100644 --- a/src/scrublet/scrublet.config +++ b/src/scrublet/scrublet.config @@ -1,9 +1,9 @@ includeConfig './conf/base.config' includeConfig '../utils/conf/cell_annotate.config' -params.sc.scrublet.cell_annotate = params.tools.cell_annotate +params.tools.scrublet.cell_annotate = params.tools.cell_annotate params.sc.remove('cell_annotate') includeConfig '../utils/conf/cell_filter.config' -params.sc.scrublet.cell_filter = params.tools.cell_filter +params.tools.scrublet.cell_filter = params.tools.cell_filter params.sc.remove('cell_filter') includeConfig './conf/scrublet_defaults.conf' diff --git a/src/scrublet/workflows/doublet_removal.nf b/src/scrublet/workflows/doublet_removal.nf index eb50af71..1f6dfd21 100644 --- a/src/scrublet/workflows/doublet_removal.nf +++ b/src/scrublet/workflows/doublet_removal.nf @@ -86,7 +86,7 @@ workflow DOUBLET_REMOVAL { SC__SCRUBLET__DOUBLET_DETECTION_REPORT( - file(workflow.projectDir + params.sc.scrublet.doublet_detection.report_ipynb), + file(workflow.projectDir + params.tools.scrublet.doublet_detection.report_ipynb), SC__SCRUBLET__DOUBLET_DETECTION.out.map { // Extract the Scrublet object file it -> tuple(it[0], it[2]) From bc4f6b96c45ab1740039fbc30291ea58947633c0 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:28:13 +0100 Subject: [PATCH 156/202] Migrate to 
params.tools for soupx tool --- src/soupx/bin/.gitkeep | 0 src/soupx/bin/process1.py | 44 ----------------------------- src/soupx/conf/.gitkeep | 0 src/soupx/conf/base.config | 2 +- src/soupx/conf/soupx_correct.config | 2 +- src/soupx/nextflow.config | 14 --------- src/soupx/processes/.gitkeep | 0 src/soupx/processes/process1.nf | 29 ------------------- src/soupx/processes/runSoupX.nf | 2 +- 9 files changed, 3 insertions(+), 90 deletions(-) delete mode 100644 src/soupx/bin/.gitkeep delete mode 100644 src/soupx/bin/process1.py delete mode 100644 src/soupx/conf/.gitkeep delete mode 100644 src/soupx/nextflow.config delete mode 100644 src/soupx/processes/.gitkeep delete mode 100644 src/soupx/processes/process1.nf diff --git a/src/soupx/bin/.gitkeep b/src/soupx/bin/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soupx/bin/process1.py b/src/soupx/bin/process1.py deleted file mode 100644 index 655ca36e..00000000 --- a/src/soupx/bin/process1.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os -import scanpy as sc - -parser = argparse.ArgumentParser(description='Template script') - -parser.add_argument( - "input", - type=argparse.FileType('r'), - help='Input h5ad file.' -) - -parser.add_argument( - "output", - type=argparse.FileType('w'), - help='Output h5ad file.' -) - -args = parser.parse_args() - - -# Define the arguments properly -FILE_PATH_IN = args.input -FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] - -# I/O -# Expects h5ad file -try: - adata = sc.read_h5ad(filename=FILE_PATH_IN.name) -except IOError: - raise Exception("Wrong input format. Expects .h5ad files, got .{}".format(os.path.splitext(FILE_PATH_IN)[0])) - -################################################################################ -# do some processing here... - -print(adata) - -################################################################################ - -# I/O -adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) - diff --git a/src/soupx/conf/.gitkeep b/src/soupx/conf/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soupx/conf/base.config b/src/soupx/conf/base.config index 62526949..780c361c 100644 --- a/src/soupx/conf/base.config +++ b/src/soupx/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { soupx { container = 'vibsinglecellnf/soupx:1.4.8' } diff --git a/src/soupx/conf/soupx_correct.config b/src/soupx/conf/soupx_correct.config index 589f1022..377eaaf2 100644 --- a/src/soupx/conf/soupx_correct.config +++ b/src/soupx/conf/soupx_correct.config @@ -1,5 +1,5 @@ params { - sc { + tools { soupx { roundToInt = false } diff --git a/src/soupx/nextflow.config b/src/soupx/nextflow.config deleted file mode 100644 index b0b0c05a..00000000 --- a/src/soupx/nextflow.config +++ /dev/null @@ -1,14 +0,0 @@ -params { - sc { - template { - container = 'vibsinglecellnf/scanpy:1.8.1' - process1 { - param1 = '' - } - process2 { - param2 = '' - } - } - } -} - diff --git a/src/soupx/processes/.gitkeep b/src/soupx/processes/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soupx/processes/process1.nf b/src/soupx/processes/process1.nf deleted file mode 100644 index a572755f..00000000 --- a/src/soupx/processes/process1.nf +++ /dev/null @@ -1,29 +0,0 @@ -nextflow.enable.dsl=2 - -binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" - -process SC__TEMPLATE__PROCESS1 { - - container params.sc.template.container - publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' - label 'compute_resources__default' - - input: - tuple val(sampleId), path(f) - - output: - tuple val(sampleId), path("${sampleId}.SC__TEMPLATE__PROCESS1.h5ad") - - script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.template) - processParams = sampleParams.local - //varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') - """ - ${binDir}process1.py \ - --input ${f} \ - --n_workers ${task.cpus} \ - --memory_limit ${task.memory.toGiga()} \ - --output ${sampleId}.SC__TEMPLATE__PROCESS1.h5ad - """ -} - diff --git a/src/soupx/processes/runSoupX.nf b/src/soupx/processes/runSoupX.nf index 11553a2f..569fbbbc 100644 --- a/src/soupx/processes/runSoupX.nf +++ b/src/soupx/processes/runSoupX.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl=2 import java.nio.file.Paths moduleName = "soupx" -toolParams = params.sc[moduleName] +toolParams = params.tools[moduleName] binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/${moduleName}/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "${moduleName}/bin") From 8db2dbeda776def1a7f017051d31890425c5eb63 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:29:43 +0100 Subject: [PATCH 157/202] Migrate to params.tools for sratoolkit tool --- src/sratoolkit/processes/downloadFastQ.nf | 2 +- src/sratoolkit/sratoolkit.config | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index bb09a3a8..24abc905 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -6,7 +6,7 @@ if(!params.containsKey("test")) { binDir = "" } -toolParams = params.sratoolkit +toolParams = params.tools.sratoolkit process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { diff --git a/src/sratoolkit/sratoolkit.config b/src/sratoolkit/sratoolkit.config index b2d3e6e4..709ec3de 100644 --- a/src/sratoolkit/sratoolkit.config +++ b/src/sratoolkit/sratoolkit.config @@ -1,11 +1,13 @@ params { - sratoolkit { - container = 'vibsinglecellnf/sratoolkit:2.9.4-1.1.0' - // --include-technical option (fasterq-dump) - // This option should be set to 'true' if data you're downloading is e.g.: 10x Genomics scATAC-seq - includeTechnicalReads = false - // --max-size (prefetch), maximum file size to download in KB (exclusive). Default: 20G - maxSize = 20000000 + tools { + sratoolkit { + container = 'vibsinglecellnf/sratoolkit:2.9.4-1.1.0' + // --include-technical option (fasterq-dump) + // This option should be set to 'true' if data you're downloading is e.g.: 10x Genomics scATAC-seq + includeTechnicalReads = false + // --max-size (prefetch), maximum file size to download in KB (exclusive). 
Default: 20G + maxSize = 20000000 + } } } @@ -19,7 +21,6 @@ process { maxForks = 1 maxRetries = 5 errorStrategy = 'retry' - } } From 04ec07b2bd43dd339ddb5ffc10aa989c0867fb6d Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 11:31:49 +0100 Subject: [PATCH 158/202] Migrate to params.tools for star tool --- src/star/main.nf | 6 +++--- src/star/processes/build_genome.nf | 4 ++-- src/star/processes/load_genome.nf | 2 +- src/star/processes/map_count.nf | 4 ++-- src/star/processes/solo_map_count.nf | 10 +++++----- src/star/processes/unload_genome.nf | 2 +- src/star/star.config | 7 +++---- src/utils/processes/utils.nf | 2 +- workflows/star.nf | 8 ++++---- 9 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/star/main.nf b/src/star/main.nf index eafd9381..cb129d74 100644 --- a/src/star/main.nf +++ b/src/star/main.nf @@ -22,9 +22,9 @@ include { workflow star { main: - SC__STAR__LOAD_GENOME( file(params.sc.star.map_count.transcriptome) ) - SC__STAR__MAP_COUNT( file(params.sc.star.map_count.transcriptome), SC__STAR__LOAD_GENOME.out, path(params.sc.star.map_count.fastqs) ) - SC__STAR__UNLOAD_GENOME( file(params.sc.star.map_count.transcriptome), SC__STAR__MAP_COUNT.out[0] ) + SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.transcriptome) ) + SC__STAR__MAP_COUNT( file(params.tools.star.map_count.transcriptome), SC__STAR__LOAD_GENOME.out, path(params.tools.star.map_count.fastqs) ) + SC__STAR__UNLOAD_GENOME( file(params.tools.star.map_count.transcriptome), SC__STAR__MAP_COUNT.out[0] ) emit: SC__STAR__MAP_COUNT.out diff --git a/src/star/processes/build_genome.nf b/src/star/processes/build_genome.nf index 273ee58b..eb4532ce 100644 --- a/src/star/processes/build_genome.nf +++ b/src/star/processes/build_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__BUILD_INDEX { - container params.sc.star.container + container params.tools.star.container label 'compute_resources__star_build_genome' input: @@ -13,7 +13,7 @@ process SC__STAR__BUILD_INDEX { file("STAR_index") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.star.build_genome) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star.build_genome) processParams = sampleParams.local """ mkdir STAR_index diff --git a/src/star/processes/load_genome.nf b/src/star/processes/load_genome.nf index adc794a7..2b78a38b 100644 --- a/src/star/processes/load_genome.nf +++ b/src/star/processes/load_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__LOAD_GENOME { - container params.sc.star.container + container params.tools.star.container label 'compute_resources__default' input: diff --git a/src/star/processes/map_count.nf b/src/star/processes/map_count.nf index 1fac3f90..f3a3edc1 100644 --- a/src/star/processes/map_count.nf +++ b/src/star/processes/map_count.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__MAP_COUNT { - container params.sc.star.container + container params.tools.star.container label 'compute_resources__star_map_count' input: @@ -16,7 +16,7 @@ process SC__STAR__MAP_COUNT { tuple val(sample), path("*.STAR_Aligned.sortedByCoord.out.bam"), emit: bam script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.star.map_count) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star.map_count) processParams = sampleParams.local success = true """ diff --git a/src/star/processes/solo_map_count.nf b/src/star/processes/solo_map_count.nf index b839329e..2b76d67d 100644 --- 
a/src/star/processes/solo_map_count.nf +++ b/src/star/processes/solo_map_count.nf @@ -1,7 +1,7 @@ nextflow.enable.dsl=2 process SC__STAR__SOLO_MAP_COUNT { - container params.sc.star.container + container params.tools.star.container label 'compute_resources__star_map_count' input: @@ -24,10 +24,10 @@ process SC__STAR__SOLO_MAP_COUNT { --soloType Droplet \ --genomeDir ${transcriptome} \ --runThreadN ${task.cpus} \ - ${(params.sc.star.map_count.containsKey('limitBAMsortRAM')) ? '--limitBAMsortRAM ' + params.sc.star.map_count.limitBAMsortRAM: ''} \ - ${(params.sc.star.map_count.containsKey('outSAMtype')) ? '--outSAMtype ' + params.sc.star.map_count.outSAMtype: ''} \ - ${(params.sc.star.map_count.containsKey('quantMode')) ? '--quantMode ' + params.sc.star.map_count.quantMode: ''} \ - ${(params.sc.star.map_count.containsKey('outReadsUnmapped')) ? '--outReadsUnmapped ' + params.sc.star.map_count.outReadsUnmapped: ''} \ + ${(params.tools.star.map_count.containsKey('limitBAMsortRAM')) ? '--limitBAMsortRAM ' + params.tools.star.map_count.limitBAMsortRAM: ''} \ + ${(params.tools.star.map_count.containsKey('outSAMtype')) ? '--outSAMtype ' + params.tools.star.map_count.outSAMtype: ''} \ + ${(params.tools.star.map_count.containsKey('quantMode')) ? '--quantMode ' + params.tools.star.map_count.quantMode: ''} \ + ${(params.tools.star.map_count.containsKey('outReadsUnmapped')) ? '--outReadsUnmapped ' + params.tools.star.map_count.outReadsUnmapped: ''} \ --readFilesIn ${fastqs} \ ${(fastqs.name.endsWith(".gz")) ? '--readFilesCommand zcat' : ''} \ --outFileNamePrefix ${_sampleName} diff --git a/src/star/processes/unload_genome.nf b/src/star/processes/unload_genome.nf index 77d5a429..afae2bd3 100644 --- a/src/star/processes/unload_genome.nf +++ b/src/star/processes/unload_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__UNLOAD_GENOME { - container params.sc.star.container + container params.tools.star.container label 'compute_resources__default' input: diff --git a/src/star/star.config b/src/star/star.config index 72ae8f45..1fb5743b 100644 --- a/src/star/star.config +++ b/src/star/star.config @@ -1,9 +1,8 @@ params { - - sc { + tools { star { version = '2.7.1a' - container = "/ddn1/vol1/staging/leuven/res_00001/software/STAR/${params.sc.star.version}/STAR_${params.sc.star.version}.sif" + container = "/ddn1/vol1/staging/leuven/res_00001/software/STAR/${params.tools.star.version}/STAR_${params.tools.star.version}.sif" build_genome { runMode = 'genomeGenerate' @@ -13,7 +12,7 @@ params { map_count { fastqs = "/ddn1/vol1/staging/leuven/stg_00003/cbd-bioinf/CBD__PVDH__Leila_Ryohei__Human_Neuronal_Maturation/NextSeq500_20190926/2MX_SmartSeq_A*/01.CLEAN_FASTQ/*_R1.clean.fastq.gz" - index = "/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/indexes/STAR/${params.sc.star.version}/" + index = "/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/indexes/STAR/${params.tools.star.version}/" runThreadN = 8 limitBAMsortRAM = 50000000000 outSAMtype = 'BAM SortedByCoordinate' diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index 594ca308..20061297 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -369,7 +369,7 @@ process SC__STAR_CONCATENATOR() { path("${params.global.project_name}.SC__STAR_CONCATENATOR.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.star_concatenator) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star_concatenator) processParams = 
sampleParams.local id = params.global.project_name """ diff --git a/workflows/star.nf b/workflows/star.nf index e4065e5f..6e9cb66b 100644 --- a/workflows/star.nf +++ b/workflows/star.nf @@ -29,14 +29,14 @@ include { workflow star { main: - SC__STAR__LOAD_GENOME( file(params.sc.star.map_count.index) ) + SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.index) ) SC__STAR__MAP_COUNT( - file(params.sc.star.map_count.index), + file(params.tools.star.map_count.index), SC__STAR__LOAD_GENOME.out, - getSingleEndChannel(params.sc.star.map_count.fastqs) + getSingleEndChannel(params.tools.star.map_count.fastqs) ) SC__STAR__UNLOAD_GENOME( - file(params.sc.star.map_count.index), + file(params.tools.star.map_count.index), SC__STAR__MAP_COUNT.out.isDone.collect() ) SC__STAR_CONCATENATOR( SC__STAR__MAP_COUNT.out.counts.map { it[1] }.collect() ) From cb30bbba6bd37d4cfb31f6e02c0b6bcb8556e438 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 15:18:19 +0100 Subject: [PATCH 159/202] Migrate to params.tools for cellranger-(atac) tools --- main_atac.nf | 8 ++++---- src/cellranger-atac/processes/count.nf | 4 ++-- src/cellranger-atac/processes/mkfastq.nf | 2 +- src/cellranger/processes/count.nf | 2 +- src/cellranger/processes/mkfastq.nf | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/main_atac.nf b/main_atac.nf index 9eb940a0..6a9fead7 100644 --- a/main_atac.nf +++ b/main_atac.nf @@ -29,9 +29,9 @@ workflow cellranger_atac { } from './src/cellranger-atac/main.nf' params(params) CELLRANGER_ATAC( - file(params.sc.cellranger_atac.mkfastq.csv), - file(params.sc.cellranger_atac.mkfastq.runFolder), - file(params.sc.cellranger_atac.count.reference) + file(params.tools.cellranger_atac.mkfastq.csv), + file(params.tools.cellranger_atac.mkfastq.runFolder), + file(params.tools.cellranger_atac.count.reference) ) } @@ -111,7 +111,7 @@ workflow atac_preprocess_freemuxlet { freemuxlet as FREEMUXLET; } from './workflows/popscle' params(params) - ATAC_PREPROCESS_WITH_METADATA(file(params.sc.atac.preprocess.metadata)) + ATAC_PREPROCESS_WITH_METADATA(file(params.tools.atac.preprocess.metadata)) FREEMUXLET(ATAC_PREPROCESS_WITH_METADATA.out.bam) } diff --git a/src/cellranger-atac/processes/count.nf b/src/cellranger-atac/processes/count.nf index 7d28ffd5..0388b540 100644 --- a/src/cellranger-atac/processes/count.nf +++ b/src/cellranger-atac/processes/count.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.sc.cellranger_atac +toolParams = params.tools.cellranger_atac def runCellRangerAtacCount = { id, @@ -46,7 +46,7 @@ process SC__CELLRANGER_ATAC__COUNT { def sampleParams = params.parseConfig(sampleId, params.global, toolParams.count) processParams = sampleParams.local if(processParams.sample == '') { - throw new Exception("Regards params.sc.cellranger_atac.count: sample parameter cannot be empty") + throw new Exception("Regards params.tools.cellranger_atac.count: sample parameter cannot be empty") } runCellRangerAtacCount( sampleId, diff --git a/src/cellranger-atac/processes/mkfastq.nf b/src/cellranger-atac/processes/mkfastq.nf index 3694039a..8dc49330 100644 --- a/src/cellranger-atac/processes/mkfastq.nf +++ b/src/cellranger-atac/processes/mkfastq.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.sc.cellranger_atac +toolParams = params.tools.cellranger_atac process SC__CELLRANGER_ATAC__MKFASTQ { diff --git a/src/cellranger/processes/count.nf b/src/cellranger/processes/count.nf index c30b22d1..ba352b3f 100644 --- a/src/cellranger/processes/count.nf +++ 
b/src/cellranger/processes/count.nf @@ -4,7 +4,7 @@ include { isParamNull; } from './../../utils/processes/utils.nf' params(params) -toolParams = params.sc.cellranger +toolParams = params.tools.cellranger def generateCellRangerCountCommandDefaults = { diff --git a/src/cellranger/processes/mkfastq.nf b/src/cellranger/processes/mkfastq.nf index 85e23da4..cd9065f4 100644 --- a/src/cellranger/processes/mkfastq.nf +++ b/src/cellranger/processes/mkfastq.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.sc.cellranger +toolParams = params.tools.cellranger process SC__CELLRANGER__MKFASTQ { From c59656edae3aee0585d5388e34a2554f9e226a5d Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 15:18:42 +0100 Subject: [PATCH 160/202] Migrate to params.tools for utils and docs --- conf/generic.config | 25 ++++++++++++++++++ docs/development.rst | 4 +-- docs/features.rst | 14 +++++----- docs/getting-started.rst | 4 +-- docs/input_formats.rst | 2 +- docs/pipelines.rst | 6 ++--- main.nf | 22 ++++++++-------- src/celda/conf/decontx_filter.config | 4 +-- src/celda/main.nf | 2 +- src/channels/channels.nf | 4 +-- src/scanpy/bin/cluster/sc_clustering.py | 2 +- src/scanpy/workflows/single_sample.nf | 2 +- src/scenic/README.md | 2 +- src/scenic/processes/multiruns/saveToLoom.nf | 8 +++--- src/scrublet/scrublet.config | 8 +++--- src/soupx/main.nf | 2 +- src/utils/bin/sc_file_converter.py | 2 +- .../bin/sc_h5ad_annotate_by_cell_metadata.py | 2 +- src/utils/conf/base.config | 2 +- src/utils/conf/cell_annotate.config | 2 +- src/utils/conf/cell_filter.config | 2 +- src/utils/conf/h5ad_clean.config | 2 +- src/utils/conf/h5ad_concatenate.config | 2 +- src/utils/conf/sample_annotate.config | 2 +- src/utils/conf/sample_annotate_old_v1.config | 2 +- src/utils/conf/scope.config | 2 +- src/utils/conf/star_concatenate.config | 2 +- src/utils/conf/test.config | 4 ++- src/utils/main.test.nf | 12 ++++----- src/utils/processes/h5adAnnotate.nf | 8 +++--- src/utils/processes/h5adExtractMetadata.nf | 2 +- src/utils/processes/h5adSubset.nf | 6 ++--- src/utils/processes/h5adToLoom.nf | 17 ++++++------ src/utils/processes/h5adUpdate.nf | 2 +- src/utils/processes/h5adUpdateMetadata.nf | 2 +- src/utils/processes/utils.nf | 26 +++++++++---------- src/utils/workflows/annotateByCellMetadata.nf | 4 +-- src/utils/workflows/filterAnnotateClean.nf | 12 ++++----- src/utils/workflows/filterByCellMetadata.nf | 4 +-- workflows/bbknn.nf | 6 ++--- workflows/harmony.nf | 6 ++--- workflows/mnncorrect.nf | 6 ++--- workflows/multi_sample.nf | 6 ++--- workflows/nemesh.nf | 8 +++--- workflows/single_sample.nf | 2 +- workflows/single_sample_star.nf | 4 +-- 46 files changed, 149 insertions(+), 121 deletions(-) diff --git a/conf/generic.config b/conf/generic.config index a8e2d188..707df5e0 100644 --- a/conf/generic.config +++ b/conf/generic.config @@ -1,4 +1,9 @@ +import static groovy.json.JsonOutput.* + params { + breakPrettyPrintMap = { p -> + throw new Exception(prettyPrint(toJson(p))) + } // This closure facilitates the usage of sample specific parameters parseConfig = { sample, paramsGlobal, paramsLocal -> def lv = { a,b -> return org.codehaus.groovy.runtime.MethodRankHelper.delDistance(a, b) } @@ -61,4 +66,24 @@ params { } return false } + hasUtilsParams = { utilityKey -> + if(params.utils.containsKey(utilityKey)) { + return true + } + // backward-compatible + if(params.containsKey("sc")) { + return params.sc.containsKey(utilityKey) + } + return false + } + getUtilsParams = { utilityKey -> + if(params.utils.containsKey(utilityKey)) { + 
return params.utils[utilityKey] + } + // backward-compatible + if(params.containsKey("sc")) { + return params.sc[utilityKey] + } + throw new Exception("Cannot find utility " + utilityKey + " in Nextflow config.") + } } diff --git a/docs/development.rst b/docs/development.rst index 8a30e11d..54c99ea9 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -493,7 +493,7 @@ Steps: if(params.getToolParams("scanpy").containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.sc.containsKey("file_concatenator")) { + if(params.getUtilsParams("file_concatenator")) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -527,7 +527,7 @@ Steps: // Conversion // Convert h5ad to X (here we choose: loom format) - if(params.sc.containsKey("file_concatenator")) { + if(params.hasUtilsParams("file_concatenator")) { filteredloom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONCATENATOR.out ) scopeloom = FILE_CONVERTER( BEC_HARMONY.out.data.groupTuple(), diff --git a/docs/features.rst b/docs/features.rst index 9ddbfae3..326114a1 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -8,7 +8,7 @@ Typically, cell- and gene-level filtering is one of the first steps performed in This usually results in the pipeline being run in two passes. In the **first pass**, the default filters are applied (which are probably not valid for new datasets), and a separate QC report is generated for each sample. These QC reports can be inspected and the filters can be adjusted in the config file -either for all samples (by editing the ``params.sc.scanpy.filter`` settings directly, or for individual samples by using the strategy described in multi-sample parameters. +either for all samples (by editing the ``params.tools.scanpy.filter`` settings directly, or for individual samples by using the strategy described in multi-sample parameters. Then, the **second pass** restarts the pipeline with the correct filtering parameters applied (use ``nextflow run ... -resume`` to skip already completed steps). Other notes @@ -172,7 +172,7 @@ The ``utils_cell_annotate`` profile is adding the following part to the config: } } -Two methods (``params.sc.cell_annotate.method``) are available: +Two methods (``params.utils.cell_annotate.method``) are available: - ``aio`` - ``obo`` @@ -274,7 +274,7 @@ The ``utils_cell_filter`` profile is required when generating the config file. T Part of an end-to-end pipeline ****************************** -For more detailed information about the parameters to set in ``params.sc.cell_filter``, please check the `cell_filter parameter details `_ section below. +For more detailed information about the parameters to set in ``params.utils.cell_filter``, please check the `cell_filter parameter details `_ section below. As an independent workflow ************************** @@ -287,7 +287,7 @@ Please check the `cell_filter`_ workflow or `cell_annotate_filter`_ workflow to Parameters of cell_filter ************************* -Two methods (``params.sc.cell_filter.method``) are available: +Two methods (``params.utils.cell_filter.method``) are available: - ``internal`` - ``external`` @@ -377,7 +377,7 @@ If you want to apply custom parameters for some specific samples and have a "gen } } -Using this config, the parameter ``params.sc.scanpy.cellFilterMinNGenes`` will be applied with a threshold value of ``600`` to ``1k_pbmc_v2_chemistry``. The rest of the samples will use the value ``800`` to filter the cells having less than that number of genes. 
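As a sketch of the sample-keyed map this passage describes (threshold values illustrative; the ``'default'`` fallback key assumes the multi-sample parameter convention referenced above, shown under the ``filter`` block where these settings live)::

    params {
        tools {
            scanpy {
                filter {
                    cellFilterMinNGenes = [
                        '1k_pbmc_v2_chemistry': 600,
                        'default': 800
                    ]
                }
            }
        }
    }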
+Using this config, the parameter ``params.tools.scanpy.cellFilterMinNGenes`` will be applied with a threshold value of ``600`` to ``1k_pbmc_v2_chemistry``. The rest of the samples will use the value ``800`` to filter the cells having less than that number of genes. This strategy can be applied to any other parameter of the config. @@ -386,7 +386,7 @@ Parameter exploration Since ``v0.9.0``, it is possible to explore several combinations of parameters. The latest version of the VSN-Pipelines allows to explore the following parameters: -- ``params.sc.scanpy.clustering`` +- ``params.tools.scanpy.clustering`` - ``method`` :: @@ -399,7 +399,7 @@ Since ``v0.9.0``, it is possible to explore several combinations of parameters. Select default clustering ************************* -In case the parameter exploration mode is used within the ``params.sc.scanpy.clustering`` parameter, it will generated a range of different clusterings. +In case the parameter exploration mode is used within the ``params.tools.scanpy.clustering`` parameter, it will generate a range of different clusterings. For non-expert, it's often difficult to know which clustering to pick. It's however possible to use the ``DIRECTS`` module in order to select a default clustering. In order, to use this automated clustering selection method, add the ``directs`` profile when generating the main config using ``nextflow config``. The config will get populated with: diff --git a/docs/getting-started.rst index b24e94bf..1244f4e3 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -166,8 +166,8 @@ The tool-specific parameters, as well as Docker/Singularity profiles, are includ * ``params.global.project_name``: a project name which will be included in some of the output file names. * ``params.data.tenx.cellranger_mex``, which should point to the ``outs/`` folder generated by Cell Ranger (if using 10x data). See :ref:`Information on using 10x Genomics datasets` for additional info. - * Filtering parameters (``params.sc.scanpy.filter``): filtering parameters, which will be applied to all samples, can be set here: min/max genes, mitochondrial read fraction, and min cells. See :ref:`Multi-sample parameters` for additional info on how to specify sample-specific parameters. - * Louvain cluster resolution: ``params.sc.scanpy.clustering.resolution``. + * Filtering parameters (``params.tools.scanpy.filter``): filtering parameters, which will be applied to all samples, can be set here: min/max genes, mitochondrial read fraction, and min cells. See :ref:`Multi-sample parameters` for additional info on how to specify sample-specific parameters. + * Louvain cluster resolution: ``params.tools.scanpy.clustering.resolution``. * :ref:`Cell-` and :ref:`sample-` level annotations are also possible. diff --git a/docs/input_formats.rst index e6419850..3c765072 100644 --- a/docs/input_formats.rst +++ b/docs/input_formats.rst @@ -112,7 +112,7 @@ or will recursively find all 10x samples in that directory. -The pipeline will use either the ``outs/filtered_feature_bc_matrix/`` or the ``outs/raw_feature_bc_matrix/`` depending on the setting of the ``params.sc.file_converter.useFilteredMatrix`` (``true`` uses filtered; ``false`` uses raw).
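For instance, a minimal sketch (illustrative, not taken from this hunk) of switching a run to the raw matrix under the renamed scope::

    params {
        utils {
            file_converter {
                useFilteredMatrix = false // false selects outs/raw_feature_bc_matrix/
            }
        }
    }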
---- diff --git a/docs/pipelines.rst b/docs/pipelines.rst index 3d04991a..d176db48 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -211,9 +211,9 @@ Simply adding `-profile scenic_multiruns` during the config step will activate t Runs the ``cellranger`` workflow (``makefastq``, then ``count``). Input parameters are specified within the config file: -* ``params.sc.cellranger.mkfastq.csv``: path to the CSV samplesheet -* ``params.sc.cellranger.mkfastq.runFolder``: path of Illumina BCL run folder -* ``params.sc.cellranger.count.transcriptome``: path to the Cell Ranger compatible transcriptome reference +* ``params.tools.cellranger.mkfastq.csv``: path to the CSV samplesheet +* ``params.tools.cellranger.mkfastq.runFolder``: path of Illumina BCL run folder +* ``params.tools.cellranger.count.transcriptome``: path to the Cell Ranger compatible transcriptome reference **cellranger_count_metadata** ----------------------------- diff --git a/main.nf b/main.nf index 6f262028..648da558 100644 --- a/main.nf +++ b/main.nf @@ -1195,15 +1195,15 @@ workflow _cell_annotate_filter { getDataChannel | \ SC__FILE_CONVERTER - if(!params.sc.containsKey("cell_annotate")) - throw new Exception("VSN ERROR: The cell_annotate param is missing in params.sc.") + if(!params.hasUtilsParams("cell_annotate")) + throw new Exception("VSN ERROR: The cell_annotate param is missing in params.utils.") // Annotate & publish ANNOTATE_BY_CELL_METADATA( SC__FILE_CONVERTER.out, null, ) - if(params.sc.cell_annotate.containsKey("publish") && params.sc.cell_annotate.publish) { + if(params.getUtilsParams("cell_annotate").containsKey("publish") && params.getUtilsParams("cell_annotate").publish) { PUBLISH_H5AD_CELL_ANNOTATED( ANNOTATE_BY_CELL_METADATA.out, "ANNOTATE_BY_CELL_METADATA", @@ -1213,8 +1213,8 @@ workflow _cell_annotate_filter { ) } - if(!params.sc.containsKey("cell_filter")) - throw new Exception("VSN ERROR: The cell_filter param is missing in params.sc.") + if(!params.hasUtilsParams("cell_filter")) + throw new Exception("VSN ERROR: The cell_filter param is missing in params.utils.") // Filter (& clean) & publish FILTER_BY_CELL_METADATA( @@ -1222,7 +1222,7 @@ workflow _cell_annotate_filter { null ) - if(params.getToolParams("cell_filter").containsKey("publish") && params.getToolParams("cell_filter").publish) { + if(params.getUtilsParams("cell_filter")?.publish) { PUBLISH_H5AD_CELL_FILTERED( FILTER_BY_CELL_METADATA.out, "FILTER_BY_CELL_METADATA", @@ -1231,7 +1231,7 @@ workflow _cell_annotate_filter { false ) } - if(params.utils.containsKey("publish") && publish) { + if(params.hasUtilsParams("publish") && publish) { PUBLISH_H5AD_CELL_FILTERED( FILTER_BY_CELL_METADATA.out, "CELL_ANNOTATE_FILTER", @@ -1265,15 +1265,15 @@ workflow cell_annotate_filter_and_sample_annotate { out = _cell_annotate_filter(false) // Annotate cells based on an indexed sample-based metadata table - if(!params.sc.containsKey("sample_annotate")) - throw new Exception("VSN ERROR: The sample_annotate param is missing in params.sc.") + if(!params.hasUtilsParams("sample_annotate")) + throw new Exception("VSN ERROR: The sample_annotate param is missing in params.utils.") - if (!hasMetadataFilePath(params.sc.sample_annotate)) { + if (!hasMetadataFilePath(params.getUtilsParams("sample_annotate"))) { throw new Exception("VSN ERROR: The metadataFilePath param is missing in sample_annotate.") } out = SC__ANNOTATE_BY_SAMPLE_METADATA( out ) - if(params.sc.file_cleaner) { + if(params.getUtilsParams("file_cleaner")) { out = SC__H5AD_BEAUTIFY( out ) } diff --git 
a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 55e07ecd..68e65fb4 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -1,8 +1,8 @@ includeConfig '../../utils/conf/cell_annotate.config' -params.tools.celda.decontx.cell_annotate = params.tools.cell_annotate +params.tools.celda.decontx.cell_annotate = params.utils.cell_annotate params.tools.remove('cell_annotate') includeConfig '../../utils/conf/cell_filter.config' -params.tools.celda.decontx.cell_filter = params.tools.cell_filter +params.tools.celda.decontx.cell_filter = params.utils.cell_filter params.tools.remove('cell_filter') includeConfig './decontx_filter_defaults.config' diff --git a/src/celda/main.nf b/src/celda/main.nf index bc29a224..2caff6db 100644 --- a/src/celda/main.nf +++ b/src/celda/main.nf @@ -40,7 +40,7 @@ workflow decontx { throw new Exception("VSN ERROR: The given strategy in params..celda.decontx is not valid. Choose: filter or correct.") } - if(params.utils.containsKey("publish")) { + if(params.hasUtilsParams("publish")) { PUBLISH( processed, "CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase(), diff --git a/src/channels/channels.nf b/src/channels/channels.nf index 311f73db..70debeda 100644 --- a/src/channels/channels.nf +++ b/src/channels/channels.nf @@ -36,8 +36,8 @@ workflow getDataChannel { } else { // If not dynamically set, we use h5ad by default outputFileFormat = "h5ad" - if(params.getToolParams("file_converter").containsKey("off")) { - outputFileFormat = params.getToolParams("file_converter").off + if(params.getUtilsParams("file_converter").containsKey("off")) { + outputFileFormat = params.getUtilsParams("file_converter").off } } diff --git a/src/scanpy/bin/cluster/sc_clustering.py b/src/scanpy/bin/cluster/sc_clustering.py index efd53750..f224a16d 100755 --- a/src/scanpy/bin/cluster/sc_clustering.py +++ b/src/scanpy/bin/cluster/sc_clustering.py @@ -81,7 +81,7 @@ def check_no_single_cluster(adata, method, resolution): num_clusters = len(np.unique(adata.obs[method])) if num_clusters == 1: - raise Exception(f"Single cluster found when running clustering algorithm {method} with resolution {resolution}. Please remove this one from params.sc.scanpy.clustering.resolutions.") + raise Exception(f"Single cluster found when running clustering algorithm {method} with resolution {resolution}. 
Please remove this one from params.tools.scanpy.clustering.resolutions.") # # Clustering the data diff --git a/src/scanpy/workflows/single_sample.nf b/src/scanpy/workflows/single_sample.nf index 0d1910ba..982db235 100644 --- a/src/scanpy/workflows/single_sample.nf +++ b/src/scanpy/workflows/single_sample.nf @@ -92,7 +92,7 @@ workflow SINGLE_SAMPLE { // Reporting samples = data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) + file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) ) ipynbs = COMBINE_REPORTS( diff --git a/src/scenic/README.md b/src/scenic/README.md index b51cda1b..40b7f0ab 100644 --- a/src/scenic/README.md +++ b/src/scenic/README.md @@ -35,7 +35,7 @@ Make sure the following parameters are correctly set: - `params.tools.scenic.cistarget.tracksAnnotation` if commented, track-based cisTarget won't run - `params.tools.scenic.numRuns` if running SCENIC in multi-runs mode - `singularity.runOptions` Specify the paths to mount -- `params.sc.scope.tree` +- `params.utils.scope.tree` Additionally, you can update the other paraemeters for the different steps. diff --git a/src/scenic/processes/multiruns/saveToLoom.nf b/src/scenic/processes/multiruns/saveToLoom.nf index 9bb4f976..2f26e29a 100644 --- a/src/scenic/processes/multiruns/saveToLoom.nf +++ b/src/scenic/processes/multiruns/saveToLoom.nf @@ -30,10 +30,10 @@ process SAVE_MULTI_RUNS_TO_LOOM { --cell-id-attribute ${toolParams.cell_id_attribute} \ --gene-attribute ${toolParams.gene_attribute} \ --title "${sampleId} - pySCENIC (${type})" \ - --nomenclature "${params.sc.scope.genome}" \ - --scope-tree-level-1 "${params.sc.scope.tree.level_1}" \ - --scope-tree-level-2 "${params.sc.scope.tree.level_2}" \ - --scope-tree-level-3 "${params.sc.scope.tree.level_3}" + --nomenclature "${params.getUtilsParams('scope').genome}" \ + --scope-tree-level-1 "${params.getUtilsParams('scope').tree.level_1}" \ + --scope-tree-level-2 "${params.getUtilsParams('scope').tree.level_2}" \ + --scope-tree-level-3 "${params.getUtilsParams('scope').tree.level_3}" """ } diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config index da749425..5665e399 100644 --- a/src/scrublet/scrublet.config +++ b/src/scrublet/scrublet.config @@ -1,9 +1,9 @@ includeConfig './conf/base.config' includeConfig '../utils/conf/cell_annotate.config' -params.tools.scrublet.cell_annotate = params.tools.cell_annotate -params.sc.remove('cell_annotate') +params.tools.scrublet.cell_annotate = params.utils.cell_annotate +params.tools.remove('cell_annotate') includeConfig '../utils/conf/cell_filter.config' -params.tools.scrublet.cell_filter = params.tools.cell_filter -params.sc.remove('cell_filter') +params.tools.scrublet.cell_filter = params.utils.cell_filter +params.tools.remove('cell_filter') includeConfig './conf/scrublet_defaults.conf' diff --git a/src/soupx/main.nf b/src/soupx/main.nf index d46da119..0eb5d4ae 100644 --- a/src/soupx/main.nf +++ b/src/soupx/main.nf @@ -35,7 +35,7 @@ workflow soupx { processed = SOUPX_CORRECT( data ) - if(params.utils.containsKey("publish")) { + if(params.hasUtilsParams("publish")) { PUBLISH( processed, "SOUPX_CORRECT", diff --git a/src/utils/bin/sc_file_converter.py b/src/utils/bin/sc_file_converter.py index 00c45091..f9021902 100755 --- a/src/utils/bin/sc_file_converter.py +++ b/src/utils/bin/sc_file_converter.py @@ -202,7 +202,7 @@ def update_var(adata, args): adata.var.index = adata.var.index.astype(str) # Check if var 
index is unique if len(np.unique(adata.var.index)) < len(adata.var.index) and not args.make_var_index_unique: - raise Exception("VSN ERROR: AnnData var index is not unique. This can be fixed by making it unique. To do so update the following param 'makeVarIndexUnique = true' (under params.sc.sc_file_converter) in your config.") + raise Exception("VSN ERROR: AnnData var index is not unique. This can be fixed by making it unique. To do so update the following param 'makeVarIndexUnique = true' (under params.utils.sc_file_converter) in your config.") if len(np.unique(adata.var.index)) < len(adata.var.index) and args.make_var_index_unique: adata.var_names_make_unique() print("Making AnnData var index unique...") diff --git a/src/utils/bin/sc_h5ad_annotate_by_cell_metadata.py b/src/utils/bin/sc_h5ad_annotate_by_cell_metadata.py index b9fe75ff..346b8cbd 100755 --- a/src/utils/bin/sc_h5ad_annotate_by_cell_metadata.py +++ b/src/utils/bin/sc_h5ad_annotate_by_cell_metadata.py @@ -160,7 +160,7 @@ num_matching_cells = np.sum(np.isin(adata.obs.index, metadata_subset.index)) if num_matching_cells != len(adata.obs): - raise Exception(f"VSN ERROR: Dimensions mismatch between {args.input.name} and {args.cell_meta_data_file_paths[0].name}: expected {len(adata.obs)} but got {num_matching_cells} cells matching. Make sur all cells from metadata file can be found in the data and/or make sure the sample IDs inferred from the data files (e.g.: {args.sample_id}) exist in the column {args.sample_column_name} of the following metadata file ({args.cell_meta_data_file_paths[0].name}) you provided in params.sc.cell_annotate.cellMetaDataFilePath.") + raise Exception(f"VSN ERROR: Dimensions mismatch between {args.input.name} and {args.cell_meta_data_file_paths[0].name}: expected {len(adata.obs)} but got {num_matching_cells} cells matching. 
Make sure all cells from metadata file can be found in the data and/or make sure the sample IDs inferred from the data files (e.g.: {args.sample_id}) exist in the column {args.sample_column_name} of the following metadata file ({args.cell_meta_data_file_paths[0].name}) you provided in params.utils.cell_annotate.cellMetaDataFilePath.")
 # Annotate
 adata.obs[annotation_column_name] = None
diff --git a/src/utils/conf/base.config b/src/utils/conf/base.config
index 7f872648..11f060bf 100644
--- a/src/utils/conf/base.config
+++ b/src/utils/conf/base.config
@@ -8,7 +8,7 @@ params {
 mode = 'link'
 }
 }
- sc {
+ tools {
 file_converter {
 off = 'h5ad'
 tagCellWithSampleId = true
diff --git a/src/utils/conf/cell_annotate.config b/src/utils/conf/cell_annotate.config
index 6add0dc0..089eade3 100644
--- a/src/utils/conf/cell_annotate.config
+++ b/src/utils/conf/cell_annotate.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 cell_annotate {
 off = 'h5ad'
 method = 'obo' // or 'aio'
diff --git a/src/utils/conf/cell_filter.config b/src/utils/conf/cell_filter.config
index 9924f4c7..92bce446 100644
--- a/src/utils/conf/cell_filter.config
+++ b/src/utils/conf/cell_filter.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 cell_filter {
 off = 'h5ad'
 method = 'internal' // or 'external' (requires the following additional params cellMetaDataFilePath, sampleColumnName, indexColumnName)
diff --git a/src/utils/conf/h5ad_clean.config b/src/utils/conf/h5ad_clean.config
index 8d32aa26..f63827a2 100644
--- a/src/utils/conf/h5ad_clean.config
+++ b/src/utils/conf/h5ad_clean.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 file_cleaner {
 obsColumnMapper = []
 obsColumnValueMapper = []
diff --git a/src/utils/conf/h5ad_concatenate.config b/src/utils/conf/h5ad_concatenate.config
index aeb602ba..ec3fe291 100644
--- a/src/utils/conf/h5ad_concatenate.config
+++ b/src/utils/conf/h5ad_concatenate.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 file_concatenator {
 join = 'outer'
 off = 'h5ad'
diff --git a/src/utils/conf/sample_annotate.config b/src/utils/conf/sample_annotate.config
index 57697cce..cc69fcc3 100644
--- a/src/utils/conf/sample_annotate.config
+++ b/src/utils/conf/sample_annotate.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 sample_annotate {
 off = 'h5ad'
 by {
diff --git a/src/utils/conf/sample_annotate_old_v1.config b/src/utils/conf/sample_annotate_old_v1.config
index 2ee74b6a..a427f397 100644
--- a/src/utils/conf/sample_annotate_old_v1.config
+++ b/src/utils/conf/sample_annotate_old_v1.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 sample_annotate_v1 {
 iff = '10x_cellranger_mex'
 off = 'h5ad'
diff --git a/src/utils/conf/scope.config b/src/utils/conf/scope.config
index d4c59376..ac9f6edd 100644
--- a/src/utils/conf/scope.config
+++ b/src/utils/conf/scope.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ tools {
 scope {
 genome = ''
 tree {
diff --git a/src/utils/conf/star_concatenate.config b/src/utils/conf/star_concatenate.config
index b75585e5..aaa357b4 100644
--- a/src/utils/conf/star_concatenate.config
+++ b/src/utils/conf/star_concatenate.config
@@ -1,5 +1,5 @@
 params {
- sc {
+ utils {
 star_concatenator {
 stranded = 'no'
 off = 'tsv'
diff --git a/src/utils/conf/test.config b/src/utils/conf/test.config
index 5b2ac5a5..4a3b63cc 100644
--- a/src/utils/conf/test.config
+++ b/src/utils/conf/test.config
@@ -1,8 +1,10 @@
 params {
- sc {
+ tools {
 scanpy {
 container = 'vibsinglecellnf/scanpy:1.8.1'
 }
+ }
+ utils {
 file_converter {
 iff = '10x_cellranger_mex'
 off = 'h5ad'
diff --git a/src/utils/main.test.nf b/src/utils/main.test.nf index
92159c70..0b93856a 100644 --- a/src/utils/main.test.nf +++ b/src/utils/main.test.nf @@ -76,7 +76,7 @@ workflow { SC__ANNOTATE_BY_SAMPLE_METADATA; } from './processes/h5adAnnotate' params(params) // Run - if(params.sc.sample_annotate) { + if(params.hasUtilsParams("sample_annotate")) { getDataChannel | \ SC__FILE_CONVERTER | \ SC__ANNOTATE_BY_SAMPLE_METADATA @@ -90,7 +90,7 @@ workflow { STATIC__ANNOTATE_BY_CELL_METADATA as ANNOTATE_BY_CELL_METADATA; } from './workflows/annotateByCellMetadata' params(params) // Run - if(params.sc.cell_annotate) { + if(params.hasUtilsParams("cell_annotate")) { getDataChannel | \ SC__FILE_CONVERTER ANNOTATE_BY_CELL_METADATA( @@ -107,7 +107,7 @@ workflow { FILTER_BY_CELL_METADATA; } from './workflows/filterByCellMetadata' params(params) // Run - if(params.sc.cell_filter) { + if(params.hasUtilsParams("cell_filter")) { getDataChannel | \ SC__FILE_CONVERTER FILTER_BY_CELL_METADATA( @@ -125,7 +125,7 @@ workflow { FILTER_BY_CELL_METADATA; } from './workflows/filterByCellMetadata' params(params) // Run - if(params.sc.cell_annotate) { + if(params.hasUtilsParams("cell_annotate")) { getDataChannel | \ SC__FILE_CONVERTER @@ -149,7 +149,7 @@ workflow { } from './processes/sra' params(params) // Run sra = getSRAChannel( params.data.sra ) - db = file(params.utils.sra_metadata.sraDbOutDir + "/SRAmetadb.sqlite") + db = file(params.getUtilsParams("sra_metadata").sraDbOutDir + "/SRAmetadb.sqlite") SRA_TO_METADATA( sra, db ) break; case "GET_METADATA_FROM_SRA_WEB": @@ -221,7 +221,7 @@ workflow { SC__H5AD_BEAUTIFY; } from './processes/h5adUpdate' params(params) // Run - if(params.sc.file_cleaner) { + if(params.hasUtilsParams("file_cleaner")) { getDataChannel | \ map { it -> tuple(it[0], it[1], null) diff --git a/src/utils/processes/h5adAnnotate.nf b/src/utils/processes/h5adAnnotate.nf index 324f8661..bd718f0e 100644 --- a/src/utils/processes/h5adAnnotate.nf +++ b/src/utils/processes/h5adAnnotate.nf @@ -44,7 +44,7 @@ process SC__ANNOTATE_BY_CELL_METADATA { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.sc.cell_annotate : params.getToolParams(tool)["cell_annotate"] + isParamNull(tool) ? params.getUtilsParams("cell_annotate") : params.getToolParams(tool)["cell_annotate"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' 
@@ -97,7 +97,7 @@ process SC__ANNOTATE_BY_SAMPLE_METADATA { path("${sampleId}.SC__ANNOTATE_BY_SAMPLE_METADATA.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.sample_annotate) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("sample_annotate")) processParams = sampleParams.local // method / type param @@ -131,12 +131,12 @@ process SC__ANNOTATE_BY_SAMPLE_METADATA { sampleColumnName = '' if(processParams.containsKey("by")) { if(!processParams.by.containsKey("sampleColumnName")) { - throw new Exception("VSN ERROR: Missing sampleColumnName param in params.sc.sample_annotate.by.") + throw new Exception("VSN ERROR: Missing sampleColumnName param in params.utils.sample_annotate.by.") } sampleColumnName = processParams.by.sampleColumnName } else { if(!processParams.containsKey("sampleColumnName")) { - throw new Exception("VSN ERROR: Missing sampleColumnName param in params.sc.sample_annotate.") + throw new Exception("VSN ERROR: Missing sampleColumnName param in params.utils.sample_annotate.") } // make it backward compatible (see sample_annotate_old_v1.config) sampleColumnName = processParams.sampleColumnName diff --git a/src/utils/processes/h5adExtractMetadata.nf b/src/utils/processes/h5adExtractMetadata.nf index cab96a84..eaccb791 100644 --- a/src/utils/processes/h5adExtractMetadata.nf +++ b/src/utils/processes/h5adExtractMetadata.nf @@ -18,7 +18,7 @@ process SC__UTILS__EXTRACT_FEATURE_METADATA { tuple val(sampleId), path("${sampleId}.SC__UTILS__EXTRACT_FEATURE_METADATA.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.utils.extract_feature_metadata) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("extract_feature_metadata")) processParams = sampleParams.local columnNamesAsArguments = processParams.columnNames.collect({ '--column-name' + ' ' + it }).join(' ') """ diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf index 5edaabdd..aa2090a8 100644 --- a/src/utils/processes/h5adSubset.nf +++ b/src/utils/processes/h5adSubset.nf @@ -33,7 +33,7 @@ process SC__PREPARE_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.getToolParams("cell_filter") : params.getToolParams(tool)["cell_filter"] + isParamNull(tool) ? params.getUtilsParams("cell_filter") : params.getToolParams(tool)["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' @@ -43,7 +43,7 @@ process SC__PREPARE_OBS_FILTER { input = f } else if (processParams.method == 'external') { if(!filterConfig.cellMetaDataFilePath) { - throw new Exception("VSN ERROR: A filter in params.sc.cell_filter does not provide a cellMetaDataFilePath entry.") + throw new Exception("VSN ERROR: A filter in params.utils.cell_filter does not provide a cellMetaDataFilePath entry.") } input = filterConfig.cellMetaDataFilePath } else { @@ -91,7 +91,7 @@ process SC__APPLY_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.getToolParams("cell_filter") : params.getToolParams(tool)["cell_filter"] + isParamNull(tool) ? params.getUtilsParams("cell_filter") : params.getToolParams(tool)["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' 
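The recurring rewrite in the hunks above swaps direct map access (params.sc.sample_annotate, params.getToolParams("cell_filter")) and containsKey() guards for accessor calls: params.getUtilsParams(...), params.hasUtilsParams(...), params.getToolParams(...), params.hasToolParams(...). The accessor definitions themselves are not part of this patch; what follows is only a sketch of the behaviour the call sites imply, assuming the helpers are closures stored on the params map (the same convention the existing params.parseConfig(...) calls rely on) and that a missing scope yields null rather than an exception:

    // Hypothetical sketch -- names and semantics inferred from the call sites
    // in this patch, not the repository's actual definitions.
    params.getToolParams  = { String tool -> params.containsKey('tools') ? params.tools[tool] : null }
    params.hasToolParams  = { String tool -> params.containsKey('tools') && params.tools.containsKey(tool) }
    params.getUtilsParams = { String key  -> params.containsKey('utils') ? params.utils[key] : null }
    params.hasUtilsParams = { String key  -> params.containsKey('utils') && params.utils.containsKey(key) }

Under that reading, a guard such as params.hasUtilsParams("publish") simply evaluates to false when the whole utils scope is absent, where the previous params.utils.containsKey("publish") would fail with a null-pointer error.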
diff --git a/src/utils/processes/h5adToLoom.nf b/src/utils/processes/h5adToLoom.nf index 2e9a8bb4..77f7049b 100644 --- a/src/utils/processes/h5adToLoom.nf +++ b/src/utils/processes/h5adToLoom.nf @@ -21,18 +21,19 @@ process SC__H5AD_TO_LOOM { path(data) output: - tuple val(sampleId), \ - path("${sampleId}.SC__H5AD_TO_LOOM.loom") + tuple \ + val(sampleId), \ + path("${sampleId}.SC__H5AD_TO_LOOM.loom") script: """ ${binDir}/h5ad_to_loom.py \ - ${(params.sc.containsKey('scope') && params.sc.scope.genome.length() > 0) ? '--nomenclature "' + params.sc.scope.genome + '"' : ''} \ - ${(params.sc.containsKey('scope') && params.sc.scope.tree.level_1.length() > 0 ) ? '--scope-tree-level-1 "' + params.sc.scope.tree.level_1 + '"' : ''} \ - ${(params.sc.containsKey('scope') && params.sc.scope.tree.level_2.length() > 0 ) ? '--scope-tree-level-2 "' + params.sc.scope.tree.level_2 + '"' : ''} \ - ${(params.sc.containsKey('scope') && params.sc.scope.tree.level_3.length() > 0 ) ? '--scope-tree-level-3 "' + params.sc.scope.tree.level_3 + '"' : ''} \ - ${(params.sc.containsKey('scope') && params.sc.scope.containsKey('markers') && params.sc.scope.markers.log_fc_threshold.length() > 0 ) ? '--markers-log-fc-threshold ' + params.sc.scope.markers.log_fc_threshold : ''} \ - ${(params.sc.containsKey('scope') && params.sc.scope.containsKey('markers') && params.sc.scope.markers.fdr_threshold.length() > 0 ) ? '--markers-fdr-threshold ' + params.sc.scope.markers.fdr_threshold : ''} \ + ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').genome.length() > 0) ? '--nomenclature "' + params.getUtilsParams('scope').genome + '"' : ''} \ + ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').tree.level_1.length() > 0 ) ? '--scope-tree-level-1 "' + params.getUtilsParams('scope').tree.level_1 + '"' : ''} \ + ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').tree.level_2.length() > 0 ) ? '--scope-tree-level-2 "' + params.getUtilsParams('scope').tree.level_2 + '"' : ''} \ + ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').tree.level_3.length() > 0 ) ? '--scope-tree-level-3 "' + params.getUtilsParams('scope').tree.level_3 + '"' : ''} \ + ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope')?.markers?.log_fc_threshold) ? '--markers-log-fc-threshold ' + params.getUtilsParams('scope').markers.log_fc_threshold : ''} \ + ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope')?.markers?.fdr_threshold) ? '--markers-fdr-threshold ' + params.getUtilsParams('scope').markers.fdr_threshold : ''} \ $data \ $rawFilteredData \ "${sampleId}.SC__H5AD_TO_LOOM.loom" diff --git a/src/utils/processes/h5adUpdate.nf b/src/utils/processes/h5adUpdate.nf index dd437b15..f2d42bcd 100644 --- a/src/utils/processes/h5adUpdate.nf +++ b/src/utils/processes/h5adUpdate.nf @@ -78,7 +78,7 @@ process SC__H5AD_BEAUTIFY { val(stashedParams) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.sc.file_cleaner) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("file_cleaner")) processParams = sampleParams.local obsColumnsToRemoveAsArgument = processParams.containsKey("obsColumnsToRemove") ? 
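The h5adToLoom.nf hunk above also replaces the length() checks on the optional markers block with Groovy safe navigation (params.getUtilsParams('scope')?.markers?.log_fc_threshold). A standalone illustration of why that is sufficient, in plain Groovy with made-up values standing in for a scope config map:

    // 'scope' mimics a scope config without a 'markers' block (hypothetical values).
    def scope = [genome: 'dm6', tree: [level_1: '', level_2: '', level_3: '']]
    assert scope?.markers?.log_fc_threshold == null   // missing key short-circuits to null, no exception
    def flag = scope?.markers?.log_fc_threshold ? "--markers-log-fc-threshold ${scope.markers.log_fc_threshold}" : ''
    assert flag == ''                                 // null is falsy, so the flag collapses to an empty string

The earlier params.sc.scope.markers.log_fc_threshold.length() form required the markers block to exist; the safe-navigation form degrades to an empty command-line fragment instead.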
diff --git a/src/utils/processes/h5adUpdateMetadata.nf b/src/utils/processes/h5adUpdateMetadata.nf index 09dc2b0c..d4d4d08a 100644 --- a/src/utils/processes/h5adUpdateMetadata.nf +++ b/src/utils/processes/h5adUpdateMetadata.nf @@ -18,7 +18,7 @@ process SC__UTILS__UPDATE_FEATURE_METADATA_INDEX { tuple val(sampleId), path("${sampleId}.SC__UTILS__UPDATE_FEATURE_METADATA_INDEX.h5ad") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.utils.update_feature_metadata_index) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("update_feature_metadata_index")) processParams = sampleParams.local """ ${binDir}/sc_h5ad_update_metadata.py \ diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index 20061297..631db186 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -143,7 +143,7 @@ def runRConverter = { def getConverterContainer = { params, type -> switch(type) { case "cistopic": - return params.sc.cistopic.container + return params.tools.cistopic.container case "r": return "vibsinglecellnf/scconverter:0.0.1" break; @@ -197,7 +197,7 @@ process SC__FILE_CONVERTER { path("${sampleId}.SC__FILE_CONVERTER.${outputExtension}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("file_converter")) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("file_converter")) processParams = sampleParams.local switch(inputDataType) { @@ -305,7 +305,7 @@ process SC__FILE_CONVERTER_FROM_SCE { path("${sampleId}.SC__FILE_CONVERTER_FROM_SCE.${outputDataType}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("file_converter")) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("file_converter")) processParams = sampleParams.local def _outputDataType = outputDataType converterToUse = getConverter( @@ -342,7 +342,7 @@ process SC__FILE_CONCATENATOR { tuple val(params.global.project_name), path("${params.global.project_name}.SC__FILE_CONCATENATOR.${processParams.off}") script: - processParams = params.sc.file_concatenator + processParams = params.getUtilsParams("file_concatenator") """ ${binDir}/sc_file_concatenator.py \ --file-format $processParams.off \ @@ -369,7 +369,7 @@ process SC__STAR_CONCATENATOR() { path("${params.global.project_name}.SC__STAR_CONCATENATOR.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star_concatenator) + def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("star_concatenator")) processParams = sampleParams.local id = params.global.project_name """ @@ -401,13 +401,13 @@ def getOutputFileName(params, tag, f, fileOutputSuffix, isParameterExplorationMo return isParamNull(fileOutputSuffix) ? 
"${tag}.${stashedParams.findAll { it != 'NULL' }.join('_')}.${f.extension}" : "${tag}.${fileOutputSuffix}.${stashedParams.findAll { it != 'NULL' }.join('_')}.${f.extension}" - if(params.utils.containsKey("publish") - && params.utils.publish.containsKey("pipelineOutputSuffix")) { - if(params.utils.publish.pipelineOutputSuffix == 'none') + def utilsPublishParams = params.getUtilsParams("publish") + if(utilsPublishParams?.pipelineOutputSuffix) { + if(utilsPublishParams.pipelineOutputSuffix == 'none') return "${tag}.${f.extension}" - if(params.utils.publish.pipelineOutputSuffix.length() == 0) + if(utilsPublishParams.pipelineOutputSuffix.length() == 0) throw new Exception("VSN ERROR: The parameter 'params.utils.publish.outputFileSuffix' cannot be empty. If you don't want to add a suffix to the final output, please set this param to 'none'.") - return params.utils.publish.pipelineOutputSuffix + return utilsPublishParams.pipelineOutputSuffix } if(isParamNull(fileOutputSuffix)) return "${f.baseName}.${f.extension}" @@ -483,9 +483,9 @@ process COMPRESS_HDF5() { val(stashedParams) shell: - def compressionLevel = params.utils.containsKey("publish") && - params.utils.publish.containsKey("compressionLevel") ? - params.utils.publish.compressionLevel : + def compressionLevel = params.hasUtilsParams("publish") && + params.getUtilsParams("publish")?.compressionLevel ? + params.getUtilsParams("publish").compressionLevel : 6 outputFileName = getOutputFileName( diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index 9231a3ee..beb32ba2 100644 --- a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -29,12 +29,12 @@ workflow ANNOTATE_BY_CELL_METADATA { // - The given tool is performing itself a cell-based annotation // - params.sc[tool] should exist // - tool == null: - // - params.sc.cell_annotate should exist + // - params.utils.cell_annotate should exist tool main: def workflowParams = isParamNull(tool) ? 
- params.sc.cell_annotate : + params.getUtilsParams("cell_annotate") : params.getToolParams(tool)["cell_annotate"] def method = workflowParams.method if(method == 'aio') { diff --git a/src/utils/workflows/filterAnnotateClean.nf b/src/utils/workflows/filterAnnotateClean.nf index 1411d383..7b040578 100644 --- a/src/utils/workflows/filterAnnotateClean.nf +++ b/src/utils/workflows/filterAnnotateClean.nf @@ -30,23 +30,23 @@ workflow FILTER_AND_ANNOTATE_AND_CLEAN { main: out = data - if(params.utils.update_feature_metadata_index) { + if(params.hasUtilsParams("update_feature_metadata_index")) { out = UPDATE_FEATURE_NOMENCLATURE( data ) } // Filter cells based on an indexed cell-based metadata table - if(params.hasToolParams("cell_filter")) { + if(params.hasUtilsParams("cell_filter")) { out = FILTER_BY_CELL_METADATA( out, 'NULL' ) } // Annotate cells based on an indexed cell-based metadata table - if(params.hasToolParams("cell_annotate")) { + if(params.hasUtilsParams("cell_annotate")) { out = STATIC__ANNOTATE_BY_CELL_METADATA( out, null ) } // Annotate cells based on an indexed sample-based metadata table - if(params.hasToolParams("sample_annotate")) { - if (!hasMetadataFilePath(params.sc.sample_annotate)) { + if(params.hasUtilsParams("sample_annotate")) { + if (!hasMetadataFilePath(params.getUtilsParams("sample_annotate"))) { throw new Exception("The metadataFilePath param is missing in sample_annotate.") } out = SC__ANNOTATE_BY_SAMPLE_METADATA( out ) @@ -54,7 +54,7 @@ workflow FILTER_AND_ANNOTATE_AND_CLEAN { // Clean // e.g.: // - h5ad: rename adata.obs values, remove adata.obs columns - if(params.hasToolParams("file_cleaner")) { + if(params.hasUtilsParams("file_cleaner")) { out = SC__H5AD_BEAUTIFY( out ) } diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf index 6a7cdb2e..463544c4 100644 --- a/src/utils/workflows/filterByCellMetadata.nf +++ b/src/utils/workflows/filterByCellMetadata.nf @@ -27,12 +27,12 @@ workflow FILTER_BY_CELL_METADATA { // - The given tool is performing itself a cell-based filtering // - params.sc[tool] should exist // - tool == null: - // - params.sc.cell_filter should exist + // - params.utils.cell_filter should exist tool main: def workflowParams = isParamNull(tool) ? 
- params.getToolParams("cell_filter") : + params.getUtilsParams("cell_filter") : params.getToolParams(tool)["cell_filter"] Channel diff --git a/workflows/bbknn.nf b/workflows/bbknn.nf index 5116a48a..44b6dffa 100644 --- a/workflows/bbknn.nf +++ b/workflows/bbknn.nf @@ -79,7 +79,7 @@ workflow bbknn { if(params.getToolParams("scanpy").containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.sc.containsKey("file_concatenator")) { + if(params.hasUtilsParams("file_concatenator")) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -122,7 +122,7 @@ workflow bbknn { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) // Select a default clustering when in parameter exploration mode - if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) { + if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -134,7 +134,7 @@ workflow bbknn { project = BEC_BBKNN.out.data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) + file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) ) // Collect the reports: diff --git a/workflows/harmony.nf b/workflows/harmony.nf index 4d296727..1d6075b6 100644 --- a/workflows/harmony.nf +++ b/workflows/harmony.nf @@ -81,7 +81,7 @@ workflow harmony { if(params.getToolParams("scanpy").containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.sc.containsKey("file_concatenator")) { + if(params.hasUtilsParams("file_concatenator")) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -124,7 +124,7 @@ workflow harmony { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) // Select a default clustering when in parameter exploration mode - if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) { + if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -136,7 +136,7 @@ workflow harmony { project = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) + file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) ) // Collect the reports: diff --git a/workflows/mnncorrect.nf b/workflows/mnncorrect.nf index 9d15ccaf..5f4bf794 100644 --- a/workflows/mnncorrect.nf +++ b/workflows/mnncorrect.nf @@ -90,7 +90,7 @@ workflow mnncorrect { if(params.getToolParams("scanpy").containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.sc.containsKey("file_concatenator")) { + if(params.hasUtilsParams("file_concatenator")) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -131,7 +131,7 @@ workflow mnncorrect { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) // Select a default clustering when in parameter exploration mode - if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) { + if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { 
scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -143,7 +143,7 @@ workflow mnncorrect { project = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) + file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) ) // Collect the reports: diff --git a/workflows/multi_sample.nf b/workflows/multi_sample.nf index a1dc9d34..192fbf57 100644 --- a/workflows/multi_sample.nf +++ b/workflows/multi_sample.nf @@ -87,7 +87,7 @@ workflow multi_sample { if(params.getToolParams("scanpy").containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.sc.containsKey("file_concatenator")) { + if(params.hasUtilsParams("file_concatenator")) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -120,7 +120,7 @@ workflow multi_sample { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) // Select a default clustering when in parameter exploration mode - if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) { + if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -143,7 +143,7 @@ workflow multi_sample { samples = data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) + file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) ) ipynbs = QC_FILTER.out.report.map { diff --git a/workflows/nemesh.nf b/workflows/nemesh.nf index d489e46c..d880898e 100644 --- a/workflows/nemesh.nf +++ b/workflows/nemesh.nf @@ -84,11 +84,11 @@ workflow nemesh { data.subscribe { println it } // Check if custom selected barcodes file has been specified - if (params.sc.nemesh.custom_selected_barcodes) { + if (params.tools.nemesh.custom_selected_barcodes) { Channel - .fromPath(params.sc.nemesh.custom_selected_barcodes) + .fromPath(params.tools.nemesh.custom_selected_barcodes) .map { - path -> tuple(path.baseName.split('\\.')[0], params.sc.nemesh.custom_selected_barcodes, path) + path -> tuple(path.baseName.split('\\.')[0], params.tools.nemesh.custom_selected_barcodes, path) } .set { selectedBarcodesByCustom } selectedBarcodesByCustom.subscribe { println it } @@ -138,7 +138,7 @@ workflow nemesh { a = FINAL_BAM.combine(SC__DROPLET_UTILS__BARCODE_SELECTION.out.selectedCellBarcodesByKnee, by: 0) b = FINAL_BAM.combine(SC__DROPLET_UTILS__BARCODE_SELECTION.out.selectedCellBarcodesByInflection, by: 0) - if (params.sc.nemesh.custom_selected_barcodes) { + if (params.tools.nemesh.custom_selected_barcodes) { c = FINAL_BAM.combine(selectedBarcodesByCustom, by: 0) SC__DROP_SEQ_TOOLS__DIGITAL_EXPRESSION( a.mix(b,c) diff --git a/workflows/single_sample.nf b/workflows/single_sample.nf index 446b30a1..b6907e01 100644 --- a/workflows/single_sample.nf +++ b/workflows/single_sample.nf @@ -33,7 +33,7 @@ workflow single_sample { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) // Select a default clustering when in parameter exploration mode - if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) { + if(params.hasToolParams('directs') && 
clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( SCANPY__SINGLE_SAMPLE.out.final_processed_scope_loom ) diff --git a/workflows/single_sample_star.nf b/workflows/single_sample_star.nf index 8844f835..d5b802e5 100644 --- a/workflows/single_sample_star.nf +++ b/workflows/single_sample_star.nf @@ -72,7 +72,7 @@ workflow single_sample_star { data = STAR() samples = data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) + file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) ) out = FILTER_AND_ANNOTATE_AND_CLEAN( data ) @@ -109,7 +109,7 @@ workflow single_sample_star { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) // Select a default clustering when in parameter exploration mode - if(params.sc.containsKey("directs") && clusteringParams.isParameterExplorationModeOn()) { + if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( scopeloom ) } From c08fa3b122cdf24d07e2362735b11e92990d873d Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 15:26:27 +0100 Subject: [PATCH 161/202] Temp switch back to params.sc for CI --- src/archr/archr.config | 2 +- src/bap/bap.config | 2 +- src/bwamaptools/bwamaptools.config | 4 ++-- src/celda/conf/base.config | 2 +- src/celda/conf/decontx_correct.config | 2 +- src/celda/conf/decontx_filter.config | 2 +- src/celda/conf/decontx_filter_defaults.config | 2 +- src/cellranger/conf/base.config | 2 +- src/cellranger/conf/cellranger_libraries.config | 2 +- src/cellranger/conf/count.config | 2 +- src/cellranger/conf/count_libraries.config | 2 +- src/cellranger/conf/count_metadata.config | 2 +- src/cellranger/conf/mkfastq.config | 2 +- src/directs/conf/base.config | 2 +- src/directs/conf/test__select_default_clustering.config | 2 +- src/dropletutils/dropletutils.config | 2 +- src/dropseqtools/dropseqtools.config | 4 ++-- src/edirect/edirect.config | 2 +- src/fastp/fastp.config | 2 +- src/flybaser/flybaser.config | 2 +- src/harmony/harmony.config | 2 +- src/pcacv/pcacv.config | 2 +- src/popscle/popscle.config | 2 +- src/pycistopic/pycistopic.config | 2 +- src/scanpy/conf/base.config | 2 +- src/scanpy/conf/bbknn.config | 2 +- src/scanpy/conf/data_transformation.config | 2 +- src/scanpy/conf/filter.config | 2 +- src/scanpy/conf/min.config | 2 +- src/scanpy/conf/mnncorrect.config | 2 +- src/scanpy/conf/normalization.config | 2 +- src/scanpy/conf/regress_out.config | 2 +- src/scenic/conf/append.config | 2 +- src/scenic/conf/min/aucell.config | 2 +- src/scenic/conf/min/base/v0.0.1.config | 2 +- src/scenic/conf/min/cistarget.config | 2 +- .../conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config | 2 +- .../conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config | 2 +- .../conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config | 2 +- .../conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config | 2 +- .../conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config | 2 +- .../conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config | 2 +- .../conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config | 2 +- src/scenic/conf/min/grn.config | 2 +- src/scenic/conf/min/labels.config | 2 +- src/scenic/conf/min/scenic.config | 2 +- src/scenic/conf/min/scope.config | 2 +- src/scenic/conf/min/tfs/fly-v0.0.1.config | 2 +- src/scenic/conf/min/tfs/human-v0.0.1.config | 2 +- 
src/scenic/conf/min/tfs/mouse-v0.0.1.config | 2 +- src/scenic/conf/multi_runs.config | 2 +- src/scenic/conf/test.config | 2 +- src/scenic/conf/test_multi_runs.config | 2 +- src/scenic/scenic.config | 2 +- src/scrublet/conf/base.config | 2 +- src/scrublet/conf/scrublet_defaults.conf | 2 +- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- src/sinto/sinto.config | 2 +- src/soupx/conf/base.config | 2 +- src/soupx/conf/soupx_correct.config | 2 +- src/sratoolkit/sratoolkit.config | 2 +- src/star/star.config | 2 +- src/trimgalore/trimgalore.config | 2 +- src/utils/conf/base.config | 2 +- src/utils/conf/cell_annotate.config | 2 +- src/utils/conf/cell_filter.config | 2 +- src/utils/conf/h5ad_clean.config | 2 +- src/utils/conf/h5ad_concatenate.config | 2 +- src/utils/conf/sample_annotate.config | 2 +- src/utils/conf/sample_annotate_old_v1.config | 2 +- src/utils/conf/scope.config | 2 +- src/utils/conf/test.config | 2 +- src/utils/processes/h5adSubset.nf | 2 +- src/utils/workflows/annotateByCellMetadata.nf | 2 +- src/utils/workflows/filterByCellMetadata.nf | 2 +- 75 files changed, 77 insertions(+), 77 deletions(-) diff --git a/src/archr/archr.config b/src/archr/archr.config index 9980d80d..33b32d5b 100644 --- a/src/archr/archr.config +++ b/src/archr/archr.config @@ -1,5 +1,5 @@ params { - tools { + sc { archr { container = 'vibsinglecellnf/archr:2020-07-13-ddcaae4' genome = 'hg38' diff --git a/src/bap/bap.config b/src/bap/bap.config index fc763f72..1b6f80a1 100644 --- a/src/bap/bap.config +++ b/src/bap/bap.config @@ -1,5 +1,5 @@ params { - tools { + sc { bap { container = 'vibsinglecellnf/bap:2021-04-27-3b48f4b' } diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index e6cb08ae..194221da 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,6 +1,6 @@ params { - tools { - bwamaptools { + sc { + bwamapsc { container = 'vibsinglecellnf/bwamaptools:2020-07-02-13b5637' } } diff --git a/src/celda/conf/base.config b/src/celda/conf/base.config index c7dae8ef..b88f86d7 100644 --- a/src/celda/conf/base.config +++ b/src/celda/conf/base.config @@ -1,5 +1,5 @@ params { - tools { + sc { celda { container = 'vibsinglecellnf/celda:1.4.5' decontx { diff --git a/src/celda/conf/decontx_correct.config b/src/celda/conf/decontx_correct.config index e4614494..ded00be5 100644 --- a/src/celda/conf/decontx_correct.config +++ b/src/celda/conf/decontx_correct.config @@ -1,5 +1,5 @@ params { - tools { + sc { celda { decontx { strategy = "correct" diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 68e65fb4..02aa4999 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -7,7 +7,7 @@ params.tools.remove('cell_filter') includeConfig './decontx_filter_defaults.config' params { - tools { + sc { celda { decontx { strategy = "filter" // choices: 'filter' (default), 'correct' diff --git a/src/celda/conf/decontx_filter_defaults.config b/src/celda/conf/decontx_filter_defaults.config index 13800fb9..93c0feb7 100644 --- a/src/celda/conf/decontx_filter_defaults.config +++ b/src/celda/conf/decontx_filter_defaults.config @@ -1,5 +1,5 @@ params { - tools { + sc { celda { decontx { strategy = "filter" diff --git a/src/cellranger/conf/base.config b/src/cellranger/conf/base.config index 1c3382f3..11e5b757 100644 --- a/src/cellranger/conf/base.config +++ b/src/cellranger/conf/base.config @@ -1,5 +1,5 @@ params { - tools { + sc { cellranger { container = '/path/to/cellranger/cellranger' 
} diff --git a/src/cellranger/conf/cellranger_libraries.config b/src/cellranger/conf/cellranger_libraries.config index 077f4825..eceddc6c 100644 --- a/src/cellranger/conf/cellranger_libraries.config +++ b/src/cellranger/conf/cellranger_libraries.config @@ -1,7 +1,7 @@ includeConfig("mkfastq.config") params { - tools { + sc { cellranger { librariesMap = [ "sample1": [ diff --git a/src/cellranger/conf/count.config b/src/cellranger/conf/count.config index 122f1eca..8efa03f6 100644 --- a/src/cellranger/conf/count.config +++ b/src/cellranger/conf/count.config @@ -1,5 +1,5 @@ params { - tools { + sc { cellranger { count { transcriptome = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/indexes/CellRanger/3.0.2/hg38_iGenomes' diff --git a/src/cellranger/conf/count_libraries.config b/src/cellranger/conf/count_libraries.config index 987871aa..360ee80d 100644 --- a/src/cellranger/conf/count_libraries.config +++ b/src/cellranger/conf/count_libraries.config @@ -1,5 +1,5 @@ params { - tools { + sc { cellranger { count { featureRef = '' diff --git a/src/cellranger/conf/count_metadata.config b/src/cellranger/conf/count_metadata.config index a55c1ba9..b137cc12 100644 --- a/src/cellranger/conf/count_metadata.config +++ b/src/cellranger/conf/count_metadata.config @@ -1,5 +1,5 @@ params { - tools { + sc { cellranger { count { metadata = '' diff --git a/src/cellranger/conf/mkfastq.config b/src/cellranger/conf/mkfastq.config index b83a8a58..e4bd4786 100644 --- a/src/cellranger/conf/mkfastq.config +++ b/src/cellranger/conf/mkfastq.config @@ -1,5 +1,5 @@ params { - tools { + sc { cellranger { mkfastq { // https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq diff --git a/src/directs/conf/base.config b/src/directs/conf/base.config index 364dfbba..facaacce 100644 --- a/src/directs/conf/base.config +++ b/src/directs/conf/base.config @@ -1,5 +1,5 @@ params { - tools { + sc { directs { container = 'vibsinglecellnf/directs:0.1.0' select_default_clustering { diff --git a/src/directs/conf/test__select_default_clustering.config b/src/directs/conf/test__select_default_clustering.config index 3ddd6a73..2bdaa179 100644 --- a/src/directs/conf/test__select_default_clustering.config +++ b/src/directs/conf/test__select_default_clustering.config @@ -2,7 +2,7 @@ includeConfig '../../../conf/global.config' includeConfig '../../../conf/singularity.config' params { - tools { + sc { directs { inputLoom = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/fca/analysis/in-house/20200520_000__all__4b9e9810-8600-11ea-867e-a0000220fe80/out/data/20200520_FCA_BioHub_B1_B2_All.HARMONY_SCENIC.loom' } diff --git a/src/dropletutils/dropletutils.config b/src/dropletutils/dropletutils.config index 5a90a97c..00bc0d1e 100644 --- a/src/dropletutils/dropletutils.config +++ b/src/dropletutils/dropletutils.config @@ -1,5 +1,5 @@ params { - tools { + sc { dropletutils { container = 'vibsinglecellnf/dropletutils:1.4.3' } diff --git a/src/dropseqtools/dropseqtools.config b/src/dropseqtools/dropseqtools.config index 6ede9afa..8af341f1 100644 --- a/src/dropseqtools/dropseqtools.config +++ b/src/dropseqtools/dropseqtools.config @@ -1,6 +1,6 @@ params { - tools { - dropseqtools { + sc { + dropseqsc { container = 'humancellatlas/dropseqtools:1.12' tag_unaligned_bam_with_cellbarcode { diff --git a/src/edirect/edirect.config b/src/edirect/edirect.config index f49a11c0..90ae09e8 100644 --- a/src/edirect/edirect.config +++ b/src/edirect/edirect.config @@ -1,5 +1,5 @@ params { - tools { + sc { 
edirect { container = 'ncbi/edirect:latest' } diff --git a/src/fastp/fastp.config b/src/fastp/fastp.config index aac86685..0f47f446 100644 --- a/src/fastp/fastp.config +++ b/src/fastp/fastp.config @@ -1,5 +1,5 @@ params { - tools { + sc { fastp { container = 'vibsinglecellnf/fastp:0.20.0' thread = 1 diff --git a/src/flybaser/flybaser.config b/src/flybaser/flybaser.config index 6dbcccb8..400fffd2 100644 --- a/src/flybaser/flybaser.config +++ b/src/flybaser/flybaser.config @@ -1,5 +1,5 @@ params { - tools { + sc { flybaser { container = 'vibsinglecellnf/flybaser:0.2.1' diff --git a/src/harmony/harmony.config b/src/harmony/harmony.config index 3af25635..f7781f77 100644 --- a/src/harmony/harmony.config +++ b/src/harmony/harmony.config @@ -1,5 +1,5 @@ params { - tools { + sc { harmony { container = 'vibsinglecellnf/harmony:1.0-3' report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/harmony/bin/reports/sc_harmony_report.ipynb" diff --git a/src/pcacv/pcacv.config b/src/pcacv/pcacv.config index 1dfb8a79..c0af0209 100644 --- a/src/pcacv/pcacv.config +++ b/src/pcacv/pcacv.config @@ -1,5 +1,5 @@ params { - tools { + sc { pcacv { container = "vibsinglecellnf/pcacv:0.2.0" find_optimal_npcs { diff --git a/src/popscle/popscle.config b/src/popscle/popscle.config index 607d102a..4ab2db38 100644 --- a/src/popscle/popscle.config +++ b/src/popscle/popscle.config @@ -1,5 +1,5 @@ params { - tools { + sc { popscle { container = 'vibsinglecellnf/popscle:2021-05-05-da70fc7' vcf = '/path/to/vcf_file' diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index 46f6c470..abe29678 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -1,5 +1,5 @@ params { - tools { + sc { pycistopic { container = 'vibsinglecellnf/pycistopic:0.2' macs2_call_peaks { diff --git a/src/scanpy/conf/base.config b/src/scanpy/conf/base.config index d1135308..d2544cc3 100644 --- a/src/scanpy/conf/base.config +++ b/src/scanpy/conf/base.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' report { diff --git a/src/scanpy/conf/bbknn.config b/src/scanpy/conf/bbknn.config index e68636c7..5a957800 100644 --- a/src/scanpy/conf/bbknn.config +++ b/src/scanpy/conf/bbknn.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { batch_effect_correct { method = 'bbknn' diff --git a/src/scanpy/conf/data_transformation.config b/src/scanpy/conf/data_transformation.config index f23e2b59..8610b720 100644 --- a/src/scanpy/conf/data_transformation.config +++ b/src/scanpy/conf/data_transformation.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { data_transformation { method = 'log1p' diff --git a/src/scanpy/conf/filter.config b/src/scanpy/conf/filter.config index ca06cd90..e21b736e 100644 --- a/src/scanpy/conf/filter.config +++ b/src/scanpy/conf/filter.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { filter { report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/scanpy/bin/reports/sc_filter_qc_report.ipynb" diff --git a/src/scanpy/conf/min.config b/src/scanpy/conf/min.config index c0188bbb..a39b30ad 100644 --- a/src/scanpy/conf/min.config +++ b/src/scanpy/conf/min.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' report { diff --git a/src/scanpy/conf/mnncorrect.config b/src/scanpy/conf/mnncorrect.config index 746a5761..1e1e1634 100644 --- a/src/scanpy/conf/mnncorrect.config +++ b/src/scanpy/conf/mnncorrect.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { batch_effect_correct { method = 'mnncorrect' diff --git a/src/scanpy/conf/normalization.config b/src/scanpy/conf/normalization.config index b79bb7e0..3dd9fe4b 100644 --- a/src/scanpy/conf/normalization.config +++ b/src/scanpy/conf/normalization.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { normalization { method = 'cpx' diff --git a/src/scanpy/conf/regress_out.config b/src/scanpy/conf/regress_out.config index 0db5c768..04e9999d 100644 --- a/src/scanpy/conf/regress_out.config +++ b/src/scanpy/conf/regress_out.config @@ -1,5 +1,5 @@ params { - tools { + sc { scanpy { regress_out { variablesToRegressOut = ['n_counts', 'percent_mito'] diff --git a/src/scenic/conf/append.config b/src/scenic/conf/append.config index 2a32607b..dd0af3e5 100644 --- a/src/scenic/conf/append.config +++ b/src/scenic/conf/append.config @@ -1,6 +1,6 @@ params { - tools { + sc { scenic { report_ipynb = '/src/scenic/bin/reports/scenic_report.ipynb' existingScenicLoom = '' diff --git a/src/scenic/conf/min/aucell.config b/src/scenic/conf/min/aucell.config index 64ec0b57..1a427a08 100644 --- a/src/scenic/conf/min/aucell.config +++ b/src/scenic/conf/min/aucell.config @@ -1,6 +1,6 @@ params { - tools { + sc { scenic { aucell { output = 'aucell_output.loom' diff --git a/src/scenic/conf/min/base/v0.0.1.config b/src/scenic/conf/min/base/v0.0.1.config index b2531630..c4bbc175 100644 --- a/src/scenic/conf/min/base/v0.0.1.config +++ b/src/scenic/conf/min/base/v0.0.1.config @@ -10,7 +10,7 @@ if(!params.global.containsKey("outdir")) params { global = params.global - tools { + sc { scenic { // Container settings container = 'aertslab/pyscenic:0.10.0' diff --git a/src/scenic/conf/min/cistarget.config b/src/scenic/conf/min/cistarget.config index 30f9546c..50e1fc96 100644 --- a/src/scenic/conf/min/cistarget.config +++ b/src/scenic/conf/min/cistarget.config @@ -6,7 +6,7 @@ if(params.global.species == "human" && !(params.global.genome.assembly in ["hg38 throw new Exception("No cisTarget databases found for the given genome: "+ params.global.genome.assembly) params { - tools { + sc { scenic { cistarget { adj = "adj.tsv" diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config index d170c3ca..a0346c58 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config index ec4fb229..d805f8b7 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // Motif feather format databases diff --git 
a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config index f1cf7671..6ea5fcf3 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // Motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config index 56e01ef5..57a56695 100644 --- a/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // Motif feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config index ab3b5919..1e6fe1a5 100644 --- a/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // track feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config index b48ddeab..09ba7c20 100644 --- a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // Track feather format databases diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config index 12552632..e74060a2 100644 --- a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config +++ b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { cistarget { // Track feather format databases diff --git a/src/scenic/conf/min/grn.config b/src/scenic/conf/min/grn.config index 3178cf92..908b053e 100644 --- a/src/scenic/conf/min/grn.config +++ b/src/scenic/conf/min/grn.config @@ -4,7 +4,7 @@ if(!(params.global.species in ["human", "mouse", "fly"])) throw new Exception("No TFs found for the given species: "+ params.global.species) params { - tools { + sc { scenic { grn { // seed = 617 diff --git a/src/scenic/conf/min/labels.config b/src/scenic/conf/min/labels.config index b2072584..b64a6e33 100644 --- a/src/scenic/conf/min/labels.config +++ b/src/scenic/conf/min/labels.config @@ -1,7 +1,7 @@ def _ = params.tools.scenic params { - tools { + sc { scenic { labels { // Resources settings: diff --git a/src/scenic/conf/min/scenic.config b/src/scenic/conf/min/scenic.config index fe51e982..6ec3d049 100644 --- a/src/scenic/conf/min/scenic.config +++ b/src/scenic/conf/min/scenic.config @@ -10,7 +10,7 @@ params { outdir = "out" } - tools { + sc { scenic { configVersion = "v0.0.1" } diff --git a/src/scenic/conf/min/scope.config b/src/scenic/conf/min/scope.config index 80b8dc9f..b8d0a8d9 100644 --- a/src/scenic/conf/min/scope.config +++ b/src/scenic/conf/min/scope.config @@ -1,5 +1,5 @@ params { - tools { + sc { scope { genome = "" tree { diff --git a/src/scenic/conf/min/tfs/fly-v0.0.1.config b/src/scenic/conf/min/tfs/fly-v0.0.1.config index 996f3227..0606757b 100644 --- a/src/scenic/conf/min/tfs/fly-v0.0.1.config +++ 
b/src/scenic/conf/min/tfs/fly-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { grn { tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_dmel.txt" diff --git a/src/scenic/conf/min/tfs/human-v0.0.1.config b/src/scenic/conf/min/tfs/human-v0.0.1.config index 062477dd..b7f2b1d1 100644 --- a/src/scenic/conf/min/tfs/human-v0.0.1.config +++ b/src/scenic/conf/min/tfs/human-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { grn { tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_hg38.txt" diff --git a/src/scenic/conf/min/tfs/mouse-v0.0.1.config b/src/scenic/conf/min/tfs/mouse-v0.0.1.config index 903ced9f..1d10fe65 100644 --- a/src/scenic/conf/min/tfs/mouse-v0.0.1.config +++ b/src/scenic/conf/min/tfs/mouse-v0.0.1.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { grn { tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt" diff --git a/src/scenic/conf/multi_runs.config b/src/scenic/conf/multi_runs.config index 9433a60a..349c2a20 100644 --- a/src/scenic/conf/multi_runs.config +++ b/src/scenic/conf/multi_runs.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { numRuns = 2 // AUCell parameters diff --git a/src/scenic/conf/test.config b/src/scenic/conf/test.config index 198e2dd9..234d2034 100644 --- a/src/scenic/conf/test.config +++ b/src/scenic/conf/test.config @@ -3,7 +3,7 @@ params { project_name = 'Test' } - tools { + sc { scenic { filteredLoom = '/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/testruns/scenic-nf_testing/expr_mat.loom' // for testing diff --git a/src/scenic/conf/test_multi_runs.config b/src/scenic/conf/test_multi_runs.config index b9fabfe6..016d282c 100644 --- a/src/scenic/conf/test_multi_runs.config +++ b/src/scenic/conf/test_multi_runs.config @@ -1,5 +1,5 @@ params { - tools { + sc { scenic { numRuns = 2 // AUCell parameters diff --git a/src/scenic/scenic.config b/src/scenic/scenic.config index 5eee79bc..34a2287c 100644 --- a/src/scenic/scenic.config +++ b/src/scenic/scenic.config @@ -1,6 +1,6 @@ params { - tools { + sc { scenic { // Label for the processes container = 'aertslab/pyscenic:0.10.4' diff --git a/src/scrublet/conf/base.config b/src/scrublet/conf/base.config index bb711e07..74299626 100644 --- a/src/scrublet/conf/base.config +++ b/src/scrublet/conf/base.config @@ -1,5 +1,5 @@ params { - tools { + sc { scrublet { container = 'vibsinglecellnf/scrublet:0.2.3' doublet_detection { diff --git a/src/scrublet/conf/scrublet_defaults.conf b/src/scrublet/conf/scrublet_defaults.conf index e82b41a6..800dd6dc 100644 --- a/src/scrublet/conf/scrublet_defaults.conf +++ b/src/scrublet/conf/scrublet_defaults.conf @@ -1,5 +1,5 @@ params { - tools { + sc { scrublet { // add sensible default parameters for Scrublet: cell_annotate { diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 447802c6..40153265 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,5 +1,5 @@ params { - tools { + sc { singlecelltoolkit { container = 'vibsinglecellnf/singlecelltoolkit:2021-07-06-ea48b36' barcode_correction { diff --git a/src/sinto/sinto.config b/src/sinto/sinto.config index 7ba9b70c..1243fd68 100644 --- a/src/sinto/sinto.config +++ b/src/sinto/sinto.config @@ -1,5 +1,5 @@ params { - tools { + sc { sinto { container = 'vibsinglecellnf/sinto:0.7.2-dev' fragments { diff --git a/src/soupx/conf/base.config b/src/soupx/conf/base.config index 780c361c..62526949 100644 --- 
a/src/soupx/conf/base.config +++ b/src/soupx/conf/base.config @@ -1,5 +1,5 @@ params { - tools { + sc { soupx { container = 'vibsinglecellnf/soupx:1.4.8' } diff --git a/src/soupx/conf/soupx_correct.config b/src/soupx/conf/soupx_correct.config index 377eaaf2..589f1022 100644 --- a/src/soupx/conf/soupx_correct.config +++ b/src/soupx/conf/soupx_correct.config @@ -1,5 +1,5 @@ params { - tools { + sc { soupx { roundToInt = false } diff --git a/src/sratoolkit/sratoolkit.config b/src/sratoolkit/sratoolkit.config index 709ec3de..9dbc2f68 100644 --- a/src/sratoolkit/sratoolkit.config +++ b/src/sratoolkit/sratoolkit.config @@ -1,5 +1,5 @@ params { - tools { + sc { sratoolkit { container = 'vibsinglecellnf/sratoolkit:2.9.4-1.1.0' // --include-technical option (fasterq-dump) diff --git a/src/star/star.config b/src/star/star.config index 1fb5743b..7778af07 100644 --- a/src/star/star.config +++ b/src/star/star.config @@ -1,5 +1,5 @@ params { - tools { + sc { star { version = '2.7.1a' container = "/ddn1/vol1/staging/leuven/res_00001/software/STAR/${params.tools.star.version}/STAR_${params.tools.star.version}.sif" diff --git a/src/trimgalore/trimgalore.config b/src/trimgalore/trimgalore.config index 6d4ee56b..fdb78aff 100644 --- a/src/trimgalore/trimgalore.config +++ b/src/trimgalore/trimgalore.config @@ -1,5 +1,5 @@ params { - tools { + sc { trimgalore { container = 'vibsinglecellnf/trimgalore:0.6.6' trim { diff --git a/src/utils/conf/base.config b/src/utils/conf/base.config index 11f060bf..7f872648 100644 --- a/src/utils/conf/base.config +++ b/src/utils/conf/base.config @@ -8,7 +8,7 @@ params { mode = 'link' } } - tools { + sc { file_converter { off = 'h5ad' tagCellWithSampleId = true diff --git a/src/utils/conf/cell_annotate.config b/src/utils/conf/cell_annotate.config index 089eade3..6add0dc0 100644 --- a/src/utils/conf/cell_annotate.config +++ b/src/utils/conf/cell_annotate.config @@ -1,5 +1,5 @@ params { - utils { + sc { cell_annotate { off = 'h5ad' method = 'obo' // or 'aio' diff --git a/src/utils/conf/cell_filter.config b/src/utils/conf/cell_filter.config index 92bce446..9924f4c7 100644 --- a/src/utils/conf/cell_filter.config +++ b/src/utils/conf/cell_filter.config @@ -1,5 +1,5 @@ params { - utils { + sc { cell_filter { off = 'h5ad' method = 'internal' // or 'external' (requires the following additional params cellMetaDataFilePath, sampleColumnName, indexColumnName) diff --git a/src/utils/conf/h5ad_clean.config b/src/utils/conf/h5ad_clean.config index f63827a2..8d32aa26 100644 --- a/src/utils/conf/h5ad_clean.config +++ b/src/utils/conf/h5ad_clean.config @@ -1,5 +1,5 @@ params { - utils { + sc { file_cleaner { obsColumnMapper = [] obsColumnValueMapper = [] diff --git a/src/utils/conf/h5ad_concatenate.config b/src/utils/conf/h5ad_concatenate.config index ec3fe291..aeb602ba 100644 --- a/src/utils/conf/h5ad_concatenate.config +++ b/src/utils/conf/h5ad_concatenate.config @@ -1,5 +1,5 @@ params { - utils { + sc { file_concatenator { join = 'outer' off = 'h5ad' diff --git a/src/utils/conf/sample_annotate.config b/src/utils/conf/sample_annotate.config index cc69fcc3..57697cce 100644 --- a/src/utils/conf/sample_annotate.config +++ b/src/utils/conf/sample_annotate.config @@ -1,5 +1,5 @@ params { - utils { + sc { sample_annotate { off = 'h5ad' by { diff --git a/src/utils/conf/sample_annotate_old_v1.config b/src/utils/conf/sample_annotate_old_v1.config index a427f397..2ee74b6a 100644 --- a/src/utils/conf/sample_annotate_old_v1.config +++ b/src/utils/conf/sample_annotate_old_v1.config @@ -1,5 +1,5 @@ params { 
- utils {
+ sc {
 sample_annotate_v1 {
 iff = '10x_cellranger_mex'
 off = 'h5ad'
diff --git a/src/utils/conf/scope.config b/src/utils/conf/scope.config
index ac9f6edd..d4c59376 100644
--- a/src/utils/conf/scope.config
+++ b/src/utils/conf/scope.config
@@ -1,5 +1,5 @@
 params {
- tools {
+ sc {
 scope {
 genome = ''
 tree {
diff --git a/src/utils/conf/test.config b/src/utils/conf/test.config
index 4a3b63cc..2871011c 100644
--- a/src/utils/conf/test.config
+++ b/src/utils/conf/test.config
@@ -1,5 +1,5 @@
 params {
- tools {
+ sc {
 scanpy {
 container = 'vibsinglecellnf/scanpy:1.8.1'
 }
diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf
index aa2090a8..a4229187 100644
--- a/src/utils/processes/h5adSubset.nf
+++ b/src/utils/processes/h5adSubset.nf
@@ -43,7 +43,7 @@ process SC__PREPARE_OBS_FILTER {
 input = f
 } else if (processParams.method == 'external') {
 if(!filterConfig.cellMetaDataFilePath) {
- throw new Exception("VSN ERROR: A filter in params.utils.cell_filter does not provide a cellMetaDataFilePath entry.")
+ throw new Exception("VSN ERROR: A filter in params.sc.cell_filter does not provide a cellMetaDataFilePath entry.")
 }
 input = filterConfig.cellMetaDataFilePath
 } else {
diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf
index beb32ba2..6f10e4e3 100644
--- a/src/utils/workflows/annotateByCellMetadata.nf
+++ b/src/utils/workflows/annotateByCellMetadata.nf
@@ -29,7 +29,7 @@ workflow ANNOTATE_BY_CELL_METADATA {
 // - The given tool is performing itself a cell-based annotation
 // - params.sc[tool] should exist
 // - tool == null:
- // - params.utils.cell_annotate should exist
+ // - params.sc.cell_annotate should exist
 tool
 main:
diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf
index 463544c4..bd1b95a1 100644
--- a/src/utils/workflows/filterByCellMetadata.nf
+++ b/src/utils/workflows/filterByCellMetadata.nf
@@ -27,7 +27,7 @@ workflow FILTER_BY_CELL_METADATA {
 // - The given tool is performing itself a cell-based filtering
 // - params.sc[tool] should exist
 // - tool == null:
- // - params.utils.cell_filter should exist
+ // - params.sc.cell_filter should exist
 tool
 main:

From 94f273d04670585f43e8fbfd6897b290939914b2 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Wed, 17 Feb 2021 15:49:22 +0100
Subject: [PATCH 162/202] Temp switch back to params.sc for CI (2): forgot scrublet, decontx configs and scenic nf files

---
 main.nf | 2 +-
 src/celda/conf/decontx_filter.config | 4 +-
 src/scanpy/workflows/single_sample.nf | 1 +
 src/scenic/conf/min/base/v0.0.1.config | 8 +-
 src/scenic/conf/min/cistarget.config | 6 +-
 src/scenic/conf/min/grn.config | 6 +-
 src/scenic/conf/min/labels.config | 2 +-
 src/scenic/main.nf | 40 +++++-----
 src/scenic/main.test.nf | 76 +++++++++----------
 src/scenic/processes/add_correlation.nf | 4 +-
 .../arboreto_with_multiprocessing.nf | 4 +-
 src/scenic/processes/aucell.nf | 4 +-
 src/scenic/processes/cistarget.nf | 4 +-
 src/scenic/processes/loomHandler.nf | 4 +-
 .../processes/multiruns/aggregateFeatures.nf | 4 +-
 .../processes/multiruns/aggregateRegulons.nf | 2 +-
 .../processes/multiruns/aucellFromFolder.nf | 4 +-
 .../multiruns/convertMotifsToRegulons.nf | 2 +-
 src/scenic/processes/multiruns/saveToLoom.nf | 2 +-
 src/scenic/processes/reports.nf | 2 +-
 src/scrublet/scrublet.config | 4 +-
 21 files changed, 92 insertions(+), 93 deletions(-)

diff --git a/main.nf b/main.nf
index 648da558..b3edfb88 100644
--- a/main.nf
+++ b/main.nf
@@ -763,7 +763,7 @@ workflow
scenic { } from "./src/utils/workflows/utils" params(params) SCENIC( - Channel.of( tuple(params.global.project_name, file(params.tools.scenic.filteredLoom))) + Channel.of( tuple(params.global.project_name, file(params.getToolParams("scenic").filteredLoom))) ) if(params.utils?.publish) { diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 02aa4999..2452ba3e 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -1,8 +1,8 @@ includeConfig '../../utils/conf/cell_annotate.config' -params.tools.celda.decontx.cell_annotate = params.utils.cell_annotate +params.tools.celda.decontx.cell_annotate = params.sc.cell_annotate params.tools.remove('cell_annotate') includeConfig '../../utils/conf/cell_filter.config' -params.tools.celda.decontx.cell_filter = params.utils.cell_filter +params.tools.celda.decontx.cell_filter = params.sc.cell_filter params.tools.remove('cell_filter') includeConfig './decontx_filter_defaults.config' diff --git a/src/scanpy/workflows/single_sample.nf b/src/scanpy/workflows/single_sample.nf index 982db235..030ac486 100644 --- a/src/scanpy/workflows/single_sample.nf +++ b/src/scanpy/workflows/single_sample.nf @@ -73,6 +73,7 @@ workflow SINGLE_SAMPLE { // Prefilter the data out = FILTER_AND_ANNOTATE_AND_CLEAN( data ) + // To avoid Variable `params` already defined in the process scope def scanpyParams = params.getToolParams("scanpy") filtered = scanpyParams?.filter ? QC_FILTER( out ).filtered : out diff --git a/src/scenic/conf/min/base/v0.0.1.config b/src/scenic/conf/min/base/v0.0.1.config index c4bbc175..ac6c4b81 100644 --- a/src/scenic/conf/min/base/v0.0.1.config +++ b/src/scenic/conf/min/base/v0.0.1.config @@ -1,5 +1,5 @@ // Define local variable otherwise it's going to be kept in the final config -def _ = params.tools.scenic +def _ = params.sc.scenic // Sanity checks if(!params.global.containsKey("species")) throw new Exception("The params.global.species parameter is required.") @@ -39,9 +39,9 @@ params { // Databases versions // PUBLIC -params.tools.scenic.tfsVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tfsVersion") ? _.tfsVersion : "${params.global.species}-v0.0.1" -params.tools.scenic.motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion") ? _.motifsDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" -params.tools.scenic.tracksDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tracksDbVersion") ? _.tracksDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" +params.sc.scenic.tfsVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tfsVersion") ? _.tfsVersion : "${params.global.species}-v0.0.1" +params.sc.scenic.motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion") ? _.motifsDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" +params.sc.scenic.tracksDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tracksDbVersion") ? 
_.tracksDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1" includeConfig '../labels.config' diff --git a/src/scenic/conf/min/cistarget.config b/src/scenic/conf/min/cistarget.config index 50e1fc96..45c09862 100644 --- a/src/scenic/conf/min/cistarget.config +++ b/src/scenic/conf/min/cistarget.config @@ -1,4 +1,4 @@ -def _ = params.tools.scenic +def _ = params.sc.scenic // Sanity checks if(!(params.global.species in ["human", "mouse", "fly"])) throw new Exception("No cisTarget databases found for the given species: "+ params.global.species) @@ -49,6 +49,6 @@ def useMotifs = _.containsKey("cistarget") && _.cistarget.containsKey("useMotifs def useTracks = _.containsKey("cistarget") && _.cistarget.containsKey("useTracks") ? _.cistarget.useTracks: false if(useMotifs) - includeConfig "dbs/cistarget-motifs-${params.tools.scenic.motifsDbVersion}.config" + includeConfig "dbs/cistarget-motifs-${params.sc.scenic.motifsDbVersion}.config" if(useTracks) - includeConfig "dbs/cistarget-tracks-${params.tools.scenic.tracksDbVersion}.config" + includeConfig "dbs/cistarget-tracks-${params.sc.scenic.tracksDbVersion}.config" diff --git a/src/scenic/conf/min/grn.config b/src/scenic/conf/min/grn.config index 908b053e..ccaa21a3 100644 --- a/src/scenic/conf/min/grn.config +++ b/src/scenic/conf/min/grn.config @@ -1,4 +1,4 @@ -def _ = params.tools.scenic +def _ = params.sc.scenic // Sanity checks if(!(params.global.species in ["human", "mouse", "fly"])) throw new Exception("No TFs found for the given species: "+ params.global.species) @@ -12,7 +12,7 @@ params { // PUBLIC // maxForks = _.containsKey("grn") && _.grn.containsKey("maxForks") ? _.grn.maxForks : 1 // numWorkers = _.containsKey("grn") && _.grn.containsKey("numWorkers") ? _.grn.numWorkers : 2 - // // Following parameters are not used except params.tools.scenic.labels.processExecutor = 'qsub' + // // Following parameters are not used except params.sc.scenic.labels.processExecutor = 'qsub' // pmem = _.containsKey("grn") && _.grn.containsKey("pmem") ? 
_.grn.pmem : '2gb' // walltime = '24:00:00' } @@ -20,4 +20,4 @@ params { } } -includeConfig "tfs/${params.tools.scenic.tfsVersion}.config" +includeConfig "tfs/${params.sc.scenic.tfsVersion}.config" diff --git a/src/scenic/conf/min/labels.config b/src/scenic/conf/min/labels.config index b64a6e33..c9bc3c66 100644 --- a/src/scenic/conf/min/labels.config +++ b/src/scenic/conf/min/labels.config @@ -1,4 +1,4 @@ -def _ = params.tools.scenic +def _ = params.sc.scenic params { sc { diff --git a/src/scenic/main.nf b/src/scenic/main.nf index 70489ce1..a03dd805 100644 --- a/src/scenic/main.nf +++ b/src/scenic/main.nf @@ -6,7 +6,7 @@ include { resolveParams(params, true) -def isAppendOnlyMode = params.tools.scenic.containsKey("existingScenicLoom") +def isAppendOnlyMode = params.getToolParams("scenic").containsKey("existingScenicLoom") def ALLOWED_GENOME_ASSEMBLIES = ['dm6','hg19','hg38', 'mm10'] ////////////////////////////////////////////////////// @@ -69,8 +69,8 @@ include { */ // Create channel for the different runs -if(params.tools.scenic.containsKey("numRuns")) { - runs = Channel.from( 1..params.tools.scenic.numRuns ) +if(params.getToolParams("scenic").containsKey("numRuns")) { + runs = Channel.from( 1..params.getToolParams("scenic").numRuns ) } else { runs = Channel.from( 1..1 ) } @@ -83,44 +83,44 @@ workflow scenic { main: /* GRN */ - tfs = file(params.tools.scenic.grn.tfs) + tfs = file(params.getToolParams("scenic").grn.tfs) grn = ARBORETO_WITH_MULTIPROCESSING( filteredLoom.combine(runs), tfs ) grn_with_correlation = ADD_PEARSON_CORRELATION(grn) /* cisTarget motif analysis */ // channel for SCENIC databases resources: motifsDb = Channel - .fromPath( params.tools.scenic.cistarget.motifsDb ) + .fromPath( params.getToolParams("scenic").cistarget.motifsDb ) .collect() // use all files together in the ctx command - motifsAnnotation = file(params.tools.scenic.cistarget.motifsAnnotation) + motifsAnnotation = file(params.getToolParams("scenic").cistarget.motifsAnnotation) ctx_mtf = CISTARGET__MOTIF( grn_with_correlation, motifsDb, motifsAnnotation, 'mtf' ) /* cisTarget track analysis */ - if(params.tools.scenic.cistarget.tracksDb) { + if(params.getToolParams("scenic").cistarget.tracksDb) { tracksDb = Channel - .fromPath( params.tools.scenic.cistarget.tracksDb ) + .fromPath( params.getToolParams("scenic").cistarget.tracksDb ) .collect() // use all files together in the ctx command - tracksAnnotation = file(params.tools.scenic.cistarget.tracksAnnotation) + tracksAnnotation = file(params.getToolParams("scenic").cistarget.tracksAnnotation) ctx_trk = CISTARGET__TRACK( grn_with_correlation, tracksDb, tracksAnnotation, 'trk' ) } /* AUCell, motif regulons */ auc_mtf = AUCELL__MOTIF( ctx_mtf, 'mtf' ) - if(params.tools.scenic.cistarget.tracksDb) { + if(params.getToolParams("scenic").cistarget.tracksDb) { /* AUCell, track regulons */ auc_trk = AUCELL__TRACK( ctx_trk, 'trk' ) } // multi-runs aggregation: - if(params.tools.scenic.containsKey("numRuns") && params.tools.scenic.numRuns > 1) { + if(params.getToolParams("scenic").containsKey("numRuns") && params.getToolParams("scenic").numRuns > 1) { scenic_loom_mtf = MULTI_RUNS_TO_LOOM__MOTIF( filteredLoom, ctx_mtf, auc_mtf, 'mtf' ) - if(params.tools.scenic.cistarget.tracksDb) { + if(params.getToolParams("scenic").cistarget.tracksDb) { scenic_loom_trk = MULTI_RUNS_TO_LOOM__TRACK( filteredLoom, ctx_trk, @@ -135,7 +135,7 @@ workflow scenic { out = VISUALIZE(scenic_loom_mtf) } } else { - if(params.tools.scenic.cistarget.tracksDb) { + 
if(params.getToolParams("scenic").cistarget.tracksDb) { out = VISUALIZE( MERGE_MOTIF_TRACK_LOOMS( auc_mtf @@ -163,10 +163,10 @@ workflow scenic_append { scopeLoom main: - if(params.tools.scenic.containsKey("existingScenicLoom")) { + if(params.getToolParams("scenic").containsKey("existingScenicLoom")) { scenicLoom = getChannelFromFilePath( - params.tools.scenic.existingScenicLoom, - params.tools.scenic.sampleSuffixWithExtension + params.getToolParams("scenic").existingScenicLoom, + params.getToolParams("scenic").sampleSuffixWithExtension ) if(!params.containsKey('quiet')) { Channel.from('').view { @@ -194,9 +194,9 @@ workflow scenic_append { throw new Exception("Cannot append SCENIC loom to SCope loom because the IDs do not match.") } ) - if(!params.tools.scenic.skipReports) { + if(!params.getToolParams("scenic").skipReports) { report_notebook = GENERATE_REPORT( - file(workflow.projectDir + params.tools.scenic.report_ipynb), + file(workflow.projectDir + params.getToolParams("scenic").report_ipynb), APPEND_SCENIC_LOOM.out, "SCENIC_report" ) @@ -213,8 +213,8 @@ workflow scenic_append { workflow { main: - if(!("filteredLoom" in params.tools.scenic)) + if(!("filteredLoom" in params.getToolParams("scenic"))) throw new Exception("The given filteredLoom required parameter does not exist in the params.tools.scenic scope.") - scenic( Channel.of( tuple(params.global.project_name, file(params.tools.scenic.filteredLoom)) ) ) + scenic( Channel.of( tuple(params.global.project_name, file(params.getToolParams("scenic").filteredLoom)) ) ) } diff --git a/src/scenic/main.test.nf b/src/scenic/main.test.nf index f29aa1c2..f71ed9a2 100644 --- a/src/scenic/main.test.nf +++ b/src/scenic/main.test.nf @@ -79,7 +79,7 @@ include { } from './processes/loomHandler' params(params) // Create channel for the different runs -runs = Channel.from( 1..params.tools.scenic.numRuns ) +runs = Channel.from( 1..params.getToolParams("scenic").numRuns ) // Make the test workflow workflow test_GRNBOOST2WITHOUTDASK { @@ -88,7 +88,7 @@ workflow test_GRNBOOST2WITHOUTDASK { loom main: - tfs = file(params.tools.scenic.grn.TFs) + tfs = file(params.getToolParams("scenic").grn.TFs) GRNBOOST2WITHOUTDASK( runs, loom, tfs ) emit: @@ -106,18 +106,18 @@ workflow test_CISTARGET { main: // channel for SCENIC databases resources: motifDB = Channel - .fromPath( params.tools.scenic.cistarget.mtfDB ) + .fromPath( params.getToolParams("scenic").cistarget.mtfDB ) .collect() // use all files together in the ctx command - motifANN = file(params.tools.scenic.cistarget.mtfANN) + motifANN = file(params.getToolParams("scenic").cistarget.mtfANN) ctx_mtf = CISTARGET__MOTIF( runs, filteredloom, grn, motifDB, motifANN, 'mtf' ) /* cisTarget track analysis */ trackDB = Channel - .fromPath( params.tools.scenic.cistarget.trkDB ) + .fromPath( params.getToolParams("scenic").cistarget.trkDB ) .collect() // use all files together in the ctx command - trackANN = file(params.tools.scenic.cistarget.trkANN) + trackANN = file(params.getToolParams("scenic").cistarget.trkANN) ctx_trk = CISTARGET__TRACK( runs, filteredloom, grn, trackDB, trackANN, 'trk' ) emit: @@ -154,15 +154,15 @@ workflow test_SINGLE_RUN_BY_ID { runId main: - filteredloom = file( params.tools.scenic.filteredloom ) - tfs = file(params.tools.scenic.grn.TFs) + filteredloom = file( params.getToolParams("scenic").filteredloom ) + tfs = file(params.getToolParams("scenic").grn.TFs) run = Channel.from( runId..runId ) grn = GRNBOOST2WITHOUTDASK( run, filteredloom, tfs ) // channel for SCENIC databases resources: motifDB 
= Channel - .fromPath( params.tools.scenic.cistarget.mtfDB ) + .fromPath( params.getToolParams("scenic").cistarget.mtfDB ) .collect() // use all files together in the ctx command - motifANN = file(params.tools.scenic.cistarget.mtfANN) + motifANN = file(params.getToolParams("scenic").cistarget.mtfANN) ctx_mtf = CISTARGET__MOTIF( run, filteredloom, grn, motifDB, motifANN, 'mtf' ) /* AUCell, motif regulons */ auc_mtf = AUCELL__MOTIF( run, filteredloom, ctx_mtf, 'mtf' ) @@ -201,52 +201,52 @@ workflow { test_SINGLE_RUN_BY_ID( params.runId ) break; case "GRNBOOST2WITHOUTDASK": - test_GRNBOOST2WITHOUTDASK( file( params.tools.scenic.filteredloom ) ) + test_GRNBOOST2WITHOUTDASK( file( params.getToolParams("scenic").filteredloom ) ) break; case "CISTARGET": - grn = Channel.fromPath(params.tools.scenic.scenicoutdir + "/grnboost2withoutDask/run_*/run_*__adj.tsv") - test_CISTARGET( file( params.tools.scenic.filteredloom ), grn ) + grn = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/grnboost2withoutDask/run_*/run_*__adj.tsv") + test_CISTARGET( file( params.getToolParams("scenic").filteredloom ), grn ) break; case "AUCELL": - ctx_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") - ctx_trk = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") - test_AUCELL( file( params.tools.scenic.filteredloom ), ctx_mtf, ctx_trk ) + ctx_mtf = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") + ctx_trk = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") + test_AUCELL( file( params.getToolParams("scenic").filteredloom ), ctx_mtf, ctx_trk ) break; case "AGGR_MULTI_RUNS_FEATURES": /* Aggregate motifs from multiple runs */ - reg_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") + reg_mtf = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv") AGGR_MULTI_RUNS_FEATURES__MOTIF( reg_mtf.collect(), 'mtf' ) - if(params.tools.scenic.cistarget.trkDB) { + if(params.getToolParams("scenic").cistarget.trkDB) { /* Aggregate tracks from multiple runs */ - reg_trk = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") + reg_trk = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv") AGGR_MULTI_RUNS_FEATURES__TRACK( reg_trk.collect(), 'trk' ) } break; case "AGGR_MULTI_RUNS_REGULONS": /* Aggregate motif regulons from multiple runs */ - auc_mtf_looms = Channel.fromPath(params.tools.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_mtf.loom") + auc_mtf_looms = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/aucell/run_*/run_*__auc_mtf.loom") AGGR_MULTI_RUNS_REGULONS__MOTIF( auc_mtf_looms.collect(), 'mtf' ) - if(params.tools.scenic.cistarget.trkDB) { + if(params.getToolParams("scenic").cistarget.trkDB) { /* Aggregate track regulons from multiple runs */ - auc_trk_looms = Channel.fromPath(params.tools.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_trk.loom") + auc_trk_looms = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/aucell/run_*/run_*__auc_trk.loom") AGGR_MULTI_RUNS_REGULONS__TRACK( auc_trk_looms.collect(), 'trk' ) } break; case "AUCELL_FROM_FOLDER": /* Aggregate motif regulons from multiple runs */ - regulons_folder_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_mtf") - AUCELL_FROM_FOLDER__MOTIF( 
file(params.tools.scenic.filteredloom), regulons_folder_mtf, 'mtf' ) - if(params.tools.scenic.cistarget.trkDB) { + regulons_folder_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_mtf") + AUCELL_FROM_FOLDER__MOTIF( file(params.getToolParams("scenic").filteredloom), regulons_folder_mtf, 'mtf' ) + if(params.getToolParams("scenic").cistarget.trkDB) { /* Aggregate track regulons from multiple runs */ - regulons_folder_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_trk") - AUCELL_FROM_FOLDER__TRACK( file(params.tools.scenic.filteredloom), regulons_folder_trk, 'trk' ) + regulons_folder_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_trk") + AUCELL_FROM_FOLDER__TRACK( file(params.getToolParams("scenic").filteredloom), regulons_folder_trk, 'trk' ) } break; case "SAVE_SCENIC_MULTI_RUNS_TO_LOOM_MOTIF": - filteredloom = file(params.tools.scenic.filteredloom) - aggr_features_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_mtf.csv.gz") - regulons_folder_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_mtf") - regulons_auc_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_mtf.tsv") + filteredloom = file(params.getToolParams("scenic").filteredloom) + aggr_features_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_cistarget/multi_runs_features_mtf.csv.gz") + regulons_folder_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_mtf") + regulons_auc_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_mtf.tsv") /* Save multiple motif SCENIC runs to loom*/ SAVE_SCENIC_MULTI_RUNS_TO_LOOM_MOTIF( @@ -258,10 +258,10 @@ workflow { ) break; case "SAVE_SCENIC_MULTI_RUNS_TO_LOOM_TRACK": - filteredloom = file(params.tools.scenic.filteredloom) - regulons_folder_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_trk") - aggr_features_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_trk.csv.gz") - regulons_auc_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_trk.tsv") + filteredloom = file(params.getToolParams("scenic").filteredloom) + regulons_folder_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_trk") + aggr_features_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_cistarget/multi_runs_features_trk.csv.gz") + regulons_auc_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_trk.tsv") /* Save multiple track SCENIC runs to loom*/ SAVE_SCENIC_MULTI_RUNS_TO_LOOM_TRACK( filteredloom, @@ -272,8 +272,8 @@ workflow { ) break; case "MERGE_MOTIF_TRACK_LOOMS": - scenic_loom_mtf = file( params.tools.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_mtf.loom" ) - scenic_loom_trk = file( params.tools.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_trk.loom" ) + scenic_loom_mtf = file( params.getToolParams("scenic").scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_mtf.loom" ) + scenic_loom_trk = file( params.getToolParams("scenic").scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_trk.loom" ) MERGE_MOTIF_TRACK_LOOMS( scenic_loom_mtf, scenic_loom_trk @@ -281,7 +281,7 @@ workflow { break; case "VISUALIZE_PUBLISH": /* Aggregate motif regulons from multiple runs */ - scenic_loom = file( params.tools.scenic.scenicoutdir + 
"/" + params.tools.scenic.scenicOutputLoom ) + scenic_loom = file( params.getToolParams("scenic").scenicoutdir + "/" + params.getToolParams("scenic").scenicOutputLoom ) PUBLISH_LOOM( VISUALIZE( scenic_loom ) ) break; default: diff --git a/src/scenic/processes/add_correlation.nf b/src/scenic/processes/add_correlation.nf index af7d9b2a..a53bf462 100644 --- a/src/scenic/processes/add_correlation.nf +++ b/src/scenic/processes/add_correlation.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic -processParams = params.tools.scenic.grn +def toolParams = params.getToolParams("scenic") +def processParams = toolParams.grn process ADD_PEARSON_CORRELATION { diff --git a/src/scenic/processes/arboreto_with_multiprocessing.nf b/src/scenic/processes/arboreto_with_multiprocessing.nf index fe4f5f8b..531b0127 100644 --- a/src/scenic/processes/arboreto_with_multiprocessing.nf +++ b/src/scenic/processes/arboreto_with_multiprocessing.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic -processParams = params.tools.scenic.grn +def toolParams = params.getToolParams("scenic") +def processParams = toolParams.grn process ARBORETO_WITH_MULTIPROCESSING { diff --git a/src/scenic/processes/aucell.nf b/src/scenic/processes/aucell.nf index e110b651..5ab5733d 100644 --- a/src/scenic/processes/aucell.nf +++ b/src/scenic/processes/aucell.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic -processParams = params.tools.scenic.aucell +def toolParams = params.getToolParams("scenic") +def processParams = toolParams.aucell process AUCELL { diff --git a/src/scenic/processes/cistarget.nf b/src/scenic/processes/cistarget.nf index b5a86ecd..2533c94d 100644 --- a/src/scenic/processes/cistarget.nf +++ b/src/scenic/processes/cistarget.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic -processParams = params.tools.scenic.cistarget +def toolParams = params.getToolParams("scenic") +def processParams = toolParams.cistarget process CISTARGET { diff --git a/src/scenic/processes/loomHandler.nf b/src/scenic/processes/loomHandler.nf index 4bf17556..d8294e4f 100644 --- a/src/scenic/processes/loomHandler.nf +++ b/src/scenic/processes/loomHandler.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic +def toolParams = params.getToolParams("scenic") process PUBLISH_LOOM { @@ -57,7 +57,6 @@ process MERGE_MOTIF_TRACK_LOOMS { tuple val(sampleId), path(toolParams.scenicOutputLoom) script: - toolParams = params.tools.scenic """ ${binDir}merge_motif_track_loom.py \ --loom_motif ${motifLoom} \ @@ -83,7 +82,6 @@ process APPEND_SCENIC_LOOM { tuple val(sampleId), path("${sampleId}.${toolParams.scenicScopeOutputLoom}") script: - toolParams = params.tools.scenic """ ${binDir}append_results_to_existing_loom.py \ --loom_scope ${scopeLoom} \ diff --git a/src/scenic/processes/multiruns/aggregateFeatures.nf b/src/scenic/processes/multiruns/aggregateFeatures.nf index 13f0f2e5..c88d8014 100644 --- a/src/scenic/processes/multiruns/aggregateFeatures.nf +++ b/src/scenic/processes/multiruns/aggregateFeatures.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic -processParams = params.tools.scenic.aggregate_features +def toolParams = params.getToolParams("scenic") +def processParams = toolParams.aggregate_features process AGGR_MULTI_RUNS_FEATURES { diff --git a/src/scenic/processes/multiruns/aggregateRegulons.nf b/src/scenic/processes/multiruns/aggregateRegulons.nf index e6a1572e..4d75639f 100644 --- a/src/scenic/processes/multiruns/aggregateRegulons.nf +++ b/src/scenic/processes/multiruns/aggregateRegulons.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic +def toolParams = params.getToolParams("scenic") process AGGR_MULTI_RUNS_REGULONS { diff --git a/src/scenic/processes/multiruns/aucellFromFolder.nf b/src/scenic/processes/multiruns/aucellFromFolder.nf index 23e16498..8ba5cf5d 100644 --- a/src/scenic/processes/multiruns/aucellFromFolder.nf +++ b/src/scenic/processes/multiruns/aucellFromFolder.nf @@ -2,8 +2,8 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic -processParams = params.tools.scenic.aucell +def toolParams = params.getToolParams("scenic") +def processParams = toolParams.aucell process AUCELL_FROM_FOLDER { diff --git a/src/scenic/processes/multiruns/convertMotifsToRegulons.nf b/src/scenic/processes/multiruns/convertMotifsToRegulons.nf index 355f447b..f8483e4d 100644 --- a/src/scenic/processes/multiruns/convertMotifsToRegulons.nf +++ b/src/scenic/processes/multiruns/convertMotifsToRegulons.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic +def toolParams = params.getToolParams("scenic") process CONVERT_MULTI_RUNS_FEATURES_TO_REGULONS { diff --git a/src/scenic/processes/multiruns/saveToLoom.nf b/src/scenic/processes/multiruns/saveToLoom.nf index 2f26e29a..f9787c5f 100644 --- a/src/scenic/processes/multiruns/saveToLoom.nf +++ b/src/scenic/processes/multiruns/saveToLoom.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scenic/bin/" : "" -toolParams = params.tools.scenic +def toolParams = params.getToolParams("scenic") process SAVE_MULTI_RUNS_TO_LOOM { diff --git a/src/scenic/processes/reports.nf b/src/scenic/processes/reports.nf index 2d79beac..d25c36cc 100644 --- a/src/scenic/processes/reports.nf +++ b/src/scenic/processes/reports.nf @@ -7,7 +7,7 @@ takes a template ipynb and adata as input, outputs ipynb named by the value in ${reportTitle} */ -toolParams = params.tools.scenic +def toolParams = params.getToolParams("scenic") process GENERATE_REPORT { diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config index 5665e399..f878011a 100644 --- a/src/scrublet/scrublet.config +++ b/src/scrublet/scrublet.config @@ -1,9 +1,9 @@ includeConfig './conf/base.config' includeConfig '../utils/conf/cell_annotate.config' -params.tools.scrublet.cell_annotate = params.utils.cell_annotate +params.tools.scrublet.cell_annotate = params.sc.cell_annotate params.tools.remove('cell_annotate') includeConfig '../utils/conf/cell_filter.config' -params.tools.scrublet.cell_filter = params.utils.cell_filter +params.tools.scrublet.cell_filter = params.sc.cell_filter params.tools.remove('cell_filter') includeConfig './conf/scrublet_defaults.conf' From 5a4cd3ea8273e7604222d2c022e13ac81314b347 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 15:58:40 +0100 Subject: [PATCH 163/202] Fix to avoid Variable `params` already defined in the process scope --- workflows/bbknn.nf | 12 ++++++++---- workflows/harmony.nf | 11 +++++++---- workflows/mnncorrect.nf | 11 +++++++---- workflows/multi_sample.nf | 11 +++++++---- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/workflows/bbknn.nf b/workflows/bbknn.nf index 44b6dffa..28ac4aab 100644 --- a/workflows/bbknn.nf +++ b/workflows/bbknn.nf @@ -72,11 +72,15 @@ workflow bbknn { /******************************************* * Data processing */ + + // To avoid Variable `params` already defined in the process scope + def scanpyParams = params.getToolParams("scanpy") + out = data | \ SC__FILE_CONVERTER | \ FILTER_AND_ANNOTATE_AND_CLEAN - if(params.getToolParams("scanpy").containsKey("filter")) { + if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } if(params.hasUtilsParams("file_concatenator")) { @@ -88,7 +92,7 @@ workflow bbknn { ) ) } - if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) { + if(scanpyParams.containsKey("data_transformation") && scanpyParams.containsKey("normalization")) { out = NORMALIZE_TRANSFORM( out ) } out = HVG_SELECTION( out ) @@ -113,13 +117,13 @@ workflow bbknn { // Finalize FINALIZE( - params.sc?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.hasUtilsParams("file_concatenator") ? 
SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, BEC_BBKNN.out.data, 'BBKNN.final_output' ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { diff --git a/workflows/harmony.nf b/workflows/harmony.nf index 1d6075b6..3234b940 100644 --- a/workflows/harmony.nf +++ b/workflows/harmony.nf @@ -74,11 +74,14 @@ workflow harmony { /******************************************* * Data processing */ + // To avoid Variable `params` already defined in the process scope + def scanpyParams = params.getToolParams("scanpy") + out = data | \ SC__FILE_CONVERTER | \ FILTER_AND_ANNOTATE_AND_CLEAN - if(params.getToolParams("scanpy").containsKey("filter")) { + if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } if(params.hasUtilsParams("file_concatenator")) { @@ -90,7 +93,7 @@ workflow harmony { ) ) } - if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) { + if(scanpyParams.containsKey("data_transformation") && scanpyParams.containsKey("normalization")) { out = NORMALIZE_TRANSFORM( out ) } out = HVG_SELECTION( out ) @@ -115,13 +118,13 @@ workflow harmony { // Finalize FINALIZE( - params.sc?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.hasUtilsParams("file_concatenator") ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, BEC_HARMONY.out.data, 'HARMONY.final_output' ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { diff --git a/workflows/mnncorrect.nf b/workflows/mnncorrect.nf index 5f4bf794..0c5d79fb 100644 --- a/workflows/mnncorrect.nf +++ b/workflows/mnncorrect.nf @@ -83,11 +83,14 @@ workflow mnncorrect { /******************************************* * Data processing */ + // To avoid Variable `params` already defined in the process scope + def scanpyParams = params.getToolParams("scanpy") + out = data | \ SC__FILE_CONVERTER | \ FILTER_AND_ANNOTATE_AND_CLEAN - if(params.getToolParams("scanpy").containsKey("filter")) { + if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } if(params.hasUtilsParams("file_concatenator")) { @@ -99,7 +102,7 @@ workflow mnncorrect { ) ) } - if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) { + if(scanpyParams.containsKey("data_transformation") && scanpyParams.containsKey("normalization")) { out = NORMALIZE_TRANSFORM( out ) } out = HVG_SELECTION( out ) @@ -122,13 +125,13 @@ workflow mnncorrect { // Finalize FINALIZE( - params.sc?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.hasUtilsParams("file_concatenator") ? 
SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, BEC_MNNCORRECT.out.data, 'MNNCORRECT.final_output' ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { diff --git a/workflows/multi_sample.nf b/workflows/multi_sample.nf index 192fbf57..17be872f 100644 --- a/workflows/multi_sample.nf +++ b/workflows/multi_sample.nf @@ -80,11 +80,14 @@ workflow multi_sample { /******************************************* * Data processing */ + // To avoid Variable `params` already defined in the process scope + def scanpyParams = params.getToolParams("scanpy") + out = data | \ SC__FILE_CONVERTER | \ FILTER_AND_ANNOTATE_AND_CLEAN - if(params.getToolParams("scanpy").containsKey("filter")) { + if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } if(params.hasUtilsParams("file_concatenator")) { @@ -96,7 +99,7 @@ workflow multi_sample { ) ) } - if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) { + if(scanpyParams.containsKey("data_transformation") && scanpyParams.containsKey("normalization")) { out = NORMALIZE_TRANSFORM( out ) } out = HVG_SELECTION( out ) @@ -111,13 +114,13 @@ workflow multi_sample { // Finalize FINALIZE( - params.sc?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.hasUtilsParams("file_concatenator") ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, CLUSTER_IDENTIFICATION.out.marker_genes, 'MULTI_SAMPLE.final_output', ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { From d0ec7190b9db3bba485c81c057c0b76625abd6fc Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 16:16:40 +0100 Subject: [PATCH 164/202] Other missed conversions: from params\.tools\.([a-z_]+) to params.getToolParams("$1") --- docs/development.rst | 6 +++--- main_atac.nf | 6 +++--- src/archr/processes/cell_calling.nf | 2 +- src/archr/processes/createArrow_unfiltered.nf | 2 +- src/bap/processes/barcode_multiplet.nf | 2 +- src/bap/processes/biorad_debarcode.nf | 2 +- src/bwamaptools/main.nf | 2 +- src/bwamaptools/processes/add_barcode_as_tag.nf | 2 +- src/bwamaptools/processes/index.nf | 2 +- src/bwamaptools/processes/mapping.nf | 2 +- src/bwamaptools/processes/mapping_summary.nf | 2 +- src/cellranger-atac/processes/count.nf | 2 +- src/cellranger-atac/processes/mkfastq.nf | 2 +- src/cellranger/processes/count.nf | 2 +- src/cellranger/processes/mkfastq.nf | 2 +- src/flybaser/processes/convertNomenclature.nf | 4 ++-- src/harmony/processes/runHarmony.nf | 4 ++-- src/harmony/workflows/bec_harmony.nf | 2 +- src/pcacv/processes/runPCACV.nf | 4 ++-- src/picard/processes/create_sequence_dictionary.nf | 2 +- src/picard/processes/fastq_to_bam.nf | 2 +- src/picard/processes/merge_bam_alignment.nf | 4 ++-- src/picard/processes/sam_to_fastq.nf | 2 +- src/picard/processes/sort_sam.nf | 4 ++-- 
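The conversion named in the subject line above can be exercised directly in Groovy. The snippet below is an illustrative sketch only; the sample line is hypothetical and not taken from the series:

// Sketch: the PATCH 164 regex applied to one hypothetical line.
def line = 'container params.tools.star.container'
def converted = line.replaceAll(/params\.tools\.([a-z_]+)/, 'params.getToolParams("$1")')
assert converted == 'container params.getToolParams("star").container'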
src/popscle/processes/demuxlet.nf | 8 ++++---- src/popscle/processes/dsc_pileup.nf | 4 ++-- src/popscle/workflows/demuxlet.nf | 2 +- src/popscle/workflows/dsc_pileup.nf | 2 +- src/pycistopic/processes/macs2_call_peaks.nf | 2 +- src/scrublet/processes/doublet_detection.nf | 4 ++-- src/scrublet/processes/reports.nf | 2 +- src/scrublet/workflows/doublet_removal.nf | 2 +- src/singlecelltoolkit/processes/barcode_correction.nf | 2 +- .../processes/debarcode_10x_scatac_fastqs.nf | 2 +- src/sinto/processes/fragments.nf | 2 +- src/sratoolkit/processes/downloadFastQ.nf | 2 +- src/star/main.nf | 6 +++--- src/star/processes/build_genome.nf | 4 ++-- src/star/processes/load_genome.nf | 2 +- src/star/processes/map_count.nf | 4 ++-- src/star/processes/solo_map_count.nf | 10 +++++----- src/star/processes/unload_genome.nf | 2 +- src/trimgalore/processes/trim.nf | 2 +- src/utils/processes/utils.nf | 2 +- workflows/nemesh.nf | 8 ++++---- workflows/star.nf | 8 ++++---- 46 files changed, 74 insertions(+), 74 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 54c99ea9..ee6ab40c 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -239,7 +239,7 @@ Steps: process SC__HARMONY__HARMONY_MATRIX { - container params.tools.harmony.container + container params.getToolParams("harmony").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' clusterOptions "-l nodes=1:ppn=${params.global.threads} -l walltime=1:00:00 -A ${params.global.qsubaccount}" @@ -250,7 +250,7 @@ Steps: tuple val(sampleId), path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("harmony")) processParams = sampleParams.local varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') """ @@ -400,7 +400,7 @@ Steps: ) harmony_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.tools.harmony.report_ipynb), + file(workflow.projectDir + params.getToolParams("harmony").report_ipynb), "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) diff --git a/main_atac.nf b/main_atac.nf index 6a9fead7..fb39692c 100644 --- a/main_atac.nf +++ b/main_atac.nf @@ -29,9 +29,9 @@ workflow cellranger_atac { } from './src/cellranger-atac/main.nf' params(params) CELLRANGER_ATAC( - file(params.tools.cellranger_atac.mkfastq.csv), - file(params.tools.cellranger_atac.mkfastq.runFolder), - file(params.tools.cellranger_atac.count.reference) + file(params.getToolParams("cellranger_atac").mkfastq.csv), + file(params.getToolParams("cellranger_atac").mkfastq.runFolder), + file(params.getToolParams("cellranger_atac").count.reference) ) } diff --git a/src/archr/processes/cell_calling.nf b/src/archr/processes/cell_calling.nf index cdff4674..01f568ca 100644 --- a/src/archr/processes/cell_calling.nf +++ b/src/archr/processes/cell_calling.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/archr/bin/" : "" -toolParams = params.tools.archr +toolParams = params.getToolParams("archr") process SC__ARCHR__CELL_CALLING { diff --git a/src/archr/processes/createArrow_unfiltered.nf b/src/archr/processes/createArrow_unfiltered.nf index c82cac6e..ec57f727 100644 --- a/src/archr/processes/createArrow_unfiltered.nf +++ b/src/archr/processes/createArrow_unfiltered.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/archr/bin/" : "" -toolParams = params.tools.archr +toolParams = params.getToolParams("archr") process SC__ARCHR__CREATE_ARROW_UNFILTERED { diff --git a/src/bap/processes/barcode_multiplet.nf b/src/bap/processes/barcode_multiplet.nf index bf8e4310..08235df4 100644 --- a/src/bap/processes/barcode_multiplet.nf +++ b/src/bap/processes/barcode_multiplet.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.bap +toolParams = params.getToolParams("bap") process SC__BAP__BARCODE_MULTIPLET_PIPELINE { diff --git a/src/bap/processes/biorad_debarcode.nf b/src/bap/processes/biorad_debarcode.nf index 8fc438b2..79ee5873 100644 --- a/src/bap/processes/biorad_debarcode.nf +++ b/src/bap/processes/biorad_debarcode.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.bap +toolParams = params.getToolParams("bap") process SC__BAP__BIORAD_DEBARCODE { diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index 5dad0edb..0fa09639 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -57,7 +57,7 @@ workflow BWA_MAPPING_PE { 1) create a channel linking bwa index files from genome.fa in params, and 2) combine this channel with the items in the data channel */ - bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) + bwa_inputs = get_bwa_index(params.getToolParams("bwamaptools").bwa_fasta).combine(data) bam = SC__BWAMAPTOOLS__BWA_MEM_PE(bwa_inputs) diff --git a/src/bwamaptools/processes/add_barcode_as_tag.nf b/src/bwamaptools/processes/add_barcode_as_tag.nf index 7af22614..50cf5d1d 100644 --- a/src/bwamaptools/processes/add_barcode_as_tag.nf +++ b/src/bwamaptools/processes/add_barcode_as_tag.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.bwamaptools +toolParams = params.getToolParams("bwamaptools") process SC__BWAMAPTOOLS__ADD_BARCODE_TAG { diff --git a/src/bwamaptools/processes/index.nf b/src/bwamaptools/processes/index.nf index 9ae964e9..4e8560e1 100644 --- a/src/bwamaptools/processes/index.nf +++ b/src/bwamaptools/processes/index.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.bwamaptools +toolParams = params.getToolParams("bwamaptools") process SC__BWAMAPTOOLS__INDEX_BAM { diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index c6372ada..2ebe69d9 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.bwamaptools +toolParams = params.getToolParams("bwamaptools") process SC__BWAMAPTOOLS__BWA_MEM_PE { diff --git a/src/bwamaptools/processes/mapping_summary.nf b/src/bwamaptools/processes/mapping_summary.nf index 3517efad..5a0e7031 100644 --- a/src/bwamaptools/processes/mapping_summary.nf +++ b/src/bwamaptools/processes/mapping_summary.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/bwamaptools/bin/" : "" -toolParams = params.tools.bwamaptools +toolParams = params.getToolParams("bwamaptools") process SC__BWAMAPTOOLS__MAPPING_SUMMARY { diff --git a/src/cellranger-atac/processes/count.nf b/src/cellranger-atac/processes/count.nf index 0388b540..2fb1f173 100644 --- a/src/cellranger-atac/processes/count.nf +++ b/src/cellranger-atac/processes/count.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.tools.cellranger_atac +toolParams = params.getToolParams("cellranger_atac") def runCellRangerAtacCount = { id, diff --git a/src/cellranger-atac/processes/mkfastq.nf b/src/cellranger-atac/processes/mkfastq.nf index 8dc49330..d72dd6cd 100644 --- a/src/cellranger-atac/processes/mkfastq.nf +++ b/src/cellranger-atac/processes/mkfastq.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.tools.cellranger_atac +toolParams = params.getToolParams("cellranger_atac") process SC__CELLRANGER_ATAC__MKFASTQ { diff --git a/src/cellranger/processes/count.nf b/src/cellranger/processes/count.nf index ba352b3f..aa8188f2 100644 --- a/src/cellranger/processes/count.nf +++ b/src/cellranger/processes/count.nf @@ -4,7 +4,7 @@ include { isParamNull; } from './../../utils/processes/utils.nf' params(params) -toolParams = params.tools.cellranger +toolParams = params.getToolParams("cellranger") def generateCellRangerCountCommandDefaults = { diff --git a/src/cellranger/processes/mkfastq.nf b/src/cellranger/processes/mkfastq.nf index cd9065f4..192fd087 100644 --- a/src/cellranger/processes/mkfastq.nf +++ b/src/cellranger/processes/mkfastq.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.tools.cellranger +toolParams = params.getToolParams("cellranger") process SC__CELLRANGER__MKFASTQ { diff --git a/src/flybaser/processes/convertNomenclature.nf b/src/flybaser/processes/convertNomenclature.nf index 1ec3e7c3..3e60f8f7 100644 --- a/src/flybaser/processes/convertNomenclature.nf +++ b/src/flybaser/processes/convertNomenclature.nf @@ -8,7 +8,7 @@ if(!params.containsKey("test")) { process FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL { - container params.tools.flybaser.container + container params.getToolParams("flybaser").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -23,7 +23,7 @@ process FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL { path("${sampleId}.FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.flybaser.convert_fbgn_to_gene_symbol) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("flybaser").convert_fbgn_to_gene_symbol) processParams = sampleParams.local """ ${binDir}convertFBgnToGeneSymbol.R \ diff --git a/src/harmony/processes/runHarmony.nf b/src/harmony/processes/runHarmony.nf index 517fd1d8..7d386a8c 100644 --- a/src/harmony/processes/runHarmony.nf +++ b/src/harmony/processes/runHarmony.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/harmony/bin/" process SC__HARMONY__HARMONY_MATRIX { - container params.tools.harmony.container + container params.getToolParams("harmony").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -19,7 +19,7 @@ process SC__HARMONY__HARMONY_MATRIX { path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("harmony")) processParams = sampleParams.local varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') """ diff --git a/src/harmony/workflows/bec_harmony.nf b/src/harmony/workflows/bec_harmony.nf index f4de29ff..3cdae2dc 100644 --- a/src/harmony/workflows/bec_harmony.nf +++ b/src/harmony/workflows/bec_harmony.nf @@ -131,7 +131,7 @@ workflow BEC_HARMONY { ) harmony_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.tools.harmony.report_ipynb), + file(workflow.projectDir + params.getToolParams("harmony").report_ipynb), "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) diff --git a/src/pcacv/processes/runPCACV.nf b/src/pcacv/processes/runPCACV.nf index 06141188..93385615 100644 --- a/src/pcacv/processes/runPCACV.nf +++ b/src/pcacv/processes/runPCACV.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pcacv/bin" : process PCACV__FIND_OPTIMAL_NPCS { - container params.tools.pcacv.container + container params.getToolParams("pcacv").container publishDir "${params.global.outdir}/data/pcacv", mode: 'link' label 'compute_resources__pcacv' @@ -27,7 +27,7 @@ process PCACV__FIND_OPTIMAL_NPCS { emit: files script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.pcacv.find_optimal_npcs) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("pcacv").find_optimal_npcs) processParams = sampleParams.local """ export OPENBLAS_NUM_THREADS=1 diff --git a/src/picard/processes/create_sequence_dictionary.nf b/src/picard/processes/create_sequence_dictionary.nf index 69fe92ee..5b6a887d 100644 --- a/src/picard/processes/create_sequence_dictionary.nf +++ b/src/picard/processes/create_sequence_dictionary.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__CREATE_SEQUENCE_DICTIONARY { - container params.tools.picard.container + container params.getToolParams("picard").container publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' label 'compute_resources__default' diff --git a/src/picard/processes/fastq_to_bam.nf b/src/picard/processes/fastq_to_bam.nf index 1e958eb1..7c22d555 100644 --- a/src/picard/processes/fastq_to_bam.nf +++ b/src/picard/processes/fastq_to_bam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__FASTQ_TO_BAM { - container params.tools.picard.container + container params.getToolParams("picard").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/picard/processes/merge_bam_alignment.nf b/src/picard/processes/merge_bam_alignment.nf index 8f43b6f8..3e39c934 100644 --- a/src/picard/processes/merge_bam_alignment.nf +++ b/src/picard/processes/merge_bam_alignment.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__MERGE_BAM_ALIGNMENT { - container params.tools.picard.container + container params.getToolParams("picard").container 
publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -17,7 +17,7 @@ process PICARD__MERGE_BAM_ALIGNMENT { tuple val(sample), path("*.merged.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.picard.merge_bam_alignment) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("picard").merge_bam_alignment) processParams = sampleParams.local """ java -Djava.io.tmpdir=$tmpDir -jar \ diff --git a/src/picard/processes/sam_to_fastq.nf b/src/picard/processes/sam_to_fastq.nf index 98a5448f..b88c6841 100644 --- a/src/picard/processes/sam_to_fastq.nf +++ b/src/picard/processes/sam_to_fastq.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__BAM_TO_FASTQ { - container params.tools.picard.container + container params.getToolParams("picard").container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/picard/processes/sort_sam.nf b/src/picard/processes/sort_sam.nf index f10c3c2c..07a1d2cc 100644 --- a/src/picard/processes/sort_sam.nf +++ b/src/picard/processes/sort_sam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__SORT_SAM { - container params.tools.picard.container + container params.getToolParams("picard").container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process PICARD__SORT_SAM { tuple val(sample), path("*.STAR_aligned_sorted.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.picard.sort_sam) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("picard").sort_sam) processParams = sampleParams.local """ java -Djava.io.tmpdir=$tmpDir -jar \ diff --git a/src/popscle/processes/demuxlet.nf b/src/popscle/processes/demuxlet.nf index 51060956..a195a93e 100644 --- a/src/popscle/processes/demuxlet.nf +++ b/src/popscle/processes/demuxlet.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/popscle/bin/" process SC__POPSCLE__DEMUXLET { - container params.tools.popscle.container + container params.getToolParams("popscle").container publishDir "${params.global.outdir}/data", mode: 'symlink' label 'compute_resources__cpu' @@ -16,7 +16,7 @@ process SC__POPSCLE__DEMUXLET { tuple val(sampleId), path("${sampleId}_demuxlet*") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.demuxlet) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("popscle").demuxlet) processParams = sampleParams.local """ @@ -30,7 +30,7 @@ process SC__POPSCLE__DEMUXLET { process SC__POPSCLE__FREEMUXLET { - container params.tools.popscle.container + container params.getToolParams("popscle").container publishDir "${params.global.outdir}/data", mode: 'symlink' label 'compute_resources__cpu' @@ -41,7 +41,7 @@ process SC__POPSCLE__FREEMUXLET { tuple val(sampleId), path("${sampleId}_freemuxlet*") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.freemuxlet) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("popscle").freemuxlet) processParams = sampleParams.local """ diff --git a/src/popscle/processes/dsc_pileup.nf b/src/popscle/processes/dsc_pileup.nf index 129837b7..06b917dd 100644 --- a/src/popscle/processes/dsc_pileup.nf +++ b/src/popscle/processes/dsc_pileup.nf @@ -6,7 +6,7 @@ toolParams = params.sc.popscle process SC__POPSCLE__DSC_PILEUP { - container params.tools.popscle.container + container params.getToolParams("popscle").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -29,7 +29,7 @@ process SC__POPSCLE__DSC_PILEUP { process SC__POPSCLE__PREFILTER_DSC_PILEUP { - container params.tools.popscle.container + container params.getToolParams("popscle").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__cpu' diff --git a/src/popscle/workflows/demuxlet.nf b/src/popscle/workflows/demuxlet.nf index d4f4dcc7..237e233c 100644 --- a/src/popscle/workflows/demuxlet.nf +++ b/src/popscle/workflows/demuxlet.nf @@ -113,7 +113,7 @@ workflow DEMUXLET { data main: - vcf = file(params.tools.popscle.vcf) + vcf = file(params.getToolParams("popscle").vcf) DSC_PILEUP_FILTERED(data) SC__POPSCLE__DEMUXLET(DSC_PILEUP_FILTERED.out, vcf) diff --git a/src/popscle/workflows/dsc_pileup.nf b/src/popscle/workflows/dsc_pileup.nf index 49eb6319..71bca29b 100644 --- a/src/popscle/workflows/dsc_pileup.nf +++ b/src/popscle/workflows/dsc_pileup.nf @@ -22,7 +22,7 @@ workflow DSC_PILEUP_FILTERED { data main: - vcf = file(params.tools.popscle.vcf) + vcf = file(params.getToolParams("popscle").vcf) SC__POPSCLE__PREFILTER_DSC_PILEUP(data, vcf) SC__POPSCLE__DSC_PILEUP(SC__POPSCLE__PREFILTER_DSC_PILEUP.out, vcf) diff --git a/src/pycistopic/processes/macs2_call_peaks.nf b/src/pycistopic/processes/macs2_call_peaks.nf index ff43cef2..416d1694 100644 --- a/src/pycistopic/processes/macs2_call_peaks.nf +++ b/src/pycistopic/processes/macs2_call_peaks.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.pycistopic +toolParams = params.getToolParams("pycistopic") process SC__PYCISTOPIC__MACS2_CALL_PEAKS { diff --git a/src/scrublet/processes/doublet_detection.nf b/src/scrublet/processes/doublet_detection.nf index 4dc683a8..796b7185 100644 --- a/src/scrublet/processes/doublet_detection.nf +++ b/src/scrublet/processes/doublet_detection.nf @@ -47,7 +47,7 @@ def SC__SCRUBLET__DOUBLET_DETECTION_PARAMS(params) { process SC__SCRUBLET__DOUBLET_DETECTION { - container params.tools.scrublet.container + container params.getToolParams("scrublet").container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -68,7 +68,7 @@ process SC__SCRUBLET__DOUBLET_DETECTION { val(nPrinComps) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scrublet.doublet_detection) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scrublet").doublet_detection) processParams = sampleParams.local def _processParams = new SC__SCRUBLET__DOUBLET_DETECTION_PARAMS() _processParams.setEnv(this) diff --git a/src/scrublet/processes/reports.nf b/src/scrublet/processes/reports.nf index 67ca9b9d..7a6bf165 100644 --- a/src/scrublet/processes/reports.nf +++ b/src/scrublet/processes/reports.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__SCRUBLET__DOUBLET_DETECTION_REPORT { - container params.tools.scrublet.container + container params.getToolParams("scrublet").container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true label 'compute_resources__report' diff --git a/src/scrublet/workflows/doublet_removal.nf b/src/scrublet/workflows/doublet_removal.nf index 1f6dfd21..85be2fdc 100644 --- a/src/scrublet/workflows/doublet_removal.nf +++ b/src/scrublet/workflows/doublet_removal.nf @@ -86,7 +86,7 @@ workflow DOUBLET_REMOVAL { SC__SCRUBLET__DOUBLET_DETECTION_REPORT( - file(workflow.projectDir + params.tools.scrublet.doublet_detection.report_ipynb), + file(workflow.projectDir + params.getToolParams("scrublet").doublet_detection.report_ipynb), SC__SCRUBLET__DOUBLET_DETECTION.out.map { // Extract the Scrublet object file it -> tuple(it[0], it[2]) diff --git a/src/singlecelltoolkit/processes/barcode_correction.nf b/src/singlecelltoolkit/processes/barcode_correction.nf index 4fe9d9ac..08e14098 100644 --- a/src/singlecelltoolkit/processes/barcode_correction.nf +++ b/src/singlecelltoolkit/processes/barcode_correction.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" -toolParams = params.tools.singlecelltoolkit +toolParams = params.getToolParams("singlecelltoolkit") process SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION { diff --git a/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf index faf59338..246c0afc 100644 --- a/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf +++ b/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" -toolParams = params.tools.singlecelltoolkit +toolParams = params.getToolParams("singlecelltoolkit") process SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ { diff --git a/src/sinto/processes/fragments.nf b/src/sinto/processes/fragments.nf index 810c694d..09d33b9f 100644 --- a/src/sinto/processes/fragments.nf +++ b/src/sinto/processes/fragments.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.sinto +toolParams = params.getToolParams("sinto") process SC__SINTO__FRAGMENTS { diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index 24abc905..17dabfe7 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -6,7 +6,7 @@ if(!params.containsKey("test")) { binDir = "" } -toolParams = params.tools.sratoolkit +toolParams = params.getToolParams("sratoolkit") process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { diff --git a/src/star/main.nf b/src/star/main.nf index cb129d74..d0632204 100644 --- a/src/star/main.nf +++ b/src/star/main.nf @@ -22,9 +22,9 @@ include { workflow star { main: - SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.transcriptome) ) - SC__STAR__MAP_COUNT( file(params.tools.star.map_count.transcriptome), SC__STAR__LOAD_GENOME.out, path(params.tools.star.map_count.fastqs) ) - SC__STAR__UNLOAD_GENOME( file(params.tools.star.map_count.transcriptome), SC__STAR__MAP_COUNT.out[0] ) + SC__STAR__LOAD_GENOME( file(params.getToolParams("star").map_count.transcriptome) ) + SC__STAR__MAP_COUNT( file(params.getToolParams("star").map_count.transcriptome), SC__STAR__LOAD_GENOME.out, path(params.getToolParams("star").map_count.fastqs) ) + SC__STAR__UNLOAD_GENOME( file(params.getToolParams("star").map_count.transcriptome), SC__STAR__MAP_COUNT.out[0] ) emit: SC__STAR__MAP_COUNT.out diff --git a/src/star/processes/build_genome.nf b/src/star/processes/build_genome.nf index eb4532ce..070b4da7 100644 --- a/src/star/processes/build_genome.nf +++ b/src/star/processes/build_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__BUILD_INDEX { - container params.tools.star.container + container params.getToolParams("star").container label 'compute_resources__star_build_genome' input: @@ -13,7 +13,7 @@ process SC__STAR__BUILD_INDEX { file("STAR_index") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star.build_genome) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("star").build_genome) processParams = sampleParams.local """ mkdir STAR_index diff --git a/src/star/processes/load_genome.nf b/src/star/processes/load_genome.nf index 2b78a38b..0e40b910 100644 --- a/src/star/processes/load_genome.nf +++ b/src/star/processes/load_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__LOAD_GENOME { - container params.tools.star.container + container params.getToolParams("star").container label 'compute_resources__default' input: diff --git a/src/star/processes/map_count.nf b/src/star/processes/map_count.nf index f3a3edc1..ee662a56 100644 --- a/src/star/processes/map_count.nf +++ b/src/star/processes/map_count.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__MAP_COUNT { - container params.tools.star.container + container params.getToolParams("star").container label 'compute_resources__star_map_count' input: @@ -16,7 +16,7 @@ process SC__STAR__MAP_COUNT { tuple 
val(sample), path("*.STAR_Aligned.sortedByCoord.out.bam"), emit: bam script: - def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star.map_count) + def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("star").map_count) processParams = sampleParams.local success = true """ diff --git a/src/star/processes/solo_map_count.nf b/src/star/processes/solo_map_count.nf index 2b76d67d..d3171ddc 100644 --- a/src/star/processes/solo_map_count.nf +++ b/src/star/processes/solo_map_count.nf @@ -1,7 +1,7 @@ nextflow.enable.dsl=2 process SC__STAR__SOLO_MAP_COUNT { - container params.tools.star.container + container params.getToolParams("star").container label 'compute_resources__star_map_count' input: @@ -24,10 +24,10 @@ process SC__STAR__SOLO_MAP_COUNT { --soloType Droplet \ --genomeDir ${transcriptome} \ --runThreadN ${task.cpus} \ - ${(params.tools.star.map_count.containsKey('limitBAMsortRAM')) ? '--limitBAMsortRAM ' + params.tools.star.map_count.limitBAMsortRAM: ''} \ - ${(params.tools.star.map_count.containsKey('outSAMtype')) ? '--outSAMtype ' + params.tools.star.map_count.outSAMtype: ''} \ - ${(params.tools.star.map_count.containsKey('quantMode')) ? '--quantMode ' + params.tools.star.map_count.quantMode: ''} \ - ${(params.tools.star.map_count.containsKey('outReadsUnmapped')) ? '--outReadsUnmapped ' + params.tools.star.map_count.outReadsUnmapped: ''} \ + ${(params.getToolParams("star").map_count.containsKey('limitBAMsortRAM')) ? '--limitBAMsortRAM ' + params.getToolParams("star").map_count.limitBAMsortRAM: ''} \ + ${(params.getToolParams("star").map_count.containsKey('outSAMtype')) ? '--outSAMtype ' + params.getToolParams("star").map_count.outSAMtype: ''} \ + ${(params.getToolParams("star").map_count.containsKey('quantMode')) ? '--quantMode ' + params.getToolParams("star").map_count.quantMode: ''} \ + ${(params.getToolParams("star").map_count.containsKey('outReadsUnmapped')) ? '--outReadsUnmapped ' + params.getToolParams("star").map_count.outReadsUnmapped: ''} \ --readFilesIn ${fastqs} \ ${(fastqs.name.endsWith(".gz")) ? '--readFilesCommand zcat' : ''} \ --outFileNamePrefix ${_sampleName} diff --git a/src/star/processes/unload_genome.nf b/src/star/processes/unload_genome.nf index afae2bd3..26bd7950 100644 --- a/src/star/processes/unload_genome.nf +++ b/src/star/processes/unload_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__UNLOAD_GENOME { - container params.tools.star.container + container params.getToolParams("star").container label 'compute_resources__default' input: diff --git a/src/trimgalore/processes/trim.nf b/src/trimgalore/processes/trim.nf index 02e9a9f9..4401103c 100644 --- a/src/trimgalore/processes/trim.nf +++ b/src/trimgalore/processes/trim.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.tools.trimgalore +toolParams = params.getToolParams("trimgalore") process SC__TRIMGALORE__TRIM { diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index 631db186..ffddbf81 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -143,7 +143,7 @@ def runRConverter = { def getConverterContainer = { params, type -> switch(type) { case "cistopic": - return params.tools.cistopic.container + return params.getToolParams("cistopic").container case "r": return "vibsinglecellnf/scconverter:0.0.1" break; diff --git a/workflows/nemesh.nf b/workflows/nemesh.nf index d880898e..85fdab09 100644 --- a/workflows/nemesh.nf +++ b/workflows/nemesh.nf @@ -84,11 +84,11 @@ workflow nemesh { data.subscribe { println it } // Check if custom selected barcodes file has been specified - if (params.tools.nemesh.custom_selected_barcodes) { + if (params.getToolParams("nemesh").custom_selected_barcodes) { Channel - .fromPath(params.tools.nemesh.custom_selected_barcodes) + .fromPath(params.getToolParams("nemesh").custom_selected_barcodes) .map { - path -> tuple(path.baseName.split('\\.')[0], params.tools.nemesh.custom_selected_barcodes, path) + path -> tuple(path.baseName.split('\\.')[0], params.getToolParams("nemesh").custom_selected_barcodes, path) } .set { selectedBarcodesByCustom } selectedBarcodesByCustom.subscribe { println it } @@ -138,7 +138,7 @@ workflow nemesh { a = FINAL_BAM.combine(SC__DROPLET_UTILS__BARCODE_SELECTION.out.selectedCellBarcodesByKnee, by: 0) b = FINAL_BAM.combine(SC__DROPLET_UTILS__BARCODE_SELECTION.out.selectedCellBarcodesByInflection, by: 0) - if (params.tools.nemesh.custom_selected_barcodes) { + if (params.getToolParams("nemesh").custom_selected_barcodes) { c = FINAL_BAM.combine(selectedBarcodesByCustom, by: 0) SC__DROP_SEQ_TOOLS__DIGITAL_EXPRESSION( a.mix(b,c) diff --git a/workflows/star.nf b/workflows/star.nf index 6e9cb66b..81c972f9 100644 --- a/workflows/star.nf +++ b/workflows/star.nf @@ -29,14 +29,14 @@ include { workflow star { main: - SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.index) ) + SC__STAR__LOAD_GENOME( file(params.getToolParams("star").map_count.index) ) SC__STAR__MAP_COUNT( - file(params.tools.star.map_count.index), + file(params.getToolParams("star").map_count.index), SC__STAR__LOAD_GENOME.out, - getSingleEndChannel(params.tools.star.map_count.fastqs) + getSingleEndChannel(params.getToolParams("star").map_count.fastqs) ) SC__STAR__UNLOAD_GENOME( - file(params.tools.star.map_count.index), + file(params.getToolParams("star").map_count.index), SC__STAR__MAP_COUNT.out.isDone.collect() ) SC__STAR_CONCATENATOR( SC__STAR__MAP_COUNT.out.counts.map { it[1] }.collect() ) From b64291d920f8b9826846acef09e9c10ebff50fec Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 16:32:45 +0100 Subject: [PATCH 165/202] Other fixes: params.tools left in scrublet and decontx plus add fix to avoid Variable `params` already defined in the process scope --- src/celda/conf/decontx_filter.config | 8 ++++---- src/scanpy/workflows/bec_bbknn.nf | 8 ++++++-- src/scanpy/workflows/bec_mnncorrect.nf | 10 +++++++--- src/scrublet/scrublet.config | 8 ++++---- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 2452ba3e..68df40e8 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -1,9 +1,9 @@ includeConfig '../../utils/conf/cell_annotate.config' 
-params.tools.celda.decontx.cell_annotate = params.sc.cell_annotate
-params.tools.remove('cell_annotate')
+params.sc.celda.decontx.cell_annotate = params.sc.cell_annotate
+params.sc.remove('cell_annotate')
 includeConfig '../../utils/conf/cell_filter.config'
-params.tools.celda.decontx.cell_filter = params.sc.cell_filter
-params.tools.remove('cell_filter')
+params.sc.celda.decontx.cell_filter = params.sc.cell_filter
+params.sc.remove('cell_filter')
 includeConfig './decontx_filter_defaults.config'
 params {
diff --git a/src/scanpy/workflows/bec_bbknn.nf b/src/scanpy/workflows/bec_bbknn.nf
index 68eb7dbc..fb1daede 100644
--- a/src/scanpy/workflows/bec_bbknn.nf
+++ b/src/scanpy/workflows/bec_bbknn.nf
@@ -57,6 +57,10 @@ workflow BEC_BBKNN {
         clusterIdentificationPreBatchEffectCorrection

     main:
+
+        // To avoid Variable `params` already defined in the process scope
+        def scanpyParams = params.getToolParams("scanpy")
+
         SC__SCANPY__BATCH_EFFECT_CORRECTION(
             dimReductionData.map {
                 it -> tuple(it[0], it[1], it[2])
@@ -71,7 +75,7 @@
         )

         // Define the parameters for dimensionality reduction
-        def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.getToolParams("scanpy").dim_reduction.umap) )
+        def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(scanpyParams.dim_reduction.umap) )
         SC__SCANPY__DIM_REDUCTION__UMAP(
             SC__SCANPY__BATCH_EFFECT_CORRECTION.out.combine(
                 dimRedParams.$()
@@ -87,7 +91,7 @@
         )

         // Define the parameters for clustering
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) )
         CLUSTER_IDENTIFICATION(
             normalizedTransformedData,
             SC__SCANPY__DIM_REDUCTION__UMAP.out,
diff --git a/src/scanpy/workflows/bec_mnncorrect.nf b/src/scanpy/workflows/bec_mnncorrect.nf
index 88338a1b..0dfa9d93 100644
--- a/src/scanpy/workflows/bec_mnncorrect.nf
+++ b/src/scanpy/workflows/bec_mnncorrect.nf
@@ -63,7 +63,11 @@ workflow BEC_MNNCORRECT {
         clusterIdentificationPreBatchEffectCorrection

     main:
+
+        // To avoid Variable `params` already defined in the process scope
+        def scanpyParams = params.getToolParams("scanpy")
+
         out = scanpyParams.containsKey("regress_out") ? SC__SCANPY__REGRESS_OUT( hvg ) : data
         SC__SCANPY__BATCH_EFFECT_CORRECTION(
             out.map {
@@ -99,7 +103,7 @@
         )

         // Define the parameters for clustering
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) )
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) )
         CLUSTER_IDENTIFICATION(
             normalizedTransformedData,
             DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
@@ -136,7 +140,7 @@
         mnncorrect_report = GENERATE_DUAL_INPUT_REPORT(
             becDualDataPrePost,
-            file(workflow.projectDir + params.getToolParams("scanpy").batch_effect_correct.report_ipynb),
+            file(workflow.projectDir + scanpyParams.batch_effect_correct.report_ipynb),
             "SC_BEC_MNNCORRECT_report",
             clusteringParams.isParameterExplorationModeOn()
         )
diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config
index f878011a..69d574fb 100644
--- a/src/scrublet/scrublet.config
+++ b/src/scrublet/scrublet.config
@@ -1,9 +1,9 @@
 includeConfig './conf/base.config'
 includeConfig '../utils/conf/cell_annotate.config'
-params.tools.scrublet.cell_annotate = params.sc.cell_annotate
-params.tools.remove('cell_annotate')
+params.sc.scrublet.cell_annotate = params.sc.cell_annotate
+params.sc.remove('cell_annotate')
 includeConfig '../utils/conf/cell_filter.config'
-params.tools.scrublet.cell_filter = params.sc.cell_filter
-params.tools.remove('cell_filter')
+params.sc.scrublet.cell_filter = params.sc.cell_filter
+params.sc.remove('cell_filter')
 includeConfig './conf/scrublet_defaults.conf'

From e8b377d1c064f1c2a2fe84aa97c02d26e0c968fe Mon Sep 17 00:00:00 2001
From: dweemx
Date: Wed, 17 Feb 2021 17:31:35 +0100
Subject: [PATCH 166/202] Move to new config structure: from params.sc to
 params.tools. Decided not to go for backward compatibility, hence this will
 be a breaking change, i.e. older configs will very likely not work.
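
In practice this means renaming the top-level `sc` scope to `tools` in every
user config. A minimal before/after sketch of the rename (the scanpy entry and
its container tag are only an illustration borrowed from the docs; any tool
key under the old `sc` scope moves the same way):

    // old layout, no longer recognised after this change
    params {
        sc {
            scanpy {
                container = 'vibsinglecellnf/scanpy:1.8.1'
            }
        }
    }

    // new layout
    params {
        tools {
            scanpy {
                container = 'vibsinglecellnf/scanpy:1.8.1'
            }
        }
    }

Workflow code now reads these settings directly as params.tools.<tool>
(e.g. params.tools.scanpy.clustering), utility settings such as cell_annotate,
cell_filter and publish are looked up under params.utils, and the
getToolParams/hasToolParams/getUtilsParams/hasUtilsParams compatibility
helpers in conf/generic.config are removed below.
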
--- conf/generic.config | 62 --------------- conf/nemesh.config | 2 +- conf/test.config | 2 +- conf/test__bbknn.config | 2 +- conf/test__bbknn_scenic.config | 2 +- conf/test__cell_annotate_filter.config | 2 +- conf/test__decontx.config | 2 +- conf/test__harmony.config | 2 +- conf/test__mnncorrect.config | 2 +- conf/test__scenic.config | 2 +- conf/test__scenic_multiruns.config | 2 +- conf/test__single_sample.config | 2 +- ...test__single_sample_decontx_correct.config | 2 +- ...gle_sample_decontx_correct_scrublet.config | 2 +- .../test__single_sample_decontx_filter.config | 2 +- ...st__single_sample_param_exploration.config | 2 +- conf/test__single_sample_scenic.config | 2 +- ...est__single_sample_scenic_multiruns.config | 2 +- conf/test__single_sample_scrublet.config | 2 +- conf/test_disabled.config | 2 +- docs/development.rst | 26 +++---- docs/features.rst | 16 ++-- docs/pipelines.rst | 6 +- main.nf | 74 ++++++++---------- main_atac.nf | 6 +- src/archr/archr.config | 2 +- src/archr/processes/cell_calling.nf | 2 +- src/archr/processes/createArrow_unfiltered.nf | 2 +- src/bap/bap.config | 2 +- src/bap/processes/barcode_multiplet.nf | 2 +- src/bap/processes/biorad_debarcode.nf | 2 +- src/bwamaptools/bwamaptools.config | 4 +- src/bwamaptools/main.nf | 2 +- .../processes/add_barcode_as_tag.nf | 2 +- src/bwamaptools/processes/index.nf | 2 +- src/bwamaptools/processes/mapping.nf | 2 +- src/bwamaptools/processes/mapping_summary.nf | 2 +- src/celda/conf/base.config | 2 +- src/celda/conf/decontx_correct.config | 2 +- src/celda/conf/decontx_filter.config | 10 +-- src/celda/conf/decontx_filter_defaults.config | 2 +- src/celda/main.nf | 10 +-- src/celda/processes/runDecontX.nf | 4 +- src/cellranger-atac/conf/base.config | 2 +- src/cellranger-atac/conf/count.config | 2 +- .../conf/count_metadata.config | 2 +- src/cellranger-atac/conf/mkfastq.config | 2 +- src/cellranger-atac/processes/count.nf | 2 +- src/cellranger-atac/processes/mkfastq.nf | 2 +- src/cellranger/conf/base.config | 2 +- .../conf/cellranger_libraries.config | 2 +- src/cellranger/conf/count.config | 2 +- src/cellranger/conf/count_libraries.config | 2 +- src/cellranger/conf/count_metadata.config | 2 +- src/cellranger/conf/mkfastq.config | 2 +- src/cellranger/main.nf | 4 +- src/cellranger/processes/count.nf | 6 +- src/cellranger/processes/mkfastq.nf | 2 +- .../workflows/cellranger_libraries.nf | 4 +- src/channels/channels.nf | 4 +- src/channels/conf/bam.config | 2 +- src/channels/conf/csv.config | 2 +- src/channels/conf/fragments.config | 2 +- src/channels/conf/h5ad.config | 2 +- src/channels/conf/loom.config | 2 +- src/channels/conf/seurat_rds.config | 2 +- .../conf/tenx_atac_cellranger_mex.config | 2 +- src/channels/conf/tsv.config | 2 +- src/directs/conf/base.config | 2 +- .../test__select_default_clustering.config | 2 +- src/directs/main.test.nf | 2 +- .../processes/selectDefaultClustering.nf | 4 +- src/dropletutils/dropletutils.config | 2 +- .../processes/barcode_selection.nf | 2 +- src/dropseqtools/dropseqtools.config | 4 +- .../processes/bam_tag_histogram.nf | 4 +- .../processes/convert_to_ref_flat.nf | 2 +- .../processes/detect_bead_synthesis_errors.nf | 4 +- .../processes/digital_expression.nf | 2 +- src/dropseqtools/processes/filter_bam.nf | 4 +- src/dropseqtools/processes/gzip.nf | 2 +- src/dropseqtools/processes/polya_trimmer.nf | 4 +- .../tag_bam_with_read_sequence_extended.nf | 6 +- .../processes/tag_read_with_gene_exon.nf | 2 +- .../processes/trim_starting_sequence.nf | 4 +- src/edirect/edirect.config | 2 +- 
src/fastp/fastp.config | 2 +- src/fastp/processes/clean_and_fastqc.nf | 2 +- src/flybaser/flybaser.config | 2 +- src/flybaser/processes/convertNomenclature.nf | 4 +- src/harmony/harmony.config | 2 +- src/harmony/processes/runHarmony.nf | 4 +- src/harmony/workflows/bec_harmony.nf | 4 +- src/pcacv/pcacv.config | 2 +- src/pcacv/processes/runPCACV.nf | 4 +- .../processes/create_sequence_dictionary.nf | 2 +- src/picard/processes/fastq_to_bam.nf | 2 +- src/picard/processes/merge_bam_alignment.nf | 4 +- src/picard/processes/sam_to_fastq.nf | 2 +- src/picard/processes/sort_sam.nf | 4 +- src/popscle/popscle.config | 2 +- src/popscle/processes/demuxlet.nf | 8 +- src/popscle/processes/dsc_pileup.nf | 4 +- src/popscle/workflows/demuxlet.nf | 2 +- src/popscle/workflows/dsc_pileup.nf | 2 +- src/pycistopic/processes/macs2_call_peaks.nf | 2 +- src/pycistopic/pycistopic.config | 2 +- src/scanpy/conf/base.config | 2 +- src/scanpy/conf/bbknn.config | 2 +- src/scanpy/conf/data_transformation.config | 2 +- src/scanpy/conf/filter.config | 2 +- src/scanpy/conf/min.config | 2 +- src/scanpy/conf/mnncorrect.config | 2 +- src/scanpy/conf/normalization.config | 2 +- src/scanpy/conf/regress_out.config | 2 +- src/scanpy/processes/batch_effect_correct.nf | 4 +- src/scanpy/processes/cluster.nf | 12 +-- src/scanpy/processes/dim_reduction.nf | 4 +- src/scanpy/processes/feature_selection.nf | 8 +- src/scanpy/processes/filter.nf | 12 +-- src/scanpy/processes/marker_genes.nf | 8 +- src/scanpy/processes/neighborhood_graph.nf | 4 +- src/scanpy/processes/regress_out.nf | 4 +- src/scanpy/processes/reports.nf | 16 ++-- src/scanpy/processes/transform.nf | 12 +-- src/scanpy/workflows/bec_bbknn.nf | 4 +- src/scanpy/workflows/bec_mnncorrect.nf | 2 +- .../workflows/cluster_identification.nf | 6 +- src/scanpy/workflows/combine_reports.nf | 2 +- src/scanpy/workflows/dim_reduction.nf | 4 +- src/scanpy/workflows/hvg_selection.nf | 4 +- src/scanpy/workflows/qc_filter.nf | 2 +- src/scanpy/workflows/single_sample.nf | 4 +- src/scenic/conf/append.config | 2 +- src/scenic/conf/min/aucell.config | 2 +- src/scenic/conf/min/base/v0.0.1.config | 10 +-- src/scenic/conf/min/cistarget.config | 10 +-- .../cistarget-motifs-fly-dm6-v0.0.1.config | 2 +- .../cistarget-motifs-human-hg19-v0.0.1.config | 2 +- .../cistarget-motifs-human-hg38-v0.0.1.config | 2 +- .../cistarget-motifs-mouse-mm10-v0.0.1.config | 2 +- .../cistarget-tracks-fly-dm6-v0.0.1.config | 2 +- .../cistarget-tracks-human-hg19-v0.0.1.config | 2 +- .../cistarget-tracks-human-hg38-v0.0.1.config | 2 +- src/scenic/conf/min/grn.config | 8 +- src/scenic/conf/min/labels.config | 4 +- src/scenic/conf/min/scenic.config | 2 +- src/scenic/conf/min/scope.config | 2 +- src/scenic/conf/min/tfs/fly-v0.0.1.config | 2 +- src/scenic/conf/min/tfs/human-v0.0.1.config | 2 +- src/scenic/conf/min/tfs/mouse-v0.0.1.config | 2 +- src/scenic/conf/multi_runs.config | 2 +- src/scenic/conf/test.config | 2 +- src/scenic/conf/test_multi_runs.config | 2 +- src/scenic/main.nf | 40 +++++----- src/scenic/main.test.nf | 76 +++++++++---------- src/scenic/processes/add_correlation.nf | 2 +- .../arboreto_with_multiprocessing.nf | 2 +- src/scenic/processes/aucell.nf | 2 +- src/scenic/processes/cistarget.nf | 2 +- src/scenic/processes/loomHandler.nf | 2 +- .../processes/multiruns/aggregateFeatures.nf | 2 +- .../processes/multiruns/aggregateRegulons.nf | 2 +- .../processes/multiruns/aucellFromFolder.nf | 2 +- .../multiruns/convertMotifsToRegulons.nf | 2 +- src/scenic/processes/multiruns/saveToLoom.nf | 2 +- 
src/scenic/processes/reports.nf | 2 +- src/scenic/scenic.config | 4 +- src/scrublet/bin/sc_doublet_detection.py | 2 +- src/scrublet/conf/base.config | 2 +- src/scrublet/conf/scrublet_defaults.conf | 2 +- src/scrublet/processes/doublet_detection.nf | 4 +- src/scrublet/processes/reports.nf | 2 +- src/scrublet/scrublet.config | 8 +- src/scrublet/workflows/doublet_removal.nf | 2 +- .../processes/barcode_correction.nf | 2 +- .../processes/debarcode_10x_scatac_fastqs.nf | 2 +- .../singlecelltoolkit.config | 2 +- src/sinto/processes/fragments.nf | 2 +- src/sinto/sinto.config | 2 +- src/soupx/conf/base.config | 2 +- src/soupx/conf/soupx_correct.config | 2 +- src/soupx/main.nf | 2 +- src/sratoolkit/processes/downloadFastQ.nf | 2 +- src/sratoolkit/sratoolkit.config | 2 +- src/star/main.nf | 6 +- src/star/processes/build_genome.nf | 4 +- src/star/processes/load_genome.nf | 2 +- src/star/processes/map_count.nf | 4 +- src/star/processes/solo_map_count.nf | 10 +-- src/star/processes/unload_genome.nf | 2 +- src/star/star.config | 2 +- src/trimgalore/processes/trim.nf | 2 +- src/trimgalore/trimgalore.config | 2 +- src/utils/README.md | 4 +- src/utils/conf/base.config | 2 +- src/utils/conf/cell_annotate.config | 2 +- src/utils/conf/cell_filter.config | 2 +- src/utils/conf/h5ad_clean.config | 2 +- src/utils/conf/h5ad_concatenate.config | 2 +- src/utils/conf/sample_annotate.config | 2 +- src/utils/conf/sample_annotate_old_v1.config | 2 +- src/utils/conf/scope.config | 2 +- src/utils/conf/test.config | 2 +- src/utils/main.test.nf | 12 +-- src/utils/processes/h5adAnnotate.nf | 8 +- src/utils/processes/h5adExtractMetadata.nf | 4 +- src/utils/processes/h5adMerge.nf | 2 +- src/utils/processes/h5adSubset.nf | 10 +-- src/utils/processes/h5adToLoom.nf | 4 +- src/utils/processes/h5adUpdate.nf | 8 +- src/utils/processes/h5adUpdateMetadata.nf | 4 +- src/utils/processes/utils.nf | 24 +++--- src/utils/workflows/annotateByCellMetadata.nf | 6 +- src/utils/workflows/filterAnnotateClean.nf | 12 +-- src/utils/workflows/filterByCellMetadata.nf | 6 +- workflows/bbknn.nf | 10 +-- workflows/harmony.nf | 10 +-- workflows/mnncorrect.nf | 10 +-- workflows/multi_sample.nf | 10 +-- workflows/nemesh.nf | 8 +- workflows/single_sample.nf | 4 +- workflows/single_sample_star.nf | 10 +-- workflows/star.nf | 8 +- 224 files changed, 497 insertions(+), 567 deletions(-) diff --git a/conf/generic.config b/conf/generic.config index 707df5e0..71c4dfce 100644 --- a/conf/generic.config +++ b/conf/generic.config @@ -24,66 +24,4 @@ params { } return [global: paramsGlobal, local: pL] } - getToolParams = { toolKey -> - def _get = { p -> - if(p.containsKey("tools")) { // weirdly p?.tools gives a WARN (same for the other if statements) - return p.tools[toolKey] - } - if(p.containsKey("sc")) { - return p.sc[toolKey] - } - if(p.containsKey("toolKey")) { - return p[toolKey] - } - throw new Exception("VSN ERROR: Cannot get tool params from NXF params.") - } - if(!toolKey.contains(".")) { - return _get(params) - } - def entry = null - if(params.containsKey("tools")) { - entry = params.tools - } else if(params.containsKey("sc")) { - entry = params.sc - } else if(params.containsKey("sc")) { - entry = params - } else { - throw new Exception("VSN ERROR: Missing params..") - } - - toolKey.split('\\.').each { entry = entry?.get(it) } - return entry - } - hasToolParams = { toolKey -> - if(params.containsKey("tools")) { - return params.tools.containsKey(toolKey) - } - if(params.containsKey("sc")) { - return params.sc.containsKey(toolKey) - } - 
if(params.containsKey("toolKey")) { - return params.containsKey(toolKey) - } - return false - } - hasUtilsParams = { utilityKey -> - if(params.utils.containsKey(utilityKey)) { - return true - } - // backward-compatible - if(params.containsKey("sc")) { - return params.sc.containsKey(utilityKey) - } - return false - } - getUtilsParams = { utilityKey -> - if(params.utils.containsKey(utilityKey)) { - return params.utils[utilityKey] - } - // backward-compatible - if(params.containsKey("sc")) { - return params.sc[utilityKey] - } - throw new Exception("Cannot find utility " + utilityKey + " in Nextflow config.") - } } diff --git a/conf/nemesh.config b/conf/nemesh.config index ff29c2c8..0e724150 100644 --- a/conf/nemesh.config +++ b/conf/nemesh.config @@ -7,7 +7,7 @@ params { qsubaccount = '' } - sc { + tools { nemesh { // User can extract custom cell barcodes by providing it with a TSV containing all the barcodes // custom_selected_barcodes = '' diff --git a/conf/test.config b/conf/test.config index 8c00d078..f28e01eb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,5 +1,5 @@ params { - misc { + mitools { test { enabled = true } diff --git a/conf/test__bbknn.config b/conf/test__bbknn.config index c3494f6b..f1cc88dd 100644 --- a/conf/test__bbknn.config +++ b/conf/test__bbknn.config @@ -8,7 +8,7 @@ params { cellranger_mex = "testdata/*/outs/" } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__bbknn_scenic.config b/conf/test__bbknn_scenic.config index fb4f2ede..7cfa1bf4 100644 --- a/conf/test__bbknn_scenic.config +++ b/conf/test__bbknn_scenic.config @@ -8,7 +8,7 @@ params { cellranger_mex = "testdata/*/outs/" } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__cell_annotate_filter.config b/conf/test__cell_annotate_filter.config index fb9b85f1..7ce386fe 100644 --- a/conf/test__cell_annotate_filter.config +++ b/conf/test__cell_annotate_filter.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_converter { off = 'h5ad' tagCellWithSampleId = false diff --git a/conf/test__decontx.config b/conf/test__decontx.config index bd1b66d6..55e63711 100644 --- a/conf/test__decontx.config +++ b/conf/test__decontx.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__harmony.config b/conf/test__harmony.config index b8049771..ea3562da 100644 --- a/conf/test__harmony.config +++ b/conf/test__harmony.config @@ -8,7 +8,7 @@ params { cellranger_mex = "testdata/*/outs/" } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__mnncorrect.config b/conf/test__mnncorrect.config index bac8f9a1..327f41c7 100644 --- a/conf/test__mnncorrect.config +++ b/conf/test__mnncorrect.config @@ -8,7 +8,7 @@ params { cellranger_mex = "testdata/*/outs/" } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__scenic.config b/conf/test__scenic.config index f21ce3c8..d35d0cfb 100644 --- a/conf/test__scenic.config +++ b/conf/test__scenic.config @@ -3,7 +3,7 @@ params { global { project_name = 'scenic_CI' } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__scenic_multiruns.config b/conf/test__scenic_multiruns.config index 366f1dec..fa59bb26 100644 --- a/conf/test__scenic_multiruns.config +++ b/conf/test__scenic_multiruns.config @@ -3,7 +3,7 @@ params { global { project_name = 'scenic_multiruns_CI' } - sc { + tools { file_annotator { 
metadataFilePath = '' } diff --git a/conf/test__single_sample.config b/conf/test__single_sample.config index 14420fe4..5ca6efd4 100644 --- a/conf/test__single_sample.config +++ b/conf/test__single_sample.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_decontx_correct.config b/conf/test__single_sample_decontx_correct.config index 408c6a05..7b730bf6 100644 --- a/conf/test__single_sample_decontx_correct.config +++ b/conf/test__single_sample_decontx_correct.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_decontx_correct_scrublet.config b/conf/test__single_sample_decontx_correct_scrublet.config index 26a5b412..27fa6007 100644 --- a/conf/test__single_sample_decontx_correct_scrublet.config +++ b/conf/test__single_sample_decontx_correct_scrublet.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_decontx_filter.config b/conf/test__single_sample_decontx_filter.config index 826c8b60..ad59c4a7 100644 --- a/conf/test__single_sample_decontx_filter.config +++ b/conf/test__single_sample_decontx_filter.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_param_exploration.config b/conf/test__single_sample_param_exploration.config index 82e51612..4c2212d9 100644 --- a/conf/test__single_sample_param_exploration.config +++ b/conf/test__single_sample_param_exploration.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_scenic.config b/conf/test__single_sample_scenic.config index b3d85420..56f8bc6e 100644 --- a/conf/test__single_sample_scenic.config +++ b/conf/test__single_sample_scenic.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_scenic_multiruns.config b/conf/test__single_sample_scenic_multiruns.config index faeb6dee..e2102f76 100644 --- a/conf/test__single_sample_scenic_multiruns.config +++ b/conf/test__single_sample_scenic_multiruns.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test__single_sample_scrublet.config b/conf/test__single_sample_scrublet.config index 596722e0..c30a3c63 100644 --- a/conf/test__single_sample_scrublet.config +++ b/conf/test__single_sample_scrublet.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - sc { + tools { file_annotator { metadataFilePath = '' } diff --git a/conf/test_disabled.config b/conf/test_disabled.config index 76ebe776..9c619278 100644 --- a/conf/test_disabled.config +++ b/conf/test_disabled.config @@ -1,5 +1,5 @@ params { - misc { + mitools { test { enabled = false } diff --git a/docs/development.rst b/docs/development.rst index ee6ab40c..88632a08 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -92,7 +92,7 @@ Steps: .. code:: groovy params { - sc { + tools { harmony { container = 'vibsinglecellnf/harmony:1.0' report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/harmony/bin/reports/sc_harmony_report.ipynb" @@ -239,7 +239,7 @@ Steps: process SC__HARMONY__HARMONY_MATRIX { - container params.getToolParams("harmony").container + container params.tools.harmony.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' clusterOptions "-l nodes=1:ppn=${params.global.threads} -l walltime=1:00:00 -A ${params.global.qsubaccount}" @@ -250,7 +250,7 @@ Steps: tuple val(sampleId), path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("harmony")) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony) processParams = sampleParams.local varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') """ @@ -364,7 +364,7 @@ Steps: // Run clustering // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) CLUSTER_IDENTIFICATION( normalizedTransformedData, DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, @@ -400,7 +400,7 @@ Steps: ) harmony_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.getToolParams("harmony").report_ipynb), + file(workflow.projectDir + params.tools.harmony.report_ipynb), "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) @@ -490,10 +490,10 @@ Steps: SC__FILE_CONVERTER | \ FILTER_AND_ANNOTATE_AND_CLEAN - if(params.getToolParams("scanpy").containsKey("filter")) { + if(params.tools.scanpy.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.getUtilsParams("file_concatenator")) { + if(params.utils.file_concatenator) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -502,7 +502,7 @@ Steps: ) ) } - if(params.getToolParams("scanpy").containsKey("data_transformation") && params.getToolParams("scanpy").containsKey("normalization")) { + if(params.tools.scanpy.containsKey("data_transformation") && params.tools.scanpy.containsKey("normalization")) { out = NORMALIZE_TRANSFORM( out ) } out = HVG_SELECTION( out ) @@ -527,7 +527,7 @@ Steps: // Conversion // Convert h5ad to X (here we choose: loom format) - if(params.hasUtilsParams("file_concatenator")) { + if(params.utils?.file_concatenator) { filteredloom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONCATENATOR.out ) scopeloom = FILE_CONVERTER( BEC_HARMONY.out.data.groupTuple(), @@ -552,7 +552,7 @@ Steps: // Collect the reports: // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) // Pairing clustering reports with bec reports if(!clusteringParams.isParameterExplorationModeOn()) { clusteringBECReports = BEC_HARMONY.out.cluster_report.map { @@ -722,8 +722,8 @@ Workflows import multiple processes and define the workflow by name: workflow CELLRANGER { main: - SC__CELLRANGER__MKFASTQ(file(params.getToolParams("cellranger").mkfastq.csv), path(params.getToolParams("cellranger").mkfastq.runFolder)) - SC__CELLRANGER__COUNT(file(params.getToolParams("cellranger").count.transcriptome), SC__CELLRANGER__MKFASTQ.out.flatten()) + SC__CELLRANGER__MKFASTQ(file(params.tools.cellranger.mkfastq.csv), path(params.tools.cellranger.mkfastq.runFolder)) + 
SC__CELLRANGER__COUNT(file(params.tools.cellranger.count.transcriptome), SC__CELLRANGER__MKFASTQ.out.flatten()) emit: SC__CELLRANGER__COUNT.out @@ -775,7 +775,7 @@ The parameter structure internally (post-merge) is: project_name = "MCF7" ... } - sc { + tools { utils { file_converter { ... diff --git a/docs/features.rst b/docs/features.rst index 326114a1..f2417512 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -51,7 +51,7 @@ This will add a different scenic entry in the config: .. code:: bash params { - sc { + tools { scenic { container = 'vibsinglecellnf/scenic:0.9.19' report_ipynb = '/src/scenic/bin/reports/scenic_report.ipynb' @@ -105,7 +105,7 @@ If you want to change those thresholds applied on the markers genes, edit the `` .. code:: groovy params { - sc { + tools { scope { markers { log_fc_threshold = 0.5 @@ -159,7 +159,7 @@ The ``utils_cell_annotate`` profile is adding the following part to the config: .. code:: groovy params { - sc { + tools { cell_annotate { off = 'h5ad' method = '' @@ -215,7 +215,7 @@ The profile ``utils_sample_annotate`` should be added when generating the main c .. code:: groovy params { - sc { + tools { sample_annotate { iff = '10x_cellranger_mex' off = 'h5ad' @@ -255,7 +255,7 @@ The ``utils_cell_filter`` profile is required when generating the config file. T .. code:: groovy params { - sc { + tools { cell_filter { off = 'h5ad' method = '' @@ -331,7 +331,7 @@ You'll just have to repeat the following structure for the parameters which you .. code:: groovy params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' filter { @@ -362,7 +362,7 @@ If you want to apply custom parameters for some specific samples and have a "gen .. code:: groovy params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' filter { @@ -427,7 +427,7 @@ By default, don't regress any variable out. To enable this features, the ``scanp .. code:: groovy params { - sc { + tools { scanpy { regress_out { variablesToRegressOut = [] diff --git a/docs/pipelines.rst b/docs/pipelines.rst index d176db48..dbb1e909 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -77,7 +77,7 @@ Cuurently there are 3 methods available to call doublets from Scrublet doublet s .. code:: bash params { - sc { + tools { scublet { threshold = [ "": @@ -512,7 +512,7 @@ Make sure the following parts of the generated config are properly set: cellranger_mex = '~/out/counts/*/outs/' } } - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' } @@ -567,7 +567,7 @@ Make sure the following parts of the generated config are properly set: cellranger_mex = '~/out/counts/*/outs/' } } - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' } diff --git a/main.nf b/main.nf index b3edfb88..8baaf058 100644 --- a/main.nf +++ b/main.nf @@ -79,14 +79,6 @@ workflow mnncorrect { } -def getHarmonyBatchVariables = { params -> - batchVariables = params.tools.harmony.varsUse - if(batchVariables.size() > 1) { - throw new Exception("Currently it is not supported to run with multiple batch variables.") - } - return batchVariables -} - // run multi-sample with bbknn, output a scope loom file workflow harmony { @@ -98,7 +90,7 @@ workflow harmony { PUBLISH as PUBLISH_SCANPY; } from "./src/utils/workflows/utils" params(params) - batchVariables = getHarmonyBatchVariables(params) + batchVariables = params.tools.harmony.varsUse outputSuffix = params.utils?.publish?.annotateWithBatchVariableName ? 
"HARMONY" + "_BY_" + batchVariables.join("_").toUpperCase() : "HARMONY" getDataChannel | HARMONY @@ -132,15 +124,15 @@ workflow harmony_only { } from "./src/utils/workflows/utils" params(params) - batchVariables = getHarmonyBatchVariables(params) - outputSuffix = params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables[0].toUpperCase() : "HARMONY" + batchVariables = params.tools.harmony.varsUse + outputSuffix = params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables.join("_").toUpperCase() : "HARMONY" getDataChannel | HARMONY if(params.utils?.publish) { PUBLISH_HARMONY( HARMONY.out.scanpyh5ad, - params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables[0].toUpperCase() : "HARMONY", + params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables.join("_").toUpperCase() : "HARMONY", "h5ad", null, false @@ -206,8 +198,8 @@ workflow harmony_scenic { PUBLISH as PUBLISH_HARMONY_SCENIC; } from "./src/utils/workflows/utils" params(params) - batchVariables = getHarmonyBatchVariables(params) - outputSuffix = params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables[0].toUpperCase() : "HARMONY" + batchVariables = params.tools.harmony.varsUse + outputSuffix = params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables.join("_").toUpperCase() : "HARMONY" getDataChannel | HARMONY @@ -509,7 +501,7 @@ workflow single_sample_decontx { if(params.utils?.publish) { PUBLISH( SC__H5AD_TO_LOOM.out, - "SINGLE_SAMPLE_CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase(), + "SINGLE_SAMPLE_CELDA_DECONTX_"+ params.tools.celda.decontx.strategy.toUpperCase(), "loom", null, false @@ -585,7 +577,7 @@ workflow single_sample_decontx_scrublet { // - potential doublets removed by Scrublet PUBLISH_CELDA_DECONTX_SCRUBLET( SCRUBLET__DOUBLET_REMOVAL.out.data_doublets_removed, - "CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase() +"_SCRUBLET", + "CELDA_DECONTX_"+ params.tools.celda.decontx.strategy.toUpperCase() +"_SCRUBLET", "h5ad", null, false @@ -763,7 +755,7 @@ workflow scenic { } from "./src/utils/workflows/utils" params(params) SCENIC( - Channel.of( tuple(params.global.project_name, file(params.getToolParams("scenic").filteredLoom))) + Channel.of( tuple(params.global.project_name, file(params.tools.scenic.filteredLoom))) ) if(params.utils?.publish) { @@ -787,9 +779,9 @@ workflow cellranger { } from './src/cellranger/main' params(params) CELLRANGER( - file(params.getToolParams("cellranger").mkfastq.csv), - file(params.getToolParams("cellranger").mkfastq.runFolder), - file(params.getToolParams("cellranger").count.transcriptome) + file(params.tools.cellranger.mkfastq.csv), + file(params.tools.cellranger.mkfastq.runFolder), + file(params.tools.cellranger.count.transcriptome) ) emit: @@ -803,10 +795,10 @@ workflow cellranger_libraries { } from './src/cellranger/workflows/cellranger_libraries' params(params) CELLRANGER_LIBRARIES( - file(params.getToolParams("cellranger").mkfastq.csv), - file(params.getToolParams("cellranger").mkfastq.runFolder), - file(params.getToolParams("cellranger").count.transcriptome), - file(params.getToolParams("cellranger").count.featureRef) + file(params.tools.cellranger.mkfastq.csv), + file(params.tools.cellranger.mkfastq.runFolder), + file(params.tools.cellranger.count.transcriptome), + file(params.tools.cellranger.count.featureRef) ) emit: @@ -821,8 +813,8 @@ 
workflow cellranger_count_metadata { } from './src/cellranger/workflows/cellRangerCountWithMetadata' params(params) CELLRANGER_COUNT_WITH_METADATA( - file(params.getToolParams("cellranger").count.transcriptome), - file(params.getToolParams("cellranger").count.metadata) + file(params.tools.cellranger.count.transcriptome), + file(params.tools.cellranger.count.metadata) ) emit: CELLRANGER_COUNT_WITH_METADATA.out @@ -846,9 +838,9 @@ workflow cellranger_count_libraries { } from './src/cellranger/workflows/cellRangerCountWithLibraries' params(params) CELLRANGER_COUNT_WITH_LIBRARIES( - file(params.getToolParams("cellranger").count.transcriptome), - file(params.getToolParams("cellranger").count.featureRef), - params.getToolParams("cellranger").count.libraries + file(params.tools.cellranger.count.transcriptome), + file(params.tools.cellranger.count.featureRef), + params.tools.cellranger.count.libraries ) emit: @@ -864,9 +856,9 @@ workflow cellranger_count_demuxlet { include { SC__CELLRANGER__COUNT as CELLRANGER_COUNT; } from './src/cellranger/processes/count' - if (params.getToolParams("cellranger").count.fastqs instanceof Map) { + if (params.tools.cellranger.count.fastqs instanceof Map) { // Remove default key - Channel.from(params.getToolParams("cellranger").count.fastqs.findAll { + Channel.from(params.tools.cellranger.count.fastqs.findAll { it.key != 'default' }.collect { k, v -> // Split possible multiple file paths @@ -885,7 +877,7 @@ workflow cellranger_count_demuxlet { .set { fastq_data } } data = CELLRANGER_COUNT( - params.getToolParams("cellranger").count.transcriptome, + params.tools.cellranger.count.transcriptome, fastq_data ) get_bam_barcodes_from_cellranger_rna(data) | @@ -1095,7 +1087,7 @@ workflow sra_cellranger_bbknn { out = sra() SC__CELLRANGER__PREPARE_FOLDER( out.groupTuple() ) SC__CELLRANGER__COUNT( - file(params.getToolParams("cellranger").count.transcriptome), + file(params.tools.cellranger.count.transcriptome), SC__CELLRANGER__PREPARE_FOLDER.out ) BBKNN( @@ -1195,7 +1187,7 @@ workflow _cell_annotate_filter { getDataChannel | \ SC__FILE_CONVERTER - if(!params.hasUtilsParams("cell_annotate")) + if(!params.utils?.cell_annotate) throw new Exception("VSN ERROR: The cell_annotate param is missing in params.utils.") // Annotate & publish @@ -1203,7 +1195,7 @@ workflow _cell_annotate_filter { SC__FILE_CONVERTER.out, null, ) - if(params.getUtilsParams("cell_annotate").containsKey("publish") && params.getUtilsParams("cell_annotate").publish) { + if(params.utils.cell_annotate.containsKey("publish") && params.utils.cell_annotate.publish) { PUBLISH_H5AD_CELL_ANNOTATED( ANNOTATE_BY_CELL_METADATA.out, "ANNOTATE_BY_CELL_METADATA", @@ -1213,7 +1205,7 @@ workflow _cell_annotate_filter { ) } - if(!params.hasUtilsParams("cell_filter")) + if(!params.utils?.cell_filter) throw new Exception("VSN ERROR: The cell_filter param is missing in params.utils.") // Filter (& clean) & publish @@ -1222,7 +1214,7 @@ workflow _cell_annotate_filter { null ) - if(params.getUtilsParams("cell_filter")?.publish) { + if(params.utils.cell_filter?.publish) { PUBLISH_H5AD_CELL_FILTERED( FILTER_BY_CELL_METADATA.out, "FILTER_BY_CELL_METADATA", @@ -1231,7 +1223,7 @@ workflow _cell_annotate_filter { false ) } - if(params.hasUtilsParams("publish") && publish) { + if(params.utils?.publish && publish) { PUBLISH_H5AD_CELL_FILTERED( FILTER_BY_CELL_METADATA.out, "CELL_ANNOTATE_FILTER", @@ -1265,15 +1257,15 @@ workflow cell_annotate_filter_and_sample_annotate { out = _cell_annotate_filter(false) // Annotate cells based on an 
indexed sample-based metadata table - if(!params.hasUtilsParams("sample_annotate")) + if(!params.utils?.sample_annotate) throw new Exception("VSN ERROR: The sample_annotate param is missing in params.utils.") - if (!hasMetadataFilePath(params.getUtilsParams("sample_annotate"))) { + if (!hasMetadataFilePath(params.utils.sample_annotate)) { throw new Exception("VSN ERROR: The metadataFilePath param is missing in sample_annotate.") } out = SC__ANNOTATE_BY_SAMPLE_METADATA( out ) - if(params.getUtilsParams("file_cleaner")) { + if(params.utils.file_cleaner) { out = SC__H5AD_BEAUTIFY( out ) } diff --git a/main_atac.nf b/main_atac.nf index fb39692c..6a9fead7 100644 --- a/main_atac.nf +++ b/main_atac.nf @@ -29,9 +29,9 @@ workflow cellranger_atac { } from './src/cellranger-atac/main.nf' params(params) CELLRANGER_ATAC( - file(params.getToolParams("cellranger_atac").mkfastq.csv), - file(params.getToolParams("cellranger_atac").mkfastq.runFolder), - file(params.getToolParams("cellranger_atac").count.reference) + file(params.tools.cellranger_atac.mkfastq.csv), + file(params.tools.cellranger_atac.mkfastq.runFolder), + file(params.tools.cellranger_atac.count.reference) ) } diff --git a/src/archr/archr.config b/src/archr/archr.config index 33b32d5b..9980d80d 100644 --- a/src/archr/archr.config +++ b/src/archr/archr.config @@ -1,5 +1,5 @@ params { - sc { + tools { archr { container = 'vibsinglecellnf/archr:2020-07-13-ddcaae4' genome = 'hg38' diff --git a/src/archr/processes/cell_calling.nf b/src/archr/processes/cell_calling.nf index 01f568ca..cdff4674 100644 --- a/src/archr/processes/cell_calling.nf +++ b/src/archr/processes/cell_calling.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/archr/bin/" : "" -toolParams = params.getToolParams("archr") +toolParams = params.tools.archr process SC__ARCHR__CELL_CALLING { diff --git a/src/archr/processes/createArrow_unfiltered.nf b/src/archr/processes/createArrow_unfiltered.nf index ec57f727..c82cac6e 100644 --- a/src/archr/processes/createArrow_unfiltered.nf +++ b/src/archr/processes/createArrow_unfiltered.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/archr/bin/" : "" -toolParams = params.getToolParams("archr") +toolParams = params.tools.archr process SC__ARCHR__CREATE_ARROW_UNFILTERED { diff --git a/src/bap/bap.config b/src/bap/bap.config index 1b6f80a1..fc763f72 100644 --- a/src/bap/bap.config +++ b/src/bap/bap.config @@ -1,5 +1,5 @@ params { - sc { + tools { bap { container = 'vibsinglecellnf/bap:2021-04-27-3b48f4b' } diff --git a/src/bap/processes/barcode_multiplet.nf b/src/bap/processes/barcode_multiplet.nf index 08235df4..bf8e4310 100644 --- a/src/bap/processes/barcode_multiplet.nf +++ b/src/bap/processes/barcode_multiplet.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("bap") +toolParams = params.tools.bap process SC__BAP__BARCODE_MULTIPLET_PIPELINE { diff --git a/src/bap/processes/biorad_debarcode.nf b/src/bap/processes/biorad_debarcode.nf index 79ee5873..8fc438b2 100644 --- a/src/bap/processes/biorad_debarcode.nf +++ b/src/bap/processes/biorad_debarcode.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("bap") +toolParams = params.tools.bap process SC__BAP__BIORAD_DEBARCODE { diff --git a/src/bwamaptools/bwamaptools.config b/src/bwamaptools/bwamaptools.config index 194221da..d3ce94a4 100644 --- a/src/bwamaptools/bwamaptools.config +++ b/src/bwamaptools/bwamaptools.config @@ -1,6 +1,6 @@ params { - sc { - bwamapsc { + tools { + bwamap { container = 'vibsinglecellnf/bwamaptools:2020-07-02-13b5637' } } diff --git a/src/bwamaptools/main.nf b/src/bwamaptools/main.nf index 0fa09639..5dad0edb 100644 --- a/src/bwamaptools/main.nf +++ b/src/bwamaptools/main.nf @@ -57,7 +57,7 @@ workflow BWA_MAPPING_PE { 1) create a channel linking bwa index files from genome.fa in params, and 2) combine this channel with the items in the data channel */ - bwa_inputs = get_bwa_index(params.getToolParams("bwamaptools").bwa_fasta).combine(data) + bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) bam = SC__BWAMAPTOOLS__BWA_MEM_PE(bwa_inputs) diff --git a/src/bwamaptools/processes/add_barcode_as_tag.nf b/src/bwamaptools/processes/add_barcode_as_tag.nf index 50cf5d1d..7af22614 100644 --- a/src/bwamaptools/processes/add_barcode_as_tag.nf +++ b/src/bwamaptools/processes/add_barcode_as_tag.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("bwamaptools") +toolParams = params.tools.bwamaptools process SC__BWAMAPTOOLS__ADD_BARCODE_TAG { diff --git a/src/bwamaptools/processes/index.nf b/src/bwamaptools/processes/index.nf index 4e8560e1..9ae964e9 100644 --- a/src/bwamaptools/processes/index.nf +++ b/src/bwamaptools/processes/index.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("bwamaptools") +toolParams = params.tools.bwamaptools process SC__BWAMAPTOOLS__INDEX_BAM { diff --git a/src/bwamaptools/processes/mapping.nf b/src/bwamaptools/processes/mapping.nf index 2ebe69d9..c6372ada 100644 --- a/src/bwamaptools/processes/mapping.nf +++ b/src/bwamaptools/processes/mapping.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("bwamaptools") +toolParams = params.tools.bwamaptools process SC__BWAMAPTOOLS__BWA_MEM_PE { diff --git a/src/bwamaptools/processes/mapping_summary.nf b/src/bwamaptools/processes/mapping_summary.nf index 5a0e7031..3517efad 100644 --- a/src/bwamaptools/processes/mapping_summary.nf +++ b/src/bwamaptools/processes/mapping_summary.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/bwamaptools/bin/" : "" -toolParams = params.getToolParams("bwamaptools") +toolParams = params.tools.bwamaptools process SC__BWAMAPTOOLS__MAPPING_SUMMARY { diff --git a/src/celda/conf/base.config b/src/celda/conf/base.config index b88f86d7..c7dae8ef 100644 --- a/src/celda/conf/base.config +++ b/src/celda/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { celda { container = 'vibsinglecellnf/celda:1.4.5' decontx { diff --git a/src/celda/conf/decontx_correct.config b/src/celda/conf/decontx_correct.config index ded00be5..e4614494 100644 --- a/src/celda/conf/decontx_correct.config +++ b/src/celda/conf/decontx_correct.config @@ -1,5 +1,5 @@ params { - sc { + tools { celda { decontx { strategy = "correct" diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 68df40e8..55e07ecd 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -1,13 +1,13 @@ includeConfig '../../utils/conf/cell_annotate.config' -params.sc.celda.decontx.cell_annotate = params.sc.cell_annotate -params.sc.remove('cell_annotate') +params.tools.celda.decontx.cell_annotate = params.tools.cell_annotate +params.tools.remove('cell_annotate') includeConfig '../../utils/conf/cell_filter.config' -params.sc.celda.decontx.cell_filter = params.sc.cell_filter -params.sc.remove('cell_filter') +params.tools.celda.decontx.cell_filter = params.tools.cell_filter +params.tools.remove('cell_filter') includeConfig './decontx_filter_defaults.config' params { - sc { + tools { celda { decontx { strategy = "filter" // choices: 'filter' (default), 'correct' diff --git a/src/celda/conf/decontx_filter_defaults.config b/src/celda/conf/decontx_filter_defaults.config index 93c0feb7..13800fb9 100644 --- a/src/celda/conf/decontx_filter_defaults.config +++ b/src/celda/conf/decontx_filter_defaults.config @@ -1,5 +1,5 @@ params { - sc { + tools { celda { decontx { strategy = "filter" diff --git a/src/celda/main.nf b/src/celda/main.nf index 2caff6db..9831707d 100644 --- a/src/celda/main.nf +++ b/src/celda/main.nf @@ -30,20 +30,20 @@ workflow decontx { data = getDataChannel \ | SC__FILE_CONVERTER - if(params.getToolParams("celda").decontx.strategy == "filter") { + if(params.tools.celda.decontx.strategy == "filter") { out = DECONTX_FILTER ( data ) processed = out.decontx_filtered - } else if (params.getToolParams("celda").decontx.strategy == "correct") { + } else if (params.tools.celda.decontx.strategy == "correct") { out = DECONTX_CORRECT ( data ) processed = out.decontx_corrected } else { - throw new Exception("VSN ERROR: The given strategy in params..celda.decontx is not valid. Choose: filter or correct.") + throw new Exception("VSN ERROR: The given strategy in params.tools.celda.decontx is not valid. Choose: filter or correct.") } - if(params.hasUtilsParams("publish")) { + if(params.utils?.publish) { PUBLISH( processed, - "CELDA_DECONTX_"+ params.getToolParams("celda").decontx.strategy.toUpperCase(), + "CELDA_DECONTX_"+ params.tools.celda.decontx.strategy.toUpperCase(), "h5ad", null, false diff --git a/src/celda/processes/runDecontX.nf b/src/celda/processes/runDecontX.nf index 77f4e81e..bae6685d 100644 --- a/src/celda/processes/runDecontX.nf +++ b/src/celda/processes/runDecontX.nf @@ -8,7 +8,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/${moduleName} process SC__CELDA__DECONTX { - container params.getToolParams("celda").container + container params.tools.celda.container publishDir "${params.global.outdir}/data/${moduleName}", mode: 'link' label 'compute_resources__default' @@ -32,7 +32,7 @@ process SC__CELDA__DECONTX { emit: other script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("celda").decontx) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.celda.decontx) processParams = sampleParams.local def filterNumMadsThresholdsAsArguments = '' diff --git a/src/cellranger-atac/conf/base.config b/src/cellranger-atac/conf/base.config index dba51cdf..46eca501 100644 --- a/src/cellranger-atac/conf/base.config +++ b/src/cellranger-atac/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger_atac { container = 'vibsinglecellnf/cellranger-atac:1.2.0' } diff --git a/src/cellranger-atac/conf/count.config b/src/cellranger-atac/conf/count.config index ef2ac877..09e97d5a 100644 --- a/src/cellranger-atac/conf/count.config +++ b/src/cellranger-atac/conf/count.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger_atac { count { reference = '/ddn1/vol1/staging/leuven/stg_00002/resources/cellranger-atac/refdata-cellranger-atac-GRCh38-1.0.0' diff --git a/src/cellranger-atac/conf/count_metadata.config b/src/cellranger-atac/conf/count_metadata.config index 378e18fb..017d090b 100644 --- a/src/cellranger-atac/conf/count_metadata.config +++ b/src/cellranger-atac/conf/count_metadata.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { metadata = '' diff --git a/src/cellranger-atac/conf/mkfastq.config b/src/cellranger-atac/conf/mkfastq.config index 131caee4..7d144c68 100644 --- a/src/cellranger-atac/conf/mkfastq.config +++ b/src/cellranger-atac/conf/mkfastq.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger_atac { mkfastq { // https://support.10xgenomics.com/single-cell-atac/software/pipelines/latest/using/mkfastq diff --git a/src/cellranger-atac/processes/count.nf b/src/cellranger-atac/processes/count.nf index 2fb1f173..0388b540 100644 --- a/src/cellranger-atac/processes/count.nf +++ b/src/cellranger-atac/processes/count.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.getToolParams("cellranger_atac") +toolParams = params.tools.cellranger_atac def runCellRangerAtacCount = { id, diff --git a/src/cellranger-atac/processes/mkfastq.nf b/src/cellranger-atac/processes/mkfastq.nf index d72dd6cd..8dc49330 100644 --- a/src/cellranger-atac/processes/mkfastq.nf +++ b/src/cellranger-atac/processes/mkfastq.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.getToolParams("cellranger_atac") +toolParams = params.tools.cellranger_atac process SC__CELLRANGER_ATAC__MKFASTQ { diff --git a/src/cellranger/conf/base.config b/src/cellranger/conf/base.config index 11e5b757..1c3382f3 100644 --- a/src/cellranger/conf/base.config +++ b/src/cellranger/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { container = '/path/to/cellranger/cellranger' } diff --git a/src/cellranger/conf/cellranger_libraries.config b/src/cellranger/conf/cellranger_libraries.config index eceddc6c..077f4825 100644 --- a/src/cellranger/conf/cellranger_libraries.config +++ b/src/cellranger/conf/cellranger_libraries.config @@ -1,7 +1,7 @@ includeConfig("mkfastq.config") params { - sc { + tools { cellranger { librariesMap = [ "sample1": [ diff --git a/src/cellranger/conf/count.config b/src/cellranger/conf/count.config 
index 8efa03f6..122f1eca 100644 --- a/src/cellranger/conf/count.config +++ b/src/cellranger/conf/count.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { transcriptome = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/indexes/CellRanger/3.0.2/hg38_iGenomes' diff --git a/src/cellranger/conf/count_libraries.config b/src/cellranger/conf/count_libraries.config index 360ee80d..987871aa 100644 --- a/src/cellranger/conf/count_libraries.config +++ b/src/cellranger/conf/count_libraries.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { featureRef = '' diff --git a/src/cellranger/conf/count_metadata.config b/src/cellranger/conf/count_metadata.config index b137cc12..a55c1ba9 100644 --- a/src/cellranger/conf/count_metadata.config +++ b/src/cellranger/conf/count_metadata.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { count { metadata = '' diff --git a/src/cellranger/conf/mkfastq.config b/src/cellranger/conf/mkfastq.config index e4bd4786..b83a8a58 100644 --- a/src/cellranger/conf/mkfastq.config +++ b/src/cellranger/conf/mkfastq.config @@ -1,5 +1,5 @@ params { - sc { + tools { cellranger { mkfastq { // https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq diff --git a/src/cellranger/main.nf b/src/cellranger/main.nf index f824d421..5a503455 100644 --- a/src/cellranger/main.nf +++ b/src/cellranger/main.nf @@ -31,9 +31,9 @@ workflow CELLRANGER { data = MKFASTQ(mkfastq_csv, runFolder) // Allow to combine old demultiplexed data with new data - if (params.getToolParams("cellranger").count.fastqs instanceof Map) { + if (params.tools.cellranger.count.fastqs instanceof Map) { // Remove default key - Channel.from(params.getToolParams("cellranger").count.fastqs.findAll { + Channel.from(params.tools.cellranger.count.fastqs.findAll { it.key != 'default' }.collect { k, v -> // Split possible multiple file paths diff --git a/src/cellranger/processes/count.nf b/src/cellranger/processes/count.nf index aa8188f2..fa93b9f8 100644 --- a/src/cellranger/processes/count.nf +++ b/src/cellranger/processes/count.nf @@ -4,7 +4,7 @@ include { isParamNull; } from './../../utils/processes/utils.nf' params(params) -toolParams = params.getToolParams("cellranger") +toolParams = params.tools.cellranger def generateCellRangerCountCommandDefaults = { @@ -126,7 +126,7 @@ process SC__CELLRANGER__COUNT { def sampleParams = params.parseConfig(sampleId, params.global, toolParams.count) processParams = sampleParams.local if(processParams.sample == '') { - throw new Exception("Regards params.getToolParams("cellranger").count: sample parameter cannot be empty") + throw new Exception("Regards params.tools.cellranger.count: sample parameter cannot be empty") } // Check if the current sample has multiple sequencing runs fastqs = fastqs instanceof List ? 
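A sketch of the fastqs map that the instanceof Map branch above consumes, with hypothetical paths: the 'default' entry is filtered out and comma-separated values are split into multiple sequencing runs, as the inline comments in that hunk describe.

params {
    tools {
        cellranger {
            count {
                fastqs = [
                    'default': '/path/to/shared/fastqs',                     // dropped by findAll
                    'sampleA': '/path/to/runA/fastqs,/path/to/runB/fastqs',  // split on ','
                ]
            }
        }
    }
}
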
fastqs.join(',') : fastqs @@ -165,7 +165,7 @@ process SC__CELLRANGER__COUNT_WITH_LIBRARIES { processParams = sampleParams.local if(processParams.sample == '') { - throw new Exception("Regards params.getToolParams("cellranger").count: sample parameter cannot be empty") + throw new Exception("Regards params.tools.cellranger.count: sample parameter cannot be empty") } // We need to create the libraries.csv file here because it needs absolute paths diff --git a/src/cellranger/processes/mkfastq.nf b/src/cellranger/processes/mkfastq.nf index 192fd087..cd9065f4 100644 --- a/src/cellranger/processes/mkfastq.nf +++ b/src/cellranger/processes/mkfastq.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.getToolParams("cellranger") +toolParams = params.tools.cellranger process SC__CELLRANGER__MKFASTQ { diff --git a/src/cellranger/workflows/cellranger_libraries.nf b/src/cellranger/workflows/cellranger_libraries.nf index dc949bb1..f68cfdc1 100644 --- a/src/cellranger/workflows/cellranger_libraries.nf +++ b/src/cellranger/workflows/cellranger_libraries.nf @@ -28,12 +28,12 @@ workflow CELLRANGER_LIBRARIES { main: // Sanity Checking - libMap = params.getToolParams("cellranger").librariesMap + libMap = params.tools.cellranger.librariesMap if (! (libMap instanceof Map)) { throw new Exception("When running the full cellranger pipeline with libraries, you must specify the librariesMap (see docs).") } - librariesFiles = params.getToolParams("cellranger").count.libraries + librariesFiles = params.tools.cellranger.count.libraries if (!(librariesFiles instanceof Map) && librariesFiles) { poolName = params.global.containsKey('project_name') ? params.global.project_name : '' diff --git a/src/channels/channels.nf b/src/channels/channels.nf index 70debeda..fdadad67 100644 --- a/src/channels/channels.nf +++ b/src/channels/channels.nf @@ -36,8 +36,8 @@ workflow getDataChannel { } else { // If not dynamically set, we use h5ad by default outputFileFormat = "h5ad" - if(params.getUtilsParams("file_converter").containsKey("off")) { - outputFileFormat = params.getUtilsParams("file_converter").off + if(params.utils.file_converter.containsKey("off")) { + outputFileFormat = params.utils.file_converter.off } } diff --git a/src/channels/conf/bam.config b/src/channels/conf/bam.config index 56d7bcd1..cfc941ad 100644 --- a/src/channels/conf/bam.config +++ b/src/channels/conf/bam.config @@ -6,7 +6,7 @@ params { index_extension = '.bai' } } - sc { + tools { file_converter { iff = 'bam' } diff --git a/src/channels/conf/csv.config b/src/channels/conf/csv.config index 5ebc4b09..75191e1b 100644 --- a/src/channels/conf/csv.config +++ b/src/channels/conf/csv.config @@ -5,7 +5,7 @@ params { suffix = '.csv' } } - sc { + tools { file_converter { iff = 'csv' } diff --git a/src/channels/conf/fragments.config b/src/channels/conf/fragments.config index a22cc312..58db140a 100644 --- a/src/channels/conf/fragments.config +++ b/src/channels/conf/fragments.config @@ -6,7 +6,7 @@ params { index_extension = '.tbi' } } - sc { + tools { file_converter { iff = 'fragments' } diff --git a/src/channels/conf/h5ad.config b/src/channels/conf/h5ad.config index 76ea9804..e2da502d 100644 --- a/src/channels/conf/h5ad.config +++ b/src/channels/conf/h5ad.config @@ -5,7 +5,7 @@ params { suffix = '.h5ad' } } - sc { + tools { file_converter { iff = 'h5ad' } diff --git a/src/channels/conf/loom.config b/src/channels/conf/loom.config index 55040e03..11087464 100644 --- a/src/channels/conf/loom.config +++ b/src/channels/conf/loom.config @@ -5,7 +5,7 @@ params { suffix 
= '.loom' } } - sc { + tools { file_converter { iff = 'loom' } diff --git a/src/channels/conf/seurat_rds.config b/src/channels/conf/seurat_rds.config index 8c73a42e..d5a571cb 100644 --- a/src/channels/conf/seurat_rds.config +++ b/src/channels/conf/seurat_rds.config @@ -5,7 +5,7 @@ params { suffix = '.Rds' } } - sc { + tools { file_converter { iff = 'seurat_rds' } diff --git a/src/channels/conf/tenx_atac_cellranger_mex.config b/src/channels/conf/tenx_atac_cellranger_mex.config index e44303d4..d544f29c 100644 --- a/src/channels/conf/tenx_atac_cellranger_mex.config +++ b/src/channels/conf/tenx_atac_cellranger_mex.config @@ -5,7 +5,7 @@ params { } } - sc { + tools { file_converter { off = 'cistopic_rds' } diff --git a/src/channels/conf/tsv.config b/src/channels/conf/tsv.config index e9ce9cb9..8db27979 100644 --- a/src/channels/conf/tsv.config +++ b/src/channels/conf/tsv.config @@ -5,7 +5,7 @@ params { suffix = '.tsv' } } - sc { + tools { file_converter { iff = 'tsv' } diff --git a/src/directs/conf/base.config b/src/directs/conf/base.config index facaacce..364dfbba 100644 --- a/src/directs/conf/base.config +++ b/src/directs/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { directs { container = 'vibsinglecellnf/directs:0.1.0' select_default_clustering { diff --git a/src/directs/conf/test__select_default_clustering.config b/src/directs/conf/test__select_default_clustering.config index 2bdaa179..3ddd6a73 100644 --- a/src/directs/conf/test__select_default_clustering.config +++ b/src/directs/conf/test__select_default_clustering.config @@ -2,7 +2,7 @@ includeConfig '../../../conf/global.config' includeConfig '../../../conf/singularity.config' params { - sc { + tools { directs { inputLoom = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/fca/analysis/in-house/20200520_000__all__4b9e9810-8600-11ea-867e-a0000220fe80/out/data/20200520_FCA_BioHub_B1_B2_All.HARMONY_SCENIC.loom' } diff --git a/src/directs/main.test.nf b/src/directs/main.test.nf index 35e9334a..55c88941 100644 --- a/src/directs/main.test.nf +++ b/src/directs/main.test.nf @@ -16,7 +16,7 @@ workflow { main: switch(params.test) { case "SC__DIRECTS__SELECT_DEFAULT_CLUSTERING": - test = Channel.of(tuple('TEST', params.getToolParams("directs").inputLoom, null)) + test = Channel.of(tuple('TEST', params.tools.directs.inputLoom, null)) SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( test ) break; default: diff --git a/src/directs/processes/selectDefaultClustering.nf b/src/directs/processes/selectDefaultClustering.nf index a74e8460..cfffa063 100644 --- a/src/directs/processes/selectDefaultClustering.nf +++ b/src/directs/processes/selectDefaultClustering.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
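Each channel config above sets the file converter's input format (iff), and one sets an output format (off); the getDataChannel hunk earlier falls back to h5ad when off is absent. Combined, and using only values that occur in these configs:

params {
    tools {
        file_converter {
            iff = 'tsv'  // input format, from the matching channel config
            off = 'h5ad' // output format; channels.nf defaults to 'h5ad' when unset
        }
    }
}
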
"${workflow.projectDir}/src/directs/bin/" process SC__DIRECTS__SELECT_DEFAULT_CLUSTERING { - container params.getToolParams("directs").container + container params.tools.directs.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -21,7 +21,7 @@ process SC__DIRECTS__SELECT_DEFAULT_CLUSTERING { val(stashedParams) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("directs").select_default_clustering) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.directs.select_default_clustering) processParams = sampleParams.local """ ${binDir}select_default_clustering.py \ diff --git a/src/dropletutils/dropletutils.config b/src/dropletutils/dropletutils.config index 00bc0d1e..5a90a97c 100644 --- a/src/dropletutils/dropletutils.config +++ b/src/dropletutils/dropletutils.config @@ -1,5 +1,5 @@ params { - sc { + tools { dropletutils { container = 'vibsinglecellnf/dropletutils:1.4.3' } diff --git a/src/dropletutils/processes/barcode_selection.nf b/src/dropletutils/processes/barcode_selection.nf index add4090d..708c449b 100644 --- a/src/dropletutils/processes/barcode_selection.nf +++ b/src/dropletutils/processes/barcode_selection.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROPLET_UTILS__BARCODE_SELECTION { - container params.getToolParams("dropletutils").container + container params.tools.dropletutils.container publishDir "03.count", mode: 'symlink' label 'compute_resources__default' diff --git a/src/dropseqtools/dropseqtools.config b/src/dropseqtools/dropseqtools.config index 8af341f1..6ede9afa 100644 --- a/src/dropseqtools/dropseqtools.config +++ b/src/dropseqtools/dropseqtools.config @@ -1,6 +1,6 @@ params { - sc { - dropseqsc { + tools { + dropseqtools { container = 'humancellatlas/dropseqtools:1.12' tag_unaligned_bam_with_cellbarcode { diff --git a/src/dropseqtools/processes/bam_tag_histogram.nf b/src/dropseqtools/processes/bam_tag_histogram.nf index 0a0c6a7d..7f914580 100644 --- a/src/dropseqtools/processes/bam_tag_histogram.nf +++ b/src/dropseqtools/processes/bam_tag_histogram.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__BAM_TAG_HISTOGRAM { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/03.count", mode: 'symlink' label 'compute_resources__default' @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__BAM_TAG_HISTOGRAM { tuple val(sample), path("*.cell_readcounts.txt.gz") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").bam_tag_histogram) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.bam_tag_histogram) processParams = sampleParams.local """ BAMTagHistogram \ diff --git a/src/dropseqtools/processes/convert_to_ref_flat.nf b/src/dropseqtools/processes/convert_to_ref_flat.nf index 24a61263..45baa939 100644 --- a/src/dropseqtools/processes/convert_to_ref_flat.nf +++ b/src/dropseqtools/processes/convert_to_ref_flat.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__CONVERT_TO_REFFLAT { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' label 'compute_resources__default' diff --git a/src/dropseqtools/processes/detect_bead_synthesis_errors.nf b/src/dropseqtools/processes/detect_bead_synthesis_errors.nf index 
f38f040e..5952d44b 100644 --- a/src/dropseqtools/processes/detect_bead_synthesis_errors.nf +++ b/src/dropseqtools/processes/detect_bead_synthesis_errors.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__DETECT_REPAIR_BARCODE_SYNTHESIS_ERRORS { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -15,7 +15,7 @@ process SC__DROP_SEQ_TOOLS__DETECT_REPAIR_BARCODE_SYNTHESIS_ERRORS { // tuple file("*.synthesis_stats.summary.txt"), emit: statsSummary script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").detect_repair_barcode_synthesis_errors) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.detect_repair_barcode_synthesis_errors) processParams = sampleParams.local """ DetectBeadSynthesisErrors \ diff --git a/src/dropseqtools/processes/digital_expression.nf b/src/dropseqtools/processes/digital_expression.nf index f1d865f3..1bcd5325 100644 --- a/src/dropseqtools/processes/digital_expression.nf +++ b/src/dropseqtools/processes/digital_expression.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__DIGITAL_EXPRESSION { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "03.count", mode: 'symlink' label 'compute_resources__default' diff --git a/src/dropseqtools/processes/filter_bam.nf b/src/dropseqtools/processes/filter_bam.nf index 100057b0..7570cf5a 100644 --- a/src/dropseqtools/processes/filter_bam.nf +++ b/src/dropseqtools/processes/filter_bam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__FILTER_UNALIGNED_TAGGED_BAM { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__FILTER_UNALIGNED_TAGGED_BAM { tuple val(sample), path('*.unaligned_tagged_filtered.bam'), emit: bam script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").filter_unaligned_tagged_bam) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.filter_unaligned_tagged_bam) processParams = sampleParams.local """ FilterBAM \ diff --git a/src/dropseqtools/processes/gzip.nf b/src/dropseqtools/processes/gzip.nf index 62790e6d..1a70ccca 100644 --- a/src/dropseqtools/processes/gzip.nf +++ b/src/dropseqtools/processes/gzip.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process GZIP { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/dropseqtools/processes/polya_trimmer.nf b/src/dropseqtools/processes/polya_trimmer.nf index 1529cb03..e4c6c3c7 100644 --- a/src/dropseqtools/processes/polya_trimmer.nf +++ b/src/dropseqtools/processes/polya_trimmer.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__TRIM_POLYA_UNALIGNED_TAGGED_TRIMMED_SMART { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 
'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process SC__DROP_SEQ_TOOLS__TRIM_POLYA_UNALIGNED_TAGGED_TRIMMED_SMART { tuple file('*.polyA_trimming_report.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").trim_polya_unaligned_tagged_trimmed_smart) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.trim_polya_unaligned_tagged_trimmed_smart) processParams = sampleParams.local """ PolyATrimmer \ diff --git a/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf b/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf index edd7cd6a..2b2fd19c 100644 --- a/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf +++ b/src/dropseqtools/processes/tag_bam_with_read_sequence_extended.nf @@ -1,7 +1,7 @@ process SC__DROP_SEQ_TOOLS__TAG_UNALIGNED_BAM_WITH_CELLBARCODE { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__TAG_UNALIGNED_BAM_WITH_CELLBARCODE { tuple file('*.unaligned_tagged_Cellular.bam_summary.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").tag_unaligned_bam_with_cellbarcode) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.tag_unaligned_bam_with_cellbarcode) processParams = sampleParams.local """ TagBamWithReadSequenceExtended \ @@ -43,7 +43,7 @@ process SC__DROP_SEQ_TOOLS__TAG_UNALIGNED_BAM_WITH_CELLMOLECULAR { tuple file('*.unaligned_tagged_Molecular.bam_summary.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").tag_unaligned_bam_with_cellmolecular) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.tag_unaligned_bam_with_cellmolecular) processParams = sampleParams.local """ source $DWMAX/documents/aertslab/scripts/src_dwmax/bash-utils/utils.sh diff --git a/src/dropseqtools/processes/tag_read_with_gene_exon.nf b/src/dropseqtools/processes/tag_read_with_gene_exon.nf index 02ebd6db..e44ca6cb 100644 --- a/src/dropseqtools/processes/tag_read_with_gene_exon.nf +++ b/src/dropseqtools/processes/tag_read_with_gene_exon.nf @@ -13,7 +13,7 @@ process SC__DROP_SEQ_TOOLS__TAG_READ_WITH_GENE_EXON { tuple val(sample), path("*.merged_gene-exon-tagged.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").tag_read_with_gene_exon) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.tag_read_with_gene_exon) processParams = sampleParams.local """ source $DWMAX/documents/aertslab/scripts/src_dwmax/bash-utils/utils.sh diff --git a/src/dropseqtools/processes/trim_starting_sequence.nf b/src/dropseqtools/processes/trim_starting_sequence.nf index 32985481..1af9fc4d 100644 --- a/src/dropseqtools/processes/trim_starting_sequence.nf +++ b/src/dropseqtools/processes/trim_starting_sequence.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__DROP_SEQ_TOOLS__TRIM_SMART_UNALIGNED_TAGGED_FILTERED_BAM { - container params.getToolParams("dropseqtools").container + container params.tools.dropseqtools.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 
'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process SC__DROP_SEQ_TOOLS__TRIM_SMART_UNALIGNED_TAGGED_FILTERED_BAM { tuple file('*.adapter_trimming_report.txt'), emit: report script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("dropseqtools").trim_smart_unaligned_tagged_filtered_bam) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.dropseqtools.trim_smart_unaligned_tagged_filtered_bam) processParams = sampleParams.local """ TrimStartingSequence \ diff --git a/src/edirect/edirect.config b/src/edirect/edirect.config index 90ae09e8..f49a11c0 100644 --- a/src/edirect/edirect.config +++ b/src/edirect/edirect.config @@ -1,5 +1,5 @@ params { - sc { + tools { edirect { container = 'ncbi/edirect:latest' } diff --git a/src/fastp/fastp.config b/src/fastp/fastp.config index 0f47f446..aac86685 100644 --- a/src/fastp/fastp.config +++ b/src/fastp/fastp.config @@ -1,5 +1,5 @@ params { - sc { + tools { fastp { container = 'vibsinglecellnf/fastp:0.20.0' thread = 1 diff --git a/src/fastp/processes/clean_and_fastqc.nf b/src/fastp/processes/clean_and_fastqc.nf index b0f0ee6c..668b9d4b 100644 --- a/src/fastp/processes/clean_and_fastqc.nf +++ b/src/fastp/processes/clean_and_fastqc.nf @@ -5,7 +5,7 @@ nextflow.enable.dsl=2 */ process FASTP__CLEAN_AND_FASTQC { - container params.getToolParams("fastp").container + container params.tools.fastp.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/flybaser/flybaser.config b/src/flybaser/flybaser.config index 400fffd2..6dbcccb8 100644 --- a/src/flybaser/flybaser.config +++ b/src/flybaser/flybaser.config @@ -1,5 +1,5 @@ params { - sc { + tools { flybaser { container = 'vibsinglecellnf/flybaser:0.2.1' diff --git a/src/flybaser/processes/convertNomenclature.nf b/src/flybaser/processes/convertNomenclature.nf index 3e60f8f7..1ec3e7c3 100644 --- a/src/flybaser/processes/convertNomenclature.nf +++ b/src/flybaser/processes/convertNomenclature.nf @@ -8,7 +8,7 @@ if(!params.containsKey("test")) { process FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL { - container params.getToolParams("flybaser").container + container params.tools.flybaser.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -23,7 +23,7 @@ process FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL { path("${sampleId}.FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("flybaser").convert_fbgn_to_gene_symbol) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.flybaser.convert_fbgn_to_gene_symbol) processParams = sampleParams.local """ ${binDir}convertFBgnToGeneSymbol.R \ diff --git a/src/harmony/harmony.config b/src/harmony/harmony.config index f7781f77..3af25635 100644 --- a/src/harmony/harmony.config +++ b/src/harmony/harmony.config @@ -1,5 +1,5 @@ params { - sc { + tools { harmony { container = 'vibsinglecellnf/harmony:1.0-3' report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/harmony/bin/reports/sc_harmony_report.ipynb" diff --git a/src/harmony/processes/runHarmony.nf b/src/harmony/processes/runHarmony.nf index 7d386a8c..517fd1d8 100644 --- a/src/harmony/processes/runHarmony.nf +++ b/src/harmony/processes/runHarmony.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/harmony/bin/" process SC__HARMONY__HARMONY_MATRIX { - container params.getToolParams("harmony").container + container params.tools.harmony.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__default' @@ -19,7 +19,7 @@ process SC__HARMONY__HARMONY_MATRIX { path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("harmony")) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony) processParams = sampleParams.local varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ') """ diff --git a/src/harmony/workflows/bec_harmony.nf b/src/harmony/workflows/bec_harmony.nf index 3cdae2dc..c0e3b48e 100644 --- a/src/harmony/workflows/bec_harmony.nf +++ b/src/harmony/workflows/bec_harmony.nf @@ -95,7 +95,7 @@ workflow BEC_HARMONY { // Run clustering // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) CLUSTER_IDENTIFICATION( normalizedTransformedData, DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, @@ -131,7 +131,7 @@ workflow BEC_HARMONY { ) harmony_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.getToolParams("harmony").report_ipynb), + file(workflow.projectDir + params.tools.harmony.report_ipynb), "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) diff --git a/src/pcacv/pcacv.config b/src/pcacv/pcacv.config index c0af0209..1dfb8a79 100644 --- a/src/pcacv/pcacv.config +++ b/src/pcacv/pcacv.config @@ -1,5 +1,5 @@ params { - sc { + tools { pcacv { container = "vibsinglecellnf/pcacv:0.2.0" find_optimal_npcs { diff --git a/src/pcacv/processes/runPCACV.nf b/src/pcacv/processes/runPCACV.nf index 93385615..06141188 100644 --- a/src/pcacv/processes/runPCACV.nf +++ b/src/pcacv/processes/runPCACV.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pcacv/bin" : process PCACV__FIND_OPTIMAL_NPCS { - container params.getToolParams("pcacv").container + container params.tools.pcacv.container publishDir "${params.global.outdir}/data/pcacv", mode: 'link' label 'compute_resources__pcacv' @@ -27,7 +27,7 @@ process PCACV__FIND_OPTIMAL_NPCS { emit: files script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("pcacv").find_optimal_npcs) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.pcacv.find_optimal_npcs) processParams = sampleParams.local """ export OPENBLAS_NUM_THREADS=1 diff --git a/src/picard/processes/create_sequence_dictionary.nf b/src/picard/processes/create_sequence_dictionary.nf index 5b6a887d..69fe92ee 100644 --- a/src/picard/processes/create_sequence_dictionary.nf +++ b/src/picard/processes/create_sequence_dictionary.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__CREATE_SEQUENCE_DICTIONARY { - container params.getToolParams("picard").container + container params.tools.picard.container publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' label 'compute_resources__default' diff --git a/src/picard/processes/fastq_to_bam.nf b/src/picard/processes/fastq_to_bam.nf index 7c22d555..1e958eb1 100644 --- a/src/picard/processes/fastq_to_bam.nf +++ b/src/picard/processes/fastq_to_bam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__FASTQ_TO_BAM { - container params.getToolParams("picard").container + container params.tools.picard.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/picard/processes/merge_bam_alignment.nf b/src/picard/processes/merge_bam_alignment.nf index 3e39c934..8f43b6f8 100644 --- a/src/picard/processes/merge_bam_alignment.nf +++ b/src/picard/processes/merge_bam_alignment.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__MERGE_BAM_ALIGNMENT { - container params.getToolParams("picard").container + container params.tools.picard.container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -17,7 +17,7 @@ process PICARD__MERGE_BAM_ALIGNMENT { tuple val(sample), path("*.merged.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("picard").merge_bam_alignment) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.picard.merge_bam_alignment) processParams = sampleParams.local """ java -Djava.io.tmpdir=$tmpDir -jar \ diff --git a/src/picard/processes/sam_to_fastq.nf b/src/picard/processes/sam_to_fastq.nf index b88c6841..98a5448f 100644 --- a/src/picard/processes/sam_to_fastq.nf +++ b/src/picard/processes/sam_to_fastq.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__BAM_TO_FASTQ { - container params.getToolParams("picard").container + container params.tools.picard.container publishDir "${params.global.outdir}/01.clean", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' diff --git a/src/picard/processes/sort_sam.nf b/src/picard/processes/sort_sam.nf index 07a1d2cc..f10c3c2c 100644 --- a/src/picard/processes/sort_sam.nf +++ b/src/picard/processes/sort_sam.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process PICARD__SORT_SAM { - container params.getToolParams("picard").container + container params.tools.picard.container publishDir "${params.global.outdir}/02.map", mode: 'symlink' label 
'compute_resources__cpu','compute_resources__24hqueue' @@ -14,7 +14,7 @@ process PICARD__SORT_SAM { tuple val(sample), path("*.STAR_aligned_sorted.bam") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("picard").sort_sam) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.picard.sort_sam) processParams = sampleParams.local """ java -Djava.io.tmpdir=$tmpDir -jar \ diff --git a/src/popscle/popscle.config b/src/popscle/popscle.config index 4ab2db38..607d102a 100644 --- a/src/popscle/popscle.config +++ b/src/popscle/popscle.config @@ -1,5 +1,5 @@ params { - sc { + tools { popscle { container = 'vibsinglecellnf/popscle:2021-05-05-da70fc7' vcf = '/path/to/vcf_file' diff --git a/src/popscle/processes/demuxlet.nf b/src/popscle/processes/demuxlet.nf index a195a93e..51060956 100644 --- a/src/popscle/processes/demuxlet.nf +++ b/src/popscle/processes/demuxlet.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/popscle/bin/" process SC__POPSCLE__DEMUXLET { - container params.getToolParams("popscle").container + container params.tools.popscle.container publishDir "${params.global.outdir}/data", mode: 'symlink' label 'compute_resources__cpu' @@ -16,7 +16,7 @@ process SC__POPSCLE__DEMUXLET { tuple val(sampleId), path("${sampleId}_demuxlet*") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("popscle").demuxlet) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.demuxlet) processParams = sampleParams.local """ @@ -30,7 +30,7 @@ process SC__POPSCLE__DEMUXLET { process SC__POPSCLE__FREEMUXLET { - container params.getToolParams("popscle").container + container params.tools.popscle.container publishDir "${params.global.outdir}/data", mode: 'symlink' label 'compute_resources__cpu' @@ -41,7 +41,7 @@ process SC__POPSCLE__FREEMUXLET { tuple val(sampleId), path("${sampleId}_freemuxlet*") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("popscle").freemuxlet) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.freemuxlet) processParams = sampleParams.local """ diff --git a/src/popscle/processes/dsc_pileup.nf b/src/popscle/processes/dsc_pileup.nf index 06b917dd..129837b7 100644 --- a/src/popscle/processes/dsc_pileup.nf +++ b/src/popscle/processes/dsc_pileup.nf @@ -6,7 +6,7 @@ toolParams = params.sc.popscle process SC__POPSCLE__DSC_PILEUP { - container params.getToolParams("popscle").container + container params.tools.popscle.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__cpu','compute_resources__24hqueue' @@ -29,7 +29,7 @@ process SC__POPSCLE__DSC_PILEUP { process SC__POPSCLE__PREFILTER_DSC_PILEUP { - container params.getToolParams("popscle").container + container params.tools.popscle.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' label 'compute_resources__cpu' diff --git a/src/popscle/workflows/demuxlet.nf b/src/popscle/workflows/demuxlet.nf index 237e233c..d4f4dcc7 100644 --- a/src/popscle/workflows/demuxlet.nf +++ b/src/popscle/workflows/demuxlet.nf @@ -113,7 +113,7 @@ workflow DEMUXLET { data main: - vcf = file(params.getToolParams("popscle").vcf) + vcf = file(params.tools.popscle.vcf) DSC_PILEUP_FILTERED(data) SC__POPSCLE__DEMUXLET(DSC_PILEUP_FILTERED.out, vcf) diff --git a/src/popscle/workflows/dsc_pileup.nf b/src/popscle/workflows/dsc_pileup.nf index 
71bca29b..49eb6319 100644 --- a/src/popscle/workflows/dsc_pileup.nf +++ b/src/popscle/workflows/dsc_pileup.nf @@ -22,7 +22,7 @@ workflow DSC_PILEUP_FILTERED { data main: - vcf = file(params.getToolParams("popscle").vcf) + vcf = file(params.tools.popscle.vcf) SC__POPSCLE__PREFILTER_DSC_PILEUP(data, vcf) SC__POPSCLE__DSC_PILEUP(SC__POPSCLE__PREFILTER_DSC_PILEUP.out, vcf) diff --git a/src/pycistopic/processes/macs2_call_peaks.nf b/src/pycistopic/processes/macs2_call_peaks.nf index 416d1694..ff43cef2 100644 --- a/src/pycistopic/processes/macs2_call_peaks.nf +++ b/src/pycistopic/processes/macs2_call_peaks.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("pycistopic") +toolParams = params.tools.pycistopic process SC__PYCISTOPIC__MACS2_CALL_PEAKS { diff --git a/src/pycistopic/pycistopic.config b/src/pycistopic/pycistopic.config index abe29678..46f6c470 100644 --- a/src/pycistopic/pycistopic.config +++ b/src/pycistopic/pycistopic.config @@ -1,5 +1,5 @@ params { - sc { + tools { pycistopic { container = 'vibsinglecellnf/pycistopic:0.2' macs2_call_peaks { diff --git a/src/scanpy/conf/base.config b/src/scanpy/conf/base.config index d2544cc3..d1135308 100644 --- a/src/scanpy/conf/base.config +++ b/src/scanpy/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' report { diff --git a/src/scanpy/conf/bbknn.config b/src/scanpy/conf/bbknn.config index 5a957800..e68636c7 100644 --- a/src/scanpy/conf/bbknn.config +++ b/src/scanpy/conf/bbknn.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { batch_effect_correct { method = 'bbknn' diff --git a/src/scanpy/conf/data_transformation.config b/src/scanpy/conf/data_transformation.config index 8610b720..f23e2b59 100644 --- a/src/scanpy/conf/data_transformation.config +++ b/src/scanpy/conf/data_transformation.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { data_transformation { method = 'log1p' diff --git a/src/scanpy/conf/filter.config b/src/scanpy/conf/filter.config index e21b736e..ca06cd90 100644 --- a/src/scanpy/conf/filter.config +++ b/src/scanpy/conf/filter.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { filter { report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/scanpy/bin/reports/sc_filter_qc_report.ipynb" diff --git a/src/scanpy/conf/min.config b/src/scanpy/conf/min.config index a39b30ad..c0188bbb 100644 --- a/src/scanpy/conf/min.config +++ b/src/scanpy/conf/min.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' report { diff --git a/src/scanpy/conf/mnncorrect.config b/src/scanpy/conf/mnncorrect.config index 1e1e1634..746a5761 100644 --- a/src/scanpy/conf/mnncorrect.config +++ b/src/scanpy/conf/mnncorrect.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { batch_effect_correct { method = 'mnncorrect' diff --git a/src/scanpy/conf/normalization.config b/src/scanpy/conf/normalization.config index 3dd9fe4b..b79bb7e0 100644 --- a/src/scanpy/conf/normalization.config +++ b/src/scanpy/conf/normalization.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { normalization { method = 'cpx' diff --git a/src/scanpy/conf/regress_out.config b/src/scanpy/conf/regress_out.config index 04e9999d..0db5c768 100644 --- a/src/scanpy/conf/regress_out.config +++ b/src/scanpy/conf/regress_out.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { regress_out { variablesToRegressOut = ['n_counts', 'percent_mito'] diff --git a/src/scanpy/processes/batch_effect_correct.nf b/src/scanpy/processes/batch_effect_correct.nf index a884ecba..10c6f7bd 100644 --- a/src/scanpy/processes/batch_effect_correct.nf +++ b/src/scanpy/processes/batch_effect_correct.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin/" process SC__SCANPY__BATCH_EFFECT_CORRECTION { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -21,7 +21,7 @@ process SC__SCANPY__BATCH_EFFECT_CORRECTION { val(stashedParams) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").batch_effect_correct) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.batch_effect_correct) processParams = sampleParams.local """ ${binDir}aggregate/sc_batch_effect_correction.py \ diff --git a/src/scanpy/processes/cluster.nf b/src/scanpy/processes/cluster.nf index 6c74cc7f..571217a4 100644 --- a/src/scanpy/processes/cluster.nf +++ b/src/scanpy/processes/cluster.nf @@ -97,7 +97,7 @@ class SC__SCANPY__CLUSTERING_PARAMS { process SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container label 'compute_resources__mem' input: @@ -111,7 +111,7 @@ process SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS { path(f) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.clustering) processParams = sampleParams.local methodAsArguments = processParams?.methods ? processParams.methods.collect({ '--method' + ' ' + it }).join(' ') : '--method ' + processParams.method resolutionAsArguments = processParams?.resolutions ? 
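The preflight checks above expand list-valued clustering settings into repeated --method and --resolution flags. A sketch of a clustering block that would trigger parameter exploration; the method and resolution values are examples, not defaults:

params {
    tools {
        scanpy {
            clustering {
                preflight_checks = true
                methods = ['louvain', 'leiden'] // example values
                resolutions = [0.4, 0.8]        // example values
            }
        }
    }
}
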
processParams?.resolutions.collect({ '--resolution' + ' ' + it }).join(' ') : '--resolution ' + processParams.resolution @@ -134,7 +134,7 @@ def SC__SCANPY__CLUSTERING_PARAMS(params) { */ process SC__SCANPY__CLUSTERING { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -145,7 +145,7 @@ process SC__SCANPY__CLUSTERING { tuple val(sampleId), path("${sampleId}.SC__SCANPY__CLUSTERING.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.clustering) processParams = sampleParams.local """ ${binDir}/cluster/sc_clustering.py \ @@ -163,7 +163,7 @@ process SC__SCANPY__CLUSTERING { */ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate/clustering/${isParamNull(method) ? "default": method.toLowerCase()}/${isParamNull(resolution) ? "default" : "res_" + resolution}", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -183,7 +183,7 @@ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING { val(resolution) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").clustering) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.clustering) processParams = sampleParams.local def _processParams = new SC__SCANPY__CLUSTERING_PARAMS() _processParams.setEnv(this) diff --git a/src/scanpy/processes/dim_reduction.nf b/src/scanpy/processes/dim_reduction.nf index 6a8f6452..5eec2a6b 100644 --- a/src/scanpy/processes/dim_reduction.nf +++ b/src/scanpy/processes/dim_reduction.nf @@ -89,7 +89,7 @@ def SC__SCANPY__DIM_REDUCTION_PARAMS(params) { process SC__SCANPY__DIM_REDUCTION { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__cpu' @@ -108,7 +108,7 @@ process SC__SCANPY__DIM_REDUCTION { val(nComps) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").dim_reduction.get(params.method)) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.dim_reduction.get(params.method)) processParams = sampleParams.local // In parameter exploration mode, file output needs to be tagged with a unique identitifer because of: // - https://github.com/nextflow-io/nextflow/issues/470 diff --git a/src/scanpy/processes/feature_selection.nf b/src/scanpy/processes/feature_selection.nf index 067e31eb..9ad9b6ec 100644 --- a/src/scanpy/processes/feature_selection.nf +++ b/src/scanpy/processes/feature_selection.nf @@ -6,7 +6,7 @@ binDir = !params.containsKey("test") ? 
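Note how SC__SCANPY__DIM_REDUCTION resolves its settings per method at runtime, via the line shown in the hunk above:

def sampleParams = params.parseConfig(sampleId, params.global,
    params.tools.scanpy.dim_reduction.get(params.method))

This implies one config block per method name (e.g. pca, tsne, umap); the keys inside each block are assumed to follow the scanpy dim_reduction config, which this patch does not touch.
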
"${workflow.projectDir}/src/scanpy/bin" : process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -17,7 +17,7 @@ process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES { tuple val(sampleId), path("${sampleId}.SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").feature_selection) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.feature_selection) processParams = sampleParams.local """ ${binDir}/feature_selection/sc_find_variable_genes.py \ @@ -35,7 +35,7 @@ process SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES { process SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -46,7 +46,7 @@ process SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES { tuple val(sampleId), path("${sampleId}.SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").feature_selection) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.feature_selection) processParams = sampleParams.local """ ${binDir}/feature_selection/sc_subset_variable_genes.py \ diff --git a/src/scanpy/processes/filter.nf b/src/scanpy/processes/filter.nf index 321d1706..d91b3019 100644 --- a/src/scanpy/processes/filter.nf +++ b/src/scanpy/processes/filter.nf @@ -6,7 +6,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scanpy/bin" : process SC__SCANPY__COMPUTE_QC_STATS { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -17,7 +17,7 @@ process SC__SCANPY__COMPUTE_QC_STATS { tuple val(sampleId), path("${sampleId}.SC__SCANPY__COMPUTE_QC_STATS.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").filter) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.filter) processParams = sampleParams.local """ ${binDir}/filter/sc_cell_gene_filtering.py \ @@ -38,7 +38,7 @@ process SC__SCANPY__COMPUTE_QC_STATS { process SC__SCANPY__GENE_FILTER { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -49,7 +49,7 @@ process SC__SCANPY__GENE_FILTER { tuple val(sampleId), path("${sampleId}.SC__SCANPY__GENE_FILTER.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").filter) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.filter) processParams = sampleParams.local """ ${binDir}/filter/sc_cell_gene_filtering.py \ @@ -64,7 +64,7 @@ process SC__SCANPY__GENE_FILTER { process SC__SCANPY__CELL_FILTER { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -75,7 +75,7 @@ process SC__SCANPY__CELL_FILTER { tuple val(sampleId), path("${sampleId}.SC__SCANPY__CELL_FILTER.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").filter) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.filter) processParams = sampleParams.local """ ${binDir}/filter/sc_cell_gene_filtering.py \ diff --git a/src/scanpy/processes/marker_genes.nf b/src/scanpy/processes/marker_genes.nf index 341017e6..1da58737 100644 --- a/src/scanpy/processes/marker_genes.nf +++ b/src/scanpy/processes/marker_genes.nf @@ -13,7 +13,7 @@ include { */ process SC__SCANPY__MARKER_GENES { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -29,7 +29,7 @@ process SC__SCANPY__MARKER_GENES { tuple val(sampleId), path("${sampleId}.SC__SCANPY__MARKER_GENES.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").marker_genes) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.marker_genes) processParams = sampleParams.local """ ${binDir}/cluster/sc_marker_genes.py \ @@ -48,7 +48,7 @@ process SC__SCANPY__MARKER_GENES { */ process SC__SCANPY__PARAM_EXPLORE_MARKER_GENES { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate/markers/${isParamNull(clusteringMethod) ? "default": clusteringMethod.toLowerCase()}/${isParamNull(clusteringResolution) ? 
"res_": clusteringResolution}", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -70,7 +70,7 @@ process SC__SCANPY__PARAM_EXPLORE_MARKER_GENES { val(clusteringResolution) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").marker_genes) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.marker_genes) processParams = sampleParams.local // In parameter exploration mode, file output needs to be tagged with a unique identitifer because of: // - https://github.com/nextflow-io/nextflow/issues/470 diff --git a/src/scanpy/processes/neighborhood_graph.nf b/src/scanpy/processes/neighborhood_graph.nf index 3b4c359f..05c68e89 100644 --- a/src/scanpy/processes/neighborhood_graph.nf +++ b/src/scanpy/processes/neighborhood_graph.nf @@ -85,7 +85,7 @@ class SC__SCANPY__NEIGHBORHOOD_GRAPH_PARAMS { process SC__SCANPY__NEIGHBORHOOD_GRAPH { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container label 'compute_resources__mem' input: @@ -106,7 +106,7 @@ process SC__SCANPY__NEIGHBORHOOD_GRAPH { def sampleParams = params.parseConfig( sampleId, params.global, - params.getToolParams("scanpy").neighborhood_graph + params.tools.scanpy.neighborhood_graph ) processParams = sampleParams.local // In parameter exploration mode, file output needs to be tagged with a unique identitifer because of: diff --git a/src/scanpy/processes/regress_out.nf b/src/scanpy/processes/regress_out.nf index ce701b2e..674d1ba9 100644 --- a/src/scanpy/processes/regress_out.nf +++ b/src/scanpy/processes/regress_out.nf @@ -4,7 +4,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scanpy/bin/" process SC__SCANPY__REGRESS_OUT { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__cpu' @@ -19,7 +19,7 @@ process SC__SCANPY__REGRESS_OUT { path("${sampleId}.SC__SCANPY__REGRESS_OUT.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").regress_out) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.regress_out) processParams = sampleParams.local variablesToRegressOutAsArguments = processParams.variablesToRegressOut.collect({ '--variable-to-regress-out' + ' ' + it }).join(' ') """ diff --git a/src/scanpy/processes/reports.nf b/src/scanpy/processes/reports.nf index 0fda37d2..36a5a264 100644 --- a/src/scanpy/processes/reports.nf +++ b/src/scanpy/processes/reports.nf @@ -16,7 +16,7 @@ include { */ process SC__SCANPY__GENERATE_REPORT { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true label 'compute_resources__report' @@ -29,7 +29,7 @@ process SC__SCANPY__GENERATE_REPORT { tuple val(sampleId), path("${sampleId}.${reportTitle}.ipynb") script: - def reportParams = new Yaml().dump(annotations_to_plot: params.getToolParams("scanpy").report.annotations_to_plot) + def reportParams = new Yaml().dump(annotations_to_plot: params.tools.scanpy.report.annotations_to_plot) """ papermill ${ipynb} \ --report-mode \ @@ -51,7 +51,7 @@ process SC__SCANPY__GENERATE_REPORT { */ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING_GENERATE_REPORT { - container params.getToolParams("scanpy").container + container 
params.tools.scanpy.container publishDir "${params.global.outdir}/notebooks/intermediate/clustering/${isParamNull(method) ? "default": method.toLowerCase()}/${isParamNull(resolution) ? "res_": resolution}", mode: 'symlink', overwrite: true label 'compute_resources__report' @@ -77,7 +77,7 @@ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING_GENERATE_REPORT { stashedParams = [method, resolution] if(!isParamNull(stashedParams)) uuid = stashedParams.findAll { it != 'NULL' }.join('_') - def reportParams = new Yaml().dump(annotations_to_plot: params.getToolParams("scanpy").report.annotations_to_plot) + def reportParams = new Yaml().dump(annotations_to_plot: params.tools.scanpy.report.annotations_to_plot) """ papermill ${ipynb} \ --report-mode \ @@ -93,7 +93,7 @@ process SC__SCANPY__PARAM_EXPLORE_CLUSTERING_GENERATE_REPORT { // QC report takes two inputs, so needs it own process process SC__SCANPY__GENERATE_DUAL_INPUT_REPORT { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true label 'compute_resources__report' @@ -116,7 +116,7 @@ process SC__SCANPY__GENERATE_DUAL_INPUT_REPORT { script: if(!isParamNull(stashedParams)) uuid = stashedParams.findAll { it != 'NULL' }.join('_') - def reportParams = new Yaml().dump(annotations_to_plot: params.getToolParams("scanpy").report.annotations_to_plot) + def reportParams = new Yaml().dump(annotations_to_plot: params.tools.scanpy.report.annotations_to_plot) """ papermill ${ipynb} \ --report-mode \ @@ -131,7 +131,7 @@ process SC__SCANPY__GENERATE_DUAL_INPUT_REPORT { process SC__SCANPY__REPORT_TO_HTML { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true // copy final "merged_report" to notbooks root: publishDir "${params.global.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true @@ -152,7 +152,7 @@ process SC__SCANPY__REPORT_TO_HTML { process SC__SCANPY__MERGE_REPORTS { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true // copy final "merged_report" to notebooks root: publishDir "${params.global.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true diff --git a/src/scanpy/processes/transform.nf b/src/scanpy/processes/transform.nf index a4181072..886065d5 100644 --- a/src/scanpy/processes/transform.nf +++ b/src/scanpy/processes/transform.nf @@ -6,7 +6,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/scanpy/bin" : process SC__SCANPY__NORMALIZATION { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -17,7 +17,7 @@ process SC__SCANPY__NORMALIZATION { tuple val(sampleId), path("${sampleId}.SC__SCANPY__NORMALIZATION.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").normalization) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.normalization) processParams = sampleParams.local """ ${binDir}/transform/sc_normalization.py \ @@ -31,7 +31,7 @@ process SC__SCANPY__NORMALIZATION { process SC__SCANPY__DATA_TRANSFORMATION { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -42,7 +42,7 @@ process SC__SCANPY__DATA_TRANSFORMATION { tuple val(sampleId), path("${sampleId}.SC__SCANPY__DATA_TRANSFORMATION.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").data_transformation) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.data_transformation) processParams = sampleParams.local """ ${binDir}/transform/sc_data_transformation.py \ @@ -55,7 +55,7 @@ process SC__SCANPY__DATA_TRANSFORMATION { process SC__SCANPY__FEATURE_SCALING { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -70,7 +70,7 @@ process SC__SCANPY__FEATURE_SCALING { path("${sampleId}.SC__SCANPY__FEATURE_SCALING.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scanpy").feature_scaling) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scanpy.feature_scaling) processParams = sampleParams.local """ ${binDir}/transform/sc_feature_scaling.py \ diff --git a/src/scanpy/workflows/bec_bbknn.nf b/src/scanpy/workflows/bec_bbknn.nf index fb1daede..42f1ee03 100644 --- a/src/scanpy/workflows/bec_bbknn.nf +++ b/src/scanpy/workflows/bec_bbknn.nf @@ -59,7 +59,7 @@ workflow BEC_BBKNN { main: // To avoid Variable `params` already defined in the process scope - def scanpyParams = params.getToolParams("scanpy") + def scanpyParams = params.tools.scanpy SC__SCANPY__BATCH_EFFECT_CORRECTION( dimReductionData.map { @@ -126,7 +126,7 @@ workflow BEC_BBKNN { bbknn_report = GENERATE_DUAL_INPUT_REPORT( becDualDataPrePost, - file(workflow.projectDir + params.getToolParams("scanpy").batch_effect_correct.report_ipynb), + file(workflow.projectDir + params.tools.scanpy.batch_effect_correct.report_ipynb), "SC_BEC_BBKNN_report", clusteringParams.isParameterExplorationModeOn() ) diff --git a/src/scanpy/workflows/bec_mnncorrect.nf b/src/scanpy/workflows/bec_mnncorrect.nf index 0dfa9d93..e1d8a5d3 100644 --- a/src/scanpy/workflows/bec_mnncorrect.nf +++ b/src/scanpy/workflows/bec_mnncorrect.nf @@ -65,7 +65,7 @@ workflow BEC_MNNCORRECT { main: // To avoid Variable `params` already defined in the process scope - def scanpyParams = params.getToolParams("scanpy") + def scanpyParams = params.tools.scanpy out = scanpyParams.containsKey("regress_out") ? 
SC__SCANPY__REGRESS_OUT( hvg ) : data diff --git a/src/scanpy/workflows/cluster_identification.nf b/src/scanpy/workflows/cluster_identification.nf index 7a8e9434..b0ee1011 100644 --- a/src/scanpy/workflows/cluster_identification.nf +++ b/src/scanpy/workflows/cluster_identification.nf @@ -32,9 +32,9 @@ workflow CLUSTER_IDENTIFICATION { main: // To run multiple clustering, we need at least 1 argument that is a list - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) // Run sanity checks - if(params.getToolParams("scanpy").clustering?.preflight_checks) { + if(params.tools.scanpy.clustering?.preflight_checks) { $data = SC__SCANPY__CLUSTERING_PREFLIGHT_CHECKS( data.map { it -> tuple(it[0], it[1]) } ) } else { $data = data @@ -60,7 +60,7 @@ workflow CLUSTER_IDENTIFICATION { report = GENERATE_REPORT( "CLUSTERING", out, - file(workflow.projectDir + params.getToolParams("scanpy").clustering.report_ipynb), + file(workflow.projectDir + params.tools.scanpy.clustering.report_ipynb), clusteringParams.isParameterExplorationModeOn() ) diff --git a/src/scanpy/workflows/combine_reports.nf b/src/scanpy/workflows/combine_reports.nf index d4d7838b..dcdb0933 100644 --- a/src/scanpy/workflows/combine_reports.nf +++ b/src/scanpy/workflows/combine_reports.nf @@ -18,7 +18,7 @@ workflow COMBINE_REPORTS { cluster_report main: - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) ipynbs = qc_filter_report.map { it -> tuple(it[0], it[1]) }.mix( diff --git a/src/scanpy/workflows/dim_reduction.nf b/src/scanpy/workflows/dim_reduction.nf index af1bc185..996bcb8b 100644 --- a/src/scanpy/workflows/dim_reduction.nf +++ b/src/scanpy/workflows/dim_reduction.nf @@ -33,7 +33,7 @@ workflow DIM_REDUCTION { report = GENERATE_REPORT( "DIMENSIONALITY_REDUCTION", DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap.map { it -> tuple(it[0], it[1]) }, - file(workflow.projectDir + params.getToolParams("scanpy").dim_reduction.report_ipynb), + file(workflow.projectDir + params.tools.scanpy.dim_reduction.report_ipynb), false ) @@ -57,7 +57,7 @@ workflow DIM_REDUCTION_TSNE_UMAP { report = GENERATE_REPORT( "DIMENSIONALITY_REDUCTION", dimred_tsne_umap.map { it -> tuple(it[0], it[1]) }, - file(workflow.projectDir + params.getToolParams("scanpy").dim_reduction.report_ipynb), + file(workflow.projectDir + params.tools.scanpy.dim_reduction.report_ipynb), false ) diff --git a/src/scanpy/workflows/hvg_selection.nf b/src/scanpy/workflows/hvg_selection.nf index fcbcd3f4..1e4213ea 100644 --- a/src/scanpy/workflows/hvg_selection.nf +++ b/src/scanpy/workflows/hvg_selection.nf @@ -36,7 +36,7 @@ workflow HVG_SELECTION { hvg = data \ | SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES \ | SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES - out = params.getToolParams("scanpy").containsKey("regress_out") + out = params.tools.scanpy.containsKey("regress_out") ? 
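Both here and in the MNN-correct workflow above, regress_out is optional: the step runs only when the key exists in the scanpy scope. A sketch enabling it, with the variable list taken from conf/regress_out.config earlier in this patch:

params {
    tools {
        scanpy {
            regress_out {
                variablesToRegressOut = ['n_counts', 'percent_mito']
            }
        }
    }
}
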
diff --git a/src/scanpy/workflows/qc_filter.nf b/src/scanpy/workflows/qc_filter.nf
index aefc3e68..e878aa6f 100644
--- a/src/scanpy/workflows/qc_filter.nf
+++ b/src/scanpy/workflows/qc_filter.nf
@@ -35,7 +35,7 @@ workflow QC_FILTER {
             SC__SCANPY__COMPUTE_QC_STATS.out.join(filtered).map {
                 it -> tuple(*it[0..(it.size()-1)], null)
             },
-            file(workflow.projectDir + params.sc.scanpy.filter.report_ipynb),
+            file(workflow.projectDir + params.tools.scanpy.filter.report_ipynb),
             'SC_QC_filtering_report',
             false
         )
diff --git a/src/scanpy/workflows/single_sample.nf b/src/scanpy/workflows/single_sample.nf
index 030ac486..00c2cdd3 100644
--- a/src/scanpy/workflows/single_sample.nf
+++ b/src/scanpy/workflows/single_sample.nf
@@ -74,7 +74,7 @@ workflow SINGLE_SAMPLE {
         out = FILTER_AND_ANNOTATE_AND_CLEAN( data )

         // To avoid Variable `params` already defined in the process scope
-        def scanpyParams = params.getToolParams("scanpy")
+        def scanpyParams = params.tools.scanpy

         filtered = scanpyParams?.filter ? QC_FILTER( out ).filtered : out
         transformed_normalized = scanpyParams?.data_transformation && scanpyParams?.normalization
@@ -93,7 +93,7 @@ workflow SINGLE_SAMPLE {
         // Reporting
         samples = data.map { it -> it[0] }
         UTILS__GENERATE_WORKFLOW_CONFIG_REPORT(
-            file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb)
+            file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb)
         )

         ipynbs = COMBINE_REPORTS(
diff --git a/src/scenic/conf/append.config b/src/scenic/conf/append.config
index dd0af3e5..2a32607b 100644
--- a/src/scenic/conf/append.config
+++ b/src/scenic/conf/append.config
@@ -1,6 +1,6 @@
 params {
-    sc {
+    tools {
         scenic {
             report_ipynb = '/src/scenic/bin/reports/scenic_report.ipynb'
             existingScenicLoom = ''
diff --git a/src/scenic/conf/min/aucell.config b/src/scenic/conf/min/aucell.config
index 1a427a08..64ec0b57 100644
--- a/src/scenic/conf/min/aucell.config
+++ b/src/scenic/conf/min/aucell.config
@@ -1,6 +1,6 @@
 params {
-    sc {
+    tools {
         scenic {
             aucell {
                 output = 'aucell_output.loom'
diff --git a/src/scenic/conf/min/base/v0.0.1.config b/src/scenic/conf/min/base/v0.0.1.config
index ac6c4b81..b2531630 100644
--- a/src/scenic/conf/min/base/v0.0.1.config
+++ b/src/scenic/conf/min/base/v0.0.1.config
@@ -1,5 +1,5 @@
 // Define local variable otherwise it's going to be kept in the final config
-def _ = params.sc.scenic
+def _ = params.tools.scenic

 // Sanity checks
 if(!params.global.containsKey("species"))
     throw new Exception("The params.global.species parameter is required.")
@@ -10,7 +10,7 @@ if(!params.global.containsKey("outdir"))

 params {
     global = params.global
-    sc {
+    tools {
         scenic {
             // Container settings
             container = 'aertslab/pyscenic:0.10.0'
@@ -39,9 +39,9 @@ params {

 // Databases versions
 // PUBLIC
-params.sc.scenic.tfsVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tfsVersion") ? _.tfsVersion : "${params.global.species}-v0.0.1"
-params.sc.scenic.motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion") ? _.motifsDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1"
-params.sc.scenic.tracksDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tracksDbVersion") ? _.tracksDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1"
+params.tools.scenic.tfsVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tfsVersion") ? _.tfsVersion : "${params.global.species}-v0.0.1"
+params.tools.scenic.motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion") ? _.motifsDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1"
+params.tools.scenic.tracksDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("tracksDbVersion") ? _.tracksDbVersion : "${params.global.species}-${params.global.genome.assembly}-v0.0.1"

 includeConfig '../labels.config'
diff --git a/src/scenic/conf/min/cistarget.config b/src/scenic/conf/min/cistarget.config
index 45c09862..a0b3ae39 100644
--- a/src/scenic/conf/min/cistarget.config
+++ b/src/scenic/conf/min/cistarget.config
@@ -1,4 +1,4 @@
-def _ = params.sc.scenic
+def _ = params.tools.scenic

 // Sanity checks
 if(!(params.global.species in ["human", "mouse", "fly"]))
     throw new Exception("No cisTarget databases found for the given species: "+ params.global.species)
@@ -6,7 +6,7 @@ if(params.global.species == "human" && !(params.global.genome.assembly in ["hg38
     throw new Exception("No cisTarget databases found for the given genome: "+ params.global.genome.assembly)

 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 adj = "adj.tsv"
@@ -36,7 +36,7 @@ params {
                 top_n_regulators = "5,10,50"
                 min_genes = "20"

-                // expression_mtx_fname = "" // uses params.scenic.filteredLoom
+                // expression_mtx_fname = "" // uses params.tools.scenic.filteredLoom
             }
         }

@@ -49,6 +49,6 @@ def useMotifs = _.containsKey("cistarget") && _.cistarget.containsKey("useMotifs
 def useTracks = _.containsKey("cistarget") && _.cistarget.containsKey("useTracks") ? _.cistarget.useTracks: false

 if(useMotifs)
-    includeConfig "dbs/cistarget-motifs-${params.sc.scenic.motifsDbVersion}.config"
+    includeConfig "dbs/cistarget-motifs-${params.tools.scenic.motifsDbVersion}.config"
 if(useTracks)
-    includeConfig "dbs/cistarget-tracks-${params.sc.scenic.tracksDbVersion}.config"
+    includeConfig "dbs/cistarget-tracks-${params.tools.scenic.tracksDbVersion}.config"
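Note: the two config hunks above combine a containsKey fallback with a string-interpolated includeConfig, so a database version can be pinned by the user and is otherwise derived from the species and assembly. Restated as a compact sketch under stated assumptions — it reads the pinned value from the cistarget sub-scope, whereas the patched file itself reads the top-level _.motifsDbVersion:

    def _ = params.tools.scenic
    def motifsDbVersion = _.containsKey("cistarget") && _.cistarget.containsKey("motifsDbVersion")
        ? _.cistarget.motifsDbVersion
        : "${params.global.species}-${params.global.genome.assembly}-v0.0.1"
    // pulls in e.g. dbs/cistarget-motifs-human-hg38-v0.0.1.config
    includeConfig "dbs/cistarget-motifs-${motifsDbVersion}.config"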
diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config
index a0346c58..d170c3ca 100644
--- a/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-motifs-fly-dm6-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // motif feather format databases
diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config
index d805f8b7..ec4fb229 100644
--- a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg19-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // Motif feather format databases
diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config
index 6ea5fcf3..f1cf7671 100644
--- a/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-motifs-human-hg38-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // Motif feather format databases
diff --git a/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config
index 57a56695..56e01ef5 100644
--- a/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-motifs-mouse-mm10-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // Motif feather format databases
diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config
index 1e6fe1a5..ab3b5919 100644
--- a/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-tracks-fly-dm6-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // track feather format databases
diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config
index 09ba7c20..b48ddeab 100644
--- a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg19-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // Track feather format databases
diff --git a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config
index e74060a2..12552632 100644
--- a/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config
+++ b/src/scenic/conf/min/dbs/cistarget-tracks-human-hg38-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             cistarget {
                 // Track feather format databases
diff --git a/src/scenic/conf/min/grn.config b/src/scenic/conf/min/grn.config
index ccaa21a3..3178cf92 100644
--- a/src/scenic/conf/min/grn.config
+++ b/src/scenic/conf/min/grn.config
@@ -1,10 +1,10 @@
-def _ = params.sc.scenic
+def _ = params.tools.scenic

 // Sanity checks
 if(!(params.global.species in ["human", "mouse", "fly"]))
     throw new Exception("No TFs found for the given species: "+ params.global.species)

 params {
-    sc {
+    tools {
         scenic {
             grn {
                 // seed = 617
@@ -12,7 +12,7 @@ params {
                 // PUBLIC
                 // maxForks = _.containsKey("grn") && _.grn.containsKey("maxForks") ? _.grn.maxForks : 1
                 // numWorkers = _.containsKey("grn") && _.grn.containsKey("numWorkers") ? _.grn.numWorkers : 2
-                // // Following parameters are not used except params.sc.scenic.labels.processExecutor = 'qsub'
+                // // Following parameters are not used except params.tools.scenic.labels.processExecutor = 'qsub'
                 // pmem = _.containsKey("grn") && _.grn.containsKey("pmem") ? _.grn.pmem : '2gb'
                 // walltime = '24:00:00'
             }
@@ -20,4 +20,4 @@ params {
     }
 }

-includeConfig "tfs/${params.sc.scenic.tfsVersion}.config"
+includeConfig "tfs/${params.tools.scenic.tfsVersion}.config"
diff --git a/src/scenic/conf/min/labels.config b/src/scenic/conf/min/labels.config
index c9bc3c66..b2072584 100644
--- a/src/scenic/conf/min/labels.config
+++ b/src/scenic/conf/min/labels.config
@@ -1,7 +1,7 @@
-def _ = params.sc.scenic
+def _ = params.tools.scenic

 params {
-    sc {
+    tools {
         scenic {
             labels {
                 // Resources settings:
diff --git a/src/scenic/conf/min/scenic.config b/src/scenic/conf/min/scenic.config
index 6ec3d049..fe51e982 100644
--- a/src/scenic/conf/min/scenic.config
+++ b/src/scenic/conf/min/scenic.config
@@ -10,7 +10,7 @@ params {
         outdir = "out"
     }

-    sc {
+    tools {
         scenic {
             configVersion = "v0.0.1"
         }
diff --git a/src/scenic/conf/min/scope.config b/src/scenic/conf/min/scope.config
index b8d0a8d9..80b8dc9f 100644
--- a/src/scenic/conf/min/scope.config
+++ b/src/scenic/conf/min/scope.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scope {
             genome = ""
             tree {
diff --git a/src/scenic/conf/min/tfs/fly-v0.0.1.config b/src/scenic/conf/min/tfs/fly-v0.0.1.config
index 0606757b..996f3227 100644
--- a/src/scenic/conf/min/tfs/fly-v0.0.1.config
+++ b/src/scenic/conf/min/tfs/fly-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             grn {
                 tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_dmel.txt"
diff --git a/src/scenic/conf/min/tfs/human-v0.0.1.config b/src/scenic/conf/min/tfs/human-v0.0.1.config
index b7f2b1d1..062477dd 100644
--- a/src/scenic/conf/min/tfs/human-v0.0.1.config
+++ b/src/scenic/conf/min/tfs/human-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             grn {
                 tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_hg38.txt"
diff --git a/src/scenic/conf/min/tfs/mouse-v0.0.1.config b/src/scenic/conf/min/tfs/mouse-v0.0.1.config
index 1d10fe65..903ced9f 100644
--- a/src/scenic/conf/min/tfs/mouse-v0.0.1.config
+++ b/src/scenic/conf/min/tfs/mouse-v0.0.1.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             grn {
                 tfs = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt"
diff --git a/src/scenic/conf/multi_runs.config b/src/scenic/conf/multi_runs.config
index 349c2a20..9433a60a 100644
--- a/src/scenic/conf/multi_runs.config
+++ b/src/scenic/conf/multi_runs.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             numRuns = 2
             // AUCell parameters
diff --git a/src/scenic/conf/test.config b/src/scenic/conf/test.config
index 234d2034..198e2dd9 100644
--- a/src/scenic/conf/test.config
+++ b/src/scenic/conf/test.config
@@ -3,7 +3,7 @@ params {
         project_name = 'Test'
     }

-    sc {
+    tools {
         scenic {
             filteredLoom = '/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/testruns/scenic-nf_testing/expr_mat.loom' // for testing
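Note: multi_runs.config and test.config above configure multi-run SCENIC; the numRuns key feeds the run channel in src/scenic/main.nf below. A minimal user config enabling it (the value 5 is illustrative):

    params {
        tools {
            scenic {
                // drives Channel.from( 1..params.tools.scenic.numRuns ) in main.nf
                numRuns = 5
            }
        }
    }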
diff --git a/src/scenic/conf/test_multi_runs.config b/src/scenic/conf/test_multi_runs.config
index 016d282c..b9fabfe6 100644
--- a/src/scenic/conf/test_multi_runs.config
+++ b/src/scenic/conf/test_multi_runs.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scenic {
             numRuns = 2
             // AUCell parameters
diff --git a/src/scenic/main.nf b/src/scenic/main.nf
index a03dd805..70489ce1 100644
--- a/src/scenic/main.nf
+++ b/src/scenic/main.nf
@@ -6,7 +6,7 @@ include {

 resolveParams(params, true)

-def isAppendOnlyMode = params.getToolParams("scenic").containsKey("existingScenicLoom")
+def isAppendOnlyMode = params.tools.scenic.containsKey("existingScenicLoom")

 def ALLOWED_GENOME_ASSEMBLIES = ['dm6','hg19','hg38', 'mm10']

 //////////////////////////////////////////////////////
@@ -69,8 +69,8 @@ include {
 */

 // Create channel for the different runs
-if(params.getToolParams("scenic").containsKey("numRuns")) {
-    runs = Channel.from( 1..params.getToolParams("scenic").numRuns )
+if(params.tools.scenic.containsKey("numRuns")) {
+    runs = Channel.from( 1..params.tools.scenic.numRuns )
 } else {
     runs = Channel.from( 1..1 )
 }
@@ -83,44 +83,44 @@ workflow scenic {

     main:
         /* GRN */
-        tfs = file(params.getToolParams("scenic").grn.tfs)
+        tfs = file(params.tools.scenic.grn.tfs)
         grn = ARBORETO_WITH_MULTIPROCESSING( filteredLoom.combine(runs), tfs )
         grn_with_correlation = ADD_PEARSON_CORRELATION(grn)

         /* cisTarget motif analysis */
         // channel for SCENIC databases resources:
         motifsDb = Channel
-            .fromPath( params.getToolParams("scenic").cistarget.motifsDb )
+            .fromPath( params.tools.scenic.cistarget.motifsDb )
             .collect() // use all files together in the ctx command
-        motifsAnnotation = file(params.getToolParams("scenic").cistarget.motifsAnnotation)
+        motifsAnnotation = file(params.tools.scenic.cistarget.motifsAnnotation)
         ctx_mtf = CISTARGET__MOTIF( grn_with_correlation, motifsDb, motifsAnnotation, 'mtf' )

         /* cisTarget track analysis */
-        if(params.getToolParams("scenic").cistarget.tracksDb) {
+        if(params.tools.scenic.cistarget.tracksDb) {
             tracksDb = Channel
-                .fromPath( params.getToolParams("scenic").cistarget.tracksDb )
+                .fromPath( params.tools.scenic.cistarget.tracksDb )
                 .collect() // use all files together in the ctx command
-            tracksAnnotation = file(params.getToolParams("scenic").cistarget.tracksAnnotation)
+            tracksAnnotation = file(params.tools.scenic.cistarget.tracksAnnotation)
             ctx_trk = CISTARGET__TRACK( grn_with_correlation, tracksDb, tracksAnnotation, 'trk' )
         }

         /* AUCell, motif regulons */
         auc_mtf = AUCELL__MOTIF( ctx_mtf, 'mtf' )

-        if(params.getToolParams("scenic").cistarget.tracksDb) {
+        if(params.tools.scenic.cistarget.tracksDb) {
             /* AUCell, track regulons */
             auc_trk = AUCELL__TRACK( ctx_trk, 'trk' )
         }

         // multi-runs aggregation:
-        if(params.getToolParams("scenic").containsKey("numRuns") && params.getToolParams("scenic").numRuns > 1) {
+        if(params.tools.scenic.containsKey("numRuns") && params.tools.scenic.numRuns > 1) {
             scenic_loom_mtf = MULTI_RUNS_TO_LOOM__MOTIF(
                 filteredLoom,
                 ctx_mtf,
                 auc_mtf,
                 'mtf'
             )
-            if(params.getToolParams("scenic").cistarget.tracksDb) {
+            if(params.tools.scenic.cistarget.tracksDb) {
                 scenic_loom_trk = MULTI_RUNS_TO_LOOM__TRACK(
                     filteredLoom,
                     ctx_trk,
@@ -135,7 +135,7 @@ workflow scenic {
                 out = VISUALIZE(scenic_loom_mtf)
             }
         } else {
-            if(params.getToolParams("scenic").cistarget.tracksDb) {
+            if(params.tools.scenic.cistarget.tracksDb) {
                 out = VISUALIZE(
                     MERGE_MOTIF_TRACK_LOOMS(
                         auc_mtf
@@ -163,10 +163,10 @@ workflow scenic_append {
         scopeLoom

     main:
-        if(params.getToolParams("scenic").containsKey("existingScenicLoom")) {
+        if(params.tools.scenic.containsKey("existingScenicLoom")) {
             scenicLoom = getChannelFromFilePath(
-                params.getToolParams("scenic").existingScenicLoom,
-                params.getToolParams("scenic").sampleSuffixWithExtension
+                params.tools.scenic.existingScenicLoom,
+                params.tools.scenic.sampleSuffixWithExtension
             )
             if(!params.containsKey('quiet')) {
                 Channel.from('').view {
@@ -194,9 +194,9 @@ workflow scenic_append {
                     throw new Exception("Cannot append SCENIC loom to SCope loom because the IDs do not match.")
                 }
             )
-        if(!params.getToolParams("scenic").skipReports) {
+        if(!params.tools.scenic.skipReports) {
             report_notebook = GENERATE_REPORT(
-                file(workflow.projectDir + params.getToolParams("scenic").report_ipynb),
+                file(workflow.projectDir + params.tools.scenic.report_ipynb),
                 APPEND_SCENIC_LOOM.out,
                 "SCENIC_report"
             )
@@ -213,8 +213,8 @@ workflow scenic_append {

 workflow {

     main:
-        if(!("filteredLoom" in params.getToolParams("scenic")))
+        if(!("filteredLoom" in params.tools.scenic))
             throw new Exception("The given filteredLoom required parameter does not exist in the params.tools.scenic scope.")
-        scenic( Channel.of( tuple(params.global.project_name, file(params.getToolParams("scenic").filteredLoom)) ) )
+        scenic( Channel.of( tuple(params.global.project_name, file(params.tools.scenic.filteredLoom)) ) )

 }
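Note: main.nf above derives isAppendOnlyMode from the presence of the existingScenicLoom key, and the scenic_append workflow then reuses that loom instead of recomputing SCENIC. A sketch of the corresponding user config (the loom path and suffix values are hypothetical; both keys are the ones the workflow reads):

    params {
        tools {
            scenic {
                existingScenicLoom = '/path/to/existing_scenic_output.loom'  // hypothetical path
                sampleSuffixWithExtension = '.SCENIC_output.loom'            // illustrative suffix
            }
        }
    }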
diff --git a/src/scenic/main.test.nf b/src/scenic/main.test.nf
index f71ed9a2..f29aa1c2 100644
--- a/src/scenic/main.test.nf
+++ b/src/scenic/main.test.nf
@@ -79,7 +79,7 @@ include {
 } from './processes/loomHandler' params(params)

 // Create channel for the different runs
-runs = Channel.from( 1..params.getToolParams("scenic").numRuns )
+runs = Channel.from( 1..params.tools.scenic.numRuns )

 // Make the test workflow
 workflow test_GRNBOOST2WITHOUTDASK {
@@ -88,7 +88,7 @@ workflow test_GRNBOOST2WITHOUTDASK {
         loom

     main:
-        tfs = file(params.getToolParams("scenic").grn.TFs)
+        tfs = file(params.tools.scenic.grn.TFs)
         GRNBOOST2WITHOUTDASK( runs, loom, tfs )

     emit:
@@ -106,18 +106,18 @@ workflow test_CISTARGET {
     main:
         // channel for SCENIC databases resources:
         motifDB = Channel
-            .fromPath( params.getToolParams("scenic").cistarget.mtfDB )
+            .fromPath( params.tools.scenic.cistarget.mtfDB )
             .collect() // use all files together in the ctx command
-        motifANN = file(params.getToolParams("scenic").cistarget.mtfANN)
+        motifANN = file(params.tools.scenic.cistarget.mtfANN)
         ctx_mtf = CISTARGET__MOTIF( runs, filteredloom, grn, motifDB, motifANN, 'mtf' )

         /* cisTarget track analysis */
         trackDB = Channel
-            .fromPath( params.getToolParams("scenic").cistarget.trkDB )
+            .fromPath( params.tools.scenic.cistarget.trkDB )
             .collect() // use all files together in the ctx command
-        trackANN = file(params.getToolParams("scenic").cistarget.trkANN)
+        trackANN = file(params.tools.scenic.cistarget.trkANN)
         ctx_trk = CISTARGET__TRACK( runs, filteredloom, grn, trackDB, trackANN, 'trk' )

     emit:
@@ -154,15 +154,15 @@ workflow test_SINGLE_RUN_BY_ID {
         runId

     main:
-        filteredloom = file( params.getToolParams("scenic").filteredloom )
-        tfs = file(params.getToolParams("scenic").grn.TFs)
+        filteredloom = file( params.tools.scenic.filteredloom )
+        tfs = file(params.tools.scenic.grn.TFs)
         run = Channel.from( runId..runId )
         grn = GRNBOOST2WITHOUTDASK( run, filteredloom, tfs )

         // channel for SCENIC databases resources:
         motifDB = Channel
-            .fromPath( params.getToolParams("scenic").cistarget.mtfDB )
+            .fromPath( params.tools.scenic.cistarget.mtfDB )
             .collect() // use all files together in the ctx command
-        motifANN = file(params.getToolParams("scenic").cistarget.mtfANN)
+        motifANN = file(params.tools.scenic.cistarget.mtfANN)
         ctx_mtf = CISTARGET__MOTIF( run, filteredloom, grn, motifDB, motifANN, 'mtf' )

         /* AUCell, motif regulons */
         auc_mtf = AUCELL__MOTIF( run, filteredloom, ctx_mtf, 'mtf' )
@@ -201,52 +201,52 @@ workflow {
             test_SINGLE_RUN_BY_ID( params.runId )
             break;
         case "GRNBOOST2WITHOUTDASK":
-            test_GRNBOOST2WITHOUTDASK( file( params.getToolParams("scenic").filteredloom ) )
+            test_GRNBOOST2WITHOUTDASK( file( params.tools.scenic.filteredloom ) )
             break;
         case "CISTARGET":
-            grn = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/grnboost2withoutDask/run_*/run_*__adj.tsv")
-            test_CISTARGET( file( params.getToolParams("scenic").filteredloom ), grn )
+            grn = Channel.fromPath(params.tools.scenic.scenicoutdir + "/grnboost2withoutDask/run_*/run_*__adj.tsv")
+            test_CISTARGET( file( params.tools.scenic.filteredloom ), grn )
             break;
         case "AUCELL":
-            ctx_mtf = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv")
-            ctx_trk = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv")
-            test_AUCELL( file( params.getToolParams("scenic").filteredloom ), ctx_mtf, ctx_trk )
+            ctx_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv")
+            ctx_trk = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv")
+            test_AUCELL( file( params.tools.scenic.filteredloom ), ctx_mtf, ctx_trk )
             break;
         case "AGGR_MULTI_RUNS_FEATURES":
             /* Aggregate motifs from multiple runs */
-            reg_mtf = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv")
+            reg_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv")
             AGGR_MULTI_RUNS_FEATURES__MOTIF( reg_mtf.collect(), 'mtf' )
-            if(params.getToolParams("scenic").cistarget.trkDB) {
+            if(params.tools.scenic.cistarget.trkDB) {
                 /* Aggregate tracks from multiple runs */
-                reg_trk = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv")
+                reg_trk = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_trk.csv")
                 AGGR_MULTI_RUNS_FEATURES__TRACK( reg_trk.collect(), 'trk' )
             }
             break;
         case "AGGR_MULTI_RUNS_REGULONS":
             /* Aggregate motif regulons from multiple runs */
-            auc_mtf_looms = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/aucell/run_*/run_*__auc_mtf.loom")
+            auc_mtf_looms = Channel.fromPath(params.tools.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_mtf.loom")
             AGGR_MULTI_RUNS_REGULONS__MOTIF( auc_mtf_looms.collect(), 'mtf' )
-            if(params.getToolParams("scenic").cistarget.trkDB) {
+            if(params.tools.scenic.cistarget.trkDB) {
                 /* Aggregate track regulons from multiple runs */
-                auc_trk_looms = Channel.fromPath(params.getToolParams("scenic").scenicoutdir + "/aucell/run_*/run_*__auc_trk.loom")
+                auc_trk_looms = Channel.fromPath(params.tools.scenic.scenicoutdir + "/aucell/run_*/run_*__auc_trk.loom")
                 AGGR_MULTI_RUNS_REGULONS__TRACK( auc_trk_looms.collect(), 'trk' )
             }
             break;
         case "AUCELL_FROM_FOLDER":
             /* Aggregate motif regulons from multiple runs */
-            regulons_folder_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_mtf")
-            AUCELL_FROM_FOLDER__MOTIF( file(params.getToolParams("scenic").filteredloom), regulons_folder_mtf, 'mtf' )
-            if(params.getToolParams("scenic").cistarget.trkDB) {
+            regulons_folder_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_mtf")
+            AUCELL_FROM_FOLDER__MOTIF( file(params.tools.scenic.filteredloom), regulons_folder_mtf, 'mtf' )
+            if(params.tools.scenic.cistarget.trkDB) {
                 /* Aggregate track regulons from multiple runs */
-                regulons_folder_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_trk")
-                AUCELL_FROM_FOLDER__TRACK( file(params.getToolParams("scenic").filteredloom), regulons_folder_trk, 'trk' )
+                regulons_folder_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_trk")
+                AUCELL_FROM_FOLDER__TRACK( file(params.tools.scenic.filteredloom), regulons_folder_trk, 'trk' )
             }
             break;
         case "SAVE_SCENIC_MULTI_RUNS_TO_LOOM_MOTIF":
-            filteredloom = file(params.getToolParams("scenic").filteredloom)
-            aggr_features_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_cistarget/multi_runs_features_mtf.csv.gz")
-            regulons_folder_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_mtf")
-            regulons_auc_mtf = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_mtf.tsv")
+            filteredloom = file(params.tools.scenic.filteredloom)
+            aggr_features_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_mtf.csv.gz")
+            regulons_folder_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_mtf")
+            regulons_auc_mtf = file(params.tools.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_mtf.tsv")

             /* Save multiple motif SCENIC runs to loom*/
             SAVE_SCENIC_MULTI_RUNS_TO_LOOM_MOTIF(
@@ -258,10 +258,10 @@ workflow {
             )
             break;
         case "SAVE_SCENIC_MULTI_RUNS_TO_LOOM_TRACK":
-            filteredloom = file(params.getToolParams("scenic").filteredloom)
-            regulons_folder_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_regulons_trk")
-            aggr_features_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_cistarget/multi_runs_features_trk.csv.gz")
-            regulons_auc_trk = file(params.getToolParams("scenic").scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_trk.tsv")
+            filteredloom = file(params.tools.scenic.filteredloom)
+            regulons_folder_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_regulons_trk")
+            aggr_features_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_cistarget/multi_runs_features_trk.csv.gz")
+            regulons_auc_trk = file(params.tools.scenic.scenicoutdir + "/multi_runs_aucell/multi_runs_regulons_auc_trk.tsv")

             /* Save multiple track SCENIC runs to loom*/
             SAVE_SCENIC_MULTI_RUNS_TO_LOOM_TRACK(
                 filteredloom,
@@ -272,8 +272,8 @@ workflow {
             )
             break;
         case "MERGE_MOTIF_TRACK_LOOMS":
-            scenic_loom_mtf = file( params.getToolParams("scenic").scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_mtf.loom" )
-            scenic_loom_trk = file( params.getToolParams("scenic").scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_trk.loom" )
+            scenic_loom_mtf = file( params.tools.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_mtf.loom" )
+            scenic_loom_trk = file( params.tools.scenic.scenicoutdir + "/multi_runs_looms/multi_runs_regulons_auc_trk.loom" )
             MERGE_MOTIF_TRACK_LOOMS(
                 scenic_loom_mtf,
                 scenic_loom_trk
@@ -281,7 +281,7 @@ workflow {
             break;
         case "VISUALIZE_PUBLISH":
             /* Aggregate motif regulons from multiple runs */
-            scenic_loom = file( params.getToolParams("scenic").scenicoutdir + "/" + params.getToolParams("scenic").scenicOutputLoom )
+            scenic_loom = file( params.tools.scenic.scenicoutdir + "/" + params.tools.scenic.scenicOutputLoom )
             PUBLISH_LOOM( VISUALIZE( scenic_loom ) )
             break;
         default:
diff --git a/src/scenic/processes/add_correlation.nf b/src/scenic/processes/add_correlation.nf
index a53bf462..84df7de0 100644
--- a/src/scenic/processes/add_correlation.nf
+++ b/src/scenic/processes/add_correlation.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : ""

-def toolParams = params.getToolParams("scenic")
+def toolParams = params.tools.scenic
 def processParams = toolParams.grn

 process ADD_PEARSON_CORRELATION {
diff --git a/src/scenic/processes/arboreto_with_multiprocessing.nf b/src/scenic/processes/arboreto_with_multiprocessing.nf
index 531b0127..89cef01c 100644
--- a/src/scenic/processes/arboreto_with_multiprocessing.nf
+++ b/src/scenic/processes/arboreto_with_multiprocessing.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : ""

-def toolParams = params.getToolParams("scenic")
+def toolParams = params.tools.scenic
 def processParams = toolParams.grn

 process ARBORETO_WITH_MULTIPROCESSING {
diff --git a/src/scenic/processes/aucell.nf b/src/scenic/processes/aucell.nf
index 5ab5733d..722f67b5 100644
--- a/src/scenic/processes/aucell.nf
+++ b/src/scenic/processes/aucell.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : ""

-def toolParams = params.getToolParams("scenic")
+def toolParams = params.tools.scenic
 def processParams = toolParams.aucell

 process AUCELL {
diff --git a/src/scenic/processes/cistarget.nf b/src/scenic/processes/cistarget.nf
index 2533c94d..89054a68 100644
--- a/src/scenic/processes/cistarget.nf
+++ b/src/scenic/processes/cistarget.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : ""

-def toolParams = params.getToolParams("scenic")
+def toolParams = params.tools.scenic
 def processParams = toolParams.cistarget

 process CISTARGET {
diff --git a/src/scenic/processes/loomHandler.nf b/src/scenic/processes/loomHandler.nf
index d8294e4f..a5a1a483 100644
--- a/src/scenic/processes/loomHandler.nf
+++ b/src/scenic/processes/loomHandler.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : ""

-def toolParams = params.getToolParams("scenic")
+def toolParams = params.tools.scenic

 process PUBLISH_LOOM {
diff --git a/src/scenic/processes/multiruns/aggregateFeatures.nf b/src/scenic/processes/multiruns/aggregateFeatures.nf
index c88d8014..0e5dcc18 100644
--- a/src/scenic/processes/multiruns/aggregateFeatures.nf
+++ b/src/scenic/processes/multiruns/aggregateFeatures.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : ""

-def toolParams = params.getToolParams("scenic")
+def toolParams = params.tools.scenic
 def processParams = toolParams.aggregate_features

 process AGGR_MULTI_RUNS_FEATURES {
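Note: the main.test.nf harness above re-runs a single SCENIC stage in isolation by globbing the intermediate outputs of a previous run from scenicoutdir. The core of the pattern, as a sketch (the out/scenic value is illustrative):

    params.tools.scenic.scenicoutdir = 'out/scenic'
    // pick up the per-run cisTarget outputs produced by an earlier pipeline run
    ctx_mtf = Channel.fromPath(params.tools.scenic.scenicoutdir + "/cistarget/run_*/run_*__reg_mtf.csv")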
"${workflow.projectDir}/src/scenic/bin/" : "" -def toolParams = params.getToolParams("scenic") +def toolParams = params.tools.scenic process AGGR_MULTI_RUNS_REGULONS { diff --git a/src/scenic/processes/multiruns/aucellFromFolder.nf b/src/scenic/processes/multiruns/aucellFromFolder.nf index 8ba5cf5d..61dafa21 100644 --- a/src/scenic/processes/multiruns/aucellFromFolder.nf +++ b/src/scenic/processes/multiruns/aucellFromFolder.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -def toolParams = params.getToolParams("scenic") +def toolParams = params.tools.scenic def processParams = toolParams.aucell process AUCELL_FROM_FOLDER { diff --git a/src/scenic/processes/multiruns/convertMotifsToRegulons.nf b/src/scenic/processes/multiruns/convertMotifsToRegulons.nf index f8483e4d..1bbc35a0 100644 --- a/src/scenic/processes/multiruns/convertMotifsToRegulons.nf +++ b/src/scenic/processes/multiruns/convertMotifsToRegulons.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -def toolParams = params.getToolParams("scenic") +def toolParams = params.tools.scenic process CONVERT_MULTI_RUNS_FEATURES_TO_REGULONS { diff --git a/src/scenic/processes/multiruns/saveToLoom.nf b/src/scenic/processes/multiruns/saveToLoom.nf index f9787c5f..5f93ad0f 100644 --- a/src/scenic/processes/multiruns/saveToLoom.nf +++ b/src/scenic/processes/multiruns/saveToLoom.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/scenic/bin/" : "" -def toolParams = params.getToolParams("scenic") +def toolParams = params.tools.scenic process SAVE_MULTI_RUNS_TO_LOOM { diff --git a/src/scenic/processes/reports.nf b/src/scenic/processes/reports.nf index d25c36cc..ba73cfcc 100644 --- a/src/scenic/processes/reports.nf +++ b/src/scenic/processes/reports.nf @@ -7,7 +7,7 @@ takes a template ipynb and adata as input, outputs ipynb named by the value in ${reportTitle} */ -def toolParams = params.getToolParams("scenic") +def toolParams = params.tools.scenic process GENERATE_REPORT { diff --git a/src/scenic/scenic.config b/src/scenic/scenic.config index 34a2287c..26a13c01 100644 --- a/src/scenic/scenic.config +++ b/src/scenic/scenic.config @@ -1,6 +1,6 @@ params { - sc { + tools { scenic { // Label for the processes container = 'aertslab/pyscenic:0.10.4' @@ -43,7 +43,7 @@ params { top_n_regulators = '5,10,50' min_genes = 20 all_modules = false - // expression_mtx_fname = '' // uses params.scenic.filteredLoom + // expression_mtx_fname = '' // uses params.toolsenic.filteredLoom } aucell { output = 'aucell_output.loom' diff --git a/src/scrublet/bin/sc_doublet_detection.py b/src/scrublet/bin/sc_doublet_detection.py index 956eff24..f0ce1258 100755 --- a/src/scrublet/bin/sc_doublet_detection.py +++ b/src/scrublet/bin/sc_doublet_detection.py @@ -220,7 +220,7 @@ def save_histograms(out_basename, scrublet): A manual doublet score threshold can be set using the --threshold (params.tools.scrublet.threshold) argument. Consider to use sample-based parameter setting as described at https://vsn-pipelines.readthedocs.io/en/develop/features.html#multi-sample-parameters. 
diff --git a/src/scrublet/bin/sc_doublet_detection.py b/src/scrublet/bin/sc_doublet_detection.py
index 956eff24..f0ce1258 100755
--- a/src/scrublet/bin/sc_doublet_detection.py
+++ b/src/scrublet/bin/sc_doublet_detection.py
@@ -220,7 +220,7 @@ def save_histograms(out_basename, scrublet):
 A manual doublet score threshold can be set using the --threshold (params.tools.scrublet.threshold) argument. Consider to use sample-based parameter setting as described at https://vsn-pipelines.readthedocs.io/en/develop/features.html#multi-sample-parameters. E.g.:
 params {{
-    sc {{
+    tools {{
         scrublet {{
             threshold = [
                 {SAMPLE_NAME}: [your-custom-threshold-for-that-sample],
diff --git a/src/scrublet/conf/base.config b/src/scrublet/conf/base.config
index 74299626..bb711e07 100644
--- a/src/scrublet/conf/base.config
+++ b/src/scrublet/conf/base.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scrublet {
             container = 'vibsinglecellnf/scrublet:0.2.3'
             doublet_detection {
diff --git a/src/scrublet/conf/scrublet_defaults.conf b/src/scrublet/conf/scrublet_defaults.conf
index 800dd6dc..e82b41a6 100644
--- a/src/scrublet/conf/scrublet_defaults.conf
+++ b/src/scrublet/conf/scrublet_defaults.conf
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         scrublet {
             // add sensible default parameters for Scrublet:
             cell_annotate {
diff --git a/src/scrublet/processes/doublet_detection.nf b/src/scrublet/processes/doublet_detection.nf
index 796b7185..4dc683a8 100644
--- a/src/scrublet/processes/doublet_detection.nf
+++ b/src/scrublet/processes/doublet_detection.nf
@@ -47,7 +47,7 @@ def SC__SCRUBLET__DOUBLET_DETECTION_PARAMS(params) {

 process SC__SCRUBLET__DOUBLET_DETECTION {

-    container params.getToolParams("scrublet").container
+    container params.tools.scrublet.container
     publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
     label 'compute_resources__mem'

@@ -68,7 +68,7 @@ process SC__SCRUBLET__DOUBLET_DETECTION {
         val(nPrinComps)

     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("scrublet").doublet_detection)
+        def sampleParams = params.parseConfig(sampleId, params.global, params.tools.scrublet.doublet_detection)
         processParams = sampleParams.local
         def _processParams = new SC__SCRUBLET__DOUBLET_DETECTION_PARAMS()
         _processParams.setEnv(this)
diff --git a/src/scrublet/processes/reports.nf b/src/scrublet/processes/reports.nf
index 7a6bf165..67ca9b9d 100644
--- a/src/scrublet/processes/reports.nf
+++ b/src/scrublet/processes/reports.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 process SC__SCRUBLET__DOUBLET_DETECTION_REPORT {

-    container params.getToolParams("scrublet").container
+    container params.tools.scrublet.container
     publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true
     label 'compute_resources__report'
diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config
index 69d574fb..de9cbbaf 100644
--- a/src/scrublet/scrublet.config
+++ b/src/scrublet/scrublet.config
@@ -1,9 +1,9 @@
 includeConfig './conf/base.config'
 includeConfig '../utils/conf/cell_annotate.config'
-params.sc.scrublet.cell_annotate = params.sc.cell_annotate
-params.sc.remove('cell_annotate')
+params.tools.scrublet.cell_annotate = params.tools.cell_annotate
+params.tools.remove('cell_annotate')
 includeConfig '../utils/conf/cell_filter.config'
-params.sc.scrublet.cell_filter = params.sc.cell_filter
-params.sc.remove('cell_filter')
+params.tools.scrublet.cell_filter = params.tools.cell_filter
+params.tools.remove('cell_filter')
 includeConfig './conf/scrublet_defaults.conf'
diff --git a/src/scrublet/workflows/doublet_removal.nf b/src/scrublet/workflows/doublet_removal.nf
index 85be2fdc..1f6dfd21 100644
--- a/src/scrublet/workflows/doublet_removal.nf
+++ b/src/scrublet/workflows/doublet_removal.nf
@@ -86,7 +86,7 @@ workflow DOUBLET_REMOVAL {

         SC__SCRUBLET__DOUBLET_DETECTION_REPORT(
-            file(workflow.projectDir + params.getToolParams("scrublet").doublet_detection.report_ipynb),
+            file(workflow.projectDir + params.tools.scrublet.doublet_detection.report_ipynb),
             SC__SCRUBLET__DOUBLET_DETECTION.out.map {
                 // Extract the Scrublet object file
                 it -> tuple(it[0], it[2])
diff --git a/src/singlecelltoolkit/processes/barcode_correction.nf b/src/singlecelltoolkit/processes/barcode_correction.nf
index 08e14098..4fe9d9ac 100644
--- a/src/singlecelltoolkit/processes/barcode_correction.nf
+++ b/src/singlecelltoolkit/processes/barcode_correction.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : ""

-toolParams = params.getToolParams("singlecelltoolkit")
+toolParams = params.tools.singlecelltoolkit

 process SC__SINGLECELLTOOLKIT__BARCODE_CORRECTION {
diff --git a/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf
index 246c0afc..faf59338 100644
--- a/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf
+++ b/src/singlecelltoolkit/processes/debarcode_10x_scatac_fastqs.nf
@@ -2,7 +2,7 @@ nextflow.enable.dsl=2

 //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : ""

-toolParams = params.getToolParams("singlecelltoolkit")
+toolParams = params.tools.singlecelltoolkit

 process SC__SINGLECELLTOOLKIT__DEBARCODE_10X_FASTQ {
diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config
index 40153265..447802c6 100644
--- a/src/singlecelltoolkit/singlecelltoolkit.config
+++ b/src/singlecelltoolkit/singlecelltoolkit.config
@@ -1,5 +1,5 @@
 params {
-    sc {
+    tools {
         singlecelltoolkit {
             container = 'vibsinglecellnf/singlecelltoolkit:2021-07-06-ea48b36'
             barcode_correction {
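Note: the scrublet help text above describes sample-based parameters as a map keyed by sample name. A concrete sketch of that convention with hypothetical sample IDs and threshold values:

    params {
        tools {
            scrublet {
                threshold = [
                    'SAMPLE_A': 0.25,  // hypothetical per-sample doublet score cutoffs
                    'SAMPLE_B': 0.30
                ]
            }
        }
    }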
"${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("sinto") +toolParams = params.tools.sinto process SC__SINTO__FRAGMENTS { diff --git a/src/sinto/sinto.config b/src/sinto/sinto.config index 1243fd68..7ba9b70c 100644 --- a/src/sinto/sinto.config +++ b/src/sinto/sinto.config @@ -1,5 +1,5 @@ params { - sc { + tools { sinto { container = 'vibsinglecellnf/sinto:0.7.2-dev' fragments { diff --git a/src/soupx/conf/base.config b/src/soupx/conf/base.config index 62526949..780c361c 100644 --- a/src/soupx/conf/base.config +++ b/src/soupx/conf/base.config @@ -1,5 +1,5 @@ params { - sc { + tools { soupx { container = 'vibsinglecellnf/soupx:1.4.8' } diff --git a/src/soupx/conf/soupx_correct.config b/src/soupx/conf/soupx_correct.config index 589f1022..377eaaf2 100644 --- a/src/soupx/conf/soupx_correct.config +++ b/src/soupx/conf/soupx_correct.config @@ -1,5 +1,5 @@ params { - sc { + tools { soupx { roundToInt = false } diff --git a/src/soupx/main.nf b/src/soupx/main.nf index 0eb5d4ae..dd52d05d 100644 --- a/src/soupx/main.nf +++ b/src/soupx/main.nf @@ -35,7 +35,7 @@ workflow soupx { processed = SOUPX_CORRECT( data ) - if(params.hasUtilsParams("publish")) { + if(params.utils?.publish) { PUBLISH( processed, "SOUPX_CORRECT", diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index 17dabfe7..24abc905 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -6,7 +6,7 @@ if(!params.containsKey("test")) { binDir = "" } -toolParams = params.getToolParams("sratoolkit") +toolParams = params.tools.sratoolkit process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { diff --git a/src/sratoolkit/sratoolkit.config b/src/sratoolkit/sratoolkit.config index 9dbc2f68..709ec3de 100644 --- a/src/sratoolkit/sratoolkit.config +++ b/src/sratoolkit/sratoolkit.config @@ -1,5 +1,5 @@ params { - sc { + tools { sratoolkit { container = 'vibsinglecellnf/sratoolkit:2.9.4-1.1.0' // --include-technical option (fasterq-dump) diff --git a/src/star/main.nf b/src/star/main.nf index d0632204..cb129d74 100644 --- a/src/star/main.nf +++ b/src/star/main.nf @@ -22,9 +22,9 @@ include { workflow star { main: - SC__STAR__LOAD_GENOME( file(params.getToolParams("star").map_count.transcriptome) ) - SC__STAR__MAP_COUNT( file(params.getToolParams("star").map_count.transcriptome), SC__STAR__LOAD_GENOME.out, path(params.getToolParams("star").map_count.fastqs) ) - SC__STAR__UNLOAD_GENOME( file(params.getToolParams("star").map_count.transcriptome), SC__STAR__MAP_COUNT.out[0] ) + SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.transcriptome) ) + SC__STAR__MAP_COUNT( file(params.tools.star.map_count.transcriptome), SC__STAR__LOAD_GENOME.out, path(params.tools.star.map_count.fastqs) ) + SC__STAR__UNLOAD_GENOME( file(params.tools.star.map_count.transcriptome), SC__STAR__MAP_COUNT.out[0] ) emit: SC__STAR__MAP_COUNT.out diff --git a/src/star/processes/build_genome.nf b/src/star/processes/build_genome.nf index 070b4da7..eb4532ce 100644 --- a/src/star/processes/build_genome.nf +++ b/src/star/processes/build_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__BUILD_INDEX { - container params.getToolParams("star").container + container params.tools.star.container label 'compute_resources__star_build_genome' input: @@ -13,7 +13,7 @@ process SC__STAR__BUILD_INDEX { file("STAR_index") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("star").build_genome) + def sampleParams = 
params.parseConfig(sampleId, params.global, params.tools.star.build_genome) processParams = sampleParams.local """ mkdir STAR_index diff --git a/src/star/processes/load_genome.nf b/src/star/processes/load_genome.nf index 0e40b910..2b78a38b 100644 --- a/src/star/processes/load_genome.nf +++ b/src/star/processes/load_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__LOAD_GENOME { - container params.getToolParams("star").container + container params.tools.star.container label 'compute_resources__default' input: diff --git a/src/star/processes/map_count.nf b/src/star/processes/map_count.nf index ee662a56..f3a3edc1 100644 --- a/src/star/processes/map_count.nf +++ b/src/star/processes/map_count.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__MAP_COUNT { - container params.getToolParams("star").container + container params.tools.star.container label 'compute_resources__star_map_count' input: @@ -16,7 +16,7 @@ process SC__STAR__MAP_COUNT { tuple val(sample), path("*.STAR_Aligned.sortedByCoord.out.bam"), emit: bam script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getToolParams("star").map_count) + def sampleParams = params.parseConfig(sampleId, params.global, params.tools.star.map_count) processParams = sampleParams.local success = true """ diff --git a/src/star/processes/solo_map_count.nf b/src/star/processes/solo_map_count.nf index d3171ddc..2b76d67d 100644 --- a/src/star/processes/solo_map_count.nf +++ b/src/star/processes/solo_map_count.nf @@ -1,7 +1,7 @@ nextflow.enable.dsl=2 process SC__STAR__SOLO_MAP_COUNT { - container params.getToolParams("star").container + container params.tools.star.container label 'compute_resources__star_map_count' input: @@ -24,10 +24,10 @@ process SC__STAR__SOLO_MAP_COUNT { --soloType Droplet \ --genomeDir ${transcriptome} \ --runThreadN ${task.cpus} \ - ${(params.getToolParams("star").map_count.containsKey('limitBAMsortRAM')) ? '--limitBAMsortRAM ' + params.getToolParams("star").map_count.limitBAMsortRAM: ''} \ - ${(params.getToolParams("star").map_count.containsKey('outSAMtype')) ? '--outSAMtype ' + params.getToolParams("star").map_count.outSAMtype: ''} \ - ${(params.getToolParams("star").map_count.containsKey('quantMode')) ? '--quantMode ' + params.getToolParams("star").map_count.quantMode: ''} \ - ${(params.getToolParams("star").map_count.containsKey('outReadsUnmapped')) ? '--outReadsUnmapped ' + params.getToolParams("star").map_count.outReadsUnmapped: ''} \ + ${(params.tools.star.map_count.containsKey('limitBAMsortRAM')) ? '--limitBAMsortRAM ' + params.tools.star.map_count.limitBAMsortRAM: ''} \ + ${(params.tools.star.map_count.containsKey('outSAMtype')) ? '--outSAMtype ' + params.tools.star.map_count.outSAMtype: ''} \ + ${(params.tools.star.map_count.containsKey('quantMode')) ? '--quantMode ' + params.tools.star.map_count.quantMode: ''} \ + ${(params.tools.star.map_count.containsKey('outReadsUnmapped')) ? '--outReadsUnmapped ' + params.tools.star.map_count.outReadsUnmapped: ''} \ --readFilesIn ${fastqs} \ ${(fastqs.name.endsWith(".gz")) ? 
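Note: the solo_map_count.nf hunk just below only appends a STAR flag when the matching key exists under params.tools.star.map_count (the containsKey ternaries). A sketch of a config enabling three of those optional flags (the values are illustrative; the keys are the ones the process checks):

    params {
        tools {
            star {
                map_count {
                    limitBAMsortRAM = 32000000000
                    outSAMtype = 'BAM SortedByCoordinate'
                    quantMode = 'GeneCounts'
                }
            }
        }
    }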
'--readFilesCommand zcat' : ''} \ --outFileNamePrefix ${_sampleName} diff --git a/src/star/processes/unload_genome.nf b/src/star/processes/unload_genome.nf index 26bd7950..afae2bd3 100644 --- a/src/star/processes/unload_genome.nf +++ b/src/star/processes/unload_genome.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process SC__STAR__UNLOAD_GENOME { - container params.getToolParams("star").container + container params.tools.star.container label 'compute_resources__default' input: diff --git a/src/star/star.config b/src/star/star.config index 7778af07..1fb5743b 100644 --- a/src/star/star.config +++ b/src/star/star.config @@ -1,5 +1,5 @@ params { - sc { + tools { star { version = '2.7.1a' container = "/ddn1/vol1/staging/leuven/res_00001/software/STAR/${params.tools.star.version}/STAR_${params.tools.star.version}.sif" diff --git a/src/trimgalore/processes/trim.nf b/src/trimgalore/processes/trim.nf index 4401103c..02e9a9f9 100644 --- a/src/trimgalore/processes/trim.nf +++ b/src/trimgalore/processes/trim.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" -toolParams = params.getToolParams("trimgalore") +toolParams = params.tools.trimgalore process SC__TRIMGALORE__TRIM { diff --git a/src/trimgalore/trimgalore.config b/src/trimgalore/trimgalore.config index fdb78aff..6d4ee56b 100644 --- a/src/trimgalore/trimgalore.config +++ b/src/trimgalore/trimgalore.config @@ -1,5 +1,5 @@ params { - sc { + tools { trimgalore { container = 'vibsinglecellnf/trimgalore:0.6.6' trim { diff --git a/src/utils/README.md b/src/utils/README.md index e166042a..85237a05 100644 --- a/src/utils/README.md +++ b/src/utils/README.md @@ -6,7 +6,7 @@ The profile `utils_cell_annotate` should be added when generating the main confi ``` params { - sc { + tools { cell_annotate { iff = '10x_cellranger_mex' off = 'h5ad' @@ -30,7 +30,7 @@ The profile `utils_sample_annotate` should be added when generating the main con ``` params { - sc { + tools { sample_annotate { iff = '10x_cellranger_mex' off = 'h5ad' diff --git a/src/utils/conf/base.config b/src/utils/conf/base.config index 7f872648..11f060bf 100644 --- a/src/utils/conf/base.config +++ b/src/utils/conf/base.config @@ -8,7 +8,7 @@ params { mode = 'link' } } - sc { + tools { file_converter { off = 'h5ad' tagCellWithSampleId = true diff --git a/src/utils/conf/cell_annotate.config b/src/utils/conf/cell_annotate.config index 6add0dc0..ab114c31 100644 --- a/src/utils/conf/cell_annotate.config +++ b/src/utils/conf/cell_annotate.config @@ -1,5 +1,5 @@ params { - sc { + tools { cell_annotate { off = 'h5ad' method = 'obo' // or 'aio' diff --git a/src/utils/conf/cell_filter.config b/src/utils/conf/cell_filter.config index 9924f4c7..20a732d3 100644 --- a/src/utils/conf/cell_filter.config +++ b/src/utils/conf/cell_filter.config @@ -1,5 +1,5 @@ params { - sc { + tools { cell_filter { off = 'h5ad' method = 'internal' // or 'external' (requires the following additional params cellMetaDataFilePath, sampleColumnName, indexColumnName) diff --git a/src/utils/conf/h5ad_clean.config b/src/utils/conf/h5ad_clean.config index 8d32aa26..d33c038b 100644 --- a/src/utils/conf/h5ad_clean.config +++ b/src/utils/conf/h5ad_clean.config @@ -1,5 +1,5 @@ params { - sc { + tools { file_cleaner { obsColumnMapper = [] obsColumnValueMapper = [] diff --git a/src/utils/conf/h5ad_concatenate.config b/src/utils/conf/h5ad_concatenate.config index aeb602ba..db867b60 100644 --- a/src/utils/conf/h5ad_concatenate.config +++ 
b/src/utils/conf/h5ad_concatenate.config @@ -1,5 +1,5 @@ params { - sc { + tools { file_concatenator { join = 'outer' off = 'h5ad' diff --git a/src/utils/conf/sample_annotate.config b/src/utils/conf/sample_annotate.config index 57697cce..80dc3512 100644 --- a/src/utils/conf/sample_annotate.config +++ b/src/utils/conf/sample_annotate.config @@ -1,5 +1,5 @@ params { - sc { + tools { sample_annotate { off = 'h5ad' by { diff --git a/src/utils/conf/sample_annotate_old_v1.config b/src/utils/conf/sample_annotate_old_v1.config index 2ee74b6a..73f9344d 100644 --- a/src/utils/conf/sample_annotate_old_v1.config +++ b/src/utils/conf/sample_annotate_old_v1.config @@ -1,5 +1,5 @@ params { - sc { + tools { sample_annotate_v1 { iff = '10x_cellranger_mex' off = 'h5ad' diff --git a/src/utils/conf/scope.config b/src/utils/conf/scope.config index d4c59376..ac9f6edd 100644 --- a/src/utils/conf/scope.config +++ b/src/utils/conf/scope.config @@ -1,5 +1,5 @@ params { - sc { + tools { scope { genome = '' tree { diff --git a/src/utils/conf/test.config b/src/utils/conf/test.config index 2871011c..4a3b63cc 100644 --- a/src/utils/conf/test.config +++ b/src/utils/conf/test.config @@ -1,5 +1,5 @@ params { - sc { + tools { scanpy { container = 'vibsinglecellnf/scanpy:1.8.1' } diff --git a/src/utils/main.test.nf b/src/utils/main.test.nf index 0b93856a..f0f6fa93 100644 --- a/src/utils/main.test.nf +++ b/src/utils/main.test.nf @@ -76,7 +76,7 @@ workflow { SC__ANNOTATE_BY_SAMPLE_METADATA; } from './processes/h5adAnnotate' params(params) // Run - if(params.hasUtilsParams("sample_annotate")) { + if(params.utils?.sample_annotate) { getDataChannel | \ SC__FILE_CONVERTER | \ SC__ANNOTATE_BY_SAMPLE_METADATA @@ -90,7 +90,7 @@ workflow { STATIC__ANNOTATE_BY_CELL_METADATA as ANNOTATE_BY_CELL_METADATA; } from './workflows/annotateByCellMetadata' params(params) // Run - if(params.hasUtilsParams("cell_annotate")) { + if(params.utils?.cell_annotate) { getDataChannel | \ SC__FILE_CONVERTER ANNOTATE_BY_CELL_METADATA( @@ -107,7 +107,7 @@ workflow { FILTER_BY_CELL_METADATA; } from './workflows/filterByCellMetadata' params(params) // Run - if(params.hasUtilsParams("cell_filter")) { + if(params.utils?.cell_filter) { getDataChannel | \ SC__FILE_CONVERTER FILTER_BY_CELL_METADATA( @@ -125,7 +125,7 @@ workflow { FILTER_BY_CELL_METADATA; } from './workflows/filterByCellMetadata' params(params) // Run - if(params.hasUtilsParams("cell_annotate")) { + if(params.utils?.cell_annotate) { getDataChannel | \ SC__FILE_CONVERTER @@ -149,7 +149,7 @@ workflow { } from './processes/sra' params(params) // Run sra = getSRAChannel( params.data.sra ) - db = file(params.getUtilsParams("sra_metadata").sraDbOutDir + "/SRAmetadb.sqlite") + db = file(params.utils.sra_metadata.sraDbOutDir + "/SRAmetadb.sqlite") SRA_TO_METADATA( sra, db ) break; case "GET_METADATA_FROM_SRA_WEB": @@ -221,7 +221,7 @@ workflow { SC__H5AD_BEAUTIFY; } from './processes/h5adUpdate' params(params) // Run - if(params.hasUtilsParams("file_cleaner")) { + if(params.utils?.file_cleaner) { getDataChannel | \ map { it -> tuple(it[0], it[1], null) diff --git a/src/utils/processes/h5adAnnotate.nf b/src/utils/processes/h5adAnnotate.nf index bd718f0e..952c57e5 100644 --- a/src/utils/processes/h5adAnnotate.nf +++ b/src/utils/processes/h5adAnnotate.nf @@ -23,7 +23,7 @@ def getMode = { toolName -> process SC__ANNOTATE_BY_CELL_METADATA { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${getPublishDir(params.global.outdir,tool)}", mode: 
"${getMode(tool)}", overwrite: true label 'compute_resources__default' @@ -44,7 +44,7 @@ process SC__ANNOTATE_BY_CELL_METADATA { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.getUtilsParams("cell_annotate") : params.getToolParams(tool)["cell_annotate"] + isParamNull(tool) ? params.utils.cell_annotate : params.getToolParams(tool)["cell_annotate"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' @@ -82,7 +82,7 @@ def hasMetadataFilePath(processParams) { process SC__ANNOTATE_BY_SAMPLE_METADATA { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true label 'compute_resources__default' @@ -97,7 +97,7 @@ process SC__ANNOTATE_BY_SAMPLE_METADATA { path("${sampleId}.SC__ANNOTATE_BY_SAMPLE_METADATA.${processParams.off}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("sample_annotate")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.sample_annotate) processParams = sampleParams.local // method / type param diff --git a/src/utils/processes/h5adExtractMetadata.nf b/src/utils/processes/h5adExtractMetadata.nf index eaccb791..9409c96d 100644 --- a/src/utils/processes/h5adExtractMetadata.nf +++ b/src/utils/processes/h5adExtractMetadata.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : process SC__UTILS__EXTRACT_FEATURE_METADATA { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true label 'compute_resources__default' @@ -18,7 +18,7 @@ process SC__UTILS__EXTRACT_FEATURE_METADATA { tuple val(sampleId), path("${sampleId}.SC__UTILS__EXTRACT_FEATURE_METADATA.tsv") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("extract_feature_metadata")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.extract_feature_metadata) processParams = sampleParams.local columnNamesAsArguments = processParams.columnNames.collect({ '--column-name' + ' ' + it }).join(' ') """ diff --git a/src/utils/processes/h5adMerge.nf b/src/utils/processes/h5adMerge.nf index 1724fd15..f1f108cc 100644 --- a/src/utils/processes/h5adMerge.nf +++ b/src/utils/processes/h5adMerge.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : process SC__H5AD_MERGE { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf index a4229187..2c1d284d 100644 --- a/src/utils/processes/h5adSubset.nf +++ b/src/utils/processes/h5adSubset.nf @@ -11,7 +11,7 @@ include { process SC__PREPARE_OBS_FILTER { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true label 'compute_resources__default' @@ -33,7 +33,7 @@ process SC__PREPARE_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.getUtilsParams("cell_filter") : params.getToolParams(tool)["cell_filter"] + isParamNull(tool) ? 
params.utils.cell_filter : params.getToolParams(tool)["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' @@ -43,7 +43,7 @@ process SC__PREPARE_OBS_FILTER { input = f } else if (processParams.method == 'external') { if(!filterConfig.cellMetaDataFilePath) { - throw new Exception("VSN ERROR: A filter in params.sc.cell_filter does not provide a cellMetaDataFilePath entry.") + throw new Exception("VSN ERROR: A filter in params.tools.cell_filter does not provide a cellMetaDataFilePath entry.") } input = filterConfig.cellMetaDataFilePath } else { @@ -70,7 +70,7 @@ process SC__PREPARE_OBS_FILTER { process SC__APPLY_OBS_FILTER { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true label 'compute_resources__default' @@ -91,7 +91,7 @@ process SC__APPLY_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.getUtilsParams("cell_filter") : params.getToolParams(tool)["cell_filter"] + isParamNull(tool) ? params.utils.cell_filter : params.getToolParams(tool)["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' diff --git a/src/utils/processes/h5adToLoom.nf b/src/utils/processes/h5adToLoom.nf index 77f7049b..56a439be 100644 --- a/src/utils/processes/h5adToLoom.nf +++ b/src/utils/processes/h5adToLoom.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : process SC__H5AD_TO_LOOM { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/loom", mode: 'link', overwrite: true, saveAs: { filename -> "${sampleId}.SCope_output.loom" } label 'compute_resources__mem' @@ -43,7 +43,7 @@ process SC__H5AD_TO_LOOM { process SC__H5AD_TO_FILTERED_LOOM { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' diff --git a/src/utils/processes/h5adUpdate.nf b/src/utils/processes/h5adUpdate.nf index f2d42bcd..1f5dad23 100644 --- a/src/utils/processes/h5adUpdate.nf +++ b/src/utils/processes/h5adUpdate.nf @@ -8,7 +8,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : process SC__H5AD_UPDATE_X_PCA { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container label 'compute_resources__mem' input: @@ -34,7 +34,7 @@ process SC__H5AD_UPDATE_X_PCA { process SC__H5AD_CLEAN { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container label 'compute_resources__mem' input: @@ -61,7 +61,7 @@ process SC__H5AD_CLEAN { process SC__H5AD_BEAUTIFY { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -78,7 +78,7 @@ process SC__H5AD_BEAUTIFY { val(stashedParams) script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("file_cleaner")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.file_cleaner) processParams = sampleParams.local obsColumnsToRemoveAsArgument = processParams.containsKey("obsColumnsToRemove") ? 
diff --git a/src/utils/processes/h5adUpdateMetadata.nf b/src/utils/processes/h5adUpdateMetadata.nf index d4d4d08a..4cbfcd26 100644 --- a/src/utils/processes/h5adUpdateMetadata.nf +++ b/src/utils/processes/h5adUpdateMetadata.nf @@ -7,7 +7,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : process SC__UTILS__UPDATE_FEATURE_METADATA_INDEX { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true label 'compute_resources__default' @@ -18,7 +18,7 @@ process SC__UTILS__UPDATE_FEATURE_METADATA_INDEX { tuple val(sampleId), path("${sampleId}.SC__UTILS__UPDATE_FEATURE_METADATA_INDEX.h5ad") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("update_feature_metadata_index")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.update_feature_metadata_index) processParams = sampleParams.local """ ${binDir}/sc_h5ad_update_metadata.py \ diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index ffddbf81..d73d1032 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -143,12 +143,12 @@ def runRConverter = { def getConverterContainer = { params, type -> switch(type) { case "cistopic": - return params.getToolParams("cistopic").container + return params.tools.cistopic.container case "r": return "vibsinglecellnf/scconverter:0.0.1" break; case "python": - return params.getToolParams("scanpy").container + return params.tools.scanpy.container } } @@ -197,7 +197,7 @@ process SC__FILE_CONVERTER { path("${sampleId}.SC__FILE_CONVERTER.${outputExtension}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("file_converter")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.file_converter) processParams = sampleParams.local switch(inputDataType) { @@ -305,7 +305,7 @@ process SC__FILE_CONVERTER_FROM_SCE { path("${sampleId}.SC__FILE_CONVERTER_FROM_SCE.${outputDataType}") script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("file_converter")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.file_converter) processParams = sampleParams.local def _outputDataType = outputDataType converterToUse = getConverter( @@ -331,7 +331,7 @@ process SC__FILE_CONVERTER_FROM_SCE { process SC__FILE_CONCATENATOR { cache 'deep' - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -342,7 +342,7 @@ process SC__FILE_CONCATENATOR { tuple val(params.global.project_name), path("${params.global.project_name}.SC__FILE_CONCATENATOR.${processParams.off}") script: - processParams = params.getUtilsParams("file_concatenator") + processParams = params.utils.file_concatenator """ ${binDir}/sc_file_concatenator.py \ --file-format $processParams.off \ @@ -354,7 +354,7 @@ process SC__FILE_CONCATENATOR { process SC__STAR_CONCATENATOR() { - container params.getToolParams("scanpy").container + container params.tools.scanpy.container publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true label 'compute_resources__mem' @@ -369,7 +369,7 @@ process SC__STAR_CONCATENATOR() { path("${params.global.project_name}.SC__STAR_CONCATENATOR.${processParams.off}") 
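// The recurring edit in this patch swaps the hasUtilsParams()/getUtilsParams()
// helpers for direct map access via Groovy's safe-navigation operator. A
// minimal sketch of why that works, using a plain Groovy map in place of the
// real Nextflow params object (the entry names here are illustrative only):
def cfg = [utils: [cell_filter: [off: 'h5ad']]]
// Safe navigation short-circuits to null when any key in the chain is missing...
assert cfg.utils?.sample_annotate == null
// ...and null is falsy under Groovy truth, so the lookup can sit directly in a guard:
if (cfg.utils?.cell_filter) {
    println 'cell_filter is configured'
}
// Caveat: an empty config block is also falsy, so `utils { cell_filter {} }` would skip the guard.
assert !([utils: [cell_filter: [:]]].utils?.cell_filter)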
script: - def sampleParams = params.parseConfig(sampleId, params.global, params.getUtilsParams("star_concatenator")) + def sampleParams = params.parseConfig(sampleId, params.global, params.utils.star_concatenator) processParams = sampleParams.local id = params.global.project_name """ @@ -401,7 +401,7 @@ def getOutputFileName(params, tag, f, fileOutputSuffix, isParameterExplorationMo return isParamNull(fileOutputSuffix) ? "${tag}.${stashedParams.findAll { it != 'NULL' }.join('_')}.${f.extension}" : "${tag}.${fileOutputSuffix}.${stashedParams.findAll { it != 'NULL' }.join('_')}.${f.extension}" - def utilsPublishParams = params.getUtilsParams("publish") + def utilsPublishParams = params.utils.publish if(utilsPublishParams?.pipelineOutputSuffix) { if(utilsPublishParams.pipelineOutputSuffix == 'none') return "${tag}.${f.extension}" @@ -483,9 +483,9 @@ process COMPRESS_HDF5() { val(stashedParams) shell: - def compressionLevel = params.hasUtilsParams("publish") && - params.getUtilsParams("publish")?.compressionLevel ? - params.getUtilsParams("publish").compressionLevel : + def compressionLevel = params.utils?.publish && + params.utils.publish?.compressionLevel ? + params.utils.publish.compressionLevel : 6 outputFileName = getOutputFileName( diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index 6f10e4e3..916b5b7d 100644 --- a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -27,14 +27,14 @@ workflow ANNOTATE_BY_CELL_METADATA { // Values // - tool != null: // - The given tool is performing itself a cell-based annotation - // - params.sc[tool] should exist + // - params.tools[tool] should exist // - tool == null: - // - params.sc.cell_annotate should exist + // - params.tools.cell_annotate should exist tool main: def workflowParams = isParamNull(tool) ? 
- params.getUtilsParams("cell_annotate") : + params.utils.cell_annotate : params.getToolParams(tool)["cell_annotate"] def method = workflowParams.method if(method == 'aio') { diff --git a/src/utils/workflows/filterAnnotateClean.nf b/src/utils/workflows/filterAnnotateClean.nf index 7b040578..b7ebebc2 100644 --- a/src/utils/workflows/filterAnnotateClean.nf +++ b/src/utils/workflows/filterAnnotateClean.nf @@ -30,23 +30,23 @@ workflow FILTER_AND_ANNOTATE_AND_CLEAN { main: out = data - if(params.hasUtilsParams("update_feature_metadata_index")) { + if(params.utils?.update_feature_metadata_index) { out = UPDATE_FEATURE_NOMENCLATURE( data ) } // Filter cells based on an indexed cell-based metadata table - if(params.hasUtilsParams("cell_filter")) { + if(params.utils?.cell_filter) { out = FILTER_BY_CELL_METADATA( out, 'NULL' ) } // Annotate cells based on an indexed cell-based metadata table - if(params.hasUtilsParams("cell_annotate")) { + if(params.utils?.cell_annotate) { out = STATIC__ANNOTATE_BY_CELL_METADATA( out, null ) } // Annotate cells based on an indexed sample-based metadata table - if(params.hasUtilsParams("sample_annotate")) { - if (!hasMetadataFilePath(params.getUtilsParams("sample_annotate"))) { + if(params.utils?.sample_annotate) { + if (!hasMetadataFilePath(params.utils.sample_annotate)) { throw new Exception("The metadataFilePath param is missing in sample_annotate.") } out = SC__ANNOTATE_BY_SAMPLE_METADATA( out ) @@ -54,7 +54,7 @@ workflow FILTER_AND_ANNOTATE_AND_CLEAN { // Clean // e.g.: // - h5ad: rename adata.obs values, remove adata.obs columns - if(params.hasUtilsParams("file_cleaner")) { + if(params.utils?.file_cleaner) { out = SC__H5AD_BEAUTIFY( out ) } diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf index bd1b95a1..e38d4c69 100644 --- a/src/utils/workflows/filterByCellMetadata.nf +++ b/src/utils/workflows/filterByCellMetadata.nf @@ -25,14 +25,14 @@ workflow FILTER_BY_CELL_METADATA { // Values // - tool != null: // - The given tool is performing itself a cell-based filtering - // - params.sc[tool] should exist + // - params.tools[tool] should exist // - tool == null: - // - params.sc.cell_filter should exist + // - params.tools.cell_filter should exist tool main: def workflowParams = isParamNull(tool) ? - params.getUtilsParams("cell_filter") : + params.utils.cell_filter : params.getToolParams(tool)["cell_filter"] Channel diff --git a/workflows/bbknn.nf b/workflows/bbknn.nf index 28ac4aab..1bb188b0 100644 --- a/workflows/bbknn.nf +++ b/workflows/bbknn.nf @@ -74,7 +74,7 @@ workflow bbknn { */ // To avoid Variable `params` already defined in the process scope - def scanpyParams = params.getToolParams("scanpy") + def scanpyParams = params.tools.scanpy out = data | \ SC__FILE_CONVERTER | \ @@ -83,7 +83,7 @@ workflow bbknn { if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.hasUtilsParams("file_concatenator")) { + if(params.utils?.file_concatenator) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -117,7 +117,7 @@ workflow bbknn { // Finalize FINALIZE( - params.hasUtilsParams("file_concatenator") ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.utils?.file_concatenator ? 
SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, BEC_BBKNN.out.data, 'BBKNN.final_output' ) @@ -126,7 +126,7 @@ workflow bbknn { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode - if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -138,7 +138,7 @@ workflow bbknn { project = BEC_BBKNN.out.data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) + file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) // Collect the reports: diff --git a/workflows/harmony.nf b/workflows/harmony.nf index 3234b940..8d654cb4 100644 --- a/workflows/harmony.nf +++ b/workflows/harmony.nf @@ -75,7 +75,7 @@ workflow harmony { * Data processing */ // To avoid Variable `params` already defined in the process scope - def scanpyParams = params.getToolParams("scanpy") + def scanpyParams = params.tools.scanpy out = data | \ SC__FILE_CONVERTER | \ @@ -84,7 +84,7 @@ workflow harmony { if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.hasUtilsParams("file_concatenator")) { + if(params.utils?.file_concatenator) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -118,7 +118,7 @@ workflow harmony { // Finalize FINALIZE( - params.hasUtilsParams("file_concatenator") ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.utils?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, BEC_HARMONY.out.data, 'HARMONY.final_output' ) @@ -127,7 +127,7 @@ workflow harmony { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode - if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -139,7 +139,7 @@ workflow harmony { project = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) + file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) // Collect the reports: diff --git a/workflows/mnncorrect.nf b/workflows/mnncorrect.nf index 0c5d79fb..fbc151e6 100644 --- a/workflows/mnncorrect.nf +++ b/workflows/mnncorrect.nf @@ -84,7 +84,7 @@ workflow mnncorrect { * Data processing */ // To avoid Variable `params` already defined in the process scope - def scanpyParams = params.getToolParams("scanpy") + def scanpyParams = params.tools.scanpy out = data | \ SC__FILE_CONVERTER | \ @@ -93,7 +93,7 @@ workflow mnncorrect { if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.hasUtilsParams("file_concatenator")) { + if(params.utils?.file_concatenator) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -125,7 +125,7 @@ workflow mnncorrect { // Finalize FINALIZE( - params.hasUtilsParams("file_concatenator") ? 
SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.utils?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, BEC_MNNCORRECT.out.data, 'MNNCORRECT.final_output' ) @@ -134,7 +134,7 @@ workflow mnncorrect { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode - if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -146,7 +146,7 @@ workflow mnncorrect { project = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) + file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) // Collect the reports: diff --git a/workflows/multi_sample.nf b/workflows/multi_sample.nf index 17be872f..51eef352 100644 --- a/workflows/multi_sample.nf +++ b/workflows/multi_sample.nf @@ -81,7 +81,7 @@ workflow multi_sample { * Data processing */ // To avoid Variable `params` already defined in the process scope - def scanpyParams = params.getToolParams("scanpy") + def scanpyParams = params.tools.scanpy out = data | \ SC__FILE_CONVERTER | \ @@ -90,7 +90,7 @@ workflow multi_sample { if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.hasUtilsParams("file_concatenator")) { + if(params.utils?.file_concatenator) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -114,7 +114,7 @@ workflow multi_sample { // Finalize FINALIZE( - params.hasUtilsParams("file_concatenator") ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + params.utils?.file_concatenator ? 
SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, CLUSTER_IDENTIFICATION.out.marker_genes, 'MULTI_SAMPLE.final_output', ) @@ -123,7 +123,7 @@ workflow multi_sample { def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) // Select a default clustering when in parameter exploration mode - if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { scopeloom = FINALIZE.out.scopeloom @@ -146,7 +146,7 @@ workflow multi_sample { samples = data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) + file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) ipynbs = QC_FILTER.out.report.map { diff --git a/workflows/nemesh.nf b/workflows/nemesh.nf index 85fdab09..d880898e 100644 --- a/workflows/nemesh.nf +++ b/workflows/nemesh.nf @@ -84,11 +84,11 @@ workflow nemesh { data.subscribe { println it } // Check if custom selected barcodes file has been specified - if (params.getToolParams("nemesh").custom_selected_barcodes) { + if (params.tools.nemesh.custom_selected_barcodes) { Channel - .fromPath(params.getToolParams("nemesh").custom_selected_barcodes) + .fromPath(params.tools.nemesh.custom_selected_barcodes) .map { - path -> tuple(path.baseName.split('\\.')[0], params.getToolParams("nemesh").custom_selected_barcodes, path) + path -> tuple(path.baseName.split('\\.')[0], params.tools.nemesh.custom_selected_barcodes, path) } .set { selectedBarcodesByCustom } selectedBarcodesByCustom.subscribe { println it } @@ -138,7 +138,7 @@ workflow nemesh { a = FINAL_BAM.combine(SC__DROPLET_UTILS__BARCODE_SELECTION.out.selectedCellBarcodesByKnee, by: 0) b = FINAL_BAM.combine(SC__DROPLET_UTILS__BARCODE_SELECTION.out.selectedCellBarcodesByInflection, by: 0) - if (params.getToolParams("nemesh").custom_selected_barcodes) { + if (params.tools.nemesh.custom_selected_barcodes) { c = FINAL_BAM.combine(selectedBarcodesByCustom, by: 0) SC__DROP_SEQ_TOOLS__DIGITAL_EXPRESSION( a.mix(b,c) diff --git a/workflows/single_sample.nf b/workflows/single_sample.nf index b6907e01..dc37ffef 100644 --- a/workflows/single_sample.nf +++ b/workflows/single_sample.nf @@ -30,10 +30,10 @@ workflow single_sample { SCANPY__SINGLE_SAMPLE( SC__FILE_CONVERTER.out ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) // Select a default clustering when in parameter exploration mode - if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( SCANPY__SINGLE_SAMPLE.out.final_processed_scope_loom ) diff --git a/workflows/single_sample_star.nf b/workflows/single_sample_star.nf index d5b802e5..89bb4075 100644 --- a/workflows/single_sample_star.nf +++ b/workflows/single_sample_star.nf @@ -72,16 +72,16 @@ workflow single_sample_star { data = STAR() samples = data.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( - file(workflow.projectDir + params.getUtilsParams("workflow_configuration").report_ipynb) + file(workflow.projectDir + 
params.utils.workflow_configuration.report_ipynb) ) out = FILTER_AND_ANNOTATE_AND_CLEAN( data ) - if(params.getToolParams("scanpy").containsKey("filter")) { + if(params.tools.scanpy.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } NORMALIZE_TRANSFORM( out ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - if(params.getToolParams("scanpy").containsKey("regress_out")) { + if(params.tools.scanpy.containsKey("regress_out")) { preprocessed_data = SC__SCANPY__REGRESS_OUT( HVG_SELECTION.out.scaled ) } else { preprocessed_data = HVG_SELECTION.out.scaled @@ -106,10 +106,10 @@ workflow single_sample_star { ) // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.getToolParams("scanpy").clustering) ) + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) // Select a default clustering when in parameter exploration mode - if(params.hasToolParams('directs') && clusteringParams.isParameterExplorationModeOn()) { + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( scopeloom ) } diff --git a/workflows/star.nf b/workflows/star.nf index 81c972f9..6e9cb66b 100644 --- a/workflows/star.nf +++ b/workflows/star.nf @@ -29,14 +29,14 @@ include { workflow star { main: - SC__STAR__LOAD_GENOME( file(params.getToolParams("star").map_count.index) ) + SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.index) ) SC__STAR__MAP_COUNT( - file(params.getToolParams("star").map_count.index), + file(params.tools.star.map_count.index), SC__STAR__LOAD_GENOME.out, - getSingleEndChannel(params.getToolParams("star").map_count.fastqs) + getSingleEndChannel(params.tools.star.map_count.fastqs) ) SC__STAR__UNLOAD_GENOME( - file(params.getToolParams("star").map_count.index), + file(params.tools.star.map_count.index), SC__STAR__MAP_COUNT.out.isDone.collect() ) SC__STAR_CONCATENATOR( SC__STAR__MAP_COUNT.out.counts.map { it[1] }.collect() ) From 32ef310594462bee3fed18f9e5451c7cd1e28284 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 17:35:58 +0100 Subject: [PATCH 167/202] misc config entry was wrongly renamed --- conf/test.config | 2 +- conf/test_disabled.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index f28e01eb..8c00d078 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,5 +1,5 @@ params { - mitools { + misc { test { enabled = true } diff --git a/conf/test_disabled.config b/conf/test_disabled.config index 9c619278..76ebe776 100644 --- a/conf/test_disabled.config +++ b/conf/test_disabled.config @@ -1,5 +1,5 @@ params { - mitools { + misc { test { enabled = false } From b7b250cb6f8c2204e55ff644035634602389bd89 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 17:47:19 +0100 Subject: [PATCH 168/202] scope utils config under params.utils --- conf/test__bbknn.config | 3 --- conf/test__bbknn_scenic.config | 3 --- conf/test__cell_annotate_filter.config | 2 +- conf/test__decontx.config | 3 --- conf/test__harmony.config | 3 --- conf/test__mnncorrect.config | 3 --- conf/test__scenic_multiruns.config | 3 --- conf/test__single_sample.config | 3 --- conf/test__single_sample_decontx_correct.config | 3 --- ...__single_sample_decontx_correct_scrublet.config | 3 --- conf/test__single_sample_decontx_filter.config | 3 --- conf/test__single_sample_param_exploration.config | 3 --- conf/test__single_sample_scenic.config | 3 --- 
conf/test__single_sample_scenic_multiruns.config | 3 --- conf/test__single_sample_scrublet.config | 3 --- src/celda/conf/decontx_filter.config | 4 ++-- src/scrublet/scrublet.config | 4 ++-- src/utils/conf/base.config | 14 ++++++-------- src/utils/conf/cell_annotate.config | 2 +- src/utils/conf/cell_filter.config | 2 +- src/utils/conf/h5ad_clean.config | 2 +- src/utils/conf/h5ad_concatenate.config | 2 +- src/utils/conf/sample_annotate.config | 2 +- src/utils/conf/sample_annotate_old_v1.config | 2 +- src/utils/conf/scope.config | 2 +- src/utils/processes/h5adSubset.nf | 2 +- src/utils/workflows/annotateByCellMetadata.nf | 2 +- src/utils/workflows/filterByCellMetadata.nf | 2 +- 28 files changed, 21 insertions(+), 65 deletions(-) diff --git a/conf/test__bbknn.config b/conf/test__bbknn.config index f1cc88dd..a0812172 100644 --- a/conf/test__bbknn.config +++ b/conf/test__bbknn.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__bbknn_scenic.config b/conf/test__bbknn_scenic.config index 7cfa1bf4..80ae0b68 100644 --- a/conf/test__bbknn_scenic.config +++ b/conf/test__bbknn_scenic.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__cell_annotate_filter.config b/conf/test__cell_annotate_filter.config index 7ce386fe..40e1f437 100644 --- a/conf/test__cell_annotate_filter.config +++ b/conf/test__cell_annotate_filter.config @@ -8,7 +8,7 @@ params { cellranger_mex = 'sample_data/outs' } } - tools { + utils { file_converter { off = 'h5ad' tagCellWithSampleId = false diff --git a/conf/test__decontx.config b/conf/test__decontx.config index 55e63711..d5638368 100644 --- a/conf/test__decontx.config +++ b/conf/test__decontx.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } celda { container = 'vibsinglecellnf/celda:1.4.5' decontx { diff --git a/conf/test__harmony.config b/conf/test__harmony.config index ea3562da..b3261d8d 100644 --- a/conf/test__harmony.config +++ b/conf/test__harmony.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__mnncorrect.config b/conf/test__mnncorrect.config index 327f41c7..b4da9ca0 100644 --- a/conf/test__mnncorrect.config +++ b/conf/test__mnncorrect.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__scenic_multiruns.config b/conf/test__scenic_multiruns.config index fa59bb26..75b85a07 100644 --- a/conf/test__scenic_multiruns.config +++ b/conf/test__scenic_multiruns.config @@ -4,9 +4,6 @@ params { project_name = 'scenic_multiruns_CI' } tools { - file_annotator { - metadataFilePath = '' - } scenic { filteredLoom = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_small.loom' grn { diff --git a/conf/test__single_sample.config b/conf/test__single_sample.config index 5ca6efd4..b6b92cfc 100644 --- a/conf/test__single_sample.config +++ b/conf/test__single_sample.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_decontx_correct.config b/conf/test__single_sample_decontx_correct.config index 7b730bf6..4f5e61b1 100644 --- a/conf/test__single_sample_decontx_correct.config +++ 
b/conf/test__single_sample_decontx_correct.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_decontx_correct_scrublet.config b/conf/test__single_sample_decontx_correct_scrublet.config index 27fa6007..a7f6878c 100644 --- a/conf/test__single_sample_decontx_correct_scrublet.config +++ b/conf/test__single_sample_decontx_correct_scrublet.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_decontx_filter.config b/conf/test__single_sample_decontx_filter.config index ad59c4a7..5be49bd4 100644 --- a/conf/test__single_sample_decontx_filter.config +++ b/conf/test__single_sample_decontx_filter.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_param_exploration.config b/conf/test__single_sample_param_exploration.config index 4c2212d9..e4a78fd5 100644 --- a/conf/test__single_sample_param_exploration.config +++ b/conf/test__single_sample_param_exploration.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_scenic.config b/conf/test__single_sample_scenic.config index 56f8bc6e..f0ec5f7e 100644 --- a/conf/test__single_sample_scenic.config +++ b/conf/test__single_sample_scenic.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_scenic_multiruns.config b/conf/test__single_sample_scenic_multiruns.config index e2102f76..1a79aba4 100644 --- a/conf/test__single_sample_scenic_multiruns.config +++ b/conf/test__single_sample_scenic_multiruns.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/conf/test__single_sample_scrublet.config b/conf/test__single_sample_scrublet.config index c30a3c63..877bb7c4 100644 --- a/conf/test__single_sample_scrublet.config +++ b/conf/test__single_sample_scrublet.config @@ -9,9 +9,6 @@ params { } } tools { - file_annotator { - metadataFilePath = '' - } scanpy { filter { cellFilterMinNGenes = 1 diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 55e07ecd..68e65fb4 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -1,8 +1,8 @@ includeConfig '../../utils/conf/cell_annotate.config' -params.tools.celda.decontx.cell_annotate = params.tools.cell_annotate +params.tools.celda.decontx.cell_annotate = params.utils.cell_annotate params.tools.remove('cell_annotate') includeConfig '../../utils/conf/cell_filter.config' -params.tools.celda.decontx.cell_filter = params.tools.cell_filter +params.tools.celda.decontx.cell_filter = params.utils.cell_filter params.tools.remove('cell_filter') includeConfig './decontx_filter_defaults.config' diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config index de9cbbaf..5665e399 100644 --- a/src/scrublet/scrublet.config +++ b/src/scrublet/scrublet.config @@ -1,9 +1,9 @@ includeConfig './conf/base.config' includeConfig '../utils/conf/cell_annotate.config' -params.tools.scrublet.cell_annotate = params.tools.cell_annotate +params.tools.scrublet.cell_annotate = params.utils.cell_annotate 
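// The assignment above and the remove() below are the include-then-rescope
// pattern used by the per-tool configs: includeConfig first defines the shared
// defaults under params.utils, the assignment copies them under the tool's own
// namespace, and remove() then drops the shared entry so it cannot leak into
// other tools. A minimal sketch of the pattern with a hypothetical tool key
// 'mytool' (the remove() target is corrected to params.utils by a later patch
// in this series):
//
//   includeConfig '../utils/conf/cell_annotate.config' // defines params.utils.cell_annotate
//   params.tools.mytool.cell_annotate = params.utils.cell_annotate
//   params.utils.remove('cell_annotate')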
params.tools.remove('cell_annotate') includeConfig '../utils/conf/cell_filter.config' -params.tools.scrublet.cell_filter = params.tools.cell_filter +params.tools.scrublet.cell_filter = params.utils.cell_filter params.tools.remove('cell_filter') includeConfig './conf/scrublet_defaults.conf' diff --git a/src/utils/conf/base.config b/src/utils/conf/base.config index 11f060bf..608c2ad2 100644 --- a/src/utils/conf/base.config +++ b/src/utils/conf/base.config @@ -1,14 +1,6 @@ params { utils { container = 'vibsinglecellnf/utils:0.4.0' - publish { - // pipelineOutputSuffix = '' - compressionLevel = 6 - annotateWithBatchVariableName = false - mode = 'link' - } - } - tools { file_converter { off = 'h5ad' tagCellWithSampleId = true @@ -16,5 +8,11 @@ params { useFilteredMatrix = true makeVarIndexUnique = false } + publish { + // pipelineOutputSuffix = '' + compressionLevel = 6 + annotateWithBatchVariableName = false + mode = 'link' + } } } diff --git a/src/utils/conf/cell_annotate.config b/src/utils/conf/cell_annotate.config index ab114c31..089eade3 100644 --- a/src/utils/conf/cell_annotate.config +++ b/src/utils/conf/cell_annotate.config @@ -1,5 +1,5 @@ params { - tools { + utils { cell_annotate { off = 'h5ad' method = 'obo' // or 'aio' diff --git a/src/utils/conf/cell_filter.config b/src/utils/conf/cell_filter.config index 20a732d3..92bce446 100644 --- a/src/utils/conf/cell_filter.config +++ b/src/utils/conf/cell_filter.config @@ -1,5 +1,5 @@ params { - tools { + utils { cell_filter { off = 'h5ad' method = 'internal' // or 'external' (requires the following additional params cellMetaDataFilePath, sampleColumnName, indexColumnName) diff --git a/src/utils/conf/h5ad_clean.config b/src/utils/conf/h5ad_clean.config index d33c038b..f63827a2 100644 --- a/src/utils/conf/h5ad_clean.config +++ b/src/utils/conf/h5ad_clean.config @@ -1,5 +1,5 @@ params { - tools { + utils { file_cleaner { obsColumnMapper = [] obsColumnValueMapper = [] diff --git a/src/utils/conf/h5ad_concatenate.config b/src/utils/conf/h5ad_concatenate.config index db867b60..ec3fe291 100644 --- a/src/utils/conf/h5ad_concatenate.config +++ b/src/utils/conf/h5ad_concatenate.config @@ -1,5 +1,5 @@ params { - tools { + utils { file_concatenator { join = 'outer' off = 'h5ad' diff --git a/src/utils/conf/sample_annotate.config b/src/utils/conf/sample_annotate.config index 80dc3512..cc69fcc3 100644 --- a/src/utils/conf/sample_annotate.config +++ b/src/utils/conf/sample_annotate.config @@ -1,5 +1,5 @@ params { - tools { + utils { sample_annotate { off = 'h5ad' by { diff --git a/src/utils/conf/sample_annotate_old_v1.config b/src/utils/conf/sample_annotate_old_v1.config index 73f9344d..a427f397 100644 --- a/src/utils/conf/sample_annotate_old_v1.config +++ b/src/utils/conf/sample_annotate_old_v1.config @@ -1,5 +1,5 @@ params { - tools { + utils { sample_annotate_v1 { iff = '10x_cellranger_mex' off = 'h5ad' diff --git a/src/utils/conf/scope.config b/src/utils/conf/scope.config index ac9f6edd..cf7f7e0b 100644 --- a/src/utils/conf/scope.config +++ b/src/utils/conf/scope.config @@ -1,5 +1,5 @@ params { - tools { + utils { scope { genome = '' tree { diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf index 2c1d284d..71654348 100644 --- a/src/utils/processes/h5adSubset.nf +++ b/src/utils/processes/h5adSubset.nf @@ -43,7 +43,7 @@ process SC__PREPARE_OBS_FILTER { input = f } else if (processParams.method == 'external') { if(!filterConfig.cellMetaDataFilePath) { - throw new Exception("VSN ERROR: A filter in params.tools.cell_filter 
does not provide a cellMetaDataFilePath entry.") + throw new Exception("VSN ERROR: A filter in params.utils.cell_filter does not provide a cellMetaDataFilePath entry.") } input = filterConfig.cellMetaDataFilePath } else { diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index 916b5b7d..3a9f482c 100644 --- a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -29,7 +29,7 @@ workflow ANNOTATE_BY_CELL_METADATA { // - The given tool is performing itself a cell-based annotation // - params.tools[tool] should exist // - tool == null: - // - params.tools.cell_annotate should exist + // - params.utils.cell_annotate should exist tool main: diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf index e38d4c69..a88af898 100644 --- a/src/utils/workflows/filterByCellMetadata.nf +++ b/src/utils/workflows/filterByCellMetadata.nf @@ -27,7 +27,7 @@ workflow FILTER_BY_CELL_METADATA { // - The given tool is performing itself a cell-based filtering // - params.tools[tool] should exist // - tool == null: - // - params.tools.cell_filter should exist + // - params.utils.cell_filter should exist tool main: From a9907cac34966adabf7b7d2278d98c94768dbecc Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 17:56:52 +0100 Subject: [PATCH 169/202] Remove the use of backward compatible functions --- src/scenic/processes/multiruns/saveToLoom.nf | 8 ++++---- src/utils/processes/h5adAnnotate.nf | 2 +- src/utils/processes/h5adSubset.nf | 4 ++-- src/utils/processes/h5adToLoom.nf | 12 ++++++------ src/utils/workflows/annotateByCellMetadata.nf | 2 +- src/utils/workflows/filterByCellMetadata.nf | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/scenic/processes/multiruns/saveToLoom.nf b/src/scenic/processes/multiruns/saveToLoom.nf index 5f93ad0f..2649bf18 100644 --- a/src/scenic/processes/multiruns/saveToLoom.nf +++ b/src/scenic/processes/multiruns/saveToLoom.nf @@ -30,10 +30,10 @@ process SAVE_MULTI_RUNS_TO_LOOM { --cell-id-attribute ${toolParams.cell_id_attribute} \ --gene-attribute ${toolParams.gene_attribute} \ --title "${sampleId} - pySCENIC (${type})" \ - --nomenclature "${params.getUtilsParams('scope').genome}" \ - --scope-tree-level-1 "${params.getUtilsParams('scope').tree.level_1}" \ - --scope-tree-level-2 "${params.getUtilsParams('scope').tree.level_2}" \ - --scope-tree-level-3 "${params.getUtilsParams('scope').tree.level_3}" + --nomenclature "${params.utils?.scope.genome}" \ + --scope-tree-level-1 "${params.utils?.scope.tree.level_1}" \ + --scope-tree-level-2 "${params.utils?.scope.tree.level_2}" \ + --scope-tree-level-3 "${params.utils?.scope.tree.level_3}" """ } diff --git a/src/utils/processes/h5adAnnotate.nf b/src/utils/processes/h5adAnnotate.nf index 952c57e5..c58a7bce 100644 --- a/src/utils/processes/h5adAnnotate.nf +++ b/src/utils/processes/h5adAnnotate.nf @@ -44,7 +44,7 @@ process SC__ANNOTATE_BY_CELL_METADATA { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.utils.cell_annotate : params.getToolParams(tool)["cell_annotate"] + isParamNull(tool) ? params.utils.cell_annotate : params.tools[tool]["cell_annotate"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' 
diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf index 71654348..76a20aa6 100644 --- a/src/utils/processes/h5adSubset.nf +++ b/src/utils/processes/h5adSubset.nf @@ -33,7 +33,7 @@ process SC__PREPARE_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.utils.cell_filter : params.getToolParams(tool)["cell_filter"] + isParamNull(tool) ? params.utils.cell_filter : params.tools[tool]["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' @@ -91,7 +91,7 @@ process SC__APPLY_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.utils.cell_filter : params.getToolParams(tool)["cell_filter"] + isParamNull(tool) ? params.utils.cell_filter : params.tools[tool]["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' diff --git a/src/utils/processes/h5adToLoom.nf b/src/utils/processes/h5adToLoom.nf index 56a439be..ae2175c3 100644 --- a/src/utils/processes/h5adToLoom.nf +++ b/src/utils/processes/h5adToLoom.nf @@ -28,12 +28,12 @@ process SC__H5AD_TO_LOOM { script: """ ${binDir}/h5ad_to_loom.py \ - ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').genome.length() > 0) ? '--nomenclature "' + params.getUtilsParams('scope').genome + '"' : ''} \ - ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').tree.level_1.length() > 0 ) ? '--scope-tree-level-1 "' + params.getUtilsParams('scope').tree.level_1 + '"' : ''} \ - ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').tree.level_2.length() > 0 ) ? '--scope-tree-level-2 "' + params.getUtilsParams('scope').tree.level_2 + '"' : ''} \ - ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope').tree.level_3.length() > 0 ) ? '--scope-tree-level-3 "' + params.getUtilsParams('scope').tree.level_3 + '"' : ''} \ - ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope')?.markers?.log_fc_threshold) ? '--markers-log-fc-threshold ' + params.getUtilsParams('scope').markers.log_fc_threshold : ''} \ - ${(params.hasUtilsParams('scope') && params.getUtilsParams('scope')?.markers?.fdr_threshold) ? '--markers-fdr-threshold ' + params.getUtilsParams('scope').markers.fdr_threshold : ''} \ + ${(params.utils?.scope.genome.length() > 0) ? '--nomenclature "' + params.utils?.scope.genome + '"' : ''} \ + ${(params.utils?.scope.tree.level_1.length() > 0 ) ? '--scope-tree-level-1 "' + params.utils.scope.tree.level_1 + '"' : ''} \ + ${(params.utils?.scope.tree.level_2.length() > 0 ) ? '--scope-tree-level-2 "' + params.utils.scope.tree.level_2 + '"' : ''} \ + ${(params.utils?.scope.tree.level_3.length() > 0 ) ? '--scope-tree-level-3 "' + params.utils.scope.tree.level_3 + '"' : ''} \ + ${(params.utils?.scope.?.markers?.log_fc_threshold) ? '--markers-log-fc-threshold ' + params.utils.scope.markers.log_fc_threshold : ''} \ + ${(params.utils?.scope.?.markers?.fdr_threshold) ? '--markers-fdr-threshold ' + params.utils.scope.markers.fdr_threshold : ''} \ $data \ $rawFilteredData \ "${sampleId}.SC__H5AD_TO_LOOM.loom" diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index 3a9f482c..ad71c74a 100644 --- a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -35,7 +35,7 @@ workflow ANNOTATE_BY_CELL_METADATA { main: def workflowParams = isParamNull(tool) ? 
params.utils.cell_annotate : - params.getToolParams(tool)["cell_annotate"] + params.tools[tool]["cell_annotate"] def method = workflowParams.method if(method == 'aio') { out = SC__ANNOTATE_BY_CELL_METADATA( diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf index a88af898..c730d522 100644 --- a/src/utils/workflows/filterByCellMetadata.nf +++ b/src/utils/workflows/filterByCellMetadata.nf @@ -33,7 +33,7 @@ workflow FILTER_BY_CELL_METADATA { main: def workflowParams = isParamNull(tool) ? params.utils.cell_filter : - params.getToolParams(tool)["cell_filter"] + params.tools[tool]["cell_filter"] Channel .from(workflowParams.filters) From e0ee972c31f3eee027e7aa5706ddef81f1084c4a Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 18:04:52 +0100 Subject: [PATCH 170/202] Fix typo and fix test__cell_annotate_filter config --- conf/test__cell_annotate_filter.config | 8 +++++--- src/utils/processes/h5adToLoom.nf | 12 ++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/conf/test__cell_annotate_filter.config b/conf/test__cell_annotate_filter.config index 40e1f437..4224d7bb 100644 --- a/conf/test__cell_annotate_filter.config +++ b/conf/test__cell_annotate_filter.config @@ -8,6 +8,11 @@ params { cellranger_mex = 'sample_data/outs' } } + tools { + scanpy { + container = 'vibsinglecellnf/scanpy:1.8.1' + } + } utils { file_converter { off = 'h5ad' @@ -15,9 +20,6 @@ params { useFilteredMatrix = true makeVarIndexUnique = false } - scanpy { - container = 'vibsinglecellnf/scanpy:1.8.1' - } cell_annotate { off = 'h5ad' method = 'aio' diff --git a/src/utils/processes/h5adToLoom.nf b/src/utils/processes/h5adToLoom.nf index ae2175c3..8eb831be 100644 --- a/src/utils/processes/h5adToLoom.nf +++ b/src/utils/processes/h5adToLoom.nf @@ -28,12 +28,12 @@ process SC__H5AD_TO_LOOM { script: """ ${binDir}/h5ad_to_loom.py \ - ${(params.utils?.scope.genome.length() > 0) ? '--nomenclature "' + params.utils?.scope.genome + '"' : ''} \ - ${(params.utils?.scope.tree.level_1.length() > 0 ) ? '--scope-tree-level-1 "' + params.utils.scope.tree.level_1 + '"' : ''} \ - ${(params.utils?.scope.tree.level_2.length() > 0 ) ? '--scope-tree-level-2 "' + params.utils.scope.tree.level_2 + '"' : ''} \ - ${(params.utils?.scope.tree.level_3.length() > 0 ) ? '--scope-tree-level-3 "' + params.utils.scope.tree.level_3 + '"' : ''} \ - ${(params.utils?.scope.?.markers?.log_fc_threshold) ? '--markers-log-fc-threshold ' + params.utils.scope.markers.log_fc_threshold : ''} \ - ${(params.utils?.scope.?.markers?.fdr_threshold) ? '--markers-fdr-threshold ' + params.utils.scope.markers.fdr_threshold : ''} \ + ${params.utils?.scope.genome.length() > 0 ? '--nomenclature "' + params.utils?.scope.genome + '"' : ''} \ + ${params.utils?.scope.tree.level_1.length() > 0 ? '--scope-tree-level-1 "' + params.utils.scope.tree.level_1 + '"' : ''} \ + ${params.utils?.scope.tree.level_2.length() > 0 ? '--scope-tree-level-2 "' + params.utils.scope.tree.level_2 + '"' : ''} \ + ${params.utils?.scope.tree.level_3.length() > 0 ? '--scope-tree-level-3 "' + params.utils.scope.tree.level_3 + '"' : ''} \ + ${params.utils?.scope?.markers?.log_fc_threshold ? '--markers-log-fc-threshold ' + params.utils.scope.markers.log_fc_threshold : ''} \ + ${params.utils?.scope?.markers?.fdr_threshold ? 
'--markers-fdr-threshold ' + params.utils.scope.markers.fdr_threshold : ''} \ $data \ $rawFilteredData \ "${sampleId}.SC__H5AD_TO_LOOM.loom" From ee62d322facce45a452ccb529a623c9868dd0671 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 18:20:10 +0100 Subject: [PATCH 171/202] Other fixes in notebooks to access params.tools and fix bug getting cell_annotate and cell_filter params for given tools --- src/harmony/bin/reports/sc_harmony_report.ipynb | 4 ++-- src/scanpy/bin/reports/sc_bbknn_report.ipynb | 4 ++-- src/scanpy/bin/reports/sc_mnncorrect_report.ipynb | 4 ++-- src/utils/processes/utils.nf | 9 +++++++++ src/utils/workflows/annotateByCellMetadata.nf | 5 +++-- src/utils/workflows/filterByCellMetadata.nf | 7 +++---- 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/harmony/bin/reports/sc_harmony_report.ipynb b/src/harmony/bin/reports/sc_harmony_report.ipynb index 09ca4d42..bd192dc7 100644 --- a/src/harmony/bin/reports/sc_harmony_report.ipynb +++ b/src/harmony/bin/reports/sc_harmony_report.ipynb @@ -50,7 +50,7 @@ "outputs": [], "source": [ "params = json.loads(WORKFLOW_PARAMETERS)\n", - "bec_params = params[\"sc\"][\"harmony\"]\n", + "bec_params = params[\"tools\"][\"harmony\"]\n", "batch = bec_params[\"varsUse\"][0] if \"varsUse\" in bec_params else \"sample_id\"" ] }, @@ -340,4 +340,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/src/scanpy/bin/reports/sc_bbknn_report.ipynb b/src/scanpy/bin/reports/sc_bbknn_report.ipynb index b8c26cfe..6880c845 100644 --- a/src/scanpy/bin/reports/sc_bbknn_report.ipynb +++ b/src/scanpy/bin/reports/sc_bbknn_report.ipynb @@ -50,7 +50,7 @@ "outputs": [], "source": [ "params = json.loads(WORKFLOW_PARAMETERS)\n", - "bec_params = params[\"sc\"][\"scanpy\"][\"batch_effect_correct\"]\n", + "bec_params = params[\"tools\"][\"scanpy\"][\"batch_effect_correct\"]\n", "batch = bec_params[\"batchKey\"] if \"batchKey\" in bec_params and bec_params[\"batchKey\"] != \"batch\" else \"batch\"" ] }, @@ -283,4 +283,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb b/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb index 64d1f7f1..ecad303e 100644 --- a/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb +++ b/src/scanpy/bin/reports/sc_mnncorrect_report.ipynb @@ -50,7 +50,7 @@ "outputs": [], "source": [ "params = json.loads(WORKFLOW_PARAMETERS)\n", - "bec_params = params[\"sc\"][\"scanpy\"][\"batch_effect_correct\"]\n", + "bec_params = params[\"tools\"][\"scanpy\"][\"batch_effect_correct\"]\n", "batch = bec_params[\"batchKey\"] if \"batchKey\" in bec_params and bec_params[\"batchKey\"] != \"batch\" else \"batch\"" ] }, @@ -346,4 +346,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index d73d1032..c3c45f92 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -10,6 +10,15 @@ def boolean isCollectionOrArray(object) { [Collection, Object[]].any { it.isAssignableFrom(object.getClass()) } } +def getToolParams(params, toolKey) { + if(!toolKey.contains(".")) { + return params[toolKey] + } + def entry = params + toolKey.split('\\.').each { entry = entry?.get(it) } + return entry +} + def isParamNull(param) { return param == null || param == "NULL" } diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index ad71c74a..fc09aebf 100644 --- 
a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -4,6 +4,7 @@ nextflow.enable.dsl=2 // Process imports: include { isParamNull; + getToolParams; } from './../processes/utils.nf' params(params) include { getChannel; @@ -34,8 +35,8 @@ workflow ANNOTATE_BY_CELL_METADATA { main: def workflowParams = isParamNull(tool) ? - params.utils.cell_annotate : - params.tools[tool]["cell_annotate"] + params.tools.cell_annotate : + getToolParams(params.tools, tool)["cell_annotate"] def method = workflowParams.method if(method == 'aio') { out = SC__ANNOTATE_BY_CELL_METADATA( diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf index c730d522..3c6baac1 100644 --- a/src/utils/workflows/filterByCellMetadata.nf +++ b/src/utils/workflows/filterByCellMetadata.nf @@ -31,10 +31,9 @@ workflow FILTER_BY_CELL_METADATA { tool main: - def workflowParams = isParamNull(tool) ? - params.utils.cell_filter : - params.tools[tool]["cell_filter"] - + def workflowParams = isParamNull(tool) ? + params.tools.cell_filter : + getToolParams(params.tools, tool)["cell_filter"] Channel .from(workflowParams.filters) .set{ filters } From e045e72d7fe5b9da11c19af7ffd51dac35dd3ad4 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 18:47:47 +0100 Subject: [PATCH 172/202] Add fixes for scrublet and decontx --- src/celda/conf/decontx_filter.config | 4 ++-- src/scrublet/scrublet.config | 4 ++-- src/utils/processes/h5adAnnotate.nf | 3 ++- src/utils/processes/h5adSubset.nf | 2 +- src/utils/workflows/annotateByCellMetadata.nf | 2 +- src/utils/workflows/filterByCellMetadata.nf | 3 ++- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/celda/conf/decontx_filter.config b/src/celda/conf/decontx_filter.config index 68e65fb4..39c15b72 100644 --- a/src/celda/conf/decontx_filter.config +++ b/src/celda/conf/decontx_filter.config @@ -1,9 +1,9 @@ includeConfig '../../utils/conf/cell_annotate.config' params.tools.celda.decontx.cell_annotate = params.utils.cell_annotate -params.tools.remove('cell_annotate') +params.utils.remove('cell_annotate') includeConfig '../../utils/conf/cell_filter.config' params.tools.celda.decontx.cell_filter = params.utils.cell_filter -params.tools.remove('cell_filter') +params.utils.remove('cell_filter') includeConfig './decontx_filter_defaults.config' params { diff --git a/src/scrublet/scrublet.config b/src/scrublet/scrublet.config index 5665e399..fcf50c68 100644 --- a/src/scrublet/scrublet.config +++ b/src/scrublet/scrublet.config @@ -1,9 +1,9 @@ includeConfig './conf/base.config' includeConfig '../utils/conf/cell_annotate.config' params.tools.scrublet.cell_annotate = params.utils.cell_annotate -params.tools.remove('cell_annotate') +params.utils.remove('cell_annotate') includeConfig '../utils/conf/cell_filter.config' params.tools.scrublet.cell_filter = params.utils.cell_filter -params.tools.remove('cell_filter') +params.utils.remove('cell_filter') includeConfig './conf/scrublet_defaults.conf' diff --git a/src/utils/processes/h5adAnnotate.nf b/src/utils/processes/h5adAnnotate.nf index c58a7bce..dcfa1afc 100644 --- a/src/utils/processes/h5adAnnotate.nf +++ b/src/utils/processes/h5adAnnotate.nf @@ -6,6 +6,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/utils/bin" : include { isParamNull; + getToolParams; } from './utils.nf' params(params) def getPublishDir = { outDir, toolName -> @@ -44,7 +45,7 @@ process SC__ANNOTATE_BY_CELL_METADATA { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.utils.cell_annotate : params.tools[tool]["cell_annotate"] + isParamNull(tool) ? params.utils.cell_annotate : getToolParams(params.tools, tool)["cell_annotate"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf index 76a20aa6..f89acb5d 100644 --- a/src/utils/processes/h5adSubset.nf +++ b/src/utils/processes/h5adSubset.nf @@ -91,7 +91,7 @@ process SC__APPLY_OBS_FILTER { def sampleParams = params.parseConfig( sampleId, params.global, - isParamNull(tool) ? params.utils.cell_filter : params.tools[tool]["cell_filter"] + isParamNull(tool) ? params.utils.cell_filter : getToolParams(params.tools, tool)["cell_filter"] ) processParams = sampleParams.local toolTag = isParamNull(tool) ? '' : tool.toUpperCase() + '.' diff --git a/src/utils/workflows/annotateByCellMetadata.nf b/src/utils/workflows/annotateByCellMetadata.nf index fc09aebf..b5a15e81 100644 --- a/src/utils/workflows/annotateByCellMetadata.nf +++ b/src/utils/workflows/annotateByCellMetadata.nf @@ -35,7 +35,7 @@ workflow ANNOTATE_BY_CELL_METADATA { main: def workflowParams = isParamNull(tool) ? - params.tools.cell_annotate : + params.utils.cell_annotate : getToolParams(params.tools, tool)["cell_annotate"] def method = workflowParams.method if(method == 'aio') { diff --git a/src/utils/workflows/filterByCellMetadata.nf b/src/utils/workflows/filterByCellMetadata.nf index 3c6baac1..6f096dda 100644 --- a/src/utils/workflows/filterByCellMetadata.nf +++ b/src/utils/workflows/filterByCellMetadata.nf @@ -4,6 +4,7 @@ nextflow.enable.dsl=2 // Process imports: include { isParamNull; + getToolParams; } from './../processes/utils.nf' params(params) include { SC__PREPARE_OBS_FILTER; @@ -32,7 +33,7 @@ workflow FILTER_BY_CELL_METADATA { main: def workflowParams = isParamNull(tool) ? - params.tools.cell_filter : + params.utils.cell_filter : getToolParams(params.tools, tool)["cell_filter"] Channel .from(workflowParams.filters) From 7ad6de1cf0331463086773024abfe12d7386706c Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 17 Feb 2021 19:06:50 +0100 Subject: [PATCH 173/202] Add missing import --- src/utils/processes/h5adSubset.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/processes/h5adSubset.nf b/src/utils/processes/h5adSubset.nf index f89acb5d..e238d42f 100644 --- a/src/utils/processes/h5adSubset.nf +++ b/src/utils/processes/h5adSubset.nf @@ -7,6 +7,7 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/utils/bin" : include { isParamNull; isCollectionOrArray; + getToolParams; } from './utils' params(params) process SC__PREPARE_OBS_FILTER { From b3d2cd3880156dddf0d9d8269d862f0456e7903a Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 9 Jul 2021 12:28:34 +0200 Subject: [PATCH 174/202] Migrate missing config access for sratoolkit and file_concatenator should nested under utils --- docs/pipelines.rst | 4 ++-- src/cellranger/processes/preflight.nf | 2 +- src/popscle/processes/dsc_pileup.nf | 2 +- src/scanpy/main.nf | 4 ++-- src/scanpy/workflows/qc_filter.nf | 2 +- src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf | 2 +- src/sratoolkit/processes/downloadFastQ.nf | 4 ++-- src/utils/conf/sra_normalize_fastqs.config | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index dbb1e909..c2749642 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -622,8 +622,8 @@ NOTES: - If you're a VSC user, you might want to add the ``vsc`` profile. - The final output (FASTQ files) will available in ``out/data/sra`` -- If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``. -- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased please set ``params.sratoolkit.maxSize`` accordingly. This limit can be 'removed' by setting the parameter to an arbitrarily high number (e.g.: 9999999999999). +- If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.tools.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``. +- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased please set ``params.tools.sratoolkit.maxSize`` accordingly. This limit can be 'removed' by setting the parameter to an arbitrarily high number (e.g.: 9999999999999). Now we can run it with the following command: diff --git a/src/cellranger/processes/preflight.nf b/src/cellranger/processes/preflight.nf index b69177cb..e615efd4 100644 --- a/src/cellranger/processes/preflight.nf +++ b/src/cellranger/processes/preflight.nf @@ -1,6 +1,6 @@ nextflow.enable.dsl=2 -toolParams = params.sc.cellranger +toolParams = params.tools.cellranger process SC__CELLRANGER__PREFLIGHT { diff --git a/src/popscle/processes/dsc_pileup.nf b/src/popscle/processes/dsc_pileup.nf index 129837b7..63e77c74 100644 --- a/src/popscle/processes/dsc_pileup.nf +++ b/src/popscle/processes/dsc_pileup.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/popscle/bin/" : "" -toolParams = params.sc.popscle +toolParams = params.tools.popscle process SC__POPSCLE__DSC_PILEUP { diff --git a/src/scanpy/main.nf b/src/scanpy/main.nf index 2d93a789..8cc6be34 100644 --- a/src/scanpy/main.nf +++ b/src/scanpy/main.nf @@ -59,10 +59,10 @@ workflow multi_sample_qc { main: if(!params?.sc?.scanpy?.filter) { - throw new Exception("VSN ERROR: Missing params.sc.scanpy.filter config.") + throw new Exception("VSN ERROR: Missing params.tools.scanpy.filter config.") } if(!params?.sc?.file_concatenator) { - throw new Exception("VSN ERROR: Missing params.sc.file_concatenator config.") + throw new Exception("VSN ERROR: Missing params.utils.file_concatenator config.") } out = data | \ diff --git a/src/scanpy/workflows/qc_filter.nf b/src/scanpy/workflows/qc_filter.nf index e878aa6f..de8330cc 100644 --- a/src/scanpy/workflows/qc_filter.nf +++ b/src/scanpy/workflows/qc_filter.nf @@ -30,7 +30,7 @@ workflow QC_FILTER { SC__SCANPY__CELL_FILTER | \ SC__SCANPY__GENE_FILTER - report = !params.sc.scanpy.filter?.report_ipynb ? Channel.empty() : + report = !params.tools.scanpy.filter?.report_ipynb ? Channel.empty() : GENERATE_DUAL_INPUT_REPORT( SC__SCANPY__COMPUTE_QC_STATS.out.join(filtered).map { it -> tuple(*it[0..(it.size()-1)], null) diff --git a/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf b/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf index 4a2bfcf4..f265d633 100644 --- a/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf +++ b/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf @@ -6,7 +6,7 @@ if(!params.containsKey("test")) { binDir = "" } -toolParams = params.sratoolkit +toolParams = params.tools.sratoolkit process FIX_AND_COMPRESS_SRA_FASTQS { diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index 24abc905..2ca35234 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -34,7 +34,7 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { prefetch \ -v \ -p 1 \ - ${params.sratoolkit?.maxSize ? '--max-size '+ params.sratoolkit.maxSize: ''} \ + ${params.tools.sratoolkit?.maxSize ? '--max-size '+ params.tools.sratoolkit.maxSize: ''} \ ${sraId} # Convert SRA file to FASTQ files fasterq-dump \ @@ -42,7 +42,7 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { -v \ -p \ -e ${task.cpus} \ - ${params.sratoolkit?.includeTechnicalReads ? '--include-technical' : ''} \ + ${params.tools.sratoolkit?.includeTechnicalReads ? '--include-technical' : ''} \ -O . \ ${sraId} """ diff --git a/src/utils/conf/sra_normalize_fastqs.config b/src/utils/conf/sra_normalize_fastqs.config index 63cdbf39..ec76cac8 100644 --- a/src/utils/conf/sra_normalize_fastqs.config +++ b/src/utils/conf/sra_normalize_fastqs.config @@ -2,7 +2,7 @@ params { utils { sra_normalize_fastqs { // Downloading FASTQ from SRA will give FASTQ in the following format SRRXXXXXX_[1-9].fastq. 
This index minus one will be used to retrieve the FASTQ read suffix from the array of suffixes defined hereunder - fastq_read_suffixes = ["R1","R2"] // ["R1","R2","I1","I2"] would be used for SRR11442498 (this requires params.sratoolkit.includeTechnicalReads = true) + fastq_read_suffixes = ["R1","R2"] // ["R1","R2","I1","I2"] would be used for SRR11442498 (this requires params.tools.sratoolkit.includeTechnicalReads = true) } } } From 19e944c038f5a6dd93fe97f1b1cd09192ae9a9bb Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 9 Jul 2021 12:42:20 +0200 Subject: [PATCH 175/202] Fix test config for harmony_scenic pipeline --- conf/test__harmony_scenic.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/conf/test__harmony_scenic.config b/conf/test__harmony_scenic.config index e9af2d28..143da4c8 100644 --- a/conf/test__harmony_scenic.config +++ b/conf/test__harmony_scenic.config @@ -8,7 +8,12 @@ params { cellranger_mex = "testdata/*/outs/" } } - sc { + utils { + file_annotator { + metadataFilePath = '' + } + } + tools { file_annotator { metadataFilePath = '' } From f6d9d424b0f2684ac883655419850df4bf089fa0 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 12 Jul 2021 12:13:56 +0200 Subject: [PATCH 176/202] Update sctk Docker image - Updated polars and pyarrow packages - Fix for segfault in atac saturation script --- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 73ef1318..c60076ca 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-06-29-a0ad3d6' + container = 'vibsinglecellnf/singlecelltoolkit:2021-07-06-ea48b36-2' } } } From dd0542d1b04ca7aa1ab4ed275ec17b53f3217494 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 12 Jul 2021 13:43:31 +0200 Subject: [PATCH 177/202] Use long queue for macs2 peak calling --- src/pycistopic/processes/macs2_call_peaks.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pycistopic/processes/macs2_call_peaks.nf b/src/pycistopic/processes/macs2_call_peaks.nf index dd1398c7..9580efce 100644 --- a/src/pycistopic/processes/macs2_call_peaks.nf +++ b/src/pycistopic/processes/macs2_call_peaks.nf @@ -8,7 +8,7 @@ processParams = params.tools.pycistopic.macs2_call_peaks process PYCISTOPIC__MACS2_CALL_PEAKS { container toolParams.container - label 'compute_resources__default' + label 'compute_resources__default','compute_resources__24hqueue' input: tuple val(sampleId), From 30c27480aab826799e958eff7de19787bdc5cee5 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 13 Jul 2021 10:11:22 +0200 Subject: [PATCH 178/202] Add ability to chain atac_preprocess and QC steps - Remap bam/fragments output to be compatible with getDataChannel - Mix bam/fragments channels for input to qc steps --- main_atac.nf | 15 +++++++++++++++ workflows/atac/preprocess.nf | 5 +++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/main_atac.nf b/main_atac.nf index 58200699..a18f9c29 100644 --- a/main_atac.nf +++ b/main_atac.nf @@ -90,6 +90,21 @@ workflow atac_qc_filtering { } +workflow atac_preprocess_with_qc { + + // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation + include { + ATAC_PREPROCESS; + } from './workflows/atac/preprocess.nf' params(params) + 
include { + ATAC_QC_PREFILTER; + } from './workflows/atac/qc_filtering.nf' params(params) + + pp = ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) + ATAC_QC_PREFILTER(pp.bam.mix(pp.fragments)) + +} + workflow atac_preprocess_freemuxlet { // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 8fb3d73f..ee46b0a1 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -93,8 +93,9 @@ workflow ATAC_PREPROCESS { | mapping emit: - bam = mapping.out.bam - fragments = mapping.out.fragments + // emit in a format compatible with getDataChannel output: + bam = mapping.out.bam.map { it -> tuple(it[0], [it[1], it[2]], 'bam') } + fragments = mapping.out.fragments.map { it -> tuple(it[0], [it[1], it[2]], 'fragments') } } From 26bb1c4686e17025b611109c230747f15381a349 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 13 Jul 2021 10:44:16 +0200 Subject: [PATCH 179/202] Add cpu limitations to some processes - In some tools there is little benefit to increasing the number of threads beyond a certain number. Limits are set now to 6 threads for adapter trimming and barcoding steps. This will allow more processes to run in parallel. --- src/fastp/processes/adapter_trimming.nf | 3 ++- src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf | 2 ++ src/trimgalore/processes/trim.nf | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/fastp/processes/adapter_trimming.nf b/src/fastp/processes/adapter_trimming.nf index a65191d9..499a57ae 100644 --- a/src/fastp/processes/adapter_trimming.nf +++ b/src/fastp/processes/adapter_trimming.nf @@ -23,6 +23,7 @@ process FASTP__ADAPTER_TRIMMING { script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams) processParams = sampleParams.local + def max_threads = (task.cpus > 6) ? 6 : task.cpus """ fastp \ --in1 ${fastq_PE1} \ @@ -31,7 +32,7 @@ process FASTP__ADAPTER_TRIMMING { --out2 ${sampleId}_dex_R2_val_2.fq.gz \ --detect_adapter_for_pe \ --html ${sampleId}_fastp.html \ - --thread ${task.cpus} + --thread ${max_threads} """ } diff --git a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf index 150b244f..35462c5e 100644 --- a/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf +++ b/src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf @@ -24,7 +24,9 @@ process SCTK__BARCODE_10X_SCATAC_FASTQ { script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_10x_scatac_fastqs) processParams = sampleParams.local + def max_threads = (task.cpus > 6) ? 6 : task.cpus """ + export compress_fastq_threads="${max_threads}" barcode_10x_scatac_fastqs.sh \ ${fastq_PE1} \ ${fastq_bc} \ diff --git a/src/trimgalore/processes/trim.nf b/src/trimgalore/processes/trim.nf index ed58a24a..8c433482 100644 --- a/src/trimgalore/processes/trim.nf +++ b/src/trimgalore/processes/trim.nf @@ -24,9 +24,10 @@ process TRIMGALORE__TRIM { script: def sampleParams = params.parseConfig(sampleId, params.global, toolParams.trim) processParams = sampleParams.local + def max_threads = (task.cpus > 6) ? 6 : task.cpus """ trim_galore \ - -j ${task.cpus} \ + -j ${max_threads} \ -o . 
\ ${fastq_PE1} \ ${fastq_PE2} \ From a1fdeec49c9551fe95632edd35989a21f4ed1788 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 13 Jul 2021 13:19:07 +0200 Subject: [PATCH 180/202] Fix publish methods for popscle tools - Freemuxlet/demuxlet now publishes to named directories in out/data - Publish mode from the config file is used --- src/popscle/processes/demuxlet.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popscle/processes/demuxlet.nf b/src/popscle/processes/demuxlet.nf index 51060956..135599dc 100644 --- a/src/popscle/processes/demuxlet.nf +++ b/src/popscle/processes/demuxlet.nf @@ -5,7 +5,7 @@ binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/popscle/bin/" process SC__POPSCLE__DEMUXLET { container params.tools.popscle.container - publishDir "${params.global.outdir}/data", mode: 'symlink' + publishDir "${params.global.outdir}/data/demuxlet", mode: params.utils.publish.mode label 'compute_resources__cpu' input: @@ -31,7 +31,7 @@ process SC__POPSCLE__DEMUXLET { process SC__POPSCLE__FREEMUXLET { container params.tools.popscle.container - publishDir "${params.global.outdir}/data", mode: 'symlink' + publishDir "${params.global.outdir}/data/freemuxlet", mode: params.utils.publish.mode label 'compute_resources__cpu' input: From 68740da3212615cf428da7af0b02adce64b0ccd1 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 20 Jul 2021 11:09:03 +0200 Subject: [PATCH 181/202] Update scATAC documentation --- docs/scatac-seq.rst | 195 ++++++++++++++++++++++++++++++++--------- docs/scatac-seq_qc.rst | 25 +++++- 2 files changed, 176 insertions(+), 44 deletions(-) diff --git a/docs/scatac-seq.rst b/docs/scatac-seq.rst index 57721528..a602d7d1 100644 --- a/docs/scatac-seq.rst +++ b/docs/scatac-seq.rst @@ -4,7 +4,28 @@ scATAC-seq Preprocessing This pipeline takes fastq files from paired end single cell ATAC-seq, and applies preprocessing steps to align the reads to a reference genome, and produce a bam file and scATAC-seq fragments file. -This workflow is currently available in the ``develop_atac`` branch (use the ``-r develop_atac`` option when running ``nextflow pull`` and ``nextflow run``). +This workflow is currently available in the ``develop_atac`` branch (use ``nextflow pull vib-singlecell-nf/vsn-pipelines -r develop_atac`` to sync this branch). + + +Optional Steps +************** + +1. Direct the Nextflow work directory to an alternate path (e.g. a scratch drive) using the ``NXF_WORK`` environmental variable:: + + nwork=/path/to/scratch/example_project + mkdir $nwork + export NXF_WORK=$nwork + +Note that if you start a new shell, ``NXF_WORK`` must be set again, or the pipeline will not resume properly. + + +2. Temporary directory mapping. + For large BAM files, the system default temp location may become full. + A workaround is to include a volume mapping to the alternate ``/tmp`` ``-B /alternate/path/to/tmp:/tmp`` using the volume mount options in Docker or Singularity. + For example in the container engine options: + - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/alternate/path/to/tmp:/tmp'`` + - Docker run options: ``runOptions = '-i -v /data:/data -v /alternate/path/to/tmp:/tmp'`` + ---- @@ -18,7 +39,10 @@ The full steps are: * For 'standard' and 'multiome' samples (e.g. 10x Genomics or similar) correction is performed against a whitelist by `this method `_ from `aertslab/single_cell_toolkit `_. - * For 'biorad' samples, barcode correction is performed by `BAP `_. 
+ * For 'biorad' samples, barcode correction is performed by + `this script `_ + in our `aertslab/single_cell_toolkit `_ + (previously, this was done with `BAP `_). - Fastq barcoding: Add the barcode sequence to the comment field of the fastq sequence identifier. Uses methods from `aertslab/single_cell_toolkit `_. @@ -72,6 +96,11 @@ Note that the fastq file fields must be full paths; this is not shown here for c - sample_4_R1.fastq.gz - sample_2_R2.fastq.gz - sample_4_R3.fastq.gz + * - sample_5 + - hydrop + - sample_5_R1.fastq.gz + - sample_5_R2.fastq.gz + - sample_5_R3.fastq.gz The columns represent: @@ -140,7 +169,8 @@ Downstream steps are done with the merged data. Generating the metadata file ---------------------------- -Note that there is an easy way to create the metadata from the file paths for each sample by using the following bash command (expand to view): +Note that there is an easy way to create the metadata from the file paths for each sample by using the following bash command (expand to view). +Special thanks here to Gert Hulselmans for expanding the capabilities of this function. .. raw:: html @@ -149,21 +179,34 @@ Note that there is an easy way to create the metadata from the file paths for ea .. code-block:: none - create_atac_metadata() { - local sample="${1}" - local technology="${2}" - local file_prefix="${3}" - local read_labels="${4}" + create_atac_metadata () { + local sample="${1}"; + local technology="${2}"; + local fastq_prefix="${3}"; + local read_labels="${4}"; if [ "${sample}" == "header" ]; then - echo -e "sample_name\ttechnology\tfastq_PE1_path\tfastq_barcode_path\tfastq_PE2_path" - return 1 + printf 'sample_name\ttechnology\tfastq_PE1_path\tfastq_barcode_path\tfastq_PE2_path\n'; + return 0; + fi + if [ ${#@} -ne 4 ] ; then + printf 'Usage: create_atac_metadata sample technology fastq_prefix read_labels\n\n'; + printf 'Arguments:\n'; + printf ' sample: sample name\n'; + printf ' technology: "standard", "hydrop" or "biorad"\n'; + printf ' fastq_prefix: path prefix to FASTQ files.\n'; + printf ' read_labels: comma separated read labels for R1, R2 and R3 that select: R1,R2,R3.\n'; + return 1; fi - read_labels_arr=(${read_labels//,/ }) - R1=(${file_prefix}*${read_labels_arr[0]}*) - R2=(${file_prefix}*${read_labels_arr[1]}*) - R3=(${file_prefix}*${read_labels_arr[2]}*) - for i in "${!R1[@]}"; do - echo -e "${sample}\t${technology}\t${R1[i]}\t${R2[i]}\t${R3[i]}"; + read_labels_arr=(${read_labels//,/ }); + # Get R1, R2 and R3 FASTQ filenames for + R1=(${fastq_prefix}*${read_labels_arr[0]}*.{fastq,fq,fastq.gz,fq.gz}) + R2=(${fastq_prefix}*${read_labels_arr[1]}*.{fastq,fq,fastq.gz,fq.gz}) + R3=(${fastq_prefix}*${read_labels_arr[2]}*.{fastq,fq,fastq.gz,fq.gz}) + for i in "${!R1[@]}" ; do + # Check if R1 FASTQ file exist (and is not just a glob like "${sample}*R1*.fq"). + if [ -e "${R1[i]}" ] ; then + printf "${sample}\t${technology}\t${R1[i]}\t${R2[i]}\t${R3[i]}\n"; + fi done } @@ -179,6 +222,7 @@ To run use the options: create_atac_metadata header > auto_metadata.tsv create_atac_metadata sample_1 standard /path/to/sample_1_subset_S R1,R2,R3 >> auto_metadata.tsv create_atac_metadata sample_2 standard /path/to/sample_2_subset_S R1,R2,R3 >> auto_metadata.tsv + create_atac_metadata sample_5 hydrop /path/to/sample_5_ R1,R2,R3 >> auto_metadata.tsv .. 
raw:: html @@ -189,17 +233,23 @@ To run use the options: Technology ---------- -The "technology" field in the metadata table controls how technology-specific pipeline steps are applied, as well as which whitelist is used for barcode correction. -Currently the only the ``biorad`` setting uses alternate pipelines processes (to extract and correct the barcode sequence from the two input fastqs). -Except for the ``biorad`` setting, the samples will be processed in the standard pipeline (barcode correction against a whitelist). +The "technology" field in the metadata table controls two things: + +1. *How technology-specific pipeline steps are applied.* + Currently there are two specific settings (``biorad`` and ``hydrop``) that use alternate pipelines processes (to extract and correct the barcode sequence from the two input fastqs). + Using any other keyword is allowed, and samples will be run with the standard pipeline steps (barcode correction against a whitelist). -The "technology" field can be set to any string (e.g. ``standard``), but note that the entry in this field must match the barcode label given in the ``params.tools.singlecelltoolkit.barcode_correction.whitelist`` parameter. -Commonly used default settings are: +2. *Which whitelist is used for barcode correction.* + The "technology" field must match a key in the ``params.tools.singlecelltoolkit.barcode_correction.whitelist`` parameter list in the config file for that sample to be associated with a particular barcode whitelist. + The "technology" field and whitelist key name can be set to any arbitrary string (e.g. ``standard``), with the exception of the technology-specific keywords above. + +The main modes are: ``standard`` ____________ -The ``standard`` setting assumes a typical 10x Genomics style format with two read pair fastqs and a barcode fastq (note here that the barcode correction has already been performed, writing the ``CB`` tag into the comment of the barcode fastq):: +The ``standard`` setting is the main pipeline mode. +It assumes a typical 10x Genomics style format with two read pair fastqs and a barcode fastq (note here that the barcode correction has already been performed, writing the ``CB`` tag into the comment of the barcode fastq):: $ zcat sample_1_R1.fastq.gz | head -n 4 @A00311:74:HMLK5DMXX:1:1101:2013:1000 1:N:0:ACTCAGAC @@ -245,6 +295,29 @@ The whitelists are supplied in the params file (``params.tools.singlecelltoolkit This can be used to supply alternate whitelists for certain samples, for example if you need to supply a reverse complemented whitelist for samples run in certain sequencing machines. +``hydrop`` +__________ + +The ``hydrop`` setting processes data generated by the HyDrop ATAC protocol +(see `hydrop.aertslab.org `_ and `the associated preprint `_). +This approach differs from the standard pipeline in only the initial step, which is to extract and process the HyDrop barcodes from the sequencing output. 
+Here, `this script `_ is used to take the R2 read from the sequencer:: + + $ zcat sample_5_R2.fastq.gz | head -n 4 + @VH00445:5:AAAL5KYM5:1:1101:63923:1019 2:N:0:ACACGTGGAC + CACTGGTGGTAGGGTACTCGGACAAGTGGAGCAGTAGCTGAAGTGTAGAAG + + + CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC + +and transform it into:: + + $ zcat sample_5_hydrop_barcode_R2.fastq.gz + @VH00445:5:AAAL5KYM5:1:1101:63923:1019 2:N:0:ACACGTGGAC + CACTGGTGGTGACAAGTGGAAAGTGTAGAA + + + CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC + + ``biorad`` __________ @@ -298,6 +371,15 @@ Note that the full path to ``vib-singlecell-nf/vsn-pipelines/main_atac.nf`` must > atac_preprocess.config +.. note:: + + It is also possible to run the pycisTopic QC steps directly after this ``atac_preprocess`` pipeline, with a single command. + Please see + `here <#input-directly-from-the-preprocessing-pipeline>`_ + `here `_ + for details on how to run with this configuration. + + Parameters ---------- @@ -305,8 +387,13 @@ The ATAC-specific parameters are described here. The important parameters to verify are: - ``params.data.atac_preprocess.metadata``: the path to the metadata file. -- ``params.tools.bwamaptools.bwa_fasta``: the path to the bwa reference fasta file. This should be already indexed with ``bwa index``, and the index files located in the same directory as the fasta file. Note that ``bwa`` and ``bwa-mem2`` use different indexes that are not interchangeable. -- ``params.tools.singlecelltoolkit.barcode_correction.whitelist``: Whitelists for barcode correction are supplied here. The whitelists are matched to samples based on the parameter key here ('standard', 'multiome', etc.) and the technology field listed for each sample in the metadata file. +- ``params.tools.bwamaptools.bwa_fasta``: the path to the bwa reference fasta file. + This should be already indexed with ``bwa index``, and the index files located in the same directory as the fasta file. Note that ``bwa`` and ``bwa-mem2`` use different indexes that are not interchangeable. +- ``params.tools.singlecelltoolkit.barcode_correction.whitelist``: Whitelists for barcode correction are supplied here. + The whitelists are matched to samples based on the parameter key here ('standard', 'multiome', 'hydrop', etc.) and the technology field listed for each sample in the metadata file. + Barcode whitelists can (optionally) be gzipped. + There are currently no checks performed to ensure that the sample barcodes have any overlap to the whitelist (the barcode correction reports should be checked for this). + Choice of tools _______________ @@ -315,12 +402,15 @@ Several steps have options for the choice of method to use. These options are controlled within ``params.atac_preprocess_tools``. - Adapter trimming (``adapter_trimming_method``): Can be either of ``Trim_Galore`` (default), or ``fastp``. -- Duplicate marking (``mark_duplicates_method``): Can be either of ``MarkDuplicates`` (Picard tools, default) or ``MarkDuplicatesSpark`` (GATK). We currently recommend Picard MarkDuplicates because it has the capability to perform barcode-aware marking of PCR duplicates. MarkDuplicatesSpark has the advantage of parallelization, however it requires a large SSD to use for temporary files. +- Duplicate marking (``mark_duplicates_method``): Can be either of ``MarkDuplicates`` (Picard tools, default) or ``MarkDuplicatesSpark`` (GATK). + We currently recommend Picard MarkDuplicates because it has the capability to perform barcode-aware marking of PCR duplicates. 
+ MarkDuplicatesSpark has the advantage of parallelization, however it requires a large SSD to use for temporary files. Additionally: - Mapping: Use parameter ``params.tools.bwamaptools.bwa_version`` to select either ``bwa`` or ``bwa-mem2``. These should give virtually identical results, however ``bwa-mem2``, while faster, has used more memory in our tests. Note that the index (``bwa_index``) is not interchangeable between the versions. + Optional parameters ___________________ @@ -330,7 +420,6 @@ ___________________ - ``mapq``: Controls quality filtering settings for generating the fragments file. Discards reads with quality score lower than this number (default 30). - Execution --------- @@ -342,26 +431,46 @@ After configuring, the workflow can be run with: vib-singlecell-nf/vsn-pipelines/main_atac.nf \ -entry atac_preprocess -resume ----- - -Other considerations --------------------- -Temporary directory mapping -___________________________ +Output +****** -For large BAM files, the system default temp location may become full. - A workaround is to include a volume mapping to the alternate ``/tmp`` ``-B /alternate/path/to/tmp:/tmp`` using the volume mount options in Docker or Singularity. - For example in the container engine options: - - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/alternate/path/to/tmp:/tmp'`` - - Docker run options: ``runOptions = '-i -v /data:/data -v /alternate/path/to/tmp:/tmp'`` +An example output tree is shown here. -Alternate Nextflow work location -________________________________ +.. code:: bash -Direct the Nextflow work directory to an alternate path (e.g. a scratch drive) using the ``NXF_WORK`` environmental variable:: + out/ + ├── data + │   ├── bam + │   │   ├── sample_1.bwa.out.possorted.bam + │   │   ├── sample_1.bwa.out.possorted.bam.bai + │   │   ├── sample_2.bwa.out.possorted.bam + │   │   └── sample_2.bwa.out.possorted.bam.bai + │   ├── fragments + │   │   ├── sample_1.sinto.fragments.tsv.gz + │   │   ├── sample_1.sinto.fragments.tsv.gz.tbi + │   │   ├── sample_2.sinto.fragments.tsv.gz + │   │   └── sample_2.sinto.fragments.tsv.gz.tbi + │   └── reports + │   ├── barcode + │   │   ├── sample_1____S7_R1_001.corrected.bc_stats.log + │   │   └── sample_2____S8_R1_001.corrected.bc_stats.log + │   ├── mapping_stats + │   │   ├── sample_1.mapping_stats.tsv + │   │   └── sample_2.mapping_stats.tsv + │   ├── mark_duplicates + │   │   ├── sample_1.library_complexity_metrics.txt + │   │   ├── sample_1.mark_duplicates_metrics.txt + │   │   ├── sample_2.library_complexity_metrics.txt + │   │   └── sample_2.mark_duplicates_metrics.txt + │   └── trim + │   ├── sample_1____S7_R1_001.fastp.trimming_report.html + │   └── sample_2____S8_R1_001.fastp.trimming_report.html + └── nextflow_reports + ├── execution_report.html + ├── execution_timeline.html + ├── execution_trace.txt + └── pipeline_dag.dot - nwork=/path/to/scratch/example_project - mkdir $nwork - export NXF_WORK=$nwork +---- diff --git a/docs/scatac-seq_qc.rst b/docs/scatac-seq_qc.rst index 88b72890..5a9ffac2 100644 --- a/docs/scatac-seq_qc.rst +++ b/docs/scatac-seq_qc.rst @@ -4,7 +4,7 @@ scATAC-seq QC and Cell Calling This workflow uses the Python implementation of `cisTopic `_ (pycisTopic) to perform quality control and cell calling. The inputs here are a fragments and bam file for each sample. -This workflow is currently available in the ``develop_atac`` branch (use the ``-r develop_atac`` option when running ``nextflow pull`` and ``nextflow run``). 
+This workflow is currently available in the ``develop_atac`` branch (use ``nextflow pull vib-singlecell-nf/vsn-pipelines -r develop_atac`` to sync this branch). ---- @@ -17,6 +17,8 @@ Optional Steps mkdir $nwork export NXF_WORK=$nwork +Note that if you start a new shell, ``NXF_WORK`` must be set again, or the pipeline will not resume properly. + 2. Important for pycisTopic Ray issues: the system default temp location may become full. A workaround is to include a volume mapping to the alternate ``/tmp`` ``-B /alternate/path/to/tmp:/tmp`` using the volume mount options in Docker or Singularity. @@ -91,6 +93,27 @@ Input data (the Cell Ranger ``outs/`` path) are specified in the data section:: Multiple files can be specified with ``*`` in ``tenx_atac`` or by separating the paths with a comma. + +Input directly from the preprocessing pipeline +---------------------------------------------- + +It is also possible to run these QC steps directly after the ``atac_preprocess`` pipeline, with a single command. +In this case, all the appropriate configuration profiles must be included at the configuration start:: + + nextflow config vib-singlecell-nf/vsn-pipelines/main_atac.nf \ + -profile atac_preprocess,atac_qc_filtering,pycistopic_hg38,vsc \ + > atac_preprocess_and_qc.config + +Note that here, we do not include ``bam`` and ``fragments`` profiles that specify the input data locations to the QC steps since these are piped directly from the preprocessing pipeline. +One caveat to this is that it could potentially make it harder to run the qc pipeline with ``-resume`` later on, especially if the Nextflow ``work/`` directory is not saved due to disk space concerns. + +To execute the preprocessing and mapping pipeline in one step, use the ``atac_preprocess_with_qc`` entry point:: + + nextflow -C atac_preprocess_and_qc.config run \ + vib-singlecell-nf/vsn-pipelines/main_atac.nf \ + -entry atac_preprocess_with_qc -resume --quiet + + ---- Execution From 3725f5392c6dc39e040d2a68a168013be7220a27 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 20 Jul 2021 12:21:12 +0200 Subject: [PATCH 182/202] Update scATAC docs: - Reformat headings, restructure sections - Update BioRad read details --- docs/scatac-seq.rst | 94 +++++++++++++++++++++--------------------- docs/scatac-seq_qc.rst | 18 ++++---- 2 files changed, 59 insertions(+), 53 deletions(-) diff --git a/docs/scatac-seq.rst b/docs/scatac-seq.rst index a602d7d1..58d76843 100644 --- a/docs/scatac-seq.rst +++ b/docs/scatac-seq.rst @@ -6,27 +6,6 @@ This pipeline takes fastq files from paired end single cell ATAC-seq, and applie This workflow is currently available in the ``develop_atac`` branch (use ``nextflow pull vib-singlecell-nf/vsn-pipelines -r develop_atac`` to sync this branch). - -Optional Steps -************** - -1. Direct the Nextflow work directory to an alternate path (e.g. a scratch drive) using the ``NXF_WORK`` environmental variable:: - - nwork=/path/to/scratch/example_project - mkdir $nwork - export NXF_WORK=$nwork - -Note that if you start a new shell, ``NXF_WORK`` must be set again, or the pipeline will not resume properly. - - -2. Temporary directory mapping. - For large BAM files, the system default temp location may become full. - A workaround is to include a volume mapping to the alternate ``/tmp`` ``-B /alternate/path/to/tmp:/tmp`` using the volume mount options in Docker or Singularity. 
- For example in the container engine options: - - Singularity run options: ``runOptions = '--cleanenv -H $PWD -B /data,/alternate/path/to/tmp:/tmp'`` - - Docker run options: ``runOptions = '-i -v /data:/data -v /alternate/path/to/tmp:/tmp'`` - - ---- Pipeline Steps @@ -61,8 +40,11 @@ The full steps are: ---- -Pipeline Input Metadata -*********************** +Pipeline Details +**************** + +Input Metadata +-------------- The input to this pipeline is a (tab-delimited) metadata table with the sample ID, sequencing technology, and locations of the fastq files. Note that the fastq file fields must be full paths; this is not shown here for clarity: @@ -230,16 +212,16 @@ To run use the options: ---- -Technology ----------- +Technology types +---------------- The "technology" field in the metadata table controls two things: -1. *How technology-specific pipeline steps are applied.* - Currently there are two specific settings (``biorad`` and ``hydrop``) that use alternate pipelines processes (to extract and correct the barcode sequence from the two input fastqs). +1. **How technology-specific pipeline steps are applied.** + Currently there are two specific settings (``biorad`` and ``hydrop``) that use alternate pipelines processes (to extract and correct the barcode sequence from the input fastqs). Using any other keyword is allowed, and samples will be run with the standard pipeline steps (barcode correction against a whitelist). -2. *Which whitelist is used for barcode correction.* +2. **Which whitelist is used for barcode correction.** The "technology" field must match a key in the ``params.tools.singlecelltoolkit.barcode_correction.whitelist`` parameter list in the config file for that sample to be associated with a particular barcode whitelist. The "technology" field and whitelist key name can be set to any arbitrary string (e.g. ``standard``), with the exception of the technology-specific keywords above. @@ -249,7 +231,7 @@ The main modes are: ____________ The ``standard`` setting is the main pipeline mode. -It assumes a typical 10x Genomics style format with two read pair fastqs and a barcode fastq (note here that the barcode correction has already been performed, writing the ``CB`` tag into the comment of the barcode fastq):: +It assumes a typical 10x Genomics style format with two read pair fastqs and a barcode fastq (note that in the example here, the barcode correction has already been performed, writing the ``CB`` tag into the comment of the barcode fastq):: $ zcat sample_1_R1.fastq.gz | head -n 4 @A00311:74:HMLK5DMXX:1:1101:2013:1000 1:N:0:ACTCAGAC @@ -321,42 +303,63 @@ and transform it into:: ``biorad`` __________ -The ``biorad`` setting processes BioRad data using `BAP `_. +The ``biorad`` setting processes BioRad data using +`this script `_ +in our `aertslab/single_cell_toolkit `_ +(previously, this was done with `BAP `_). This takes input data:: $ zcat sample_2_R1.fastq.gz | head -n 4 - @NB551608:167:HNYFJBGXC:1:11101:11281:1033 1:N:0:TAAGGCGA - GCGTANACGTATGCATGACGGAAGTTAGTCACTGAGTCAGCAATCGTCGGCAGCGTCAGATGAGTNTAAGAGACAGGGTCAGGATGCGAGATTGACGGCTGCAATAACTAATAGGAAC + @A00794:327:HTJ55DRXX:1:2101:1154:1016 1:N:0:TAAGGCGA + GATCACCATATGCATGACATTCACGAGTCACTGAGTAACGCCTCGTCGGCAGCGTCAGATGTGTATAAGAGACAGCTGCAATGGCTGGAGCACACCCCATACTCATTCTGGTCTCCTT + - AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEE`_ `here `_ for details on how to run with this configuration. 
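+
+   As a minimal sketch (the metadata path is a placeholder), the combined run
+   then only needs the preprocessing input declared, since the QC steps consume
+   the bam/fragments channels piped directly from the preprocessing workflow:
+
+   .. code:: groovy
+
+       params {
+           data {
+               atac_preprocess {
+                   metadata = '/path/to/atac_preprocess_metadata.tsv'
+               }
+           }
+       }
+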
diff --git a/docs/scatac-seq_qc.rst b/docs/scatac-seq_qc.rst
index 5a9ffac2..287bc8dc 100644
--- a/docs/scatac-seq_qc.rst
+++ b/docs/scatac-seq_qc.rst
@@ -8,8 +8,12 @@ This workflow is currently available in the ``develop_atac`` branch (use ``nextf
 
 ----
 
-Optional Steps
-**************
+
+Running the workflow
+********************
+
+Technical considerations
+------------------------
 
 1. Direct the Nextflow work directory to an alternate path (e.g. a scratch drive) using the ``NXF_WORK`` environmental variable::
 
@@ -32,13 +36,13 @@ Note that if you start a new shell, ``NXF_WORK`` must be set again, or the pipel
 ----
 
 Configuration
-*************
+-------------
 
 For each sample, this pipeline take a bam and a fragments file. These can be specified separately, or from a Cell Ranger ATAC/ARC ``outs/`` path.
 
 Input with independent bam and fragments files
------------------------------------------------
+______________________________________________
 
 Use the profiles ``bam`` and ``fragments``::
 
@@ -75,7 +79,7 @@ Multiple files can be specified with ``*`` in ``file_paths`` or by separating th
 
 Input with Cell Ranger ATAC data
---------------------------------
+________________________________
 
 Use the ``tenx_atac`` profile::
 
@@ -95,7 +99,7 @@ Multiple files can be specified with ``*`` in ``tenx_atac`` or by separating the
 
 Input directly from the preprocessing pipeline
-----------------------------------------------
+______________________________________________
 
 It is also possible to run these QC steps directly after the ``atac_preprocess`` pipeline, with a single command.
 In this case, all the appropriate configuration profiles must be included at the configuration start::
 
@@ -117,7 +121,7 @@ To execute the preprocessing and mapping pipeline in one step, use the ``atac_pr
 ----
 
 Execution
-*********
+---------
 
 After configuring, the workflow can be run with:
 

From f87074c8d35d3b7bd050637a373ab6e22bd3e7d0 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Tue, 27 Jul 2021 12:34:56 +0200
Subject: [PATCH 183/202] Add barcode correction check to sctk:

- Check how many barcodes were corrected, throw error if the fraction falls
  below a threshold (~50%)
- Update docker image
- Add params for max_mismatches and min_frac_bcs_to_find to the barcode
  correction process
---
 src/singlecelltoolkit/conf/sctk_mapping.config        | 2 ++
 src/singlecelltoolkit/processes/barcode_correction.nf | 6 ++++--
 src/singlecelltoolkit/singlecelltoolkit.config        | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/singlecelltoolkit/conf/sctk_mapping.config b/src/singlecelltoolkit/conf/sctk_mapping.config
index a87bffc8..99731f8b 100644
--- a/src/singlecelltoolkit/conf/sctk_mapping.config
+++ b/src/singlecelltoolkit/conf/sctk_mapping.config
@@ -2,6 +2,8 @@ params {
     tools {
         singlecelltoolkit {
             barcode_correction {
+                max_mismatches = 1
+                min_frac_bcs_to_find = 0.5
                 whitelist {
                     standard = ''
                     multiome = ''
diff --git a/src/singlecelltoolkit/processes/barcode_correction.nf b/src/singlecelltoolkit/processes/barcode_correction.nf
index 518ad8dd..b754b4f9 100644
--- a/src/singlecelltoolkit/processes/barcode_correction.nf
+++ b/src/singlecelltoolkit/processes/barcode_correction.nf
@@ -26,13 +26,15 @@ process SCTK__BARCODE_CORRECTION {
         path("${sampleId}_bc_corrected.fastq.gz.corrected.bc_stats.tsv")
 
     script:
-        def sampleParams = params.parseConfig(sampleId, params.global, toolParams)
+        def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_correction)
         processParams = sampleParams.local
         """
correct_barcode_in_fastq.sh \ ${bc_whitelist} \ ${fastq_bc} \ - ${sampleId}_bc_corrected.fastq.gz + ${sampleId}_bc_corrected.fastq.gz \ + ${processParams.max_mismatches} \ + ${processParams.min_frac_bcs_to_find} """ } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index c60076ca..bb56bb55 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-07-06-ea48b36-2' + container = 'vibsinglecellnf/singlecelltoolkit:2021-07-27-8af49da' } } } From bb5c090e6bf0308e9d109a9cc5cd69d77da7ba9b Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Tue, 27 Jul 2021 14:40:52 +0200 Subject: [PATCH 184/202] Saturation script: Change default values, update image --- src/singlecelltoolkit/conf/sctk_saturation.config | 2 +- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/singlecelltoolkit/conf/sctk_saturation.config b/src/singlecelltoolkit/conf/sctk_saturation.config index de26c3da..5590efcd 100644 --- a/src/singlecelltoolkit/conf/sctk_saturation.config +++ b/src/singlecelltoolkit/conf/sctk_saturation.config @@ -3,7 +3,7 @@ params { singlecelltoolkit { saturation { percentages = '0.3,0.6,0.9' - sampling_fractions = '0.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0' + sampling_fractions = '[0.0,0.1,0.2,0.3,0.4,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.92,0.94,0.96,0.98,1.0] min_frags_per_cb = 200 } } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index bb56bb55..6fb46fb1 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-07-27-8af49da' + container = 'vibsinglecellnf/singlecelltoolkit:2021-07-27-5f01069' } } } From 22264d4cf05f7630935a8c878c71338002715f2a Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Wed, 28 Jul 2021 15:46:31 +0200 Subject: [PATCH 185/202] Saturation script updates - Fix bug with gzip detection --- src/singlecelltoolkit/Dockerfile | 5 ++--- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/singlecelltoolkit/Dockerfile b/src/singlecelltoolkit/Dockerfile index 7851e6a4..5c0a5866 100644 --- a/src/singlecelltoolkit/Dockerfile +++ b/src/singlecelltoolkit/Dockerfile @@ -32,14 +32,13 @@ RUN pip install --no-cache-dir --upgrade pip wheel && \ uncertainties \ typing \ pathlib \ - polars>=0.8.7 \ + polars>=0.8.14 \ matplotlib \ numpy # install seq (https://github.com/seq-lang/seq/): -#0.9.11 -ENV SEQ_VERSION=0.10.1 +ENV SEQ_VERSION=0.10.2 RUN mkdir -p /opt/seq && \ wget https://github.com/seq-lang/seq/releases/download/v${SEQ_VERSION}/seq-linux-x86_64.tar.gz && \ tar xzf seq-linux-x86_64.tar.gz --strip-components 1 -C /opt/seq && \ diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 6fb46fb1..69de9feb 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-07-27-5f01069' + container = 'vibsinglecellnf/singlecelltoolkit:2021-07-28-051c6ee' 
         }
     }
 }

From f87074c8d35d3b7bd050637a373ab6e22bd3e7d0 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Thu, 29 Jul 2021 11:23:59 +0200
Subject: [PATCH 186/202] Use both hydrop ATAC designs in ATAC preprocessing

- Two new keywords in the metadata: hydrop_2x384 and hydrop_3x96.
- Hydrop barcode extraction runs separately for each type, passing the
  parameter to the extract_hydrop_atac_barcode_from_R2_fastq.sh script
- #352
---
 .../processes/extract_hydrop_atac_barcode.nf |  2 ++
 .../singlecelltoolkit.config                 |  2 +-
 workflows/atac/preprocess.nf                 | 19 +++++++++++++------
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf b/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf
index fdad0d78..115f104e 100644
--- a/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf
+++ b/src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf
@@ -15,6 +15,7 @@ process SCTK__EXTRACT_HYDROP_ATAC_BARCODE {
               path(fastq_PE1),
               path(fastq_bc),
               path(fastq_PE2)
+        val(hydrop_atac_barcode_design)
 
     output:
         tuple val(sampleId),
@@ -30,6 +31,7 @@ process SCTK__EXTRACT_HYDROP_ATAC_BARCODE {
         extract_hydrop_atac_barcode_from_R2_fastq.sh \
             ${fastq_bc} \
             ${sampleId}_hydrop_barcode_R2.fastq.gz \
+            ${hydrop_atac_barcode_design} \
             pigz
         """
 }
diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config
index 69de9feb..543dcdd8 100644
--- a/src/singlecelltoolkit/singlecelltoolkit.config
+++ b/src/singlecelltoolkit/singlecelltoolkit.config
@@ -1,7 +1,7 @@
 params {
     tools {
         singlecelltoolkit {
-            container = 'vibsinglecellnf/singlecelltoolkit:2021-07-28-051c6ee'
+            container = 'vibsinglecellnf/singlecelltoolkit:2021-07-29-6863155'
         }
     }
 }
diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf
index ee46b0a1..7b37a860 100644
--- a/workflows/atac/preprocess.nf
+++ b/workflows/atac/preprocess.nf
@@ -34,7 +34,8 @@ include {
 
 include {
     barcode_correction as bc_correct_standard;
-    barcode_correction as bc_correct_hydrop;
+    barcode_correction as bc_correct_hydrop_2x384;
+    barcode_correction as bc_correct_hydrop_3x96;
     biorad_bc as bc_correct_biorad;
 } from './../../src/singlecelltoolkit/main.nf'
 
@@ -67,9 +68,10 @@ workflow ATAC_PREPROCESS {
                 )
             }
             .branch {
-                biorad: it[1] == 'biorad'
-                hydrop: it[1] == 'hydrop'
-                standard: true // capture all other technology types here
+                biorad:       it[1] == 'biorad'
+                hydrop_3x96:  it[1] == 'hydrop_3x96'
+                hydrop_2x384: it[1] == 'hydrop_2x384'
+                standard:     true // capture all other technology types here
             }
 
         /* standard data
@@ -78,7 +80,11 @@ workflow ATAC_PREPROCESS {
 
         /* HyDrop ATAC
           extract barcode and correct */
-        SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop) \
+        // HyDrop 3x96
+        SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop_3x96, 'hydrop_3x96') \
+            | bc_correct_hydrop
+        // HyDrop 2x384
+        SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop_2x384, 'hydrop_2x384') \
             | bc_correct_hydrop
 
         /* BioRad data
@@ -87,7 +93,8 @@ workflow ATAC_PREPROCESS {
 
         /* downstream steps */
         bc_correct_standard.out
-            .mix(bc_correct_hydrop.out)
+            .mix(bc_correct_hydrop_3x96.out)
+            .mix(bc_correct_hydrop_2x384.out)
             .mix(bc_correct_biorad.out) \
             | adapter_trimming \
             | mapping

From b9f26df69a2f2ea3bbea11ae5776f7931937ab87 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Thu, 29 Jul 2021 14:02:47 +0200
Subject: [PATCH 187/202] Fixes for new hydrop design

- Resolves #352
---
 src/singlecelltoolkit/conf/sctk_mapping.config | 3 ++-
 src/singlecelltoolkit/singlecelltoolkit.config | 2 +-
workflows/atac/preprocess.nf | 11 ++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/singlecelltoolkit/conf/sctk_mapping.config b/src/singlecelltoolkit/conf/sctk_mapping.config index 99731f8b..2b37a683 100644 --- a/src/singlecelltoolkit/conf/sctk_mapping.config +++ b/src/singlecelltoolkit/conf/sctk_mapping.config @@ -7,7 +7,8 @@ params { whitelist { standard = '' multiome = '' - hydrop = '' + hydrop_3x96 = '' + hydrop_2x384 = '' } } barcode_10x_scatac_fastqs { diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 543dcdd8..09460037 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-07-29-6863155' + container = 'vibsinglecellnf/singlecelltoolkit:2021-07-29-09cac13' } } } diff --git a/workflows/atac/preprocess.nf b/workflows/atac/preprocess.nf index 7b37a860..4a6e7e42 100644 --- a/workflows/atac/preprocess.nf +++ b/workflows/atac/preprocess.nf @@ -2,7 +2,8 @@ nextflow.enable.dsl=2 // process imports include { - SCTK__EXTRACT_HYDROP_ATAC_BARCODE; + SCTK__EXTRACT_HYDROP_ATAC_BARCODE as SCTK__EXTRACT_HYDROP_ATAC_BARCODE_2x384; + SCTK__EXTRACT_HYDROP_ATAC_BARCODE as SCTK__EXTRACT_HYDROP_ATAC_BARCODE_3x96; } from './../../src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf' include { TRIMGALORE__TRIM; @@ -81,11 +82,11 @@ workflow ATAC_PREPROCESS { /* HyDrop ATAC extract barcode and correct */ // HyDrop 3x96 - SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop_3x96, 'hydrop_3x96') \ - | bc_correct_hydrop + SCTK__EXTRACT_HYDROP_ATAC_BARCODE_3x96(data.hydrop_3x96, '3x96') \ + | bc_correct_hydrop_3x96 // HyDrop 2x384 - SCTK__EXTRACT_HYDROP_ATAC_BARCODE(data.hydrop_2x384, 'hydrop_2x384') \ - | bc_correct_hydrop + SCTK__EXTRACT_HYDROP_ATAC_BARCODE_2x384(data.hydrop_2x384, '2x384') \ + | bc_correct_hydrop_2x384 /* BioRad data extract barcode and correct */ From ae1338130172dc86ad49eb1714be2333e7936d51 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Mon, 16 Aug 2021 15:47:58 +0200 Subject: [PATCH 188/202] Update singlecelltoolkit - Docker image update - Fix params for saturation script --- src/singlecelltoolkit/conf/sctk_saturation.config | 2 +- src/singlecelltoolkit/singlecelltoolkit.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/singlecelltoolkit/conf/sctk_saturation.config b/src/singlecelltoolkit/conf/sctk_saturation.config index 5590efcd..e84dcd27 100644 --- a/src/singlecelltoolkit/conf/sctk_saturation.config +++ b/src/singlecelltoolkit/conf/sctk_saturation.config @@ -3,7 +3,7 @@ params { singlecelltoolkit { saturation { percentages = '0.3,0.6,0.9' - sampling_fractions = '[0.0,0.1,0.2,0.3,0.4,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.92,0.94,0.96,0.98,1.0] + sampling_fractions = '0.0,0.1,0.2,0.3,0.4,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.92,0.94,0.96,0.98,1.0' min_frags_per_cb = 200 } } diff --git a/src/singlecelltoolkit/singlecelltoolkit.config b/src/singlecelltoolkit/singlecelltoolkit.config index 69de9feb..09460037 100644 --- a/src/singlecelltoolkit/singlecelltoolkit.config +++ b/src/singlecelltoolkit/singlecelltoolkit.config @@ -1,7 +1,7 @@ params { tools { singlecelltoolkit { - container = 'vibsinglecellnf/singlecelltoolkit:2021-07-28-051c6ee' + container = 'vibsinglecellnf/singlecelltoolkit:2021-07-29-09cac13' } } } From 388ccb706d32e2e11113c1c09bd26a2d4dfc7c72 Mon Sep 17 00:00:00 2001 From: 
KrisDavie Date: Thu, 19 Aug 2021 12:48:39 +0200 Subject: [PATCH 189/202] Implement check for config version --- VERSION | 1 + src/utils/workflows/utils.nf | 30 +++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 VERSION diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..9d041d8e --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.26.1 \ No newline at end of file diff --git a/src/utils/workflows/utils.nf b/src/utils/workflows/utils.nf index c4b544a4..bf699b13 100644 --- a/src/utils/workflows/utils.nf +++ b/src/utils/workflows/utils.nf @@ -1,6 +1,7 @@ nextflow.enable.dsl=2 import static groovy.json.JsonOutput.* +import java.nio.file.Paths ////////////////////////////////////////////////////// // Process imports: @@ -125,12 +126,39 @@ def setSeed(params) { def INIT(params) { - // Set the seed + // Version check + def repoFilePath = workflow.scriptFile.getParent().toRealPath().toString() + String repoVersion = new File(Paths.get(repoFilePath, '/VERSION').toString()).text + + if (params.containsKey('disableVersionCheck')) { + Channel.from('').view { + """ +------------------------------------------------------------------ +\u001B[33m Version check has been bypassed. \u001B[0m +\u001B[33m Config version: v${workflow.manifest.version}, VSN version: v${repoVersion}. \u001B[0m +------------------------------------------------------------------ + """ + } + } + else if (workflow.manifest.version != repoVersion) { + throw new Exception( + """ +------------------------------------------------------------------ +\u001B[31m The config file you have provided was generated with a different version of VSN (v${workflow.manifest.version}). \u001B[0m +\u001B[31m Compatibility of configs between versions is NOT guaranteed! \u001B[0m +\u001B[31m To continue, please regenerate your config using the current version (v${repoVersion}) and rerun.\u001B[0m + +\u001B[31m Bypass this check at your own risk with "--disableVersionCheck" \u001B[0m +------------------------------------------------------------------ + """ + ) + } setSeed(params) if(!params.containsKey("misc") || !params.misc.containsKey("test")) { includeConfig(params, 'conf/test.config') params.misc.test.enabled = false } + // Save manifest and params for notebook // Remove any closure attached to the config (this is for backward compatibility) def paramsCopy = params.findAll({!["parseConfig", "parse-config"].contains(it.key)}) From b68e899d96e6eb99c73b0e8f4371d04150c576d5 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Fri, 20 Aug 2021 14:22:06 +0200 Subject: [PATCH 190/202] Fix to avoid input filename collisions in ATAC QC workflow - When staging multiple cellranger fragments files, an input file collision would occur (files are named identically). This is fixed by adding a process to rename these files with the sample ID as a prefix. --- src/pycistopic/processes/compute_qc_stats.nf | 21 +++++++++++++++++ workflows/atac/qc_filtering.nf | 24 ++++++++++++++++---- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/pycistopic/processes/compute_qc_stats.nf b/src/pycistopic/processes/compute_qc_stats.nf index a83386f5..73a0b0f8 100644 --- a/src/pycistopic/processes/compute_qc_stats.nf +++ b/src/pycistopic/processes/compute_qc_stats.nf @@ -5,6 +5,27 @@ binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/pycistopic/bi toolParams = params.tools.pycistopic processParams = params.tools.pycistopic.compute_qc_stats +process rename_fragments { + + container toolParams.container + label 'compute_resources__minimal' + + input: + tuple val(sampleId), + path(f) + output: + tuple val(sampleId), + path("${sampleId}_${f}*") + + script: + """ + ln -s ${f[0]} ${sampleId}_${f[0]} + ln -s ${f[1]} ${sampleId}_${f[1]} + """ + +} + + process PYCISTOPIC__COMPUTE_QC_STATS { publishDir "${params.global.outdir}/data/pycistopic/qc/", mode: params.utils.publish.mode diff --git a/workflows/atac/qc_filtering.nf b/workflows/atac/qc_filtering.nf index eea4a81a..39092704 100644 --- a/workflows/atac/qc_filtering.nf +++ b/workflows/atac/qc_filtering.nf @@ -7,7 +7,10 @@ include { SC__ARCHR__CELL_CALLING; } from './../../src/archr/processes/cell_call include { PYCISTOPIC__BIOMART_ANNOT; } from './../../src/pycistopic/processes/biomart_annot.nf' include { PYCISTOPIC__MACS2_CALL_PEAKS; } from './../../src/pycistopic/processes/macs2_call_peaks.nf' -include { PYCISTOPIC__COMPUTE_QC_STATS; } from './../../src/pycistopic/processes/compute_qc_stats.nf' +include { + rename_fragments; + PYCISTOPIC__COMPUTE_QC_STATS; +} from './../../src/pycistopic/processes/compute_qc_stats.nf' include { SCTK__SATURATION; SCTK__SATURATION as SCTK__SATURATION_BC_WL; @@ -77,13 +80,23 @@ workflow ATAC_QC_PREFILTER { } .set{ data_split } - // get cellranger data & merge + // split the cellranger data into separate bam and fragments channels data_split.cellranger \ | cellranger_output_to_bam_fragments | set { data_cr } - + /* 'mix' the separate bam and fragments channels with the + cellranger bam and fragments files, and use these channels going + forward + */ bam = data_split.bam.mix(data_cr.bam) - fragments = data_split.fragments.mix(data_cr.fragments) + /* for fragments, rename the files to include the sample ID + prefix (necessary for cellranger inputs, which all have the same + file name). This is not currently necessary for the bam files since + they are processed in separate processes. + */ + fragments = rename_fragments( + data_split.fragments.mix(data_cr.fragments) + ) biomart = PYCISTOPIC__BIOMART_ANNOT() @@ -94,7 +107,8 @@ workflow ATAC_QC_PREFILTER { /* pycisTopic qc: pass every fragment/peak file into a single process together. These will be formatted as a string "sampleId,fragments,peak", - which is parsed in the python script. + which is parsed in the python script. The fragments and peaks files + are staged separately */ fragments.map { it -> tuple(it[0], it[1][0].getName(), it[1][1].getName() ) } // [sampleId, fragments, fragments.tbi] .join(peaks.map{ it -> tuple(it[0], it[1].getName()) }) // combine with peaks for each sample From ae10e7b3e84e9b0da8d8fd70f5802a29bb0d9be8 Mon Sep 17 00:00:00 2001 From: dweemx Date: Tue, 24 Aug 2021 15:26:40 +0200 Subject: [PATCH 191/202] Fixes a bug not able to use seurat_v3 flavor Scanpy in hvg selection Add some docs --- docs/features.rst | 45 +++++++++++++++++++ .../sc_find_variable_genes.py | 2 +- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/docs/features.rst b/docs/features.rst index f2417512..d6158d59 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -439,6 +439,51 @@ By default, don't regress any variable out. To enable this features, the ``scanp Add any variable in ``variablesToRegressOut`` to regress out: e.g.: 'n_counts', 'percent_mito'. 
+Highly Variable Genes Selection +------------------------------- + +This step is a wrapper around the `Scanpy` ``scanpy.pp.highly_variable_genes`` function and regarding the parameters used it is following the documentation available at `scanpy-pp-highly-variable-genes`_. +By default, it will use the ``seurat`` flavor to select variable genes and will also keep the same default values for the 4 different thresholds (as the documentation): ``min_mean``, ``max_mean``, ``min_disp``, ``max_disp``. + +.. _`scanpy-pp-highly-variable-genes`: https://scanpy.readthedocs.io/en/latest/generated/scanpy.pp.highly_variable_genes.html#scanpy-pp-highly-variable-genes. + +.. code:: groovy + + params { + tools { + scanpy { + feature_selection { + report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/scanpy/bin/reports/sc_select_variable_genes_report.ipynb" + flavor = 'seurat' + minMean = 0.0125 + maxMean = 3 + minDisp = 0.5 + off = 'h5ad' + } + } + } + } + + +Other flavors are available as ``cell_ranger`` and ``seurat_v3``. In order to use the ``seurat_v3`` flavor, one parameter is required to be specified: ``nTopGenes`` in the config file as follows: + +.. code:: groovy + + params { + tools { + scanpy { + feature_selection { + report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/scanpy/bin/reports/sc_select_variable_genes_report.ipynb" + flavor = 'seurat_v3' + nTopGenes = 2000 + off = 'h5ad' + } + } + } + } + + + Skip steps ---------- diff --git a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py index c00c228d..29ed278e 100755 --- a/src/scanpy/bin/feature_selection/sc_find_variable_genes.py +++ b/src/scanpy/bin/feature_selection/sc_find_variable_genes.py @@ -105,7 +105,7 @@ ) elif args.flavor == "cell_ranger" or args.flavor == "seurat_v3": - if args.flavor == "seurat_v3": + if args.flavor == "seurat_v3" and args.n_top_genes is None: raise Exception("VSN ERROR: --n-top-genes (nTopGenes in config) is required when flavor is 'seurat_v3',") sc.pp.highly_variable_genes( From 7a314c61eb0eea38cece6ea761cdd879cc29cff2 Mon Sep 17 00:00:00 2001 From: dweemx Date: Wed, 25 Aug 2021 11:35:27 +0200 Subject: [PATCH 192/202] [sratoolkit] Fix typo in NXF process call --- src/sratoolkit/workflows/downloadFastQ.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sratoolkit/workflows/downloadFastQ.nf b/src/sratoolkit/workflows/downloadFastQ.nf index ebc88a2f..8779b2d2 100644 --- a/src/sratoolkit/workflows/downloadFastQ.nf +++ b/src/sratoolkit/workflows/downloadFastQ.nf @@ -21,7 +21,7 @@ workflow SRATOOLKIT__DOWNLOAD_FASTQS { main: out = data | \ DOWNLOAD_FASTQS_FROM_SRA_ACC_ID | \ - FIX_AND_COMPRESS_SRA_FASTQ + FIX_AND_COMPRESS_SRA_FASTQS emit: // Returns (sraId, *.fastq.gz) From 7276cc0471002739748eea5fb262687de4eb7e0f Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 26 Aug 2021 10:18:36 +0200 Subject: [PATCH 193/202] [singlecelltoolkit] FIX_AND_COMPRESS_SRA_FASTQS process should use singlecelltoolkit container --- src/scanpy/conf/min.config | 2 +- src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/scanpy/conf/min.config b/src/scanpy/conf/min.config index c0188bbb..2732ced9 100644 --- a/src/scanpy/conf/min.config +++ b/src/scanpy/conf/min.config @@ -7,7 +7,7 @@ params { } feature_selection { report_ipynb = "${params.misc.test.enabled ? '../../..' 
: ''}/src/scanpy/bin/reports/sc_select_variable_genes_report.ipynb" - method = 'mean_disp_plot' + flavor = 'seurat' minMean = 0.0125 // 0.125 maxMean = 3 // 2.5 minDisp = 0.5 //0.7 diff --git a/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf b/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf index f265d633..ecb2fe7b 100644 --- a/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf +++ b/src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf @@ -6,11 +6,9 @@ if(!params.containsKey("test")) { binDir = "" } -toolParams = params.tools.sratoolkit - process FIX_AND_COMPRESS_SRA_FASTQS { - container toolParams.container + container "vibsinglecellnf/singlecelltoolkit:2021-07-29-09cac13" publishDir "${params.global.outdir}/data/raw/fastqs_fixed_and_compressed", mode: 'symlink', overwrite: true label 'compute_resources__cpu' From 31bcd5484b9508e8c4151637d926f42d75b83112 Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 26 Aug 2021 15:45:15 +0200 Subject: [PATCH 194/202] Update RtD --- docs/development.rst | 497 ++++++++++++++++++++++----------------- docs/features.rst | 12 +- docs/getting-started.rst | 101 ++++---- docs/pipelines.rst | 6 +- 4 files changed, 344 insertions(+), 272 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 88632a08..0d3ba68e 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -29,12 +29,12 @@ Steps: #. From your local copy of ``vsn-pipelines`` GitHub repository, create a new branch called ``feature/[github-issue-id]-[description]``. - In this case, +In this case, - - ``[github-issue-id] = 115`` - - ``[description] = add_harmony_batch_correction_method`` +- ``[github-issue-id] = 115`` +- ``[description] = add_harmony_batch_correction_method`` - It is highly recommended to start from the ``develop`` branch: +It is highly recommended to start from the ``develop`` branch: .. code:: bash @@ -43,7 +43,7 @@ Steps: git pull git checkout -b feature/115-add_harmony_batch_correction_method -#. Use the `template repository`_ in the vib-singlecell-nf organisation to create the framework for the new module in ``src/``: +#. Use the `template repository`_ in the ``vib-singlecell-nf`` organisation to create the framework for the new module in ``src/``: .. code:: bash @@ -51,8 +51,7 @@ Steps: .. _`template repository`: https://github.com/vib-singlecell-nf/template -#. Now, you can start to edit file in the tool module that is now located in ``src/``. - Optionally, you can delete the ``.git`` directory in the new module to avoid confusion in future local development: +#. Now, you can start to edit file in the tool module that is now located in ``src/``. Optionally, you can delete the ``.git`` directory in the new module to avoid confusion in future local development: .. code:: bash @@ -63,31 +62,63 @@ Steps: .. 
code:: dockerfile - FROM dweemx/sctx-seurat:3.1.2 - - RUN apt-get -y update && \ - apt-get install -y libcurl4-openssl-dev libxml2-dev zlib1g-dev libhdf5-dev && \ - apt-get install -y libssl-dev && \ - # png.h: No such file or directory - apt-get install -y libpng-dev && \ - R -e "install.packages('optparse')" && \ - R -e "devtools::install_github(repo = 'aertslab/SCopeLoomR')" && \ - R -e "devtools::install_github('immunogenomics/harmony')" && \ - # Need to run ps - apt-get -y install procps && \ - apt-get -y install libxml2 && \ - # Clean - rm -rf /tmp/* && \ - apt-get autoremove -y && \ - apt-get autoclean -y && \ - rm -rf /var/cache/apt/* && \ - rm -rf /var/lib/apt/lists/* && \ - apt-get clean - + FROM continuumio/miniconda3 + + SHELL ["/bin/bash", "--login", "-c"] + + ADD environment.yml /tmp/environment.yml + RUN conda env create -f /tmp/environment.yml + + RUN head -1 /tmp/environment.yml | cut -d' ' -f2 > /tmp/version \ + && ln -s "/opt/conda/envs/$(cat /tmp/version)" /opt/conda/venv + + # Initialize conda in bash config files: + RUN conda init bash + + # Activate the environment, and make sure it's activated: + RUN echo "conda activate $(cat /tmp/version)" >> ~/.bashrc && \ + conda activate $(cat /tmp/version) && \ + R -e "devtools::install_github(repo = 'dynverse/anndata', ref = '0.7.5.2')" && \ + R -e "devtools::install_github(repo = 'aertslab/SCopeLoomR')" + + RUN apt-get -y update \ + # Need to run ps + && apt-get -y install procps \ + && apt-get -y install libxml2 \ + # Clean + && conda clean -afy \ + && rm -rf /var/cache/apt/* \ + && rm -rf /var/lib/apt/lists/* + + RUN echo "source activate $(cat /tmp/version)" >> ~/.bashrc + ENV PATH="/opt/conda/venv/bin:${PATH}" + + + + .. code:: yaml + + # environment.yml + name: harmony-v1.0-3 + channels: + - r + - conda-forge + - bioconda + dependencies: + - python=3.7 + - anndata=0.7.6 + - r-base=4.0.2 + - r-argparse=2.0.1 + - r-devtools + - r-reticulate=1.20 + - r-hdf5r + - r-harmony #. Rename the ``nextflow.config`` file to create the ``harmony.config`` configuration file. - * Each process's options should be in their own level. With a single process, you do not need one extra level. + * Each process's options should be in their own level. With a single process, you do not need one extra level. The ``report_ipynb`` Jupyter Notebook is available here_. + + .. _here: https://github.com/vib-singlecell-nf/harmony/blob/master/bin/reports/sc_harmony_report.ipynb + .. code:: groovy @@ -102,125 +133,160 @@ Steps: } - The ``report_ipynb`` Jupyter Notebook is available here_. - - .. _here: https://github.com/vib-singlecell-nf/harmony/blob/master/bin/reports/sc_harmony_report.ipynb - #. Create the R script to run Harmony .. 
code:: r - #!/usr/bin/env Rscript - - print("##################################################") - print("# Harmony: Algorithm for single cell integration #") - print("##################################################") - + print("#############################################################") + print("# Harmony: Algorithm for single cell integration #") + print('# GitHub: https://github.com/immunogenomics/harmony #') + print('# Paper: https://www.nature.com/articles/s41592-019-0619-0 #') + print("#############################################################") + # Loading dependencies scripts - - library("optparse") - parser <- OptionParser( - prog = "run_harmony.R", - description = "Scalable integration of single cell RNAseq data for batch correction and meta analysis" + library("argparse") + library("reticulate") + library("anndata") + + # Link Python to this R session + use_python("/opt/conda/envs/harmony-v1.0-3/bin") + Sys.setenv(RETICULATE_PYTHON = "/opt/conda/envs/harmony-v1.0-3/bin") + + parser <- ArgumentParser(description='Scalable integration of single cell RNAseq data for batch correction and meta analysis') + parser$add_argument( + 'input', + metavar='INPUT', + type="character", + help='Input file [default]' ) - parser <- add_option( - parser, - c("-i", "--input-file"), - action = "store", - default = NULL, - help = "Input file [default]" + parser$add_argument( + '--output-prefix', + type="character", + dest='output_prefix', + default = "foo", + help="Prefix path to save output files. [default %default]" ) - parser <- add_option( - parser, - c("-a", "--vars-use"), - action = "store", - default = NULL, - help = "If meta_data is dataframe, this defined which variable(s) to remove (character vector)." + parser$add_argument( + '--seed', + type="character", + dest='seed', + default=617, + help='Seed. [default %default]' ) - parser <- add_option( - parser, - c("-p", "--do-pca"), - action = "store", - default = FALSE, - help = "Whether to perform PCA on input matrix." + parser$add_argument( + "--vars-use", + type="character", + dest='vars_use', + action="append", + default=NULL, + help='If meta_data is dataframe, this defined which variable(s) to remove (character vector).' ) - parser <- add_option( - parser, - c("-o", "--output-prefix"), - action = "store", - default = "foo", - help="Prefix path to save output files. [default %default]" + parser$add_argument( + '--do-pca', + type="logical", + dest='do_pca', + action="store", + default=FALSE, + help='Whether to perform PCA on input matrix.' ) - parser <- add_option( - parser, - c("-s", "--seed"), - action = "store", - default = 617, - help="Seed. [default %default]" + parser$add_argument( + '--theta', + type="double", + dest='theta', + default=NULL, + help='Diversity clustering penalty parameter. Specify for each variable in vars_use Default theta=2. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters. [default %default]' ) - - args <- parse_args(parser) - + parser$add_argument( + '--lambda', + type="double", + dest='lambda', + default=NULL, + help='Ridge regression penalty parameter. Specify for each variable in vars_use. Default lambda=1. Lambda must be strictly positive. Smaller values result in more aggressive correction. [default %default]' + ) + parser$add_argument( + '--epsilon-harmony', + type="double", + dest='epsilon_harmony', + default=1e-04, + help='Convergence tolerance for Harmony. Set to -Inf to never stop early. 
[default %default]'
+    )
+
+
+    args <- parser$parse_args()
+
+    if(args$epsilon_harmony < 0) {
+        args$epsilon_harmony <- -Inf
+        print("Setting epsilon.harmony argument to -Inf...")
+    }
+
     cat("Parameters: \n")
     print(args)
 
-    if(is.null(args$`vars-use`)) {
+    if(is.null(args$vars_use)) {
         stop("The parameter --vars-use has to be set.")
     }
 
     # Required by irlba::irlba (which harmony depends on) for reproducibility
     if(!is.null(args$seed)) {
-        set.seed(args$seed)
+        set.seed(args$seed)
     } else {
-        warnings("No seed is set, this will likely give none reproducible results.")
+        warning("No seed is set, this will likely give non-reproducible results.")
     }
-
-    input_ext <- tools::file_ext(args$`input-file`)
-
+
+    # Required for reproducibility in case numeric parameters are passed (e.g.: theta, lambda)
+    args <- lapply(X = args, FUN = function(arg) {
+        if(is.numeric(x = arg)) {
+            if(arg %% 1 == 0) {
+                return (as.integer(x = arg))
+            } else {
+                return (arg)
+            }
+        }
+        return (arg)
+    })
+
+    input_ext <- tools::file_ext(args$input)
+
     if(input_ext == "h5ad") {
-        # Current fix until https://github.com/satijalab/seurat/issues/2485 is fixed
-        file <- hdf5r::h5file(filename = args$`input-file`, mode = 'r')
-        if(!("X_pca" %in% names(x = file[["obsm"]]))) {
-            stop("X_pca slot is not found in the AnnData (h5ad).")
-        }
-        obs <- file[['obs']][]
-        pca_embeddings <- t(x = file[["obsm"]][["X_pca"]][,])
-        row.names(x = pca_embeddings) <- obs$index
-        colnames(x = pca_embeddings) <- paste0("PCA_", seq(from = 1, to = ncol(x = pca_embeddings)))
-        metadata <- obs
-        # seurat <- Seurat::ReadH5AD(file = args$`input-file`)
-        # if(!("pca" %in% names(seurat@reductions)) || is.null(x = seurat@reductions$pca))
-        #     stop("Expects a PCA embeddings data matrix but it does not exist.")
-        # data <- seurat@reductions$pca
-        # pca_embeddings <- data@cell.embeddings
-        # metadata <- seurat@meta.data
+        adata <- anndata::read_h5ad(filename = args$input)
+        if(!("X_pca" %in% names(x = adata$obsm))) {
+            stop("X_pca slot is not found in the AnnData (h5ad).")
+        }
+        obs <- adata$obs
+        pca_embeddings <- adata$obsm[["X_pca"]]
+        row.names(x = pca_embeddings) <- row.names(x = obs)
+        colnames(x = pca_embeddings) <- paste0("PCA_", seq(from = 1, to = ncol(x = pca_embeddings)))
+        metadata <- obs
     } else {
-        stop(paste0("Unrecognized input file format: ", input_ext, "."))
+        stop(paste0("Unrecognized input file format: ", input_ext, "."))
     }
-
-    print(paste0("PCA embeddings matrix has ", dim(x = data)[1], " rows, ", dim(x = data)[2], " columns."))
-
-    if(sum(args$`vars-use` %in% colnames(x = metadata)) != length(x = args$`vars-use`)) {
+
+    print(paste0("PCA embeddings matrix has ", dim(x = pca_embeddings)[1], " rows, ", dim(x = pca_embeddings)[2], " columns."))
+
+    if(sum(args$vars_use %in% colnames(x = metadata)) != length(x = args$vars_use)) {
         stop("Some argument value from the parameter(s) --vars-use are not found in the metadata.")
     }
 
+    print(paste0("Batch variables used for integration: ", paste0(args$vars_use, collapse=", ")))
+
     # Run Harmony
     # Expects PCA matrix (Cells as rows and PCs as columns.) 
harmony_embeddings <- harmony::HarmonyMatrix(
-        data_mat = pca_embeddings
-        , meta_data = metadata
-        , vars_use = args$`vars-use`
-        , do_pca = args$`do-pca`
-        , verbose = FALSE
+        data_mat = pca_embeddings,
+        meta_data = metadata,
+        vars_use = args$vars_use,
+        do_pca = args$do_pca,
+        theta = args$theta,
+        lambda = args$lambda,
+        epsilon.harmony = args$epsilon_harmony,
+        verbose = FALSE
     )
-
+
     # Save the results
-    ## PCA corrected embeddings
-
     write.table(
         x = harmony_embeddings,
-        file = paste0(args$`output-prefix`, ".tsv"),
+        file = paste0(args$output_prefix, ".tsv"),
         quote = FALSE,
         sep = "\t",
         row.names = TRUE,
@@ -228,7 +294,6 @@ Steps:
     )
 
-
 #. Create the Nextflow process that will run the Harmony R script defined in the previous step.
 
 .. code:: groovy
 
     nextflow.preview.dsl=2
 
     binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/harmony/bin/" : ""
 
     process SC__HARMONY__HARMONY_MATRIX {
-
+
         container params.tools.harmony.container
         publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink'
-        clusterOptions "-l nodes=1:ppn=${params.global.threads} -l walltime=1:00:00 -A ${params.global.qsubaccount}"
-
+        label 'compute_resources__default'
+
         input:
-            tuple val(sampleId), path(f)
-
+            tuple \
+                val(sampleId), \
+                path(f)
+
         output:
-            tuple val(sampleId), path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv")
-
+            tuple \
+                val(sampleId), \
+                path("${sampleId}.SC__HARMONY__HARMONY_MATRIX.tsv")
+
         script:
             def sampleParams = params.parseConfig(sampleId, params.global, params.tools.harmony)
             processParams = sampleParams.local
            varsUseAsArguments = processParams.varsUse.collect({ '--vars-use' + ' ' + it }).join(' ')
             """
             ${binDir}run_harmony.R \
+                ${f} \
                 --seed ${params.global.seed} \
-                --input-file ${f} \
                 ${varsUseAsArguments} \
+                ${processParams?.theta ? "--theta "+ processParams.theta : "" } \
+                ${processParams?.lambda ? "--lambda "+ processParams.lambda : "" } \
+                ${processParams?.epsilonHarmony ? "--epsilon-harmony "+ processParams.epsilonHarmony : "" } \
                 --output-prefix "${sampleId}.SC__HARMONY__HARMONY_MATRIX"
             """
-
+
 }
 
 #. Create a Nextflow "subworkflow" that will call the Nextflow process defined in the previous step and perform some other tasks (dimensionality reduction, cluster identification, marker genes identification and report generation). This step is not required. However, if it is skipped, the code would still need to be added into the main ``harmony`` workflow (`workflows/harmony.nf`, see the next step)
 
 .. 
code:: groovy @@ -286,7 +356,7 @@ Steps: PUBLISH as PUBLISH_BEC_DIMRED_OUTPUT; PUBLISH as PUBLISH_FINAL_HARMONY_OUTPUT; } from "../../utils/workflows/utils.nf" params(params) - + include { SC__HARMONY__HARMONY_MATRIX; } from './../processes/runHarmony.nf' params(params) @@ -305,23 +375,23 @@ Steps: include { CLUSTER_IDENTIFICATION; } from './../../scanpy/workflows/cluster_identification.nf' params(params) // Don't only import a specific process (the function needs also to be imported) - + // reporting: include { GENERATE_DUAL_INPUT_REPORT } from './../../scanpy/workflows/create_report.nf' params(params) - + ////////////////////////////////////////////////////// // Define the workflow - + workflow BEC_HARMONY { - + take: normalizedTransformedData dimReductionData // Expects (sampleId, anndata) clusterIdentificationPreBatchEffectCorrection - + main: // Run Harmony harmony_embeddings = SC__HARMONY__HARMONY_MATRIX( @@ -334,15 +404,18 @@ Steps: it -> tuple(it[0], it[1]) }.join(harmony_embeddings) ) - + PUBLISH_BEC_OUTPUT( - SC__H5AD_UPDATE_X_PCA.out, + SC__H5AD_UPDATE_X_PCA.out.map { + // if stashedParams not there, just put null 3rd arg + it -> tuple(it[0], it[1], it.size() > 2 ? it[2]: null) + }, "BEC_HARMONY.output", "h5ad", null, false ) - + NEIGHBORHOOD_GRAPH( SC__H5AD_UPDATE_X_PCA.out.join( dimReductionData.map { @@ -350,10 +423,10 @@ Steps: } ) ) - + // Run dimensionality reduction DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out ) - + PUBLISH_BEC_DIMRED_OUTPUT( DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "BEC_HARMONY.dimred_output", @@ -361,7 +434,7 @@ Steps: null, false ) - + // Run clustering // Define the parameters for clustering def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) @@ -370,7 +443,7 @@ Steps: DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "Post Batch Effect Correction (Harmony)" ) - + marker_genes = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> tuple( it[0], // sampleId @@ -378,7 +451,7 @@ Steps: !clusteringParams.isParameterExplorationModeOn() ? null : it[2..(it.size()-1)], // Stash params ) } - + PUBLISH_FINAL_HARMONY_OUTPUT( marker_genes.map { it -> tuple(it[0], it[1], it[2]) @@ -404,19 +477,19 @@ Steps: "SC_BEC_HARMONY_report", clusteringParams.isParameterExplorationModeOn() ) - + emit: data = CLUSTER_IDENTIFICATION.out.marker_genes cluster_report = CLUSTER_IDENTIFICATION.out.report harmony_report - + } -#. In the ``vsn-pipelines``, create a new main workflow called ``harmony.nf`` under ``workflows/``: +#. In the ``vsn-pipelines``, create a new main workflow called ``harmony.nf`` under ``workflows/`` .. 
code:: groovy - nextflow.preview.dsl=2 + nextflow.enable.dsl=2 //////////////////////////////////////////////////////// // Import sub-workflows/processes from the utils module: @@ -426,74 +499,81 @@ Steps: include { clean; SC__FILE_CONVERTER; - SC__FILE_CONCATENATOR + SC__FILE_CONCATENATOR; } from '../src/utils/processes/utils.nf' params(params) include { - COMBINE_BY_PARAMS + COMBINE_BY_PARAMS; } from '../src/utils/workflows/utils.nf' params(params) include { - SC__H5AD_TO_FILTERED_LOOM - } from '../src/utils/processes/h5adToLoom.nf' params(params) + FINALIZE; + } from '../src/utils/workflows/finalize.nf' params(params) include { - FILE_CONVERTER - } from '../src/utils/workflows/fileConverter.nf' params(params) + FILTER_AND_ANNOTATE_AND_CLEAN; + } from '../src/utils/workflows/filterAnnotateClean.nf' params(params) include { - UTILS__GENERATE_WORKFLOW_CONFIG_REPORT + UTILS__GENERATE_WORKFLOW_CONFIG_REPORT; } from '../src/utils/processes/reports.nf' params(params) - + //////////////////////////////////////////////////////// // Import sub-workflows/processes from the tool module: include { - QC_FILTER + QC_FILTER; } from '../src/scanpy/workflows/qc_filter.nf' params(params) include { - NORMALIZE_TRANSFORM + NORMALIZE_TRANSFORM; } from '../src/scanpy/workflows/normalize_transform.nf' params(params) include { - HVG_SELECTION + HVG_SELECTION; } from '../src/scanpy/workflows/hvg_selection.nf' params(params) include { - NEIGHBORHOOD_GRAPH + NEIGHBORHOOD_GRAPH; } from '../src/scanpy/workflows/neighborhood_graph.nf' params(params) include { - DIM_REDUCTION_PCA + DIM_REDUCTION_PCA; } from '../src/scanpy/workflows/dim_reduction_pca.nf' params(params) include { - DIM_REDUCTION_TSNE_UMAP + DIM_REDUCTION_TSNE_UMAP; } from '../src/scanpy/workflows/dim_reduction.nf' params(params) // cluster identification include { - SC__SCANPY__CLUSTERING_PARAMS + SC__SCANPY__CLUSTERING_PARAMS; } from '../src/scanpy/processes/cluster.nf' params(params) include { - CLUSTER_IDENTIFICATION + CLUSTER_IDENTIFICATION; } from '../src/scanpy/workflows/cluster_identification.nf' params(params) include { - BEC_HARMONY + BEC_HARMONY; } from '../src/harmony/workflows/bec_harmony.nf' params(params) + include { + SC__DIRECTS__SELECT_DEFAULT_CLUSTERING + } from '../src/directs/processes/selectDefaultClustering.nf' // reporting: include { - SC__SCANPY__MERGE_REPORTS + SC__SCANPY__MERGE_REPORTS; } from '../src/scanpy/processes/reports.nf' params(params) include { - SC__SCANPY__REPORT_TO_HTML + SC__SCANPY__REPORT_TO_HTML; } from '../src/scanpy/processes/reports.nf' params(params) - - + + workflow harmony { - + take: data - + main: + // Data processing + // To avoid variable 'params' already defined in the process scope + def scanpyParams = params.tools.scanpy + out = data | \ SC__FILE_CONVERTER | \ FILTER_AND_ANNOTATE_AND_CLEAN - - if(params.tools.scanpy.containsKey("filter")) { + + if(scanpyParams.containsKey("filter")) { out = QC_FILTER( out ).filtered // Remove concat } - if(params.utils.file_concatenator) { + if(params.utils?.file_concatenator) { out = SC__FILE_CONCATENATOR( out.map { it -> it[1] @@ -502,21 +582,21 @@ Steps: ) ) } - if(params.tools.scanpy.containsKey("data_transformation") && params.tools.scanpy.containsKey("normalization")) { + if(scanpyParams.containsKey("data_transformation") && scanpyParams.containsKey("normalization")) { out = NORMALIZE_TRANSFORM( out ) } out = HVG_SELECTION( out ) - DIM_REDUCTION_PCA( out ) + DIM_REDUCTION_PCA( out.scaled ) NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out ) DIM_REDUCTION_TSNE_UMAP( 
NEIGHBORHOOD_GRAPH.out ) - + // Perform the clustering step w/o batch effect correction (for comparison matter) clusterIdentificationPreBatchEffectCorrection = CLUSTER_IDENTIFICATION( NORMALIZE_TRANSFORM.out, DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "Pre Batch Effect Correction" ) - + // Perform the batch effect correction BEC_HARMONY( NORMALIZE_TRANSFORM.out, @@ -525,34 +605,31 @@ Steps: clusterIdentificationPreBatchEffectCorrection.marker_genes ) - // Conversion - // Convert h5ad to X (here we choose: loom format) - if(params.utils?.file_concatenator) { - filteredloom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONCATENATOR.out ) - scopeloom = FILE_CONVERTER( - BEC_HARMONY.out.data.groupTuple(), - 'HARMONY.final_output', - 'loom', - SC__FILE_CONCATENATOR.out - ) + // Finalize + FINALIZE( + params.utils?.file_concatenator ? SC__FILE_CONCATENATOR.out : SC__FILE_CONVERTER.out, + BEC_HARMONY.out.data, + 'HARMONY.final_output' + ) + + // Define the parameters for clustering + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(scanpyParams.clustering) ) + + // Select a default clustering when in parameter exploration mode + if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { + scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( FINALIZE.out.scopeloom ) } else { - filteredloom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONVERTER.out ) - scopeloom = FILE_CONVERTER( - BEC_HARMONY.out.data.groupTuple(), - 'HARMONY.final_output', - 'loom', - SC__FILE_CONVERTER.out - ) + scopeloom = FINALIZE.out.scopeloom } + + // Reporting project = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) - + // Collect the reports: - // Define the parameters for clustering - def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) // Pairing clustering reports with bec reports if(!clusteringParams.isParameterExplorationModeOn()) { clusteringBECReports = BEC_HARMONY.out.cluster_report.map { @@ -586,23 +663,22 @@ Steps: ).map { it -> tuple(it[0], it[1..it.size()-2], it[it.size()-1]) } - - // reporting: + SC__SCANPY__MERGE_REPORTS( ipynbs, "merged_report", clusteringParams.isParameterExplorationModeOn() ) SC__SCANPY__REPORT_TO_HTML(SC__SCANPY__MERGE_REPORTS.out) - + emit: - filteredloom - scopeloom - + filteredloom = FINALIZE.out.filteredloom + scopeloom = scopeloom + scanpyh5ad = FINALIZE.out.scanpyh5ad + } - #. Add a new Nextflow profile in the ``profiles`` section of the main ``nextflow.config`` of the ``vsn-pipelines`` repository: .. code:: groovy @@ -616,7 +692,7 @@ Steps: ... } -#. Finally add a new entry in ``main.nf`` of the ``vsn-pipelines`` repository +#. Finally add a new entry in ``main.nf`` of the ``vsn-pipelines`` repository. .. code:: groovy @@ -641,14 +717,15 @@ Steps: } - You should now be able to configure (``nextflow config ...``) and run the ``harmony`` pipeline (``nextflow run ...``). + +#. You should now be able to configure (``nextflow config ...``) and run the ``harmony`` pipeline (``nextflow run ...``). #. After confirming that your module is functional, you should create a pull request to merge your changes into the ``develop`` branch. - - Make sure you have removed all references to ``TEMPLATE`` in your repository - - Include some basic documentation for your module so people know what it does and how to use it. 
+- Make sure you have removed all references to ``TEMPLATE`` in your repository
+- Include some basic documentation for your module so people know what it does and how to use it.
 
-   The pull request will be reviewed and accepted once it is confirmed to be working. Once the ``develop`` branch is merged into ``master``, the new tool will be part of the new release of VSN Pipelines!
+The pull request will be reviewed and accepted once it is confirmed to be working. Once the ``develop`` branch is merged into ``master``, the new tool will be part of the new release of VSN Pipelines!
 
 Repository structure
 --------------------
@@ -739,7 +816,7 @@ Entire **sub-workflows** can also be imported in other workflows with one comma
 
     include CELLRANGER from '../cellranger/main.nf' params(params)
 
-This leads to the ability to easily define **high-level workflows** in the master nf file: ``vib-singlecell-nf/vsn-pipelines/main.nf``:
+This leads to the ability to easily define **high-level workflows** in the master Nextflow file: ``vib-singlecell-nf/vsn-pipelines/main.nf``:
 
 .. code:: groovy
 
diff --git a/docs/features.rst b/docs/features.rst
index f2417512..5680e924 100644
--- a/docs/features.rst
+++ b/docs/features.rst
@@ -7,8 +7,7 @@ Two-pass strategy
 
 Typically, cell- and gene-level filtering is one of the first steps performed in the analysis pipelines.
 This usually results in the pipeline being run in two passes.
 In the **first pass**, the default filters are applied (which are probably not valid for new datasets), and a separate QC report is generated for each sample.
-These QC reports can be inspected and the filters can be adjusted in the config file
-either for all samples (by editing the ``params.tools.scanpy.filter`` settings directly, or for individual samples by using the strategy described in multi-sample parameters.
+These QC reports can be inspected and the filters can be adjusted in the config file either for all samples (by editing the ``params.tools.scanpy.filter`` settings directly), or for individual samples by using the strategy described in multi-sample parameters.
 Then, the **second pass** restarts the pipeline with the correct filtering parameters applied (use ``nextflow run ... -resume`` to skip already completed steps).
 
 Other notes
@@ -53,7 +52,7 @@ This will add a different scenic entry in the config:
     params {
         tools {
             scenic {
-                container = 'vibsinglecellnf/scenic:0.9.19'
+                container = 'vibsinglecellnf/scenic:0.11.2'
                 report_ipynb = '/src/scenic/bin/reports/scenic_report.ipynb'
                 existingScenicLoom = ''
                 sampleSuffixWithExtension = '' // Suffix after the sample name in the file path
@@ -124,7 +123,7 @@ When generating the config using ``nextflow config`` (see above), add the ``pcac
 
 Remarks:
 
-- Make sure ``nComps`` config parameter (under ``dim_reduction`` > ``pca``) is not set.
+- Make sure ``nComps`` config parameter (under ``dim_reduction.pca``) is not set.
 - If ``nPcs`` is not set for t-SNE or UMAP config entries, then all the PCs from the PCA will be used in the computation.
 
 Currently, only the Scanpy related pipelines have this feature implemented.
@@ -137,7 +136,7 @@ Cell-based metadata annotation
 
 There are 2 ways of using this feature: either when running an end-to-end pipeline (e.g.: ``single_sample``, ``harmony``, ``bbknn``, ...) or on its own as an independent workflow.
 
-Part of an and-to-end pipeline
+Part of an end-to-end pipeline
 ******************************
 
 The profile ``utils_cell_annotate`` should be added along with the other profiles when generating the main config using the ``nextflow config`` command.
@@ -463,7 +462,8 @@ The following command, will create a Nextflow config which the pipeline will und
 
     nextflow config \
        ~/vib-singlecell-nf/vsn-pipelines \
-       -profile min,[data-profile],scanpy_data_transformation,scanpy_normalization,[...],singularity > nextflow.config
+       -profile min,[data-profile],scanpy_data_transformation,scanpy_normalization,[...],singularity \
+       > nextflow.config
 
 - ``[data-profile]``: Can be one of the different possible data profiles e.g.: ``h5ad``
 - ``[...]``: Can be other profiles like ``bbknn``, ``harmony``, ``pcacv``, ...
diff --git a/docs/getting-started.rst b/docs/getting-started.rst
index 1244f4e3..b3c7877a 100644
--- a/docs/getting-started.rst
+++ b/docs/getting-started.rst
@@ -23,7 +23,7 @@ Make sure you have the following software installed,
 
 - Nextflow_
 
-  - Currently VSN-Pipelines requires Nextflow version ``20.10.0`` or higher.
+  - Currently VSN-Pipelines requires Nextflow version ``21.04.3`` or higher.
 
 - A container system, either of:
 
   - Docker_
@@ -31,7 +31,7 @@ Make sure you have the following software installed,
   - Singularity_
 
 **NOTE**: Due to licensing restrictions, to use the cellranger components of VSN you must build and/or provide a container with ``cellranger`` and ``bcl2fastq2`` installed yourself.
-A sample ``Dockerfile`` can be found in ``./src/cellranger/``, you must download bcl2fastq2 from the Illumina_ website, and cellranger from the `10x Genomics`_ website yourself to build this container.
+A sample ``Dockerfile`` can be found in ``./src/cellranger/``; you must download ``bcl2fastq2`` from the Illumina_ website, and cellranger from the `10x Genomics`_ website yourself to build this container.
 
 .. _Nextflow: https://www.nextflow.io/
 .. _Docker: https://docs.docker.com/
@@ -67,58 +67,53 @@ Example Output
 
 .. 
code:: shell - $ nextflow -C nextflow.config run $VSN -entry single_sample - N E X T F L O W ~ version 20.10.0 - Launching `/staging/leuven/stg_00002/lcb/dwmax/documents/aertslab/GitHub/vib-singlecell-nf/vsn-pipelines/main.nf` [silly_pare] - revision: 77be3ba59d - WARN: DSL 2 IS AN EXPERIMENTAL FEATURE UNDER DEVELOPMENT -- SYNTAX MAY CHANGE IN FUTURE RELEASE - executor > local (83) - [44/e02c9e] process > single_sample:SINGLE_SAMPLE:SC__FILE_CONVERTER (1) [100%] 2 of 2 ✔ - [22/723593] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS (2) [100%] 2 of 2 ✔ - [2e/10d845] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__CELL_FILTER (2) [100%] 2 of 2 ✔ - [d6/fbe4b6] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__GENE_FILTER (2) [100%] 2 of 2 ✔ - [22/d4a31b] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT (2) [100%] 2 of 2 ✔ - [20/b43313] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 ✔ - [e3/ee3f9c] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION (2) [100%] 2 of 2 ✔ - [79/7f4e25] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:PUBLISH_H5AD_NORMALIZED:COMPRESS_HDF5 (2) [100%] 2 of 2 ✔ - [40/370971] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:PUBLISH_H5AD_NORMALIZED:SC__PUBLISH (2) [100%] 2 of 2 ✔ - [f1/aa0726] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:PUBLISH_H5AD_NORMALIZED:SC__PUBLISH_PROXY (2) [100%] 2 of 2 ✔ - [76/e42ef9] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION (2) [100%] 2 of 2 ✔ - [04/11b8b8] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES (2) [100%] 2 of 2 ✔ - [1e/e1d058] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES (2) [100%] 2 of 2 ✔ - [07/b3580a] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING (2) [100%] 2 of 2 ✔ - [b4/00bf5e] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:PUBLISH_H5AD_HVG_SCALED:COMPRESS_HDF5 (2) [100%] 2 of 2 ✔ - [8f/4d5d49] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:PUBLISH_H5AD_HVG_SCALED:SC__PUBLISH (2) [100%] 2 of 2 ✔ - [9a/3c5d0d] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:PUBLISH_H5AD_HVG_SCALED:SC__PUBLISH_PROXY (2) [100%] 2 of 2 ✔ - [dc/40cda6] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT (2) [100%] 2 of 2 ✔ - [62/9dc791] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 ✔ - [8c/ed79b8] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_PCA:SC__SCANPY__DIM_REDUCTION__PCA (2) [100%] 2 of 2 ✔ - [be/ed9c2e] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NEIGHBORHOOD_GRAPH:SC__SCANPY__NEIGHBORHOOD_GRAPH (2) [100%] 2 of 2 ✔ - [01/ec367e] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__TSNE (2) [100%] 2 of 2 ✔ - 
[ea/7fbf7c] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__UMAP (2) [100%] 2 of 2 ✔ - [e5/a5a70a] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT (2) [100%] 2 of 2 ✔ - [dd/b38b9b] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 ✔ - [5f/5bcb4d] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING (2) [100%] 2 of 2 ✔ - [fa/9765a9] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT (2) [100%] 2 of 2 ✔ - [aa/7b6adb] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 ✔ - [0f/82f171] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES (2) [100%] 2 of 2 ✔ - [96/04fc81] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:UTILS__GENERATE_WORKFLOW_CONFIG_REPORT [100%] 1 of 1 ✔ - [ee/7fe3fa] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:SC__SCANPY__MERGE_REPORTS (2) [100%] 2 of 2 ✔ - [6f/7cbcb5] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 ✔ - [87/7e681b] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:FINALIZE:SC__H5AD_TO_FILTERED_LOOM (2) [100%] 2 of 2 ✔ - [f0/176c0c] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:FINALIZE:FILE_CONVERTER_TO_SCOPE:SC__H5AD_TO_LOOM (1) [100%] 2 of 2 ✔ - [b3/608cde] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:FINALIZE:FILE_CONVERTER_TO_SCANPY:SC__H5AD_MERGE (2) [100%] 2 of 2 ✔ - [d1/43da78] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:PUBLISH:SC__PUBLISH_PROXY (2) [100%] 2 of 2 ✔ - [c3/6209f8] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCOPE:COMPRESS_HDF5 (2) [100%] 2 of 2 ✔ - [d5/e1a0c3] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCOPE:SC__PUBLISH (2) [100%] 2 of 2 ✔ - [4b/2e236a] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCOPE:SC__PUBLISH_PROXY (2) [100%] 2 of 2 ✔ - [87/f3f350] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCANPY:COMPRESS_HDF5 (2) [100%] 2 of 2 ✔ - [d4/2c09af] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCANPY:SC__PUBLISH (2) [100%] 2 of 2 ✔ - [da/3817b5] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCANPY:SC__PUBLISH_PROXY (2) [100%] 2 of 2 ✔ + $ nextflow -C nextflow_new_structure.config run $VSN -entry single_sample + N E X T F L O W ~ version 21.04.3 + Launching `/staging/leuven/stg_00002/lcb/dwmax/documents/aertslab/GitHub/vib-singlecell-nf/vsn-pipelines/main.nf` [loving_shockley] - revision: ba1dedbf51 + executor > local (75) + [1d/3b5a55] process > single_sample:SINGLE_SAMPLE:SC__FILE_CONVERTER (2) [100%] 2 of 2 _ + [2d/2152cf] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS (2) [100%] 2 of 2 _ + [48/bce024] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__CELL_FILTER (2) [100%] 2 of 2 _ + [60/d42cdf] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__GENE_FILTER (2) [100%] 2 of 2 _ + [4b/bb2635] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT (2) [100%] 2 of 2 _ + [64/add548] process > 
single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 _ + [c6/4d8a66] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION (2) [100%] 2 of 2 _ + [01/8ba0d2] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:PUBLISH_H5AD_NORMALIZED:COMPRESS_HDF5 (2) [100%] 2 of 2 _ + [b3/ec4712] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:PUBLISH_H5AD_NORMALIZED:SC__PUBLISH (2) [100%] 2 of 2 _ + [1e/35bb2e] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION (2) [100%] 2 of 2 _ + [14/adfd7a] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FIND_HIGHLY_VARIABLE_GENES (2) [100%] 2 of 2 _ + [01/9c8a26] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__SUBSET_HIGHLY_VARIABLE_GENES (2) [100%] 2 of 2 _ + [dc/027334] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING (2) [100%] 2 of 2 _ + [8d/05ce2f] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:PUBLISH_H5AD_HVG_SCALED:COMPRESS_HDF5 (2) [100%] 2 of 2 _ + [0b/6d50b0] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:PUBLISH_H5AD_HVG_SCALED:SC__PUBLISH (2) [100%] 2 of 2 _ + [c1/f799be] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT (2) [100%] 2 of 2 _ + [c9/ae0cd9] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 _ + [a2/0a7824] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_PCA:SC__SCANPY__DIM_REDUCTION__PCA (2) [100%] 2 of 2 _ + [d6/1407b3] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:NEIGHBORHOOD_GRAPH:SC__SCANPY__NEIGHBORHOOD_GRAPH (2) [100%] 2 of 2 _ + [b7/8ab962] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__TSNE (2) [100%] 2 of 2 _ + [ee/485413] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__UMAP (2) [100%] 2 of 2 _ + [ba/2bfd23] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT (2) [100%] 2 of 2 _ + [10/a429ce] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 _ + [06/3412cd] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING (2) [100%] 2 of 2 _ + [23/3d82c4] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT (2) [100%] 2 of 2 _ + [bb/c9e11f] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 _ + [1c/2026be] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES (2) [100%] 2 of 2 _ + [57/13f0a8] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:UTILS__GENERATE_WORKFLOW_CONFIG_REPORT [100%] 1 of 1 _ + [60/8a3231] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:SC__SCANPY__MERGE_REPORTS (2) [100%] 2 of 2 _ + [cb/de1a4d] process > 
single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:SC__SCANPY__REPORT_TO_HTML (2) [100%] 2 of 2 _
+    [3f/265503] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:FINALIZE:SC__H5AD_TO_FILTERED_LOOM (2) [100%] 2 of 2 _
+    [1f/de67e8] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:FINALIZE:FILE_CONVERTER_TO_SCOPE:SC__H5AD_TO_LOOM (2) [100%] 2 of 2 _
+    [2a/10d5a2] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:FINALIZE:FILE_CONVERTER_TO_SCANPY:SC__H5AD_MERGE (2) [100%] 2 of 2 _
+    [35/ce7256] process > single_sample:SINGLE_SAMPLE:SCANPY__SINGLE_SAMPLE:PUBLISH:SC__PUBLISH (2) [100%] 2 of 2 _
+    [6f/1df294] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCOPE:COMPRESS_HDF5 (2) [100%] 2 of 2 _
+    [90/a9e563] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCOPE:SC__PUBLISH (2) [100%] 2 of 2 _
+    [23/d62b2e] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCANPY:COMPRESS_HDF5 (2) [100%] 2 of 2 _
+    [d0/5834be] process > single_sample:PUBLISH_SINGLE_SAMPLE_SCANPY:SC__PUBLISH (1) [100%] 2 of 2 _
     WARN: To render the execution DAG in the required format it is required to install Graphviz -- See http://www.graphviz.org for more info.
-    Completed at: 12-Nov-2020 10:55:52
-    Duration    : 2m 36s
-    CPU hours   : 0.6
-    Succeeded   : 83
+    Completed at: 26-Aug-2021 15:41:37
+    Duration    : 2m 29s
+    CPU hours   : 0.4
+    Succeeded   : 75
 
 
 Output
diff --git a/docs/pipelines.rst b/docs/pipelines.rst
index c2749642..ce14cbb9 100644
--- a/docs/pipelines.rst
+++ b/docs/pipelines.rst
@@ -620,10 +620,10 @@ First, generate the config :
 
 NOTES:
 
+- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased, please set ``params.tools.sratoolkit.maxSize`` accordingly. This limit can be 'removed' by setting the parameter to an arbitrarily high number (e.g.: 9999999999999).
 - If you're a VSC user, you might want to add the ``vsc`` profile.
 - The final output (FASTQ files) will be available in ``out/data/sra``
 - If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.tools.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``.
-- The download of SRA files is by default limited to 20 Gb. If this limit needs to be increased please set ``params.tools.sratoolkit.maxSize`` accordingly. This limit can be 'removed' by setting the parameter to an arbitrarily high number (e.g.: 9999999999999). 
Now we can run it with the following command: @@ -634,8 +634,8 @@ Now we can run it with the following command: -entry sra $ nextflow -C nextflow.config run ~/vib-singlecell-nf/vsn-pipelines -entry sra - N E X T F L O W ~ version 20.11.0-edge - Launching `~/vib-singlecell-nf/vsn-pipelines/main.nf` [sleepy_goldstine] - revision: 7527661b07 + N E X T F L O W ~ version 21.04.3 + Launching `~/vib-singlecell-nf/vsn-pipelines/main.nf` [sleepy_goldstine] - revision: ba1dedbf51 executor > local (23) [12/25b9d4] process > sra:DOWNLOAD_FROM_SRA:SRA_TO_METADATA (1) [100%] 1 of 1 _ [e2/d5a429] process > sra:DOWNLOAD_FROM_SRA:SRATOOLKIT__DOWNLOAD_FASTQS:DOWNLOAD_FASTQS_FROM_SRA_ACC_ID (4) [ 33%] 3 of 9 From 29916881ef93a65d4e6911e3383a7f5b5f6654a0 Mon Sep 17 00:00:00 2001 From: dweemx Date: Thu, 26 Aug 2021 16:06:10 +0200 Subject: [PATCH 195/202] Bump version from 0.26.1 to 0.27.0 Update Nextflow version from 20.10.0 to 21.04.3 --- .github/ISSUE_TEMPLATE/bug_report.md | 4 +- .github/workflows/bbknn.yml | 38 +++++++++--------- .github/workflows/bbknn_scenic.yml | 38 +++++++++--------- .github/workflows/cell_annotate_filter.yml | 33 ++++++++------- .github/workflows/decontx.yml | 31 +++++++------- .github/workflows/harmony.yml | 37 +++++++++-------- .github/workflows/harmony_scenic.yml | 40 +++++++++---------- .github/workflows/mnncorrect.yml | 37 +++++++++-------- .github/workflows/scenic.yml | 24 +++++------ .github/workflows/scenic_multiruns.yml | 25 +++++------- .github/workflows/single_sample.yml | 32 +++++++-------- .github/workflows/single_sample_decontx.yml | 32 +++++++-------- .../single_sample_decontx_scrublet.yml | 32 +++++++-------- .../single_sample_param_exploration.yml | 32 +++++++-------- .github/workflows/single_sample_scenic.yml | 32 +++++++-------- .../single_sample_scenic_multiruns.yml | 32 +++++++-------- .github/workflows/single_sample_scrublet.yml | 32 +++++++-------- README.rst | 2 +- VERSION | 2 +- nextflow.config | 4 +- 20 files changed, 255 insertions(+), 284 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 137442cd..d71c99cb 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -38,8 +38,8 @@ If applicable, add screenshots to help explain your problem. **Please complete the following information:** - OS: [e.g. Ubuntu] - - Nextflow Version: [e.g. 20.10.0] - - vsn-pipelines Version: [e.g. 0.26.1] + - Nextflow Version: [e.g. 21.04.3] + - vsn-pipelines Version: [e.g. 0.27.0] **Additional context** Add any other context about the problem here. 
diff --git a/.github/workflows/bbknn.yml b/.github/workflows/bbknn.yml index b0d22503..af439a5d 100644 --- a/.github/workflows/bbknn.yml +++ b/.github/workflows/bbknn.yml @@ -1,6 +1,6 @@ name: bbknn -on: +on: push: branches: - master @@ -12,25 +12,23 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data - run: | - mkdir testdata - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xzvf sample_data_tiny.tar.gz - cp -r sample_data testdata/sample1 - mv sample_data testdata/sample2 - - name: Run single_sample test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile bbknn,test__bbknn,test__compute_resources,docker -entry bbknn -ansi-log false - cat .nextflow.log - + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data + run: | + mkdir testdata + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz + tar xzvf sample_data_tiny.tar.gz + cp -r sample_data testdata/sample1 + mv sample_data testdata/sample2 + - name: Run single_sample test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile bbknn,test__bbknn,test__compute_resources,docker -entry bbknn -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/bbknn_scenic.yml b/.github/workflows/bbknn_scenic.yml index e5afbaa2..bccdf77b 100644 --- a/.github/workflows/bbknn_scenic.yml +++ b/.github/workflows/bbknn_scenic.yml @@ -1,6 +1,6 @@ name: bbknn_scenic -on: +on: push: branches: - master @@ -12,25 +12,23 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data - run: | - mkdir testdata - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz - tar xzvf sample_data_small.tar.gz - cp -r sample_data testdata/sample1 - mv sample_data testdata/sample2 - - name: Run bbknn_scenic test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile bbknn_scenic,test__bbknn_scenic,test__compute_resources,docker -entry bbknn_scenic -ansi-log false - cat .nextflow.log - + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data + run: | + mkdir testdata + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz + tar xzvf sample_data_small.tar.gz + cp -r sample_data testdata/sample1 + mv sample_data testdata/sample2 + - name: Run bbknn_scenic test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile bbknn_scenic,test__bbknn_scenic,test__compute_resources,docker -entry bbknn_scenic -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/cell_annotate_filter.yml b/.github/workflows/cell_annotate_filter.yml index 7b5e32bb..8688cb83 100644 --- a/.github/workflows/cell_annotate_filter.yml +++ b/.github/workflows/cell_annotate_filter.yml @@ -1,6 +1,6 @@ name: cell_annotate_filter -on: +on: push: branches: - master @@ -12,22 +12,21 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install 
Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data & dummy annotation - run: | - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xzvf sample_data_tiny.tar.gz - wget https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/develop/data/sample_data_tiny/sample_data_tiny_dummy_annotation.tsv.gz - - name: Run cell_annotate_filter test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile utils_cell_annotate_filter,test__cell_annotate_filter,test__compute_resources,docker -entry cell_annotate_filter -ansi-log false - cat .nextflow.log + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data & dummy annotation + run: | + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz + tar xzvf sample_data_tiny.tar.gz + wget https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/develop/data/sample_data_tiny/sample_data_tiny_dummy_annotation.tsv.gz + - name: Run cell_annotate_filter test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile utils_cell_annotate_filter,test__cell_annotate_filter,test__compute_resources,docker -entry cell_annotate_filter -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/decontx.yml b/.github/workflows/decontx.yml index f60d3ea8..ffdb1165 100644 --- a/.github/workflows/decontx.yml +++ b/.github/workflows/decontx.yml @@ -1,6 +1,6 @@ name: decontx -on: +on: push: branches: - master @@ -12,21 +12,20 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data - run: | - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz - tar xzvf sample_data.tar.gz - - name: Run decontx test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile decontx,test__decontx,test__compute_resources,docker -entry decontx -ansi-log false - cat .nextflow.log + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data + run: | + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz + tar xzvf sample_data.tar.gz + - name: Run decontx test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile decontx,test__decontx,test__compute_resources,docker -entry decontx -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/harmony.yml b/.github/workflows/harmony.yml index 3ec15d66..c08a2ab3 100644 --- a/.github/workflows/harmony.yml +++ b/.github/workflows/harmony.yml @@ -1,6 +1,6 @@ name: harmony -on: +on: push: branches: - master @@ -12,24 +12,23 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data - run: | - mkdir testdata - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xzvf sample_data_tiny.tar.gz - cp -r sample_data testdata/sample1 - mv sample_data testdata/sample2 - - name: Run single_sample test - run: | - 
nextflow run ${GITHUB_WORKSPACE} -profile harmony,test__harmony,test__compute_resources,docker -entry harmony -ansi-log false - cat .nextflow.log + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data + run: | + mkdir testdata + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz + tar xzvf sample_data_tiny.tar.gz + cp -r sample_data testdata/sample1 + mv sample_data testdata/sample2 + - name: Run single_sample test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile harmony,test__harmony,test__compute_resources,docker -entry harmony -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/harmony_scenic.yml b/.github/workflows/harmony_scenic.yml index 21afeded..587a735b 100644 --- a/.github/workflows/harmony_scenic.yml +++ b/.github/workflows/harmony_scenic.yml @@ -1,6 +1,6 @@ name: harmony_scenic -on: +on: push: branches: - master @@ -12,26 +12,24 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - echo 0 | sudo tee /proc/sys/fs/protected_hardlinks - - name: Get sample data - run: | - mkdir testdata - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz - tar xzvf sample_data_small.tar.gz - cp -r sample_data testdata/sample1 - mv sample_data testdata/sample2 - - name: Run harmony_scenic test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile harmony_scenic,test__harmony_scenic,test__compute_resources,docker -entry harmony_scenic -ansi-log false - cat .nextflow.log - + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + echo 0 | sudo tee /proc/sys/fs/protected_hardlinks + - name: Get sample data + run: | + mkdir testdata + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz + tar xzvf sample_data_small.tar.gz + cp -r sample_data testdata/sample1 + mv sample_data testdata/sample2 + - name: Run harmony_scenic test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile harmony_scenic,test__harmony_scenic,test__compute_resources,docker -entry harmony_scenic -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/mnncorrect.yml b/.github/workflows/mnncorrect.yml index 9a584f99..03b400a4 100644 --- a/.github/workflows/mnncorrect.yml +++ b/.github/workflows/mnncorrect.yml @@ -1,6 +1,6 @@ name: mnncorrect -on: +on: push: branches: - master @@ -12,24 +12,23 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data - run: | - mkdir testdata - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz - tar xzvf sample_data.tar.gz - cp -r sample_data testdata/sample1 - mv sample_data testdata/sample2 - - name: Run single_sample test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile mnncorrect,test__mnncorrect,test__compute_resources,docker -entry mnncorrect -ansi-log false - cat .nextflow.log + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- 
get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data + run: | + mkdir testdata + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz + tar xzvf sample_data.tar.gz + cp -r sample_data testdata/sample1 + mv sample_data testdata/sample2 + - name: Run single_sample test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile mnncorrect,test__mnncorrect,test__compute_resources,docker -entry mnncorrect -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/scenic.yml b/.github/workflows/scenic.yml index b26e9131..22a84eeb 100644 --- a/.github/workflows/scenic.yml +++ b/.github/workflows/scenic.yml @@ -1,6 +1,6 @@ name: scenic -on: +on: push: branches: - master @@ -12,18 +12,16 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run scenic test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile scenic,test__scenic,test__compute_resources,docker -entry scenic -ansi-log false - cat .nextflow.log - + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Run scenic test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile scenic,test__scenic,test__compute_resources,docker -entry scenic -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/scenic_multiruns.yml b/.github/workflows/scenic_multiruns.yml index 764924f0..c735fb7b 100644 --- a/.github/workflows/scenic_multiruns.yml +++ b/.github/workflows/scenic_multiruns.yml @@ -1,6 +1,6 @@ name: scenic_multiruns -on: +on: push: branches: - master @@ -12,19 +12,16 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run scenic test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile scenic_multiruns,test__scenic_multiruns,test__compute_resources,docker -entry scenic -ansi-log false - cat .nextflow.log - - + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + export NXF_VER='21.04.3' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Run scenic test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile scenic_multiruns,test__scenic_multiruns,test__compute_resources,docker -entry scenic -ansi-log false + cat .nextflow.log diff --git a/.github/workflows/single_sample.yml b/.github/workflows/single_sample.yml index 901f1047..37adaa33 100644 --- a/.github/workflows/single_sample.yml +++ b/.github/workflows/single_sample.yml @@ -1,6 +1,6 @@ name: single_sample -on: +on: push: branches: - master @@ -12,22 +12,20 @@ on: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - export NXF_VER='20.10.0' - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Get sample data - run: | - wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xzvf sample_data_tiny.tar.gz - - name: Run single_sample test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile single_sample,test__single_sample,test__compute_resources,docker -entry single_sample -ansi-log false - cat .nextflow.log - + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + 
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz
+        tar xzvf sample_data_tiny.tar.gz
+    - name: Run single_sample test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample,test__single_sample,test__compute_resources,docker -entry single_sample -ansi-log false
+        cat .nextflow.log
diff --git a/.github/workflows/single_sample_decontx.yml b/.github/workflows/single_sample_decontx.yml
index 675f5ed3..138c1db5 100644
--- a/.github/workflows/single_sample_decontx.yml
+++ b/.github/workflows/single_sample_decontx.yml
@@ -1,6 +1,6 @@
 name: single_sample_decontx
-on: 
+on:
   push:
     branches:
       - master
@@ -12,22 +12,20 @@ on:
 jobs:
   build:
-
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Install Nextflow
-      run: |
-        export NXF_VER='20.10.0'
-        wget -qO- get.nextflow.io | bash
-        sudo mv nextflow /usr/local/bin/
-    - name: Get sample data
-      run: |
-        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz
-        tar xzvf sample_data.tar.gz
-    - name: Run single_sample_decontx test
-      run: |
-        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_decontx_correct,test__single_sample_decontx_correct,test__compute_resources,docker -entry single_sample_decontx -ansi-log false
-        cat .nextflow.log
-
+    - uses: actions/checkout@v2
+    - name: Install Nextflow
+      run: |
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz
+        tar xzvf sample_data.tar.gz
+    - name: Run single_sample_decontx test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_decontx_correct,test__single_sample_decontx_correct,test__compute_resources,docker -entry single_sample_decontx -ansi-log false
+        cat .nextflow.log
diff --git a/.github/workflows/single_sample_decontx_scrublet.yml b/.github/workflows/single_sample_decontx_scrublet.yml
index 1cfe4aaf..380e6622 100644
--- a/.github/workflows/single_sample_decontx_scrublet.yml
+++ b/.github/workflows/single_sample_decontx_scrublet.yml
@@ -1,6 +1,6 @@
 name: single_sample_decontx_scrublet
-on: 
+on:
   push:
     branches:
       - master
@@ -12,22 +12,20 @@ on:
 jobs:
   build:
-
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Install Nextflow
-      run: |
-        export NXF_VER='20.10.0'
-        wget -qO- get.nextflow.io | bash
-        sudo mv nextflow /usr/local/bin/
-    - name: Get sample data
-      run: |
-        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz
-        tar xzvf sample_data.tar.gz
-    - name: Run single_sample_decontx_scrublet test
-      run: |
-        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_decontx_correct_scrublet,test__single_sample_decontx_correct_scrublet,test__compute_resources,docker -entry single_sample_decontx_scrublet -ansi-log false
-        cat .nextflow.log
-
+    - uses: actions/checkout@v2
+    - name: Install Nextflow
+      run: |
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz
+        tar xzvf sample_data.tar.gz
+    - name: Run single_sample_decontx_scrublet test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_decontx_correct_scrublet,test__single_sample_decontx_correct_scrublet,test__compute_resources,docker -entry single_sample_decontx_scrublet -ansi-log false
+        cat .nextflow.log
diff --git a/.github/workflows/single_sample_param_exploration.yml b/.github/workflows/single_sample_param_exploration.yml
index 216c79e2..d0afd13e 100644
--- a/.github/workflows/single_sample_param_exploration.yml
+++ b/.github/workflows/single_sample_param_exploration.yml
@@ -1,6 +1,6 @@
 name: single_sample_param_exploration
-on: 
+on:
   push:
     branches:
       - master
@@ -12,22 +12,20 @@ on:
 jobs:
   build:
-
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Install Nextflow
-      run: |
-        export NXF_VER='20.10.0'
-        wget -qO- get.nextflow.io | bash
-        sudo mv nextflow /usr/local/bin/
-    - name: Get sample data
-      run: |
-        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz
-        tar xzvf sample_data_tiny.tar.gz
-    - name: Run single_sample_param_exploration test
-      run: |
-        nextflow run ${GITHUB_WORKSPACE} -profile single_sample,test__single_sample_param_exploration,test__compute_resources,docker -entry single_sample -ansi-log false
-        cat .nextflow.log
-
+    - uses: actions/checkout@v2
+    - name: Install Nextflow
+      run: |
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz
+        tar xzvf sample_data_tiny.tar.gz
+    - name: Run single_sample_param_exploration test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample,test__single_sample_param_exploration,test__compute_resources,docker -entry single_sample -ansi-log false
+        cat .nextflow.log
diff --git a/.github/workflows/single_sample_scenic.yml b/.github/workflows/single_sample_scenic.yml
index 006d1b98..ee7bd55b 100644
--- a/.github/workflows/single_sample_scenic.yml
+++ b/.github/workflows/single_sample_scenic.yml
@@ -1,6 +1,6 @@
 name: single_sample_scenic
-on: 
+on:
   push:
     branches:
       - master
@@ -12,22 +12,20 @@ on:
 jobs:
   build:
-
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Install Nextflow
-      run: |
-        export NXF_VER='20.10.0'
-        wget -qO- get.nextflow.io | bash
-        sudo mv nextflow /usr/local/bin/
-    - name: Get sample data
-      run: |
-        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz
-        tar xzvf sample_data_small.tar.gz
-    - name: Run single_sample_scenic test
-      run: |
-        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scenic,test__single_sample_scenic,test__compute_resources,docker -entry single_sample_scenic -ansi-log false
-        cat .nextflow.log
-
+    - uses: actions/checkout@v2
+    - name: Install Nextflow
+      run: |
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz
+        tar xzvf sample_data_small.tar.gz
+    - name: Run single_sample_scenic test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scenic,test__single_sample_scenic,test__compute_resources,docker -entry single_sample_scenic -ansi-log false
+        cat .nextflow.log
diff --git a/.github/workflows/single_sample_scenic_multiruns.yml b/.github/workflows/single_sample_scenic_multiruns.yml
index 72b546ae..d3a43f77 100644
--- a/.github/workflows/single_sample_scenic_multiruns.yml
+++ b/.github/workflows/single_sample_scenic_multiruns.yml
@@ -1,6 +1,6 @@
 name: single_sample_scenic_multiruns
-on: 
+on:
   push:
     branches:
       - master
@@ -12,22 +12,20 @@ on:
 jobs:
   build:
-
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Install Nextflow
-      run: |
-        export NXF_VER='20.10.0'
-        wget -qO- get.nextflow.io | bash
-        sudo mv nextflow /usr/local/bin/
-    - name: Get sample data
-      run: |
-        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz
-        tar xzvf sample_data_small.tar.gz
-    - name: Run single_sample_scenic test
-      run: |
-        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scenic,scenic_multiruns,test__single_sample_scenic_multiruns,test__compute_resources,docker -entry single_sample_scenic -ansi-log false
-        cat .nextflow.log
-
+    - uses: actions/checkout@v2
+    - name: Install Nextflow
+      run: |
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz
+        tar xzvf sample_data_small.tar.gz
+    - name: Run single_sample_scenic test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scenic,scenic_multiruns,test__single_sample_scenic_multiruns,test__compute_resources,docker -entry single_sample_scenic -ansi-log false
+        cat .nextflow.log
diff --git a/.github/workflows/single_sample_scrublet.yml b/.github/workflows/single_sample_scrublet.yml
index 423db3cf..0bc40d8a 100644
--- a/.github/workflows/single_sample_scrublet.yml
+++ b/.github/workflows/single_sample_scrublet.yml
@@ -1,6 +1,6 @@
 name: single_sample_scrublet
-on: 
+on:
   push:
     branches:
       - master
@@ -12,22 +12,20 @@ on:
 jobs:
   build:
-
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Install Nextflow
-      run: |
-        export NXF_VER='20.10.0'
-        wget -qO- get.nextflow.io | bash
-        sudo mv nextflow /usr/local/bin/
-    - name: Get sample data
-      run: |
-        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz
-        tar xzvf sample_data.tar.gz
-    - name: Run single_sample_scrublet test
-      run: |
-        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scrublet,test__single_sample_scrublet,test__compute_resources,docker -entry single_sample_scrublet -ansi-log false
-        cat .nextflow.log
-
+    - uses: actions/checkout@v2
+    - name: Install Nextflow
+      run: |
+        export NXF_VER='21.04.3'
+        wget -qO- get.nextflow.io | bash
+        sudo mv nextflow /usr/local/bin/
+    - name: Get sample data
+      run: |
+        wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz
+        tar xzvf sample_data.tar.gz
+    - name: Run single_sample_scrublet test
+      run: |
+        nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scrublet,test__single_sample_scrublet,test__compute_resources,docker -entry single_sample_scrublet -ansi-log false
+        cat .nextflow.log
diff --git a/README.rst b/README.rst
index cdd47172..bf4b4386 100644
--- a/README.rst
+++ b/README.rst
@@ -134,7 +134,7 @@ See `here `_ for
     :target: https://vsn-pipelines.readthedocs.io/en/latest/?badge=latest
     :alt: Documentation Status
 
-.. |Nextflow| image:: https://img.shields.io/badge/nextflow-20.10.0-brightgreen.svg
+.. |Nextflow| image:: https://img.shields.io/badge/nextflow-21.04.3-brightgreen.svg
     :target: https://www.nextflow.io/
     :alt: Nextflow
 
diff --git a/VERSION b/VERSION
index 9d041d8e..81566e4e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.26.1
\ No newline at end of file
+0.27.0
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 1ad01120..381bde15 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -3,10 +3,10 @@ manifest {
     name = 'vib-singlecell-nf/vsn-pipelines'
     description = 'A repository of pipelines for single-cell data in Nextflow DSL2'
     homePage = 'https://github.com/vib-singlecell-nf/vsn-pipelines'
-    version = '0.26.1'
+    version = '0.27.0'
     mainScript = 'main.nf'
     defaultBranch = 'master'
-    nextflowVersion = '!>=20.10.0'
+    nextflowVersion = '!>=21.04.3'
 }
 
 // load these configs first:

From cab90a558631f308afd7c8080479470d989565ec Mon Sep 17 00:00:00 2001
From: Kris Davie
Date: Tue, 21 Sep 2021 14:08:11 +0200
Subject: [PATCH 196/202] Remove `scanpyh5ad` reference which isn't an output of `harmony_only`

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 8baaf058..bcad0b1d 100644
--- a/main.nf
+++ b/main.nf
@@ -131,7 +131,7 @@ workflow harmony_only {
 
     if(params.utils?.publish) {
         PUBLISH_HARMONY(
-            HARMONY.out.scanpyh5ad,
+            HARMONY.out,
            params.utils?.publish?.annotateWithBatchVariableName ? "HARMONY" + "_BY_" + batchVariables.join("_").toUpperCase() : "HARMONY",
            "h5ad",
            null,

From f2a214e5521c7dc6fa4f692989dd521b4b58ffc4 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Mon, 27 Sep 2021 11:46:41 +0200
Subject: [PATCH 197/202] Fix for bap workflow

- Input data is now in the proper format [sampleId, [bam, index], ... ]
---
 src/bap/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bap/main.nf b/src/bap/main.nf
index 540fc70c..636faf19 100644
--- a/src/bap/main.nf
+++ b/src/bap/main.nf
@@ -32,7 +32,7 @@ workflow BAP__BARCODE_MULTIPLET_WF {
 
     main:
 
-        bap = BARCODE_MULTIPLET_PIPELINE(bam.map { it -> tuple(it[0], it[1], it[2]) })
+        bap = BARCODE_MULTIPLET_PIPELINE(bam.map { it -> tuple(it[0], it[1][0], it[1][1]) })
 
         GENERATE_REPORT(
             file(workflow.projectDir + params.tools.bap.barcode_multiplet.report_ipynb),

From 5e53841ab6b8dc7ae69b17f193bc4dd4f87b2cf0 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Mon, 27 Sep 2021 12:13:47 +0200
Subject: [PATCH 198/202] Update docs for new HyDrop methods

---
 docs/scatac-seq.rst | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/scatac-seq.rst b/docs/scatac-seq.rst
index 58d76843..2d1e157b 100644
--- a/docs/scatac-seq.rst
+++ b/docs/scatac-seq.rst
@@ -79,7 +79,12 @@ Note that the fastq file fields must be full paths; this is not shown here for c
       - sample_2_R2.fastq.gz
       - sample_4_R3.fastq.gz
     * - sample_5
-      - hydrop
+      - hydrop_3x96
+      - sample_5_R1.fastq.gz
+      - sample_5_R2.fastq.gz
+      - sample_5_R3.fastq.gz
+    * - sample_6
+      - hydrop_2x384
       - sample_5_R1.fastq.gz
       - sample_5_R2.fastq.gz
       - sample_5_R3.fastq.gz
@@ -174,7 +179,7 @@ Special thanks here to Gert Hulselmans for expanding the capabilities of this fu
     printf 'Usage: create_atac_metadata sample technology fastq_prefix read_labels\n\n';
     printf 'Arguments:\n';
     printf '    sample: sample name\n';
-    printf '    technology: "standard", "hydrop" or "biorad"\n';
+    printf '    technology: "standard", "hydrop_3x96", "hydrop_2x384", or "biorad"\n';
     printf '    fastq_prefix: path prefix to FASTQ files.\n';
     printf '    read_labels: comma separated read labels for R1, R2 and R3 that select: R1,R2,R3.\n';
     return 1;
@@ -204,7 +209,8 @@ To run use the options:
     create_atac_metadata header > auto_metadata.tsv
     create_atac_metadata sample_1 standard /path/to/sample_1_subset_S R1,R2,R3 >> auto_metadata.tsv
     create_atac_metadata sample_2 standard /path/to/sample_2_subset_S R1,R2,R3 >> auto_metadata.tsv
-    create_atac_metadata sample_5 hydrop /path/to/sample_5_ R1,R2,R3 >> auto_metadata.tsv
+    create_atac_metadata sample_5 hydrop_3x96 /path/to/sample_5_ R1,R2,R3 >> auto_metadata.tsv
+    create_atac_metadata sample_6 hydrop_2x384 /path/to/sample_6_ R1,R2,R3 >> auto_metadata.tsv
 
 .. raw:: html
 
@@ -218,7 +224,7 @@ Technology types
 
 The "technology" field in the metadata table controls two things:
 
 1. **How technology-specific pipeline steps are applied.**
-   Currently there are two specific settings (``biorad`` and ``hydrop``) that use alternate pipelines processes (to extract and correct the barcode sequence from the input fastqs).
+   Currently there are three specific settings (``biorad``, ``hydrop_3x96``, and ``hydrop_2x384``) that use alternate pipeline processes (to extract and correct the barcode sequence from the input fastqs).
    Using any other keyword is allowed, and samples will be run with the standard pipeline steps (barcode correction against a whitelist).
 
 2. **Which whitelist is used for barcode correction.**
@@ -277,10 +283,10 @@ The whitelists are supplied in the params file (``params.tools.singlecelltoolkit
 This can be used to supply alternate whitelists for certain samples, for example if you need to supply a reverse complemented whitelist for samples run in certain sequencing machines.
 
-``hydrop``
-__________
+``hydrop_3x96``/``hydrop_2x384``
+________________________________
 
-The ``hydrop`` setting processes data generated by the HyDrop ATAC protocol
+The HyDrop settings (either ``hydrop_3x96`` or ``hydrop_2x384`` depending on the library preparation used) process data generated by the HyDrop ATAC protocol
 (see `hydrop.aertslab.org `_ and `the associated preprint `_).
 This approach differs from the standard pipeline in only the initial step, which is to extract and process the HyDrop barcodes from the sequencing output.
 Here, `this script `_ is used to take the R2 read from the sequencer::
@@ -299,6 +305,8 @@ and transform it into::
 
    +
    CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
 
+The two HyDrop modes (``hydrop_3x96``, ``hydrop_2x384``) differ only in the way the initial barcode extraction script works.
+Following this, they are processed in the same way as the standard pipeline, including whitelist-based barcode correction (note that the two HyDrop modes require different barcode whitelists to be used here).
 
 ``biorad``
 __________
@@ -392,7 +400,7 @@ The important parameters to verify are:
 - ``params.tools.bwamaptools.bwa_fasta``: the path to the bwa reference fasta file.
   This should be already indexed with ``bwa index``, and the index files located in the same directory as the fasta file.
   Note that ``bwa`` and ``bwa-mem2`` use different indexes that are not interchangeable.
 - ``params.tools.singlecelltoolkit.barcode_correction.whitelist``: Whitelists for barcode correction are supplied here.
-  The whitelists are matched to samples based on the parameter key here ('standard', 'multiome', 'hydrop', etc.) and the technology field listed for each sample in the metadata file.
+  The whitelists are matched to samples based on the parameter key here ('standard', 'multiome', 'hydrop_3x96', 'hydrop_2x384', etc.) and the technology field listed for each sample in the metadata file.
   Barcode whitelists can (optionally) be gzipped.
   There are currently no checks performed to ensure that the sample barcodes have any overlap with the whitelist (the barcode correction reports should be checked for this).

From 968b8ea755ce5f037af18cf2d7ad82fdecf85f05 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Mon, 27 Sep 2021 13:50:26 +0200
Subject: [PATCH 199/202] Fix for bap ipynb path

---
 src/bap/conf/bap_barcode_multiplet.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bap/conf/bap_barcode_multiplet.config b/src/bap/conf/bap_barcode_multiplet.config
index 1d537411..1223b050 100644
--- a/src/bap/conf/bap_barcode_multiplet.config
+++ b/src/bap/conf/bap_barcode_multiplet.config
@@ -2,7 +2,7 @@ params {
     tools {
         bap {
             barcode_multiplet {
-                report_ipynb = './src/bap/bin/bap_barcode_multiplet_report.ipynb'
+                report_ipynb = '/src/bap/bin/bap_barcode_multiplet_report.ipynb'
                 bead_tag = 'CB' // existing tag in bam file
                 drop_tag = 'DB' // new tag with merged barcodes
                 // filters:

From 00322ce95d0dbab12dfe4156c70ae163db087554 Mon Sep 17 00:00:00 2001
From: cflerin
Date: Tue, 28 Sep 2021 11:26:53 +0200
Subject: [PATCH 200/202] Added Docker build descriptions to module readme files

---
 src/bwamaptools/README.rst       | 11 +++++++++
 src/popscle/README.rst           | 10 ++++++++
 src/pycistopic/Dockerfile        |  0
 src/samtools/README.rst          | 42 ++++++++++++++++++++++++++++++++
 src/singlecelltoolkit/README.rst | 11 +++++++++
 src/sinto/README.rst             |  9 +++++++
 6 files changed, 83 insertions(+)
 delete mode 100644 src/pycistopic/Dockerfile
 create mode 100644 src/samtools/README.rst

diff --git a/src/bwamaptools/README.rst b/src/bwamaptools/README.rst
index 1e98029a..350b75f0 100644
--- a/src/bwamaptools/README.rst
+++ b/src/bwamaptools/README.rst
@@ -5,3 +5,14 @@ BWA maptools module
 This repository contains an implementation of BWA for VIB-SingleCell-NF (VSN) pipelines, along with several supporting tools (htslib, samtools).
 See `lh3/bwa `_ for the original source.
 
+To build the Docker image
+-------------------------
+
+Image tag format: ``-``.
+
+.. code:: bash
+
+    docker build -t vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1 .
+
+This image uses the ``vibsinglecellnf/samtools`` image as a base.
+
diff --git a/src/popscle/README.rst b/src/popscle/README.rst
index 6bf6d2dc..3fe02a7f 100644
--- a/src/popscle/README.rst
+++ b/src/popscle/README.rst
@@ -13,6 +13,16 @@ element 1 is the sampleID and element 2 is the output folder of a 10X run.
 
 Currently the workflows are fixed to the filtered matrices.
 
+To build the Docker image
+-------------------------
+
+Image tag format: ``-``.
+
+.. code:: bash
+
+    docker build -t vibsinglecellnf/popscle:2021-05-05-da70fc7 .
+
+This image uses the ``vibsinglecellnf/samtools`` image as a base.
 
 Acknowledgements
 ----------------
diff --git a/src/pycistopic/Dockerfile b/src/pycistopic/Dockerfile
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/samtools/README.rst b/src/samtools/README.rst
new file mode 100644
index 00000000..ad3e9f2c
--- /dev/null
+++ b/src/samtools/README.rst
@@ -0,0 +1,42 @@
+
+Samtools Docker images
+======================
+
+This directory contains Dockerfiles for base images used here and for other images in the VSN Pipelines repository.
+
+
+To build the Base image
+-----------------------
+
+This base image is based on ``debian:buster-slim`` and has a compiled version of
+`zlib-ng `_ for faster compression and decompression.
+
+Image tag format: simple version numbers (0.1, 0.2, ...).
+
+.. code:: bash
+
+    docker build -t vibsinglecellnf/samtools:base-0.2 . -f Dockerfile.samtools-base
+
+This base image is used in several other images within VSN:
+
+- samtools [this directory]
+
+
+To build the Samtools image
+---------------------------
+
+This uses the base image above and adds Samtools and HTSlib.
+
+Image tag format: ``-``.
+
+.. code:: bash
+
+    docker build -t vibsinglecellnf/samtools:0.2-1.12 .
+
+This samtools image is used in several other images within VSN:
+
+- singlecelltoolkit
+- bwamaptools
+- popscle
+
+
diff --git a/src/singlecelltoolkit/README.rst b/src/singlecelltoolkit/README.rst
index 09d10e10..0e601acf 100644
--- a/src/singlecelltoolkit/README.rst
+++ b/src/singlecelltoolkit/README.rst
@@ -5,3 +5,14 @@ single_cell_toolkit template
 This repository contains an implementation of single_cell_toolkit for VIB-SingleCell-NF (VSN) pipelines.
 See `aertslab/single_cell_toolkit `_ for the original source.
 
+To build the Docker image
+-------------------------
+
+Image tag format: ``-``.
+
+.. code:: bash
+
+    docker build -t vibsinglecellnf/singlecelltoolkit:2021-07-29-09cac13 .
+
+This image uses the ``vibsinglecellnf/samtools`` image as a base.
+
diff --git a/src/sinto/README.rst b/src/sinto/README.rst
index 61415ec9..c2f1cba9 100644
--- a/src/sinto/README.rst
+++ b/src/sinto/README.rst
@@ -5,3 +5,12 @@ Sinto module
 This repository contains an implementation of Sinto for VIB-SingleCell-NF (VSN) pipelines.
 See `timoast/sinto `_ for the original source.
 
+To build the Docker image
+-------------------------
+
+Image tag format: Software release version.
+
+.. code:: bash
+
+    docker build -t vibsinglecellnf/sinto:0.7.3.1 .
+

From 3f67a944dbe4a038fdff73993819031ba2bea990 Mon Sep 17 00:00:00 2001
From: Chris Flerin
Date: Tue, 28 Sep 2021 16:50:44 +0200
Subject: [PATCH 201/202] Add tenx_arc input data

- Take outs directory for Cell Ranger ARC, produce data channel with the '10x_arc_cellranger_mex_outs' label
---
 nextflow.config                         |  3 +++
 src/channels/channels.nf                | 19 +++++++++++++++++++
 .../conf/tenx_arc_cellranger_mex.config | 12 ++++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 src/channels/conf/tenx_arc_cellranger_mex.config

diff --git a/nextflow.config b/nextflow.config
index 381bde15..3626db55 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -279,6 +279,9 @@ profiles {
         tenx_h5 {
             includeConfig 'src/channels/conf/tenx_cellranger_h5.config'
         }
+        tenx_arc {
+            includeConfig 'src/channels/conf/tenx_arc_cellranger_mex.config'
+        }
         h5ad {
             includeConfig 'src/channels/conf/h5ad.config'
         }
diff --git a/src/channels/channels.nf b/src/channels/channels.nf
index fdadad67..e9dab603 100644
--- a/src/channels/channels.nf
+++ b/src/channels/channels.nf
@@ -79,6 +79,25 @@ workflow getDataChannel {
             )
         }
     }
+    if(params.data.containsKey("tenx_arc") && params.data.tenx_arc.containsKey("cellranger_mex")) {
+        if(isOuts(params.data.tenx_arc.cellranger_mex)) {
+            data = data.concat(
+                getTenXCellRangerOutsChannel(
+                    params.data.tenx_arc.cellranger_mex
+                ).map {
+                    it -> tuple(it[0], it[1], "10x_arc_cellranger_mex_outs", outputFileFormat, 'NULL')
+                }
+            )
+        } else {
+            data = data.concat(
+                getTenXCellRangerMEXChannel(
+                    params.data.tenx_arc.cellranger_mex
+                ).map {
+                    it -> tuple(it[0], it[1], "10x_arc_cellranger_mex", outputFileFormat, 'NULL')
+                }
+            )
+        }
+    }
     if(params.data.containsKey("tenx") && params.data.tenx.containsKey("cellranger_h5")) {
         if(isOuts(params.data.tenx.cellranger_h5)) {
             data = data.concat(
diff --git a/src/channels/conf/tenx_arc_cellranger_mex.config b/src/channels/conf/tenx_arc_cellranger_mex.config
new file mode 100644
index 00000000..b292696f
--- /dev/null
+++ b/src/channels/conf/tenx_arc_cellranger_mex.config
@@ -0,0 +1,12 @@
+params {
+    data {
+        tenx_arc {
+            cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/'
+        }
+    }
+    tools {
+        file_converter {
+            off = 'cistopic_rds'
+        }
+    }
+}

From 2f9e645cb78cd965c3f7b65ffd09fa612109b2dd Mon Sep 17 00:00:00 2001
From: KrisDavie
Date: Tue, 30 Nov 2021 16:52:35 +0100
Subject: [PATCH 202/202] Update clustering resolution in tests

---
 conf/test__bbknn.config                                  | 4 ++++
 conf/test__bbknn_scenic.config                           | 4 ++++
 conf/test__harmony.config                                | 4 ++++
 conf/test__harmony_scenic.config                         | 4 ++++
 conf/test__mnncorrect.config                             | 4 ++++
 conf/test__single_sample.config                          | 4 ++++
 conf/test__single_sample_decontx_correct.config          | 4 ++++
 conf/test__single_sample_decontx_correct_scrublet.config | 4 ++++
 conf/test__single_sample_decontx_filter.config           | 4 ++++
 conf/test__single_sample_param_exploration.config        | 2 +-
 conf/test__single_sample_scenic.config                   | 4 ++++
 conf/test__single_sample_scenic_multiruns.config         | 4 ++++
 conf/test__single_sample_scrublet.config                 | 4 ++++
 13 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/conf/test__bbknn.config b/conf/test__bbknn.config
index a0812172..eda1df9a 100644
--- a/conf/test__bbknn.config
+++ b/conf/test__bbknn.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
     }
 }
diff --git a/conf/test__bbknn_scenic.config b/conf/test__bbknn_scenic.config
index 80ae0b68..39e8a3a4 100644
--- a/conf/test__bbknn_scenic.config
+++ b/conf/test__bbknn_scenic.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         scenic {
             numWorkers = 2
diff --git a/conf/test__harmony.config b/conf/test__harmony.config
index b3261d8d..e7a1c9e0 100644
--- a/conf/test__harmony.config
+++ b/conf/test__harmony.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
     }
 }
diff --git a/conf/test__harmony_scenic.config b/conf/test__harmony_scenic.config
index 143da4c8..b76f0ef4 100644
--- a/conf/test__harmony_scenic.config
+++ b/conf/test__harmony_scenic.config
@@ -30,6 +30,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         scenic {
             numWorkers = 2
diff --git a/conf/test__mnncorrect.config b/conf/test__mnncorrect.config
index b4da9ca0..bb95c6ba 100644
--- a/conf/test__mnncorrect.config
+++ b/conf/test__mnncorrect.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
        }
     }
 }
diff --git a/conf/test__single_sample.config b/conf/test__single_sample.config
index b6b92cfc..95682a80 100644
--- a/conf/test__single_sample.config
+++ b/conf/test__single_sample.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
     }
 }
diff --git a/conf/test__single_sample_decontx_correct.config b/conf/test__single_sample_decontx_correct.config
index 4f5e61b1..55c65961 100644
--- a/conf/test__single_sample_decontx_correct.config
+++ b/conf/test__single_sample_decontx_correct.config
@@ -22,6 +22,10 @@ params {
                     nComps = 10
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         celda {
             container = 'vibsinglecellnf/celda:1.4.5'
diff --git a/conf/test__single_sample_decontx_correct_scrublet.config b/conf/test__single_sample_decontx_correct_scrublet.config
index a7f6878c..92a0c63f 100644
--- a/conf/test__single_sample_decontx_correct_scrublet.config
+++ b/conf/test__single_sample_decontx_correct_scrublet.config
@@ -22,6 +22,10 @@ params {
                     nComps = 10
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         celda {
             container = 'vibsinglecellnf/celda:1.4.5'
diff --git a/conf/test__single_sample_decontx_filter.config b/conf/test__single_sample_decontx_filter.config
index 5be49bd4..38690233 100644
--- a/conf/test__single_sample_decontx_filter.config
+++ b/conf/test__single_sample_decontx_filter.config
@@ -22,6 +22,10 @@ params {
                     nComps = 10
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         celda {
             container = 'vibsinglecellnf/celda:1.4.5'
diff --git a/conf/test__single_sample_param_exploration.config b/conf/test__single_sample_param_exploration.config
index e4a78fd5..1421a997 100644
--- a/conf/test__single_sample_param_exploration.config
+++ b/conf/test__single_sample_param_exploration.config
@@ -23,7 +23,7 @@ params {
                 }
             }
             clustering {
-                resolutions = [0.8,1.0]
+                resolutions = [1.0,1.2]
             }
         }
     }
diff --git a/conf/test__single_sample_scenic.config b/conf/test__single_sample_scenic.config
index f0ec5f7e..b154af47 100644
--- a/conf/test__single_sample_scenic.config
+++ b/conf/test__single_sample_scenic.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         scenic {
             grn {
diff --git a/conf/test__single_sample_scenic_multiruns.config b/conf/test__single_sample_scenic_multiruns.config
index 1a79aba4..1344d69c 100644
--- a/conf/test__single_sample_scenic_multiruns.config
+++ b/conf/test__single_sample_scenic_multiruns.config
@@ -22,6 +22,10 @@ params {
                     nComps = 2
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         scenic {
             //filteredLoom = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_small.loom'
diff --git a/conf/test__single_sample_scrublet.config b/conf/test__single_sample_scrublet.config
index 877bb7c4..e66c5224 100644
--- a/conf/test__single_sample_scrublet.config
+++ b/conf/test__single_sample_scrublet.config
@@ -22,6 +22,10 @@ params {
                     nComps = 10
                 }
             }
+            clustering {
+                method = 'louvain'
+                resolution = 1
+            }
         }
         scrublet {
             container = 'vibsinglecellnf/scrublet:0.2.3'