diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index c2681450..395b231c 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -19,9 +19,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: octokit/request-action@v2.x + if: github.event_name != 'workflow_dispatch' id: check_approvals with: - route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews + route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - id: test_variables diff --git a/.nf-core.yml b/.nf-core.yml index 5d214b97..5d193256 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,5 +18,5 @@ template: org: nf-core outdir: . skip_features: null - version: 3.1.1dev + version: 3.2.0 update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index 56e19847..43c68211 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 3.2.0 [2024-10-25] +## 3.2.0 [2024-10-27] ### `Added` diff --git a/conf/modules.config b/conf/modules.config index 4fbd6184..fa48b5cb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -263,13 +263,13 @@ process { } withName: METASPADES { - ext.args = params.spades_options ?: '--meta' + ext.args = params.spades_options ? params.spades_options + ' --meta' : '--meta' ext.prefix = { "SPAdes-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } withName: METASPADESHYBRID { - ext.args = params.spades_options ?: '--meta' + ext.args = params.spades_options ? params.spades_options + ' --meta' : '--meta' ext.prefix = { "SPAdesHybrid-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } diff --git a/conf/test_full.config b/conf/test_full.config index b09e6fe1..ed5923d0 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -26,8 +26,8 @@ params { skip_gtdbtk = true // TODO TEMPORARY: deactivate SPAdes due to incompatibility of container with fusion file system - skip_spades = false - skip_spadeshybrid = false + skip_spades = true + skip_spadeshybrid = true // reproducibility options for assembly spades_fix_cpus = 10 diff --git a/docs/images/mag_workflow.png b/docs/images/mag_workflow.png index 0d7c805f..2a2abfa3 100644 Binary files a/docs/images/mag_workflow.png and b/docs/images/mag_workflow.png differ diff --git a/docs/images/mag_workflow.svg b/docs/images/mag_workflow.svg index b5c9fa8d..e2039d81 100644 --- a/docs/images/mag_workflow.svg +++ b/docs/images/mag_workflow.svg @@ -350,11 +350,11 @@ borderopacity="1.0" inkscape:pageopacity="0.0" inkscape:pageshadow="2" - inkscape:zoom="0.66558755" - inkscape:cx="641.53844" - inkscape:cy="333.53989" + inkscape:zoom="1.8825659" + inkscape:cx="392.01815" + inkscape:cy="508.34874" inkscape:document-units="mm" - inkscape:current-layer="g21914" + inkscape:current-layer="g3-6" showgrid="true" inkscape:window-width="1664" inkscape:window-height="1051" @@ -1267,7 +1267,7 @@ y="9.9461374" x="151.11928" id="tspan4772-6-5-1-9-3" - sodipodi:role="line">porechop_ABIiporechop_ABI + + + +image/svg+xmlTaxonomicclassificationCentrifugeKraken2VisualizationKronaDomain classificationReportingMultiQC(MAG summary)tsvDBShort reads(required)Adapter/qualitytrimmingBBNormfastpAdapterRemovalHost read removalDepth normalisationBowtie2Remove PhiXBowtie2FastQCEvaluation csvLong reads(optional)NanoPlotEvaluationNanoLyseRemove LambdaFiltlongQuality filteringAdapter/qualitytrimmingPorechopporechop_ABIDBTaxonomic classificationCATGTDB-TkTiaraMetaEukGenome annotationPROKKAProtein-codinggene predictionPRODIGALVirus identificationAssembly(sample- or group-wise)EvaluationQUASTaDNA ValidationpyDamageFreebayesBCFToolsgeNomadSPAdesMEGAHITSPAdesHybridDBBinningMetaBAT2MaxBin2CONCOCTEvaluationBUSCOCheckMGUNCQUAST(Abundance estimation and visualization)v3.2Binning refinementDAS Toolnf-core/magCC-BY 4.0 Design originally by Zandra FagernäsBin post-processing diff --git a/nextflow_schema.json b/nextflow_schema.json index 1f50cbd2..b96a1b07 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -571,8 +571,8 @@ }, "spades_options": { "type": "string", - "description": "Additional custom options for SPAdes and SPAdesHybrid. You must also specify `--meta` to run SPAdes in metagenomic mode if customising these options!", - "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")" + "description": "Additional custom options for SPAdes and SPAdesHybrid. Do not specify `--meta` as this will be added for you!", + "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not --meta, -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")" }, "megahit_options": { "type": "string", diff --git a/workflows/mag.nf b/workflows/mag.nf index a06dc78a..19b5deec 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -3,28 +3,30 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_pipeline' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_pipeline' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' -include { BINNING } from '../subworkflows/local/binning' -include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' -include { BUSCO_QC } from '../subworkflows/local/busco_qc' -include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' -include { CHECKM_QC } from '../subworkflows/local/checkm_qc' -include { GUNC_QC } from '../subworkflows/local/gunc_qc' -include { GTDBTK } from '../subworkflows/local/gtdbtk' -include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' -include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' -include { DEPTHS } from '../subworkflows/local/depths' -include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' -include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' + +include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' +include { BINNING } from '../subworkflows/local/binning' +include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' +include { BUSCO_QC } from '../subworkflows/local/busco_qc' +include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' +include { CHECKM_QC } from '../subworkflows/local/checkm_qc' +include { GUNC_QC } from '../subworkflows/local/gunc_qc' +include { GTDBTK } from '../subworkflows/local/gtdbtk' +include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' +include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' +include { DEPTHS } from '../subworkflows/local/depths' +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' // // MODULE: Installed directly from nf-core/modules @@ -57,28 +59,29 @@ include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/mai // // MODULE: Local to the pipeline // -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' -include { KRAKEN2 } from '../modules/local/kraken2' -include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' -include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' -include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' -include { QUAST } from '../modules/local/quast' -include { QUAST_BINS } from '../modules/local/quast_bins' -include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' -include { CAT_DB } from '../modules/local/cat_db' -include { CAT_DB_GENERATE } from '../modules/local/cat_db_generate' -include { CAT } from '../modules/local/cat' -include { CAT_SUMMARY } from '../modules/local/cat_summary' -include { BIN_SUMMARY } from '../modules/local/bin_summary' -include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv' + +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' +include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' +include { KRAKEN2 } from '../modules/local/kraken2' +include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' +include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' +include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' +include { QUAST } from '../modules/local/quast' +include { QUAST_BINS } from '../modules/local/quast_bins' +include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' +include { CAT_DB } from '../modules/local/cat_db' +include { CAT_DB_GENERATE } from '../modules/local/cat_db_generate' +include { CAT } from '../modules/local/cat' +include { CAT_SUMMARY } from '../modules/local/cat_summary' +include { BIN_SUMMARY } from '../modules/local/bin_summary' +include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv' workflow MAG { take: - ch_raw_short_reads // channel: samplesheet read in from --input + ch_raw_short_reads // channel: samplesheet read in from --input ch_raw_long_reads ch_input_assemblies @@ -508,10 +511,6 @@ workflow MAG { ch_long_reads_grouped = ch_long_reads } - ch_assembled_contigs = Channel.empty() - ch_assembled_contigs_gz = Channel.empty() - - // Co-assembly: pool reads for SPAdes if (!params.skip_spades || !params.skip_spadeshybrid) { if (params.coassemble_group) { if (params.bbnorm) { @@ -544,14 +543,19 @@ workflow MAG { ch_long_reads_spades = Channel.empty() } + + // Assembly + + ch_assembled_contigs = Channel.empty() + ch_assembled_contigs_gz = Channel.empty() + if (!params.single_end && !params.skip_spades) { METASPADES(ch_short_reads_spades.map { meta, reads -> [meta, reads, [], []] }, [], []) - ch_spades_assemblies = METASPADES.out.scaffolds - .map { meta, assembly -> - def meta_new = meta + [assembler: 'SPAdes'] - [meta_new, assembly] - } - .tap { ch_spades_assemblies_gz } + ch_spades_assemblies = METASPADES.out.scaffolds.map { meta, assembly -> + def meta_new = meta + [assembler: 'SPAdes'] + [meta_new, assembly] + } + .tap { ch_spades_assemblies_gz } ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies) ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_spades_assemblies_gz) @@ -567,13 +571,11 @@ workflow MAG { .map { id, meta_long, long_reads, meta_short, short_reads -> [meta_short, short_reads, [], long_reads] } METASPADESHYBRID(ch_reads_spadeshybrid, [], []) - ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds - .map { meta, assembly -> - def meta_new = meta + [assembler: "SPAdesHybrid"] - [meta_new, assembly] - } - .tap { ch_spadeshybrid_assemblies_gz } - + ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds.map { meta, assembly -> + def meta_new = meta + [assembler: "SPAdesHybrid"] + [meta_new, assembly] + } + .tap { ch_spadeshybrid_assemblies_gz } ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies) ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_spadeshybrid_assemblies_gz) @@ -582,18 +584,17 @@ workflow MAG { if (!params.skip_megahit) { MEGAHIT(ch_short_reads_grouped) - ch_megahit_assemblies = MEGAHIT.out.contigs - .map { meta, assembly -> - def meta_new = meta + [assembler: 'MEGAHIT'] - [meta_new, assembly] - } - .tap { ch_megahit_assemblies_gz } - + ch_megahit_assemblies = MEGAHIT.out.contigs.map { meta, assembly -> + def meta_new = meta + [assembler: 'MEGAHIT'] + [meta_new, assembly] + } + .tap { ch_megahit_assemblies_gz } ch_assembled_contigs = ch_assembled_contigs.mix(ch_megahit_assemblies) ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_megahit_assemblies_gz) ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) } + GUNZIP_ASSEMBLIES(ch_assembled_contigs) ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES.out.versions) @@ -803,13 +804,12 @@ workflow MAG { ch_busco_summary = BUSCO_QC.out.summary ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first()) // process information if BUSCO analysis failed for individual bins due to no matching genes - BUSCO_QC.out.failed_bin - .splitCsv(sep: '\t') - .map { bin, error -> - if (!bin.contains(".unbinned.")) { - busco_failed_bins[bin] = error - } + + BUSCO_QC.out.failed_bin.splitCsv(sep: '\t').map { bin, error -> + if (!bin.contains(".unbinned.")) { + busco_failed_bins[bin] = error } + } } if (!params.skip_binqc && params.binqc_tool == 'checkm') { @@ -1091,5 +1091,5 @@ workflow MAG { emit: multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] }