diff --git a/modules/busco/main.nf b/modules/busco/main.nf new file mode 100644 index 00000000000..e6d055825db --- /dev/null +++ b/modules/busco/main.nf @@ -0,0 +1,41 @@ +process BUSCO { + tag "$meta.id" + label 'process_medium' + conda (params.enable_conda ? "bioconda::busco=5.2.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0' : + 'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path(augustus_config) + val(lineage) + + output: + tuple val(meta), path("${meta.id}/run_*/full_table.tsv"), emit: tsv + tuple val(meta), path("${meta.id}/run_*/short_summary.txt"), emit: txt + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (lineage) args += " --lineage_dataset $lineage" + """ + # Ensure the input is uncompressed + gzip -cdf $fasta > __UNCOMPRESSED_FASTA_FILE__ + # Copy the image's AUGUSTUS config directory if it was not provided to the module + [ ! -e augustus_config ] && cp -a /usr/local/config augustus_config + AUGUSTUS_CONFIG_PATH=augustus_config \\ + busco \\ + $args \\ + --augustus \\ + --cpu $task.cpus \\ + --in __UNCOMPRESSED_FASTA_FILE__ \\ + --out $meta.id + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml new file mode 100644 index 00000000000..e58d02660cf --- /dev/null +++ b/modules/busco/meta.yml @@ -0,0 +1,52 @@ +name: busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: ttps://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format + pattern: "*.{fasta}" + - augustus_config: + type: directory + description: AUGUSTUS config directory + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: Full summary table + pattern: "*.{tsv}" + - txt: + type: file + description: Short summary text + pattern: "*.{txt}" + +authors: + - "@charles-plessy" + - "@priyanka-surana" diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 31e176183fb..931cba00d8b 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -114,8 +114,8 @@ params { genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi" transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta" genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta" - genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz" - + genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz" + chr22_odb10_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/BUSCO/chr22_odb10.tar.gz" dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" dbsnp_146_hg38_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" gnomad_r2_1_1_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf new file mode 100644 index 00000000000..28a4921c41f --- /dev/null +++ b/tests/modules/busco/main.nf @@ -0,0 +1,26 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BUSCO as BUSCO_BACTE } from '../../../modules/busco/main.nf' +include { BUSCO as BUSCO_CHR22 } from '../../../modules/busco/main.nf' +include { UNTAR } from '../../../modules/untar/main.nf' + +// This tests genome decompression, empty input channels and data download +workflow test_busco_bacteroidales { + input = [ [ id:'test' ], file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) ] + BUSCO_BACTE ( input, + [], + [] ) +} + +// This tests uncompressed genome, BUSCO lineage file provided via input channel, and offline mode +workflow test_busco_chr22 { + input = [ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + lineage_dataset = [ file(params.test_data['homo_sapiens']['genome']['chr22_odb10_tar_gz'], checkIfExists: true) ] + UNTAR(lineage_dataset) + BUSCO_CHR22 ( input, + [], + UNTAR.out.untar ) +} + diff --git a/tests/modules/busco/nextflow.config b/tests/modules/busco/nextflow.config new file mode 100644 index 00000000000..e9dec7d1ae9 --- /dev/null +++ b/tests/modules/busco/nextflow.config @@ -0,0 +1,14 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: BUSCO_BACTE { + ext.args = '--mode genome --lineage_dataset bacteroidales_odb10' + } + + withName: BUSCO_CHR22 { + ext.args = '--mode genome --offline' + } + +} + diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml new file mode 100644 index 00000000000..d660c2b4972 --- /dev/null +++ b/tests/modules/busco/test.yml @@ -0,0 +1,20 @@ +- name: busco test_busco_bacteroidales + command: nextflow run tests/modules/busco -entry test_busco_bacteroidales -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/test/run_bacteroidales_odb10/full_table.tsv + md5sum: 8d7b401d875ecd9291b01bf4485bf080 + - path: output/busco/test/run_bacteroidales_odb10/short_summary.txt + contains: ['Complete BUSCOs (C)'] + +- name: busco test_busco_chr22 + command: nextflow run tests/modules/busco -entry test_busco_chr22 -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/test/run_chr22_odb10/full_table.tsv + md5sum: 83f20e8996c591338ada73b6ab0eb269 + - path: output/busco/test/run_chr22_odb10/short_summary.txt + contains: ['Complete BUSCOs (C)'] +