-
Notifications
You must be signed in to change notification settings - Fork 751
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1239 from priyanka-surana/busco
Busco
Showing
6 changed files
with
748 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// BUSCO process: runs the `busco` command for each input tuple and each requested lineage, then emits the batch summary, any short summaries, the BUSCO output directory and versions.yml. | ||
process BUSCO { | ||
tag "$meta.id" | ||
label 'process_medium' | ||
|
||
// Software is pinned to BUSCO 5.3.2. The Galaxy depot image is selected when the container engine is 'singularity' and task.ext.singularity_pull_docker_container is not set; otherwise the quay.io biocontainer is used. | ||
conda (params.enable_conda ? "bioconda::busco=5.3.2" : null) | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/busco:5.3.2--pyhdfd78af_0': | ||
'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }" | ||
|
||
input: | ||
// Input sequence files are staged under tmp_input/ so the script can decompress or link them into a separate directory. | ||
tuple val(meta), path('tmp_input/*') | ||
each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead | ||
path busco_lineages_path // Recommended: path to busco lineages - downloads if not set | ||
path config_file // Optional: busco configuration file | ||
|
||
output: | ||
tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary | ||
// The short summaries are optional outputs: the script tolerates their absence (see the `mv ... || echo` step at the end of the script). | ||
tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true | ||
tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true | ||
tuple val(meta), path("*-busco") , emit: busco_dir | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
// Extra command-line arguments and the output prefix can be overridden through task.ext; the prefix defaults to "<meta.id>-<lineage>". | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" | ||
// Each optional input becomes an empty string when unset, so it can be interpolated into the command unconditionally. | ||
def busco_config = config_file ? "--config $config_file" : '' | ||
// A lineage of 'auto' selects --auto-lineage; any other value is passed to --lineage_dataset. | ||
def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" | ||
// A supplied lineages directory adds --offline and points --download_path at that directory. | ||
def busco_lineage_dir = busco_lineages_path ? "--offline --download_path ${busco_lineages_path}" : '' | ||
""" | ||
# Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) | ||
# Check for container variable initialisation script and source it. | ||
if [ -f "/usr/local/env-activate.sh" ]; then | ||
set +u # Otherwise, errors out because of various unbound variables | ||
. "/usr/local/env-activate.sh" | ||
set -u | ||
fi | ||
# If the augustus config directory is not writable, then copy to writeable area | ||
if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then | ||
# Create writable tmp directory for augustus | ||
AUG_CONF_DIR=\$( mktemp -d -p \$PWD ) | ||
cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR | ||
export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR | ||
echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}" | ||
fi | ||
# Ensure the input is uncompressed | ||
INPUT_SEQS=input_seqs | ||
mkdir "\$INPUT_SEQS" | ||
cd "\$INPUT_SEQS" | ||
for FASTA in ../tmp_input/*; do | ||
if [ "\${FASTA##*.}" == 'gz' ]; then | ||
gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz ) | ||
else | ||
ln -s "\$FASTA" . | ||
fi | ||
done | ||
cd .. | ||
busco \\ | ||
--cpu $task.cpus \\ | ||
--in "\$INPUT_SEQS" \\ | ||
--out ${prefix}-busco \\ | ||
$busco_lineage \\ | ||
$busco_lineage_dir \\ | ||
$busco_config \\ | ||
$args | ||
# clean up | ||
rm -rf "\$INPUT_SEQS" | ||
# Move files to avoid staging/publishing issues | ||
mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt | ||
mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found." | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) | ||
END_VERSIONS | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# Module metadata for the BUSCO process: tool description and links, documented inputs and outputs, and the module authors. | ||
name: busco | ||
description: Benchmarking Universal Single Copy Orthologs | ||
keywords: | ||
- quality control | ||
- genome | ||
- transcriptome | ||
- proteome | ||
tools: | ||
- busco: | ||
description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. | ||
homepage: https://busco.ezlab.org/ | ||
documentation: https://busco.ezlab.org/busco_userguide.html | ||
tool_dev_url: https://gitlab.com/ezlab/busco | ||
doi: "10.1007/978-1-4939-9173-0_14" | ||
licence: ["MIT"] | ||
|
||
# Inputs: a sample meta map, the sequence file(s), the lineage selector, and two optional paths (local lineages directory, BUSCO config file). | ||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- fasta: | ||
type: file | ||
description: Nucleic or amino acid sequence file in FASTA format. | ||
pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" | ||
- lineage: | ||
type: value | ||
description: The BUSCO lineage to use, or "auto" to automatically select lineage | ||
- busco_lineages_path: | ||
type: directory | ||
description: Path to local BUSCO lineages directory. | ||
- config_file: | ||
type: file | ||
description: Path to BUSCO config file. | ||
|
||
# Outputs: each entry below lists the file or directory pattern it is expected to match. | ||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- batch_summary: | ||
type: file | ||
description: Summary of all sequence files analyzed | ||
pattern: "*-busco.batch_summary.txt" | ||
- short_summaries_txt: | ||
type: file | ||
description: Short Busco summary in plain text format | ||
pattern: "short_summary.*.txt" | ||
- short_summaries_json: | ||
type: file | ||
description: Short Busco summary in JSON format | ||
pattern: "short_summary.*.json" | ||
- busco_dir: | ||
type: directory | ||
description: BUSCO lineage specific output | ||
pattern: "*-busco" | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
|
||
# GitHub handles of the module authors. | ||
authors: | ||
- "@priyanka-surana" | ||
- "@charles-plessy" | ||
- "@mahesh-panchal" | ||
- "@muffato" | ||
- "@jvhagey" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// Test configuration for the BUSCO module: sets the publish directory and the BUSCO --mode arguments used by each test workflow. | ||
process { | ||
|
||
// Publish under "<params.outdir>/<name>", where <name> is the last ':'-separated component of the process name, cut at the first '_' and lower-cased. | ||
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } | ||
|
||
// Each selector below sets ext.args for the BUSCO process of one named test workflow. | ||
withName: 'test_busco_genome_single_fasta:BUSCO' { | ||
ext.args = '--mode genome' | ||
} | ||
|
||
withName: 'test_busco_genome_multi_fasta:BUSCO' { | ||
ext.args = '--mode genome' | ||
} | ||
|
||
withName: 'test_busco_eukaryote_metaeuk:BUSCO' { | ||
ext.args = '--mode genome' | ||
} | ||
|
||
// Same genome mode as the metaeuk test, with --augustus added. | ||
withName: 'test_busco_eukaryote_augustus:BUSCO' { | ||
ext.args = '--mode genome --augustus' | ||
} | ||
|
||
withName: 'test_busco_protein:BUSCO' { | ||
ext.args = '--mode proteins' | ||
} | ||
|
||
withName: 'test_busco_transcriptome:BUSCO'{ | ||
ext.args = '--mode transcriptome' | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# Expects JSON and text short summaries for genome.fna against two lineages (bacteria_odb10, bacteroidetes_odb10), one batch summary per lineage, and versions.yml. | ||
- name: busco test_busco_genome_single_fasta | ||
command: nextflow run tests/modules/busco -entry test_busco_genome_single_fasta -c tests/config/nextflow.config | ||
tags: | ||
- busco | ||
files: | ||
# Short summaries are checked for expected substrings rather than by checksum. | ||
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
# Batch summaries and versions.yml are pinned by md5sum. | ||
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt | ||
md5sum: e50690742e9ae6abdd2bf99334ff9e12 | ||
- path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt | ||
md5sum: 4c1b2c4317c88398eddc30877ed740d9 | ||
- path: output/busco/versions.yml | ||
md5sum: 8aa830f71587d859df35c6cfab59f35d | ||
|
||
# Expects bacteria_odb10 short summaries for two inputs (genome.fasta and genome.fna), a single batch summary, and versions.yml. | ||
- name: busco test_busco_genome_multi_fasta | ||
command: nextflow run tests/modules/busco -entry test_busco_genome_multi_fasta -c tests/config/nextflow.config | ||
tags: | ||
- busco | ||
files: | ||
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt | ||
md5sum: 5360dfe83bec1f5741ee115e53e6b517 | ||
- path: output/busco/versions.yml | ||
md5sum: 9a959eb0a1f765777dff1ea2f5c139c0 | ||
|
||
# Expects eukaryota_odb10 short summaries for genome.fasta, the batch summary, and versions.yml. | ||
- name: busco test_busco_eukaryote_metaeuk | ||
command: nextflow run tests/modules/busco -entry test_busco_eukaryote_metaeuk -c tests/config/nextflow.config | ||
tags: | ||
- busco | ||
files: | ||
- path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
- path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt | ||
md5sum: a70806f99ba5706d7353d3353b3f1d2b | ||
- path: output/busco/versions.yml | ||
md5sum: 34a808c257e6db1b0456f3b4372bc477 | ||
|
||
# Only the eukaryota_odb10 batch summary and versions.yml are checked here; no short summary files are listed for this test. | ||
- name: busco test_busco_eukaryote_augustus | ||
command: nextflow run tests/modules/busco -entry test_busco_eukaryote_augustus -c tests/config/nextflow.config | ||
tags: | ||
- busco | ||
files: | ||
- path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt | ||
md5sum: 660393dd43cd6a093b952d4b8ad41e40 | ||
- path: output/busco/versions.yml | ||
md5sum: 2caac915461410b16a1524ac064cd0df | ||
|
||
# Expects bacteria_odb10 short summaries for proteome.fasta, the batch summary, and versions.yml. | ||
- name: busco test_busco_protein | ||
command: nextflow run tests/modules/busco -entry test_busco_protein -c tests/config/nextflow.config | ||
tags: | ||
- busco | ||
files: | ||
- path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt | ||
md5sum: fd3b4e30ce74d1fcb95d6286d6e2049f | ||
- path: output/busco/versions.yml | ||
md5sum: d7392261a57960a7e6aea609dce824f5 | ||
|
||
# Expects bacteria_odb10 short summaries for test1.contigs.fa, the batch summary, and versions.yml. | ||
- name: busco test_busco_transcriptome | ||
command: nextflow run tests/modules/busco -entry test_busco_transcriptome -c tests/config/nextflow.config | ||
tags: | ||
- busco | ||
files: | ||
- path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json | ||
contains: | ||
- "one_line_summary" | ||
- "input_file" | ||
- "mode" | ||
- "dataset" | ||
- path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt | ||
contains: | ||
- "BUSCO version" | ||
- "The lineage dataset is" | ||
- "BUSCO was run in mode" | ||
- "Complete BUSCOs" | ||
- "Missing BUSCOs" | ||
- "Dependencies and versions" | ||
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt | ||
md5sum: 9a176cafe66ac0adca89dc34ad2be13f | ||
- path: output/busco/versions.yml | ||
md5sum: 30eacbc7df70f6b1e72e0a7b6d02a7e1 |