diff --git a/CHANGELOG.md b/CHANGELOG.md index da2191c2..28e9c384 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#353](https://github.com/nf-core/mag/pull/353) - Added the busco_clean parameter to optionally clean each BUSCO directory after a successful run + ### `Changed` - [#340](https://github.com/nf-core/mag/pull/340) - Update to nf-core 2.6.1 `TEMPLATE` diff --git a/conf/test.config b/conf/test.config index 5df32bdb..922ca6ff 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,5 +27,6 @@ params { min_length_unbinned_contigs = 1 max_unbinned_contigs = 2 busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz" + busco_clean = true gtdb = false } diff --git a/modules/local/busco.nf b/modules/local/busco.nf index d6669ca7..1b88cef1 100644 --- a/modules/local/busco.nf +++ b/modules/local/busco.nf @@ -32,6 +32,10 @@ process BUSCO { if (params.busco_reference) lineage_dataset_provided = "Y" + def busco_clean = "N" + if (params.busco_clean) + busco_clean = "Y" + def p = "--auto-lineage" if (params.busco_reference){ p = "--lineage_dataset dataset/${db}" @@ -181,6 +185,13 @@ process BUSCO { mv BUSCO/logs/prodigal_out.log "${bin}_prodigal.gff" fi + # delete temporary BUSCO files only when busco_clean is "Y" (a bare [ N ] test would also be true) + if [ "${busco_clean}" = "Y" ]; then + find . -depth -type d -name "augustus_config" -execdir rm -rf "{}" \\; + find . -depth -type d -name "auto_lineage" -execdir rm -rf "{}" \\; + find .
-depth -type d -name "run_*" -execdir rm -rf "{}" + + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version 2>&1 | sed 's/Python //g') diff --git a/nextflow.config b/nextflow.config index 4f202a9e..a9dd15a3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -97,6 +97,7 @@ params { busco_download_path = null busco_auto_lineage_prok = false save_busco_reference = false + busco_clean = false // Reproducibility options megahit_fix_cpu_1 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index cfa30939..96ac6bb1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -641,6 +641,11 @@ "description": "Specify which binning output is sent for downstream annotation, taxonomic classification, bin quality control etc.", "help_text": "`raw_bins_only`: only bins (and unbinned contigs) from the binners.\n`refined_bins_only`: only bins (and unbinned contigs) from the bin refinement step .\n`both`: bins and unbinned contigs from both the binning and bin refinement steps.", "enum": ["raw_bins_only", "refined_bins_only", "both"] + }, + "busco_clean": { + "type": "boolean", + "description": "Enable clean-up of temporary files created during BUSCO runs.", + "help_text": "By default, BUSCO creates a large number of intermediate files every run. This may cause problems on some clusters which have file number limits in place, particularly with large numbers of bins. Enabling this option cleans these files, reducing the total file count of the work directory." } } },