diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 47953c77..a27616dc 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -45,9 +45,6 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72e51e1f..835b46a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,10 +1,13 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: push: branches: - - dev + - "dev" pull_request: + branches: + - "dev" + - "master" release: types: [published] workflow_dispatch: @@ -13,18 +16,34 @@ env: NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + NFTEST_VER: "0.9.0" concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: + define_nxf_versions: + name: Choose nextflow versions to test against depending on target branch + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.nxf_versions.outputs.matrix }} + steps: + - id: nxf_versions + run: | + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT + else + echo matrix='["latest-everything", "23.10.0"]' | tee -a $GITHUB_OUTPUT + fi + test: name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" runs-on: ubuntu-latest strategy: + fail-fast: false matrix: NXF_VER: - "24.04.2" @@ -35,6 +54,17 @@ jobs: - "singularity" test_name: - "test" + - "test_nothing" + - "test_bakta" + - "test_prokka" + - "test_bgc_pyrodigal" + - "test_bgc_bakta" + - "test_bgc_prokka" + - "test_taxonomy_pyrodigal" + - "test_taxonomy_bakta" + - "test_taxonomy_prokka" + - "test_preannotated" + - "test_preannotated_bgc" isMaster: - ${{ github.base_ref == 'master' }} # Exclude conda and singularity on dev @@ -52,6 +82,14 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Check out test data + uses: actions/checkout@v3 + with: + repository: nf-core/test-datasets + ref: funcscan + path: test-datasets/ + fetch-depth: 1 + - name: Set up Apptainer if: matrix.profile == 'singularity' uses: eWaterCycle/setup-apptainer@main @@ -80,6 +118,23 @@ jobs: - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Install nf-test + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" run: | - nextflow run 
${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results + nf-test test --tag ${{ matrix.test_name }} --profile ${{ matrix.test_name }},${{ matrix.profile }} --junitxml=test.xml + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/*/tests/output/pipeline_info/software_versions.yml + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: "*.xml" diff --git a/.gitignore b/.gitignore index a42ce016..23b0c7de 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc null/ +.nf-test* diff --git a/.nf-core.yml b/.nf-core.yml index eb1f60b7..3fba6299 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -3,12 +3,14 @@ lint: nf_core_version: 3.1.0 repository_type: pipeline template: - author: Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James - A. Fellows Yates + author: Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates description: Pipeline for screening for functional components of assembled contigs force: false is_nfcore: true name: funcscan org: nf-core outdir: . + skip_features: + - igenomes + - fastqc version: 2.1.0dev diff --git a/.prettierignore b/.prettierignore index 437d763d..abb4b4d6 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,3 +10,4 @@ testing/ testing* *.pyc bin/ +tests/ diff --git a/CHANGELOG.md b/CHANGELOG.md index a1d052a9..b1fd07b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,280 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.1.0dev - [date] +## v2.1.0 - [unreleased] + +### `Added` + +- [#421](https://github.com/nf-core/funcscan/pull/421) Updated to nf-core template 3.0.2 (by @jfy133) +- [#427](https://github.com/nf-core/funcscan/pull/427) AMPcombi now can use multiple other databases for classifications. (by @darcy220606) + +### `Fixed` + +- [#427](https://github.com/nf-core/funcscan/pull/427) Fixed the AMP reference database issues reported by users, due to non-ASCII characters. (by @darcy220606) + +### `Dependencies` + +| Tool | Previous version | New version | +| -------- | ---------------- | ----------- | +| AMPcombi | 0.2.2 | 2.0.1 | +| Macrel | 1.2.0 | 1.4.0 | +| MultiQC | 1.24.0 | 1.25.1 | + +### `Deprecated` + +## v2.0.0 - [2024-09-05] + +### `Breaking change` + +- [#391](https://github.com/nf-core/funcscan/pull/391) Made all "database" parameter names consistent, skip hmmsearch by default. 
(by @jasmezz) + +| Old parameter | New parameter | +| ------------------------------------------------ | --------------------------------------- | +| `annotation_bakta_db_localpath` | `annotation_bakta_db` | +| `arg_abricate_db` | `arg_abricate_db_id` | +| `arg_abricate_localdbdir` | `arg_abricate_db` | +| `arg_deeparg_data` | `arg_deeparg_db` | +| `arg_deeparg_data_version` | `arg_deeparg_db_version` | +| `arg_rgi_database` | `arg_rgi_db` | +| `bgc_antismash_databases` | `bgc_antismash_db` | +| `bgc_antismash_installationdirectory` | `bgc_antismash_installdir` | +| `bgc_deepbgc_database` | `bgc_deepbgc_db` | +| `save_databases` | `save_db` | +| `taxa_classification_mmseqs_databases_localpath` | `taxa_classification_mmseqs_db` | +| `taxa_classification_mmseqs_databases_id` | `taxa_classification_mmseqs_db_id` | +| `taxa_classification_mmseqs_databases_savetmp` | `taxa_classification_mmseqs_db_savetmp` | +| `amp_skip_hmmsearch` | `amp_run_hmmsearch` | +| `bgc_skip_hmmsearch` | `bgc_run_hmmsearch` | + +- [#343](https://github.com/nf-core/funcscan/pull/343) Standardized the resulting workflow summary tables to always start with 'sample_id\tcontig_id\t..'. Reformatted the output of `hamronization/summarize` module. (by @darcy220606) +- [#411](https://github.com/nf-core/funcscan/pull/411) Optimised hAMRonization input: only high-quality hits from fARGene output are reported. (by @jasmezz, @jfy133) + +### `Added` + +- [#322](https://github.com/nf-core/funcscan/pull/322) Updated all modules: introduce environment.yml files. (by @jasmezz) +- [#324](https://github.com/nf-core/funcscan/pull/324) Removed separate DeepARG test profile because database download is now stable. (by @jasmezz) +- [#332](https://github.com/nf-core/funcscan/pull/332) & [#327](https://github.com/nf-core/funcscan/pull/327) Merged pipeline template of nf-core/tools version 2.12.1 (by @jfy133, @jasmezz) +- [#338](https://github.com/nf-core/funcscan/pull/338) Set `--meta` parameter to default for Bakta, with singlemode optional. (by @jasmezz) +- [#343](https://github.com/nf-core/funcscan/pull/343) Added contig taxonomic classification using [MMseqs2](https://github.com/soedinglab/MMseqs2/). (by @darcy220606) +- [#358](https://github.com/nf-core/funcscan/pull/358) Improved RGI databases handling, users can supply their own CARD now. (by @jasmezz) +- [#375](https://github.com/nf-core/funcscan/pull/375) Merged pipeline template of nf-core/tools version 2.14.1. (by @jfy133) +- [#381](https://github.com/nf-core/funcscan/pull/381) Added support for supplying pre-annotated sequences to the pipeline. (by @jfy133, @jasmezz) +- [#382](https://github.com/nf-core/funcscan/pull/382) Optimised BGC screening run time and prevent crashes due to too-short contigs by adding contig length filtering for BGC workflow only. (by @jfy133, @darcy220606) +- [#366](https://github.com/nf-core/funcscan/pull/366) Added nf-test on pipeline level. (by @jfy133, @Darcy220606, @jasmezz) +- [#403](https://github.com/nf-core/funcscan/pull/403) Added antiSMASH parameters `--pfam2go`, `--rre`, and `--tfbs`. (reported by @Darcy220606, added by @jasmezz) +- [#405](https://github.com/nf-core/funcscan/pull/405) Added argNorm to ARG subworkflow. (by @Vedanth-Ramji) + +### `Fixed` + +- [#348](https://github.com/nf-core/funcscan/pull/348) Updated samplesheet for pipeline tests to 'samplesheet_reduced.csv' with smaller datasets to reduce resource consumption. Updated prodigal module to fix pigz issue. Removed `tests/` from `.gitignore`. 
(by @darcy220606) +- [#362](https://github.com/nf-core/funcscan/pull/362) Save annotations from bakta in subdirectories per sample. (by @jasmezz) +- [#363](https://github.com/nf-core/funcscan/pull/363) Removed warning from DeepBGC usage docs. (by @jasmezz) +- [#365](https://github.com/nf-core/funcscan/pull/365) Fixed AMRFinderPlus module and usage docs for manual database download. (by @jasmezz) +- [#371](https://github.com/nf-core/funcscan/pull/371) Fixed AMRFinderPlus parameter `arg_amrfinderplus_name`. (by @m3hdad) +- [#377](https://github.com/nf-core/funcscan/pull/377) Fixed an occasional RGI process failure when certain files not produced. (❤️ to @amizeranschi for reporting, fix by @amizeranschi & @jfy133) +- [#386](https://github.com/nf-core/funcscan/pull/386) Updated DeepBGC module to fix output file names, separate annotation step for all BGC tools, add warning if no BGCs found, fix MultiQC reporting of annotation workflow. (by @jfy133, @jasmezz) +- [#393](https://github.com/nf-core/funcscan/pull/393) & [#397](https://github.com/nf-core/funcscan/pull/397) Fixed a docker/singularity only error appearing when running with conda. (❤️ to @ewissel for reporting, fix by @jfy33 & @jasmezz) +- [#391](https://github.com/nf-core/funcscan/pull/391) Skip hmmmsearch by default to not crash pipeline if user provides no HMM files, updated docs. (by @jasmezz) +- [#397](https://github.com/nf-core/funcscan/pull/397) Removed deprecated AMPcombi module, fixed variable name in BGC workflow, updated minor parts in docs (usage, parameter schema). (by @jasmezz) +- [#402](https://github.com/nf-core/funcscan/pull/402) Fixed BGC length calculation for antiSMASH hits by comBGC. (by @jasmezz) +- [#406](https://github.com/nf-core/funcscan/pull/406) Fixed prediction tools not being executed if annotation workflow skipped. (by @jasmezz) +- [#407](https://github.com/nf-core/funcscan/pull/407) Fixed comBGC bug when parsing multiple antiSMASH files. (by @jasmezz) +- [#409](https://github.com/nf-core/funcscan/pull/409) Fixed argNorm overwriting its output for DeepARG. (by @jasmezz, @jfy133) +- [#412](https://github.com/nf-core/funcscan/pull/412) Improve all pre-run database download documentation. (by @jfy133) + +### `Dependencies` + +| Tool | Previous version | New version | +| ------------- | ---------------- | ----------- | +| AMPcombi | 0.1.7 | 0.2.2 | +| AMPlify | 1.1.0 | 2.0.0 | +| AMRFinderPlus | 3.11.18 | 3.12.8 | +| antiSMASH | 6.1.1 | 7.1.0 | +| argNorm | NA | 0.5.0 | +| bioawk | 1.0 | NA | +| comBGC | 1.6.1 | 1.6.2 | +| DeepARG | 1.0.2 | 1.0.4 | +| DeepBGC | 0.1.30 | 0.1.31 | +| GECCO | 0.9.8 | 0.9.10 | +| hAMRonization | 1.1.1 | 1.1.4 | +| HMMER | 3.3.2 | 3.4 | +| MMSeqs | NA | 2:15.6f452 | +| MultiQC | 1.15 | 1.24 | +| Pyrodigal | 2.1.0 | 3.3.0 | +| RGI | 5.2.1 | 6.0.3 | +| seqkit | NA | 2.8.1 | +| tabix/htslib | 1.11 | 1.20 | + +### `Deprecated` + +- [#384](https://github.com/nf-core/funcscan/pull/384) Deprecated AMPcombi and exchanged it with full suite of AMPcombi2 submodules. (by @darcy220606) +- [#382](https://github.com/nf-core/funcscan/pull/382) Optimised BGC screening run time and prevent crashes due to too-short contigs by adding contig length filtering for BGC workflow only. Bioawk is replaced with seqkit. (by @jfy133, @darcy220606) + +## v1.1.6 - [2024-07-08] + +### `Added` + +### `Fixed` + +- [#396](https://github.com/nf-core/funcscan/pull/396) Fixed bioawk overwriting input files. 
(❤️ to @Microbion for reporting, fix by @jfy133) + +### `Dependencies` + +## v1.1.5 - [2024-03-20] + +### `Added` + +### `Fixed` + +- [#346](https://github.com/nf-core/funcscan/pull/346) Pinned version of nf-validation to 1.1.3 + +### `Dependencies` + +| Plugin | Previous | New version | +| ------------- | -------- | ----------- | +| Bakta | 1.8.2 | 1.9.3 | +| nf-validation | Latest | 1.1.3 | + +### `Deprecated` + +## v1.1.4 - [2023-11-07] + +### `Added` + +### `Fixed` + +- [#306](https://github.com/nf-core/funcscan/pull/306) Added new parameter `annotation_prokka_retaincontigheaders` to allow prokka to retain the original contig headers/locus tag. (by @darcy220606) +- [#307](https://github.com/nf-core/funcscan/pull/307) Fixed stability of deepARG tests by using Zenodo copy of database. (❤️ to Gustavo Arango and Liqing Zhang for uploading, fix by @jfy133) +- [#310](https://github.com/nf-core/funcscan/pull/310) Fixed error when supplying uncompressed input; added "fas" file extension for FASTA files. (by @tavareshugo) +- [#311](https://github.com/nf-core/funcscan/pull/311) Merged pipeline template of nf-core/tools version 2.10. (by @jasmezz) + +### `Dependencies` + +| Tool | Previous version | New version | +| ------------- | ---------------- | ----------- | +| AMRFinderPlus | 3.10.42 | 3.11.18 | +| Bakta | 1.7.0 | 1.8.2 | +| MultiQC | 1.14 | 1.15 | + +### `Deprecated` + +- FastQC + +## v1.1.3 - [2023-08-11] + +### `Added` + +- [#290](https://github.com/nf-core/funcscan/pull/290) Merged pipeline template of nf-core/tools version 2.9, updated references. (by @jfy133) +- [#285](https://github.com/nf-core/funcscan/pull/285) Use nf-validation for samplesheet checking and added support for `fna.gz` input FASTA files. (by @louperelo, @mirpedrol, @jfy133) +- [#295](https://github.com/nf-core/funcscan/pull/295) Add Prokka to MultiQC output. (by @louperelo) + +### `Fixed` + +- [#296](https://github.com/nf-core/funcscan/pull/296) Fixed empty output when saving prodigal annotations. (reported by @louperelo, fix by @jasmezz) +- [#297](https://github.com/nf-core/funcscan/pull/297) Added check for empty annotation files prior going into screening. (❤️ to @alexhbnr for requesting, added by @jfy133) +- [#299](https://github.com/nf-core/funcscan/pull/299) Fixed pigz error with symlinks in Pyrodigal. (by @jasmezz) +- [#300](https://github.com/nf-core/funcscan/pull/300) Fixed wrong Pyrodigal channels being submitted to antiSMASH. (reported by Till Bayer, fix by @jasmezz) +- [#302](https://github.com/nf-core/funcscan/pull/302) Removed trouble-causing default parameters in json schema. (by @robsyme) + +### `Dependencies` + +| Tool | Previous version | New version | +| ------ | ---------------- | ----------- | +| comBGC | 0.6.0 | 0.6.1 | +| GECCO | 0.9.2 | 0.9.8 | + +### `Deprecated` + +## v1.1.2 - [2023-06-30] + +### `Added` + +### `Fixed` + +- [#279](https://github.com/nf-core/funcscan/pull/279) Fix docker/podman registry definition for tower compatibility. (♥️ to sunitj for reporting, fix by @adamrtalbot) + +### `Dependencies` + +### `Deprecated` + +## v1.1.1 - [2023-05-24] + +### `Added` + +- [#270](https://github.com/nf-core/funcscan/pull/270) Merged pipeline template of nf-core/tools version 2.8 and updated modules accordingly. (by @jasmezz, @jfy133) +- [#274](https://github.com/nf-core/funcscan/pull/274) Update all modules: changed docker links according to the change of quay.io as default repository and Pyrodigal annotation output now zipped. 
(by @jasmezz) +- [#275](https://github.com/nf-core/funcscan/pull/275) Save DRAMP database in the common database directory if `--save_databases` is supplied. (by @jasmezz) + +### `Fixed` + +- [#272](https://github.com/nf-core/funcscan/pull/272) Fix typo in Prokka output path in modules.config. (by @jasmezz) +- [#273](https://github.com/nf-core/funcscan/pull/273) Update Ampir module after input bugfix in module. (reported by @mathavanpu, fix by @louperelo) +- [#276](https://github.com/nf-core/funcscan/pull/276) Fix Pyrodigal parameters in modules.config. (by @jasmezz) + +### `Dependencies` + +### `Deprecated` + +## v1.1.0 - British Beans on Toast - [2023-04-27] + +### `Added` + +- [#238](https://github.com/nf-core/funcscan/pull/238) Added dedicated DRAMP database downloading step for AMPcombi to prevent parallel downloads when no database provided by user. (by @jfy133) +- [#235](https://github.com/nf-core/funcscan/pull/235) Added parameter `annotation_bakta_db_downloadtype` to be able to switch between downloading either full (33.1 GB) or light (1.3 GB excluding UPS, IPS, PSC, see parameter description) versions of the Bakta database. (by @jasmezz) +- [#249](https://github.com/nf-core/funcscan/pull/249) Added bakta annotation to CI tests. (by @jasmezz) +- [#251](https://github.com/nf-core/funcscan/pull/251) Added annotation tool: Pyrodigal. (by @jasmezz) +- [#252](https://github.com/nf-core/funcscan/pull/252) Added a new parameter `-arg_rgi_savejson` that saves the file `.json` in the RGI directory. The default ouput for RGI is now only `.txt`. (by @darcy220606) +- [#253](https://github.com/nf-core/funcscan/pull/253) Updated Prodigal to have compressed output files. (by @jasmezz) +- [#262](https://github.com/nf-core/funcscan/pull/262) Added comBGC function to screen whole directory of antiSMASH output (one subfolder per sample). (by @jasmezz) +- [#263](https://github.com/nf-core/funcscan/pull/263) Removed `AMPlify` from test_full.config. (by @jasmezz) +- [#266](https://github.com/nf-core/funcscan/pull/266) Updated README.md with Pyrodigal. (by @jasmezz) + +### `Fixed` + +- [#243](https://github.com/nf-core/funcscan/pull/243) Compress the ampcombi_complete_summary.csv in the output directory. (by @louperelo) +- [#237](https://github.com/nf-core/funcscan/pull/237) Reactivate DeepARG automatic database downloading and CI tests as server is now back up. (by @jfy133) +- [#235](https://github.com/nf-core/funcscan/pull/235) Improved annotation speed by switching off pipeline-irrelevant Bakta annotation steps by default. (by @jasmezz) +- [#235](https://github.com/nf-core/funcscan/pull/235) Renamed parameter `annotation_bakta_db` to `annotation_bakta_db_localpath`. (by @jasmezz) +- [#242](https://github.com/nf-core/funcscan/pull/242) Fixed MACREL '.faa' issue that was generated when it was run on its own and upgraded MACREL from version `1.1.0` to `1.2.0` (by @Darcy220606) +- [#248](https://github.com/nf-core/funcscan/pull/248) Applied best-practice `error("message")` to all (sub)workflow files. (by @jasmezz) +- [#254](https://github.com/nf-core/funcscan/pull/254) Further resource optimisation based on feedback from 'real world' datasets. (ongoing, reported by @alexhbnr and @Darcy220606, fix by @jfy133) +- [#266](https://github.com/nf-core/funcscan/pull/266) Fixed wrong process name in base.config. 
(reported by @Darcy220606, fix by @jasmezz) + +### `Dependencies` + +| Tool | Previous version | New version | +| ----- | ---------------- | ----------- | +| Bakta | 1.6.1 | 1.7.0 | + +### `Deprecated` + +## v1.0.1 - [2023-02-27] + +### `Added` + +- [#229](https://github.com/nf-core/funcscan/pull/229) Added pipeline DOI to `WorkflowMain.groovy` to display citation info when executing the pipeline. (by @jasmezz) + +### `Fixed` + +- [#227](https://github.com/nf-core/funcscan/pull/227) Removed a header check in the `check_samplesheet.py` script that was producing false negatives. Presence of required columns is still validated. (by @Midnighter) +- [#228](https://github.com/nf-core/funcscan/pull/228) Improved database downloading guidance to emphasise it is recommended to let nf-core/funcscan do the downloading on a first run, rather than manually downloading yourself. (reported by @alexhbnr, fixed by @jfy133) + +### `Dependencies` + +### `Deprecated` + +## v1.0.0 - German Rollmops - [2023-02-15] Initial release of nf-core/funcscan, created with the [nf-core](https://nf-co.re/) template. ### `Added` +- Added annotation tools (Prokka, Prodigal, Bakta). +- Added AMP screening workflow (tools: Macrel, AMPlify, ampir, hmmsearch). +- Added ARG screening workflow (tools: ABRicate, AMRFinderPlus, DeepARG, fARGene). +- Added BGC screening workflow (tools: antiSMASH, DeepBGC, GECCO, hmmsearch). +- Added workflow summary tools (tools: hAMRonization, AMPcombi, comBGC). + ### `Fixed` ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index fd005177..50655554 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -2,21 +2,101 @@ ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) -> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. +> Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature biotechnology, 38(3), 276–278. [DOI: 10.1038/s41587-020-0439-x](https://doi.org/10.1038/s41587-020-0439-x) ## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) -> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. +> Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316–319. [DOI: 10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [ABRicate](https://github.com/tseemann/abricate) -> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + > Seemann, T. (2020). ABRicate. Github [https://github.com/tseemann/abricate](https://github.com/tseemann/abricate). -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) +- [AMPir](https://doi.org/10.1093/bioinformatics/btaa653) -> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. 
PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Fingerhut, L., Miller, D. J., Strugnell, J. M., Daly, N. L., & Cooke, I. R. (2021). ampir: an R package for fast genome-wide prediction of antimicrobial peptides. Bioinformatics (Oxford, England), 36(21), 5262–5263. [DOI: 10.1093/bioinformatics/btaa653](https://doi.org/10.1093/bioinformatics/btaa653) + +- [AMPlify](https://doi.org/10.1186/s12864-022-08310-4) + + > Li, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. [DOI: 10.1186/s12864-022-08310-4](https://doi.org/10.1186/s12864-022-08310-4) + +- [AMRFinderPlus](https://doi.org/10.1038/s41598-021-91456-0) + + > Feldgarden, M., Brover, V., Gonzalez-Escalona, N., Frye, J. G., Haendiges, J., Haft, D. H., Hoffmann, M., Pettengill, J. B., Prasad, A. B., Tillman, G. E., Tyson, G. H., & Klimke, W. (2021). AMRFinderPlus and the Reference Gene Catalog facilitate examination of the genomic links among antimicrobial resistance, stress response, and virulence. Scientific reports, 11(1), 12728. [DOI: 10.1038/s41598-021-91456-0](https://doi.org/10.1038/s41598-021-91456-0) + +- [AntiSMASH](https://doi.org/10.1093/nar/gkad344) + + > Blin, K., Shaw, S., Augustijn, H. E., Reitz, Z. L., Biermann, F., Alanjary, M., Fetter, A., Terlouw B. R., Metcalf, W. W., Helfrich, E. J. N., van Wezel, G. P., Medema, M. H., & Weber, T. (2023). antiSMASH 7.0: new and improved predictions for detection, regulation, chemical structures and visualisation. Nucleic acids research, 51(W1), W46–W50. [DOI: 10.1093/nar/gkad344](https://doi.org/10.1093/nar/gkad344) + +- [argNorm](https://doi.org/10.5204/rep.eprints.252448) + + > Ugarcina Perovic, S., Ramji, V., Chong, H., Duan, Y., Maguire, F., Coelho, L. P. (2024). argNorm: Normalization of antibiotic resistance gene annotations to the Antibiotic Resistance Ontology (ARO). [Preprint] (Unpublished) [DOI: 10.5204/rep.eprints.252448](https://doi.org/10.5204/rep.eprints.252448) + +- [Bakta](https://doi.org/10.1099/mgen.0.000685) + + > Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). [DOI: 10.1099/mgen.0.000685](https://doi.org/10.1099/mgen.0.000685) + +- [comBGC](https://github.com/nf-core/funcscan) + + > Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. [DOI: 10.5281/zenodo.7643100](https://doi.org/10.5281/zenodo.7643099) + +- [DeepARG](https://doi.org/10.1186/s40168-018-0401-z) + + > Arango-Argoty, G., Garner, E., Pruden, A., Heath, L. S., Vikesland, P., & Zhang, L. (2018). DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data. Microbiome, 6(1), 23. [DOI: 10.1186/s40168-018-0401-z](https://doi.org/10.1186/s40168-018-0401-z) + +- [DeepBGC](https://doi.org/10.1093/nar/gkz654) + + > Hannigan, G. D., Prihoda, D., Palicka, A., Soukup, J., Klempir, O., Rampula, L., Durcak, J., Wurst, M., Kotowski, J., Chang, D., Wang, R., Piizzi, G., Temesi, G., Hazuda, D. J., Woelk, C. H., & Bitton, D. A. (2019). A deep learning genome-mining strategy for biosynthetic gene cluster prediction. 
Nucleic acids research, 47(18), e110. [DOI: 10.1093/nar/gkz654](https://doi.org/10.1093/nar/gkz654) + +- [fARGene](https://doi.org/10.1186/s40168-019-0670-1) + + > Berglund, F., Österlund, T., Boulund, F., Marathe, N. P., Larsson, D., & Kristiansson, E. (2019). Identification and reconstruction of novel antibiotic resistance genes from metagenomes. Microbiome, 7(1), 52. [DOI: 10.1186/s40168-019-0670-1](https://doi.org/10.1186/s40168-019-0670-1) + +- [GECCO](https://gecco.embl.de) + + > Carroll, L. M., Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv. [DOI: 10.1101/2021.05.03.442509](https://doi.org/10.1101/2021.05.03.442509) + +- [AMPcombi](https://github.com/Darcy220606/AMPcombi) + + > Ibrahim, A. & Perelo, L. (2023). Darcy220606/AMPcombi. [DOI: 10.5281/zenodo.7639121](https://doi.org/10.5281/zenodo.7639121). + +- [hAMRonization](https://github.com/pha4ge/hAMRonization) + + > Mendes, I., Griffiths, E., Manuele, A., Fornika, D., Tausch, S. H., Le-Viet, T., Phelan, J., Meehan, C. J., Raphenya, A. R., Alcock, B., Culp, E., Lorenzo, F., Haim, M. S., Witney, A., Black, A., Katz, L., Oluniyi, P., Olawoye, I., Timme, R., Neoh, H., Lam, S. D., Jamaluddin, T. Z. M. T., Nathan, S., Ang, M. Y., Di Gregorio, S., Vandelannoote, K., Dusadeepong, R, Chindelevitch, L., Nasar, M. I., Aanensen, D., Afolayan, A. O., Odih, E. E., McArthur, A. G., Feldgarden, M., Galas, M. M., Campos, J., Okeke, I. N., Underwood, A., Page, A. J., MacCannell, D., Maguire, F. (2023). hAMRonization: Enhancing antimicrobial resistance prediction using the PHA4GE AMR detection specification and tooling. bioRxiv. [DOI: 10.1101/2024.03.07.583950](https://doi.org/10.1101/2024.03.07.583950) + +- [HMMER](https://doi.org/10.1371/journal.pcbi.1002195.) + + > Eddy S. R. (2011). Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. [DOI: 10.1371/journal.pcbi.1002195](https://doi.org/10.1371/journal.pcbi.1002195) + +- [Macrel](https://doi.org/10.7717/peerj.10555) + + > Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. [DOI: 10.7717/peerj.10555](https://doi.org/10.7717/peerj.10555) + +- [MMseqs2](https://doi.org/10.1093/bioinformatics/btab184) + + > Mirdita, M., Steinegger, M., Breitwieser, F., Söding, J., Levy Karin, E. (2021). Fast and sensitive taxonomic assignment to metagenomic contigs. Bioinformatics, 37(18),3029–3031. [DOI: 10.1093/bioinformatics/btab184](https://doi.org/10.1093/bioinformatics/btab184) + +- [Prodigal](https://doi.org/10.1186/1471-2105-11-119) + + > Hyatt, D., Chen, G. L., Locascio, P. F., Land, M. L., Larimer, F. W., & Hauser, L. J. (2010). Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC bioinformatics, 11, 119. [DOI: 10.1186/1471-2105-11-119](https://doi.org/10.1186/1471-2105-11-119) + +- [PROKKA](https://doi.org/10.1093/bioinformatics/btu153) + + > Seemann, T. (2014). Prokka: rapid prokaryotic genome annotation. Bioinformatics (Oxford, England), 30(14), 2068–2069. [DOI: 10.1093/bioinformatics/btu153](https://doi.org/10.1093/bioinformatics/btu153) + +- [Pyrodigal](https://doi.org/10.1186/1471-2105-11-119) + + > Larralde, M. (2022). Pyrodigal: Python bindings and interface to Prodigal, an efficient method for gene prediction in prokaryotes. Journal of Open Source Software, 7(72), 4296. 
[DOI: 10.21105/joss.04296](https://doi.org/10.21105/joss.04296) + +- [RGI](https://doi.org/10.1093/nar/gkac920) + + > Alcock, B. P., Huynh, W., Chalil, R., Smith, K. W., Raphenya, A. R., Wlodarski, M. A., Edalatmand, A., Petkau, A., Syed, S. A., Tsang, K. K., Baker, S. J. C., Dave, M., McCarthy, M. C., Mukiri, K. M., Nasir, J. A., Golbon, B., Imtiaz, H., Jiang, X., Kaur, K., Kwong, M., Liang, Z. C., Niu, K. C., Shan, P., Yang, J. Y. J., Gray, K. L., Hoad, G. R., Jia, B., Bhando, T., Carfrae, L. A., Farha, M. A., French, S., Gordzevich, R., Rachwalski, K., Tu, M. M., Bordeleau, E., Dooley, D., Griffiths, E., Zubyk, H. L., Brown, E. D., Maguire, F., Beiko, R. G., Hsiao, W. W. L., Brinkman F. S. L., Van Domselaar, G., McArthur, A. G. (2023). CARD 2023: expanded curation, support for machine learning, and resistome prediction at the Comprehensive Antibiotic Resistance Database. Nucleic acids research, 51(D1):D690-D699. [DOI: 10.1093/nar/gkac920](https://doi.org/10.1093/nar/gkac920) + +- [SeqKit](https://bioinf.shenwei.me/seqkit/) + + > Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. iMeta, e191. [https://doi.org/10.1002/imt2.191](https://doi.org/10.1002/imt2.191) ## Software packaging/containerisation tools @@ -26,11 +106,11 @@ - [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + > Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature methods, 15(7), 475–476. [DOI: 10.1038/s41592-018-0046-7](https://doi.org/10.1038/s41592-018-0046-7) - [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + > da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. [DOI: 10.1093/bioinformatics/btx192](https://doi.org/10.1093/bioinformatics/btx192) - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) diff --git a/README.md b/README.md index 8c207e26..7753440d 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@

- - nf-core/funcscan + + nf-core/funcscan

[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) @@ -19,51 +19,55 @@ ## Introduction -**nf-core/funcscan** is a bioinformatics pipeline that ... +**nf-core/funcscan** is a bioinformatics best-practice analysis pipeline for the screening of nucleotide sequences such as assembled contigs for functional genes. It currently features mining for antimicrobial peptides, antibiotic resistance genes and biosynthetic gene clusters. - +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - - +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/funcscan/results). -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +The nf-core/funcscan AWS full test dataset are contigs generated by the MGnify service from the ENA. We used contigs generated from assemblies of chicken cecum shotgun metagenomes (study accession: MGYS00005631). + +## Pipeline summary + +1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/) +2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2) +3. 
Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta) +4. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify) +5. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). [`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms. +6. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/) +7. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs +8. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/) + +![funcscan metro workflow](docs/images/funcscan_metro_workflow.png) ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - +Each row represents a (multi-)fasta file of assembled contig sequences. Now, you can run the pipeline using: - - ```bash nextflow run nf-core/funcscan \ - -profile \ + -profile \ --input samplesheet.csv \ - --outdir + --outdir \ + --run_amp_screening \ + --run_arg_screening \ + --run_bgc_screening ``` > [!WARNING] @@ -83,7 +87,7 @@ nf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Lou We thank the following people for their extensive assistance in the development of this pipeline: - +Adam Talbot, Alexandru Mizeranschi, Hugo Tavares, Júlia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion. ## Contributions and Support @@ -93,10 +97,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - - +If you use nf-core/funcscan for your analysis, please cite it using the following doi: [10.5281/zenodo.7643099](https://doi.org/10.5281/zenodo.7643099) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
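A note on the reworked samplesheet schema used in the Usage section above (defined in `assets/schema_input.json` further down in this diff): `sample` and `fasta` are required, while `protein` and `gbk` are optional but mutually dependent via `dependentRequired`, so pre-annotated input must supply both. A minimal sketch with hypothetical file names:

```csv
sample,fasta,protein,gbk
sample_contigs_only,contigs_1.fasta.gz,,
sample_preannotated,contigs_2.fasta.gz,annotations_2.faa,annotations_2.gbk
```

A row providing `protein` without `gbk` (or vice versa) fails schema validation.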
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index 2cc2aa50..f51e757f 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -3,8 +3,7 @@ description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
 section_name: "nf-core/funcscan Methods Description"
 section_href: "https://github.com/nf-core/funcscan"
 plot_type: "html"
-## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
-## You inject any metadata in the Nextflow '${workflow}' object
+## Inject metadata in the Nextflow '${workflow}' object
 data: |

  <h4>Methods</h4>
  <p>Data was processed using nf-core/funcscan v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index bf128717..45ddd48c 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -10,6 +10,21 @@ report_section_order: "nf-core-funcscan-summary": order: -1002 +run_modules: + - prokka + - custom_content + +table_columns_visible: + Prokka: + organism: False + export_plots: true disable_version_detection: true + +custom_logo: "nf-core-funcscan_logo_flat_light.png" +custom_logo_url: https://nf-co.re/funcscan +custom_logo_title: "nf-core/funcscan" + +## Tool specific configuration +prokka_fn_snames: True diff --git a/assets/nf-core-funcscan_logo.png b/assets/nf-core-funcscan_logo.png new file mode 100644 index 00000000..9c8fb040 Binary files /dev/null and b/assets/nf-core-funcscan_logo.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..791912cd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,4 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,fasta,protein,gbk +sample_1,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_1.fasta.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_1.faa,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_1.gbk +sample_2,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_2.fasta.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_2.faa.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_2.gbk.gz +sample_3,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs.fasta diff --git a/assets/schema_input.json b/assets/schema_input.json index df1294f0..c13fd75e 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,21 +13,33 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "fasta": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.(fasta|fas|fna|fa)(\\.gz)?$", + "errorMessage": "Fasta file for reads must be provided, cannot contain spaces and must have extension `.fa.gz`, `.fna.gz` or `.fasta.gz`" }, - "fastq_2": { + "protein": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.(faa|fasta)(\\.gz)?$", + "errorMessage": "Input file for peptide annotations has incorrect file format. File must end in `.fasta` or `.faa`" + }, + "gbk": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(gbk|gbff)(\\.gz)?$", + "errorMessage": "Input file for feature annotations has incorrect file format. 
File must end in `.gbk.gz` or `.gbff.gz`" } }, - "required": ["sample", "fastq_1"] - } + "required": ["sample", "fasta"], + "dependentRequired": { + "protein": ["gbk"], + "gbk": ["protein"] + } + }, + "uniqueItems": true } diff --git a/bin/ampcombi_download.py b/bin/ampcombi_download.py new file mode 100755 index 00000000..c9a4f639 --- /dev/null +++ b/bin/ampcombi_download.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +######################################### +# Authors: [Anan Ibrahim](https://github.com/Darcy220606/AMPcombi), [Louisa Perelo](https://github.com/louperelo) +# File: amp_database.py +# Source: https://github.com/Darcy220606/AMPcombi/blob/main/ampcombi/amp_database.py +# This source code is licensed under the MIT license +######################################### + +# TITLE: Download the reference database specified by the user. + +import pandas as pd +import requests +import os +import re +import subprocess +import argparse + +from datetime import datetime +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from Bio import SeqIO + +######################################## +# FUNCTION: DOWNLOAD DATABASES AND CLEAN DRAMP and APD +######################################### +def download_ref_db(database, threads): + """ + Downloads a specified AMP (antimicrobial peptide) reference database based on the + provided database name and saves it to the specified directory. + This supports downloading databases only from DRAMP, APD, and UniRef100. + Parameters: + ---------- + db : str + The directory path where the downloaded database should be saved. + database : str + The name of the database to download. Must be one of 'DRAMP', 'APD', or 'UniRef100'. + threads : int + Number of threads to use when downloading the UniRef100 database with `mmseqs`. 
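+    Example:
+    ----------
+    Calling `download_ref_db('DRAMP', threads=4)` writes the cleaned database
+    to `amp_DRAMP_database/general_amps_<date>.txt` plus a matching FASTA file;
+    `threads` is only used for the UniRef100 (mmseqs2) download.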
+ """ + # Check which database was given + if database == 'DRAMP': + # Create dir + db = 'amp_DRAMP_database' + os.makedirs(db, exist_ok=True) + # Download the file + try: + url = 'http://dramp.cpu-bioinfor.org/downloads/download.php?filename=download_data/DRAMP3.0_new/general_amps.txt' + response = requests.get(url, allow_redirects=True) + response.raise_for_status() # Check for any download errors + date = datetime.now().strftime("%Y_%m_%d") + with open(db + '/' + f'general_amps_{date}.txt', 'wb') as file: + file.write(response.content) + print(f"File downloaded successfully and saved to {db}/general_amps_{date}.txt") + # Create fasta version and clean it + db_df = pd.read_csv(f'{db}/general_amps_{date}.txt', sep='\t') + records = [] + valid_sequence_pattern = re.compile("^[ACDEFGHIKLMNPQRSTVWY]+$") + for index, row in db_df.iterrows(): + sequence = row['Sequence'] + if valid_sequence_pattern.match(sequence): + record = SeqRecord(Seq(sequence), id=str(row['DRAMP_ID']), description="") + records.append(record) + output_file = f'{db}/general_amps_{date}.fasta' + SeqIO.write(records, output_file, "fasta") + except requests.exceptions.RequestException as e: + print(f"Failed to download DRAMP AMP general database file: {e}") + return + + if database == 'APD': + # Create dir + db = 'amp_APD_database' + os.makedirs(db, exist_ok=True) + # Download the file + try: + url = 'https://aps.unmc.edu/assets/sequences/APD_sequence_release_09142020.fasta' + response = requests.get(url, allow_redirects=True, verify=False) # Disable SSL verification due to site certificate issue + response.raise_for_status() + content = response.text + print("APD AMP database downloaded successfully.") + except requests.exceptions.RequestException as e: + print(f"Failed to download content: {e}") + return + # Save the content line-by-line exactly as is + try: + with open(db + '/' + 'APD_orig.fasta', 'w') as file: + file.write(content) + with open(f'{db}/APD.fasta', 'w') as output_handle: + valid_sequence_pattern = re.compile("^[ACDEFGHIKLMNPQRSTVWY]+$") + for record in SeqIO.parse(f'{db}/APD_orig.fasta', "fasta"): + sequence = str(record.seq) + if valid_sequence_pattern.match(sequence): + SeqIO.write(record, output_handle, "fasta") + os.remove(db + '/' + 'APD_orig.fasta') + print(f"APD AMP database saved successfully to {db}/APD.fasta") + # Fasta to table + headers = [] + sequences = [] + seq_ids = [] + for i, record in enumerate(SeqIO.parse(f'{db}/APD.fasta', "fasta")): + sequence_id = record.description.split('|')[0] + headers.append(record.description) + sequences.append(str(record.seq)) + seq_ids.append(sequence_id) + db_df = pd.DataFrame({ + "APD_ID": seq_ids, + "APD_Description": headers, + "APD_Sequence": sequences}) + db_df.to_csv(f'{db}/APD.txt', sep='\t', index=False, header=True) + os.remove(db + '/' + 'APD.fasta') + # Table to fasta + records = [] + for index, row in db_df.iterrows(): + sequence = row['APD_Sequence'] + record = SeqRecord(Seq(sequence), id=str(row['APD_ID']), description="") + records.append(record) + output_file = f'{db}/APD.fasta' + SeqIO.write(records, output_file, "fasta") + except Exception as e: + print(f"Failed to save APD AMP database: {e}") + + if database == 'UniRef100': + # Create dir + db = 'amp_UniRef100_database' + os.makedirs(db, exist_ok=True) + # Download the file + try: + os.makedirs(f'{db}/mmseqs2', exist_ok=True) + command = f"mmseqs databases UniRef100 {db}/mmseqs2/ref_DB {db}/mmseqs2/tmp --remove-tmp-files true --threads {threads} -v 0" + subprocess.run(command, shell=True, 
check=True) + print(f"UniRef100 protein database downloaded successfully and saved to {db}/mmseqs2/UniRef100") + except subprocess.CalledProcessError as e: + print(f"Failed to download UniRef100 protein database: {e}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Downloads a specified AMP (antimicrobial peptide) reference database based on the provided database name and saves it to the specified directory.") + parser.add_argument("--database_id", dest="database", type=str, required=True, choices=["DRAMP", "APD", "UniRef100"], + help="Database ID - one of DRAMP, APD, or UniRef100. This parameter is required.") + parser.add_argument("--threads", type=int, default=4, + help="Number of threads supplied to mmseqs databases. Only relevant in the case of 'UniRef100'. Default is 4.") + + args = parser.parse_args() + download_ref_db(args.database, args.threads) diff --git a/bin/comBGC.py b/bin/comBGC.py new file mode 100755 index 00000000..dccece69 --- /dev/null +++ b/bin/comBGC.py @@ -0,0 +1,659 @@ +#!/usr/bin/env python3 + +# Written by Jasmin Frangenberg and released under the MIT license. +# See below for full license text. + +from Bio import SeqIO +import pandas as pd +import argparse +import os +import re + +""" +=============================================================================== +MIT License +=============================================================================== + +Copyright (c) 2023 Jasmin Frangenberg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" + +tool_version = "0.6.3" +welcome = """\ + ........................ + * comBGC v.{version} * + ........................ + This tool aggregates the results of BGC prediction tools: + antiSMASH, deepBGC, and GECCO + For detailed usage documentation please refer + to https://nf-co.re/funcscan + .........................................................""".format( + version=tool_version +) + +# Initialize parser +parser = argparse.ArgumentParser( + prog="comBGC", + formatter_class=argparse.RawTextHelpFormatter, + description=(welcome), + add_help=True, +) + +# Input options +parser.add_argument( + "-i", + "--input", + metavar="PATH(s)", + dest="input", + nargs="*", + help="""path(s) to the required output file(s) of antiSMASH, DeepBGC and/or GECCO +these can be: +- antiSMASH: .gbk and (optional) knownclusterblast/ directory +- DeepBGC: .bgc.tsv +- GECCO: .clusters.tsv +Note: Please provide files from a single sample only. 
If you would like to +summarize multiple samples, please see the --antismash_multiple_samples flag.""", +) +parser.add_argument( + "-o", + "--outdir", + metavar="PATH", + dest="outdir", + nargs="?", + help="directory for comBGC output. Default: current directory", + type=str, + default=".", +) +parser.add_argument( + "-a", + "--antismash_multiple_samples", + metavar="PATH", + dest="antismash_multiple_samples", + nargs="?", + help="""directory of antiSMASH output. Should contain subfolders (one per +sample). Can only be used if --input is not specified.""", + type=str, +) +parser.add_argument( + "-vv", "--verbose", help="increase output verbosity", action="store_true" +) +parser.add_argument( + "-v", "--version", help="show version number and exit", action="store_true" +) + +# Get command line arguments +args = parser.parse_args() + +# Assign input arguments to variables +input = args.input +dir_antismash = args.antismash_multiple_samples +outdir = args.outdir +verbose = args.verbose +version = args.version + +if version: + exit("comBGC {version}".format(version=tool_version)) + +input_antismash = [] +input_deepbgc = [] +input_gecco = [] + +# Assign input files to respective tools +if input: + for path in input: + if path.endswith(".gbk") and not re.search("region\d\d\d\.gbk$", path): # Make sure to only fetch relevant GBK files, i.e. those containing all collective antiSMASH BGCs + with open(path) as infile: + for line in infile: + if re.search("##GECCO-Data-START##", line): + input_gecco.append(path) + break + elif re.search("##antiSMASH-Data-START##", line): + input_antismash.append(path) + break + elif path.endswith("bgc.tsv"): + input_deepbgc = path + elif path.endswith("clusters.tsv"): + input_gecco.append(path) + elif path.endswith("knownclusterblast/"): + input_antismash.append(path) + +if input and dir_antismash: + exit( + "The flags --input and --antismash_multiple_samples are mutually exclusive.\nPlease use only one of them (or see --help for how to use)." + ) + +# Make sure that at least one input argument is given +if not (input_antismash or input_gecco or input_deepbgc or dir_antismash): + exit( + "Please specify at least one input file (i.e. output from antismash, deepbgc, or gecco) or see --help" + ) + +######################## +# ANTISMASH FUNCTIONS +######################## + + +def prepare_multisample_input_antismash(antismash_dir): + """ + Prepare string of input paths of a given antiSMASH output folder (with sample subdirectories) + """ + sample_paths = [] + for root, subdirs, files in os.walk(antismash_dir): + antismash_file = "/".join([root, "index.html"]) + if os.path.exists(antismash_file): + sample = root.split("/")[-1] + gbk_path = "/".join([root, sample]) + ".gbk" + kkb_path = "/".join([root, "knownclusterblast"]) + if os.path.exists(kkb_path): + sample_paths.append([gbk_path, kkb_path]) + else: + sample_paths.append([gbk_path]) + return sample_paths + + +def parse_knownclusterblast(kcb_file_path): + """ + Extract MIBiG IDs from knownclusterblast TXT file. 
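+    Returns a list of MIBiG accessions (matches of the regex `BGC\d+`,
+    e.g. 'BGC0000315') collected from the lines under the
+    'Significant hits:' header; the list is empty when no significant
+    hits are recorded in the file.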
+ """ + + with open(kcb_file_path) as kcb_file: + hits = 0 + MIBiG_IDs = [] + + for line in kcb_file: + if line == "Significant hits: \n" and not hits: + hits = 1 # Indicating that the following lines contain relevant information + elif line == "\n" and hits: + break + elif line != "Significant hits: \n" and hits: + MIBiG_ID = re.search("(BGC\d+)", line).group(1) + MIBiG_IDs.append(MIBiG_ID) + return MIBiG_IDs + + +def antismash_workflow(antismash_paths): + """ + Create data frame with aggregated antiSMASH output: + - Open summary GBK and grab relevant information. + - Extract the knownclusterblast output from the antiSMASH folder (MIBiG annotations) if present. + - Return data frame with aggregated info. + """ + + antismash_sum_cols = [ + "Sample_ID", + "Prediction_tool", + "Contig_ID", + "Product_class", + "BGC_probability", + "BGC_complete", + "BGC_start", + "BGC_end", + "BGC_length", + "CDS_ID", + "CDS_count", + "PFAM_domains", + "MIBiG_ID", + "InterPro_ID", + ] + antismash_out = pd.DataFrame(columns=antismash_sum_cols) + + CDS_ID = [] + CDS_count = 0 + + # Distinguish input files (i.e. GBK file and "knownclusterblast" folder) + kcb_path = [] + for path in antismash_paths: + if re.search("knownclusterblast", path): + kcb_path = re.search(".*knownclusterblast.*", path).group() + else: + gbk_path = path + + kcb_files = [] + if kcb_path: + kcb_files = [ + file + for file in os.listdir(kcb_path) + if file.startswith("c") and file.endswith(".txt") + ] + + # Aggregate information + Sample_ID = gbk_path.split("/")[-1].split(".gbk")[ + -2 + ] # Assuming file name equals sample name + if verbose: + print("\nParsing antiSMASH file(s): " + Sample_ID + "\n... ", end="") + + with open(gbk_path) as gbk: + for record in SeqIO.parse( + gbk, "genbank" + ): # GBK records are contigs in this case + # Initiate variables per contig + cluster_num = 1 + antismash_out_line = {} + Contig_ID = record.id + Product_class = "" + BGC_complete = "" + BGC_start = "" + BGC_end = "" + BGC_length = "" + PFAM_domains = [] + MIBiG_ID = "NA" + + for feature in record.features: + # Extract relevant infos from the first protocluster feature from the contig record + if feature.type == "protocluster": + if ( + antismash_out_line + ): # If there is more than 1 BGC per contig, reset the output line for new BGC. Assuming that BGCs do not overlap. + if not CDS_ID: + CDS_ID = ["NA"] + antismash_out_line = { # Create dictionary of BGC info + "Sample_ID": Sample_ID, + "Prediction_tool": "antiSMASH", + "Contig_ID": Contig_ID, + "Product_class": ";".join(Product_class), + "BGC_probability": "NA", + "BGC_complete": BGC_complete, + "BGC_start": BGC_start, + "BGC_end": BGC_end, + "BGC_length": BGC_length, + "CDS_ID": ";".join(CDS_ID), + "CDS_count": CDS_count, + "PFAM_domains": ";".join(PFAM_domains), + "MIBiG_ID": MIBiG_ID, + "InterPro_ID": "NA", + } + antismash_out_line = pd.DataFrame([antismash_out_line]) + antismash_out = pd.concat( + [antismash_out, antismash_out_line], ignore_index=True + ) + antismash_out_line = {} + + # Reset variables per BGC + CDS_ID = [] + CDS_count = 0 + PFAM_domains = [] + + # Extract all the BGC info + Product_class = feature.qualifiers["product"] + for i in range(len(Product_class)): + Product_class[i] = ( + Product_class[i][0].upper() + Product_class[i][1:] + ) # Make first letters uppercase, e.g. 
lassopeptide -> Lassopeptide
+
+                    if feature.qualifiers["contig_edge"] == ["True"]:
+                        BGC_complete = "No"
+                    elif feature.qualifiers["contig_edge"] == ["False"]:
+                        BGC_complete = "Yes"
+
+                    BGC_start = (
+                        feature.location.start + 1
+                    )  # +1 because zero-based start position
+                    BGC_end = feature.location.end
+                    BGC_length = feature.location.end - feature.location.start
+
+                    # If there are knownclusterblast files for the BGC, get MIBiG IDs of their homologs
+                    if kcb_files:
+                        kcb_file = "{}_c{}.txt".format(
+                            record.id, str(cluster_num)
+                        )  # Check if this filename is among the knownclusterblast files
+                        if kcb_file in kcb_files:
+                            MIBiG_IDs = ";".join(
+                                parse_knownclusterblast(
+                                    os.path.join(kcb_path, kcb_file)
+                                )
+                            )
+                            if MIBiG_IDs != "":
+                                MIBiG_ID = MIBiG_IDs
+                        cluster_num += 1
+
+                # Count functional CDSs (no pseudogenes) and get the PFAM annotation
+                elif (
+                    feature.type == "CDS"
+                    and "translation" in feature.qualifiers.keys()
+                    and BGC_start != ""
+                ):  # Make sure not to count pseudogenes (which would have no "translation" tag) and count no CDSs before the first BGC
+                    if (
+                        feature.location.end <= BGC_end
+                    ):  # Make sure CDS is within the current BGC region
+                        if "locus_tag" in feature.qualifiers:
+                            CDS_ID.append(feature.qualifiers["locus_tag"][0])
+                        CDS_count += 1
+                        if "sec_met_domain" in feature.qualifiers.keys():
+                            for PFAM_domain in feature.qualifiers["sec_met_domain"]:
+                                PFAM_domain_name = re.search(
+                                    r"(.+) \(E-value", PFAM_domain
+                                ).group(1)
+                                PFAM_domains.append(PFAM_domain_name)
+
+            # Create dictionary of BGC info
+            if not CDS_ID:
+                CDS_ID = ["NA"]
+            antismash_out_line = {
+                "Sample_ID": Sample_ID,
+                "Prediction_tool": "antiSMASH",
+                "Contig_ID": Contig_ID,
+                "Product_class": ";".join(Product_class),
+                "BGC_probability": "NA",
+                "BGC_complete": BGC_complete,
+                "BGC_start": BGC_start,
+                "BGC_end": BGC_end,
+                "BGC_length": BGC_length,
+                "CDS_ID": ";".join(CDS_ID),
+                "CDS_count": CDS_count,
+                "PFAM_domains": ";".join(PFAM_domains),
+                "MIBiG_ID": MIBiG_ID,
+                "InterPro_ID": "NA",
+            }
+
+            if BGC_start != "":  # Only keep records with BGCs
+                antismash_out_line = pd.DataFrame([antismash_out_line])
+                antismash_out = pd.concat(
+                    [antismash_out, antismash_out_line], ignore_index=True
+                )
+
+                # Reset variables per BGC
+                CDS_ID = []
+                CDS_count = 0
+                PFAM_domains = []
+
+    if verbose:
+        print("Done.")
+    return antismash_out
+
+
+########################
+# DEEPBGC FUNCTIONS
+########################
+
+
+def deepbgc_workflow(deepbgc_path):
+    """
+    Create data frame with aggregated deepBGC output.
+    """
+
+    if verbose:
+        print("\nParsing deepBGC file\n... 
", end="") + + # Prepare input and output columns + deepbgc_map_dict = { + "sequence_id": "Contig_ID", + "nucl_start": "BGC_start", + "nucl_end": "BGC_end", + "nucl_length": "BGC_length", + "num_proteins": "CDS_count", + "deepbgc_score": "BGC_probability", + "product_class": "Product_class", + "protein_ids": "CDS_ID", + "pfam_ids": "PFAM_domains", + } + deepbgc_sum_cols = [ + "Sample_ID", + "Prediction_tool", + "Contig_ID", + "Product_class", + "BGC_probability", + "BGC_complete", + "BGC_start", + "BGC_end", + "BGC_length", + "CDS_ID", + "CDS_count", + "PFAM_domains", + "MIBiG_ID", + "InterPro_ID", + ] + deepbgc_unused_cols = [ + "detector_version", + "detector_label", + "bgc_candidate_id", + "num_domains", + "num_bio_domains", + "product_activity", + "antibacterial", + "cytotoxic", + "inhibitor", + "antifungal", + "Alkaloid", + "NRP", + "Other", + "Polyketide", + "RiPP", + "Saccharide", + "Terpene", + "bio_pfam_ids", + ] + + # Grab deepBGC sample ID + sample = os.path.basename(deepbgc_path).rsplit(".bgc", 1)[0] + + # Initiate dataframe + deepbgc_out = pd.DataFrame(columns=deepbgc_sum_cols) + + # Add relevant deepBGC output columns per BGC + deepbgc_df = ( + pd.read_csv(deepbgc_path, sep="\t") + .drop(deepbgc_unused_cols, axis=1) + .rename(columns=deepbgc_map_dict) + ) + deepbgc_df["Sample_ID"] = sample + deepbgc_df["Prediction_tool"] = "deepBGC" + deepbgc_df["BGC_complete"] = "NA" + deepbgc_df["MIBiG_ID"] = "NA" + deepbgc_df["InterPro_ID"] = "NA" + + # Concatenate data frame to out w/o common index column (e.g. sample_id) due to duplicate row names + deepbgc_out = pd.concat([deepbgc_out, deepbgc_df], ignore_index=True, sort=False) + + # Return data frame with ordered columns + deepbgc_out = deepbgc_out[deepbgc_sum_cols] + if verbose: + print("Done.") + return deepbgc_out + + +######################## +# GECCO FUNCTIONS +######################## + + +def getInterProID(gbk_path): + """ + Retrieve InterPro IDs from GECCO GBK file. + """ + + with open(gbk_path) as gbk: + ip_ids = [] + id_pattern = 'InterPro\:(.*)"' + + for line in gbk: + if line.find("InterPro:") != -1: + new_id = re.search(id_pattern, line).group(1) + ip_ids.append(new_id) + ipids_str = ";".join(map(str, ip_ids)) + return ipids_str + + +def gecco_workflow(gecco_paths): + """ + Create data frame with aggregated GECCO output. + """ + + if verbose: + print("\nParsing GECCO files\n... 
", end="") + + # GECCO output columns that can be mapped (comBGC:GECCO) + map_dict = { + "sequence_id": "Contig_ID", + "bgc_id": "cluster_id", + "type": "Product_class", + "average_p": "BGC_probability", + "start": "BGC_start", + "end": "BGC_end", + "domains": "PFAM_domains", + "proteins": "CDS_ID", + } + summary_cols = [ + "Sample_ID", + "Prediction_tool", + "Contig_ID", + "Product_class", + "BGC_probability", + "BGC_complete", + "BGC_start", + "BGC_end", + "BGC_length", + "CDS_ID", + "CDS_count", + "PFAM_domains", + "MIBiG_ID", + "InterPro_ID", + ] + unused_cols = [ + "max_p", + "alkaloid_probability", + "polyketide_probability", + "ripp_probability", + "saccharide_probability", + "terpene_probability", + "nrp_probability", + ] + + tsv_path = "" + gbk_paths = [] + + for path in gecco_paths: + if path.endswith(".tsv"): + tsv_path = path + else: + gbk_paths.append(path) + + # Initiate dataframe + gecco_out = pd.DataFrame(columns=summary_cols) + + # Add sample information + sample = tsv_path.split("/")[-1].split(".")[0] + gecco_df = ( + pd.read_csv(tsv_path, sep="\t") + .drop(unused_cols, axis=1) + .rename(columns=map_dict) + ) + + # Fill columns (1 row per BGC) + gecco_df["Sample_ID"] = sample + gecco_df["BGC_length"] = gecco_df["BGC_end"] - gecco_df["BGC_start"] + gecco_df["CDS_count"] = [ + len(gecco_df["CDS_ID"].iloc[i].split(";")) for i in range(0, gecco_df.shape[0]) + ] # Number of contigs in 'Annotation_ID' + gecco_df["Prediction_tool"] = "GECCO" + + # Add column 'InterPro_ID' + for gbk_path in gbk_paths: + bgc_id = gbk_path.split("/")[-1][0:-4] + gecco_df.loc[gecco_df["cluster_id"] == bgc_id, "InterPro_ID"] = getInterProID( + gbk_path + ) + + # Add empty columns with no output from GECCO + gecco_df["BGC_complete"] = "NA" + gecco_df["MIBiG_ID"] = "NA" + gecco_out = pd.concat([gecco_out, gecco_df]) + + # Fill all empty cells with NA + for row in range(len(gecco_df["PFAM_domains"])): + if gecco_out["PFAM_domains"].isnull().values[row]: + gecco_out.loc[row, "PFAM_domains"] = "NA" + + # Return data frame with ordered columns + gecco_out = gecco_out[summary_cols] + + if verbose: + print("Done.") + + return gecco_out + + +######################## +# MAIN +######################## + +if __name__ == "__main__": + if input_antismash: + tools = { + "antiSMASH": input_antismash, + "deepBGC": input_deepbgc, + "GECCO": input_gecco, + } + elif dir_antismash: + tools = {"antiSMASH": dir_antismash} + else: + tools = {"deepBGC": input_deepbgc, "GECCO": input_gecco} + + tools_provided = {} + + for tool in tools.keys(): + if tools[tool]: + tools_provided[tool] = tools[tool] + + if verbose: + print(welcome) + print("\nYou provided input for: " + ", ".join(tools_provided.keys())) + + # Aggregate BGC information into data frame + summary_antismash = pd.DataFrame() + summary_deepbgc = pd.DataFrame() + summary_gecco = pd.DataFrame() + + for tool in tools_provided.keys(): + if tool == "antiSMASH": + if dir_antismash: + antismash_paths = prepare_multisample_input_antismash(dir_antismash) + for input_antismash in antismash_paths: + summary_antismash_temp = antismash_workflow(input_antismash) + summary_antismash = pd.concat( + [summary_antismash, summary_antismash_temp] + ) + else: + summary_antismash = antismash_workflow(input_antismash) + elif tool == "deepBGC": + summary_deepbgc = deepbgc_workflow(input_deepbgc) + elif tool == "GECCO": + summary_gecco = gecco_workflow(input_gecco) + + # Summarize and sort data frame + summary_all = pd.concat([summary_antismash, summary_deepbgc, summary_gecco]) + 
summary_all.sort_values(
+        by=["Sample_ID", "Contig_ID", "BGC_start", "BGC_length", "Prediction_tool"],
+        axis=0,
+        inplace=True,
+    )
+
+    # Rearrange and rename the columns in the summary df
+    summary_all = summary_all.iloc[:, [0, 2, 1] + list(range(3, len(summary_all.columns)))]
+    summary_all.rename(columns={'Sample_ID': 'sample_id', 'Contig_ID': 'contig_id', 'CDS_ID': 'BGC_region_contig_ids'}, inplace=True)
+
+    # Write results to TSV
+    if not os.path.exists(outdir):
+        os.makedirs(outdir)
+    summary_all.to_csv(
+        os.path.join(outdir, "combgc_summary.tsv"), sep="\t", index=False
+    )
+    print("Your BGC summary file is: " + os.path.join(outdir, "combgc_summary.tsv"))
diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py
new file mode 100755
index 00000000..44eed31a
--- /dev/null
+++ b/bin/merge_taxonomy.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+# Written by Anan Ibrahim and released under the MIT license.
+# See git repository (https://github.com/Darcy220606/AMPcombi) for full license text.
+# Date: March 2024
+# Version: 0.1.0
+
+# Required modules
+import sys
+import os
+import pandas as pd
+import numpy as np
+import argparse
+
+tool_version = "0.1.0"
+#########################################
+# TOP LEVEL: AMPCOMBI
+#########################################
+parser = argparse.ArgumentParser(prog = 'merge_taxonomy', formatter_class=argparse.RawDescriptionHelpFormatter,
+                                 usage='%(prog)s [options]',
+                                 description=('''\
+    .............................................................................
+                                *merge_taxonomy*
+    .............................................................................
+    This script merges all three funcscan workflows with
+    MMseqs2 taxonomy results. This is done in three submodules that can be
+    activated separately.
+    .............................................................................'''),
+                                 epilog='''Thank you for running merge_taxonomy!''',
+                                 add_help=True)
+parser.add_argument('--version', action='version', version='merge_taxonomy ' + tool_version)
+
+#########################################
+# SUBPARSERS
+#########################################
+subparsers = parser.add_subparsers(required=True)
+
+#########################################
+# SUBPARSER: AMPCOMBI
+#########################################
+ampcombi_parser = subparsers.add_parser('ampcombi_taxa')
+
+ampcombi_parser.add_argument("--ampcombi", dest="amp", nargs='?', help="Enter the path to the ampcombi_complete_summary.tsv \n (default: %(default)s)",
+                             type=str, default='ampcombi_complete_summary.csv')
+ampcombi_parser.add_argument("--taxonomy", dest="taxa1", nargs='+', help="Enter the list of taxonomy files for all samples.")
+
+#########################################
+# SUBPARSER: COMBGC
+#########################################
+combgc_parser = subparsers.add_parser('combgc_taxa')
+
+combgc_parser.add_argument("--combgc", dest="bgc", nargs='?', help="Enter the path to the combgc_complete_summary.tsv \n (default: %(default)s)",
+                           type=str, default='combgc_complete_summary.csv')
+combgc_parser.add_argument("--taxonomy", dest="taxa2", nargs='+', help="Enter the list of taxonomy files for all samples.")
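+
+# Illustrative invocations (hypothetical file names; in the pipeline the real
+# summary and taxonomy tables are passed in by the corresponding modules):
+#   merge_taxonomy.py ampcombi_taxa --ampcombi ampcombi_complete_summary.tsv --taxonomy sampleA.tsv sampleB.tsv
+#   merge_taxonomy.py combgc_taxa --combgc combgc_complete_summary.tsv --taxonomy sampleA.tsv sampleB.tsv
+#   merge_taxonomy.py hamronization_taxa --hamronization hamronization_complete_summary.tsv --taxonomy sampleA.tsv sampleB.tsv
+# Each subcommand dispatches to its handler via set_defaults(func=...) at the
+# bottom of this file.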
") + +######################################### +# SUBPARSER: HAMRONIZATION +######################################### +hamronization_parser = subparsers.add_parser('hamronization_taxa') + +hamronization_parser.add_argument("--hamronization", dest="arg", nargs='?', help="Enter the path to the hamronization_complete_summary.tsv' \n (default: %(default)s)", + type=str, default='hamronization_complete_summary.csv') +hamronization_parser.add_argument("--taxonomy", dest="taxa3",nargs='+', help="Enter the list of taxonomy files for all samples. ") + +######################################### +# TAXONOMY +######################################### +def reformat_mmseqs_taxonomy(mmseqs_taxonomy): + mmseqs2_df = pd.read_csv(mmseqs_taxonomy, sep='\t', header=None, names=['contig_id', 'taxid', 'rank_label', 'scientific_name', 'lineage', 'mmseqs_lineage_contig']) + # remove the lineage column + mmseqs2_df.drop('lineage', axis=1, inplace=True) + mmseqs2_df['mmseqs_lineage_contig'].unique() + # convert any classification that has Eukaryota/root to NaN as funcscan targets bacteria ONLY ** + for i, row in mmseqs2_df.iterrows(): + lineage = str(row['mmseqs_lineage_contig']) + if 'Eukaryota' in lineage or 'root' in lineage: + mmseqs2_df.at[i, 'mmseqs_lineage_contig'] = np.nan + # insert the sample name in the first column according to the file basename + file_basename = os.path.basename(mmseqs_taxonomy) + filename = os.path.splitext(file_basename)[0] + mmseqs2_df.insert(0, 'sample_id', filename) + return mmseqs2_df + +######################################### +# FUNCTION: AMPCOMBI +######################################### +def ampcombi_taxa(args): + merged_df = pd.DataFrame() + + # assign input args to variables + ampcombi = args.amp + taxa_list = args.taxa1 + + # prepare the taxonomy files + taxa_df = pd.DataFrame() + # append the dfs to the taxonomy_files_combined + for file in taxa_list: # list of taxa files ['',''] + df = reformat_mmseqs_taxonomy(file) + taxa_df = pd.concat([taxa_df, df]) + + # filter the tool df + tool_df = pd.read_csv(ampcombi, sep='\t') + # remove the column with contig_id - duplicate #NOTE: will be fixed in AMPcombi v2.0.0 + tool_df = tool_df.drop('contig_id', axis=1) + # make sure 1st and 2nd column have the same column labels + tool_df.rename(columns={tool_df.columns[0]: 'sample_id'}, inplace=True) + tool_df.rename(columns={tool_df.columns[1]: 'contig_id'}, inplace=True) + # grab the real contig id in another column copy for merging + tool_df['contig_id_merge'] = tool_df['contig_id'].str.rsplit('_', 1).str[0] + + # merge rows from taxa to ampcombi_df based on substring match in sample_id + # grab the unique sample names from the taxonomy table + samples_taxa = taxa_df['sample_id'].unique() + # for every sampleID in taxadf merge the results + for sampleID in samples_taxa: + # subset ampcombi + subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)] + # subset taxa + subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)] + # merge + subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id_merge', right_on='contig_id', how='left') + # cleanup the table + columnsremove = ['contig_id_merge','contig_id_y', 'sample_id_y'] + subset_df.drop(columnsremove, axis=1, inplace=True) + subset_df.rename(columns={'contig_id_x': 'contig_id', 'sample_id_x':'sample_id'},inplace=True) + # append in the combined_df + merged_df = merged_df.append(subset_df, ignore_index=True) + + # write to file + merged_df.to_csv('ampcombi_complete_summary_taxonomy.tsv', 
+
+#########################################
+# FUNCTION: COMBGC
+#########################################
+def combgc_taxa(args):
+    merged_df = pd.DataFrame()
+
+    # assign input args to variables
+    combgc = args.bgc
+    taxa_list = args.taxa2
+
+    # prepare the taxonomy files
+    taxa_df = pd.DataFrame()
+    # append the dfs to the taxonomy_files_combined
+    for file in taxa_list:  # list of taxa files ['','']
+        df = reformat_mmseqs_taxonomy(file)
+        taxa_df = pd.concat([taxa_df, df])
+
+    # filter the tool df
+    tool_df = pd.read_csv(combgc, sep='\t')
+    # make sure the 1st and 2nd columns have the same column labels
+    tool_df.rename(columns={tool_df.columns[0]: 'sample_id'}, inplace=True)
+    tool_df.rename(columns={tool_df.columns[1]: 'contig_id'}, inplace=True)
+
+    # merge rows from taxa to combgc_df based on substring match in sample_id
+    # grab the unique sample names from the taxonomy table
+    samples_taxa = taxa_df['sample_id'].unique()
+    # for every sampleID in taxadf merge the results
+    for sampleID in samples_taxa:
+        # subset combgc
+        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset taxa
+        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        # merge
+        subset_df = pd.merge(subset_tool, subset_taxa, left_on='contig_id', right_on='contig_id', how='left')
+        # clean up the table
+        columnsremove = ['sample_id_y']
+        subset_df.drop(columnsremove, axis=1, inplace=True)
+        subset_df.rename(columns={'sample_id_x': 'sample_id'}, inplace=True)
+        # collect into the combined df (DataFrame.append was removed in pandas 2.0)
+        merged_df = pd.concat([merged_df, subset_df], ignore_index=True)
+
+    # write to file
+    merged_df.to_csv('combgc_complete_summary_taxonomy.tsv', sep='\t', index=False)
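+
+# Caveat shared by all three mergers: samples are subset with
+# str.contains(sampleID), i.e. by substring. If one sample name were a prefix
+# of another (hypothetically "sample1" and "sample10"), both would match; an
+# anchored comparison such as tool_df['sample_id'] == sampleID would be
+# stricter, assuming the IDs match exactly after reformat_mmseqs_taxonomy().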
+
+#########################################
+# FUNCTION: HAMRONIZATION
+#########################################
+def hamronization_taxa(args):
+    merged_df = pd.DataFrame()
+
+    # assign input args to variables
+    hamronization = args.arg
+    taxa_list = args.taxa3
+
+    # prepare the taxonomy files
+    taxa_df = pd.DataFrame()
+    # append the dfs to the taxonomy_files_combined
+    for file in taxa_list:  # list of taxa files ['','']
+        df = reformat_mmseqs_taxonomy(file)
+        taxa_df = pd.concat([taxa_df, df])
+
+    # filter the tool df
+    tool_df = pd.read_csv(hamronization, sep='\t')
+    # rename the columns
+    tool_df.rename(columns={'input_file_name': 'sample_id', 'input_sequence_id': 'contig_id'}, inplace=True)
+    # reorder the columns
+    new_order = ['sample_id', 'contig_id'] + [col for col in tool_df.columns if col not in ['sample_id', 'contig_id']]
+    tool_df = tool_df.reindex(columns=new_order)
+    # grab the real contig id in another column copy for merging
+    tool_df['contig_id_merge'] = tool_df['contig_id'].str.rsplit('_', n=1).str[0]
+
+    # merge rows from taxa to hamronization_df based on substring match in sample_id
+    # grab the unique sample names from the taxonomy table
+    samples_taxa = taxa_df['sample_id'].unique()
+    # for every sampleID in taxadf merge the results
+    for sampleID in samples_taxa:
+        # subset hamronization
+        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset taxa
+        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        # merge
+        subset_df = pd.merge(subset_tool, subset_taxa, left_on='contig_id_merge', right_on='contig_id', how='left')
+        # clean up the table
+        columnsremove = ['contig_id_merge', 'contig_id_y', 'sample_id_y']
+        subset_df.drop(columnsremove, axis=1, inplace=True)
+        subset_df.rename(columns={'contig_id_x': 'contig_id', 'sample_id_x': 'sample_id'}, inplace=True)
+        # collect into the combined df (DataFrame.append was removed in pandas 2.0)
+        merged_df = pd.concat([merged_df, subset_df], ignore_index=True)
+
+    # write to file
+    merged_df.to_csv('hamronization_complete_summary_taxonomy.tsv', sep='\t', index=False)
+
+#########################################
+# SUBPARSERS: DEFAULT
+#########################################
+ampcombi_parser.set_defaults(func=ampcombi_taxa)
+combgc_parser.set_defaults(func=combgc_taxa)
+hamronization_parser.set_defaults(func=hamronization_taxa)
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    args.func(args)  # call the selected subcommand's handler
diff --git a/conf/base.config b/conf/base.config
index d4119475..d6e451de 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,10 +10,9 @@ process {

-    // TODO nf-core: Check the defaults for all processes
-    cpus = { 1 * task.attempt }
-    memory = { 6.GB * task.attempt }
-    time = { 4.h * task.attempt }
+    cpus   = { 1 * task.attempt }
+    memory = { 6.GB * task.attempt }
+    time   = { 4.h * task.attempt }

     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
@@ -24,39 +23,211 @@ process {
     // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
     // If possible, it would be nice to keep the same label naming convention when
     // adding in your local modules too.
-    // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
-    withLabel:process_single {
-        cpus = { 1 }
+
+    withLabel: process_single {
+        cpus   = { 1 }
         memory = { 6.GB * task.attempt }
-        time = { 4.h * task.attempt }
+        time   = { 4.h * task.attempt }
     }
-    withLabel:process_low {
-        cpus = { 2 * task.attempt }
+    withLabel: process_low {
+        cpus   = { 2 * task.attempt }
         memory = { 12.GB * task.attempt }
-        time = { 4.h * task.attempt }
+        time   = { 4.h * task.attempt }
     }
-    withLabel:process_medium {
-        cpus = { 6 * task.attempt }
+    withLabel: process_medium {
+        cpus   = { 6 * task.attempt }
         memory = { 36.GB * task.attempt }
-        time = { 8.h * task.attempt }
+        time   = { 8.h * task.attempt }
     }
-    withLabel:process_high {
-        cpus = { 12 * task.attempt }
+    withLabel: process_high {
+        cpus   = { 12 * task.attempt }
         memory = { 72.GB * task.attempt }
-        time = { 16.h * task.attempt }
+        time   = { 16.h * task.attempt }
     }
-    withLabel:process_long {
-        time = { 20.h * task.attempt }
+    withLabel: process_long {
+        time   = { 20.h * task.attempt }
    }
-    withLabel:process_high_memory {
+    withLabel: process_high_memory {
        memory = { 200.GB * task.attempt }
    }
-    withLabel:error_ignore {
+    withLabel: error_ignore {
        errorStrategy = 'ignore'
    }
-    withLabel:error_retry {
+    withLabel: error_retry {
        errorStrategy = 'retry'
        maxRetries    = 2
    }
+
+    /*
+     * Resource specifications
+     */
+
+    withName: GUNZIP {
+        memory = { 2.GB * task.attempt }
+        cpus   = 1
+    }
+
+    withName: UNTAR {
+        memory = { 2.GB * task.attempt }
+        cpus   = 1
+    }
+
+    withName: PROKKA {
+        memory = { 8.GB * task.attempt }
+        cpus   = { 4 * task.attempt }
+        time   = { 8.h * task.attempt }
+    }
+
+    withName: PRODIGAL_GBK {
+        memory = { 2.GB * task.attempt }
+        cpus   = 1
+    }
+
+    withName: BAKTA_BAKTA {
+        memory = { 64.GB * task.attempt }
+        cpus   = { 8 * task.attempt }
+        time   = { 8.h * task.attempt }
+    }
+
+    withName: ABRICATE_RUN {
+        memory = { 2.GB * task.attempt }
+        cpus   = { 4 * task.attempt }
+    }
+
+    withName: AMRFINDERPLUS_RUN {
+        memory = { 2.GB * task.attempt }
+        cpus   = 1
+    }
+
+    withName: DEEPARG_DOWNLOADDATA {
+        memory = { 2.GB * task.attempt }
+        cpus   = 1
+        time   = { 2.h * 
task.attempt } + } + + withName: DEEPARG_PREDICT { + memory = { 2.GB * task.attempt } + cpus = 1 + } + + withName: FARGENE { + memory = { 2.GB * task.attempt } + cpus = { 4 * task.attempt } + } + + withName: RGI_MAIN { + memory = { 28.GB * task.attempt } + cpus = { 4 * task.attempt } + } + + withName: AMPIR { + memory = { 8.GB * task.attempt } + cpus = 1 + } + + withName: AMPLIFY_PREDICT { + memory = { 16.GB * task.attempt } + cpus = 1 + time = { 24.h * task.attempt } + } + + withName: AMP_HMMER_HMMSEARCH { + memory = { 2.GB * task.attempt } + cpus = { 4 * task.attempt } + } + + withName: MACREL_CONTIGS { + memory = { 4.GB * task.attempt } + cpus = { 4 * task.attempt } + } + + withName: BGC_HMMER_HMMSEARCH { + memory = { 2.GB * task.attempt } + cpus = { 4 * task.attempt } + } + + withName: ANTISMASH_ANTISMASHLITE { + memory = { 64.GB * task.attempt } + cpus = { 8 * task.attempt } + time = { 12.h * task.attempt } + } + + withName: ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: DEEPBGC_DOWNLOAD { + memory = { 2.GB * task.attempt } + cpus = 1 + } + + withName: DEEPBGC_PIPELINE { + memory = { 2.GB * task.attempt } + cpus = 1 + time = { 24.h * task.attempt } + } + + withName: GECCO_RUN { + memory = { 16.GB * task.attempt } + cpus = { 4 * task.attempt } + } + + withName: HAMRONIZATION_ABRICATE { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: HAMRONIZATION_AMRFINDERPLUS { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: HAMRONIZATION_DEEPARG { + memory = { 8.GB * task.attempt } + cpus = 1 + } + + withName: HAMRONIZATION_RGI { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: HAMRONIZATION_FARGENE { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: HAMRONIZATION_SUMMARIZE { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: ARGNORM_DEEPARG { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: ARGNORM_ABRICATE { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: ARGNORM_AMRFINDERPLUS { + memory = { 4.GB * task.attempt } + cpus = 1 + } + + withName: AMPCOMBI2_PARSETABLES { + memory = { 8.GB * task.attempt } + time = { 2.h * task.attempt } + errorStrategy = { task.exitStatus == 1 ? 'retry' : 'finish' } + maxRetries = 2 // Retry the process up to 2 times + } + + withName: AMPCOMBI2_CLUSTER { + memory = { 6.GB * task.attempt } + time = { 2.h * task.attempt } + } } diff --git a/conf/modules.config b/conf/modules.config index d203d2b6..d4e473d2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,20 +15,671 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - withName: FASTQC { - ext.args = '--quiet' + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + withName: GUNZIP { publishDir = [ - path: { "${params.outdir}/multiqc" }, + enabled: false + ] + } + + withName: MMSEQS_DATABASES { + publishDir = [ + path: { "${params.outdir}/databases/mmseqs/" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + params.taxa_classification_mmseqs_db_savetmp ? "" : "--remove-tmp-files" + ].join(' ').trim() + } + + withName: MMSEQS_CREATEDB { + publishDir = [ + path: { "${params.outdir}/databases/mmseqs/mmseqs_createdb/" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: MMSEQS_TAXONOMY { + publishDir = [ + path: { "${params.outdir}/databases/mmseqs/mmseqs_taxonomy/" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + params.taxa_classification_mmseqs_taxonomy_savetmp ? "" : "--remove-tmp-files", + "--search-type ${params.taxa_classification_mmseqs_taxonomy_searchtype}", + "--lca-ranks ${params.taxa_classification_mmseqs_taxonomy_lcaranks}", + "--tax-lineage ${params.taxa_classification_mmseqs_taxonomy_taxlineage}", + "-s ${params.taxa_classification_mmseqs_taxonomy_sensitivity}", + "--orf-filter-s ${params.taxa_classification_mmseqs_taxonomy_orffilters}", + "--lca-mode ${params.taxa_classification_mmseqs_taxonomy_lcamode}", + "--vote-mode ${params.taxa_classification_mmseqs_taxonomy_votemode}", + ].join(' ').trim() + } + + withName: MMSEQS_CREATETSV { + publishDir = [ + path: { "${params.outdir}/taxa_classification/mmseqs_createtsv/${meta.id}/" }, + mode: params.publish_dir_mode, + enabled: params.run_taxa_classification, + pattern: "*.tsv", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: SEQKIT_SEQ { + ext.prefix = { "${meta.id}_long" } + publishDir = [ + path: { "${params.outdir}/bgc/seqkit/" }, + mode: params.publish_dir_mode, + enabled: params.bgc_savefilteredcontigs, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + "--min-len ${params.bgc_mincontiglength}" + ].join(' ').trim() + } + + withName: PROKKA { + ext.prefix = { "${meta.id}_prokka" } + publishDir = [ + path: { "${params.outdir}/annotation/prokka/${meta.category}/" }, + mode: params.publish_dir_mode, + enabled: params.save_annotations, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + "--kingdom ${params.annotation_prokka_kingdom}", + "--gcode ${params.annotation_prokka_gcode}", + "--mincontiglen ${params.annotation_prokka_mincontiglen}", + "--evalue ${params.annotation_prokka_evalue}", + "--coverage ${params.annotation_prokka_coverage}", + params.annotation_prokka_retaincontigheaders ? "--force" : "--locustag PROKKA --centre CENTER", + params.annotation_prokka_singlemode ? '' : '--metagenome', + params.annotation_prokka_cdsrnaolap ? '--cdsrnaolap' : '', + params.annotation_prokka_rawproduct ? '--rawproduct' : '', + params.annotation_prokka_rnammer ? '--rnammer' : '', + params.annotation_prokka_compliant ? '--compliant' : '', + params.annotation_prokka_addgenes ? 
'--addgenes' : '',
+        ].join(' ').trim()
+    }
+
+    withName: BAKTA_BAKTADBDOWNLOAD {
+        publishDir = [
+            path: { "${params.outdir}/databases/bakta" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_db,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.args = [
+            "--type ${params.annotation_bakta_db_downloadtype}"
+        ].join(' ').trim()
+    }
+
+    withName: BAKTA_BAKTA {
+        ext.prefix = { "${meta.id}_bakta" }
+        publishDir = [
+            path: { "${params.outdir}/annotation/bakta/${meta.category}/" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_annotations,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.args = [
+            params.annotation_bakta_singlemode ? '' : '--meta',
+            "--min-contig-length ${params.annotation_bakta_mincontiglen}",
+            "--translation-table ${params.annotation_bakta_translationtable}",
+            "--gram ${params.annotation_bakta_gram}",
+            params.annotation_bakta_complete ? '--complete' : '',
+            params.annotation_bakta_renamecontigheaders ? '' : '--keep-contig-headers',
+            params.annotation_bakta_compliant ? '--compliant' : '',
+            params.annotation_bakta_trna ? '' : '--skip-trna',
+            params.annotation_bakta_tmrna ? '' : '--skip-tmrna',
+            params.annotation_bakta_rrna ? '' : '--skip-rrna',
+            params.annotation_bakta_ncrna ? '' : '--skip-ncrna',
+            params.annotation_bakta_ncrnaregion ? '' : '--skip-ncrna-region',
+            params.annotation_bakta_crispr ? '' : '--skip-crispr',
+            params.annotation_bakta_skipcds ? '--skip-cds' : '',
+            params.annotation_bakta_pseudo ? '' : '--skip-pseudo',
+            params.annotation_bakta_skipsorf ? '--skip-sorf' : '',
+            params.annotation_bakta_gap ? '' : '--skip-gap',
+            params.annotation_bakta_ori ? '' : '--skip-ori',
+            params.annotation_bakta_activate_plot ? '' : '--skip-plot',
+        ].join(' ').trim()
+    }
+
+    withName: PRODIGAL {
+        publishDir = [
+            path: { "${params.outdir}/annotation/prodigal/${meta.category}/" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_annotations,
+            pattern: "*.{faa,fna,gbk,faa.gz,fna.gz,gbk.gz}",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.args = [
+            params.annotation_prodigal_singlemode ? "-p single" : "-p meta",
+            params.annotation_prodigal_closed ? "-c" : "",
+            params.annotation_prodigal_forcenonsd ? "-n" : "",
+            "-g ${params.annotation_prodigal_transtable}",
+        ].join(' ').trim()
+    }
+
+    withName: PYRODIGAL {
+        ext.prefix = { "${meta.id}_pyrodigal" }
+        publishDir = [
+            path: { "${params.outdir}/annotation/pyrodigal/${meta.category}/" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_annotations,
+            pattern: "*.{faa,fna,gbk,score}.gz",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.args = [
+            params.annotation_pyrodigal_singlemode ? "-p single" : "-p meta",
+            params.annotation_pyrodigal_closed ? "-c" : "",
+            params.annotation_pyrodigal_forcenonsd ? "-n" : "",
+            "-g ${params.annotation_pyrodigal_transtable}",
+        ].join(' ').trim()
+    }
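+
+    // Note on the ext.args idiom used throughout this file: each option is
+    // built as one list entry, disabled flags collapse to '' via ternaries,
+    // and .join(' ').trim() flattens the list into a single argument string.
+    // A hypothetical ["-p meta", "", "-g 11"].join(' ').trim() yields
+    // "-p meta  -g 11"; the doubled space is harmless once passed to the shell.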
+
+    withName: ABRICATE_RUN {
+        publishDir = [
+            path: { "${params.outdir}/arg/abricate/${meta.id}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.args = [
+            "--db ${params.arg_abricate_db_id}",
+            "--minid ${params.arg_abricate_minid}",
+            "--mincov ${params.arg_abricate_mincov}",
+        ].join(' ').trim()
+    }
+
+    withName: AMRFINDERPLUS_UPDATE {
+        publishDir = [
+            path: { "${params.outdir}/databases/amrfinderplus" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_db,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+
+    withName: AMRFINDERPLUS_RUN {
+        publishDir = [
+            path: { "${params.outdir}/arg/amrfinderplus/${meta.id}" },
             mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
         ]
+        ext.args = {
+            [
+                "--ident_min ${params.arg_amrfinderplus_identmin}",
+                "--coverage_min ${params.arg_amrfinderplus_coveragemin}",
+                "--translation_table ${params.arg_amrfinderplus_translationtable}",
+                params.arg_amrfinderplus_plus ? '--plus' : '',
+                params.arg_amrfinderplus_name ? "--name ${meta.id}" : '',
+            ].join(' ').trim()
+        }
     }

+    withName: DEEPARG_DOWNLOADDATA {
+        publishDir = [
+            path: { "${params.outdir}/databases/deeparg" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_db,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+
+    withName: DEEPARG_PREDICT {
+        publishDir = [
+            path: { "${params.outdir}/arg/deeparg/${meta.id}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.args = [
+            "--type prot",
+            "--min-prob ${params.arg_deeparg_minprob}",
+            "--arg-alignment-identity ${params.arg_deeparg_alignmentidentity}",
+            "--arg-alignment-evalue ${params.arg_deeparg_alignmentevalue}",
+            "--arg-alignment-overlap ${params.arg_deeparg_alignmentoverlap}",
+            "--arg-num-alignments-per-entry ${params.arg_deeparg_numalignmentsperentry}",
+        ].join(' ').trim()
+    }
+
+    withName: FARGENE {
+        tag = { "${meta.id}|${hmm_model}" }
+        publishDir = [
+            [
+                path: { "${params.outdir}/arg/fargene/${meta.id}" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                pattern: "*.log",
+            ],
+            [
+                path: { "${params.outdir}/arg/fargene/${meta.id}" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                pattern: "*/results_summary.txt",
+            ],
+            [
+                path: { "${params.outdir}/arg/fargene/${meta.id}" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                pattern: "*/{hmmsearchresults,predictedGenes,retrievedFragments}/*",
+            ],
+            [
+                path: { "${params.outdir}/arg/fargene/${meta.id}/" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                pattern: "*/{tmpdir}/*",
+                enabled: params.arg_fargene_savetmpfiles,
+            ],
+        ]
+        ext.prefix = { "${meta.hmm_class}" }
+        ext.args = { "--hmm-model ${params.arg_fargene_hmmmodel} --logfile ${meta.id}-${meta.hmm_class}.log --min-orf-length ${params.arg_fargene_minorflength} --score ${params.arg_fargene_score} --translation-format ${params.arg_fargene_translationformat}" + (params.arg_fargene_orffinder ? ' --orf-finder' : '') }
+    }
+
+    withName: UNTAR_CARD {
+        ext.prefix = "card_database"
+        publishDir = [
+            [
+                path: { "${params.outdir}/databases/rgi" },
+                mode: params.publish_dir_mode,
+                enabled: params.save_db,
+                saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + ] + } + + withName: RGI_CARDANNOTATION { + publishDir = [ + [ + path: { "${params.outdir}/databases/rgi" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ] + } + + withName: RGI_MAIN { + publishDir = [ + [ + path: { "${params.outdir}/arg/rgi/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*.{txt}", + ], + [ + path: { "${params.outdir}/arg/rgi/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*.{json}", + enabled: params.arg_rgi_savejson, + ], + [ + path: { "${params.outdir}/arg/rgi/${meta.id}/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*temp*", + enabled: params.arg_rgi_savetmpfiles, + ], + ] + ext.args2 = [ + "--alignment_tool ${params.arg_rgi_alignmenttool}", + "--data ${params.arg_rgi_data}", + params.arg_rgi_includeloose ? '--include_loose' : '', + params.arg_rgi_includenudge ? '--include_nudge' : '', + params.arg_rgi_lowquality ? '--low_quality' : '', + params.arg_rgi_split_prodigal_jobs ? '--split_prodigal_jobs' : '', + ].join(' ').trim() + } + + withName: AMPIR { + ext.prefix = { "${meta.id}.ampir" } + publishDir = [ + path: { "${params.outdir}/amp/ampir/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: AMPLIFY_PREDICT { + ext.prefix = { "${meta.id}.amplify" } + publishDir = [ + path: { "${params.outdir}/amp/amplify/${meta.id}/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: AMP_HMMER_HMMSEARCH { + label = { "${meta.id}_${meta.hmm_id}" } + publishDir = [ + path: { "${params.outdir}/amp/hmmer_hmmsearch/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${meta.id}_${meta.hmm_id}.hmmer_hmmsearch" } + } + + withName: MACREL_CONTIGS { + ext.prefix = { "${meta.id}.macrel" } + publishDir = [ + path: { "${params.outdir}/amp/macrel" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = "--keep-negatives" + } + + withName: BGC_HMMER_HMMSEARCH { + label = { "${meta.id}_${meta.hmm_id}" } + publishDir = [ + path: { "${params.outdir}/bgc/hmmer_hmmsearch/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${meta.id}_${meta.hmm_id}" } + } + + withName: ANTISMASH_ANTISMASHLITE { + publishDir = [ + path: { "${params.outdir}/bgc/antismash" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + params.bgc_antismash_cbgeneral ? '--cb-general' : '', + params.bgc_antismash_cbknownclusters ? '--cb-knownclusters' : '', + params.bgc_antismash_smcogtrees ? '--smcog-trees' : '', + params.bgc_antismash_ccmibig ? '--cc-mibig' : '', + params.bgc_antismash_cbsubclusters ? '--cb-subclusters' : '', + params.bgc_antismash_pfam2go ? '--pfam2go' : '', + params.bgc_antismash_rre ? '--rre' : '', + params.bgc_antismash_tfbs ? 
'--tfbs' : '', + "--allow-long-headers", + "--minlength ${params.bgc_antismash_contigminlength}", + "--hmmdetection-strictness ${params.bgc_antismash_hmmdetectionstrictness}", + "--taxon ${params.bgc_antismash_taxon}", + ].join(' ').trim() + } + + withName: ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { + publishDir = [ + path: { "${params.outdir}/databases/antismash" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: DEEPBGC_DOWNLOAD { + publishDir = [ + path: { "${params.outdir}/databases/deepbgc" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: DEEPBGC_PIPELINE { + publishDir = [ + path: { "${params.outdir}/bgc/deepbgc/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + "--score ${params.bgc_deepbgc_score}", + params.bgc_deepbgc_prodigalsinglemode ? '' : '--prodigal-meta-mode', + "--merge-max-protein-gap ${params.bgc_deepbgc_mergemaxproteingap}", + "--merge-max-nucl-gap ${params.bgc_deepbgc_mergemaxnuclgap}", + "--min-nucl ${params.bgc_deepbgc_minnucl}", + "--min-proteins ${params.bgc_deepbgc_minproteins}", + "--min-domains ${params.bgc_deepbgc_mindomains}", + "--min-bio-domains ${params.bgc_deepbgc_minbiodomains}", + "--classifier-score ${params.bgc_deepbgc_classifierscore}", + ].join(' ').trim() + } + + withName: GECCO_RUN { + publishDir = [ + path: { "${params.outdir}/bgc/gecco/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + "--cds ${params.bgc_gecco_cds}", + "--threshold ${params.bgc_gecco_threshold}", + "--p-filter ${params.bgc_gecco_pfilter}", + "--edge-distance ${params.bgc_gecco_edgedistance}", + params.bgc_gecco_mask ? '--mask' : '', + ].join(' ').trim() + } + + withName: HAMRONIZATION_ABRICATE { + publishDir = [ + path: { "${params.outdir}/arg/hamronization/abricate" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${report}.abricate" } + } + + withName: HAMRONIZATION_AMRFINDERPLUS { + publishDir = [ + path: { "${params.outdir}/arg/hamronization/amrfinderplus" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${report}.amrfinderplus" } + } + + withName: HAMRONIZATION_DEEPARG { + publishDir = [ + path: { "${params.outdir}/arg/hamronization/deeparg" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${report}.deeparg" } + } + + withName: HAMRONIZATION_RGI { + publishDir = [ + path: { "${params.outdir}/arg/hamronization/rgi" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${report}.rgi" } + } + + withName: HAMRONIZATION_FARGENE { + publishDir = [ + path: { "${params.outdir}/arg/hamronization/fargene" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + ext.prefix = { "${meta.id}_${report}.fargene" } + } + + withName: HAMRONIZATION_SUMMARIZE { + publishDir = [ + path: { "${params.outdir}/reports/hamronization_summarize" }, + mode: params.publish_dir_mode, + saveAs: { params.run_taxa_classification == false ? it : null }, + ] + } + + withName: MERGE_TAXONOMY_HAMRONIZATION { + publishDir = [ + path: { "${params.outdir}/reports/hamronization_summarize" }, + mode: params.publish_dir_mode, + saveAs: { _ -> null }, + ] + } + + withName: ARG_TABIX_BGZIP { + publishDir = [ + path: { "${params.outdir}/reports/hamronization_summarize" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: AMPCOMBI2_PARSETABLES { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + "--aminoacid_length ${params.amp_ampcombi_parsetables_aalength}", + "--db_evalue ${params.amp_ampcombi_parsetables_dbevalue}", + "--amp_cutoff ${params.amp_ampcombi_parsetables_cutoff}", + "--ampir_file ${params.amp_ampcombi_parsetables_ampir}", + "--amplify_file ${params.amp_ampcombi_parsetables_amplify}", + "--macrel_file ${params.amp_ampcombi_parsetables_macrel}", + "--hmmsearch_file ${params.amp_ampcombi_parsetables_hmmsearch}", + "--ampgram_file '.tsv' --amptransformer_file '.txt' --neubi_file '.fasta' --log TRUE", + "--hmm_evalue ${params.amp_ampcombi_parsetables_hmmevalue}", + "--window_size_stop_codon ${params.amp_ampcombi_parsetables_windowstopcodon}", + "--window_size_transporter ${params.amp_ampcombi_parsetables_windowtransport}", + params.amp_ampcombi_parsetables_removehitswostopcodons ? '--remove_stop_codons' : '', + ].join(' ').trim() + ext.prefix = { "${meta.id}" } + } + withName: AMPCOMBI2_COMPLETE { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { + return filename + } + else { + return !params.run_taxa_classification ? filename : null + } + }, + ] + ext.args = "--log TRUE" + } + + withName: AMPCOMBI2_CLUSTER { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { + return filename + } + else { + return !params.run_taxa_classification ? filename : null + } + }, + ] + ext.args = [ + "--cluster_cov_mode ${params.amp_ampcombi_cluster_covmode}", + "--cluster_mode ${params.amp_ampcombi_cluster_mode}", + "--cluster_coverage ${params.amp_ampcombi_cluster_coverage}", + "--cluster_seq_id ${params.amp_ampcombi_cluster_seqid}", + "--cluster_sensitivity ${params.amp_ampcombi_cluster_sensitivity}", + "--cluster_min_member ${params.amp_ampcombi_cluster_minmembers}", + "--log TRUE", + params.amp_ampcombi_cluster_removesingletons ? '--cluster_remove_singletons' : '', + ].join(' ').trim() + } + + withName: MERGE_TAXONOMY_AMPCOMBI { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { _ -> null }, + ] + } + + withName: AMP_TABIX_BGZIP { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: COMBGC { + publishDir = [ + path: { "${params.outdir}/reports/combgc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: ARGNORM_ABRICATE { + publishDir = [ + path: { "${params.outdir}/arg/argnorm/abricate/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${meta.id}.normalized.tsv" } + ext.args = "--hamronized" + } + + withName: ARGNORM_AMRFINDERPLUS { + publishDir = [ + path: { "${params.outdir}/arg/argnorm/amrfinderplus/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { "${meta.id}.normalized.tsv" } + ext.args = "--hamronized" + } + + withName: ARGNORM_DEEPARG { + publishDir = [ + path: { "${params.outdir}/arg/argnorm/deeparg/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.prefix = { input_tsv.toString().endsWith(".potential.ARG.deeparg.tsv") ? "${meta.id}.potential.ARG.normalized.tsv" : "${meta.id}.ARG.normalized.tsv" } + ext.args = "--hamronized" + } + + withName: MERGE_TAXONOMY_COMBGC { + publishDir = [ + path: { "${params.outdir}/reports/combgc" }, + mode: params.publish_dir_mode, + saveAs: { _ -> null }, + ] + } + + withName: BGC_TABIX_BGZIP { + publishDir = [ + path: { "${params.outdir}/reports/combgc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: AMP_DATABASE_DOWNLOAD { + publishDir = [ + path: { "${params.outdir}/databases/${params.amp_ampcombi_db}" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } } diff --git a/conf/test.config b/conf/test.config index a55b8e32..61ad1c4d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -19,14 +19,18 @@ process { } params { - config_profile_name = 'Test profile' + config_profile_name = 'AMP/ARG Pyrodigal test profile' config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' - // Genome references - genome = 'R64-1-1' + annotation_tool = 'pyrodigal' + + run_arg_screening = true + arg_fargene_hmmmodel = 'class_a,class_b_1_2' + + run_amp_screening = true + amp_run_hmmsearch = true + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' } diff --git a/conf/test_bakta.config b/conf/test_bakta.config new file mode 100644 index 00000000..4cd2dacb --- /dev/null +++ b/conf/test_bakta.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_bakta,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'AMP/ARG Bakta test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+
+    annotation_tool                  = 'bakta'
+    annotation_bakta_db_downloadtype = 'light'
+
+    run_amp_screening    = true
+    amp_run_hmmsearch    = true
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+
+    run_arg_screening    = true
+    arg_fargene_hmmmodel = 'class_a,class_b_1_2'
+}
diff --git a/conf/test_bgc_bakta.config b/conf/test_bgc_bakta.config
new file mode 100644
index 00000000..fba6c3ea
--- /dev/null
+++ b/conf/test_bgc_bakta.config
@@ -0,0 +1,37 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_bgc_bakta,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'BGC Bakta test profile'
+    config_profile_description = 'Minimal test dataset to check BGC workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+
+    annotation_tool                  = 'bakta'
+    annotation_bakta_db_downloadtype = "light"
+
+    run_arg_screening = false
+    run_amp_screening = false
+    run_bgc_screening = true
+
+    bgc_run_hmmsearch    = true
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+}
diff --git a/conf/test_bgc_prokka.config b/conf/test_bgc_prokka.config
new file mode 100644
index 00000000..ece6902b
--- /dev/null
+++ b/conf/test_bgc_prokka.config
@@ -0,0 +1,36 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_bgc_prokka,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'BGC Prokka test profile'
+    config_profile_description = 'Minimal test dataset to check BGC workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+
+    annotation_tool = 'prokka'
+
+    run_arg_screening = false
+    run_amp_screening = false
+    run_bgc_screening = true
+
+    bgc_run_hmmsearch    = true
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+}
diff --git a/conf/test_bgc_pyrodigal.config b/conf/test_bgc_pyrodigal.config
new file mode 100644
index 00000000..da83cbd6
--- /dev/null
+++ b/conf/test_bgc_pyrodigal.config
@@ -0,0 +1,36 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_bgc_pyrodigal,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'BGC Pyrodigal test profile'
+    config_profile_description = 'Minimal test dataset to check BGC workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+
+    annotation_tool = 'pyrodigal'
+
+    run_arg_screening = false
+    run_amp_screening = false
+    run_bgc_screening = true
+
+    bgc_run_hmmsearch    = true
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+}
diff --git a/conf/test_full.config b/conf/test_full.config
index b919e3e2..9d16fb18 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -11,14 +11,38 @@
 */

 params {
-    config_profile_name = 'Full test profile'
-    config_profile_description = 'Full test dataset to check pipeline function'
+    config_profile_name        = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'

     // Input data for full size test
-    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_full.csv'

-    // Genome references
-    genome = 'R64-1-1'
+    // Database and annotation options
+    save_annotations = true
+
+    // AMP params
+    run_amp_screening            = true
+    amp_run_hmmsearch            = true
+    amp_hmmsearch_models         = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+    amp_hmmsearch_savealignments = true
+    amp_hmmsearch_savedomains    = true
+    amp_hmmsearch_savetargets    = true
+    amp_skip_amplify             = true // takes too long
+
+    // ARG params
+    run_arg_screening = true
+    arg_skip_deeparg  = false
+
+    // BGC params
+    run_bgc_screening             = true
+    bgc_run_hmmsearch             = true
+    bgc_hmmsearch_models          = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+    bgc_hmmsearch_savealignments  = true
+    bgc_hmmsearch_savetargets     = true
+    bgc_hmmsearch_savedomains     = true
+    bgc_skip_deepbgc              = true // takes too long
+    bgc_mincontiglength           = 1000
+    bgc_savefilteredcontigs       = true
+    bgc_antismash_contigminlength = 1000
 }
diff --git a/conf/test_nothing.config b/conf/test_nothing.config
new file mode 100644
index 00000000..91640a32
--- /dev/null
+++ b/conf/test_nothing.config
@@ -0,0 +1,56 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Although in this case we turn everything off.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_nothing,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'Test nothing profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Input data
+    input                = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+
+    annotation_tool = 'pyrodigal'
+
+    run_arg_screening = false
+    run_amp_screening = false
+    run_bgc_screening = false
+
+    arg_fargene_hmmmodel = 'class_a,class_b_1_2'
+
+    amp_skip_amplify  = true
+    amp_skip_macrel   = true
+    amp_skip_ampir    = true
+    amp_run_hmmsearch = false
+
+    arg_skip_deeparg       = true
+    arg_skip_fargene       = true
+    arg_skip_rgi           = true
+    arg_skip_amrfinderplus = true
+    arg_skip_abricate      = true
+
+    bgc_skip_antismash = true
+    bgc_skip_deepbgc   = true
+    bgc_skip_gecco     = true
+    bgc_run_hmmsearch  = false
+}
diff --git a/conf/test_preannotated.config b/conf/test_preannotated.config
new file mode 100644
index 00000000..764304e2
--- /dev/null
+++ b/conf/test_preannotated.config
@@ -0,0 +1,36 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_preannotated,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'ARG/AMP test profile - preannotated input'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_preannotated.csv'
+
+    annotation_tool = 'pyrodigal'
+
+    run_arg_screening    = true
+    arg_fargene_hmmmodel = 'class_a,class_b_1_2'
+
+    run_amp_screening    = true
+    amp_run_hmmsearch    = true
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+}
diff --git a/conf/test_preannotated_bgc.config b/conf/test_preannotated_bgc.config
new file mode 100644
index 00000000..70d5d1d5
--- /dev/null
+++ b/conf/test_preannotated_bgc.config
@@ -0,0 +1,36 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_preannotated_bgc,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name = 'BGC test profile - preannotated input'
+    config_profile_description = 'Minimal test dataset to check BGC workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_preannotated.csv'
+
+    annotation_tool = 'pyrodigal'
+
+    run_arg_screening = false
+    run_amp_screening = false
+    run_bgc_screening = true
+
+    bgc_run_hmmsearch = true
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+}
diff --git a/conf/test_prokka.config b/conf/test_prokka.config
new file mode 100644
index 00000000..fd576b81
--- /dev/null
+++ b/conf/test_prokka.config
@@ -0,0 +1,36 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_prokka,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name = 'AMP/ARG Prokka test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+
+    annotation_tool = 'prokka'
+
+    run_arg_screening = true
+    arg_fargene_hmmmodel = 'class_a,class_b_1_2'
+
+    run_amp_screening = true
+    amp_run_hmmsearch = true
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+}
diff --git a/conf/test_taxonomy_bakta.config b/conf/test_taxonomy_bakta.config
new file mode 100644
index 00000000..6763c48e
--- /dev/null
+++ b/conf/test_taxonomy_bakta.config
@@ -0,0 +1,50 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_taxonomy_bakta,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+    withName: MMSEQS_DATABASES {
+        memory = '14.GB'
+    }
+}
+
+params {
+    config_profile_name = 'Taxonomic classification test profile'
+    config_profile_description = 'Minimal test dataset to check taxonomic classification workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+
+    run_taxa_classification = true
+    annotation_tool = 'bakta'
+    annotation_bakta_db_downloadtype = 'light'
+
+    run_arg_screening = true
+    arg_skip_deeparg = true
+    arg_skip_amrfinderplus = true
+
+    run_amp_screening = true
+    amp_run_hmmsearch = true
+
+    run_bgc_screening = true
+    bgc_mincontiglength = 1000
+    bgc_savefilteredcontigs = true
+    bgc_skip_deepbgc = true
+    bgc_antismash_contigminlength = 1000
+    bgc_run_hmmsearch = true
+}
diff --git a/conf/test_taxonomy_prokka.config b/conf/test_taxonomy_prokka.config
new file mode 100644
index 00000000..e126624f
--- /dev/null
+++ b/conf/test_taxonomy_prokka.config
@@ -0,0 +1,49 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_taxonomy_prokka,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+    withName: MMSEQS_DATABASES {
+        memory = '14.GB'
+    }
+}
+
+params {
+    config_profile_name = 'Taxonomic classification test profile'
+    config_profile_description = 'Minimal test dataset to check taxonomic classification workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+
+    run_taxa_classification = true
+    annotation_tool = 'prokka'
+
+    run_arg_screening = true
+    arg_skip_deeparg = true
+    arg_skip_amrfinderplus = true
+
+    run_amp_screening = true
+    amp_run_hmmsearch = true
+
+    run_bgc_screening = true
+    bgc_mincontiglength = 1000
+    bgc_savefilteredcontigs = true
+    bgc_skip_deepbgc = true
+    bgc_antismash_contigminlength = 1000
+    bgc_run_hmmsearch = true
+}
diff --git a/conf/test_taxonomy_pyrodigal.config b/conf/test_taxonomy_pyrodigal.config
new file mode 100644
index 00000000..cbe89dc3
--- /dev/null
+++ b/conf/test_taxonomy_pyrodigal.config
@@ -0,0 +1,49 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test_taxonomy_pyrodigal,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+    withName: MMSEQS_DATABASES {
+        memory = '14.GB'
+    }
+}
+
+params {
+    config_profile_name = 'Taxonomic classification test profile'
+    config_profile_description = 'Minimal test dataset to check taxonomic classification workflow function'
+
+    // Input data
+    input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv'
+    bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
+    amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm'
+
+    run_taxa_classification = true
+    annotation_tool = 'pyrodigal'
+
+    run_arg_screening = true
+    arg_skip_deeparg = true
+    arg_skip_amrfinderplus = true
+
+    run_amp_screening = true
+    amp_run_hmmsearch = true
+
+    run_bgc_screening = true
+    bgc_mincontiglength = 1000
+    bgc_savefilteredcontigs = true
+    bgc_skip_deepbgc = true
+    bgc_antismash_contigminlength = 1000
+    bgc_run_hmmsearch = true
+}
diff --git a/docs/images/funcscan_icon.png b/docs/images/funcscan_icon.png
new file mode 100644
index 00000000..7dcfbf24
Binary files /dev/null and b/docs/images/funcscan_icon.png differ
diff --git a/docs/images/funcscan_icon.svg b/docs/images/funcscan_icon.svg
new file mode 100644
index 00000000..cc97af76
--- /dev/null
+++ b/docs/images/funcscan_icon.svg
@@ -0,0 +1,365 @@
[SVG markup not reproduced: nf-core/funcscan icon]
diff --git a/docs/images/funcscan_metro_workflow.png b/docs/images/funcscan_metro_workflow.png
new file mode 100644
index 00000000..7fda2756
Binary files /dev/null and b/docs/images/funcscan_metro_workflow.png differ
diff --git a/docs/images/funcscan_metro_workflow.svg b/docs/images/funcscan_metro_workflow.svg
new file mode 100644
index 00000000..c9d291fb
--- /dev/null
+++ b/docs/images/funcscan_metro_workflow.svg
@@ -0,0 +1,2997 @@
[SVG markup not reproduced: funcscan v2.0 metro workflow diagram. Legend: Antimicrobial Peptide Genes (AMPs), Biosynthetic Gene Clusters (BGCs), Antibiotic Resistance Genes (ARGs), Taxonomic Classification, Screening Tools, Preprocessing Tools, Postprocessing Tools, Optional Input]
diff --git a/docs/images/funcscan_metro_workflow_dark.png b/docs/images/funcscan_metro_workflow_dark.png
new file mode 100644
index 00000000..7656d595
Binary files /dev/null and b/docs/images/funcscan_metro_workflow_dark.png differ
diff --git a/docs/images/funcscan_metro_workflow_dark.svg b/docs/images/funcscan_metro_workflow_dark.svg
new file mode 100644
index 00000000..bb30861e
--- /dev/null
+++ b/docs/images/funcscan_metro_workflow_dark.svg
@@ -0,0 +1,3223 @@
[SVG markup not reproduced: dark version of the funcscan v2.0 metro workflow diagram]
diff --git a/docs/images/nf-core-funcscan_logo_flat_dark.png b/docs/images/nf-core-funcscan_logo_flat_dark.png
new file mode 100644
index 00000000..540d3b77
Binary files /dev/null and b/docs/images/nf-core-funcscan_logo_flat_dark.png differ
diff --git a/docs/images/nf-core-funcscan_logo_flat_dark.svg b/docs/images/nf-core-funcscan_logo_flat_dark.svg
new file mode 100644
index 00000000..8491e14d
--- /dev/null
+++ b/docs/images/nf-core-funcscan_logo_flat_dark.svg
@@ -0,0 +1,457 @@
[SVG markup not reproduced: nf-core/funcscan flat logo, dark variant]
diff --git a/docs/images/nf-core-funcscan_logo_flat_light.png b/docs/images/nf-core-funcscan_logo_flat_light.png
new file mode 100644
index 00000000..7e053148
Binary files /dev/null and b/docs/images/nf-core-funcscan_logo_flat_light.png differ
diff --git a/docs/images/nf-core-funcscan_logo_flat_light.svg b/docs/images/nf-core-funcscan_logo_flat_light.svg
new file mode 100644
index 00000000..fd4e0f83
--- /dev/null
+++ b/docs/images/nf-core-funcscan_logo_flat_light.svg
@@ -0,0 +1,470 @@
[SVG markup not reproduced: nf-core/funcscan flat logo, light variant]
diff --git a/docs/output.md b/docs/output.md
index 875a444a..0920236c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -2,50 +2,652 @@

 ## Introduction

-This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
+The output of nf-core/funcscan provides reports for each of the functional groups:

-The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
+- **antibiotic resistance genes** (tools: [ABRicate](https://github.com/tseemann/abricate), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master), [fARGene](https://github.com/fannyhb/fargene), [RGI](https://card.mcmaster.ca/analyze/rgi) – summarised by [hAMRonization](https://github.com/pha4ge/hAMRonization). Results from ABRicate, AMRFinderPlus, and DeepARG are normalised to [ARO](https://obofoundry.org/ontology/aro.html) by [argNorm](https://github.com/BigDataBiology/argNorm))
+- **antimicrobial peptides** (tools: [Macrel](https://github.com/BigDataBiology/macrel), [AMPlify](https://github.com/bcgsc/AMPlify), [ampir](https://ampir.marine-omics.net), [hmmsearch](http://hmmer.org) – summarised by [AMPcombi](https://github.com/Darcy220606/AMPcombi))
+- **biosynthetic gene clusters** (tools: [antiSMASH](https://docs.antismash.secondarymetabolites.org), [DeepBGC](https://github.com/Merck/deepbgc), [GECCO](https://gecco.embl.de), [hmmsearch](http://hmmer.org) – summarised by [comBGC](#combgc))

-
+As a general workflow, we recommend first looking at the summary reports ([ARGs](#hamronization), [AMPs](#ampcombi), [BGCs](#combgc)) to get a general overview of what hits have been found across all the tools of each functional group. Afterwards, you can explore the specific output directories of each tool to get more detailed information about each result. The tool-specific output directories also include the output from the functional annotation steps of either [prokka](https://github.com/tseemann/prokka), [pyrodigal](https://github.com/althonos/pyrodigal), [prodigal](https://github.com/hyattpd/Prodigal), or [Bakta](https://github.com/oschwengers/bakta) if the `--save_annotations` flag was set.
+Additionally, taxonomic classifications from [MMseqs2](https://github.com/soedinglab/MMseqs2) are saved if the `--taxa_classification_mmseqs_db_savetmp` and `--taxa_classification_mmseqs_taxonomy_savetmp` flags are set.
+
+Similarly, all downloaded databases are saved (i.e. from [MMseqs2](https://github.com/soedinglab/MMseqs2), [antiSMASH](https://docs.antismash.secondarymetabolites.org), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [Bakta](https://github.com/oschwengers/bakta), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master), [RGI](https://github.com/arpcard/rgi), and/or [AMPcombi](https://github.com/Darcy220606/AMPcombi)) into the output directory `<outdir>/databases/` if the `--save_db` flag is set.
+
+Furthermore, for reproducibility, the versions of all software used in the run are presented in a [MultiQC](http://multiqc.info) report.
+
+The directories listed below will be created in the results directory (specified by the `--outdir` flag) after the pipeline has finished. All paths are relative to this top-level output directory. The default directory structure of nf-core/funcscan is:
+
+```tree
+results/
+├── taxonomic_classification/
+|   └── mmseqs_createtsv/
+├── annotation/
+|   ├── bakta/
+|   ├── prodigal/
+|   ├── prokka/
+|   └── pyrodigal/
+├── amp/
+|   ├── ampir/
+|   ├── amplify/
+|   ├── hmmsearch/
+|   └── macrel/
+├── arg/
+|   ├── abricate/
+|   ├── amrfinderplus/
+|   ├── deeparg/
+|   ├── fargene/
+|   ├── rgi/
+|   ├── hamronization/
+|   └── argnorm/
+├── bgc/
+|   ├── antismash/
+|   ├── deepbgc/
+|   ├── gecco/
+|   └── hmmsearch/
+├── qc/
+|   └── seqkit/
+├── reports/
+|   ├── ampcombi/
+|   ├── combgc/
+|   └── hamronization_summarize/
+├── databases/
+├── multiqc/
+└── pipeline_info/
+work/
+```

 ## Pipeline overview

-The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
+The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes prokaryotic sequence data through the following steps:
+
+Input contig QC with:
+
+- [SeqKit](https://bioinf.shenwei.me/seqkit/) (default) – for separating contigs into long and short categories
+
+Taxonomic classification of nucleotide sequences with:
+
+- [MMseqs2](https://github.com/soedinglab/MMseqs2) (default) – for contig taxonomic classification using 2bLCA.
+
+ORF prediction and annotation with any of:
+
+- [Pyrodigal](#pyrodigal) (default) – for open reading frame prediction.
+- [Prodigal](#prodigal) – for open reading frame prediction.
+- [Prokka](#prokka) – open reading frame prediction and functional protein annotation.
+- [Bakta](#bakta) – open reading frame prediction and functional protein annotation.
+
+Antimicrobial Resistance Genes (ARGs):
+
+- [ABRicate](#abricate) – antimicrobial resistance gene detection, based on alignment to one of several databases.
+- [AMRFinderPlus](#amrfinderplus) – antimicrobial resistance gene detection, using NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models.
+- [DeepARG](#deeparg) – antimicrobial resistance gene detection, using a deep learning model.
+- [fARGene](#fargene) – antimicrobial resistance gene detection, using Hidden Markov Models.
+- [RGI](#rgi) – antimicrobial resistance gene detection, based on alignment to the CARD database.
+
+Antimicrobial Peptides (AMPs):
+
+- [ampir](#ampir) – antimicrobial peptide detection, based on a supervised statistical machine learning approach.
+- [AMPlify](#amplify) – antimicrobial peptide detection, using a deep learning model.
+- [hmmsearch](#hmmsearch) – antimicrobial peptide detection, based on hidden Markov models.
+- [Macrel](#macrel) – antimicrobial peptide detection, using a machine learning approach.

-- [FastQC](#fastqc) - Raw read QC
-- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
-- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+Biosynthetic Gene Clusters (BGCs):

-### FastQC
+- [antiSMASH](#antismash) – biosynthetic gene cluster detection.
+- [deepBGC](#deepbgc) – biosynthetic gene cluster detection, using a deep learning model.
+- [GECCO](#gecco) – biosynthetic gene cluster detection, using Conditional Random Fields (CRFs).
+- [hmmsearch](#hmmsearch) – biosynthetic gene cluster detection, based on hidden Markov models.
+
+Output Summaries:
+
+- [AMPcombi](#ampcombi) – summary report of antimicrobial peptide gene output from various detection tools.
+- [hAMRonization](#hamronization) – summary of antimicrobial resistance gene output from various detection tools.
+- [argNorm](#argnorm) – normalisation of ARG annotations from [ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), and [DeepARG](#deeparg) to the ARO.
+- [comBGC](#combgc) – summary of biosynthetic gene cluster output from various detection tools.
+- [MultiQC](#multiqc) – report of all software and versions used in the pipeline.
+- [Pipeline information](#pipeline-information) – report metrics generated during the workflow execution.
+
+## Tool details
+
+### Taxonomic classification tools
+
+[MMseqs2](#mmseqs2)
+
+#### MMseqs2
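+
+As a quick orientation, a sketch of enabling the taxonomic classification subworkflow on top of a screening workflow (this example is not from the original docs; it only combines the `--run_taxa_classification` and `--run_amp_screening` flags documented in this pipeline, and the samplesheet name and `<OUTDIR>` are illustrative):
+
+```bash
+nextflow run nf-core/funcscan \
+  --input samplesheet.csv \
+  --outdir <OUTDIR> \
+  -profile docker \
+  --run_amp_screening \
+  --run_taxa_classification
+```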
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `taxonomic_classification/mmseqs2_createtsv/` + - `/`: + - `*.tsv`: tab-separated table containing the taxonomic lineage of every contig. When a contig cannot be classified according to the database, it is assigned in the 'lineage' column as 'no rank | unclassified'. +- `reports//_complete_summary_taxonomy.tsv.gz`: tab-separated table containing the concatenated results from the summary tables along with the taxonomic classification if the parameter `--run_taxa_classification` is called. +
+ +[MMseqs2](https://github.com/soedinglab/MMseqs2) classifies the taxonomic lineage of contigs based on the last common ancestor. The inferred taxonomic lineages are included in the final workflow summaries to annotate the potential source bacteria of the identified AMPs, ARGs, and/or BGCs. + +### Annotation tools + +[Pyrodigal](#pyrodigal), [Prodigal](#prodigal), [Prokka](#prokka), [Bakta](#bakta) + +#### Prodigal + +
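+
+A minimal sketch of selecting Prodigal as the annotation tool, via the `--annotation_tool` parameter used throughout this document's test profiles (the remaining flags and file names are illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_arg_screening \
+  --annotation_tool prodigal
+```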
+Output files + +- `prodigal/` + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC subworkflow only) + - `/`: + - `*.fna`: nucleotide FASTA file of the input contig sequences + - `*.faa`: protein FASTA file of the translated CDS sequences + - `*.gbk`: annotation in GBK format, containing both sequences and annotations + +> Descriptions taken from the [Prodigal documentation](https://github.com/hyattpd/prodigal/wiki)
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +[Prodigal](https://github.com/hyattpd/Prodigal) annotates whole (meta-)genomes by identifying ORFs in a set of genomic DNA sequences. The output is used by some of the functional screening tools. -### MultiQC +#### Pyrodigal
Output files -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `pyrodigal/` + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC subworkflow only) + - `/`: + - `*.gbk`: annotation in GBK format, containing both sequences and annotations + - `*.fna`: nucleotide FASTA file of the annotated CDS sequences + - `*.faa`: protein FASTA file of the translated CDS sequences + +> Descriptions taken from the [Pyrodigal documentation](https://pyrodigal.readthedocs.io/) + +
+ +[Pyrodigal](https://github.com/althonos/pyrodigal) annotates whole (meta-)genomes by identifying ORFs in a set of genomic DNA sequences. It produces the same results as [Prodigal](#prodigal) while being more resource-optimized, thus faster. Unlike Prodigal, Pyrodigal cannot produce output in GenBank format. The output is used by some of the functional screening tools. + +#### Prokka + +
+Output files + +- `prokka/` + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC subworkflow only) + - `/` + - `*.gff`: annotation in GFF3 format, containing both sequences and annotations + - `*.gbk`: standard Genbank file derived from the master .gff + - `*.fna`: nucleotide FASTA file of the input contig sequences + - `*.faa`: protein FASTA file of the translated CDS sequences + - `*.ffn`: nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA) + - `*.sqn`: an ASN1 format "Sequin" file for submission to Genbank + - `*.fsa`: nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file + - `*.tbl`: feature Table file, used by "tbl2asn" to create the .sqn file + - `*.err`: unacceptable annotations - the NCBI discrepancy report + - `*.log`: logging output that Prokka produced during its run + - `*.txt`: statistics relating to the annotated features found + - `*.tsv`: tab-separated file of all features + +> Descriptions directly from the [Prokka documentation](https://github.com/tseemann/prokka#output-files)
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +[Prokka](https://github.com/tseemann/prokka) performs whole genome annotation to identify features of interest in a set of (meta-)genomic DNA sequences. The output is used by some of the functional screening tools. + +#### Bakta + +
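+
+A sketch of annotating with Bakta using its light database, with flags taken from the `test_taxonomy_bakta` profile shown earlier in this document (samplesheet and `<OUTDIR>` are illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_amp_screening \
+  --annotation_tool bakta \
+  --annotation_bakta_db_downloadtype 'light'
+```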
+Output files
+
+- `bakta/`
+  - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC only)
+    - `<samplename>/`
+      - `<samplename>.gff3`: annotations & sequences in GFF3 format
+      - `<samplename>.gbff`: annotations & sequences in (multi) GenBank format
+      - `<samplename>.ffn`: feature nucleotide sequences as FASTA
+      - `<samplename>.fna`: replicon/contig DNA sequences as FASTA
+      - `<samplename>.embl`: annotations & sequences in (multi) EMBL format
+      - `<samplename>.faa`: CDS/sORF amino acid sequences as FASTA
+      - `<samplename>_hypothetical.faa`: hypothetical protein CDS amino acid sequences as FASTA
+      - `<samplename>_hypothetical.tsv`: further information on hypothetical protein CDS as simple human readable tab separated values
+      - `<samplename>.tsv`: annotations as simple human readable TSV
+      - `<samplename>.txt`: summary in TXT format
+
+> Descriptions taken from the [Bakta documentation](https://github.com/oschwengers/bakta#output).
+
+ +[Bakta](https://github.com/oschwengers/bakta) is a tool for the rapid & standardised annotation of bacterial genomes and plasmids from both isolates and MAGs. It provides dbxref-rich, sORF-including and taxon-independent annotations in machine-readable JSON & bioinformatics standard file formats for automated downstream analysis. The output is used by some of the functional screening tools. + +### AMP detection tools + +[ampir](#ampir), [AMPlify](#amplify), [hmmsearch](#hmmsearch), [Macrel](#macrel) + +#### ampir + +
+Output files + +- `ampir/` + - `.ampir.faa`: predicted AMP sequences in FAA format + - `.ampir.tsv`: predicted AMP metadata in TSV format, contains contig name, sequence and probability score + +
+ +[ampir](https://github.com/Legana/ampir) (**a**nti**m**icrobial **p**eptide **p**rediction **i**n **r**) was designed to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, “precursor” and “mature” that have been trained on publicly available antimicrobial peptide data. + +#### AMPlify + +
+Output files
+
+- `amplify/`
+  - `*_results.tsv`: table of contig amino-acid sequences with prediction result (AMP or non-AMP) and information on sequence length, charge, probability score, and AMPlify log-scaled score
+
+ +[AMPlify](https://github.com/bcgsc/AMPlify) is an attentive deep learning model for antimicrobial peptide prediction. It takes in contig annotations (as protein sequences) and classifies them as either AMP or non-AMP. + +#### hmmsearch + +
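+
+hmmsearch is the one screening tool that must be explicitly switched on and given HMM model files. A sketch for the AMP workflow, using parameters from this document's test profiles (the model path is illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_amp_screening \
+  --amp_run_hmmsearch \
+  --amp_hmmsearch_models '<path>/<to>/<models>.hmm' \
+  --amp_hmmsearch_savealignments --amp_hmmsearch_savetargets --amp_hmmsearch_savedomains
+```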
+Output files
+
+- `hmmsearch/`
+  - `*.txt.gz`: human readable output summarizing hmmsearch results
+  - `*.sto.gz`: optional multiple sequence alignment (MSA) in Stockholm format
+  - `*.tbl.gz`: optional tabular (space-delimited) summary of per-target output
+  - `*.domtbl.gz`: optional tabular (space-delimited) summary of per-domain output
+
+ +[HMMER/hmmsearch](http://hmmer.org) is used for searching sequence databases for sequence homologs, and for making sequence alignments. It implements methods using probabilistic models called profile hidden Markov models (profile HMMs). `hmmsearch` is used to search one or more profiles against a sequence database. + +#### Macrel + +
+Output files + +- `macrel_contigs/` + - `*.smorfs.faa.gz`: zipped fasta file containing amino acid sequences of small peptides (<100 aa, small open reading frames) showing the general gene prediction information in the contigs + - `*.all_orfs.faa.gz`: zipped fasta file containing amino acid sequences showing the general gene prediction information in the contigs + - `prediction.gz`: zipped file, with all predicted and non-predicted AMPs in a table format + - `*.md`: readme file containing tool specific information (e.g. citations, details about the output, etc.) + - `*_log.txt`: log file containing the information pertaining to the run + +
+ +[Macrel](https://github.com/BigDataBiology/macrel) is a tool that mines antimicrobial peptides (AMPs) from (meta)genomes by predicting peptides from genomes (provided as contigs) and outputs predicted antimicrobial peptides that meet specific criteria/thresholds. + +### ARG detection tools + +[ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), [DeepARG](#deeparg), [fARGene](#fargene), [RGI](#rgi). + +#### ABRicate + +
+Output files + +- `abricate/` + - `*.{csv,tsv}`: search results in tabular format + +
+ +[ABRicate](https://github.com/tseemann/abricate) screens contigs for antimicrobial resistance or virulence genes. It comes bundled with multiple databases: NCBI, CARD, ARG-ANNOT, Resfinder, MEGARES, EcOH, PlasmidFinder, Ecoli_VF and VFDB. + +#### AMRFinderPlus + +
+Output files + +- `amrfinderplus/` + - `*.tsv`: search results in tabular format + +
+ +[AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder) relies on NCBI’s curated Reference Gene Database and curated collection of Hidden Markov Models. It identifies antimicrobial resistance genes, resistance-associated point mutations, and select other classes of genes using protein annotations and/or assembled nucleotide sequences. + +#### DeepARG + +
+Output files + +- `deeparg/` + - `*.align.daa*`: Intermediate DIAMOND alignment output + - `*.align.daa.tsv`: DIAMOND alignment output as .tsv + - `*.mapping.ARG`: ARG predictions with a probability >= --prob (0.8 default). + - `*.mapping.potential.ARG`: ARG predictions with a probability < --prob (0.8 default) + +
+ +[deepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master) uses deep learning to characterize and annotate antibiotic resistance genes in metagenomes. It is composed of two models for two types of input: short sequence reads and gene-like sequences. In this pipeline we use the `ls` model, which is suitable for annotating full sequence genes and to discover novel antibiotic resistance genes from assembled samples. The tool `DIAMOND` is used as an aligner. + +#### fARGene + +
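+
+A sketch of restricting fARGene to specific models with `--arg_fargene_hmmmodel` (the value below is the one used in this document's test profiles; samplesheet and `<OUTDIR>` are illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_arg_screening \
+  --arg_fargene_hmmmodel 'class_a,class_b_1_2'
+```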
+Output files + +- `fargene/` + - `fargene_analysis.log`: logging output that fARGene produced during its run + - `/`: + - `hmmsearchresults/`: output from intermediate hmmsearch step + - `predictedGenes/`: + - `*-filtered.fasta`: nucleotide sequences of predicted ARGs + - `*-filtered-peptides.fasta`: amino acid sequences of predicted ARGs + - `results_summary.txt`: text summary of results, listing predicted genes and ORFs for each input file + - `tmpdir/`: temporary output files and fasta files (only if `--arg_fargene_savetmpfiles` supplied) + +
+
+[fARGene](https://github.com/fannyhb/fargene) (**F**ragmented **A**ntibiotic **R**esistance **Gene** Identifier) is a tool that takes either fragmented metagenomic data or longer sequences as input and predicts and delivers full-length antibiotic resistance genes as output. The tool includes developed and optimised models for a number of resistance gene types. By default the pipeline runs all models, thus you will receive output for all models. If only a subset or a single model is required, this can be specified with the `--arg_fargene_hmmmodel` flag. Available models are:
+
+- `class_a`: class A beta-lactamases
+- `class_b_1_2`: subclass B1 and B2 beta-lactamases
+- `class_b3`: subclass B3 beta-lactamases
+- `class_c`: class C beta-lactamases
+- `class_d_1`, `class_d_2`: class D beta-lactamases
+- `qnr`: quinolone resistance genes
+- `tet_efflux`, `tet_rpg`, `tet_enzyme`: tetracycline resistance genes
+
+#### RGI
+
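+
+A sketch of retaining RGI's optional JSON and temporary outputs, using the save flags described in the output list below (samplesheet and `<OUTDIR>` are illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_arg_screening \
+  --arg_rgi_savejson --arg_rgi_savetmpfiles
+```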
+Output files + +- `rgi/` + - `.txt`: hit results table separated by '#' + - `.json`: hit results in json format (only if `--arg_rgi_savejson` supplied) + - `temp/`: + - `.fasta.temp.*.json`: temporary json files, '\*' stands for 'homolog', 'overexpression', 'predictedGenes' and 'predictedGenes.protein' (only if `--arg_rgi_savetmpfiles` supplied). + +
+
+[RGI](https://github.com/arpcard/rgi) (**R**esistance **G**ene **I**dentifier) predicts resistome(s) from protein or nucleotide data based on homology and SNP models. It uses reference data from the Comprehensive Antibiotic Resistance Database (CARD).
+
+### BGC detection tools
+
+[antiSMASH](#antismash), [deepBGC](#deepbgc), [GECCO](#gecco), [hmmsearch](#hmmsearch).
+
+Note that the BGC tools are run on a set of annotations generated on only long contigs (3000 bp or longer) by default. These specific filtered FASTA files are under `bgc/seqkit/`, and annotation files are under `annotation/<tool>/long/`, if the corresponding saving flags are specified (see [parameter docs](https://nf-co.re/funcscan/parameters)). However, the same annotations _should_ also be present in the sister `all/` directory.
+
+### Input contig QC
+
+Output files
+
+- `seqkit/`
+  - `<samplename>_long.fasta`: FASTA file containing contigs equal to or longer than the threshold set by `--contig_qc_lengththreshold`, used in the BGC subworkflow
+
+[SeqKit](https://bioinf.shenwei.me/seqkit/) is a cross-platform and ultrafast toolkit for FASTA/Q file manipulation.
+
+Note that the filtered FASTA files are only used in the BGC workflow, for run-time optimisation and biological reasons. All contigs are otherwise screened in the ARG/AMP workflows.
+
+#### antiSMASH
+
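+
+Given the minimum contig length behaviour discussed in this document, BGC screening is typically run with explicit length thresholds; a sketch using the values from the test profiles (samplesheet and `<OUTDIR>` are illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_bgc_screening \
+  --bgc_mincontiglength 1000 \
+  --bgc_antismash_contigminlength 1000
+```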
+Output files + +- `antismash/` + - `css`: accessory files for the HTML output + - `clusterblastoutput.txt` (optional): raw BLAST output of known clusters previously predicted by antiSMASH using the built-in ClusterBlast algorithm + - `images`: accessory files for the HTML output + - `index.html`: interactive web view of results in HTML format + - `js`: accessory files for the HTML output + - `knownclusterblast/`: directory with MIBiG hits (optional) + - `*_c*.txt`: tables with MIBiG hits + - `knownclusterblastoutput.txt` (optional): raw BLAST output of known clusters of the MIBiG database. + - `regions.js`: sideloaded annotations of protoclusters and/or subregions + - `*region*.gbk`: nucleotide sequence + annotations in GenBank file format; one file per antiSMASH hit. + - `.gbk`: nucleotide sequence and annotations in GenBank format; converted from input file + - `.json`: nucleotide sequence and annotations in JSON format; converted from GenBank file + - `.log`: logging output that antiSMASH produced during its run + - `.zip`: compressed version of the output folder in zip format + +
+ +[antiSMASH](https://docs.antismash.secondarymetabolites.org) (**anti**biotics & **S**econdary **M**etabolite **A**nalysis **SH**ell) is a tool for rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial genomes. It identifies biosynthetic loci covering all currently known secondary metabolite compound classes in a rule-based fashion using profile HMMs and aligns the identified regions at the gene cluster level to their nearest relatives from a database containing experimentally verified gene clusters (MIBiG). + +#### deepBGC + +
+Output files + +- `deepbgc/` + - `README.txt`: Summary of output files generated + - `LOG.txt`: Log output of DeepBGC + - `*.antismash.json`: AntiSMASH JSON file for sideloading + - `*.bgc.gbk`: Sequences and features of all detected BGCs in GenBank format + - `*.bgc.tsv`: Table of detected BGCs and their properties + - `*.full.gbk`: Fully annotated input sequence with proteins, Pfam domains (PFAM_domain features) and BGCs (cluster features) + - `*.pfam.tsv`: Table of Pfam domains (pfam_id) from given sequence (sequence_id) in genomic order, with BGC detection scores + - `evaluation/` + - `*.bgc.png`: Detected BGCs plotted by their nucleotide coordinates + - `*.pr.png`: Precision-Recall curve based on predicted per-Pfam BGC scores + - `*.roc.png`: ROC curve based on predicted per-Pfam BGC scores + - `*.score.png`: BGC detection scores of each Pfam domain in genomic order + +
+ +[deepBGC](https://github.com/Merck/deepbgc) detects BGCs in bacterial and fungal genomes using deep learning. DeepBGC employs a Bidirectional Long Short-Term Memory Recurrent Neural Network and a word2vec-like vector embedding of Pfam protein domains. Product class and activity of detected BGCs is predicted using a Random Forest classifier. + +#### GECCO + +
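+
+A sketch of a GECCO-only BGC screening run, skipping the other BGC tools with the skip flags used in this document's test profiles (samplesheet and `<OUTDIR>` are illustrative):
+
+```bash
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker \
+  --run_bgc_screening \
+  --bgc_skip_antismash --bgc_skip_deepbgc
+```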
+Output files + +- `gecco/` + - `*.genes.tsv/`: TSV file containing detected/predicted genes with BGC probability scores + - `*.features.tsv`: TSV file containing identified domains + - `*.clusters.tsv`: TSV file containing coordinates of predicted clusters and BGC types + - `*_cluster_*.gbk`: GenBank file (if clusters were found) containing sequence with annotations; one file per GECCO hit + +
+ +[GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). + +### Summary tools + +[AMPcombi](#ampcombi), [hAMRonization](#hamronization), [comBGC](#combgc), [MultiQC](#multiqc), [pipeline information](#pipeline-information), [argNorm](#argnorm). + +#### AMPcombi + +
+Output files
+
+- `ampcombi/`
+  - `Ampcombi_summary.tsv`: tab-separated table containing the concatenated and filtered results from each AMPcombi summary table. This is the output given when the taxonomic classification is not activated (pipeline default).
+  - `Ampcombi_parse_tables.log`: log file containing the run information from AMPcombi submodule `ampcombi2/parsetables`
+  - `Ampcombi_complete.log`: log file containing the run information from AMPcombi submodule `ampcombi2/complete`
+  - `Ampcombi_summary_cluster.tsv`: tab-separated table containing the clustered AMP hits. This is the output given when the taxonomic classification is not activated (pipeline default).
+  - `Ampcombi_summary_cluster_representative_seq.tsv`: tab-separated table containing the representative sequence of each cluster. This can be used in AMPcombi for constructing 3D structures using ColabFold. For more details on how to do this, please refer to the [AMPcombi documentation](https://ampcombi.readthedocs.io/en/main/).
+  - `Ampcombi_cluster.log`: log file containing the run information from AMPcombi submodule `ampcombi2/cluster`
+  - `ampcombi_complete_summary_taxonomy.tsv.gz`: summarised output from all AMP workflow tools with taxonomic assignment in compressed tsv format. This is the same output as the `Ampcombi_summary_cluster.tsv` file but with taxonomic classification of the contig.
+  - `<samplename>/contig_gbks`: contains all the contigs in gbk format on which an AMP was found, using the custom parameters
+  - `<samplename>/*_ampcombi.log`: a log file generated by AMPcombi
+  - `<samplename>/*_ampcombi.tsv`: summarised output in tsv format for each sample
+  - `<samplename>/*_amp.faa*`: fasta file containing the amino acid sequences for all AMP hits for each sample
+  - `<samplename>/*_mmseqs_matches.txt*`: alignment file generated by MMseqs2 for each sample
+
+AMP summary table header descriptions using DRAMP as reference database
+
+| Table column | Description |
+| ------------ | ----------- |
+| `name` | Name of the sample |
+| `contig_id` | Contig header |
+| `prob_macrel` | Probability associated with the AMP prediction using `MACREL` |
+| `prob_neubi` | Probability associated with the AMP prediction using `NEUBI` |
+| `prob_ampir` | Probability associated with the AMP prediction using `AMPIR` |
+| `prob_amplify` | Probability associated with the AMP prediction using `AMPLIFY` |
+| `evalue_hmmer` | Expected number of false positives (nonhomologous sequences) with a similar or higher score. This indicates how significant the hit is: the lower the e-value, the more significant the hit |
+| `aa_sequence` | Amino acid sequence that forms part of the contig and is AMP encoding |
+| `target_id` | [DRAMP](http://dramp.cpu-bioinfor.org/) ID within the database found to be similar to the predicted AMP by `MMseqs2` alignment |
+| `pident` | Percentage identity of amino acid residues that fully aligned between the `DRAMP` sequence and the predicted AMP sequence |
+| `evalue` | Number of alignments of similar or better qualities that can be expected when searching a database of similar size with a random sequence distribution. This is generated by `MMseqs2` alignments using the [DRAMP](http://dramp.cpu-bioinfor.org/) AMP database. The lower the value, the more significant the hit. An e-value of < 0.001 means that this hit will be found by chance once per 1,000 queries |
+| `Sequence` | Sequence corresponding to the `DRAMP` ID found to be similar to the predicted AMP sequence |
+| `Sequence_length` | Number of amino acid residues in the `DRAMP` sequence |
+| `Name` | Full name of the peptide copied from the database it was uploaded to |
+| `Swiss_Prot_Entry` | Entry name of the peptide within the [UniProtKB/Swiss-Prot](https://www.uniprot.org/help/entry_name) database |
+| `Family` | Name of the family, group or class of AMPs this peptide belongs to, e.g. bacteriocins |
+| `Gene` | Name of the gene (if available in the database) that encodes the peptide |
+| `Source` | Name of the source organism (if available in the database) from which the peptide was extracted |
+| `Activity` | Peptide activity, e.g. Antimicrobial, Antibacterial, Anti-Gram+, Anti-Gram-, Insecticidal or Antifungal |
+| `Protein_existence` | Peptide status, e.g. only a homology, protein level, predicted or transcript level |
+| `Structure` | Type of peptide structure, e.g. alpha helix, bridge, etc. |
+| `Structure_Description` | Further description of the structure if available |
+| `PDB_ID` | The ID of an equivalent peptide found in the protein data bank [PDB](https://www.rcsb.org/docs/general-help/organization-of-3d-structures-in-the-protein-data-bank) |
+| `Comments` | Further details found in the database regarding the peptide |
+| `Target_Organism` | Name of the target organism against which the peptide is effective |
+| `Hemolytic_activity` | Type of hemolytic activity if any |
+| `Linear/Cyclic/Branched` | Whether the hit is a linear, cyclic or branched peptide |
+| `N-terminal_Modification` | Whether it contains an N-terminal modification |
+| `C-terminal_Modification` | Whether it contains a C-terminal modification |
+| `Other_Modifications` | Whether there are any other modifications found in the peptide structure |
+| `Stereochemistry` | Type of peptide stereochemistry if available |
+| `Cytotoxicity` | Cytotoxicity mechanism of the peptide if available |
+| `Binding_Target` | Peptide binding target, e.g. lipid, cell membrane or chitin binding |
+| `Pubmed_ID` | Pubmed ID if a publication is associated with the peptide |
+| `Reference` | Citation of the associated publication if available |
+| `Author` | Authors' names associated with the publication or who have uploaded the peptide |
+| `Title` | Publication title if available |
+| `...` | |
+
+
+[AMPcombi](https://github.com/Darcy220606/AMPcombi) summarizes the results of **antimicrobial peptide (AMP)** prediction tools (ampir, AMPlify, Macrel, and other non-nf-core supported tools) into a single table and aligns the hits against a reference AMP database for functional, structural and taxonomic classification using [MMseqs2](https://github.com/soedinglab/MMseqs2). It further assigns the physicochemical properties (e.g. hydrophobicity, molecular weight) using the [Biopython toolkit](https://github.com/biopython/biopython) and clusters the resulting AMP hits from all samples using [MMseqs2](https://github.com/soedinglab/MMseqs2). To further filter the recovered AMPs based on the presence of signal peptides, the output file `Ampcombi_summary_cluster.tsv` or `ampcombi_complete_summary_taxonomy.tsv.gz` can be used downstream as detailed [here](https://ampcombi.readthedocs.io/en/main/usage.html#signal-peptide). The final tables generated may also be visualized and explored using an interactive [user interface](https://ampcombi.readthedocs.io/en/main/visualization.html).
+
+AMPcombi interface
+
+#### hAMRonization
+
+Output files + +- `hamronization_summarize/` one of the following: + - `hamronization_combined_report.json`: summarised output in .json format + - `hamronization_combined_report.tsv`: summarised output in .tsv format when the taxonomic classification is turned off (pipeline default). + - `hamronization_combined_report.tsv.gz`: summarised output in gzipped format when the taxonomic classification is turned on by `--run_taxa_classification`. + - `hamronization_combined_report.html`: interactive output in .html format + +
+
+ARG summary table headers + +| Table column | Description | +| ------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `input_file_name` | Name of the file containing the sequence data to be analysed | +| `gene_symbol` | Short name of a gene; a single word that does not contain white space characters. It is typically derived from the gene name | +| `gene_name` | Name of a gene | +| `reference_database_name` | Identifier of a biological or bioinformatics database | +| `reference_database_version` | Version of the database containing the reference sequences used for analysis | +| `reference_accession` | Identifier that specifies an individual sequence record in a public sequence repository | +| `analysis_software_name` | Name of a computer package, application, method or function used for the analysis of data | +| `analysis_software_version` | Version of software used to analyze data | +| `genetic_variation_type` | Class of genetic variation detected | +| `antimicrobial_agent` (optional) | A substance that kills or slows the growth of microorganisms, including bacteria, viruses, fungi and protozoans | +| `coverage_percentage` (optional) | Percentage of the reference sequence covered by the sequence of interest | +| `coverage_depth` (optional) | Average number of reads representing a given nucleotide in the reconstructed sequence | +| `coverage_ratio` (optional) | Ratio of the reference sequence covered by the sequence of interest. | +| `drug_class` (optional) | Set of antibiotic molecules, with similar chemical structures, molecular targets, and/or modes and mechanisms of action | +| `input_gene_length` (optional) | Length (number of positions) of a target gene sequence submitted by a user | +| `input_gene_start` (optional) | Position of the first nucleotide in a gene sequence being analysed (input gene sequence) | +| `input_gene_stop` (optional) | Position of the last nucleotide in a gene sequence being analysed (input gene sequence) | +| `input_protein_length` (optional) | Length (number of positions) of a protein target sequence submitted by a user | +| `input_protein_start` (optional) | Position of the first amino acid in a protein sequence being analysed (input protein sequence) | +| `input_protein_stop` (optional) | Position of the last amino acid in a protein sequence being analysed (input protein sequence) | +| `input_sequence_id` (optional) | Identifier of molecular sequence(s) or entries from a molecular sequence database | +| `nucleotide_mutation` (optional) | Nucleotide sequence change(s) detected in the sequence being analysed compared to a reference | +| `nucleotide_mutation_interpretation` (optional) | Description of identified nucleotide mutation(s) that facilitate clinical interpretation | +| `predicted_phenotype` (optional) | Characteristic of an organism that is predicted rather than directly measured or observed | +| `predicted_phenotype_confidence_level` (optional) | Confidence level in a predicted phenotype | +| `amino_acid_mutation` (optional) | Amino acid sequence change(s) detected in the sequence being analysed compared to a reference | +| `amino_acid_mutation_interpretation` (optional) | Description of identified amino acid mutation(s) that facilitate clinical interpretation. 
| +| `reference_gene_length` (optional) | Length (number of positions) of a gene reference sequence retrieved from a database | +| `reference_gene_start` (optional) | Position of the first nucleotide in a reference gene sequence | +| `reference_gene_stop` (optional) | Position of the last nucleotide in a reference sequence | +| `reference_protein_length` (optional) | Length (number of positions) of a protein reference sequence retrieved from a database | +| `reference_protein_start` (optional) | Position of the first amino acid in a reference protein sequence | +| `reference_protein_stop` (optional) | Position of the last amino acid in a reference protein sequence | +| `resistance_mechanism` (optional) | Antibiotic resistance mechanisms evolve naturally via natural selection through random mutation, but it could also be engineered by applying an evolutionary stress on a population | +| `strand_orientation` (optional) | Orientation of a genomic element on the double-stranded molecule | +| `sequence_identity` (optional) | Sequence identity is the number (%) of matches (identical characters) in positions from an alignment of two molecular sequences | + +
+ +[hAMRonization](https://github.com/pha4ge/hAMRonization) summarizes the outputs of the **antimicrobial resistance gene** detection tools (ABRicate, AMRFinderPlus, DeepARG, fARGene, RGI) into a single unified tabular format. It supports a variety of summary options including an interactive summary. + +#### argNorm + +
+Output files
+
+- `normalized/`
+  - `*.tsv`: search results in tabular format
+
+
+ ARG summary table headers + +| Table column | Description | +| ---------------------------- | -------------------------------------------------------------------------------- | +| `ARO` | ARO accessions of ARG | +| `confers_resistance_to` | ARO accessions of drugs to which ARGs confer resistance to | +| `resistance_to_drug_classes` | ARO accessions of drugs classes to which drugs in `confers_resistance_to` belong | + +
+
+[argNorm](https://github.com/BigDataBiology/argNorm) is a tool to normalize antibiotic resistance genes (ARGs) by mapping them to the Antibiotic Resistance Ontology ([ARO](https://obofoundry.org/ontology/aro.html)) created by the CARD database. argNorm also enhances antibiotic resistance gene annotations by providing categorization of the drugs that antibiotic resistance genes confer resistance to.
+
+argNorm takes the [hAMRonization](#hamronization) outputs of [ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), and [DeepARG](#deeparg) and normalizes the ARGs in those outputs to the ARO.
+
+#### comBGC
+
+Output files + +- `comBGC/` + - `combgc_complete_summary.tsv`: summarised output from all BGC detection tools used in tsv format (all samples concatenated). This is the output given when the taxonomic classification is not activated (pipeline default). + - `combgc_complete_summary.tsv.gz`: summarised output in gzipped format from all BGC detection tools used in tsv format (all samples concatenated) along with the taxonomic classification obtained when `--run_taxa_classification` is activated. + - `*/combgc_summary.tsv`: summarised output from all applied BGC detection tools in tsv format for each sample. + +
+ +
+BGC summary table headers
+
+| Table column | Description |
+| ------------ | ----------- |
+| `Sample_ID` | ID of the sample |
+| `Prediction_tool` | BGC prediction tool (antiSMASH, DeepBGC, and/or GECCO) |
+| `Contig_ID` | ID of the contig containing the candidate BGC |
+| `Product_class` | Predicted BGC type/class |
+| `BGC_probability` | Confidence of BGC candidate as inferred by the respective tool |
+| `BGC_complete` | Whether BGC sequence is assumed to be complete or truncated by the edge of the contig |
+| `BGC_start` | Predicted BGC start position on the contig |
+| `BGC_end` | Predicted BGC end position on the contig |
+| `BGC_length` | Length of the predicted BGC |
+| `CDS_ID` | ID of the coding sequence(s) (CDS) from the annotation step (prodigal/prokka/bakta) if provided by the tool |
+| `CDS_count` | Number of CDSs the BGC contains |
+| `PFAM_domains` | Inferred PFAM IDs or annotations if provided by the tool |
+| `MIBiG_ID` | Inferred MIBiG IDs if provided by the tool |
+| `InterPro_ID` | Inferred InterPro IDs if provided by the tool |
+
+ +**comBGC** is a tool built for nf-core/funcscan which summarizes the results of the **Biosynthetic Gene Cluster (BGC)** prediction tools (antiSMASH, deepBGC, GECCO) used in the pipeline into one comprehensive tabular summary with standardised headers. + +> ℹ️ comBGC does not feature `hmmer_hmmsearch` support. Please check the hmmsearch results directory. + +#### MultiQC + +
+Output files + +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser + - `multiqc_data/`: directory containing raw parsed data used for MultiQC report rendering + - `multiqc_plots/`: directory containing any static images from the report in various formats + +
-Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +[MultiQC](http://multiqc.info) is used in nf-core/funcscan to report the versions of all software used in the given pipeline run, and provides a suggested methods text. This allows for reproducible analysis and transparency in method reporting in publications. -### Pipeline information +#### Pipeline information
Output files
diff --git a/docs/usage.md b/docs/usage.md
index fe73ac87..9fc5bca5 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -6,69 +6,42 @@

 ## Introduction

-
+nf-core/funcscan is a pipeline for efficient and parallelised screening of long nucleotide sequences such as contigs for antimicrobial peptide genes, antimicrobial resistance genes, and biosynthetic gene clusters. It can additionally identify the taxonomic origin of the sequences.

-## Samplesheet input
+## Running the pipeline

-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+The typical command for running the pipeline is as follows:

 ```bash
---input '[path to samplesheet file]'
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker --run_<amp/arg/bgc>_screening
 ```

-### Multiple runs of the same sample
-
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
-
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
-```
-
-### Full samplesheet
-
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
-
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.

-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
-```
-
-| Column | Description |
-| --------- | ----------- |
-| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
+To run any of the three screening workflows (AMP, ARG, and/or BGC) or taxonomic classification, switch them on by adding the respective flag(s) to the command:

-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+- `--run_amp_screening`
+- `--run_arg_screening`
+- `--run_bgc_screening`
+- `--run_taxa_classification`

-## Running the pipeline
+When switched on, all tools of the given workflow will be run by default. If you don't need specific tools, you can explicitly skip them. The exception is hmmsearch, which needs to be explicitly switched on and provided with HMM screening files (AMP and BGC workflows, see [parameter documentation](/funcscan/parameters)). For the taxonomic classification, MMseqs2 is currently the only tool implemented in the pipeline.

-The typical command for running the pipeline is as follows:
+**Example:** You want to run AMP and ARG screening but you don't need the DeepARG tool of the ARG workflow and the Macrel tool of the AMP workflow. Your command would be:

 ```bash
-nextflow run nf-core/funcscan --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
-```
+nextflow run nf-core/funcscan --input samplesheet.csv --outdir <OUTDIR> -profile docker --run_arg_screening --arg_skip_deeparg --run_amp_screening --amp_skip_macrel
-This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
+```

 Note that the pipeline will create the following files in your working directory:

 ```bash
-work # Directory containing the nextflow working files
-<OUTDIR> # Finished results in specified location (defined with --outdir)
-.nextflow_log # Log file from Nextflow
-# Other nextflow hidden files, eg. history of pipeline runs and old logs.
+work # Directory containing temporary files required for the run
+<OUTDIR> # Final results (location specified with --outdir)
+.nextflow_log # Log file from nextflow
+
+# Other nextflow hidden files, eg. history of pipeline runs and old logs
 ```

 If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.

@@ -95,7 +68,448 @@ genome: 'GRCh37'

 You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).

-### Updating the pipeline
+## Samplesheet input
+
+nf-core/funcscan takes FASTA files as input, typically contigs or whole genome sequences. To supply these to the pipeline, you will need to create a samplesheet with information about the samples you would like to analyse. Use this parameter to specify its location.
+
+```bash
+--input '[path to samplesheet file]'
+```
+
+The input samplesheet has to be a comma-separated file (`.csv`) with 2 columns (`sample` and `fasta`) or 4 columns (`sample`, `fasta`, `protein`, `gbk`), and a header row as shown in the examples below.
+
+If you already have annotated contigs with peptide sequences and an annotation file in Genbank format (`.gbk` or `.gbff`), you can supply these to the pipeline using the optional `protein` and `gbk` columns. If these additional columns are supplied, pipeline annotation (i.e. with bakta, prodigal, pyrodigal or prokka) will be skipped and the corresponding annotation files used instead.
+
+For two columns (without pre-annotated data):
+
+```csv title="samplesheet.csv"
+sample,fasta
+sample_1,<path>/<to>/wastewater_metagenome_contigs_1.fasta.gz
+sample_2,<path>/<to>/wastewater_metagenome_contigs_2.fasta.gz
+```
+
+For four columns (with pre-annotated data):
+
+```csv title="samplesheet.csv"
+sample,fasta,protein,gbk
+sample_1,<path>/<to>/wastewater_metagenome_contigs_1.fasta.gz,<path>/<to>/wastewater_metagenome_contigs_1.faa,<path>/<to>/wastewater_metagenome_contigs_1.fasta.gbk
+sample_2,<path>/<to>/wastewater_metagenome_contigs_2.fasta.gz,<path>/<to>/wastewater_metagenome_contigs_2.faa,<path>/<to>/wastewater_metagenome_contigs_2.fasta.gbk
+```
+
+| Column    | Description                                                                                                                                                                     |
+| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `sample`  | Custom sample name. This will be used to name all output files from the pipeline. Spaces in sample names are automatically converted to underscores (`_`).                      |
+| `fasta`   | Path or URL to a gzipped or uncompressed FASTA file. Accepted file suffixes are: `.fasta`, `.fna`, or `.fa`, or any of these with `.gz`, e.g. `.fa.gz`.                          |
+| `protein` | Optional path to a pre-generated amino acid FASTA file (`.faa`) containing protein annotations of `fasta`, optionally gzipped. Required if `gbk` is also given.                  |
+| `gbk`     | Optional path to a pre-generated annotation file in GenBank format (`.gbk` or `.gbff`) containing annotation information for `fasta`, optionally gzipped. Required if `protein` is also given. |
+
+An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+
+:::danger
+We highly recommend performing quality control on input contigs before running the pipeline. You may not receive results for some tools if none of the contigs in a FASTA file reach certain thresholds. Check the parameter documentation for relevant minimum contig parameters.
+
+For example, BGC screening ideally requires contigs of at least 3,000 bp, else downstream tools may crash.
+:::
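+
+One way to apply such a length pre-filter, as a minimal sketch (this assumes you have [SeqKit](https://bioinf.shenwei.me/seqkit/) installed; the 3,000 bp cut-off mirrors the BGC example above and the file names are placeholders):
+
+```bash
+# Keep only contigs of at least 3,000 bp before passing them to the pipeline
+seqkit seq --min-len 3000 wastewater_metagenome_contigs.fasta.gz --out-file wastewater_metagenome_contigs_min3kb.fasta.gz
+```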
+
+## Notes on screening tools and taxonomic classification
+
+The implementation of some tools in the pipeline may have some particular behaviours that you should be aware of before you run the pipeline.
+
+### MMseqs2
+
+MMseqs2 is currently the only taxonomic classification tool used in the pipeline to assign a taxonomic lineage to the input contigs. The database used to assign the taxonomic lineage can either be:
+
+- A custom database created by the user with `mmseqs createdb` externally and beforehand (see the sketch after this list). If this parameter is supplied, this database takes precedence over the default database selected with `--taxa_classification_mmseqs_db_id`.
+
+  ```bash
+  --taxa_classification_mmseqs_db '<path>/<to>/<mmseqs_db_dir>/'
+  ```
+
+  The contents of the directory should have files such as `.version` and `.taxonomy` in the top level.
+
+- An MMseqs2 ready database. These databases were compiled by the developers of MMseqs2 and can be called using their labels. All available options can be found [here](https://github.com/soedinglab/MMseqs2/wiki#downloading-databases). Only use those databases that have taxonomy files available (i.e., Taxonomy == Yes). By default, MMseqs2 in the pipeline uses '[Kalamari](https://github.com/lskatz/Kalamari)' and runs an amino acid-based alignment. However, if you require a more comprehensive taxonomic classification, we recommend using [GTDB](https://gtdb.ecogenomic.org/); in that case, please remember to increase the memory, CPU threads, and time available to the process `MMSEQS_TAXONOMY`.
+
+  ```bash
+  --taxa_classification_mmseqs_db_id 'Kalamari'
+  ```
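+
+A minimal sketch of preparing such a custom database beforehand (all file and directory names below are placeholders; `mmseqs createtaxdb` attaches NCBI taxonomy information so the database can be used for classification):
+
+```bash
+# Convert your reference FASTA into an MMseqs2 database
+mkdir -p my_mmseqs_db
+mmseqs createdb my_references.fasta my_mmseqs_db/db
+
+# Add taxonomy information to the database
+mmseqs createtaxdb my_mmseqs_db/db tmp/
+```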
+
+### antiSMASH
+
+antiSMASH has a minimum contig parameter, in which only contigs of a certain length (or longer) will be screened. In cases where no hits are found in these, the tool ends successfully without hits. However, if no contigs in an input file reach that minimum threshold, the tool will end with a 'failure' code and cause the pipeline to crash.
+
+When the annotation is run with Prokka, the resulting `.gbk` file passed to antiSMASH may produce the error `translation longer than location allows` and end the pipeline run. This Prokka bug has been reported before (see [discussion on GitHub](https://github.com/antismash/antismash/discussions/450)) and is not likely to be fixed soon.
+
+:::warning
+If antiSMASH is run for BGC detection, we recommend **not** running Prokka for annotation, but instead using the default annotation tool (Pyrodigal), or switching to Prodigal or (for bacteria only!) Bakta.
+:::
+
+## Databases and reference files
+
+Various tools of nf-core/funcscan use databases and reference files to operate.
+
+nf-core/funcscan offers the functionality to auto-download databases for you. As these databases can be very large, we suggest storing these files in a central place from where you can reuse them across pipeline runs.
+
+If your infrastructure has internet access (particularly on compute nodes), we **highly recommend** allowing the pipeline to download these databases for you on a first run, saving these to your results directory with `--save_db`, then moving these to a different location (in case you wish to delete the results directory of this first run). An exception to this is HMM files, for which no auto-downloading functionality is possible.
+
+:::warning
+We generally do not recommend downloading the databases yourself, as this can often be non-trivial to do!
+:::
+
+As a reference, we will describe below where and how you can obtain databases and reference files used for tools included in the pipeline.
+
+### Bakta
+
+nf-core/funcscan offers multiple tools for annotating input sequences. Bakta is a newer tool touted as a bacteria-only successor to the well-established Prokka.
+
+To supply the preferred Bakta database (and not have the pipeline download it for every new run), use the flag `--annotation_bakta_db`.
+The full or light Bakta database must be downloaded from the Bakta Zenodo archive.
+
+You can do this by installing Bakta via conda and using its dedicated download command:
+
+```bash
+conda create -n bakta -c bioconda bakta
+conda activate bakta
+
+bakta_db download --output <LOCATION> --type <full|light>
+```
+
+Alternatively, you can manually download the files via the links which can be found on the [Bakta GitHub repository](https://github.com/oschwengers/bakta#database-download).
+
+Once downloaded, this must be untarred:
+
+```bash
+tar xvzf db.tar.gz
+```
+
+And then passed to the pipeline with:
+
+```bash
+--annotation_bakta_db '<path>/<to>/<bakta_db_dir>/'
+```
+
+The contents of the directory should have files such as `*.dmnd` in the top level.
+
+:::info
+The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
+:::
+
+### hmmsearch
+
+nf-core/funcscan allows screening of sequences for functional genes associated with various natural product types via Hidden Markov Models (HMMs) using hmmsearch.
+
+This requires supplying a list of HMM files ending in `.hmm` that contain models for the particular molecule(s) or BGCs you are interested in.
+You can download these files from places such as [PFAM](https://www.ebi.ac.uk/interpro/download/Pfam/) for antimicrobial peptides (AMP), or the antiSMASH GitHub repository for [biosynthetic gene cluster](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/data) related HMMs, or create them yourself.
+
+You should place all HMMs in a directory, supply them to the AMP or BGC workflow, and switch hmmsearch on:
+
+```bash
+--amp_run_hmmsearch --amp_hmmsearch_models "<path>/<to>/<hmm_dir>/*.hmm"
+```
+
+:::warning
+Ensure to wrap this path in double quotes if using an asterisk, to ensure Nextflow (not your shell) parses the wildcard.
+:::
+
+### AMPcombi
+
+For AMPcombi, nf-core/funcscan will by default download the most recent version of the [DRAMP](http://dramp.cpu-bioinfor.org/) database as a reference database, and modify the files for aligning the AMP hits in the AMP workflow.
+
+nf-core/funcscan currently provides a python3 helper script to do these steps:
+
+```bash
+mkdir -p ampcombi/amp_ref_database
+cd ampcombi/
+wget https://github.com/nf-core/funcscan/raw/<version>/bin/ampcombi_download.py
+python3 ampcombi_download.py
+```
+
+In addition to [DRAMP](http://dramp.cpu-bioinfor.org/), two more reference databases can be used to classify the recovered AMPs in the AMP workflow: [APD](https://aps.unmc.edu/) and [UniRef100](https://academic.oup.com/bioinformatics/article/23/10/1282/197795). Only one database can be used at a time, selected with `--amp_ampcombi_db <database_name>`.
+
+However, you can also supply your own custom AMP database by following the guidelines in [AMPcombi](https://ampcombi.readthedocs.io/en/main/).
+This can then be passed to the pipeline with:
+
+```bash
+--amp_ampcombi_db_dir_path '<path>/<to>/<ampcombi_database_dir>/'
+```
+
+The contents of the directory should have files such as `*.fasta` and `*.tsv` in the top level: a fasta file and the corresponding table with structural, functional and (if reported) taxonomic classifications. AMPcombi will then generate the corresponding `mmseqs2` directory, in which all binary files are prepared for downstream alignment of the recovered AMPs with [MMseqs2](https://github.com/soedinglab/MMseqs2). These can also be provided by the user by setting up an MMseqs2-compatible database using `mmseqs createdb *.fasta` in a directory called `mmseqs2` (see the sketch after the following example). An example file structure for [DRAMP](http://dramp.cpu-bioinfor.org/) used as the reference database:
+
+```bash
+amp_DRAMP_database/
+├── general_amps_2024_11_13.fasta
+├── general_amps_2024_11_13.txt
+└── mmseqs2
+    ├── ref_DB
+    ├── ref_DB.dbtype
+    ├── ref_DB_h
+    ├── ref_DB_h.dbtype
+    ├── ref_DB_h.index
+    ├── ref_DB.index
+    ├── ref_DB.lookup
+    └── ref_DB.source
+```
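+
+If you prefer to set up the `mmseqs2` directory yourself rather than letting AMPcombi generate it, a minimal sketch (assuming `mmseqs` is installed; the `ref_DB` prefix mirrors the layout above):
+
+```bash
+cd <path>/<to>/<ampcombi_database_dir>/
+mkdir -p mmseqs2
+mmseqs createdb *.fasta mmseqs2/ref_DB
+```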
+
+:::note{.fa-whale}
+For both [DRAMP](http://dramp.cpu-bioinfor.org/) and [APD](https://aps.unmc.edu/), AMPcombi by default removes entries that contain any non-amino acid residues.
+:::
+
+:::warning
+The pipeline will automatically run Pyrodigal instead of Prodigal if the parameters `--run_annotation_tool prodigal --run_amp_screening` are both provided.
+This is due to an incompatibility issue of Prodigal's output `.gbk` file with multiple downstream tools.
+:::
+
+### Abricate
+
+The default ABRicate installation comes with a series of 'default' databases:
+
+- NCBI AMRFinderPlus (`ncbi`)
+- CARD (`card`)
+- ResFinder (`resfinder`)
+- ARG-ANNOT (`argannot`)
+- MEGARES (`megares`)
+- EcOH (`ecoh`)
+- PlasmidFinder (`plasmidfinder`)
+- VFDB (`vfdb`)
+- Ecoli_VF (`ecoli_vf`)
+
+Each can be specified by using the nf-core/funcscan flag, for example, for CARD: `--arg_abricate_db_id card`.
+
+ABRicate also allows you to download additional and/or use custom databases.
+For both of these, you will need to have your own local installation of ABRicate.
+You then can download/add the custom database to the local installation's database directory, and supply this directory to the pipeline with the flag `--arg_abricate_db`, in combination with the name of the new database to `--arg_abricate_db_id <database_name>`.
+
+For example, if you want to use the `bacmet2` database that does not come with the default installation, you could do:
+
+```bash
+## Create conda environment
+conda create -n abricate -c bioconda abricate
+conda activate abricate
+
+## Download the bacmet2 database
+abricate-get_db --db bacmet2 ## the logging will tell you where the database is downloaded to, e.g. /home/<user>/bin/miniconda3/envs/abricate/db/bacmet2/sequences
+```
+
+The resulting directory and database name can be passed to the pipeline as follows:
+
+```bash
+--arg_abricate_db <path>/<to>/<abricate_install>/db/ --arg_abricate_db_id bacmet2
+```
+
+The contents of the directory should have a directory named with the database name in the top level (e.g. `bacmet2/`).
+
+### AMRFinderPlus
+
+AMRFinderPlus relies on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models.
+
+nf-core/funcscan will download this database for you, unless the path to a local version is given with:
+
+```bash
+--arg_amrfinderplus_db '<path>/<to>/<amrfinderplus_db>/latest'
+```
+
+You must give the `latest` directory to the pipeline, and the contents of the directory should include files such as `*.nbd`, `*.nhr`, `versions.txt`, etc. in the top level.
+
+To obtain a local version of the database:
+
+1. Install AMRFinderPlus from [bioconda](https://bioconda.github.io/recipes/ncbi-amrfinderplus/README.html?highlight=amrfinderplus).
+   To ensure database compatibility, please use the same version as is used in your nf-core/funcscan release (check the version in the file `<path>/<to>/funcscan/modules/nf-core/amrfinderplus/run/environment.yml`).
+
+```bash
+conda create -n amrfinderplus -c bioconda ncbi-amrfinderplus=3.12.8
+conda activate amrfinderplus
+```
+
+2. Run `amrfinder --update`, which will download the latest version of the AMRFinderPlus database to the default location (the location of the AMRFinderPlus binaries/data).
+   It creates a directory in the format `YYYY-MM-DD.version` (e.g., `<appdir>/data/2024-01-31.1/`).
+<details>
+<summary>AMR related files in the database folder</summary>
+
+```tree
+<YYYY-MM-DD.version>/
+├── AMR_CDS.*
+├── AMR_DNA-Campylobacter.*
+├── AMR_DNA-Clostridioides_difficile.*
+├── AMR_DNA-Enterococcus_faecalis.*
+├── AMR_DNA-Enterococcus_faecium.*
+├── AMR_DNA-Escherichia.*
+├── AMR_DNA-Neisseria.*
+├── AMR_DNA-Salmonella.*
+├── AMR_DNA-Staphylococcus_aureus.*
+├── AMR_DNA-Streptococcus_pneumoniae.*
+├── AMR.LIB.*
+├── AMRProt.*
+├── changes.txt
+├── database_format_version.txt
+├── fam.tab
+├── taxgroup.tab
+└── version.txt
+```
+
+</details>
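+
+For example, continuing from step 2 (here `<appdir>` is the data location reported by `amrfinder --update`, which typically also maintains a `latest` link pointing at the newest dated directory):
+
+```bash
+--arg_amrfinderplus_db '<appdir>/data/latest'
+```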
+ +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: + +### DeepARG + +DeepARG requires a database of potential antimicrobial resistance gene sequences based on a consensus from UNIPROT, CARD, and ARDB. + +nf-core/funcscan can download this database for you, however it is very slow and pipeline runtime will be improved if you download this separately and supply it to the pipeline. + +You can either: + +1. Install DeepARG from [bioconda](https://bioconda.github.io/recipes/deeparg/README.html?highlight=deeparg) + +```bash +conda create -n deeparg -c bioconda deeparg +conda activate deeparg +``` + +2. Run `deeparg download_data -o ////` + +Or download the files directly from + +1. the [DeepARG FTP site](https://bench.cs.vt.edu/ftp/data/gustavo1/deeparg/database/) +2. the [DeepARG database Zenodo archive](https://zenodo.org/record/8280582) + +Note that more recent database versions maybe available from the [ARGMiner service](https://bench.cs.vt.edu/argminer/#/home). + +You can then supply the path to resulting database directory with: + +```bash +--arg_deeparg_db '/////' +``` + +The contents of the directory should include directories such as `database`, `model`, and files such as `deeparg.gz` etc. in the top level. + +Note that if you supply your own database that is not downloaded by the pipeline, make sure to also supply `--arg_deeparg_db_version` along +with the version number so hAMRonization will correctly display the database version in the summary report. + +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. +You can then move these to a central cache directory of your choice for re-use in the future. +::: + +### MMSeqs2 + +To download MMSeqs2 databases for taxonomic classification, you can install `mmseqs` via conda: + +```bash +conda create -n mmseqs2 -c bioconda mmseqs2 +conda activate mmseqs2 +``` + +Then to download the database of your choice + +```bash +mmseqs databases tmp/ +``` + +:::info +You may want to specify a different location for `tmp/`, we just borrowed here from the official `mmseqs` [documentation](https://github.com/soedinglab/mmseqs2/wiki#downloading-databases). +::: + +### RGI + +RGI requires the database CARD which can be downloaded by nf-core/funcscan or supplied by the user manually. +To download and supply the database yourself, do: + +1. Download [CARD](https://card.mcmaster.ca/latest/data) + +```bash +wget https://card.mcmaster.ca/latest/data +``` + +2. Extract the (`.tar.bz2`) archive. + +```bash +tar -xjvf data +``` + +You can then supply the path to resulting database directory with: + +```bash +--arg_rgi_db '////' +``` + +The contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt` etc. in the top level. + +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. +You can then move these to a central cache directory of your choice for re-use in the future. +::: + +### antiSMASH + +antiSMASH requires several databases for the detection of potential biosynthetic gene cluster (BGC) sequences (ClusterBlast, MIBiG, Pfam, Resfams, TIGRFAMs). + +nf-core/funcscan can download these databases for you, however this is very slow and pipeline runtime will be improved if you download them separately and supply them to the pipeline. 
+
+### RGI
+
+RGI requires the CARD database, which can be downloaded by nf-core/funcscan or supplied by the user manually.
+To download and supply the database yourself, do:
+
+1. Download [CARD](https://card.mcmaster.ca/latest/data)
+
+```bash
+wget https://card.mcmaster.ca/latest/data
+```
+
+2. Extract the downloaded (`.tar.bz2`) archive.
+
+```bash
+tar -xjvf data
+```
+
+You can then supply the path to the resulting database directory with:
+
+```bash
+--arg_rgi_db '<path>/<to>/<card_db>/'
+```
+
+The contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt`, etc. in the top level.
+
+:::info
+The flag `--save_db` saves the pipeline-downloaded databases in your results directory.
+You can then move these to a central cache directory of your choice for re-use in the future.
+:::
+
+### antiSMASH
+
+antiSMASH requires several databases for the detection of potential biosynthetic gene cluster (BGC) sequences (ClusterBlast, MIBiG, Pfam, Resfams, TIGRFAMs).
+
+nf-core/funcscan can download these databases for you, however this is very slow, and pipeline runtime will be improved if you download them separately and supply them to the pipeline.
+
+The same applies to the antiSMASH installation directory, which is also a required parameter for the pipeline when using containers, due to a slight incompatibility when using such engines.
+
+To supply the database directories to the pipeline:
+
+1. Install antiSMASH from [bioconda](https://bioconda.github.io/recipes/antismash-lite/README.html). To ensure database compatibility, please use the same version as is used in your nf-core/funcscan release (check the version in the file `<path>/<to>/funcscan/modules/nf-core/antismash/antismashlite/environment.yml`).

+```bash
+conda create -n antismash-lite -c bioconda antismash-lite
+conda activate antismash-lite
+```
+
+2. Run the command `download-antismash-databases`. Use `--database-dir` to specify a new location.
+3. You can then supply the paths to the resulting databases and the whole installation directory with:
+
+```bash
+--bgc_antismash_db '<path>/<to>/<antismash_db>/'
+--bgc_antismash_installdir '<path>/<to>/<antismash_dir>/antismash'
+```
+
+Note that the names of the supplied folders must differ from each other (e.g. `antismash_db` and `antismash_dir`).
+The contents of the database directory should include directories such as `as-js/`, `clusterblast/`, `clustercompare/`, etc. in the top level.
+The contents of the installation directory should include directories such as `common/` and `config/`, and files such as `custom_typing.py` and `custom_typing.pyi`, etc. in the top level.
+
+:::info
+If installing with conda, the installation directory will be `lib/python3.10/site-packages/antismash` from the base directory of your conda install or conda environment directory.
+:::
+
+Note that the names of the two required folders must differ from each other (i.e., the `--bgc_antismash_db` directory must not be called `antismash`).
+If they are not provided, the databases will be auto-downloaded upon each BGC screening run of the pipeline.
+
+:::info
+The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
+:::
+
+### DeepBGC
+
+DeepBGC relies on trained models and Pfams to run its analysis.
+nf-core/funcscan will download these databases for you. If the flag `--save_db` is set, the downloaded files will be stored in the output directory under `databases/deepbgc/`.
+
+Alternatively, you can download the database locally with:
+
+```bash
+conda create -n deepbgc -c bioconda deepbgc
+conda activate deepbgc
+export DEEPBGC_DOWNLOADS_DIR=<path>/<to>/<deepbgc_db>
+deepbgc download
+```
+
+You can then indicate the path to the database folder in the pipeline with `--bgc_deepbgc_db <path>/<to>/<deepbgc_db>/`.
+The contents of the database directory should include the directories `common` and `0.1.0` in the top level:
+
+```console
+deepbgc_db/
+├── common
+│   └── Pfam-hmm-models*.hmm.*
+└── [0.1.0]
+    ├── classifier
+    │   └── myClassifiers*.pkl
+    └── detector
+        └── myDetectors*.pkl
+```
+
+## Updating the pipeline

 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since.
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -103,7 +517,7 @@ When you run the above command, Nextflow automatically pulls the pipeline code f nextflow pull nf-core/funcscan ``` -### Reproducibility +## Reproducibility It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. diff --git a/main.nf b/main.nf index 0dc68a48..39d0e261 100644 --- a/main.nf +++ b/main.nf @@ -15,10 +15,9 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FUNCSCAN } from './workflows/funcscan' +include { FUNCSCAN } from './workflows/funcscan' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_funcscan_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_funcscan_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_funcscan_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -26,10 +25,6 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_func ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -38,7 +33,7 @@ params.fasta = getGenomeAttribute('fasta') */ // -// WORKFLOW: Run main analysis pipeline depending on type of input +// WORKFLOW: Run main analysis pipeline // workflow NFCORE_FUNCSCAN { diff --git a/modules.json b/modules.json index 46bda8fd..3f236d08 100644 --- a/modules.json +++ b/modules.json @@ -5,7 +5,162 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "fastqc": { + "abricate/run": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "ampcombi2/cluster": { + "branch": "master", + "git_sha": "993865fe60cb1569155fbbbe0cee113e1127abaf", + "installed_by": ["modules"] + }, + "ampcombi2/complete": { + "branch": "master", + "git_sha": "993865fe60cb1569155fbbbe0cee113e1127abaf", + "installed_by": ["modules"] + }, + "ampcombi2/parsetables": { + "branch": "master", + "git_sha": "993865fe60cb1569155fbbbe0cee113e1127abaf", + "installed_by": ["modules"] + }, + "ampir": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "amplify/predict": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "amrfinderplus/run": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "amrfinderplus/update": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "antismash/antismashlite": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "antismash/antismashlitedownloaddatabases": { + 
"branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "argnorm": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bakta/bakta": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bakta/baktadbdownload": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "deeparg/downloaddata": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "deeparg/predict": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "deepbgc/download": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "deepbgc/pipeline": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "fargene": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "gecco/run": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hamronization/abricate": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hamronization/amrfinderplus": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hamronization/deeparg": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hamronization/fargene": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hamronization/rgi": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hamronization/summarize": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "hmmer/hmmsearch": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "macrel/contigs": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "mmseqs/createdb": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "mmseqs/createtsv": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "mmseqs/databases": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "mmseqs/taxonomy": { "branch": "master", "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", "installed_by": ["modules"] @@ -14,6 +169,47 @@ "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] + }, + "prodigal": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "prokka": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "pyrodigal": { + "branch": 
"master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "rgi/cardannotation": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "rgi/main": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "seqkit/seq": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/untar/untar.diff" } } }, diff --git a/modules/local/amp_database_download.nf b/modules/local/amp_database_download.nf new file mode 100644 index 00000000..8e2bc05a --- /dev/null +++ b/modules/local/amp_database_download.nf @@ -0,0 +1,30 @@ +process AMP_DATABASE_DOWNLOAD { + label 'process_single' + + conda "bioconda::ampcombi=2.0.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ampcombi:2.0.1--pyhdfd78af_0': + 'biocontainers/ampcombi:2.0.1--pyhdfd78af_0' }" + + input: + val database_id + + output: + path "amp_${database_id}_database" , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + ampcombi_download.py \\ + --database_id $database_id \\ + --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/local/combgc.nf b/modules/local/combgc.nf new file mode 100644 index 00000000..8da4c58c --- /dev/null +++ b/modules/local/combgc.nf @@ -0,0 +1,32 @@ +process COMBGC { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + tuple val(meta), path(input_paths) + + output: + tuple val(meta), path("${prefix}/combgc_summary.tsv") , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + prefix = task.ext.prefix ?: "${meta.id}" + """ + comBGC.py \\ + -i $input_paths \\ + -o $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + comBGC: \$(comBGC.py --version | sed 's/comBGC //g') + END_VERSIONS + """ +} diff --git a/modules/local/merge_taxonomy_ampcombi.nf b/modules/local/merge_taxonomy_ampcombi.nf new file mode 100644 index 00000000..26e38343 --- /dev/null +++ b/modules/local/merge_taxonomy_ampcombi.nf @@ -0,0 +1,32 @@ +process MERGE_TAXONOMY_AMPCOMBI { + label 'process_medium' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + path(ampcombi_df) + path(taxa_list) + + output: + path "ampcombi_complete_summary_taxonomy.tsv" , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + merge_taxonomy.py \\ + ampcombi_taxa \\ + --ampcombi $ampcombi_df \\ + --taxonomy $taxa_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + merge_taxonomy: \$(merge_taxonomy.py --version | sed 's/merge_taxonomy //g') + END_VERSIONS + """ +} diff --git a/modules/local/merge_taxonomy_combgc.nf b/modules/local/merge_taxonomy_combgc.nf new file mode 100644 index 00000000..075668f2 --- /dev/null +++ b/modules/local/merge_taxonomy_combgc.nf @@ -0,0 +1,32 @@ +process MERGE_TAXONOMY_COMBGC { + label 'process_medium' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + path(combgc_df) + path(taxa_list) + + output: + path "combgc_complete_summary_taxonomy.tsv" , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + merge_taxonomy.py \\ + combgc_taxa \\ + --combgc $combgc_df \\ + --taxonomy $taxa_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + merge_taxonomy: \$(merge_taxonomy.py --version | sed 's/merge_taxonomy //g') + END_VERSIONS + """ +} diff --git a/modules/local/merge_taxonomy_hamronization.nf b/modules/local/merge_taxonomy_hamronization.nf new file mode 100644 index 00000000..14b85ff2 --- /dev/null +++ b/modules/local/merge_taxonomy_hamronization.nf @@ -0,0 +1,32 @@ +process MERGE_TAXONOMY_HAMRONIZATION { + label 'process_medium' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + path(hamronization_df) + path(taxa_list) + + output: + path "hamronization_complete_summary_taxonomy.tsv" , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + merge_taxonomy.py \\ + hamronization_taxa \\ + --hamronization $hamronization_df \\ + --taxonomy $taxa_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + merge_taxonomy: \$(merge_taxonomy.py --version | sed 's/merge_taxonomy //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/abricate/run/environment.yml b/modules/nf-core/abricate/run/environment.yml new file mode 100644 index 00000000..c7a7d199 --- /dev/null +++ b/modules/nf-core/abricate/run/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::abricate=1.0.1 diff --git a/modules/nf-core/abricate/run/main.nf b/modules/nf-core/abricate/run/main.nf new file mode 100644 index 00000000..b0d8a68a --- /dev/null +++ b/modules/nf-core/abricate/run/main.nf @@ -0,0 +1,51 @@ +process ABRICATE_RUN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': + 'biocontainers/abricate:1.0.1--ha8f3691_1' }" + + input: + tuple val(meta), path(assembly) + path databasedir + + output: + tuple val(meta), path("*.txt"), emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def datadir = databasedir ? "--datadir ${databasedir}" : '' + """ + abricate \\ + $assembly \\ + $args \\ + $datadir \\ + --threads $task.cpus \\ + > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def datadir = databasedir ? '--datadir ${databasedir}' : '' + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/abricate/run/meta.yml b/modules/nf-core/abricate/run/meta.yml new file mode 100644 index 00000000..dce78f3c --- /dev/null +++ b/modules/nf-core/abricate/run/meta.yml @@ -0,0 +1,49 @@ +name: abricate_run +description: Screen assemblies for antimicrobial resistance against multiple databases +keywords: + - bacteria + - assembly + - antimicrobial resistance +tools: + - abricate: + description: Mass screening of contigs for antibiotic resistance genes + homepage: https://github.com/tseemann/abricate + documentation: https://github.com/tseemann/abricate + tool_dev_url: https://github.com/tseemann/abricate + licence: ["GPL v2"] + identifier: biotools:ABRicate +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - assembly: + type: file + description: FASTA, GenBank or EMBL formatted file + pattern: "*.{fa,fasta,fna,fa.gz,fasta.gz,fna.gz,gbk,gbk.gz,embl,embl.gz}" + - - databasedir: + type: directory + description: Optional location of local copy of database files, possibly with + custom databases set up with `abricate --setupdb` + pattern: "*/" +output: + - report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Tab-delimited report of results + pattern: "*.{txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/abricate/run/tests/main.nf.test b/modules/nf-core/abricate/run/tests/main.nf.test new file mode 100644 index 00000000..f31a67e7 --- /dev/null +++ b/modules/nf-core/abricate/run/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process ABRICATE_RUN" + script "../main.nf" + process "ABRICATE_RUN" + tag "modules" + tag "modules_nfcore" + tag "abricate" + tag "abricate/run" + + test("bacteroides_fragilis - genome.fa.gz") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bacteroides_fragilis - genome - stub") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/abricate/run/tests/main.nf.test.snap b/modules/nf-core/abricate/run/tests/main.nf.test.snap new file mode 100644 index 00000000..9f598c4a --- /dev/null +++ b/modules/nf-core/abricate/run/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "bacteroides_fragilis - genome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-19T21:06:27.483697023" + }, + "bacteroides_fragilis - genome": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,cd07e2953b127aed8d09bf1b2b903a1f" + ] + ], + "1": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,cd07e2953b127aed8d09bf1b2b903a1f" + ] + ], + "versions": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-19T21:06:20.036490495" + } +} \ No newline at 
end of file diff --git a/modules/nf-core/abricate/run/tests/tags.yml b/modules/nf-core/abricate/run/tests/tags.yml new file mode 100644 index 00000000..0a304886 --- /dev/null +++ b/modules/nf-core/abricate/run/tests/tags.yml @@ -0,0 +1,2 @@ +abricate/run: + - modules/nf-core/abricate/run/** diff --git a/modules/nf-core/ampcombi2/cluster/environment.yml b/modules/nf-core/ampcombi2/cluster/environment.yml new file mode 100644 index 00000000..f9c25b04 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ampcombi=2.0.1" diff --git a/modules/nf-core/ampcombi2/cluster/main.nf b/modules/nf-core/ampcombi2/cluster/main.nf new file mode 100644 index 00000000..98a19a96 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/main.nf @@ -0,0 +1,48 @@ +process AMPCOMBI2_CLUSTER { + tag 'ampcombi2' + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ampcombi:2.0.1--pyhdfd78af_0': + 'biocontainers/ampcombi:2.0.1--pyhdfd78af_0' }" + + input: + path(summary_file) + + output: + path("Ampcombi_summary_cluster.tsv") , emit: cluster_tsv + path("Ampcombi_summary_cluster_representative_seq.tsv"), emit: rep_cluster_tsv + path("Ampcombi_cluster.log") , emit: log, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ampcombi cluster \\ + --ampcombi_summary ${summary_file} \\ + $args \\ + --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch Ampcombi_summary_cluster.tsv + touch Ampcombi_summary_cluster_representative_seq.tsv + touch Ampcombi_cluster.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ampcombi2/cluster/meta.yml b/modules/nf-core/ampcombi2/cluster/meta.yml new file mode 100644 index 00000000..2e37a0c2 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/meta.yml @@ -0,0 +1,58 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ampcombi2_cluster" +description: A submodule that clusters the merged AMP hits generated from ampcombi2/parsetables + and ampcombi2/complete using MMseqs2 cluster. +keywords: + - antimicrobial peptides + - amps + - parsing + - reporting + - align + - clustering + - mmseqs2 +tools: + - ampcombi2/cluster: + description: "A tool for clustering all AMP hits found across many samples and + supporting many AMP prediction tools." + homepage: "https://github.com/Darcy220606/AMPcombi" + documentation: "https://github.com/Darcy220606/AMPcombi" + tool_dev_url: "https://github.com/Darcy220606/AMPcombi/tree/dev" + licence: ["MIT"] + identifier: "" + +input: + - - summary_file: + type: file + description: A file corresponding to the Ampcombi_summary.tsv that is generated + by running 'ampcombi complete'. It is a file containing all the merged AMP + results from all samples and all tools. 
+ pattern: "*.tsv" +output: + - cluster_tsv: + - Ampcombi_summary_cluster.tsv: + type: file + description: A file containing all the results from the merged input table 'Ampcombi_summary.tsv', + but also including the cluster id number. The clustering is done using MMseqs2 + cluster. + pattern: "*.tsv" + - rep_cluster_tsv: + - Ampcombi_summary_cluster_representative_seq.tsv: + type: file + description: A file containing the representative sequences of the clusters + estimated by the tool. The clustering is done using MMseqs2 cluster. + pattern: "*.tsv" + - log: + - Ampcombi_cluster.log: + type: file + description: A log file that captures the standard output for the entire process + in a log file. Can be activated by `--log`. + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/ampcombi2/cluster/tests/main.nf.test.snap b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test.snap new file mode 100644 index 00000000..fd79a83b --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "ampcombi2_cluster - metagenome": { + "content": [ + true, + true, + [ + "versions.yml:md5,b629089d44775078dce5e664a455422b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-03T07:57:01.869983435" + }, + "ampcombi2_cluster - metagenome - stub": { + "content": [ + { + "0": [ + "Ampcombi_summary_cluster.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "Ampcombi_summary_cluster_representative_seq.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "Ampcombi_cluster.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "3": [ + "versions.yml:md5,b629089d44775078dce5e664a455422b" + ], + "cluster_tsv": [ + "Ampcombi_summary_cluster.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "log": [ + "Ampcombi_cluster.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "rep_cluster_tsv": [ + "Ampcombi_summary_cluster_representative_seq.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,b629089d44775078dce5e664a455422b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-03T07:57:23.939137628" + } +} \ No newline at end of file diff --git a/modules/nf-core/ampcombi2/complete/environment.yml b/modules/nf-core/ampcombi2/complete/environment.yml new file mode 100644 index 00000000..f9c25b04 --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ampcombi=2.0.1" diff --git a/modules/nf-core/ampcombi2/complete/main.nf b/modules/nf-core/ampcombi2/complete/main.nf new file mode 100644 index 00000000..98f62347 --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/main.nf @@ -0,0 +1,44 @@ +process AMPCOMBI2_COMPLETE { + tag "ampcombi2" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ampcombi:2.0.1--pyhdfd78af_0': + 'biocontainers/ampcombi:2.0.1--pyhdfd78af_0' }" + + input: + path(summaries) + + output: + path("Ampcombi_summary.tsv") , emit: tsv + path("Ampcombi_complete.log"), emit: log, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ampcombi complete \\ + --summaries_files '${summaries.collect{"$it"}.join("' '")}' \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch Ampcombi_summary.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ampcombi2/complete/meta.yml b/modules/nf-core/ampcombi2/complete/meta.yml new file mode 100644 index 00000000..13a7468b --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/meta.yml @@ -0,0 +1,56 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ampcombi2_complete" +description: A submodule that merges all output summary tables from ampcombi/parsetables + in one summary file. +keywords: + - antimicrobial peptides + - amps + - parsing + - reporting + - align + - macrel + - amplify + - hmmsearch + - neubi + - ampir + - ampgram + - amptransformer + - DRAMP +tools: + - ampcombi2/complete: + description: "This merges the per sample AMPcombi summaries generated by running + 'ampcombi2/parsetables'." + homepage: "https://github.com/Darcy220606/AMPcombi" + documentation: "https://github.com/Darcy220606/AMPcombi" + tool_dev_url: "https://github.com/Darcy220606/AMPcombi/tree/dev" + licence: ["MIT"] + identifier: "" + +input: + - - summaries: + type: list + description: The path to the list of files corresponding to each sample as generated + by ampcombi2/parsetables. + pattern: "[*_ampcombi.tsv, *_ampcombi.tsv]" +output: + - tsv: + - Ampcombi_summary.tsv: + type: file + description: A file containing the complete AMPcombi summaries from all processed + samples. + pattern: "*.tsv" + - log: + - Ampcombi_complete.log: + type: file + description: A log file that captures the standard output for the entire process + in a log file. Can be activated by `--log`. 
+ pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/ampcombi2/complete/tests/main.nf.test.snap b/modules/nf-core/ampcombi2/complete/tests/main.nf.test.snap new file mode 100644 index 00000000..87435e5b --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "ampcombi2_complete - contigs - stub": { + "content": [ + { + "0": [ + "Ampcombi_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,bfba0046e0cfa7b0b6d79663823f94c0" + ], + "log": [ + + ], + "tsv": [ + "Ampcombi_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,bfba0046e0cfa7b0b6d79663823f94c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-03T07:57:53.385349848" + }, + "ampcombi2_complete - contigs": { + "content": [ + true, + [ + "versions.yml:md5,bfba0046e0cfa7b0b6d79663823f94c0" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-03T07:57:40.263912946" + } +} diff --git a/modules/nf-core/ampcombi2/parsetables/environment.yml b/modules/nf-core/ampcombi2/parsetables/environment.yml new file mode 100644 index 00000000..f9c25b04 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ampcombi=2.0.1" diff --git a/modules/nf-core/ampcombi2/parsetables/main.nf b/modules/nf-core/ampcombi2/parsetables/main.nf new file mode 100644 index 00000000..b9d855df --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/main.nf @@ -0,0 +1,92 @@ +process AMPCOMBI2_PARSETABLES { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ampcombi:2.0.1--pyhdfd78af_0': + 'biocontainers/ampcombi:2.0.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(amp_input) + path faa_input + path gbk_input + val opt_amp_db + path opt_amp_db_dir + path opt_interproscan + + output: + tuple val(meta), path("${meta.id}/") , emit: sample_dir + tuple val(meta), path("${meta.id}/contig_gbks/") , emit: contig_gbks + tuple val(meta), path("${meta.id}/${meta.id}_mmseqs_matches.tsv") , emit: db_tsv + tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.tsv") , emit: tsv + tuple val(meta), path("${meta.id}/${meta.id}_amp.faa") , emit: faa + tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.log") , emit: sample_log , optional:true + tuple val(meta), path("Ampcombi_parse_tables.log") , emit: full_log , optional:true + tuple val(meta), path("amp_${opt_amp_db}_database/") , emit: db , optional:true + tuple val(meta), path("amp_${opt_amp_db}_database/*.txt") , emit: db_txt , optional:true + tuple val(meta), path("amp_${opt_amp_db}_database/*.fasta") , emit: db_fasta , optional:true + tuple val(meta), path("amp_${opt_amp_db}_database/mmseqs2/") , emit: db_mmseqs , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def db_dir = opt_amp_db_dir ? "--amp_database_dir ${opt_amp_db_dir}" : "" + def interpro = opt_interproscan ? "--interproscan_output ${opt_interproscan}" : "" + + """ + ampcombi parse_tables \\ + --path_list '${amp_input.collect { "${it}" }.join("' '")}' \\ + --faa ${faa_input} \\ + --gbk ${gbk_input} \\ + --sample_list ${prefix} \\ + --amp_database ${opt_amp_db} \\ + ${db_dir} \\ + ${interpro} \\ + ${args} \\ + --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def db_dir = opt_amp_db_dir ? "--amp_database_dir ${opt_amp_db_dir}" : "" + def interpro = opt_interproscan ? 
"--interproscan_output ${opt_interproscan}" : "" + + """ + mkdir -p ${prefix} + mkdir -p ${prefix}/contig_gbks + touch ${prefix}/${meta.id}_mmseqs_matches.tsv + touch ${prefix}/${meta.id}_ampcombi.tsv + touch ${prefix}/${meta.id}_amp.faa + touch ${prefix}/${meta.id}_ampcombi.log + touch Ampcombi_parse_tables.log + + mkdir -p amp_${opt_amp_db}_database + mkdir -p amp_${opt_amp_db}_database/mmseqs2 + touch amp_${opt_amp_db}_database/*.fasta + touch amp_${opt_amp_db}_database/*.txt + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB.dbtype + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB_h + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB_h.dbtype + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB_h.index + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB.index + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB.lookup + touch amp_${opt_amp_db}_database/mmseqs2/ref_DB.source + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ampcombi2/parsetables/meta.yml b/modules/nf-core/ampcombi2/parsetables/meta.yml new file mode 100644 index 00000000..14a0fd02 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/meta.yml @@ -0,0 +1,200 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ampcombi2_parsetables" +description: A submodule that parses and standardizes the results from various antimicrobial + peptide identification tools. +keywords: + - antimicrobial peptides + - amps + - parsing + - reporting + - align + - macrel + - amplify + - hmmsearch + - neubi + - ampir + - ampgram + - amptransformer + - DRAMP + - MMseqs2 + - InterProScan +tools: + - ampcombi2/parsetables: + description: "A parsing tool to convert and summarise the outputs from multiple + AMP detection tools in a standardized format." + homepage: "https://github.com/Darcy220606/AMPcombi" + documentation: "https://ampcombi.readthedocs.io/en/main/" + tool_dev_url: "https://github.com/Darcy220606/AMPcombi/tree/dev" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - amp_input: + type: list + description: The path to the directory containing the results for the AMP tools + for each processed sample or a list of files corresponding to each file generated + by AMP tools. + pattern: "[*amptool.tsv, *amptool.tsv]" + - - faa_input: + type: file + description: The path to the file corresponding to the respective protein fasta + files with '.faa' extension. File names have to contain the corresponding + sample name, i.e. sample_1.faa + pattern: "*.faa" + - - gbk_input: + type: file + description: The path to the file corresponding to the respective annotated + files with either '.gbk' or '.gbff' extensions. File names must contain the + corresponding sample name, i.e. sample_1.faa where "sample_1" is the sample + name. + pattern: "*.gbk" + - - opt_amp_db: + type: string + description: The name of the database to download and set up. This can either be 'DRAMP', 'APD' or 'UniRef100'. + pattern: "DRAMP|APD|UniRef100" + - - opt_amp_db_dir: + type: directory + description: The path to the folder containing the fasta and tsv database files. 
+ pattern: "path/to/amp_*_database" + - - opt_interproscan: + type: directory + description: A path to a file corresponding to the respective tsv files containing protein classifications of the annotated CDSs. The file must be the raw output from InterProScan. + pattern: "*.tsv" +output: + - sample_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${meta.id}/: + type: directory + description: The output directory that contains the summary output and related + alignment files for one sample. + pattern: "/*" + - contig_gbks: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${meta.id}/contig_gbks/: + type: directory + description: The output subdirectory that contains the gbk files containing + the AMP hits for each sample. + pattern: "/*/contig_gbks" + - db_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${meta.id}/${meta.id}_mmseqs_matches.tsv: + type: file + description: An alignment file containing the results from the MMseqs2 alignment + step done on all AMP hits. + pattern: "/*/*_mmseqs_matches.tsv" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${meta.id}/${meta.id}_ampcombi.tsv: + type: file + description: A file containing the summary report of all predicted AMP hits + from all AMP tools given as input, the corresponding taxonomic and functional + classification from the alignment step and the estimated physiochemical properties. + pattern: "/*/*_ampcombi.tsv" + - faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${meta.id}/${meta.id}_amp.faa: + type: file + description: A fasta file containing the amino acid sequences of all predicted + AMP hits. + pattern: "/*/*_amp.faa" + - sample_log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${meta.id}/${meta.id}_ampcombi.log: + type: file + description: A log file that captures the standard output per sample in a log + file. Can be activated by `--log`. + pattern: "/*/*.log" + - full_log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - Ampcombi_parse_tables.log: + type: file + description: A log file that captures the standard output for the entire process + in a log file. Can be activated by `--log`. + pattern: "Ampcombi_parse_tables.log" + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - amp_${opt_amp_db}_database/: + type: directory + description: If the AMP reference database ID is not provided by the user using + the flag `--amp_database', by default the DRAMP database will be downloaded, + filtered and stored in this folder. + pattern: "/amp_*_database" + - db_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - amp_${opt_amp_db}_database/*.txt: + type: file + description: AMP reference database in tsv-format with two columns containing + header and sequence. 
+ pattern: "/amp_*_database/*.txt" + - db_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - amp_${opt_amp_db}_database/*.fasta: + type: file + description: AMP reference database fasta file in clean format. + characters. + pattern: "/amp_*_database/*.fasta" + - db_mmseqs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - amp_${opt_amp_db}_database/mmseqs2/: + type: directory + description: As alignment to the reference database is carried out by MMseqs2, this directory + contains all the files generated by MMseqs2 on the fasta file of the database. + pattern: "/amp_*_database/mmseqs2" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test new file mode 100644 index 00000000..272d31e6 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process AMPCOMBI2_PARSETABLES" + script "../main.nf" + process "AMPCOMBI2_PARSETABLES" + tag "modules" + tag "modules_nfcore" + tag "antimicrobial peptides" + tag "ampcombi2" + tag "ampcombi2/parsetables" + + config "./nextflow.config" + + test("ampcombi2_parsetables - metagenome") { + when { + process { + """ + amp_input = [ + [id:'sample_1'], + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/ampir/sample_1/sample_1.ampir.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/amplify/sample_1/sample_1.amplify.tsv', checkIfExists: true) + ] + ] + faa_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_faa_0.2/sample_1.faa', checkIfExists: true) + gbk_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_gbk_0.2/sample_1.gbff', checkIfExists: true) + + input[0] = amp_input + input[1] = faa_input + input[2] = gbk_input + input[3] = 'DRAMP' + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.sample_dir.collect { file(it[1]).getName() } + + process.out.contig_gbks.collect { file(it[1]).getName() } + + process.out.db_tsv.collect { file(it[1]).readLines()[0] } + + process.out.tsv.collect { file(it[1]).readLines()[0] } + + process.out.faa.collect { file(it[1]).readLines()[0] } + + process.out.full_log.collect { file(it[1]).readLines().contains("File downloaded successfully") } + + process.out.sample_log.collect { file(it[1]).readLines().contains("found ampir file") } + + process.out.db.collect { file(it[1]).getName() } + + process.out.db_txt.collect { file(it[1]).readLines()[0] } + + process.out.db_fasta.collect { file(it[1]).readLines()[0] } + + process.out.db_mmseqs.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + } + test("ampcombi2_parsetables - metagenome - stub") { + options "-stub" + when { + process { + """ + amp_input = [ + [id:'sample_1'], + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/ampir/sample_1/sample_1.ampir.tsv', checkIfExists: true), + 
file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/amplify/sample_1/sample_1.amplify.tsv', checkIfExists: true) + ] + ] + faa_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_faa_0.2/sample_1.faa', checkIfExists: true) + gbk_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_gbk_0.2/sample_1.gbff', checkIfExists: true) + + input[0] = amp_input + input[1] = faa_input + input[2] = gbk_input + input[3] = 'DRAMP' + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test.snap b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test.snap new file mode 100644 index 00000000..47102283 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test.snap @@ -0,0 +1,282 @@ +{ + "ampcombi2_parsetables - metagenome": { + "content": [ + [ + "sample_1", + "contig_gbks", + null, + "sample_id\tCDS_id\tprob_ampir\tprob_amplify\taa_sequence\tmolecular_weight\thelix_fraction\tturn_fraction\tsheet_fraction\tisoelectric_point\thydrophobicity\ttransporter_protein\tcontig_id\tCDS_start\tCDS_end\tCDS_dir\tCDS_stop_codon_found", + ">BAONEE_00005", + false, + true, + "amp_DRAMP_database", + "DRAMP_ID\tSequence\tSequence_Length\tName\tSwiss_Prot_Entry\tFamily\tGene\tSource\tActivity\tProtein_existence\tStructure\tStructure_Description\tPDB_ID\tComments\tTarget_Organism\tHemolytic_activity\tLinear/Cyclic/Branched\tN-terminal_Modification\tC-terminal_Modification\tOther_Modifications\tStereochemistry\tCytotoxicity\tBinding_Traget\tPubmed_ID\tReference\tAuthor\tTitle", + ">DRAMP00005", + "mmseqs2", + "versions.yml:md5,09f086e07825d96816d792d73eee90ca" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T13:58:57.988191067" + }, + "ampcombi2_parsetables - metagenome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + [ + [ + + ], + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_mmseqs_matches.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + [ + + ] + ] + ], + "10": [ + [ + { + "id": "sample_1" + }, + [ + "ref_DB:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.lookup:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.source:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + "versions.yml:md5,09f086e07825d96816d792d73eee90ca" + ], + "2": [ + [ + { + "id": "sample_1" + }, + "sample_1_mmseqs_matches.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "sample_1" + }, + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "sample_1" + }, + 
"Ampcombi_parse_tables.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "sample_1" + }, + [ + "*.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "*.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "ref_DB:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.lookup:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.source:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "8": [ + [ + { + "id": "sample_1" + }, + "*.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "sample_1" + }, + "*.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "contig_gbks": [ + [ + { + "id": "sample_1" + }, + [ + + ] + ] + ], + "db": [ + [ + { + "id": "sample_1" + }, + [ + "*.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "*.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "ref_DB:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.lookup:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.source:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "db_fasta": [ + [ + { + "id": "sample_1" + }, + "*.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "db_mmseqs": [ + [ + { + "id": "sample_1" + }, + [ + "ref_DB:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.lookup:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB.source:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_DB_h.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "db_tsv": [ + [ + { + "id": "sample_1" + }, + "sample_1_mmseqs_matches.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "db_txt": [ + [ + { + "id": "sample_1" + }, + "*.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "faa": [ + [ + { + "id": "sample_1" + }, + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "full_log": [ + [ + { + "id": "sample_1" + }, + "Ampcombi_parse_tables.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sample_dir": [ + [ + { + "id": "sample_1" + }, + [ + [ + + ], + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_mmseqs_matches.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "sample_log": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,09f086e07825d96816d792d73eee90ca" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T13:03:22.741430379" + } +} \ No newline at end of file diff --git a/modules/nf-core/ampcombi2/parsetables/tests/nextflow.config b/modules/nf-core/ampcombi2/parsetables/tests/nextflow.config new file 
mode 100644 index 00000000..75396b7d --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/tests/nextflow.config @@ -0,0 +1,22 @@ +process { + + withName: AMPCOMBI2_PARSETABLES { + + ext.args = [ + "--aminoacid_length 2000", + "--db_evalue 2000", + "--ampir_file 'ampir.tsv'", + "--amplify_file 'amplify.tsv'", + "--macrel_file '.prediction'", + "--neubi_file '.fasta'", + "--hmmsearch_file 'candidates.txt'", + "--ampgram_file '.tsv'", + "--amptransformer_file '.txt'", + "--log true", + "--interproscan_filter 'nonsense'" + ].join(' ') + + ext.prefix = "sample_1" + + } +} diff --git a/modules/nf-core/ampir/environment.yml b/modules/nf-core/ampir/environment.yml new file mode 100644 index 00000000..359e426c --- /dev/null +++ b/modules/nf-core/ampir/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-ampir=1.1.0 diff --git a/modules/nf-core/ampir/main.nf b/modules/nf-core/ampir/main.nf new file mode 100644 index 00000000..4a899fdd --- /dev/null +++ b/modules/nf-core/ampir/main.nf @@ -0,0 +1,70 @@ +process AMPIR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0': + 'biocontainers/r-ampir:1.1.0' }" + + input: + tuple val(meta), path(faa) + val model + val min_length + val min_probability + + output: + tuple val(meta), path("*.faa"), emit: amps_faa + tuple val(meta), path("*.tsv"), emit: amps_tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + #!/usr/bin/env Rscript + library(ampir) + + input_seqs <- read_faa('${faa}') + prediction <- predict_amps(input_seqs,${min_length},model = '${model}') + prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ] + output_seqs <- input_seqs[row.names(prediction), ] + write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.') + df_to_faa(output_seqs, "${prefix}.faa") + + version_file_path <- "versions.yml" + version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".") + f <- file(version_file_path, "w") + writeLines('"${task.process}":', f) + writeLines(" ampir: ", f, sep = "") + writeLines(version_ampir, f) + close(f) + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + #!/usr/bin/env Rscript + library(ampir) + + t <- file("${prefix}.tsv", "w") + close(t) + + a <- file("${prefix}.faa", "w") + close(a) + + version_file_path <- "versions.yml" + version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".") + f <- file(version_file_path, "w") + writeLines('"${task.process}":', f) + writeLines(" ampir: ", f, sep = "") + writeLines(version_ampir, f) + close(f) + """ +} diff --git a/modules/nf-core/ampir/meta.yml b/modules/nf-core/ampir/meta.yml new file mode 100644 index 00000000..571ddd86 --- /dev/null +++ b/modules/nf-core/ampir/meta.yml @@ -0,0 +1,70 @@ +name: "ampir" +description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) + from any given size protein dataset. ampir uses a supervised statistical machine + learning approach to predict AMPs. +keywords: + - ampir + - amp + - antimicrobial peptide prediction +tools: + - "ampir": + description: "A toolkit to predict antimicrobial peptides from protein sequences + on a genome-wide scale." + homepage: "https://github.com/Legana/ampir" + documentation: "https://cran.r-project.org/web/packages/ampir/index.html" + tool_dev_url: "https://github.com/Legana/ampir" + doi: "10.1093/bioinformatics/btaa653" + licence: ["GPL v2"] + identifier: biotools:ampir +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - faa: + type: file + description: FASTA file containing amino acid sequences + pattern: "*.{faa,fasta}" + - - model: + type: string + description: Built-in model for AMP prediction + pattern: "{precursor,mature}" + - - min_length: + type: integer + description: Minimum protein length for which predictions will be generated + pattern: "[0-9]+" + - - min_probability: + type: float + description: Cut-off for AMP prediction + pattern: "[0-9].[0-9]+" +output: + - amps_faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.faa": + type: file + description: File containing AMP predictions in amino acid FASTA format + pattern: "*.{faa}" + - amps_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tsv": + type: file + description: File containing AMP predictions in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/ampir/tests/main.nf.test b/modules/nf-core/ampir/tests/main.nf.test new file mode 100644 index 00000000..0ed40ef5 --- /dev/null +++ b/modules/nf-core/ampir/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process AMPIR" + script "../main.nf" + process "AMPIR" + + tag "modules" + tag "modules_nfcore" + tag "ampir" + + test("candidatus_portiera_aleyrodidarum proteome [fasta]") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true), + ] + input[1] = "precursor" // model + input[2] = 10 // min_length + input[3] = "0.7" // min_probability + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("candidatus_portiera_aleyrodidarum proteome [fasta] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true), + ] + input[1] = "precursor" // model + input[2] = 10 // min_length + input[3] = "0.7" // min_probability + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.amps_faa.collect { file(it[1]).getName() } + + process.out.amps_tsv.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ampir/tests/main.nf.test.snap b/modules/nf-core/ampir/tests/main.nf.test.snap new file mode 100644 index 00000000..77f1b9ec --- /dev/null +++ b/modules/nf-core/ampir/tests/main.nf.test.snap @@ -0,0 +1,61 @@ +{ + "candidatus_portiera_aleyrodidarum proteome [fasta] - stub": { + "content": [ + [ + "test.faa", + "test.tsv", + "versions.yml:md5,f8d5026ccdd8f72c7ac1b5e4670aab49" + ] + ], + "timestamp": "2023-12-26T18:19:18.308141504" + }, + "candidatus_portiera_aleyrodidarum proteome [fasta]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,0435609144022c55ac196db053f0df89" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,70a70e8698e8d367707f4b1833e3168c" + ] + ], + "2": [ + "versions.yml:md5,f8d5026ccdd8f72c7ac1b5e4670aab49" + ], + "amps_faa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,0435609144022c55ac196db053f0df89" + ] + ], + "amps_tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,70a70e8698e8d367707f4b1833e3168c" + ] + ], + "versions": [ + "versions.yml:md5,f8d5026ccdd8f72c7ac1b5e4670aab49" + ] + } + ], + "timestamp": "2023-12-26T18:18:57.151185866" + } +} \ No newline at end of file diff --git a/modules/nf-core/ampir/tests/tags.yml b/modules/nf-core/ampir/tests/tags.yml new file mode 100644 index 00000000..5ceace2d --- /dev/null +++ b/modules/nf-core/ampir/tests/tags.yml @@ -0,0 +1,2 @@ +ampir: + - "modules/nf-core/ampir/**" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/amplify/predict/environment.yml similarity index 65% rename from modules/nf-core/fastqc/environment.yml rename to
modules/nf-core/amplify/predict/environment.yml index 691d4c76..e1cb5703 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/amplify/predict/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::fastqc=0.12.1 + - bioconda::amplify=2.0.0 diff --git a/modules/nf-core/amplify/predict/main.nf b/modules/nf-core/amplify/predict/main.nf new file mode 100644 index 00000000..26108da7 --- /dev/null +++ b/modules/nf-core/amplify/predict/main.nf @@ -0,0 +1,51 @@ +process AMPLIFY_PREDICT { + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/amplify:2.0.0--py36hdfd78af_1': + 'biocontainers/amplify:2.0.0--py36hdfd78af_1' }" + + input: + tuple val(meta), path(faa) + path(model_dir) + + output: + tuple val(meta), path('*.tsv'), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def custom_model_dir = model_dir ? "-md ${model_dir}" : "" + """ + AMPlify \\ + $args \\ + ${custom_model_dir} \\ + -s '${faa}' + + #rename output, because tool includes date and time in name + mv *.tsv ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + AMPlify: \$(AMPlify --help | grep 'AMPlify v' | sed -e "s/^.*AMPlify v//") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + AMPlify: \$(AMPlify --help | grep 'AMPlify v' | sed -e "s/^.*AMPlify v//") + END_VERSIONS + """ +} diff --git a/modules/nf-core/amplify/predict/meta.yml b/modules/nf-core/amplify/predict/meta.yml new file mode 100644 index 00000000..cbe19f33 --- /dev/null +++ b/modules/nf-core/amplify/predict/meta.yml @@ -0,0 +1,51 @@ +name: "amplify_predict" +description: AMPlify is an attentive deep learning model for antimicrobial peptide + prediction. +keywords: + - antimicrobial peptides + - AMPs + - prediction + - model +tools: + - "amplify": + description: "Attentive deep learning model for antimicrobial peptide prediction" + homepage: "https://github.com/bcgsc/AMPlify" + documentation: "https://github.com/bcgsc/AMPlify" + tool_dev_url: "https://github.com/bcgsc/AMPlify" + doi: "10.1186/s12864-022-08310-4" + licence: ["GPL v3"] + identifier: biotools:amplify +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - faa: + type: file + description: amino acid sequences fasta + pattern: "*.{fa,fa.gz,faa,faa.gz,fasta,fasta.gz}" + - - model_dir: + type: directory + description: Directory of where models are stored (optional) +output: + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tsv": + type: file + description: amino acid sequences with prediction (AMP, non-AMP) and probability + scores + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/amplify/predict/tests/main.nf.test b/modules/nf-core/amplify/predict/tests/main.nf.test new file mode 100644 index 00000000..835c409c --- /dev/null +++ b/modules/nf-core/amplify/predict/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process AMPLIFY_PREDICT" + script "../main.nf" + process "AMPLIFY_PREDICT" + + tag "modules" + tag "modules_nfcore" + tag "amplify" + tag "amplify/predict" + tag "prodigal" + tag "gunzip" + + test("AMPlify predict (with Prodigal) - sarscov2 - contigs.fasta") { + + setup { + run("PRODIGAL") { + script "../../../prodigal/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true)) + ]) + input[1] = "gbk" + """ + } + } + run("GUNZIP") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = PRODIGAL.out.amino_acid_fasta + + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("AMPlify predict - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file("test")) + ]) + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/amplify/predict/tests/main.nf.test.snap b/modules/nf-core/amplify/predict/tests/main.nf.test.snap new file mode 100644 index 00000000..d70e80eb --- /dev/null +++ b/modules/nf-core/amplify/predict/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "AMPlify predict (with Prodigal) - sarscov2 - contigs.fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T12:58:56.67316521" + }, + "AMPlify predict (with Prodigal) - sarscov2 - contigs.fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,1951084ce1d410028be86754997e5852" + ] + ], + "1": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,1951084ce1d410028be86754997e5852" + ] + ], + "versions": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T12:58:49.894554665" + } +} \ No newline at end of file diff --git a/modules/nf-core/amplify/predict/tests/tags.yml b/modules/nf-core/amplify/predict/tests/tags.yml new file mode 100644 index 00000000..592eb7bc --- /dev/null +++ 
b/modules/nf-core/amplify/predict/tests/tags.yml @@ -0,0 +1,2 @@ +amplify/predict: + - "modules/nf-core/amplify/predict/**" diff --git a/modules/nf-core/amrfinderplus/run/environment.yml b/modules/nf-core/amrfinderplus/run/environment.yml new file mode 100644 index 00000000..2744ce54 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ncbi-amrfinderplus=3.12.8 diff --git a/modules/nf-core/amrfinderplus/run/main.nf b/modules/nf-core/amrfinderplus/run/main.nf new file mode 100644 index 00000000..046ba262 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/main.nf @@ -0,0 +1,80 @@ +process AMRFINDERPLUS_RUN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.12.8--h283d18e_0': + 'biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }" + + input: + tuple val(meta), path(fasta) + path db + + output: + tuple val(meta), path("${prefix}.tsv") , emit: report + tuple val(meta), path("${prefix}-mutations.tsv"), emit: mutation_report, optional: true + path "versions.yml" , emit: versions + env VER , emit: tool_version + env DBVER , emit: db_version + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def is_compressed_fasta = fasta.getName().endsWith(".gz") ? true : false + def is_compressed_db = db.getName().endsWith(".gz") ? true : false + prefix = task.ext.prefix ?: "${meta.id}" + organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-mutations.tsv" : "" + fasta_name = fasta.getName().replace(".gz", "") + fasta_param = "-n" + if (meta.containsKey("is_proteins")) { + if (meta.is_proteins) { + fasta_param = "-p" + } + } + """ + if [ "$is_compressed_fasta" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + if [ "$is_compressed_db" == "true" ]; then + mkdir amrfinderdb + tar xzvf $db -C amrfinderdb + else + mv $db amrfinderdb + fi + + amrfinder \\ + $fasta_param $fasta_name \\ + $organism_param \\ + $args \\ + --database amrfinderdb \\ + --threads $task.cpus > ${prefix}.tsv + + VER=\$(amrfinder --version) + DBVER=\$(echo \$(amrfinder --database amrfinderdb --database_version 2> stdout) | rev | cut -f 1 -d ' ' | rev) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + amrfinderplus-database: \$(echo \$(echo \$(amrfinder --database amrfinderdb --database_version 2> stdout) | rev | cut -f 1 -d ' ' | rev)) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + VER=\$(amrfinder --version) + DBVER=stub_version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + amrfinderplus-database: stub_version + END_VERSIONS + """ +} diff --git a/modules/nf-core/amrfinderplus/run/meta.yml b/modules/nf-core/amrfinderplus/run/meta.yml new file mode 100644 index 00000000..d081a2bd --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/meta.yml @@ -0,0 +1,74 @@ +name: amrfinderplus_run +description: Identify antimicrobial resistance in gene or protein sequences +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - amrfinderplus: + description: AMRFinderPlus finds antimicrobial resistance and other genes in protein + or nucleotide sequences. 
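As a reading aid for the AMRFINDERPLUS_RUN script block above: the sequence type and the optional point-mutation report are driven entirely by keys in the meta map. A hypothetical meta map that would trigger protein mode plus organism-specific mutation screening (key names as used in the module, values made up for illustration):

// meta map for one sample (values illustrative only)
def meta = [ id: 'test', is_proteins: true, organism: 'Haemophilus_influenzae' ]

// inside the script block this resolves to:
//   fasta_param    == '-p'
//   organism_param == '--organism Haemophilus_influenzae --mutation_all test-mutations.tsv'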
+ homepage: https://github.com/ncbi/amr/wiki + documentation: https://github.com/ncbi/amr/wiki + tool_dev_url: https://github.com/ncbi/amr + doi: "10.1038/s41598-021-91456-0" + licence: ["Public Domain"] + identifier: biotools:amrfinderplus +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleotide or protein sequences in FASTA format + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}" + - - db: + type: file + description: A compressed tarball of the AMRFinderPlus database to query + pattern: "*.tar.gz" +output: + - report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.tsv: + type: file + description: AMRFinder+ final report + pattern: "*.tsv" + - mutation_report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}-mutations.tsv: + type: file + description: Report of organism-specific point-mutations + pattern: "*-mutations.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - tool_version: + - VER: + type: string + description: The version of the tool in string format (useful for downstream + tools such as hAMRronization) + - db_version: + - DBVER: + type: string + description: The version of the used database in string format (useful for downstream + tools such as hAMRronization) +authors: + - "@rpetit3" + - "@louperelo" + - "@jfy133" +maintainers: + - "@rpetit3" + - "@louperelo" + - "@jfy133" diff --git a/modules/nf-core/amrfinderplus/run/tests/main.nf.test b/modules/nf-core/amrfinderplus/run/tests/main.nf.test new file mode 100644 index 00000000..8103bb0f --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process AMRFINDERPLUS_RUN" + script "../main.nf" + process "AMRFINDERPLUS_RUN" + + tag "modules" + tag "modules_nfcore" + tag "amrfinderplus" + tag "amrfinderplus/run" + tag "amrfinderplus/update" + + setup { + + run("AMRFINDERPLUS_UPDATE") { + script "modules/nf-core/amrfinderplus/update/main.nf" + process { + """ + """ + } + } + } + + test("amrfinderplus/run - haemophilus_influenzae - genome_fna_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = AMRFINDERPLUS_UPDATE.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("amrfinderplus/run - haemophilus_influenzae - genome_fna_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = AMRFINDERPLUS_UPDATE.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/amrfinderplus/run/tests/main.nf.test.snap b/modules/nf-core/amrfinderplus/run/tests/main.nf.test.snap new file mode 100644 index 00000000..f1e37cd3 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/tests/main.nf.test.snap @@ -0,0 +1,108 @@ 
+{ + "amrfinderplus/run - haemophilus_influenzae - genome_fna_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,38c4420b00b74ca78268341754d6d26f" + ], + "3": [ + "3.12.8" + ], + "4": [ + "stub_version" + ], + "db_version": [ + "stub_version" + ], + "mutation_report": [ + + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tool_version": [ + "3.12.8" + ], + "versions": [ + "versions.yml:md5,38c4420b00b74ca78268341754d6d26f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T19:17:59.662186954" + }, + "amrfinderplus/run - haemophilus_influenzae - genome_fna_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,1cdc90746febb496e06e63dd936aca9b" + ], + "3": [ + "3.12.8" + ], + "4": [ + "2024-01-31.1" + ], + "db_version": [ + "2024-01-31.1" + ], + "mutation_report": [ + + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ] + ], + "tool_version": [ + "3.12.8" + ], + "versions": [ + "versions.yml:md5,1cdc90746febb496e06e63dd936aca9b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T19:17:49.927168241" + } +} \ No newline at end of file diff --git a/modules/nf-core/amrfinderplus/run/tests/tags.yml b/modules/nf-core/amrfinderplus/run/tests/tags.yml new file mode 100644 index 00000000..3a5a84a4 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/tests/tags.yml @@ -0,0 +1,2 @@ +amrfinderplus/run: + - "modules/nf-core/amrfinderplus/run/**" diff --git a/modules/nf-core/amrfinderplus/update/environment.yml b/modules/nf-core/amrfinderplus/update/environment.yml new file mode 100644 index 00000000..2744ce54 --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ncbi-amrfinderplus=3.12.8 diff --git a/modules/nf-core/amrfinderplus/update/main.nf b/modules/nf-core/amrfinderplus/update/main.nf new file mode 100644 index 00000000..619a2a34 --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/main.nf @@ -0,0 +1,38 @@ +process AMRFINDERPLUS_UPDATE { + tag "update" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.12.8--h283d18e_0': + 'biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }" + + output: + path "amrfinderdb.tar.gz", emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + amrfinder_update -d amrfinderdb + tar czvf amrfinderdb.tar.gz -C amrfinderdb/\$(readlink amrfinderdb/latest) ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + END_VERSIONS + """ + + stub: + """ + touch amrfinderdb.tar + gzip amrfinderdb.tar + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/amrfinderplus/update/meta.yml b/modules/nf-core/amrfinderplus/update/meta.yml new file mode 100644 index 00000000..574957e1 --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/meta.yml @@ -0,0 +1,32 @@ +name: amrfinderplus_update +description: Download the latest version of the AMRFinderPlus database +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - amrfinderplus: + description: AMRFinderPlus finds antimicrobial resistance and other genes in protein + or nucleotide sequences. + homepage: https://github.com/ncbi/amr/wiki + documentation: https://github.com/ncbi/amr/wiki + tool_dev_url: https://github.com/ncbi/amr + doi: "10.1038/s41598-021-91456-0" + licence: ["Public Domain"] + identifier: biotools:amrfinderplus +# this module does not have any input. +output: + - db: + - amrfinderdb.tar.gz: + type: file + description: The latest AMRFinder+ database in a compressed tarball + pattern: "*.tar.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/amrfinderplus/update/tests/main.nf.test b/modules/nf-core/amrfinderplus/update/tests/main.nf.test new file mode 100644 index 00000000..72ff29e6 --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_process { + + name "Test Process AMRFINDERPLUS_UPDATE" + script "../main.nf" + process "AMRFINDERPLUS_UPDATE" + + tag "modules" + tag "modules_nfcore" + tag "amrfinderplus" + tag "amrfinderplus/update" + + test("amrfinderplus/update") { + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.collect { file(it).getName() } + + process.out.versions + ).match() + } + ) + } + } + + test("amrfinderplus/update - stub") { + + options "-stub" + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/amrfinderplus/update/tests/main.nf.test.snap b/modules/nf-core/amrfinderplus/update/tests/main.nf.test.snap new file mode 100644 index 00000000..646e134c --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "amrfinderplus/update - stub": { + "content": [ + { + "0": [ + "amrfinderdb.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,785d6824f78d04a40f96ec9c1e02c3a8" + ], + "db": [ + "amrfinderdb.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,785d6824f78d04a40f96ec9c1e02c3a8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp":
"2024-02-27T20:33:49.682876802" + }, + "amrfinderplus/update": { + "content": [ + [ + "amrfinderdb.tar.gz", + "versions.yml:md5,785d6824f78d04a40f96ec9c1e02c3a8" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T20:33:40.320979057" + } +} \ No newline at end of file diff --git a/modules/nf-core/amrfinderplus/update/tests/tags.yml b/modules/nf-core/amrfinderplus/update/tests/tags.yml new file mode 100644 index 00000000..bbe0358f --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/tests/tags.yml @@ -0,0 +1,2 @@ +amrfinderplus/update: + - "modules/nf-core/amrfinderplus/update/**" diff --git a/modules/nf-core/antismash/antismashlite/environment.yml b/modules/nf-core/antismash/antismashlite/environment.yml new file mode 100644 index 00000000..ce4491dc --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::antismash-lite=7.1.0 diff --git a/modules/nf-core/antismash/antismashlite/main.nf b/modules/nf-core/antismash/antismashlite/main.nf new file mode 100644 index 00000000..422e7be0 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/main.nf @@ -0,0 +1,97 @@ +process ANTISMASH_ANTISMASHLITE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/antismash-lite:7.1.0--pyhdfd78af_0' : + 'biocontainers/antismash-lite:7.1.0--pyhdfd78af_0' }" + + containerOptions { + workflow.containerEngine == 'singularity' ? + "-B $antismash_dir:/usr/local/lib/python3.10/site-packages/antismash" : + workflow.containerEngine == 'docker' ? + "-v \$PWD/$antismash_dir:/usr/local/lib/python3.10/site-packages/antismash" : + '' + } + + input: + tuple val(meta), path(sequence_input) + path(databases) + path(antismash_dir) // Optional input: AntiSMASH installation folder. It is not needed for using this module with conda, but required for docker/singularity (see meta.yml). 
+ path(gff) + + output: + tuple val(meta), path("${prefix}/clusterblast/*_c*.txt") , optional: true, emit: clusterblast_file + tuple val(meta), path("${prefix}/{css,images,js}") , emit: html_accessory_files + tuple val(meta), path("${prefix}/knownclusterblast/region*/ctg*.html") , optional: true, emit: knownclusterblast_html + tuple val(meta), path("${prefix}/knownclusterblast/") , optional: true, emit: knownclusterblast_dir + tuple val(meta), path("${prefix}/knownclusterblast/*_c*.txt") , optional: true, emit: knownclusterblast_txt + tuple val(meta), path("${prefix}/svg/clusterblast*.svg") , optional: true, emit: svg_files_clusterblast + tuple val(meta), path("${prefix}/svg/knownclusterblast*.svg") , optional: true, emit: svg_files_knownclusterblast + tuple val(meta), path("${prefix}/*.gbk") , emit: gbk_input + tuple val(meta), path("${prefix}/*.json") , emit: json_results + tuple val(meta), path("${prefix}/*.log") , emit: log + tuple val(meta), path("${prefix}/*.zip") , emit: zip + tuple val(meta), path("${prefix}/*region*.gbk") , optional: true, emit: gbk_results + tuple val(meta), path("${prefix}/clusterblastoutput.txt") , optional: true, emit: clusterblastoutput + tuple val(meta), path("${prefix}/index.html") , emit: html + tuple val(meta), path("${prefix}/knownclusterblastoutput.txt") , optional: true, emit: knownclusterblastoutput + tuple val(meta), path("${prefix}/regions.js") , emit: json_sideloading + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + gff_flag = gff ? "--genefinding-gff3 ${gff}" : "" + + """ + ## We specifically do not include on-the-fly annotations (--genefinding-tool none) as + ## this should be run as a separate module for versioning purposes + + antismash \\ + $args \\ + $gff_flag \\ + -c $task.cpus \\ + --output-dir $prefix \\ + --output-basename $prefix \\ + --genefinding-tool none \\ + --logfile $prefix/${prefix}.log \\ + --databases $databases \\ + $sequence_input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash-lite: \$(echo \$(antismash --version) | sed 's/antiSMASH //') + END_VERSIONS + """ + + stub: + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def VERSION = '7.1.0' // WARN: Version information not provided by tool during stub run. Please update this string when bumping container versions. + """ + mkdir -p ${prefix}/css + mkdir ${prefix}/images + mkdir ${prefix}/js + touch ${prefix}/NZ_CP069563.1.region001.gbk + touch ${prefix}/NZ_CP069563.1.region002.gbk + touch ${prefix}/css/bacteria.css + touch ${prefix}/genome.gbk + touch ${prefix}/genome.json + touch ${prefix}/genome.zip + touch ${prefix}/images/about.svg + touch ${prefix}/index.html + touch ${prefix}/js/antismash.js + touch ${prefix}/js/jquery.js + touch ${prefix}/regions.js + touch ${prefix}/test.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash-lite: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/antismash/antismashlite/meta.yml b/modules/nf-core/antismash/antismashlite/meta.yml new file mode 100644 index 00000000..63828343 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/meta.yml @@ -0,0 +1,230 @@ +name: antismash_antismashlite +description: | + antiSMASH allows the rapid genome-wide identification, annotation + and analysis of secondary metabolite biosynthesis gene clusters. 
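Tool-specific flags reach the module above through `task.ext.args` (forwarded verbatim as `$args` in the script block), and `task.ext.suffix` controls the output prefix. A minimal, hypothetical process-scope configuration (the flag values are illustrative antiSMASH options, not defaults of this pipeline):

process {
    withName: ANTISMASH_ANTISMASHLITE {
        ext.args   = '--taxon bacteria --cb-knownclusters' // forwarded verbatim as $args
        ext.suffix = '_antismash'                          // appended to meta.id to form the output prefix
    }
}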
+keywords: + - secondary metabolites + - BGC + - biosynthetic gene cluster + - genome mining + - NRPS + - RiPP + - antibiotics + - prokaryotes + - bacteria + - eukaryotes + - fungi + - antismash +tools: + - antismashlite: + description: "antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell" + homepage: "https://docs.antismash.secondarymetabolites.org" + documentation: "https://docs.antismash.secondarymetabolites.org" + tool_dev_url: "https://github.com/antismash/antismash" + doi: "10.1093/nar/gkab335" + licence: ["AGPL v3"] + identifier: biotools:antismash +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sequence_input: + type: file + description: nucleotide sequence file (annotated) + pattern: "*.{gbk, gb, gbff, genbank, embl, fasta, fna}" + - - databases: + type: directory + description: | + Downloaded AntiSMASH databases (e.g. in the AntiSMASH installation directory + "data/databases") + pattern: "*/" + - - antismash_dir: + type: directory + description: | + A local copy of an AntiSMASH installation folder. This is required when running with + docker and singularity (not required for conda), due to attempted 'modifications' of + files during database checks in the installation directory, something that cannot + be done in immutable docker/singularity containers. Therefore, a local installation + directory needs to be mounted (including all modified files from the downloading step) + to the container as a workaround. + pattern: "*/" +output: + - clusterblast_file: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/clusterblast/*_c*.txt: + type: file + description: Output of ClusterBlast algorithm + pattern: "clusterblast/*_c*.txt" + - html_accessory_files: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/{css,images,js}: + type: directory + description: Accessory files for the HTML output + pattern: "{css/,images/,js/}" + - knownclusterblast_html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/knownclusterblast/region*/ctg*.html: + type: file + description: Tables with MIBiG hits in HTML format + pattern: "knownclusterblast/region*/ctg*.html" + - knownclusterblast_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/knownclusterblast/: + type: directory + description: Directory with MIBiG hits + pattern: "knownclusterblast/" + - knownclusterblast_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/knownclusterblast/*_c*.txt: + type: file + description: Tables with MIBiG hits + pattern: "knownclusterblast/*_c*.txt" + - svg_files_clusterblast: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/svg/clusterblast*.svg: + type: file + description: SVG images showing the % identity of the aligned hits against their + queries + pattern: "svg/clusterblast*.svg" + - svg_files_knownclusterblast: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/svg/knownclusterblast*.svg: + type: file + description: SVG images showing the % identity of the aligned hits against their + queries + pattern: "svg/knownclusterblast*.svg" + - gbk_input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.gbk: + type: file + description: Nucleotide sequence and annotations in GenBank format; converted + from input file + pattern: "*.gbk" + - json_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.json: + type: file + description: Nucleotide sequence and annotations in JSON format; converted from + GenBank file (gbk_input) + pattern: "*.json" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.log: + type: file + description: Contains all the logging output that antiSMASH produced during + its run + pattern: "*.log" + - zip: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.zip: + type: file + description: Contains a compressed version of the output folder in zip format + pattern: "*.zip" + - gbk_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*region*.gbk: + type: file + description: Nucleotide sequence and annotations in GenBank format; one file + per antiSMASH hit + pattern: "*region*.gbk" + - clusterblastoutput: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/clusterblastoutput.txt: + type: file + description: Raw BLAST output of known clusters previously predicted by antiSMASH + using the built-in ClusterBlast algorithm + pattern: "clusterblastoutput.txt" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/index.html: + type: file + description: Graphical web view of results in HTML format + pattern: "index.html" + - knownclusterblastoutput: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/knownclusterblastoutput.txt: + type: file + description: Raw BLAST output of known clusters of the MIBiG database + pattern: "knownclusterblastoutput.txt" + - json_sideloading: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}/regions.js: + type: file + description: Sideloaded annotations of protoclusters and/or subregions (see + antiSMASH documentation "Annotation sideloading") + pattern: "regions.js" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/antismash/antismashlite/tests/main.nf.test b/modules/nf-core/antismash/antismashlite/tests/main.nf.test new file mode 100644 index 00000000..5ee21d6d --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process ANTISMASH_ANTISMASHLITE" + script "../main.nf" + process "ANTISMASH_ANTISMASHLITE" + + tag "modules" + tag "modules_nfcore" + tag "antismash" + tag "antismash/antismashlite" + tag "antismash/antismashlitedownloaddatabases" + tag "gunzip" + tag "untar" + + setup { + run("UNTAR", alias: "UNTAR_CSS") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/css.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_DETECTION") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/detection.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_MODULES") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/modules.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES") { + script "modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf" + process { + """ + input[0] = UNTAR_CSS.out.untar.map{ it[1] } + input[1] = UNTAR_DETECTION.out.untar.map{ it[1] } + input[2] = UNTAR_MODULES.out.untar.map{ it[1] } + """ + } + } + + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz', checkIfExists: true) + ] + """ + } + } + } + + test("antismashlite - bacteroides_fragilis - genome") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database + input[2] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gbk_results.get(0).get(1).get(0)).text.contains("##antiSMASH-Data-START##") }, + { assert snapshot(process.out.html_accessory_files).match("html_accessory_files") }, + { assert path(process.out.gbk_input.get(0).get(1).get(0)).text.contains("##antiSMASH-Data-END##") }, + { assert path(process.out.zip.get(0).get(1)).exists() }, + { assert path(process.out.html.get(0).get(1)).text.contains("https://antismash.secondarymetabolites.org/") }, + { assert path(process.out.json_sideloading.get(0).get(1)).text.contains("\"seq_id\": \"NZ_CP069563.1\"") }, + { assert path(process.out.log.get(0).get(1)).text.contains("antiSMASH status: SUCCESS") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("antismashlite - 
bacteroides_fragilis - genome - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database + input[2] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/antismash/antismashlite/tests/main.nf.test.snap b/modules/nf-core/antismash/antismashlite/tests/main.nf.test.snap new file mode 100644 index 00000000..618b06f9 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/tests/main.nf.test.snap @@ -0,0 +1,301 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,2a1c54c017741b59c057a05453fc067d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-09T17:06:08.439031477" + }, + "html_accessory_files": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ], + [ + "about.svg:md5,2573f954dd506e2d0878daed04f5420a", + "bacteria_about.png:md5,99cdc2aa09aee37553b10ca86b172170", + "bacteria_antismash_icon.svg:md5,23a265b0e1cf293a4743fe13030b636f", + "bacteria_antismash_logo.svg:md5,f80f639969ee6506571ffda2e197df93", + "bacteria_antismash_white.svg:md5,2c9da15cc168d8f796269d037b5e7f60", + "bacteria_download.png:md5,c3428df1cf17cb97e2897ca6daa93d48", + "bacteria_help.png:md5,359b68f90c73208eb389759c0f5c1091", + "bacteria_home.png:md5,6595d97ee49d251fe038207f82012eff", + "bacteria_logo.png:md5,013f84d6dd93cde96f07084ff63d855c", + "contact.svg:md5,53b878c2af4f8a80a647ac30f61e6bf6", + "download.svg:md5,722038156f4ece46747cbf6908501974", + "expand-arrows-alt-solid.svg:md5,21b37749f54320135a455ed266a7fc3a", + "external-link-alt-solid.svg:md5,ca337694c74e57f73d15ca9db30081ba", + "fungi_about.png:md5,4d55bf14df0340dca01a286487fa8448", + "fungi_antismash_icon.svg:md5,2acc19cc91d5d7285a72f0b3912e108a", + "fungi_antismash_icon_white.svg:md5,961f1c41e25036a625f115f209a961c7", + "fungi_antismash_logo.svg:md5,36560983a36f46786c98a05125b15724", + "fungi_download.png:md5,782580852674aab0b69b2b94a94c7615", + "fungi_help.png:md5,0ac06748f3177d150ab90997117c4f64", + "fungi_home.png:md5,880071898062d6dafe989ac73bb7bbea", + "fungi_logo.png:md5,29294392a3953fd1ba12d1a39cebaeeb", + "help.svg:md5,e7565a3cd74893422f2886a0af748df2", + "mail.png:md5,049f51233b29663e4e4e4c8097c2d096", + "minus-circle.svg:md5,b523305570d06b6e34cd7099bed22015", + "nostructure_icon.png:md5,fc982a5b84a1a99db607731625a87f88", + "plant_antismash_icon.svg:md5,e031de9570ef2809e52502481a5e77ea", + "plant_antismash_icon_white.svg:md5,10d25996b023dbdaed4a382471ab4877", + "plus-circle.svg:md5,cba2cdd9ef893274f572228b354718cf", + "question-circle-solid.svg:md5,6dbc83547e29ecedc7f2a5b81354353b", + "search-solid.svg:md5,aeab848c26357f3d120f3e58f1efa8f5" + ], + [ + "antismash.js:md5,c90571fe2580fd4feff9a37314f1fe6b", + "jquery.js:md5,397754ba49e9e0cf4e7c190da78dda05", + "jquery.tablesorter.min.js:md5,5e9e08cef4d1be0eaa538e6eb28809a7" + ] + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-09T17:06:08.392236617" + }, + "antismashlite - bacteroides_fragilis - genome - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "about.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "antismash.js:md5,d41d8cd98f00b204e9800998ecf8427e", 
+ "jquery.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "10": [ + [ + { + "id": "test" + }, + "genome.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "12": [ + + ], + "13": [ + [ + { + "id": "test" + }, + "index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + + ], + "15": [ + [ + { + "id": "test" + }, + "regions.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + "versions.yml:md5,2a1c54c017741b59c057a05453fc067d" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "8": [ + [ + { + "id": "test" + }, + "genome.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "clusterblast_file": [ + + ], + "clusterblastoutput": [ + + ], + "gbk_input": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "gbk_results": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "html": [ + [ + { + "id": "test" + }, + "index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "html_accessory_files": [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "about.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "antismash.js:md5,d41d8cd98f00b204e9800998ecf8427e", + "jquery.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "json_results": [ + [ + { + "id": "test" + }, + "genome.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json_sideloading": [ + [ + { + "id": "test" + }, + "regions.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "knownclusterblast_dir": [ + + ], + "knownclusterblast_html": [ + + ], + "knownclusterblast_txt": [ + + ], + "knownclusterblastoutput": [ + + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "svg_files_clusterblast": [ + + ], + "svg_files_knownclusterblast": [ + + ], + "versions": [ + "versions.yml:md5,2a1c54c017741b59c057a05453fc067d" + ], + "zip": [ + [ + { + "id": "test" + }, + "genome.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-11T16:35:51.079804" + } +} \ No newline at end of file diff --git a/modules/nf-core/antismash/antismashlite/tests/tags.yml b/modules/nf-core/antismash/antismashlite/tests/tags.yml new file mode 100644 index 00000000..020b39d6 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/tests/tags.yml @@ -0,0 +1,2 @@ +antismash/antismashlite: + - "modules/nf-core/antismash/antismashlite/**" diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml new file mode 100644 index 00000000..ce4491dc --- /dev/null +++ 
b/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::antismash-lite=7.1.0
diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf b/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf
new file mode 100644
index 00000000..e63f20d2
--- /dev/null
+++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf
@@ -0,0 +1,69 @@
+process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES {
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/antismash-lite:7.1.0--pyhdfd78af_0' :
+        'biocontainers/antismash-lite:7.1.0--pyhdfd78af_0' }"
+
+    /*
+    These files are normally downloaded/created by download-antismash-databases itself, and must be retrieved as input by manually running the command with conda or a standalone installation of antiSMASH. We therefore do not recommend using this module in production pipelines; instead, users should specify their own local copy of the antiSMASH database. This module is intended solely for CI tests of the nf-core module version of antiSMASH.
+    Reason: Upon execution, the tool checks if certain database files are present within the container and, if not, tries to create them in /usr/local/bin, for which only the root user has write permissions. Mounting those database files with this module prevents the tool from trying to create them.
+    These files are also emitted as output channels in this module to enable the antismash-lite module to use them as mount volumes for the docker/singularity containers.
+    */
+
+    containerOptions {
+        workflow.containerEngine == 'singularity' ?
+            "-B $database_css:/usr/local/lib/python3.10/site-packages/antismash/outputs/html/css,$database_detection:/usr/local/lib/python3.10/site-packages/antismash/detection,$database_modules:/usr/local/lib/python3.10/site-packages/antismash/modules" :
+            workflow.containerEngine == 'docker' ?
+            "-v \$PWD/$database_css:/usr/local/lib/python3.10/site-packages/antismash/outputs/html/css -v \$PWD/$database_detection:/usr/local/lib/python3.10/site-packages/antismash/detection -v \$PWD/$database_modules:/usr/local/lib/python3.10/site-packages/antismash/modules" :
+            ''
+    }
+
+    input:
+    path database_css
+    path database_detection
+    path database_modules
+
+    output:
+    path("antismash_db") , emit: database
+    path("antismash_dir"), emit: antismash_dir
+    path "versions.yml"  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    cp_cmd = ( session.config.conda && session.config.conda.enabled ) ? "cp -r \$(python -c 'import antismash;print(antismash.__file__.split(\"/__\")[0])') antismash_dir;" : "cp -r /usr/local/lib/python3.10/site-packages/antismash antismash_dir;"
+    """
+    download-antismash-databases \\
+        --database-dir antismash_db \\
+        $args
+
+    $cp_cmd
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        antismash-lite: \$(antismash --version | sed 's/antiSMASH //')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    cp_cmd = (session.config.conda && session.config.conda.enabled ) ?
"cp -r \$(python -c 'import antismash;print(antismash.__file__.split(\"/__\")[0])') antismash_dir;" : "cp -r /usr/local/lib/python3.10/site-packages/antismash antismash_dir;" + def VERSION = '7.1.0' // WARN: Version information not provided by tool during stub run. Please update this string when bumping container versions. + """ + echo "download-antismash-databases --database-dir antismash_db $args" + + echo "$cp_cmd" + + mkdir antismash_dir + mkdir antismash_db + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash-lite: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml new file mode 100644 index 00000000..fdca8294 --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml @@ -0,0 +1,64 @@ +name: antismash_antismashlitedownloaddatabases +description: antiSMASH allows the rapid genome-wide identification, annotation and + analysis of secondary metabolite biosynthesis gene clusters. This module downloads + the antiSMASH databases for conda and docker/singularity runs. +keywords: + - secondary metabolites + - BGC + - biosynthetic gene cluster + - genome mining + - NRPS + - RiPP + - antibiotics + - prokaryotes + - bacteria + - eukaryotes + - fungi + - antismash + - database +tools: + - antismash: + description: antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell + homepage: https://docs.antismash.secondarymetabolites.org + documentation: https://docs.antismash.secondarymetabolites.org + tool_dev_url: https://github.com/antismash/antismash + doi: "10.1093/nar/gkab335" + licence: ["AGPL v3"] + identifier: biotools:antismash +input: + - - database_css: + type: directory + description: | + antismash/outputs/html/css folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines. + pattern: "css" + - - database_detection: + type: directory + description: | + antismash/detection folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines. + pattern: "detection" + - - database_modules: + type: directory + description: | + antismash/modules folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines. 
+ pattern: "modules" +output: + - database: + - antismash_db: + type: directory + description: Download directory for antiSMASH databases + pattern: "antismash_db" + - antismash_dir: + - antismash_dir: + type: directory + description: | + antismash installation folder which is being modified during the antiSMASH database downloading step. The modified files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database and installation folder in pipelines. + pattern: "antismash_dir" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test new file mode 100644 index 00000000..55f5f2f5 --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test @@ -0,0 +1,135 @@ +nextflow_process { + + name "Test Process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES" + script "../main.nf" + process "ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES" + + tag "modules" + tag "modules_nfcore" + tag "antismash" + tag "antismash/antismashlitedownloaddatabases" + tag "untar" + + test("antiSMASH-lite downloaddatabases") { + + setup { + + run("UNTAR", alias: "UNTAR_CSS") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/css.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_DETECTION") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/detection.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_MODULES") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/modules.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR_CSS.out.untar.map{ it[1] } + input[1] = UNTAR_DETECTION.out.untar.map{ it[1] } + input[2] = UNTAR_MODULES.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + file(process.out.database.get(0)).list().sort(), + process.out.versions, + ).match() } + ) + } + } + + test("antiSMASH-lite downloaddatabases - stub") { + + options "-stub" + + setup { + + run("UNTAR", alias: "UNTAR_CSS") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/css.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_DETECTION") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/detection.tar.gz', checkIfExists: true) + ] + """ + } + } + 
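+            // Note: the three tarballs unpack the css/, detection/, and modules/ directories
+            // that ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES bind-mounts into its container
+            // (see containerOptions in that module's main.nf).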
+ run("UNTAR", alias: "UNTAR_MODULES") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/modules.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR_CSS.out.untar.map{ it[1] } + input[1] = UNTAR_DETECTION.out.untar.map{ it[1] } + input[2] = UNTAR_MODULES.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test.snap b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test.snap new file mode 100644 index 00000000..21ee9d41 --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test.snap @@ -0,0 +1,62 @@ +{ + "antiSMASH-lite downloaddatabases - stub": { + "content": [ + { + "0": [ + [ + + ] + ], + "1": [ + [ + + ] + ], + "2": [ + "versions.yml:md5,9eccc775a12d25ca5dfe334e8874f12a" + ], + "antismash_dir": [ + [ + + ] + ], + "database": [ + [ + + ] + ], + "versions": [ + "versions.yml:md5,9eccc775a12d25ca5dfe334e8874f12a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:41:29.456143" + }, + "antiSMASH-lite downloaddatabases": { + "content": [ + [ + "as-js", + "clusterblast", + "clustercompare", + "comparippson", + "knownclusterblast", + "nrps_pks", + "pfam", + "resfam", + "tigrfam" + ], + [ + "versions.yml:md5,9eccc775a12d25ca5dfe334e8874f12a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:41:08.116244" + } +} \ No newline at end of file diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/tags.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/tags.yml new file mode 100644 index 00000000..1b01466e --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/tags.yml @@ -0,0 +1,2 @@ +antismash/antismashlitedownloaddatabases: + - "modules/nf-core/antismash/antismashlitedownloaddatabases/**" diff --git a/modules/nf-core/argnorm/environment.yml b/modules/nf-core/argnorm/environment.yml new file mode 100644 index 00000000..783995f2 --- /dev/null +++ b/modules/nf-core/argnorm/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::argnorm=0.5.0" diff --git a/modules/nf-core/argnorm/main.nf b/modules/nf-core/argnorm/main.nf new file mode 100644 index 00000000..5ff5e8a5 --- /dev/null +++ b/modules/nf-core/argnorm/main.nf @@ -0,0 +1,68 @@ +process ARGNORM { + tag "$meta.id" + label 'process_low' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/argnorm:0.5.0--pyhdfd78af_0': + 'biocontainers/argnorm:0.5.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input_tsv) + val tool + val db + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.5.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def db_args = db ? "--db ${db}" : "" + if (!tool) { + error 'Tool not provided.' + } + if ((tool in ["abricate"]) && !db) { + error "$tool requires a database but not provided." + } + + """ + argnorm \\ + $tool \\ + -i $input_tsv \\ + -o $prefix \\ + $db_args \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + argnorm: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.5.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + if (!tool) { + error 'Tool not provided.' + } + if ((tool in ["abricate"]) && !db) { + error "$tool requires a database but not provided." + } + + """ + touch ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + argnorm: $VERSION + END_VERSIONS + """ + +} diff --git a/modules/nf-core/argnorm/meta.yml b/modules/nf-core/argnorm/meta.yml new file mode 100644 index 00000000..84842b9c --- /dev/null +++ b/modules/nf-core/argnorm/meta.yml @@ -0,0 +1,60 @@ +name: "argnorm" +description: Normalize antibiotic resistance genes (ARGs) using the ARO ontology (developed + by CARD). +keywords: + - amr + - antimicrobial resistance + - arg + - antimicrobial resistance genes + - genomics + - metagenomics + - normalization + - drug categorization +tools: + - "argnorm": + description: "Normalize antibiotic resistance genes (ARGs) using the ARO ontology + (developed by CARD)." + homepage: "https://argnorm.readthedocs.io/en/latest/" + documentation: "https://argnorm.readthedocs.io/en/latest/" + tool_dev_url: "https://github.com/BigDataBiology/argNorm" + licence: ["MIT"] + identifier: biotools:argnorm + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - input_tsv: + type: file + description: ARG annotation output + pattern: "*.tsv" + - - tool: + type: string + description: ARG annotation tool used + pattern: "argsoap|abricate|deeparg|resfinder|amrfinderplus" + - - db: + type: string + description: Database used for ARG annotation + pattern: "sarg|ncbi|resfinder|deeparg|megares|argannot|resfinderfg" +output: + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.tsv": + type: file + description: Normalized argNorm output + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Vedanth-Ramji" +maintainers: + - "@Vedanth-Ramji" diff --git a/modules/nf-core/argnorm/tests/argnorm_hamronized.config b/modules/nf-core/argnorm/tests/argnorm_hamronized.config new file mode 100644 index 00000000..68748018 --- /dev/null +++ b/modules/nf-core/argnorm/tests/argnorm_hamronized.config @@ -0,0 +1,5 @@ +process { + withName: ARGNORM { + ext.args = '--hamronized' + } +} diff --git a/modules/nf-core/argnorm/tests/argnorm_raw.config b/modules/nf-core/argnorm/tests/argnorm_raw.config new file mode 100644 index 00000000..dffa3c48 --- /dev/null +++ b/modules/nf-core/argnorm/tests/argnorm_raw.config @@ -0,0 +1,5 @@ +process { + withName: ARGNORM { + ext.args = '' + } +} diff --git a/modules/nf-core/argnorm/tests/main.nf.test b/modules/nf-core/argnorm/tests/main.nf.test new file mode 100644 index 00000000..e68c2151 --- /dev/null +++ b/modules/nf-core/argnorm/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_process { + name "Test Process ARGNORM" + script "../main.nf" + process "ARGNORM" + + tag "modules" + tag "modules_nfcore" + tag "argnorm" + + test("argnorm - amrfinderplus_ncbi_raw - tsv") { + config './argnorm_raw.config' + + when { + process { + """ + input[0] = [ + [ id:'argnorm_raw.tsv' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true) + ] + input[1] = 'amrfinderplus' + input[2] = 'ncbi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("argnorm - amrfinderplus_ncbi_hamronized - tsv") { + config './argnorm_hamronized.config' + + when { + process { + """ + input[0] = [ + [ id:'argnorm_hamronized.tsv' ], // meta map + file("https://raw.githubusercontent.com/BigDataBiology/argNorm/main/examples/hamronized/amrfinderplus.ncbi.orfs.tsv", checkIfExists: true) + ] + input[1] = 'amrfinderplus' + input[2] = 'ncbi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("argnorm - missing tool") { + + when { + process { + """ + input[0] = [ + [ id:'argnorm_raw.tsv' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true) + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Tool not provided") } + ) + } + } + + test("argnorm - missing db") { + + when { + process { + """ + input[0] = [ + [ id:'argnorm_raw.tsv' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true) + ] + input[1] = "abricate" + input[2] = [] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("abricate requires a database but not provided.") } + ) + } + } + + test("argnorm - amrfinderplus_ncbi_hamronized - tsv - stub") { + options "-stub" + config './argnorm_hamronized.config' + + when { + process { + """ + input[0] = [ + [ id:'argnorm_hamronized_stub.tsv' ], // meta map + 
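+                    // The stub only touches its output file, so the snapshot below
+                    // records the empty-file checksum (md5 d41d8cd98f00b204e9800998ecf8427e).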
file("https://raw.githubusercontent.com/BigDataBiology/argNorm/main/examples/hamronized/amrfinderplus.ncbi.orfs.tsv", checkIfExists: true) + ] + input[1] = 'amrfinderplus' + input[2] = 'ncbi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("argnorm - amrfinderplus_ncbi - tsv - stub") { + + options "-stub" + config './argnorm_raw.config' + + when { + process { + """ + input[0] = [ + [ id:'argnorm_raw_stub.tsv' ], // meta map + file("https://raw.githubusercontent.com/BigDataBiology/argNorm/main/examples/raw/amrfinderplus.ncbi.orfs.tsv", checkIfExists: true) + ] + input[1] = 'amrfinderplus' + input[2] = 'ncbi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/argnorm/tests/main.nf.test.snap b/modules/nf-core/argnorm/tests/main.nf.test.snap new file mode 100644 index 00000000..4bed36ba --- /dev/null +++ b/modules/nf-core/argnorm/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "argnorm - amrfinderplus_ncbi_raw - tsv": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_raw.tsv" + }, + "argnorm_raw.tsv:md5,f870c239182592a065d9f80732b39bba" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_raw.tsv" + }, + "argnorm_raw.tsv:md5,f870c239182592a065d9f80732b39bba" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:46:00.195868976" + }, + "argnorm - amrfinderplus_ncbi_hamronized - tsv": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_hamronized.tsv" + }, + "argnorm_hamronized.tsv:md5,1f9a3820f09fd6a818af372dfe5cf322" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_hamronized.tsv" + }, + "argnorm_hamronized.tsv:md5,1f9a3820f09fd6a818af372dfe5cf322" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:46:31.856263885" + }, + "argnorm - amrfinderplus_ncbi_hamronized - tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_hamronized_stub.tsv" + }, + "argnorm_hamronized_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_hamronized_stub.tsv" + }, + "argnorm_hamronized_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:47:03.088627445" + }, + "argnorm - amrfinderplus_ncbi - tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_raw_stub.tsv" + }, + "argnorm_raw_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_raw_stub.tsv" + }, + "argnorm_raw_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:47:34.346622776" + } +} diff --git a/modules/nf-core/argnorm/tests/tags.yml b/modules/nf-core/argnorm/tests/tags.yml new file mode 100644 
index 00000000..a2b6e8d0 --- /dev/null +++ b/modules/nf-core/argnorm/tests/tags.yml @@ -0,0 +1,2 @@ +argnorm: + - "modules/nf-core/argnorm/**" diff --git a/modules/nf-core/bakta/bakta/environment.yml b/modules/nf-core/bakta/bakta/environment.yml new file mode 100644 index 00000000..b3c302f0 --- /dev/null +++ b/modules/nf-core/bakta/bakta/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bakta=1.9.3 diff --git a/modules/nf-core/bakta/bakta/main.nf b/modules/nf-core/bakta/bakta/main.nf new file mode 100644 index 00000000..9a32c3da --- /dev/null +++ b/modules/nf-core/bakta/bakta/main.nf @@ -0,0 +1,72 @@ +process BAKTA_BAKTA { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bakta:1.9.3--pyhdfd78af_0' : + 'biocontainers/bakta:1.9.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path db + path proteins + path prodigal_tf + + output: + tuple val(meta), path("${prefix}.embl") , emit: embl + tuple val(meta), path("${prefix}.faa") , emit: faa + tuple val(meta), path("${prefix}.ffn") , emit: ffn + tuple val(meta), path("${prefix}.fna") , emit: fna + tuple val(meta), path("${prefix}.gbff") , emit: gbff + tuple val(meta), path("${prefix}.gff3") , emit: gff + tuple val(meta), path("${prefix}.hypotheticals.tsv"), emit: hypotheticals_tsv + tuple val(meta), path("${prefix}.hypotheticals.faa"), emit: hypotheticals_faa + tuple val(meta), path("${prefix}.tsv") , emit: tsv + tuple val(meta), path("${prefix}.txt") , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" + def prodigal_tf = prodigal_tf ? "--prodigal-tf ${prodigal_tf[0]}" : "" + """ + bakta \\ + $fasta \\ + $args \\ + --threads $task.cpus \\ + --prefix $prefix \\ + $proteins_opt \\ + $prodigal_tf \\ + --db $db + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bakta: \$(echo \$(bakta --version) 2>&1 | cut -f '2' -d ' ') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.embl + touch ${prefix}.faa + touch ${prefix}.ffn + touch ${prefix}.fna + touch ${prefix}.gbff + touch ${prefix}.gff3 + touch ${prefix}.hypotheticals.tsv + touch ${prefix}.hypotheticals.faa + touch ${prefix}.tsv + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bakta: \$(echo \$(bakta --version) 2>&1 | cut -f '2' -d ' ') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bakta/bakta/meta.yml b/modules/nf-core/bakta/bakta/meta.yml new file mode 100644 index 00000000..f947e61b --- /dev/null +++ b/modules/nf-core/bakta/bakta/meta.yml @@ -0,0 +1,150 @@ +name: bakta_bakta +description: Annotation of bacterial genomes (isolates, MAGs) and plasmids +keywords: + - annotation + - fasta + - bacteria +tools: + - bakta: + description: Rapid & standardized annotation of bacterial genomes, MAGs & plasmids. + homepage: https://github.com/oschwengers/bakta + documentation: https://github.com/oschwengers/bakta + tool_dev_url: https://github.com/oschwengers/bakta + doi: "10.1099/mgen.0.000685" + licence: ["GPL v3"] + identifier: biotools:bakta +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ]
+    - fasta:
+        type: file
+        description: |
+          FASTA file to be annotated. Has to contain at least a non-empty string dummy value.
+  - - db:
+        type: file
+        description: |
+          Path to the Bakta database. Must have the amrfinderplus database directory already installed within it (in a directory called 'amrfinderplus-db/').
+  - - proteins:
+        type: file
+        description: FASTA/GenBank file of trusted proteins to first annotate from (optional)
+  - - prodigal_tf:
+        type: file
+        description: Training file to use for Prodigal (optional)
+output:
+  - embl:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.embl:
+          type: file
+          description: annotations & sequences in (multi) EMBL format
+          pattern: "*.embl"
+  - faa:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.faa:
+          type: file
+          description: CDS/sORF amino acid sequences as FASTA
+          pattern: "*.faa"
+  - ffn:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.ffn:
+          type: file
+          description: feature nucleotide sequences as FASTA
+          pattern: "*.ffn"
+  - fna:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.fna:
+          type: file
+          description: replicon/contig DNA sequences as FASTA
+          pattern: "*.fna"
+  - gbff:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.gbff:
+          type: file
+          description: annotations & sequences in (multi) GenBank format
+          pattern: "*.gbff"
+  - gff:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.gff3:
+          type: file
+          description: annotations & sequences in GFF3 format
+          pattern: "*.gff3"
+  - hypotheticals_tsv:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.hypotheticals.tsv:
+          type: file
+          description: additional information on hypothetical protein CDS as simple human
+            readable tab separated values
+          pattern: "*.hypotheticals.tsv"
+  - hypotheticals_faa:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.hypotheticals.faa:
+          type: file
+          description: hypothetical protein CDS amino acid sequences as FASTA
+          pattern: "*.hypotheticals.faa"
+  - tsv:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.tsv:
+          type: file
+          description: annotations as simple human readable tab separated values
+          pattern: "*.tsv"
+  - txt:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. 
[ id:'test', single_end:false ] + - ${prefix}.txt: + type: file + description: genome statistics and annotation summary + pattern: "*.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" + - "@oschwengers" + - "@jfy133" +maintainers: + - "@rpetit3" + - "@oschwengers" + - "@jfy133" diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test b/modules/nf-core/bakta/bakta/tests/main.nf.test new file mode 100644 index 00000000..3c1f8f82 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process BAKTA_BAKTA" + script "../main.nf" + config "./nextflow.config" + process "BAKTA_BAKTA" + + tag "modules" + tag "modules_nfcore" + tag "bakta" + tag "bakta/bakta" + tag "bakta/baktadbdownload" + + test("Bakta - bacteroides_fragilis - genome.fasta") { + + setup { + run("BAKTA_BAKTADBDOWNLOAD") { + script "../../baktadbdownload/main.nf" + process { + """ + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + input[1] = BAKTA_BAKTADBDOWNLOAD.out.db + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.embl.get(0).get(1)).text.contains("/translation=\"MKNTLKIAILLIAIISMGHWMPVKQVCDLNSLSLQNVEALANGET") }, + { assert path(process.out.faa.get(0).get(1)).text.contains("MKNTLKIAILLIAIISMGHWMPVKQVCDLNSLSLQNVEALANGETPNYTFCIGAGSVDCPIQHDKVKYVSQGFSLDY") }, + { assert path(process.out.ffn.get(0).get(1)).text.contains("ATGAAAAACACTTTAAAAATAGCTATTCTTCTTATTGCTATTATTTCTATGGGGCATTGGATGCCTGTAAAACAAGT") }, + { assert path(process.out.fna.get(0).get(1)).text.contains("TCTTTTTACTCATAATCTACTTTTATGATGTTAATTATTTTTTCCGTGTCTCTCTTTCGG") }, + { assert path(process.out.gbff.get(0).get(1)).text.contains("/translation=\"MKNTLKIAILLIAIISMGHWMPVKQVCDLNSLSLQNVEALANGET") }, + { assert path(process.out.gff.get(0).get(1)).text.contains("##sequence-region contig_1 1 2926") }, + { assert path(process.out.hypotheticals_tsv.get(0).get(1)).text.contains("#Annotated with Bakta") }, + { assert path(process.out.hypotheticals_faa.get(0).get(1)).text.contains("MKNLILVLGCFFFLISCQQTEKEKLEELVKNWNGKEVLL") }, + { assert path(process.out.tsv.get(0).get(1)).text.contains("SO:0001217, UniRef:UniRef50_A0A0I9S7A3") }, + { assert path(process.out.txt.get(0).get(1)).text.contains("Length: 1739120") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("Bakta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id: 'stub'],file('stub')] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test.snap b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap new file mode 100644 index 00000000..40e30c36 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap @@ -0,0 +1,191 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,f8b70ceb2a328c25a190699384e6152d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T09:11:06.657602394" + }, + "Bakta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "stub" + }, + 
"stub.embl:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "stub" + }, + "stub.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + "versions.yml:md5,f8b70ceb2a328c25a190699384e6152d" + ], + "2": [ + [ + { + "id": "stub" + }, + "stub.ffn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "stub" + }, + "stub.fna:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "stub" + }, + "stub.gbff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "stub" + }, + "stub.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "stub" + }, + "stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "stub" + }, + "stub.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "embl": [ + [ + { + "id": "stub" + }, + "stub.embl:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "faa": [ + [ + { + "id": "stub" + }, + "stub.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ffn": [ + [ + { + "id": "stub" + }, + "stub.ffn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fna": [ + [ + { + "id": "stub" + }, + "stub.fna:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gbff": [ + [ + { + "id": "stub" + }, + "stub.gbff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff": [ + [ + { + "id": "stub" + }, + "stub.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hypotheticals_faa": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hypotheticals_tsv": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "stub" + }, + "stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "stub" + }, + "stub.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f8b70ceb2a328c25a190699384e6152d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T09:11:15.532858932" + } +} \ No newline at end of file diff --git a/modules/nf-core/bakta/bakta/tests/nextflow.config b/modules/nf-core/bakta/bakta/tests/nextflow.config new file mode 100644 index 00000000..9af0dde1 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: 'BAKTA_BAKTADBDOWNLOAD' { + ext.args = "--type light" + } + + withName: 'BAKTA_BAKTA' { + memory = 7.GB + } + +} diff --git a/modules/nf-core/bakta/bakta/tests/tags.yml b/modules/nf-core/bakta/bakta/tests/tags.yml new file mode 100644 index 00000000..ecb08c45 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/tags.yml @@ -0,0 +1,2 @@ +bakta/bakta: + - "modules/nf-core/bakta/bakta/**" diff --git a/modules/nf-core/bakta/baktadbdownload/environment.yml b/modules/nf-core/bakta/baktadbdownload/environment.yml new file mode 100644 index 00000000..b3c302f0 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bakta=1.9.3 diff --git a/modules/nf-core/bakta/baktadbdownload/main.nf b/modules/nf-core/bakta/baktadbdownload/main.nf new file mode 100644 index 00000000..e512d77d --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/main.nf @@ -0,0 +1,43 @@ +process 
BAKTA_BAKTADBDOWNLOAD { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bakta:1.9.3--pyhdfd78af_0' : + 'biocontainers/bakta:1.9.3--pyhdfd78af_0' }" + + output: + path "db*" , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + bakta_db \\ + download \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bakta: \$(echo \$(bakta_db --version) 2>&1 | cut -f '2' -d ' ') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + echo "bakta_db \\ + download \\ + $args" + + mkdir db + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bakta: \$(echo \$(bakta_db --version) 2>&1 | cut -f '2' -d ' ') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bakta/baktadbdownload/meta.yml b/modules/nf-core/bakta/baktadbdownload/meta.yml new file mode 100644 index 00000000..a0a3a455 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/meta.yml @@ -0,0 +1,35 @@ +name: "bakta_baktadbdownload" +description: Downloads BAKTA database from Zenodo +keywords: + - bakta + - annotation + - fasta + - bacteria + - database + - download +tools: + - bakta: + description: Rapid & standardized annotation of bacterial genomes, MAGs & plasmids + homepage: https://github.com/oschwengers/bakta + documentation: https://github.com/oschwengers/bakta + tool_dev_url: https://github.com/oschwengers/bakta + doi: "10.1099/mgen.0.000685" + licence: ["GPL v3"] + identifier: biotools:bakta +output: + - db: + - db*: + type: directory + description: BAKTA database directory + pattern: "db*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" + - "@jasmezz" +maintainers: + - "@jfy133" + - "@jasmezz" diff --git a/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test new file mode 100644 index 00000000..a5f827f9 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test @@ -0,0 +1,55 @@ +nextflow_process { + + name "Test Process BAKTA_BAKTADBDOWNLOAD" + script "../main.nf" + process "BAKTA_BAKTADBDOWNLOAD" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bakta" + tag "bakta/baktadbdownload" + + test("Bakta database download") { + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.db.get(0)).exists() }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("Bakta database download - stub") { + + options "-stub" + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db + + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap new file mode 100644 index 00000000..b1c82267 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap @@ -0,0 +1,29 @@ +{ + "Bakta database download": { + "content": [ + [ + "versions.yml:md5,df9b091b08a41b7d5eef95727b7eac29" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T11:34:41.812416438" + }, + "Bakta database download - stub": { 
+ "content": [ + [ + [ + + ], + "versions.yml:md5,df9b091b08a41b7d5eef95727b7eac29" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T11:35:01.082923401" + } +} \ No newline at end of file diff --git a/modules/nf-core/bakta/baktadbdownload/tests/nextflow.config b/modules/nf-core/bakta/baktadbdownload/tests/nextflow.config new file mode 100644 index 00000000..8b99646a --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: 'BAKTA_BAKTADBDOWNLOAD' { + ext.args = "--type light" + } + +} diff --git a/modules/nf-core/bakta/baktadbdownload/tests/tags.yml b/modules/nf-core/bakta/baktadbdownload/tests/tags.yml new file mode 100644 index 00000000..c469fa48 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/tags.yml @@ -0,0 +1,2 @@ +bakta/baktadbdownload: + - "modules/nf-core/bakta/baktadbdownload/**" diff --git a/modules/nf-core/deeparg/downloaddata/environment.yml b/modules/nf-core/deeparg/downloaddata/environment.yml new file mode 100644 index 00000000..074c6501 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::deeparg=1.0.4 diff --git a/modules/nf-core/deeparg/downloaddata/main.nf b/modules/nf-core/deeparg/downloaddata/main.nf new file mode 100644 index 00000000..787c0027 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/main.nf @@ -0,0 +1,61 @@ +process DEEPARG_DOWNLOADDATA { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' : + 'biocontainers/deeparg:1.0.4--pyhdfd78af_0' }" + + /* + We have to force docker/singularity to mount a fake file to allow reading of a problematic file with borked read-write permissions in an upstream dependency (theanos). + Original report: https://github.com/nf-core/funcscan/issues/23 + */ + containerOptions { + "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : + "${workflow.containerEngine}" == 'docker' ? '-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : + '' + } + + input: + + output: + path "db/" , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + + # Theano needs a writable space and uses the home directory by default, + # but the latter is not always writable, for instance when Singularity + # is run in --no-home mode + mkdir -p theano + export THEANO_FLAGS="base_compiledir=\$PWD/theano" + + deeparg \\ + download_data \\ + $args \\ + -o db/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeparg: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + mkdir db/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeparg: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/deeparg/downloaddata/meta.yml b/modules/nf-core/deeparg/downloaddata/meta.yml new file mode 100644 index 00000000..5df2887b --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/meta.yml @@ -0,0 +1,36 @@ +name: deeparg_downloaddata +description: A deep learning based approach to predict Antibiotic Resistance Genes + (ARGs) from metagenomes +keywords: + - download + - database + - deeparg + - antimicrobial resistance genes + - deep learning + - prediction +tools: + - deeparg: + description: A deep learning based approach to predict Antibiotic Resistance Genes + (ARGs) from metagenomes + homepage: https://github.com/gaarangoa/deeparg + documentation: https://github.com/gaarangoa/deeparg + tool_dev_url: https://github.com/gaarangoa/deeparg + doi: "10.1186/s40168-018-0401-z" + licence: ["MIT"] + identifier: "" +# No input required for download module. +output: + - db: + - db/: + type: directory + description: Directory containing database required for deepARG. + pattern: "db/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/deeparg/downloaddata/tests/main.nf.test b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test new file mode 100644 index 00000000..8e8c7647 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process DEEPARG_DOWNLOADDATA" + script "../main.nf" + process "DEEPARG_DOWNLOADDATA" + + tag "modules" + tag "modules_nfcore" + tag "deeparg" + tag "deeparg/downloaddata" + + test("downloaddata") { + + + when { + process { + """ + // No input required + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + file(process.out.db.get(0)).list().sort(), + process.out.versions, + ).match() } + ) + } + + } + + test("downloaddata - stub") { + + options "-stub" + + when { + process { + """ + // No input required + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/deeparg/downloaddata/tests/main.nf.test.snap b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test.snap new file mode 100644 index 00000000..fca46527 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test.snap @@ -0,0 +1,53 @@ +{ + "downloaddata": { + "content": [ + [ + "LICENSE:md5,f244898ceed024da6d64a1b97746edb1", + "README.md:md5,6c0450350c2d52c0f9b5d81c3d22ea7b", + "__MACOSX", + "bin", + "database", + "deeparg", + "deeparg.gz", + "gg13", + "model", + "scripts" + ], + [ + "versions.yml:md5,30e73617295a9f10ac7781bfe8ba617f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:40:43.022804921" + }, + "downloaddata - stub": { + "content": [ + { + "0": [ + [ + + ] + ], + "1": [ + "versions.yml:md5,30e73617295a9f10ac7781bfe8ba617f" + ], + "db": [ + [ + + ] + ], + "versions": [ + "versions.yml:md5,30e73617295a9f10ac7781bfe8ba617f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:40:47.261220647" + } +} \ No newline at end of file diff --git a/modules/nf-core/deeparg/downloaddata/tests/tags.yml b/modules/nf-core/deeparg/downloaddata/tests/tags.yml new file mode 100644 index 
00000000..b909db89
--- /dev/null
+++ b/modules/nf-core/deeparg/downloaddata/tests/tags.yml
@@ -0,0 +1,2 @@
+deeparg/downloaddata:
+  - "modules/nf-core/deeparg/downloaddata/**"
diff --git a/modules/nf-core/deeparg/predict/environment.yml b/modules/nf-core/deeparg/predict/environment.yml
new file mode 100644
index 00000000..074c6501
--- /dev/null
+++ b/modules/nf-core/deeparg/predict/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::deeparg=1.0.4
diff --git a/modules/nf-core/deeparg/predict/main.nf b/modules/nf-core/deeparg/predict/main.nf
new file mode 100644
index 00000000..20fd0a93
--- /dev/null
+++ b/modules/nf-core/deeparg/predict/main.nf
@@ -0,0 +1,76 @@
+process DEEPARG_PREDICT {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' :
+        'biocontainers/deeparg:1.0.4--pyhdfd78af_0' }"
+
+    /*
+    We have to force docker/singularity to mount a fake file to allow reading of a problematic file with borked read-write permissions in an upstream dependency (Theano).
+    Original report: https://github.com/nf-core/funcscan/issues/23
+    */
+    containerOptions {
+        "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' :
+            "${workflow.containerEngine}" == 'docker' ? '-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' :
+            ''
+    }
+
+    input:
+    tuple val(meta), path(fasta), val(model)
+    path(db)
+
+    output:
+    tuple val(meta), path("*.align.daa")            , emit: daa
+    tuple val(meta), path("*.align.daa.tsv")        , emit: daa_tsv
+    tuple val(meta), path("*.mapping.ARG")          , emit: arg
+    tuple val(meta), path("*.mapping.potential.ARG"), emit: potential_arg
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    DATABASE=`find -L $db -type d -name "database" | sed 's/database//'`
+
+    # Theano needs a writable space and uses the home directory by default,
+    # but the latter is not always writable, for instance when Singularity
+    # is run in --no-home mode
+    mkdir -p theano
+    export THEANO_FLAGS="base_compiledir=\$PWD/theano"
+
+    deeparg \\
+        predict \\
+        $args \\
+        -i $fasta \\
+        -o ${prefix} \\
+        -d \$DATABASE \\
+        --model $model
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        deeparg: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
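+    // The stub touches the four expected outputs instead of running the DIAMOND
+    // alignment and Theano model, so all emit channels still resolve.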
+ """ + touch ${prefix}.align.daa + touch ${prefix}.align.daa.tsv + touch ${prefix}.mapping.ARG + touch ${prefix}.mapping.potential.ARG + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeparg: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/deeparg/predict/meta.yml b/modules/nf-core/deeparg/predict/meta.yml new file mode 100644 index 00000000..dbd63945 --- /dev/null +++ b/modules/nf-core/deeparg/predict/meta.yml @@ -0,0 +1,93 @@ +name: deeparg_predict +description: A deep learning based approach to predict Antibiotic Resistance Genes + (ARGs) from metagenomes +keywords: + - deeparg + - antimicrobial resistance + - antimicrobial resistance genes + - arg + - deep learning + - prediction + - contigs + - metagenomes +tools: + - deeparg: + description: A deep learning based approach to predict Antibiotic Resistance Genes + (ARGs) from metagenomes + homepage: https://github.com/gaarangoa/deeparg + documentation: https://github.com/gaarangoa/deeparg + tool_dev_url: https://github.com/gaarangoa/deeparg + doi: "10.1186/s40168-018-0401-z" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file containing gene-like sequences + pattern: "*.{fasta,fa,fna}" + - model: + type: string + description: Which model to use, depending on input data. Either 'LS' or 'SS' + for long or short sequences respectively + pattern: "LS|LS" + - - db: + type: directory + description: Path to a directory containing the deepARG pre-built models + pattern: "*/" +output: + - daa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.align.daa": + type: file + description: Sequences of ARG-like sequences from DIAMOND alignment + pattern: "*.align.daa" + - daa_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.align.daa.tsv": + type: file + description: Alignments scores against ARG-like sequences from DIAMOND alignment + pattern: "*.align.daa.tsv" + - arg: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mapping.ARG": + type: file + description: Table containing sequences with an ARG-like probability of more + than specified thresholds + pattern: "*.mapping.ARG" + - potential_arg: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.mapping.potential.ARG": + type: file + description: Table containing sequences with an ARG-like probability of less + than specified thresholds, and requires manual inspection + pattern: "*.mapping.potential.ARG" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/deeparg/predict/tests/main.nf.test b/modules/nf-core/deeparg/predict/tests/main.nf.test new file mode 100644 index 00000000..4841c6eb --- /dev/null +++ b/modules/nf-core/deeparg/predict/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process DEEPARG_PREDICT" + script "../main.nf" + process "DEEPARG_PREDICT" + + tag "modules" + tag "modules_nfcore" + tag "deeparg" + tag "deeparg/predict" + tag "deeparg/downloaddata" + + setup { + run("DEEPARG_DOWNLOADDATA") { + script "../../../deeparg/downloaddata/main.nf" + process { + """ + // No input necessary + """ + } + } + + } + + test("bacteroides_fragilis - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + 'LS' + ] + input[1] = DEEPARG_DOWNLOADDATA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.daa_tsv, + process.out.arg, + file(process.out.daa[0][1]).name, + path(process.out.potential_arg[0][1]).readLines().first().contains("#ARG") + ).match() + } + ) + } + + } + + test("bacteroides_fragilis - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + 'LS' + ] + input[1] = DEEPARG_DOWNLOADDATA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/deeparg/predict/tests/main.nf.test.snap b/modules/nf-core/deeparg/predict/tests/main.nf.test.snap new file mode 100644 index 00000000..fa5df047 --- /dev/null +++ b/modules/nf-core/deeparg/predict/tests/main.nf.test.snap @@ -0,0 +1,120 @@ +{ + "bacteroides_fragilis - fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa.tsv:md5,46b6eba345742365fc1dbd5b4bacd3a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ] + ], + "test.align.daa", + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T20:55:06.511718259" + }, + "bacteroides_fragilis - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.potential.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,1668194fbcb82f7cce4699baa00c02a1" + ], + "arg": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.mapping.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "daa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "daa_tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "potential_arg": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.potential.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1668194fbcb82f7cce4699baa00c02a1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:50:17.147755715" + } +} \ No newline at end of file diff --git a/modules/nf-core/deeparg/predict/tests/tags.yml b/modules/nf-core/deeparg/predict/tests/tags.yml new file mode 100644 index 00000000..7fa73e37 --- /dev/null +++ b/modules/nf-core/deeparg/predict/tests/tags.yml @@ -0,0 +1,2 @@ +deeparg/predict: + - "modules/nf-core/deeparg/predict/**" diff --git a/modules/nf-core/deepbgc/download/environment.yml b/modules/nf-core/deepbgc/download/environment.yml new file mode 100644 index 00000000..36cb903f --- /dev/null +++ b/modules/nf-core/deepbgc/download/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::deepbgc=0.1.31 diff --git a/modules/nf-core/deepbgc/download/main.nf b/modules/nf-core/deepbgc/download/main.nf new file mode 100644 index 00000000..b141142c --- /dev/null +++ b/modules/nf-core/deepbgc/download/main.nf @@ -0,0 +1,30 @@ +process DEEPBGC_DOWNLOAD { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deepbgc:0.1.31--pyhca03a8a_0': + 'biocontainers/deepbgc:0.1.31--pyhca03a8a_0' }" + + output: + path "deepbgc_db/" , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + export DEEPBGC_DOWNLOADS_DIR='./deepbgc_db' + + deepbgc \\ + download + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepbgc/download/meta.yml b/modules/nf-core/deepbgc/download/meta.yml new file mode 100644 index 00000000..4551e9a0 --- /dev/null +++ b/modules/nf-core/deepbgc/download/meta.yml @@ -0,0 +1,38 @@ +name: "deepbgc_download" +description: Database download module for DeepBGC which detects BGCs in bacterial + and fungal genomes using deep learning. 
+keywords: + - database + - download + - BGC + - biosynthetic gene cluster + - deep learning + - neural network + - random forest + - genomes + - bacteria + - fungi +tools: + - "deepbgc": + description: "DeepBGC - Biosynthetic Gene Cluster detection and classification" + homepage: "https://github.com/Merck/deepbgc" + documentation: "https://github.com/Merck/deepbgc" + tool_dev_url: "https://github.com/Merck/deepbgc" + doi: "10.1093/nar/gkz654" + licence: ["MIT"] + identifier: biotools:DeepBGC +output: + - db: + - deepbgc_db/: + type: directory + description: Directory containing the DeepBGC database + pattern: "deepbgc_db/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/deepbgc/download/tests/main.nf.test b/modules/nf-core/deepbgc/download/tests/main.nf.test new file mode 100644 index 00000000..a1c2c532 --- /dev/null +++ b/modules/nf-core/deepbgc/download/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process DEEPBGC_DOWNLOAD" + script "../main.nf" + process "DEEPBGC_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "deepbgc" + tag "deepbgc/download" + + test("deepbgc download db") { + + when { + process { + """ + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.db).match("db") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/deepbgc/download/tests/main.nf.test.snap b/modules/nf-core/deepbgc/download/tests/main.nf.test.snap new file mode 100644 index 00000000..b71c00ee --- /dev/null +++ b/modules/nf-core/deepbgc/download/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,4130f2ce0a4d43fc3d8e04f4935f908b" + ] + ], + "timestamp": "2023-12-04T13:10:01.115594047" + }, + "db": { + "content": [ + [ + [ + [ + [ + "product_activity.pkl:md5,90f0c010460e9df882cb057664a49f30", + "product_class.pkl:md5,f78a2eda240403d2f40643d42202f3ac" + ], + [ + "clusterfinder_geneborder.pkl:md5,ca4be7031ae9f70780f17c616a4fa5b5", + "clusterfinder_original.pkl:md5,2ca2429bb9bc99a401d1093c376b37aa", + "clusterfinder_retrained.pkl:md5,65679a3b61c562ff4b84bdb574bb6d93", + "deepbgc.pkl:md5,7e9218be79ba45bc9adb23bed3845dc1" + ] + ], + [ + "Pfam-A.31.0.clans.tsv:md5,a0a4590ffb2b33b83ef2b28f6ead886b", + "Pfam-A.31.0.hmm:md5,79a3328e4c95b13949a4489b19959fc5", + "Pfam-A.31.0.hmm.h3f:md5,cbca323cf8dd4e5e7c109114ec444162", + "Pfam-A.31.0.hmm.h3i:md5,5242332a3f6a60cd1ab634cd9331afd6", + "Pfam-A.31.0.hmm.h3m:md5,1fe946fa2b3bcde1d4b2bad732bce612", + "Pfam-A.31.0.hmm.h3p:md5,27b98a1ded123b6a1ef72db01927017c" + ] + ] + ] + ], + "timestamp": "2023-12-04T13:09:47.229121097" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepbgc/download/tests/tags.yml b/modules/nf-core/deepbgc/download/tests/tags.yml new file mode 100644 index 00000000..6f1c7569 --- /dev/null +++ b/modules/nf-core/deepbgc/download/tests/tags.yml @@ -0,0 +1,2 @@ +deepbgc/download: + - "modules/nf-core/deepbgc/download/**" diff --git a/modules/nf-core/deepbgc/pipeline/environment.yml b/modules/nf-core/deepbgc/pipeline/environment.yml new file mode 100644 index 00000000..36cb903f --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::deepbgc=0.1.31 diff --git a/modules/nf-core/deepbgc/pipeline/main.nf 
b/modules/nf-core/deepbgc/pipeline/main.nf new file mode 100644 index 00000000..fc72d238 --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/main.nf @@ -0,0 +1,80 @@ +process DEEPBGC_PIPELINE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deepbgc:0.1.31--pyhca03a8a_0': + 'biocontainers/deepbgc:0.1.31--pyhca03a8a_0' }" + + input: + tuple val(meta), path(genome) + path(db) + + output: + tuple val(meta), path("${prefix}/README.txt") , optional: true, emit: readme + tuple val(meta), path("${prefix}/LOG.txt") , emit: log + tuple val(meta), path("${prefix}/${prefix}.antismash.json") , optional: true, emit: json + tuple val(meta), path("${prefix}/${prefix}.bgc.gbk") , optional: true, emit: bgc_gbk + tuple val(meta), path("${prefix}/${prefix}.bgc.tsv") , optional: true, emit: bgc_tsv + tuple val(meta), path("${prefix}/${prefix}.full.gbk") , optional: true, emit: full_gbk + tuple val(meta), path("${prefix}/${prefix}.pfam.tsv") , optional: true, emit: pfam_tsv + tuple val(meta), path("${prefix}/evaluation/${prefix}.bgc.png") , optional: true, emit: bgc_png + tuple val(meta), path("${prefix}/evaluation/${prefix}.pr.png") , optional: true, emit: pr_png + tuple val(meta), path("${prefix}/evaluation/${prefix}.roc.png") , optional: true, emit: roc_png + tuple val(meta), path("${prefix}/evaluation/${prefix}.score.png"), optional: true, emit: score_png + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + export DEEPBGC_DOWNLOADS_DIR=${db} + + deepbgc \\ + pipeline \\ + $args \\ + $genome + + if [[ "${genome.baseName}/" != "${prefix}/" ]]; then + mv "${genome.baseName}/" "${prefix}/" + fi + + for i in \$(find -name '${genome.baseName}*' -type f); do + mv \$i \${i/${genome.baseName}/${prefix}}; + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) ) + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}/evaluation + touch ${prefix}/README.txt + touch ${prefix}/LOG.txt + touch ${prefix}/${prefix}.antismash.json + touch ${prefix}/${prefix}.bgc.gbk + touch ${prefix}/${prefix}.bgc.tsv + touch ${prefix}/${prefix}.full.gbk + touch ${prefix}/${prefix}.pfam.tsv + touch ${prefix}/evaluation/${prefix}.bgc.png + touch ${prefix}/evaluation/${prefix}.pr.png + touch ${prefix}/evaluation/${prefix}.roc.png + touch ${prefix}/evaluation/${prefix}.score.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) ) + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepbgc/pipeline/meta.yml b/modules/nf-core/deepbgc/pipeline/meta.yml new file mode 100644 index 00000000..186c7d30 --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/meta.yml @@ -0,0 +1,157 @@ +name: "deepbgc_pipeline" +description: DeepBGC detects BGCs in bacterial and fungal genomes using deep learning. 
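DEEPBGC_PIPELINE accepts FASTA, GenBank, or Pfam CSV input, so it can consume a raw assembly directly or the GenBank output of an annotation step such as Prodigal (the test file further below exercises both routes); the script block above re-exports DEEPBGC_DOWNLOADS_DIR to the staged database and renames outputs from the input's base name to the configurable ext.prefix. A minimal wiring sketch; assembly.fa and the include paths are illustrative only:

    include { DEEPBGC_DOWNLOAD } from './modules/nf-core/deepbgc/download/main'
    include { DEEPBGC_PIPELINE } from './modules/nf-core/deepbgc/pipeline/main'

    workflow {
        ch_genome = Channel.of( [ [ id:'sample1' ], file('assembly.fa') ] )

        DEEPBGC_DOWNLOAD()
        DEEPBGC_PIPELINE( ch_genome, DEEPBGC_DOWNLOAD.out.db )

        // table of detected BGCs; an optional output, absent when none are found
        DEEPBGC_PIPELINE.out.bgc_tsv.view()
    }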
+keywords: + - BGC + - biosynthetic gene cluster + - deep learning + - neural network + - random forest + - genomes + - bacteria + - fungi +tools: + - "deepbgc": + description: "DeepBGC - Biosynthetic Gene Cluster detection and classification" + homepage: "https://github.com/Merck/deepbgc" + documentation: "https://github.com/Merck/deepbgc" + tool_dev_url: "https://github.com/Merck/deepbgc" + doi: "10.1093/nar/gkz654" + licence: ["MIT"] + identifier: biotools:DeepBGC +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - genome: + type: file + description: FASTA/GenBank/Pfam CSV file + pattern: "*.{fasta,fa,fna,gbk,csv}" + - - db: + type: directory + description: Database path +output: + - readme: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/README.txt: + type: file + description: txt file containing description of output files + pattern: "*.{txt}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/LOG.txt: + type: file + description: Log output of DeepBGC + pattern: "*.{txt}" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/${prefix}.antismash.json: + type: file + description: AntiSMASH JSON file for sideloading + pattern: "*.{json}" + - bgc_gbk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/${prefix}.bgc.gbk: + type: file + description: Sequences and features of all detected BGCs in GenBank format + pattern: "*.{bgc.gbk}" + - bgc_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/${prefix}.bgc.tsv: + type: file + description: Table of detected BGCs and their properties + pattern: "*.{bgc.tsv}" + - full_gbk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/${prefix}.full.gbk: + type: file + description: Fully annotated input sequence with proteins, Pfam domains (PFAM_domain + features) and BGCs (cluster features) + pattern: "*.{full.gbk}" + - pfam_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/${prefix}.pfam.tsv: + type: file + description: Table of Pfam domains (pfam_id) from given sequence (sequence_id) + in genomic order, with BGC detection scores + pattern: "*.{pfam.tsv}" + - bgc_png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/evaluation/${prefix}.bgc.png: + type: file + description: Detected BGCs plotted by their nucleotide coordinates + pattern: "*.{bgc.png}" + - pr_png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/evaluation/${prefix}.pr.png: + type: file + description: Precision-Recall curve based on predicted per-Pfam BGC scores + pattern: "*.{pr.png}" + - roc_png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - ${prefix}/evaluation/${prefix}.roc.png: + type: file + description: ROC curve based on predicted per-Pfam BGC scores + pattern: "*.{roc.png}" + - score_png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test'] + - ${prefix}/evaluation/${prefix}.score.png: + type: file + description: BGC detection scores of each Pfam domain in genomic order + pattern: "*.{score.png}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" + - "@jfy133" +maintainers: + - "@louperelo" + - "@jfy133" diff --git a/modules/nf-core/deepbgc/pipeline/tests/main.nf.test b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test new file mode 100644 index 00000000..9dd24049 --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test @@ -0,0 +1,116 @@ +nextflow_process { + + name "Test Process DEEPBGC_PIPELINE" + script "../main.nf" + process "DEEPBGC_PIPELINE" + + tag "modules" + tag "modules_nfcore" + tag "deepbgc" + tag "deepbgc/pipeline" + tag "deepbgc/download" + tag "gunzip" + tag "prodigal" + + setup { + run("DEEPBGC_DOWNLOAD") { + script "../..//download/main.nf" + process { + """ + """ + } + } + run("GUNZIP") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test_gbk', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true)) + ]) + """ + } + } + run("PRODIGAL") { + script "../../../prodigal/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'gbk' + """ + } + } + } + + test("deepbgc pipeline gbk - bacteroides fragilis - test1_contigs.fa.gz") { + + when { + process { + """ + input [0] = PRODIGAL.out.gene_annotations + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("gbk_versions") }, + { assert snapshot(process.out.json).match("gbk_json") }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.bgc_gbk.get(0).get(1)).exists() }, + { assert path(process.out.full_gbk.get(0).get(1)).exists() } + ) + } + + } + + test("deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz") { + + when { + process { + """ + input [0] = GUNZIP.out.gunzip + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("fa_versions") }, + { assert snapshot(process.out.bgc_gbk).match("fa_bgc_gbk") }, + { assert snapshot(process.out.bgc_png).match("fa_bgc_png") }, + { assert snapshot(process.out.score_png).match("fa_score_png") }, + { assert snapshot(process.out.pfam_tsv).match("fa_pfam_tsv") }, + { assert path(process.out.json.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.bgc_tsv.get(0).get(1)).exists() }, + { assert path(process.out.full_gbk.get(0).get(1)).exists() } + ) + } + } + + test("deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz - stub") { + options "-stub" + when { + process { + """ + input [0] = GUNZIP.out.gunzip + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} + diff --git a/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap new file mode 100644 index 00000000..ef64db97 --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap @@ -0,0 +1,331 @@ +{ + "gbk_versions": { + "content": [ + [ + 
"versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2023-12-01T18:29:41.728695197" + }, + "fa_bgc_png": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,f4a0fc6cd260e2d7ad16f7a1fa103f96" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.389704368" + }, + "deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "LOG.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ], + "2": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.full.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pr.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.roc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_gbk": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_tsv": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "full_gbk": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.full.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "LOG.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pfam_tsv": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pr_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pr.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readme": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "roc_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.roc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "score_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], 
+ "versions": [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:32:11.354631831" + }, + "fa_score_png": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,572e8882031f667580d8c8e13c2cbb91" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.401051746" + }, + "fa_pfam_tsv": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,1179eb4e6df0c83aaeec18d7d34e7524" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.411632144" + }, + "gbk_json": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,889ac1efb6a9a7d7b8c65e4cd2233bba" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:25:25.861672633" + }, + "fa_versions": { + "content": [ + [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2023-12-01T18:44:16.352023677" + }, + "fa_bgc_gbk": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,7fc70dd034903622dae273bf71b402f2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.383560585" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepbgc/pipeline/tests/tags.yml b/modules/nf-core/deepbgc/pipeline/tests/tags.yml new file mode 100644 index 00000000..c6c4e11d --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +deepbgc/pipeline: + - "modules/nf-core/deepbgc/pipeline/**" diff --git a/modules/nf-core/fargene/environment.yml b/modules/nf-core/fargene/environment.yml new file mode 100644 index 00000000..ade4d770 --- /dev/null +++ b/modules/nf-core/fargene/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fargene=0.1 diff --git a/modules/nf-core/fargene/main.nf b/modules/nf-core/fargene/main.nf new file mode 100644 index 00000000..42aa2ca2 --- /dev/null +++ b/modules/nf-core/fargene/main.nf @@ -0,0 +1,83 @@ +process FARGENE { + tag "$meta.id" + label 'process_low' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fargene:0.1--py27h21c881e_4' : + 'biocontainers/fargene:0.1--py27h21c881e_4' }" + + input: + // input may be FASTA (for genomes or longer contigs) or paired-end FASTQ (for metagenomes); the latter additionally requires the --meta flag + tuple val(meta), path(input) + val hmm_model + + output: + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("${prefix}/results_summary.txt") , emit: txt + tuple val(meta), path("${prefix}/hmmsearchresults/*.out") , optional: true, emit: hmm + tuple val(meta), path("${prefix}/hmmsearchresults/retrieved-*.out") , optional: true, emit: hmm_genes + tuple val(meta), path("${prefix}/predictedGenes/predicted-orfs.fasta") , optional: true, emit: orfs + tuple val(meta), path("${prefix}/predictedGenes/predicted-orfs-amino.fasta") , optional: true, emit: orfs_amino + tuple val(meta), path("${prefix}/predictedGenes/retrieved-contigs.fasta") , optional: true, emit: contigs + tuple val(meta), path("${prefix}/predictedGenes/retrieved-contigs-peptides.fasta") , optional: true, emit: contigs_pept + tuple val(meta), path("${prefix}/predictedGenes/*filtered.fasta") , optional: true, emit: filtered + tuple val(meta), path("${prefix}/predictedGenes/*filtered-peptides.fasta") , optional: true, emit: filtered_pept + tuple val(meta), path("${prefix}/retrievedFragments/all_retrieved_*.fastq") , optional: true, emit: fragments + tuple val(meta), path("${prefix}/retrievedFragments/trimmedReads/*.fasta") , optional: true, emit: trimmed + tuple val(meta), path("${prefix}/spades_assembly/*") , optional: true, emit: spades + tuple val(meta), path("${prefix}/tmpdir/*.fasta") , optional: true, emit: metagenome + tuple val(meta), path("${prefix}/tmpdir/*.out") , optional: true, emit: tmp + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + fargene \\ + $args \\ + -p $task.cpus \\ + -i $input \\ + --hmm-model $hmm_model \\ + -o $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fargene: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
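As the input comment above notes, metagenomic (paired-end FASTQ) input needs faRGene's --meta flag, which reaches the tool through task.ext.args, while the HMM class is an ordinary value input. A configuration and invocation sketch, with hypothetical channel and config file names:

    // conf/modules.config (hypothetical): pass faRGene's metagenomic flag via ext.args
    process {
        withName: 'FARGENE' {
            ext.args = '--meta'
        }
    }

    // main.nf (hypothetical): 'class_a' is one of the pre-defined models
    // listed in the meta.yml below
    FARGENE( ch_reads, 'class_a' )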
+ """ + touch ${prefix}.log + mkdir -p ${prefix}/{hmmsearchresults,predictedGenes,retrievedFragments} + mkdir -p ${prefix}/retrievedFragments/trimmedReads/ + + touch ${prefix}/results_summary.txt + touch ${prefix}/hmmsearchresults/retrieved-${prefix}.out + touch ${prefix}/hmmsearchresults/${prefix}.out + touch ${prefix}/predictedGenes/predicted-orfs.fasta + touch ${prefix}/predictedGenes/predicted-orfs-amino.fasta + touch ${prefix}/predictedGenes/retrieved-contigs.fasta + touch ${prefix}/predictedGenes/retrieved-contigs-peptides.fasta + touch ${prefix}/predictedGenes/${prefix}-filtered.fasta + touch ${prefix}/predictedGenes/${prefix}-filtered-peptides.fasta + touch ${prefix}/retrievedFragments/all_retrieved_${prefix}.fastq + touch ${prefix}/retrievedFragments/trimmedReads/${prefix}.fasta + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fargene: $VERSION + END_VERSIONS + """ + +} diff --git a/modules/nf-core/fargene/meta.yml b/modules/nf-core/fargene/meta.yml new file mode 100644 index 00000000..e1bcc5ea --- /dev/null +++ b/modules/nf-core/fargene/meta.yml @@ -0,0 +1,201 @@ +name: fargene +description: tool that takes either fragmented metagenomic data or longer sequences + as input and predicts and delivers full-length antiobiotic resistance genes as output. +keywords: + - antibiotic resistance genes + - ARGs + - identifier + - metagenomic + - contigs +tools: + - fargene: + description: Fragmented Antibiotic Resistance Gene Identifier takes either fragmented + metagenomic data or longer sequences as input and predicts and delivers full-length + antiobiotic resistance genes as output + homepage: https://github.com/fannyhb/fargene + documentation: https://github.com/fannyhb/fargene + tool_dev_url: https://github.com/fannyhb/fargene + licence: ["MIT"] + identifier: biotools:fargene +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: uncompressed fasta file or paired-end fastq files containing either + genomes or longer contigs as nucleotide or protein sequences (fasta) or fragmented + metagenomic reads (fastq) + pattern: "*.{fasta}" + - - hmm_model: + type: string + description: name of custom hidden markov model to be used [pre-defined class_a, + class_b_1_2, class_b_3, class_c, class_d_1, class_d_2, qnr, tet_efflux, tet_rpg, + tet_enzyme] +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: log file + pattern: "*.{log}" + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/results_summary.txt: + type: file + description: analysis summary text file + pattern: "*.{txt}" + - hmm: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/hmmsearchresults/*.out: + type: file + description: output from hmmsearch (both single gene annotations + contigs) + pattern: "*.{out}" + - hmm_genes: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/hmmsearchresults/retrieved-*.out: + type: file + description: output from hmmsearch (single gene annotations only) + pattern: "retrieved-*.{out}" + - orfs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/predictedGenes/predicted-orfs.fasta: + type: file + description: open reading frames (ORFs) + pattern: "*.{fasta}" + - orfs_amino: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/predictedGenes/predicted-orfs-amino.fasta: + type: file + description: protein translation of open reading frames (ORFs) + pattern: "*.{fasta}" + - contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/predictedGenes/retrieved-contigs.fasta: + type: file + description: (complete) contigs that passed the final full-length classification + pattern: "*.{fasta}" + - contigs_pept: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/predictedGenes/retrieved-contigs-peptides.fasta: + type: file + description: parts of the contigs that passed the final classification step + that aligned with the HMM, as amino acid sequences + pattern: "*.{fasta}" + - filtered: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/predictedGenes/*filtered.fasta: + type: file + description: sequences that passed the final classification step, but only the + parts that were predicted by the HMM to be part of the gene + pattern: "*.{fasta}" + - filtered_pept: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/predictedGenes/*filtered-peptides.fasta: + type: file + description: sequences from filtered.fasta, translated in the same frame as + the gene is predicted to be located + pattern: "*.{fasta}" + - fragments: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/retrievedFragments/all_retrieved_*.fastq: + type: file + description: All quality controlled retrieved fragments that were classified + as positive, together with their read-pairs, gathered in two files + pattern: "*.{fastq}" + - trimmed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/retrievedFragments/trimmedReads/*.fasta: + type: file + description: The quality controlled retrieved fragments from each input file. + pattern: "*.{fasta}" + - spades: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/spades_assembly/*: + type: directory + description: The output from the SPAdes assembly + pattern: "spades_assembly" + - metagenome: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/tmpdir/*.fasta: + type: file + description: The FASTQ-to-FASTA-converted input files from metagenomic reads. + pattern: "*.{fasta}" + - tmp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/tmpdir/*.out: + type: file + description: The FASTQ-to-FASTA-converted input files and their translated + input sequences. These are only saved if the option --store-peptides is used.
+ pattern: "*.{fasta}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/fargene/tests/main.nf.test b/modules/nf-core/fargene/tests/main.nf.test new file mode 100644 index 00000000..2f4e3fc6 --- /dev/null +++ b/modules/nf-core/fargene/tests/main.nf.test @@ -0,0 +1,76 @@ +nextflow_process { + + name "Test Process FARGENE" + script "../main.nf" + process "FARGENE" + + tag "modules" + tag "modules_nfcore" + tag "fargene" + tag "gunzip" + + setup { + + run("GUNZIP") { + script "../../gunzip/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true)) + ]) + """ + } + } + } + + test("fargene - bacteroides fragilis - contigs.fa.gz") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'class_a' + """ + } + } + + then { + assertAll { + { assert process.success } + { assert snapshot( + process.out.txt, + path(process.out.log[0][1]).readLines().last().contains("Output can be found in"), + path(process.out.hmm[0][1]).readLines().last().contains("[ok]"), + file(process.out.tmp[0][1].find { file(it).name == "tmp.out" }).readLines().last().contains("[ok]"), + process.out.versions + ).match() + } + } + } + + } + + test("fargene - bacteroides fragilis - contigs.fa.gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'class_a' + """ + } + } + + then { + assertAll { + { assert process.success } + { assert snapshot(process.out).match() } + } + } + + } + +} diff --git a/modules/nf-core/fargene/tests/main.nf.test.snap b/modules/nf-core/fargene/tests/main.nf.test.snap new file mode 100644 index 00000000..54724f1b --- /dev/null +++ b/modules/nf-core/fargene/tests/main.nf.test.snap @@ -0,0 +1,283 @@ +{ + "fargene - bacteroides fragilis - contigs.fa.gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "results_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "all_retrieved_test.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs-amino.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + 
"retrieved-contigs-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "contigs": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "contigs_pept": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_pept": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fragments": [ + [ + { + "id": "test", + "single_end": false + }, + "all_retrieved_test.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hmm": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "hmm_genes": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metagenome": [ + + ], + "orfs": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "orfs_amino": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs-amino.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spades": [ + + ], + "tmp": [ + + ], + "trimmed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "results_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T15:08:54.830926802" + }, + "fargene - bacteroides fragilis - contigs.fa.gz": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50" + ] + ], + true, + true, + true, + [ + "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T15:24:49.384451483" + } +} \ No newline at end of file diff --git a/modules/nf-core/fargene/tests/tags.yml b/modules/nf-core/fargene/tests/tags.yml new file mode 100644 index 00000000..c470032f --- /dev/null +++ b/modules/nf-core/fargene/tests/tags.yml @@ -0,0 +1,2 @@ +fargene: + - "modules/nf-core/fargene/**" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 752c3a10..00000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,64 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') - - // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) - // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 - // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus - // FastQC memory value allowed range (100 - 10000) - def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) - - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - - fastqc \\ - $args \\ - --threads $task.cpus \\ - --memory $fastqc_memory \\ - $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 2b2e62b8..00000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] - identifier: biotools:fastqc -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - html: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.html": - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - "*.zip": - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index e9d79a07..00000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,309 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("sarscov2 single-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 interleaved [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [bam]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 multiple [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert 
process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 custom_prefix") { - - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 single-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 interleaved [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [bam] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 multiple [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 custom_prefix - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index d5db3092..00000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,392 +0,0 @@ -{ - "sarscov2 custom_prefix": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:16.374038" - }, - "sarscov2 single-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:24.993809" - }, - "sarscov2 custom_prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:10.93942" - }, - "sarscov2 interleaved [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:42.355718" - }, - "sarscov2 paired-end [bam]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:53.276274" - }, - "sarscov2 multiple [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:05.527626" - }, - "sarscov2 paired-end 
[fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:31.188871" - }, - "sarscov2 paired-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:34.273566" - }, - "sarscov2 multiple [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:02.304411" - }, - "sarscov2 single-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:19.095607" - }, - "sarscov2 interleaved [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:44.640184" - }, - "sarscov2 paired-end [bam] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - 
"meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:53.550742" - } -} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml deleted file mode 100644 index 7834294b..00000000 --- a/modules/nf-core/fastqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastqc: - - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gecco/run/environment.yml b/modules/nf-core/gecco/run/environment.yml new file mode 100644 index 00000000..7db7dc87 --- /dev/null +++ b/modules/nf-core/gecco/run/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gecco=0.9.10 diff --git a/modules/nf-core/gecco/run/main.nf b/modules/nf-core/gecco/run/main.nf new file mode 100644 index 00000000..04399d14 --- /dev/null +++ b/modules/nf-core/gecco/run/main.nf @@ -0,0 +1,65 @@ +process GECCO_RUN { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gecco:0.9.10--pyhdfd78af_0': + 'biocontainers/gecco:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input), path(hmm) + path model_dir + + output: + tuple val(meta), path("*.genes.tsv") , optional: true, emit: genes + tuple val(meta), path("*.features.tsv") , emit: features + tuple val(meta), path("*.clusters.tsv") , optional: true, emit: clusters + tuple val(meta), path("*_cluster_*.gbk"), optional: true, emit: gbk + tuple val(meta), path("*.json") , optional: true, emit: json + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def custom_model = model_dir ? "--model ${model_dir}" : "" + def custom_hmm = hmm ? "--hmm ${hmm}" : "" + """ + gecco \\ + run \\ + $args \\ + -j $task.cpus \\ + -o ./ \\ + -g ${input} \\ + $custom_model \\ + $custom_hmm + + for i in \$(find -name '${input.baseName}*' -type f); do + mv \$i \${i/${input.baseName}/${prefix}}; + done + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.genes.tsv + touch ${prefix}.features.tsv + touch ${prefix}.clusters.tsv + touch NC_018507.1_cluster_1.gbk + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gecco/run/meta.yml b/modules/nf-core/gecco/run/meta.yml new file mode 100644 index 00000000..6a557cea --- /dev/null +++ b/modules/nf-core/gecco/run/meta.yml @@ -0,0 +1,101 @@ +name: "gecco_run" +description: GECCO is a fast and scalable method for identifying putative novel Biosynthetic + Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields + (CRFs). +keywords: + - bgc + - detection + - metagenomics + - contigs +tools: + - "gecco": + description: "Biosynthetic Gene Cluster prediction with Conditional Random Fields." + homepage: "https://gecco.embl.de" + documentation: "https://gecco.embl.de" + tool_dev_url: "https://github.com/zellerlab/GECCO" + doi: "10.1101/2021.05.03.442509" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: A genomic file containing one or more sequences as input. Input + type is any supported by Biopython (fasta, gbk, etc.) + pattern: "*" + - hmm: + type: file + description: Alternative HMM file(s) to use in HMMER format + pattern: "*.hmm" + - - model_dir: + type: directory + description: Path to an alternative CRF (Conditional Random Fields) model to + use +output: + - genes: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.genes.tsv": + type: file + description: TSV file containing detected/predicted genes with BGC probability + scores. Will not be generated if no hits are found. + pattern: "*.genes.tsv" + - features: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.features.tsv": + type: file + description: TSV file containing identified domains + pattern: "*.features.tsv" + - clusters: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.clusters.tsv": + type: file + description: TSV file containing coordinates of predicted clusters and BGC types. Will + not be generated if no hits are found. + pattern: "*.clusters.tsv" + - gbk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_cluster_*.gbk": + type: file + description: Per cluster GenBank file (if found) containing sequence with annotations. + Will not be generated if no hits are found. + pattern: "*.gbk" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: AntiSMASH v6 sideload JSON file (if --antismash-sideload is supplied). + Will not be generated if no hits are found. 
+ pattern: "*.gbk" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/gecco/run/tests/main.nf.test b/modules/nf-core/gecco/run/tests/main.nf.test new file mode 100644 index 00000000..002a8e84 --- /dev/null +++ b/modules/nf-core/gecco/run/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process GECCO_RUN" + script "../main.nf" + process "GECCO_RUN" + + tag "modules" + tag "modules_nfcore" + tag "gecco" + tag "gecco/run" + + test("gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.genes + + process.out.features + + process.out.clusters + + process.out.versions + ).match() }, + { assert path(process.out.gbk.get(0).get(1)).text.contains("MVKNDIDILILGGGCTGLSLAYYLSFLPNTVRIFLIENKFIYNND") } + ) + } + + } + + test("gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gecco/run/tests/main.nf.test.snap b/modules/nf-core/gecco/run/tests/main.nf.test.snap new file mode 100644 index 00000000..aec1cd52 --- /dev/null +++ b/modules/nf-core/gecco/run/tests/main.nf.test.snap @@ -0,0 +1,130 @@ +{ + "gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.genes.tsv:md5,2338b99d9b77200e9a071941ad8bb3ac" + ], + [ + { + "id": "test", + "single_end": false + }, + "test.features.tsv:md5,dcd929c60337b4835729d886897cdd36" + ], + [ + { + "id": "test", + "single_end": false + }, + "test.clusters.tsv:md5,84a10da0afff6e2085d8296db69eb8b0" + ], + "versions.yml:md5,3ade04da8c7c1f46cdf07ecb0334a777" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-29T13:44:10.071851827" + }, + "gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.genes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.features.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.clusters.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "NC_018507.1_cluster_1.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,3ade04da8c7c1f46cdf07ecb0334a777" + ], + "clusters": [ + [ + { + "id": "test", + "single_end": false + }, + "test.clusters.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "features": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.features.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gbk": [ + [ + { + "id": "test", + "single_end": false + }, + "NC_018507.1_cluster_1.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "genes": [ + [ + { + "id": "test", + "single_end": false + }, + "test.genes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + + ], + "versions": [ + "versions.yml:md5,3ade04da8c7c1f46cdf07ecb0334a777" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T22:07:47.729614386" + } +} \ No newline at end of file diff --git a/modules/nf-core/gecco/run/tests/tags.yml b/modules/nf-core/gecco/run/tests/tags.yml new file mode 100644 index 00000000..1542bcca --- /dev/null +++ b/modules/nf-core/gecco/run/tests/tags.yml @@ -0,0 +1,2 @@ +gecco/run: + - "modules/nf-core/gecco/run/**" diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..5e67e3b9 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..9066c035 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..776211ad --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..069967e7 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" 
+ ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/hamronization/abricate/environment.yml b/modules/nf-core/hamronization/abricate/environment.yml new file mode 100644 index 00000000..791b9c96 --- /dev/null +++ b/modules/nf-core/hamronization/abricate/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/abricate/main.nf b/modules/nf-core/hamronization/abricate/main.nf new file mode 100644 index 00000000..54cd9904 --- /dev/null +++ b/modules/nf-core/hamronization/abricate/main.nf @@ -0,0 +1,53 @@ +process HAMRONIZATION_ABRICATE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(report) + val(format) + val(software_version) + val(reference_db_version) + + output: + tuple val(meta), path("*.json"), optional: true, emit: json + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hamronize \\ + abricate \\ + ${report} \\ + $args \\ + --format ${format} \\ + --analysis_software_version ${software_version} \\ + --reference_database_version ${reference_db_version} \\ + > ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/hamronization/abricate/meta.yml b/modules/nf-core/hamronization/abricate/meta.yml new file mode 100644 index 00000000..b1346892 --- /dev/null +++ b/modules/nf-core/hamronization/abricate/meta.yml @@ -0,0 +1,69 @@ +name: "hamronization_abricate" +description: Tool to convert and summarize ABRicate outputs using the hAMRonization + specification +keywords: + - amr + - antimicrobial resistance + - reporting + - abricate +tools: + - "hamronization": + description: "Tool to convert and summarize AMR gene detection outputs using the + hAMRonization specification" + homepage: "https://github.com/pha4ge/hAMRonization/" + documentation: "https://github.com/pha4ge/hAMRonization/" + tool_dev_url: "https://github.com/pha4ge/hAMRonization" + licence: ["GNU Lesser General Public v3 (LGPL v3)"] + identifier: biotools:hamronization +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - report: + type: file + description: Output TSV or CSV file from ABRicate + pattern: "*.{csv,tsv}" + - - format: + type: string + description: Type of report file to be produced + pattern: "tsv|json" + - - software_version: + type: string + description: Version of ABRicate used + pattern: "[0-9].[0-9].[0-9]" + - - reference_db_version: + type: string + description: Database version of ABRicate used + pattern: "[0-9][0-9][0-9][0-9]-[A-Z][a-z][a-z]-[0-9][0-9]" +output: + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: hAMRonised report in JSON format + pattern: "*.json" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tsv": + type: file + description: hAMRonised report in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/hamronization/abricate/tests/main.nf.test b/modules/nf-core/hamronization/abricate/tests/main.nf.test new file mode 100644 index 00000000..d6c21350 --- /dev/null +++ b/modules/nf-core/hamronization/abricate/tests/main.nf.test @@ -0,0 +1,55 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_ABRICATE" + script "../main.nf" + process "HAMRONIZATION_ABRICATE" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/abricate" + + test("hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv") { + + when { + process { + """ + input[0] = [ [ id:"test" ], file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv', checkIfExists: true) ] + input[1] = 'tsv' + input[2] = '1.0.1' + input[3] = '2021-Mar-27' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:"test" ], file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv', checkIfExists: true) ] + input[1] = 'tsv' + input[2] = '1.0.1' + input[3] = '2021-Mar-27' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/abricate/tests/main.nf.test.snap b/modules/nf-core/hamronization/abricate/tests/main.nf.test.snap new file mode 100644 index 00000000..47432f37 --- /dev/null +++ b/modules/nf-core/hamronization/abricate/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.tsv:md5,4b1024ba25c116a5312944f65dd40e9b" + ] + ], + "2": [ + "versions.yml:md5,c826615ee7b88b615cae5bded792b790" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,4b1024ba25c116a5312944f65dd40e9b" + ] + ], + "versions": [ + "versions.yml:md5,c826615ee7b88b615cae5bded792b790" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T22:16:18.968989666" + }, + "hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,c826615ee7b88b615cae5bded792b790" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c826615ee7b88b615cae5bded792b790" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T22:25:57.524839789" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/abricate/tests/tags.yml b/modules/nf-core/hamronization/abricate/tests/tags.yml new file mode 100644 index 00000000..ed35a969 --- /dev/null +++ b/modules/nf-core/hamronization/abricate/tests/tags.yml @@ -0,0 +1,2 @@ 
+hamronization/abricate: + - "modules/nf-core/hamronization/abricate/**" diff --git a/modules/nf-core/hamronization/amrfinderplus/environment.yml b/modules/nf-core/hamronization/amrfinderplus/environment.yml new file mode 100644 index 00000000..791b9c96 --- /dev/null +++ b/modules/nf-core/hamronization/amrfinderplus/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/amrfinderplus/main.nf b/modules/nf-core/hamronization/amrfinderplus/main.nf new file mode 100644 index 00000000..22a56d66 --- /dev/null +++ b/modules/nf-core/hamronization/amrfinderplus/main.nf @@ -0,0 +1,54 @@ +process HAMRONIZATION_AMRFINDERPLUS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(report) + val(format) + val(software_version) + val(reference_db_version) + + output: + tuple val(meta), path("*.json") , optional: true, emit: json + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hamronize \\ + amrfinderplus \\ + ${report} \\ + $args \\ + --format ${format} \\ + --analysis_software_version ${software_version} \\ + --reference_database_version ${reference_db_version} \\ + --input_file_name ${prefix} \\ + > ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/hamronization/amrfinderplus/meta.yml b/modules/nf-core/hamronization/amrfinderplus/meta.yml new file mode 100644 index 00000000..aba55b1f --- /dev/null +++ b/modules/nf-core/hamronization/amrfinderplus/meta.yml @@ -0,0 +1,71 @@ +name: "hamronization_amrfinderplus" +description: Tool to convert and summarize AMRfinderPlus outputs using the hAMRonization + specification. +keywords: + - amr + - antimicrobial resistance + - arg + - antimicrobial resistance genes + - reporting + - amrfinderplus +tools: + - "hamronization": + description: "Tool to convert and summarize AMR gene detection outputs using the + hAMRonization specification" + homepage: "https://github.com/pha4ge/hAMRonization/" + documentation: "https://github.com/pha4ge/hAMRonization/" + tool_dev_url: "https://github.com/pha4ge/hAMRonization" + licence: ["GNU Lesser General Public v3 (LGPL v3)"] + identifier: biotools:hamronization +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - report: + type: file + description: Output .tsv file from AMRfinderPlus + pattern: "*.tsv" + - - format: + type: string + description: Type of report file to be produced + pattern: "tsv|json" + - - software_version: + type: string + description: Version of AMRfinder used + pattern: "[0-9].[0-9].[0-9]" + - - reference_db_version: + type: string + description: Database version of ncbi_AMRfinder used + pattern: "[0-9]-[0-9]-[0-9].[0-9]" +output: + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: hAMRonised report in JSON format + pattern: "*.json" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: hAMRonised report in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test new file mode 100644 index 00000000..d74f2f56 --- /dev/null +++ b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_AMRFINDERPLUS" + script "../main.nf" + process "HAMRONIZATION_AMRFINDERPLUS" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/amrfinderplus" + + test("hamronization/amrfinderplus - delete_me/amrfinderplus - tsv") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true) + ] + input[1] = 'tsv' + input[2] = '3.10.30' + input[3] = '2022-05-26.1' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("hamronization/amrfinderplus - delete_me/amrfinderplus - tsv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true) + ] + input[1] = 'tsv' + input[2] = '3.10.30' + input[3] = '2022-05-26.1' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test.snap b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test.snap new file mode 100644 index 00000000..486d8cdc --- /dev/null +++ b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "hamronization/amrfinderplus - delete_me/amrfinderplus - tsv - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + 
}, + "timestamp": "2024-02-07T10:47:35.965140591" + }, + "hamronization/amrfinderplus - delete_me/amrfinderplus - tsv": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,cde9a32a5b0a8902c4a76ebd2a820d4d" + ] + ], + "2": [ + "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,cde9a32a5b0a8902c4a76ebd2a820d4d" + ] + ], + "versions": [ + "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T10:47:30.194755603" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/amrfinderplus/tests/tags.yml b/modules/nf-core/hamronization/amrfinderplus/tests/tags.yml new file mode 100644 index 00000000..d3791c05 --- /dev/null +++ b/modules/nf-core/hamronization/amrfinderplus/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/amrfinderplus: + - "modules/nf-core/hamronization/amrfinderplus/**" diff --git a/modules/nf-core/hamronization/deeparg/environment.yml b/modules/nf-core/hamronization/deeparg/environment.yml new file mode 100644 index 00000000..791b9c96 --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/deeparg/main.nf b/modules/nf-core/hamronization/deeparg/main.nf new file mode 100644 index 00000000..be3ef006 --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/main.nf @@ -0,0 +1,55 @@ +process HAMRONIZATION_DEEPARG { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(report) + val(format) + val(software_version) + val(reference_db_version) + + output: + tuple val(meta), path("*.json"), optional: true, emit: json + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hamronize \\ + deeparg \\ + ${report} \\ + $args \\ + --format ${format} \\ + --analysis_software_version ${software_version} \\ + --reference_database_version ${reference_db_version} \\ + --input_file_name ${prefix} \\ + > ${prefix}.${format} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/hamronization/deeparg/meta.yml b/modules/nf-core/hamronization/deeparg/meta.yml new file mode 100644 index 00000000..39149a34 --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/meta.yml @@ -0,0 +1,69 @@ +name: hamronization_deeparg +description: Tool to convert and summarize DeepARG outputs using the hAMRonization + specification +keywords: + - amr + - antimicrobial resistance + - reporting + - deeparg +tools: + - hamronization: + description: Tool to convert and summarize AMR gene detection outputs using the + hAMRonization specification + homepage: https://github.com/pha4ge/hAMRonization/ + documentation: https://github.com/pha4ge/hAMRonization/ + tool_dev_url: https://github.com/pha4ge/hAMRonization + licence: ["GNU Lesser General Public v3 (LGPL v3)"] + identifier: biotools:hamronization +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - report: + type: file + description: Output .mapping.ARG file from DeepARG + pattern: "*.mapping.ARG" + - - format: + type: string + description: Type of report file to be produced + pattern: "tsv|json" + - - software_version: + type: string + description: Version of DeepARG used + pattern: "[0-9].[0-9].[0-9]" + - - reference_db_version: + type: integer + description: Database version of DeepARG used + pattern: "[0-9]" +output: + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: hAMRonised report in JSON format + pattern: "*.json" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tsv": + type: file + description: hAMRonised report in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/hamronization/deeparg/tests/main.nf.test b/modules/nf-core/hamronization/deeparg/tests/main.nf.test new file mode 100644 index 00000000..e13be328 --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_DEEPARG" + script "../main.nf" + process "HAMRONIZATION_DEEPARG" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/deeparg" + + test("hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true), + ] + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '2' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true), + ] + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '2' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/hamronization/deeparg/tests/main.nf.test.snap b/modules/nf-core/hamronization/deeparg/tests/main.nf.test.snap new file mode 100644 index 00000000..d680080b --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T00:43:44.834346159" + }, + "hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,e886d665bf9fc266be8193859863d2f4" + ] + ], + "2": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,e886d665bf9fc266be8193859863d2f4" + ] + ], + "versions": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T10:32:36.722182719" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/deeparg/tests/tags.yml 
b/modules/nf-core/hamronization/deeparg/tests/tags.yml new file mode 100644 index 00000000..0dedf03d --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/deeparg: + - "modules/nf-core/hamronization/deeparg/**" diff --git a/modules/nf-core/hamronization/fargene/environment.yml b/modules/nf-core/hamronization/fargene/environment.yml new file mode 100644 index 00000000..791b9c96 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/fargene/main.nf b/modules/nf-core/hamronization/fargene/main.nf new file mode 100644 index 00000000..ca1edc73 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/main.nf @@ -0,0 +1,54 @@ +process HAMRONIZATION_FARGENE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(report) + val(format) + val(software_version) + val(reference_db_version) + + output: + tuple val(meta), path("*.json") , optional: true, emit: json + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hamronize \\ + fargene \\ + ${report} \\ + $args \\ + --format ${format} \\ + --analysis_software_version ${software_version} \\ + --reference_database_version ${reference_db_version} \\ + --input_file_name ${prefix} \\ + > ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "stub" > ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/hamronization/fargene/meta.yml b/modules/nf-core/hamronization/fargene/meta.yml new file mode 100644 index 00000000..efd3de36 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/meta.yml @@ -0,0 +1,71 @@ +name: "hamronization_fargene" +description: Tool to convert and summarize fARGene outputs using the hAMRonization + specification +keywords: + - amr + - antimicrobial resistance + - arg + - antimicrobial resistance genes + - reporting + - fARGene +tools: + - hamronization: + description: "Tool to convert and summarize AMR gene detection outputs using the + hAMRonization specification" + homepage: "https://github.com/pha4ge/hAMRonization/" + documentation: "https://github.com/pha4ge/hAMRonization/" + tool_dev_url: "https://github.com/pha4ge/hAMRonization" + licence: ["GNU Lesser General Public v3 (LGPL v3)"] + identifier: biotools:hamronization +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - report: + type: file + description: Output .txt file from fARGene + pattern: "*.txt" + - - format: + type: string + description: Type of report file to be produced + pattern: "tsv|json" + - - software_version: + type: string + description: Version of fARGene used + pattern: "[0-9].[0-9].[0-9]" + - - reference_db_version: + type: string + description: Database version of fARGene used + pattern: "[0-9].[0-9].[0-9]" +output: + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: hAMRonised report in JSON format + pattern: "*.json" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: hAMRonised report in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/hamronization/fargene/tests/main.nf.test b/modules/nf-core/hamronization/fargene/tests/main.nf.test new file mode 100644 index 00000000..a5c5f2f7 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_FARGENE" + script "../main.nf" + process "HAMRONIZATION_FARGENE" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/fargene" + tag "gunzip" + tag "fargene" + + test("hamronization/fargene - bacteroides_fragilis - illumina - fa.gz/gunzip") { + setup { + + run("GUNZIP") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true)) + ]) + """ + } + } + + run("FARGENE") { + script "../../../fargene/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'class_a' + """ + } + } + } + + when { + process { + """ + input[0] = FARGENE.out.hmm + input[1] = 'tsv' + input[2] = '0.1' + input[3] = '0.1' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.tsv.get(0).get(1)).exists() }, // No md5 check because of empty file + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("hamronization/fargene - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id: 'test'], file("dummy.fa") ] + input[1] = 'tsv' + input[2] = '0.1' + input[3] = '0.1' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/fargene/tests/main.nf.test.snap b/modules/nf-core/hamronization/fargene/tests/main.nf.test.snap new file mode 100644 index 00000000..b60c696b --- /dev/null +++ b/modules/nf-core/hamronization/fargene/tests/main.nf.test.snap @@ -0,0 +1,53 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,a961ebe5815800b3b27c935a4ecbf7f3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T15:18:12.580157967" + }, + "hamronization/fargene - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "2": [ + 
"versions.yml:md5,a961ebe5815800b3b27c935a4ecbf7f3" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "versions": [ + "versions.yml:md5,a961ebe5815800b3b27c935a4ecbf7f3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T01:00:18.784613823" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/fargene/tests/tags.yml b/modules/nf-core/hamronization/fargene/tests/tags.yml new file mode 100644 index 00000000..49357928 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/fargene: + - "modules/nf-core/hamronization/fargene/**" diff --git a/modules/nf-core/hamronization/rgi/environment.yml b/modules/nf-core/hamronization/rgi/environment.yml new file mode 100644 index 00000000..791b9c96 --- /dev/null +++ b/modules/nf-core/hamronization/rgi/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/rgi/main.nf b/modules/nf-core/hamronization/rgi/main.nf new file mode 100644 index 00000000..9a99a0ff --- /dev/null +++ b/modules/nf-core/hamronization/rgi/main.nf @@ -0,0 +1,54 @@ +process HAMRONIZATION_RGI { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(report) + val(format) + val(software_version) + val(reference_db_version) + + output: + tuple val(meta), path("*.json") , optional: true, emit: json + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + hamronize \\ + rgi \\ + ${report} \\ + $args \\ + --format ${format} \\ + --analysis_software_version ${software_version} \\ + --reference_database_version ${reference_db_version} \\ + --input_file_name ${prefix} \\ + > ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/hamronization/rgi/meta.yml b/modules/nf-core/hamronization/rgi/meta.yml new file mode 100644 index 00000000..525148e5 --- /dev/null +++ b/modules/nf-core/hamronization/rgi/meta.yml @@ -0,0 +1,70 @@ +name: "hamronization_rgi" +description: Tool to convert and summarize RGI outputs using the hAMRonization specification. 
+keywords: + - amr + - antimicrobial resistance + - arg + - antimicrobial resistance genes + - reporting + - rgi +tools: + - hamronization: + description: "Tool to convert and summarize AMR gene detection outputs using the + hAMRonization specification" + homepage: "https://github.com/pha4ge/hAMRonization/" + documentation: "https://github.com/pha4ge/hAMRonization/" + tool_dev_url: "https://github.com/pha4ge/hAMRonization" + licence: ["GNU Lesser General Public v3 (LGPL v3)"] + identifier: biotools:hamronization +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - report: + type: file + description: Output .txt file from RGI + pattern: "*.txt" + - - format: + type: string + description: Type of report file to be produced + pattern: "tsv|json" + - - software_version: + type: string + description: Version of RGI used + pattern: "[0-9].[0-9].[0-9]" + - - reference_db_version: + type: string + description: Database version of RGI used + pattern: "[0-9].[0-9].[0-9]" +output: + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: hAMRonised report in JSON format + pattern: "*.json" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: hAMRonised report in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/hamronization/rgi/tests/main.nf.test b/modules/nf-core/hamronization/rgi/tests/main.nf.test new file mode 100644 index 00000000..52945ebe --- /dev/null +++ b/modules/nf-core/hamronization/rgi/tests/main.nf.test @@ -0,0 +1,98 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_RGI" + script "../main.nf" + process "HAMRONIZATION_RGI" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/rgi" + tag "rgi/main" + tag "rgi/cardannotation" + tag "untar" + + setup { + + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('data.tar.gz') + + input[0] = [ + [ ], + file("data.tar.gz") + ] + """ + } + } + + run("RGI_CARDANNOTATION") { + script "modules/nf-core/rgi/cardannotation/main.nf" + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + run("RGI_MAIN") { + script "modules/nf-core/rgi/main/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + input[1] = RGI_CARDANNOTATION.out.db + input[2] = [] + """ + } + } + } + + test("hamronization/rgi - haemophilus_influenzae - genome - fna.gz") { + + when { + process { + """ + input[0] = RGI_MAIN.out.tsv + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '3.2.3' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("hamronization/rgi - haemophilus_influenzae - genome - fna.gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = RGI_MAIN.out.tsv + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '3.2.3' + """ + } + } + + then { + assertAll( + { assert process.success },
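+ // stub outputs are empty files created by touch, so this snapshot pins channel structure and versions rather than report content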
+ { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/rgi/tests/main.nf.test.snap b/modules/nf-core/hamronization/rgi/tests/main.nf.test.snap new file mode 100644 index 00000000..07a41eae --- /dev/null +++ b/modules/nf-core/hamronization/rgi/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "hamronization/rgi - haemophilus_influenzae - genome - fna.gz - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:16:18.164635116" + }, + "hamronization/rgi - haemophilus_influenzae - genome - fna.gz": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,98b98bc42db5569db041d1819bbf1d89" + ] + ], + "2": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,98b98bc42db5569db041d1819bbf1d89" + ] + ], + "versions": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:15:49.081218466" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/rgi/tests/tags.yml b/modules/nf-core/hamronization/rgi/tests/tags.yml new file mode 100644 index 00000000..40c55588 --- /dev/null +++ b/modules/nf-core/hamronization/rgi/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/rgi: + - "modules/nf-core/hamronization/rgi/**" diff --git a/modules/nf-core/hamronization/summarize/environment.yml b/modules/nf-core/hamronization/summarize/environment.yml new file mode 100644 index 00000000..791b9c96 --- /dev/null +++ b/modules/nf-core/hamronization/summarize/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/summarize/main.nf b/modules/nf-core/hamronization/summarize/main.nf new file mode 100644 index 00000000..358ad83f --- /dev/null +++ b/modules/nf-core/hamronization/summarize/main.nf @@ -0,0 +1,49 @@ +process HAMRONIZATION_SUMMARIZE { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" + + input: + path(reports) + val(format) + + output: + path("hamronization_combined_report.json"), optional: true, emit: json + path("hamronization_combined_report.tsv") , optional: true, emit: tsv + path("hamronization_combined_report.html"), optional: true, emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def outformat = format == 'interactive' ? 
'html' : format + """ + hamronize \\ + summarize \\ + ${reports.join(' ')} \\ + -t ${format} \\ + $args \\ + -o hamronization_combined_report.${outformat} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def outformat = format == 'interactive' ? 'html' : format + """ + touch hamronization_combined_report.${outformat} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/hamronization/summarize/meta.yml b/modules/nf-core/hamronization/summarize/meta.yml new file mode 100644 index 00000000..54ceeff3 --- /dev/null +++ b/modules/nf-core/hamronization/summarize/meta.yml @@ -0,0 +1,50 @@ +name: hamronization_summarize +description: Tool to summarize and combine all hAMRonization reports into a single + file +keywords: + - amr + - antimicrobial resistance + - reporting +tools: + - hamronization: + description: Tool to convert and summarize AMR gene detection outputs using the + hAMRonization specification + homepage: https://github.com/pha4ge/hAMRonization/ + documentation: https://github.com/pha4ge/hAMRonization/ + tool_dev_url: https://github.com/pha4ge/hAMRonization + licence: ["GNU Lesser General Public v3 (LGPL v3)"] + identifier: biotools:hamronization +input: + - - reports: + type: file + description: List of multiple hAMRonization reports in either JSON or TSV format + pattern: "*.{json,tsv}" + - - format: + type: string + description: Type of final combined report file to be produced + pattern: "tsv|json|interactive" +output: + - json: + - hamronization_combined_report.json: + type: file + description: hAMRonised summary in JSON format + pattern: "*.json" + - tsv: + - hamronization_combined_report.tsv: + type: file + description: hAMRonised summary in TSV format + pattern: "*.tsv" + - html: + - hamronization_combined_report.html: + type: file + description: hAMRonised summary in HTML format + pattern: "*.html" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/hamronization/summarize/tests/main.nf.test b/modules/nf-core/hamronization/summarize/tests/main.nf.test new file mode 100644 index 00000000..dc2da33e --- /dev/null +++ b/modules/nf-core/hamronization/summarize/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_SUMMARIZE" + script "../main.nf" + process "HAMRONIZATION_SUMMARIZE" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/summarize" + tag "hamronization/deeparg" + + setup { + run("HAMRONIZATION_DEEPARG", alias: "HAMRONIZATION_DEEPARG1") { + script "../../deeparg/main.nf" + process { + """ + input[0] = [ + [ id:'test1', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true), + ] + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '2' + """ + } + } + + run("HAMRONIZATION_DEEPARG", alias: "HAMRONIZATION_DEEPARG2") { + script "../../deeparg/main.nf" + process { + """ + input[0] = [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true), + ]
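+ // same DeepARG report as the first aliased run, only the meta id differs ('test2'), so HAMRONIZATION_SUMMARIZE receives two reports to combine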
+ input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '2' + """ + } + } + } + + test("hamronization/summarize - bacteroides_fragilis - hamronization - arg") { + + when { + process { + """ + ch_deeparg_run_one = HAMRONIZATION_DEEPARG1.out.tsv + ch_deeparg_run_two = HAMRONIZATION_DEEPARG2.out.tsv + + ch_deeparg_run_one + .mix( ch_deeparg_run_two ) + .map{ + [ it[1] ] + } + .collect() + .set { ch_input_for_summarize } + + input[0] = ch_input_for_summarize + input[1] = 'json' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("hamronization/summarize - stub") { + + options "-stub" + + when { + process { + """ + ch_deeparg_run_one = HAMRONIZATION_DEEPARG1.out.tsv + ch_deeparg_run_two = HAMRONIZATION_DEEPARG2.out.tsv + + ch_deeparg_run_one + .mix( ch_deeparg_run_two ) + .map{ + [ it[1] ] + } + .collect() + .set { ch_input_for_summarize } + + input[0] = ch_input_for_summarize + input[1] = 'json' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/summarize/tests/main.nf.test.snap b/modules/nf-core/hamronization/summarize/tests/main.nf.test.snap new file mode 100644 index 00000000..8449f2ed --- /dev/null +++ b/modules/nf-core/hamronization/summarize/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "hamronization/summarize - stub": { + "content": [ + { + "0": [ + "hamronization_combined_report.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ], + "html": [ + + ], + "json": [ + "hamronization_combined_report.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "tsv": [ + + ], + "versions": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T00:28:03.124164682" + }, + "hamronization/summarize - bacteroides_fragilis - hamronization - arg": { + "content": [ + { + "0": [ + "hamronization_combined_report.json:md5,b27855689f41a9a95ddcfbf6c02d3528" + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ], + "html": [ + + ], + "json": [ + "hamronization_combined_report.json:md5,b27855689f41a9a95ddcfbf6c02d3528" + ], + "tsv": [ + + ], + "versions": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T15:04:33.885586093" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/summarize/tests/tags.yml b/modules/nf-core/hamronization/summarize/tests/tags.yml new file mode 100644 index 00000000..f98ff599 --- /dev/null +++ b/modules/nf-core/hamronization/summarize/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/summarize: + - "modules/nf-core/hamronization/summarize/**" diff --git a/modules/nf-core/hmmer/hmmsearch/environment.yml b/modules/nf-core/hmmer/hmmsearch/environment.yml new file mode 100644 index 00000000..c5ddec5d --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hmmer=3.4 diff --git a/modules/nf-core/hmmer/hmmsearch/main.nf b/modules/nf-core/hmmer/hmmsearch/main.nf new file mode 100644 index 00000000..603a865e --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/main.nf @@ -0,0 +1,70 @@ +process HMMER_HMMSEARCH { + tag "$meta.id" + label 
'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmmer:3.4--hdbdd923_1' : + 'biocontainers/hmmer:3.4--hdbdd923_1' }" + + input: + tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain) + + output: + tuple val(meta), path('*.txt.gz') , emit: output + tuple val(meta), path('*.sto.gz') , emit: alignments , optional: true + tuple val(meta), path('*.tbl.gz') , emit: target_summary, optional: true + tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + output = "${prefix}.txt" + alignment = write_align ? "-A ${prefix}.sto" : '' + target_summary = write_target ? "--tblout ${prefix}.tbl" : '' + domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : '' + """ + hmmsearch \\ + $args \\ + --cpu $task.cpus \\ + -o $output \\ + $alignment \\ + $target_summary \\ + $domain_summary \\ + $hmmfile \\ + $seqdb + + gzip --no-name *.txt \\ + ${write_align ? '*.sto' : ''} \\ + ${write_target ? '*.tbl' : ''} \\ + ${write_domain ? '*.domtbl' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.txt" + ${write_align ? "touch ${prefix}.sto" : ''} \\ + ${write_target ? "touch ${prefix}.tbl" : ''} \\ + ${write_domain ? "touch ${prefix}.domtbl" : ''} + + gzip --no-name *.txt \\ + ${write_align ? '*.sto' : ''} \\ + ${write_target ? '*.tbl' : ''} \\ + ${write_domain ? '*.domtbl' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/hmmer/hmmsearch/meta.yml b/modules/nf-core/hmmer/hmmsearch/meta.yml new file mode 100644 index 00000000..0e078659 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/meta.yml @@ -0,0 +1,92 @@ +name: hmmer_hmmsearch +description: search profile(s) against a sequence database +keywords: + - Hidden Markov Model + - HMM + - hmmer + - hmmsearch +tools: + - hmmer: + description: Biosequence analysis using profile hidden Markov models + homepage: http://hmmer.org/ + documentation: http://hmmer.org/documentation.html + tool_dev_url: https://github.com/EddyRivasLab/hmmer + doi: "10.1371/journal.pcbi.1002195" + licence: ["BSD"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - hmmfile: + type: file + description: One or more HMM profiles created with hmmbuild + pattern: "*.{hmm,hmm.gz}" + - seqdb: + type: file + description: Database of sequences in FASTA format + pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}" + - write_align: + type: boolean + description: Flag to save optional alignment output. Specify with 'true' to + save. + - write_target: + type: boolean + description: Flag to save optional per target summary. Specify with 'true' to + save. + - write_domain: + type: boolean + description: Flag to save optional per domain summary. Specify with 'true' to + save. 
+output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt.gz": + type: file + description: Human readable output summarizing hmmsearch results + pattern: "*.{txt.gz}" + - alignments: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sto.gz": + type: file + description: Optional multiple sequence alignment (MSA) in Stockholm format + pattern: "*.{sto.gz}" + - target_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbl.gz": + type: file + description: Optional tabular (space-delimited) summary of per-target output + pattern: "*.{tbl.gz}" + - domain_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.domtbl.gz": + type: file + description: Optional tabular (space-delimited) summary of per-domain output + pattern: "*.{domtbl.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Midnighter" +maintainers: + - "@Midnighter" diff --git a/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test new file mode 100644 index 00000000..f1b59e98 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process HMMER_HMMSEARCH" + script "../main.nf" + process "HMMER_HMMSEARCH" + + tag "modules" + tag "modules_nfcore" + tag "hmmer" + tag "hmmer/hmmsearch" + + test("hmmer/hmmsearch") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + false + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.output[0][1]).linesGzip.toString().contains('[ok]') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("hmmer/hmmsearch - optional") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + true, + true, + true + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.output.get(0).get(1)).linesGzip.toString().contains('[ok]') }, + { assert path(process.out.target_summary.get(0).get(1)).linesGzip.toString().contains('[ok]') }, + { assert snapshot( + process.out.alignments + + process.out.versions + ).match() } + ) + } + + } + + test("hmmer/hmmsearch - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + false + 
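+ // the three booleans map to write_align, write_target and write_domain (see meta.yml); all false here, so only the gzipped .txt report is produced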
] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("hmmer/hmmsearch - optional - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + true, + true, + true + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap new file mode 100644 index 00000000..e6b22771 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap @@ -0,0 +1,175 @@ +{ + "hmmer/hmmsearch": { + "content": [ + [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:47.293093635" + }, + "hmmer/hmmsearch - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ], + "alignments": [ + + ], + "domain_summary": [ + + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_summary": [ + + ], + "versions": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:57.862047944" + }, + "hmmer/hmmsearch - optional - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ], + "alignments": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "domain_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:19:03.49192788" + }, + "hmmer/hmmsearch - optional": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,5c44c289b9e36aa1f7f3afae2005fbb7" + ], + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-03-28T12:18:52.725638562" + } +} \ No newline at end of file diff --git a/modules/nf-core/hmmer/hmmsearch/tests/tags.yml b/modules/nf-core/hmmer/hmmsearch/tests/tags.yml new file mode 100644 index 00000000..1776d21f --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/tags.yml @@ -0,0 +1,2 @@ +hmmer/hmmsearch: + - "modules/nf-core/hmmer/hmmsearch/**" diff --git a/modules/nf-core/macrel/contigs/environment.yml b/modules/nf-core/macrel/contigs/environment.yml new file mode 100644 index 00000000..bb5ce1a6 --- /dev/null +++ b/modules/nf-core/macrel/contigs/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::macrel=1.4.0 diff --git a/modules/nf-core/macrel/contigs/main.nf b/modules/nf-core/macrel/contigs/main.nf new file mode 100644 index 00000000..b8f8f522 --- /dev/null +++ b/modules/nf-core/macrel/contigs/main.nf @@ -0,0 +1,61 @@ +process MACREL_CONTIGS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/macrel:1.4.0--pyh7e72e81_0': + 'biocontainers/macrel:1.4.0--pyh7e72e81_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*/*.smorfs.faa.gz") , emit: smorfs + tuple val(meta), path("*/*.all_orfs.faa.gz") , emit: all_orfs + tuple val(meta), path("*/*.prediction.gz") , emit: amp_prediction + tuple val(meta), path("*/*.md") , emit: readme_file + tuple val(meta), path("*/*_log.txt") , emit: log_file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + macrel contigs \\ + $args \\ + --fasta $fasta \\ + --output ${prefix}/ \\ + --tag ${prefix} \\ + --log-file ${prefix}/${prefix}_log.txt \\ + --threads $task.cpus + + gzip --no-name ${prefix}/*.faa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macrel: \$(echo \$(macrel --version | sed 's/macrel //g')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + + touch ${prefix}/${prefix}_log.txt + echo | gzip > ${prefix}/${prefix}.smorfs.faa.gz + echo | gzip > ${prefix}/${prefix}.all_orfs.faa.gz + echo | gzip > ${prefix}/${prefix}.prediction.gz + touch ${prefix}/${prefix}.md + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macrel: \$(echo \$(macrel --version | sed 's/macrel //g')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/macrel/contigs/meta.yml b/modules/nf-core/macrel/contigs/meta.yml new file mode 100644 index 00000000..c1c03f42 --- /dev/null +++ b/modules/nf-core/macrel/contigs/meta.yml @@ -0,0 +1,92 @@ +name: macrel_contigs +description: A tool that mines antimicrobial peptides (AMPs) from (meta)genomes by + predicting peptides from genomes (provided as contigs) and outputs all the predicted + anti-microbial peptides found. +keywords: + - AMP + - antimicrobial peptides + - genome mining + - metagenomes + - peptide prediction +tools: + - macrel: + description: A pipeline for AMP (antimicrobial peptide) prediction + homepage: https://macrel.readthedocs.io/en/latest/ + documentation: https://macrel.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/BigDataBiology/macrel + doi: "10.7717/peerj.10555" + licence: ["MIT"] + identifier: biotools:macrel +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: A fasta file with nucleotide sequences. + pattern: "*.{fasta,fa,fna,fasta.gz,fa.gz,fna.gz}" +output: + - smorfs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*.smorfs.faa.gz": + type: file + description: A zipped fasta file containing the amino acid sequences of the + small ORFs (smORFs) predicted in the contigs. + pattern: "*.smorfs.faa.gz" + - all_orfs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*.all_orfs.faa.gz": + type: file + description: A zipped fasta file containing the amino acid sequences of all + ORFs predicted in the contigs. + pattern: "*.all_orfs.faa.gz" + - amp_prediction: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*.prediction.gz": + type: file + description: A zipped file, with all predicted amps in a table format. + pattern: "*.prediction.gz" + - readme_file: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*.md": + type: file + description: A readme file containing tool specific information (e.g. citations, + details about the output, etc.). + pattern: "*.md" + - log_file: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*/*_log.txt": + type: file + description: A log file containing the information pertaining to the run. + pattern: "*_log.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/macrel/contigs/tests/main.nf.test b/modules/nf-core/macrel/contigs/tests/main.nf.test new file mode 100644 index 00000000..5b641b1e --- /dev/null +++ b/modules/nf-core/macrel/contigs/tests/main.nf.test @@ -0,0 +1,66 @@ + +nextflow_process { + + name "Test Process MACREL_CONTIGS" + script "../main.nf" + process "MACREL_CONTIGS" + + tag "modules" + tag "modules_nfcore" + tag "macrel" + tag "macrel/contigs" + + test("test-macrel-contigs") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.smorfs, + path(process.out.all_orfs[0][1]).linesGzip[0], + process.out.amp_prediction, + process.out.readme_file, + file(process.out.log_file[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-macrel-contigs-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/macrel/contigs/tests/main.nf.test.snap b/modules/nf-core/macrel/contigs/tests/main.nf.test.snap new file mode 100644 index 00000000..3908c49c --- /dev/null +++
b/modules/nf-core/macrel/contigs/tests/main.nf.test.snap @@ -0,0 +1,150 @@ +{ + "test-macrel-contigs": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.smorfs.faa.gz:md5,2433037a55de266a1203759834849669" + ] + ], + ">k141_0_1 # 235 # 468 # -1 # ID=1_1;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.333", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.prediction.gz:md5,c929d870dc197f9d5d36d3d5f683cbf4" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "README.md:md5,cf088d9256ff7b7730699f17b64b4028" + ] + ], + "test_log.txt", + [ + "versions.yml:md5,ab072d9245c9b28a8bc694e98795c924" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T20:48:49.632715" + }, + "test-macrel-contigs-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.smorfs.faa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.all_orfs.faa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.prediction.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,ab072d9245c9b28a8bc694e98795c924" + ], + "all_orfs": [ + [ + { + "id": "test", + "single_end": false + }, + "test.all_orfs.faa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "amp_prediction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.prediction.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "log_file": [ + [ + { + "id": "test", + "single_end": false + }, + "test_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readme_file": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "smorfs": [ + [ + { + "id": "test", + "single_end": false + }, + "test.smorfs.faa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ab072d9245c9b28a8bc694e98795c924" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T20:50:42.040416" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createdb/environment.yml b/modules/nf-core/mmseqs/createdb/environment.yml new file mode 100644 index 00000000..18547591 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/createdb/main.nf b/modules/nf-core/mmseqs/createdb/main.nf new file mode 100644 index 00000000..9487e5bc --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/main.nf @@ -0,0 +1,65 @@ +process MMSEQS_CREATEDB { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" + + input: + tuple val(meta), path(sequence) + + output: + tuple val(meta), path("${prefix}/"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = sequence.getExtension() == "gz" ? true : false + def sequence_name = is_compressed ? sequence.getBaseName() : sequence + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${sequence} > ${sequence_name} + fi + + mkdir -p ${prefix} + + mmseqs \\ + createdb \\ + ${sequence_name} \\ + ${prefix}/${prefix} \\ + $args \\ + --compressed 1 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix} + + touch ${prefix}/${prefix} + touch ${prefix}/${prefix}.dbtype + touch ${prefix}/${prefix}.index + touch ${prefix}/${prefix}.lookup + touch ${prefix}/${prefix}.source + touch ${prefix}/${prefix}_h + touch ${prefix}/${prefix}_h.dbtype + touch ${prefix}/${prefix}_h.index + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/createdb/meta.yml b/modules/nf-core/mmseqs/createdb/meta.yml new file mode 100644 index 00000000..c392a360 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "mmseqs_createdb" +description: Create an MMseqs database from an existing FASTA/Q file +keywords: + - protein sequence + - databases + - clustering + - searching + - indexing + - mmseqs2 +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering + suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] + identifier: biotools:mmseqs +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - sequence: + type: file + description: Input sequences in FASTA/Q (zipped or unzipped) format to parse + into an mmseqs database + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,fastq,fastq.gz,fq,fq.gz}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - ${prefix}/: + type: directory + description: The created MMseqs2 database + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" + - "@vagkaratzas" diff --git a/modules/nf-core/mmseqs/createdb/tests/main.nf.test b/modules/nf-core/mmseqs/createdb/tests/main.nf.test new file mode 100644 index 00000000..d4a4f0c8 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process MMSEQS_CREATEDB" + script "../main.nf" + process "MMSEQS_CREATEDB" + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/createdb" + + test("Should build an mmseqs db from a contigs fasta file") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.versions + ).match() + } + ) + } + + } + + test("Should build an mmseqs db from a zipped amino acid sequence file") { + + when { + process { + """ + + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap new file mode 100644 index 00000000..a24c4118 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap @@ -0,0 +1,61 @@ +{ + "Should build an mmseqs db from a contigs fasta file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test:md5,7c3c2c5926cf8fa82e66b9628f680256", + "test.dbtype:md5,c8ed20c23ba91f4577f84c940c86c7db", + "test.index:md5,5b2fd8abd0ad3fee24738af7082e6a6e", + "test.lookup:md5,32f88756dbcb6aaf7b239b0d61730f1b", + "test.source:md5,9ada5b3ea6e1a7e16c4418eb98ae8d9d", + "test_h:md5,8c29f5ed94d83d7115e9c8a883ce358d", + "test_h.dbtype:md5,8895d3d8e9322aedbf45249dfb3ddb0a", + "test_h.index:md5,87c7c8c6d16018ebfaa6f408391a5ae2" + ] + ] + ], + [ + "versions.yml:md5,e644cbe263d4560298438a24f268eb6f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:01:44.163384" + }, + "Should build an mmseqs db from a zipped amino acid sequence file": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "test:md5,4b494965ed7ab67da8ca3f39523eb104", + "test.dbtype:md5,152afd7bf4dbe26f85032eee0269201a", + "test.index:md5,46f9d884e9a7f442fe1cd2ce339734e3", + "test.lookup:md5,3e27cb93d9ee875ad42a6f32f5651bdc", + "test.source:md5,eaa64fc8a5f7ec1ee49b0dcbd1a72e9d", + "test_h:md5,6e798b81c70d191f78939c2dd6223a7f", + "test_h.dbtype:md5,8895d3d8e9322aedbf45249dfb3ddb0a", + "test_h.index:md5,d5ac49ff56df064b980fa0eb5da57673" + ] + ] + ], + [ + "versions.yml:md5,e644cbe263d4560298438a24f268eb6f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:01:48.894044" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createdb/tests/tags.yml b/modules/nf-core/mmseqs/createdb/tests/tags.yml new file mode 100644 index 00000000..1f511ab0 --- 
/dev/null +++ b/modules/nf-core/mmseqs/createdb/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/createdb: + - modules/nf-core/mmseqs/createdb/** diff --git a/modules/nf-core/mmseqs/createtsv/environment.yml b/modules/nf-core/mmseqs/createtsv/environment.yml new file mode 100644 index 00000000..18547591 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/createtsv/main.nf b/modules/nf-core/mmseqs/createtsv/main.nf new file mode 100644 index 00000000..dcd4c13d --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/main.nf @@ -0,0 +1,63 @@ + +process MMSEQS_CREATETSV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" + + input: + tuple val(meta), path(db_result) + tuple val(meta2), path(db_query) + tuple val(meta3), path(db_target) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: "*.dbtype" // database generated by mmseqs cluster | search | taxonomy | ... + def args3 = task.ext.args3 ?: "*.dbtype" // database generated by mmseqs/createdb + def args4 = task.ext.args4 ?: "*.dbtype" // database generated by mmseqs/createdb + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + # Find the database files matching each suffix glob, strip the file extension, then keep the longest common prefix to recover each database's base name + DB_RESULT_PATH_NAME=\$(find -L "$db_result/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_QUERY_PATH_NAME=\$(find -L "$db_query/" -maxdepth 1 -name "$args3" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_TARGET_PATH_NAME=\$(find -L "$db_target/" -maxdepth 1 -name "$args4" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + + mmseqs \\ + createtsv \\ + \$DB_QUERY_PATH_NAME \\ + \$DB_TARGET_PATH_NAME \\ + \$DB_RESULT_PATH_NAME \\ + ${prefix}.tsv \\ + $args \\ + --threads ${task.cpus} \\ + --compressed 1 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/createtsv/meta.yml b/modules/nf-core/mmseqs/createtsv/meta.yml new file mode 100644 index 00000000..5a50ff34 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "mmseqs_createtsv" +description: Create a tsv file from a query and a target database as well as the result + database +keywords: + - protein sequence + - databases + - clustering + - searching + - indexing + - mmseqs2 + - tsv +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering + suite" + homepage:
"https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] + identifier: biotools:mmseqs +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_result: + type: directory + description: an MMseqs2 database with result data + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_query: + type: directory + description: an MMseqs2 database with query data + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_target: + type: directory + description: an MMseqs2 database with target data +output: + #Only when we have meta + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.tsv": + type: file + description: The resulting tsv file created using the query, target and result + MMseqs databases + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/mmseqs/createtsv/tests/cluster.nextflow.config b/modules/nf-core/mmseqs/createtsv/tests/cluster.nextflow.config new file mode 100644 index 00000000..48fee164 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/cluster.nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: MMSEQS_CREATETSV { + ext.args2 = '*_clu.dbtype' + } +} diff --git a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test new file mode 100644 index 00000000..1aa7463d --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test @@ -0,0 +1,247 @@ +nextflow_process { + + name "Test Process MMSEQS_CREATETSV" + script "../main.nf" + process "MMSEQS_CREATETSV" + + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/taxonomy" + tag "mmseqs/createdb" + tag "mmseqs/databases" + tag "untar" + tag "mmseqs/createtsv" + + test("mmseqs/createtsv - bacteroides_fragilis - taxonomy") { + + config "./taxonomy.nextflow.config" + + setup { + run("MMSEQS_CREATEDB", alias: "MMSEQS_TAXA") { + script "../../createdb/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + """ + } + } + run("MMSEQS_DATABASES") { + script "../../databases/main.nf" + process { + """ + input[0] = 'SILVA' + """ + } + } + run("MMSEQS_TAXONOMY") { + script "../../taxonomy/main.nf" + process { + """ + input[0] = MMSEQS_TAXA.out.db + input[1] = MMSEQS_DATABASES.out.database + """ + } + } + } + when { + process { + """ + input[0] = MMSEQS_TAXONOMY.out.db_taxonomy + input[1] = [[:],[]] + input[2] = MMSEQS_TAXA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("mmseqs/createtsv - sarscov2 - cluster") { + + config "./cluster.nextflow.config" + + setup { + run("UNTAR", alias: "UNTAR_QUERY") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:true ], + file(params.modules_testdata_base_path 
+ 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_TARGET") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_target', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_RESULT") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_result', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + } + + when { + + process { + """ + ch_query = UNTAR_QUERY.out.untar + ch_target = UNTAR_TARGET.out.untar + ch_result = UNTAR_RESULT.out.untar + + input[0] = ch_result + input[1] = ch_query + input[2] = ch_target + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("mmseqs/createtsv - bacteroides_fragilis - taxonomy - stub") { + + options "-stub" + config "./taxonomy.nextflow.config" + + setup { + run("MMSEQS_CREATEDB", alias: "MMSEQS_TAXA") { + script "../../createdb/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + """ + } + } + run("MMSEQS_DATABASES") { + script "../../databases/main.nf" + process { + """ + input[0] = 'SILVA' + """ + } + } + run("MMSEQS_TAXONOMY") { + script "../../taxonomy/main.nf" + process { + """ + input[0] = MMSEQS_TAXA.out.db + input[1] = MMSEQS_DATABASES.out.database + """ + } + } + } + when { + process { + """ + input[0] = MMSEQS_TAXONOMY.out.db_taxonomy + input[1] = [[:],[]] + input[2] = MMSEQS_TAXA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("mmseqs/createtsv - sarscov2 - cluster - stub") { + + options "-stub" + config "./cluster.nextflow.config" + + setup { + run("UNTAR", alias: "UNTAR_QUERY") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_TARGET") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_target', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_RESULT") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_result', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + } + + when { + + process { + """ + ch_query = UNTAR_QUERY.out.untar + ch_target = UNTAR_TARGET.out.untar + ch_result = UNTAR_RESULT.out.untar + + input[0] = ch_result + input[1] = ch_query + input[2] = ch_target + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap new file mode 100644 index 00000000..1087de88 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ 
+ "mmseqs/createtsv - bacteroides_fragilis - taxonomy - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:55:17.642787" + }, + "mmseqs/createtsv - sarscov2 - cluster - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:55:33.645454" + }, + "mmseqs/createtsv - bacteroides_fragilis - taxonomy": { + "content": [ + { + "0": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,9179f5c85b8b87a4dc998c9d17840161" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,9179f5c85b8b87a4dc998c9d17840161" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:54:45.718678" + }, + "mmseqs/createtsv - sarscov2 - cluster": { + "content": [ + { + "0": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,4e7ba50ce2879660dc6595286bf0d097" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,4e7ba50ce2879660dc6595286bf0d097" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:55:02.731974" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createtsv/tests/tags.yml b/modules/nf-core/mmseqs/createtsv/tests/tags.yml new file mode 100644 index 00000000..e27827f5 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/createtsv: + - "modules/nf-core/mmseqs/createtsv/**" diff --git a/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config b/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config new file mode 100644 index 00000000..f08205d1 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: MMSEQS_TAXONOMY { + ext.args = '--search-type 2' + } + +} diff --git a/modules/nf-core/mmseqs/databases/environment.yml b/modules/nf-core/mmseqs/databases/environment.yml new file mode 100644 index 00000000..18547591 --- /dev/null +++ b/modules/nf-core/mmseqs/databases/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/databases/main.nf b/modules/nf-core/mmseqs/databases/main.nf new file mode 
100644 index 00000000..d43681ce --- /dev/null +++ b/modules/nf-core/mmseqs/databases/main.nf @@ -0,0 +1,62 @@ +process MMSEQS_DATABASES { + tag "${database}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" + + input: + val database + + output: + path "${prefix}/" , emit: database + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: 'mmseqs_database' + """ + mkdir ${prefix}/ + + mmseqs databases \\ + ${database} \\ + ${prefix}/database \\ + tmp/ \\ + --threads ${task.cpus} \\ + --compressed 1 \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: 'mmseqs_database' + """ + mkdir ${prefix}/ + + touch ${prefix}/database + touch ${prefix}/database.dbtype + touch ${prefix}/database_h + touch ${prefix}/database_h.dbtype + touch ${prefix}/database_h.index + touch ${prefix}/database.index + touch ${prefix}/database.lookup + touch ${prefix}/database_mapping + touch ${prefix}/database.source + touch ${prefix}/database_taxonomy + touch ${prefix}/database.version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/databases/meta.yml b/modules/nf-core/mmseqs/databases/meta.yml new file mode 100644 index 00000000..be9380fb --- /dev/null +++ b/modules/nf-core/mmseqs/databases/meta.yml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "mmseqs_databases" +description: Download an mmseqs-formatted database +keywords: + - database + - indexing + - clustering + - searching +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering + suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] + identifier: biotools:mmseqs +input: + - - database: + type: string + description: Database available through the mmseqs2 databases interface - see + https://github.com/soedinglab/MMseqs2/wiki#downloading-databases for details +output: + - database: + - ${prefix}/: + type: directory + description: Directory containing processed mmseqs database + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/nf-core/mmseqs/databases/tests/main.nf.test b/modules/nf-core/mmseqs/databases/tests/main.nf.test new file mode 100644 index 00000000..3fe5d200 --- /dev/null +++ b/modules/nf-core/mmseqs/databases/tests/main.nf.test @@ -0,0 +1,55 @@ + +nextflow_process { + + name "Test Process MMSEQS_DATABASES" + script "../main.nf" + process "MMSEQS_DATABASES" + + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/databases" + + test("test-mmseqs-databases") { + + when { + process { + """ + input[0] = "SILVA" + + """ + } + } + + then
{ + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.database[0]).listFiles().collect { it.name }.toSorted(), // unstable + process.out.versions + ).match() + } + ) + } + } + + test("test-mmseqs-databases-stub") { + options '-stub' + when { + process { + """ + input[0] = "SILVA" + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/mmseqs/databases/tests/main.nf.test.snap b/modules/nf-core/mmseqs/databases/tests/main.nf.test.snap new file mode 100644 index 00000000..00d3003e --- /dev/null +++ b/modules/nf-core/mmseqs/databases/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "test-mmseqs-databases": { + "content": [ + [ + "database", + "database.dbtype", + "database.index", + "database.lookup", + "database.source", + "database.version", + "database_h", + "database_h.dbtype", + "database_h.index", + "database_mapping", + "database_taxonomy" + ], + [ + "versions.yml:md5,b038db45e5934b8f0f743449bbac01b4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-06T15:43:58.454012" + }, + "test-mmseqs-databases-stub": { + "content": [ + { + "0": [ + [ + "database:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.lookup:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.source:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.version:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_h:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_h.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_h.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_mapping:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_taxonomy:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,49082428ec974e4ddb09a6ca2e9f21b3" + ], + "database": [ + [ + "database:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.lookup:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.source:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.version:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_h:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_h.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_h.index:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_mapping:md5,d41d8cd98f00b204e9800998ecf8427e", + "database_taxonomy:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,49082428ec974e4ddb09a6ca2e9f21b3" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T17:00:20.527628" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/taxonomy/environment.yml b/modules/nf-core/mmseqs/taxonomy/environment.yml new file mode 100644 index 00000000..65f1e5cd --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mmseqs2=15.6f452" diff --git a/modules/nf-core/mmseqs/taxonomy/main.nf b/modules/nf-core/mmseqs/taxonomy/main.nf new file mode 100644 index 00000000..54849885 --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/main.nf @@ -0,0 +1,65 @@ +process MMSEQS_TAXONOMY { + tag "$meta.id" + label 
'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" + + input: + tuple val(meta), path(db_query) + path(db_target) + + output: + tuple val(meta), path("${prefix}_taxonomy"), emit: db_taxonomy + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: "*.dbtype" //represents the db_query + def args3 = task.ext.args3 ?: "*.dbtype" //represents the db_target + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix}_taxonomy + + # Extract files with specified args based suffix | remove suffix | isolate longest common substring of files + DB_QUERY_PATH_NAME=\$(find -L "${db_query}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_TARGET_PATH_NAME=\$(find -L "${db_target}/" -maxdepth 1 -name "${args3}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + + mmseqs \\ + taxonomy \\ + \$DB_QUERY_PATH_NAME \\ + \$DB_TARGET_PATH_NAME \\ + ${prefix}_taxonomy/${prefix} \\ + tmp1 \\ + $args \\ + --threads ${task.cpus} \\ + --compressed 1 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix}_taxonomy + touch ${prefix}_taxonomy/${prefix}.{0..25} + touch ${prefix}_taxonomy/${prefix}.dbtype + touch ${prefix}_taxonomy/${prefix}.index + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/taxonomy/meta.yml b/modules/nf-core/mmseqs/taxonomy/meta.yml new file mode 100644 index 00000000..15756feb --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "mmseqs_taxonomy" +description: Computes the lowest common ancestor by identifying the query sequence + homologs against the target database. +keywords: + - protein sequence + - nucleotide sequence + - databases + - taxonomy + - homologs + - mmseqs2 +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering + suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] + identifier: biotools:mmseqs +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_query: + type: directory + description: An MMseqs2 database with query data + - - db_target: + type: directory + description: an MMseqs2 database with target data including the taxonomy classification +output: + - db_taxonomy: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
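MMSEQS_TAXONOMY expects two pre-built MMseqs2 databases: a query database (typically from MMSEQS_CREATEDB) and a taxonomy-aware target database. A minimal sketch of that wiring, mirroring the test setup further below; `params.fasta` and the sample id are hypothetical:

```groovy
include { MMSEQS_CREATEDB  } from './modules/nf-core/mmseqs/createdb/main'
include { MMSEQS_DATABASES } from './modules/nf-core/mmseqs/databases/main'
include { MMSEQS_TAXONOMY  } from './modules/nf-core/mmseqs/taxonomy/main'

workflow {
    // Hypothetical input channel: one assembly per sample.
    ch_contigs = Channel.of( [ [ id:'sample1' ], file(params.fasta) ] )

    MMSEQS_CREATEDB( ch_contigs )     // query database built from the FASTA
    MMSEQS_DATABASES( 'SILVA' )       // taxonomy-aware target database

    MMSEQS_TAXONOMY( MMSEQS_CREATEDB.out.db, MMSEQS_DATABASES.out.database )
}
```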
`[ id:'test', single_end:false ]` + - ${prefix}_taxonomy: + type: directory + description: An MMseqs2 database with target data including the taxonomy classification + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test new file mode 100644 index 00000000..95f1bc22 --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process MMSEQS_TAXONOMY" + script "../main.nf" + config "./nextflow.config" + process "MMSEQS_TAXONOMY" + + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/taxonomy" + tag "mmseqs/createdb" + tag "mmseqs/databases" + + setup { + run("MMSEQS_CREATEDB") { + script "modules/nf-core/mmseqs/createdb/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + """ + } + } + + run("MMSEQS_DATABASES") { + script "modules/nf-core/mmseqs/databases/main.nf" + process { + """ + input[0] = 'SILVA' + """ + } + } + } + + test("mmseqs/taxonomy - bacteroides_fragilis - genome_nt") { + when { + process { + """ + input[0] = MMSEQS_CREATEDB.out.db + input[1] = MMSEQS_DATABASES.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.db_taxonomy[0][1]).list().sort(), + process.out.versions + ).match() + } + ) + } + } + + test("mmseqs/taxonomy - bacteroides_fragilis - genome_nt - stub") { + + options "-stub" + + when { + process { + """ + input[0] = MMSEQS_CREATEDB.out.db + input[1] = MMSEQS_DATABASES.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} + diff --git a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap new file mode 100644 index 00000000..225680ac --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap @@ -0,0 +1,113 @@ +{ + "mmseqs/taxonomy - bacteroides_fragilis - genome_nt": { + "content": [ + [ + "test_query.0", + "test_query.1", + "test_query.dbtype", + "test_query.index" + ], + [ + "versions.yml:md5,a8f24dca956a1c84099ff129f826c63f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:11:53.632751" + }, + "mmseqs/taxonomy - bacteroides_fragilis - genome_nt - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_query", + "single_end": false + }, + [ + "test_query.0:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.10:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.11:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.12:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.13:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.14:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.15:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.16:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.17:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.18:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.19:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.2:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test_query.20:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.21:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.22:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.23:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.24:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.25:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.4:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.5:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.6:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.7:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.8:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.9:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,a8f24dca956a1c84099ff129f826c63f" + ], + "db_taxonomy": [ + [ + { + "id": "test_query", + "single_end": false + }, + [ + "test_query.0:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.10:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.11:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.12:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.13:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.14:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.15:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.16:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.17:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.18:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.19:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.2:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.20:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.21:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.22:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.23:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.24:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.25:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.4:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.5:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.6:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.7:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.8:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.9:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a8f24dca956a1c84099ff129f826c63f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:12:00.148815" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/taxonomy/tests/nextflow.config b/modules/nf-core/mmseqs/taxonomy/tests/nextflow.config new file mode 100644 index 00000000..72f6fc81 --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: MMSEQS_TAXONOMY { + ext.args = '--search-type 2' + } +} diff --git a/modules/nf-core/mmseqs/taxonomy/tests/tags.yml b/modules/nf-core/mmseqs/taxonomy/tests/tags.yml new file mode 100644 index 00000000..76172197 --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/taxonomy: + - "modules/nf-core/mmseqs/taxonomy/**" diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml new file mode 100644 
index 00000000..7609bf3b --- /dev/null +++ b/modules/nf-core/prodigal/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::prodigal=2.6.3 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf new file mode 100644 index 00000000..49ced167 --- /dev/null +++ b/modules/nf-core/prodigal/main.nf @@ -0,0 +1,64 @@ +process PRODIGAL { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' : + 'biocontainers/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' }" + + input: + tuple val(meta), path(genome) + val(output_format) + + output: + tuple val(meta), path("${prefix}.${output_format}.gz"), emit: gene_annotations + tuple val(meta), path("${prefix}.fna.gz"), emit: nucleotide_fasta + tuple val(meta), path("${prefix}.faa.gz"), emit: amino_acid_fasta + tuple val(meta), path("${prefix}_all.txt.gz"), emit: all_gene_annotations + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + pigz -cdf ${genome} | prodigal \\ + $args \\ + -f $output_format \\ + -d "${prefix}.fna" \\ + -o "${prefix}.${output_format}" \\ + -a "${prefix}.faa" \\ + -s "${prefix}_all.txt" + + pigz -nm ${prefix}.fna + pigz -nm ${prefix}.${output_format} + pigz -nm ${prefix}.faa + pigz -nm ${prefix}_all.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fna.gz + touch ${prefix}.${output_format}.gz + touch ${prefix}.faa.gz + touch ${prefix}_all.txt.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml new file mode 100644 index 00000000..7d3d459e --- /dev/null +++ b/modules/nf-core/prodigal/meta.yml @@ -0,0 +1,79 @@ +name: prodigal +description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a + microbial (bacterial and archaeal) gene finding program +keywords: + - prokaryotes + - gene finding + - microbial +tools: + - prodigal: + description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) + is a microbial (bacterial and archaeal) gene finding program + homepage: https://github.com/hyattpd/Prodigal + documentation: https://github.com/hyattpd/prodigal/wiki + tool_dev_url: https://github.com/hyattpd/Prodigal + doi: "10.1186/1471-2105-11-119" + licence: ["GPL v3"] + identifier: biotools:prodigal +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
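PRODIGAL takes the assembly FASTA plus a bare string choosing the annotation format ('gbk', 'gff', 'sqn' or 'sco'), and gzips everything on the way out. A minimal call-site sketch; the input channel and file name are hypothetical:

```groovy
include { PRODIGAL } from './modules/nf-core/prodigal/main'

workflow {
    // Hypothetical input: one (optionally gzipped) assembly per sample.
    ch_assembly = Channel.of( [ [ id:'sample1' ], file('contigs.fa.gz') ] )

    PRODIGAL( ch_assembly, 'gff' )

    PRODIGAL.out.amino_acid_fasta.view()   // e.g. [ [id:'sample1'], sample1.faa.gz ]
}
```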
[ id:'test', single_end:false ] + - genome: + type: file + description: fasta/fasta.gz file + - - output_format: + type: string + description: Output format ("gbk"/"gff"/"sqn"/"sco") +output: + - gene_annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${output_format}.gz: + type: file + description: gene annotations in output_format given as input + pattern: "*.{output_format}" + - nucleotide_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.fna.gz: + type: file + description: nucleotide sequences file + pattern: "*.{fna}" + - amino_acid_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.faa.gz: + type: file + description: protein translations file + pattern: "*.{faa}" + - all_gene_annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}_all.txt.gz: + type: file + description: complete starts file + pattern: "*.{_all.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@grst" +maintainers: + - "@grst" diff --git a/modules/nf-core/prodigal/tests/main.nf.test b/modules/nf-core/prodigal/tests/main.nf.test new file mode 100644 index 00000000..446bd0d1 --- /dev/null +++ b/modules/nf-core/prodigal/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process PRODIGAL" + script "../main.nf" + process "PRODIGAL" + + tag "modules" + tag "modules_nfcore" + tag "prodigal" + + test("prodigal - sarscov2 - gff") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("prodigal - sarscov2 - gbk") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("prodigal - sarscov2 - gff - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.out).match() } + ) + } + } + + test("prodigal - sarscov2 - gbk - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.out).match() } + ) + } + } + +} \ No newline at end of file diff --git a/modules/nf-core/prodigal/tests/main.nf.test.snap b/modules/nf-core/prodigal/tests/main.nf.test.snap new file mode 100644 index 00000000..f29802b4 --- /dev/null +++ 
b/modules/nf-core/prodigal/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "prodigal - sarscov2 - gbk - stub": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:58:09.852618454" + }, + "prodigal - sarscov2 - gff": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,612c2724c2891c63350f171f74165757" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "4": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ], + "all_gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "amino_acid_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,612c2724c2891c63350f171f74165757" + ] + ], + "nucleotide_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "versions": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:57:49.57989696" + }, + "prodigal - sarscov2 - gff - stub": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:58:03.210222528" + }, + "prodigal - sarscov2 - gbk": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gbk.gz:md5,188b3a0e3f78740ded7f3ec4d876cb4b" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "4": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ], + "all_gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "amino_acid_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gbk.gz:md5,188b3a0e3f78740ded7f3ec4d876cb4b" + ] + ], + "nucleotide_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "versions": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:57:56.606374214" + } +} \ No newline at end of file diff --git a/modules/nf-core/prodigal/tests/tags.yml b/modules/nf-core/prodigal/tests/tags.yml new file mode 100644 index 00000000..fc0cb020 --- /dev/null +++ b/modules/nf-core/prodigal/tests/tags.yml @@ -0,0 +1,2 @@ +prodigal: + - "modules/nf-core/prodigal/**" diff --git a/modules/nf-core/prokka/environment.yml b/modules/nf-core/prokka/environment.yml new file mode 
100644 index 00000000..7c9753fc --- /dev/null +++ b/modules/nf-core/prokka/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::prokka=1.14.6 diff --git a/modules/nf-core/prokka/main.nf b/modules/nf-core/prokka/main.nf new file mode 100644 index 00000000..adfda037 --- /dev/null +++ b/modules/nf-core/prokka/main.nf @@ -0,0 +1,52 @@ +process PROKKA { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' : + 'biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }" + + input: + tuple val(meta), path(fasta) + path proteins + path prodigal_tf + + output: + tuple val(meta), path("${prefix}/*.gff"), emit: gff + tuple val(meta), path("${prefix}/*.gbk"), emit: gbk + tuple val(meta), path("${prefix}/*.fna"), emit: fna + tuple val(meta), path("${prefix}/*.faa"), emit: faa + tuple val(meta), path("${prefix}/*.ffn"), emit: ffn + tuple val(meta), path("${prefix}/*.sqn"), emit: sqn + tuple val(meta), path("${prefix}/*.fsa"), emit: fsa + tuple val(meta), path("${prefix}/*.tbl"), emit: tbl + tuple val(meta), path("${prefix}/*.err"), emit: err + tuple val(meta), path("${prefix}/*.log"), emit: log + tuple val(meta), path("${prefix}/*.txt"), emit: txt + tuple val(meta), path("${prefix}/*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" + def prodigal_tf = prodigal_tf ? "--prodigaltf ${prodigal_tf[0]}" : "" + """ + prokka \\ + $args \\ + --cpus $task.cpus \\ + --prefix $prefix \\ + $proteins_opt \\ + $prodigal_tf \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prokka: \$(echo \$(prokka --version 2>&1) | sed 's/^.*prokka //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/prokka/meta.yml b/modules/nf-core/prokka/meta.yml new file mode 100644 index 00000000..90745735 --- /dev/null +++ b/modules/nf-core/prokka/meta.yml @@ -0,0 +1,161 @@ +name: prokka +description: Whole genome annotation of small genomes (bacterial, archeal, viral) +keywords: + - annotation + - fasta + - prokka +tools: + - prokka: + description: Rapid annotation of prokaryotic genomes + homepage: https://github.com/tseemann/prokka + doi: "10.1093/bioinformatics/btu153" + licence: ["GPL v2"] + identifier: biotools:prokka +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + FASTA file to be annotated. Has to contain at least a non-empty string dummy value. + - - proteins: + type: file + description: FASTA file of trusted proteins to first annotate from (optional) + - - prodigal_tf: + type: file + description: Training file to use for Prodigal (optional) +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.gff: + type: file + description: annotation in GFF3 format, containing both sequences and annotations + pattern: "*.{gff}" + - gbk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
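PROKKA's second and third inputs (trusted proteins and a Prodigal training file) are optional. A minimal call-site sketch, passing empty lists to skip them just as the test further below does; the input channel is hypothetical:

```groovy
include { PROKKA } from './modules/nf-core/prokka/main'

workflow {
    // Hypothetical input: one assembly per sample.
    ch_assembly = Channel.of( [ [ id:'sample1' ], file('contigs.fa') ] )

    // Empty lists stand in for the optional --proteins and --prodigaltf inputs.
    PROKKA( ch_assembly, [], [] )
}
```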
[ id:'test', single_end:false ] + - ${prefix}/*.gbk: + type: file + description: annotation in GenBank format, containing both sequences and annotations + pattern: "*.{gbk}" + - fna: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.fna: + type: file + description: nucleotide FASTA file of the input contig sequences + pattern: "*.{fna}" + - faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.faa: + type: file + description: protein FASTA file of the translated CDS sequences + pattern: "*.{faa}" + - ffn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.ffn: + type: file + description: nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, + tRNA, tmRNA, misc_RNA) + pattern: "*.{ffn}" + - sqn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.sqn: + type: file + description: an ASN1 format "Sequin" file for submission to Genbank + pattern: "*.{sqn}" + - fsa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.fsa: + type: file + description: nucleotide FASTA file of the input contig sequences, used by "tbl2asn" + to create the .sqn file + pattern: "*.{fsa}" + - tbl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.tbl: + type: file + description: feature Table file, used by "tbl2asn" to create the .sqn file + pattern: "*.{tbl}" + - err: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.err: + type: file + description: unacceptable annotations - the NCBI discrepancy report. + pattern: "*.{err}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.log: + type: file + description: contains all the output that Prokka produced during its run + pattern: "*.{log}" + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*.txt: + type: file + description: statistics relating to the annotated features found + pattern: "*.{txt}" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/*.tsv: + type: file + description: tab-separated file of all features (locus_tag,ftype,len_bp,gene,EC_number,COG,product) + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/prokka/tests/main.nf.test b/modules/nf-core/prokka/tests/main.nf.test new file mode 100644 index 00000000..dca19bba --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_process { + + name "Test Process PROKKA" + script "../main.nf" + process "PROKKA" + + tag "modules" + tag "modules_nfcore" + tag "prokka" + + test("Prokka - sarscov2 - genome.fasta") { + + when { + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) + ]) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gbk.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.sqn.get(0).get(1)).exists() }, + { assert snapshot( + process.out.gff, + process.out.fna, + process.out.faa, + process.out.ffn, + process.out.fsa, + process.out.tbl, + process.out.err, + process.out.txt, + process.out.tsv, + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/prokka/tests/main.nf.test.snap b/modules/nf-core/prokka/tests/main.nf.test.snap new file mode 100644 index 00000000..874c989d --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "Prokka - sarscov2 - genome.fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,5dbfb8fcf2db020564c16045976a0933" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna:md5,787307f29a263e5657cc276ebbf7e2b3" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,a4ceda83262b3c222a6b1f508fb9e24b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.ffn:md5,80f474b5367b7ea5ed23791935f65e34" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fsa:md5,71bbefcb7f12046bcd3263f58cfd5404" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl:md5,d8f816a066ced94b62d9618b13fb8add" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.err:md5,b3daedc646fddd422824e2b3e5e9229d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,b40e485ffc8eaf1feacf8d79d9751a33" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,da7c720c3018c5081d6a70b517b7d450" + ] + ], + [ + "versions.yml:md5,e83a22fe02167e290d90853b45650db9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T12:34:20.447734" + } +} \ No newline at end of file diff --git a/modules/nf-core/prokka/tests/tags.yml b/modules/nf-core/prokka/tests/tags.yml new file mode 100644 index 00000000..a2dc7bdc --- /dev/null +++ b/modules/nf-core/prokka/tests/tags.yml @@ -0,0 +1,2 @@ +prokka: + - "modules/nf-core/prokka/**" diff --git a/modules/nf-core/pyrodigal/environment.yml b/modules/nf-core/pyrodigal/environment.yml new file mode 100644 index 00000000..b0bbc515 --- /dev/null +++ b/modules/nf-core/pyrodigal/environment.yml @@ -0,0 +1,8 @@ +--- +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::pyrodigal=3.3.0 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/pyrodigal/main.nf b/modules/nf-core/pyrodigal/main.nf new file mode 100644 index 00000000..7cb97594 --- /dev/null +++ b/modules/nf-core/pyrodigal/main.nf @@ -0,0 +1,61 @@ +process PYRODIGAL { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-2fe9a8ce513c91df34b43a6610df94c3a2eb3bd0:47e7d40834619419f202394563267d74cef857be-0': + 'biocontainers/mulled-v2-2fe9a8ce513c91df34b43a6610df94c3a2eb3bd0:47e7d40834619419f202394563267d74cef857be-0' }" + + input: + tuple val(meta), path(fasta) + val(output_format) + + output: + tuple val(meta), path("*.${output_format}.gz") , emit: annotations + tuple val(meta), path("*.fna.gz") , emit: fna + tuple val(meta), path("*.faa.gz") , emit: faa + tuple val(meta), path("*.score.gz") , emit: score + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pigz -cdf ${fasta} > pigz_fasta.fna + + pyrodigal \\ + $args \\ + -i pigz_fasta.fna \\ + -f $output_format \\ + -o "${prefix}.${output_format}" \\ + -d ${prefix}.fna \\ + -a ${prefix}.faa \\ + -s ${prefix}.score + + pigz -nmf ${prefix}* + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyrodigal: \$(echo \$(pyrodigal --version 2>&1 | sed 's/pyrodigal v//')) + END_VERSIONS + """ + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${output_format}.gz + touch ${prefix}.fna.gz + touch ${prefix}.faa.gz + touch ${prefix}.score.gz + touch versions.yml + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyrodigal: \$(echo \$(pyrodigal --version 2>&1 | sed 's/pyrodigal v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pyrodigal/meta.yml b/modules/nf-core/pyrodigal/meta.yml new file mode 100644 index 00000000..d8394d07 --- /dev/null +++ b/modules/nf-core/pyrodigal/meta.yml @@ -0,0 +1,83 @@ +name: "pyrodigal" +description: Pyrodigal is a Python module that provides bindings to Prodigal, a fast, + reliable protein-coding gene prediction for prokaryotic genomes. +keywords: + - sort + - annotation + - prediction + - prokaryote +tools: + - "pyrodigal": + description: "Pyrodigal is a Python module that provides bindings to Prodigal + (ORF finder for microbial sequences) using Cython." + homepage: "https://pyrodigal.readthedocs.org/" + documentation: "https://pyrodigal.readthedocs.org/" + tool_dev_url: "https://github.com/althonos/pyrodigal/" + doi: "10.21105/joss.04296" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fasta.gz,fa.gz,fna.gz}" + - - output_format: + type: string + description: Output format + pattern: "{gbk,gff}" +output: + - annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${output_format}.gz": + type: file + description: Gene annotations. 
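PYRODIGAL mirrors the PRODIGAL interface (FASTA plus an output-format string), with extra CLI flags injected via `ext.args`. A hedged config sketch; `-p meta` is a real pyrodigal/prodigal option, but whether to set it depends on the input data:

```groovy
// nextflow.config sketch: `-p meta` selects the metagenome procedure, the
// usual choice for mixed-community contigs. The value here is illustrative.
process {
    withName: PYRODIGAL {
        ext.args = '-p meta'
    }
}
```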
The file format is specified via input channel + "output_format". + pattern: "*.{gbk,gff}.gz" + - fna: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fna.gz": + type: file + description: nucleotide sequences file + pattern: "*.{fna.gz}" + - faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.faa.gz": + type: file + description: protein translations file + pattern: "*.{faa.gz}" + - score: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.score.gz": + type: file + description: all potential genes (with scores) + pattern: "*.{score.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/pyrodigal/tests/main.nf.test b/modules/nf-core/pyrodigal/tests/main.nf.test new file mode 100644 index 00000000..faa7c8ec --- /dev/null +++ b/modules/nf-core/pyrodigal/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process PYRODIGAL" + script "../main.nf" + process "PYRODIGAL" + + tag "modules" + tag "modules_nfcore" + tag "pyrodigal" + + test("pyrodigal - sarscov2 - gff") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("pyrodigal - sarscov2 - gbk") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.annotations.get(0).get(1)).linesGzip[14..22], + process.out.faa, + process.out.fna, + process.out.score, + process.out.versions, + ).match() } + ) + } + } + + test("pyrodigal - sarscov2 - gff - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gff.collect { file(it[1]).getName() } + + process.out.fna.collect { file(it[1]).getName() } + + process.out.faa.collect { file(it[1]).getName() } + + process.out.score.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + } + + test("pyrodigal - sarscov2 - gbk - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gff.collect { file(it[1]).getName() } + + process.out.fna.collect { file(it[1]).getName() } + + process.out.faa.collect { file(it[1]).getName() } + + process.out.score.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + } + +} \ No newline at end of file diff 
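One idiom in the tests above is worth spelling out: the GenBank output embeds run-specific header lines, so the gbk test snapshots only a fixed slice of the decompressed text alongside the content-stable channels. A sketch of the pattern as an nf-test then-block:

```groovy
// Pin a stable slice of a gzipped, partly non-deterministic output instead
// of its md5, while the remaining channels are snapshotted whole.
then {
    assert snapshot(
        path(process.out.annotations.get(0).get(1)).linesGzip[14..22],
        process.out.faa,
        process.out.fna,
        process.out.score,
        process.out.versions,
    ).match()
}
```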
--git a/modules/nf-core/pyrodigal/tests/main.nf.test.snap b/modules/nf-core/pyrodigal/tests/main.nf.test.snap new file mode 100644 index 00000000..827fdaaa --- /dev/null +++ b/modules/nf-core/pyrodigal/tests/main.nf.test.snap @@ -0,0 +1,171 @@ +{ + "pyrodigal - sarscov2 - gff - stub": { + "content": [ + [ + "test.fna.gz", + "test.faa.gz", + "test.score.gz", + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:42:12.012112014" + }, + "pyrodigal - sarscov2 - gbk": { + "content": [ + [ + " CDS 310..13476", + " /codon_start=1", + " /inference=\"ab initio prediction:pyrodigal:3.3.0\"", + " /locus_tag=\"MT192765.1_1\"", + " /transl_table=11", + " /translation=\"MPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLP", + " QLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKV", + " LLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG", + " AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIA" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.score.gz:md5,c0703a9e662ae0b21c7bbb082ef3fb5f" + ] + ], + [ + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T06:09:40.289778252" + }, + "pyrodigal - sarscov2 - gff": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,8fcd2d93131cf9fb0c82b81db059ad27" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.score.gz:md5,c0703a9e662ae0b21c7bbb082ef3fb5f" + ] + ], + "4": [ + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ], + "annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,8fcd2d93131cf9fb0c82b81db059ad27" + ] + ], + "faa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "fna": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "score": [ + [ + { + "id": "test", + "single_end": false + }, + "test.score.gz:md5,c0703a9e662ae0b21c7bbb082ef3fb5f" + ] + ], + "versions": [ + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:41:55.822235843" + }, + "pyrodigal - sarscov2 - gbk - stub": { + "content": [ + [ + "test.fna.gz", + "test.faa.gz", + "test.score.gz", + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:42:19.81157751" + } +} \ No newline at end of file diff --git a/modules/nf-core/pyrodigal/tests/tags.yml b/modules/nf-core/pyrodigal/tests/tags.yml new file mode 100644 index 00000000..8851ca8b --- /dev/null +++ b/modules/nf-core/pyrodigal/tests/tags.yml @@ -0,0 +1,2 @@ +pyrodigal: + - "modules/nf-core/pyrodigal/**" diff --git a/modules/nf-core/rgi/cardannotation/environment.yml 
b/modules/nf-core/rgi/cardannotation/environment.yml new file mode 100644 index 00000000..609693fe --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rgi=6.0.3 diff --git a/modules/nf-core/rgi/cardannotation/main.nf b/modules/nf-core/rgi/cardannotation/main.nf new file mode 100644 index 00000000..d2a814e0 --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/main.nf @@ -0,0 +1,61 @@ +process RGI_CARDANNOTATION { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rgi:6.0.3--pyha8f3691_1': + 'biocontainers/rgi:6.0.3--pyha8f3691_1' }" + + input: + path(card) + + output: + path("card_database_processed") , emit: db + env RGI_VERSION , emit: tool_version + env DB_VERSION , emit: db_version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + rgi card_annotation \\ + -i ${card}/card.json \\ + $args + + DB_VERSION=\$(ls card_database_*_all.fasta | sed "s/card_database_v\\([0-9].*[0-9]\\).*/\\1/") + + mkdir card_database_processed + mv card*.fasta card_database_processed + cp ${card}/* card_database_processed + + RGI_VERSION=\$(rgi main --version) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ + + stub: + """ + touch card.fasta + touch card_all.fasta + + mkdir card_database_processed + mv card*.fasta card_database_processed + + RGI_VERSION=\$(rgi main --version) + DB_VERSION=stub_version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rgi/cardannotation/meta.yml b/modules/nf-core/rgi/cardannotation/meta.yml new file mode 100644 index 00000000..8aff020f --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/meta.yml @@ -0,0 +1,51 @@ +name: rgi_cardannotation +description: Preprocess the CARD database for RGI to predict antibiotic resistance + from protein or nucleotide data +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - rgi: + description: This module preprocesses the downloaded Comprehensive Antibiotic + Resistance Database (CARD) which can then be used as input for RGI. 
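RGI_CARDANNOTATION takes the unarchived CARD download as-is, so the usual wiring is UNTAR followed by this module, as in the test further below. A minimal sketch; `params.card_tarball` is hypothetical (the test fetches https://card.mcmaster.ca/latest/data and renames it to a .tar.gz):

```groovy
include { UNTAR              } from './modules/nf-core/untar/main'
include { RGI_CARDANNOTATION } from './modules/nf-core/rgi/cardannotation/main'

workflow {
    // Unpack the CARD tarball, then drop the meta map before handing the
    // bare directory to the annotation step.
    UNTAR( [ [:], file(params.card_tarball) ] )

    RGI_CARDANNOTATION( UNTAR.out.untar.map { meta, dir -> dir } )
}
```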
+ homepage: https://card.mcmaster.ca + documentation: https://github.com/arpcard/rgi + tool_dev_url: https://github.com/arpcard/rgi + doi: "10.1093/nar/gkz935" + licence: ["https://card.mcmaster.ca/about"] + identifier: "" +input: + - - card: + type: directory + description: Directory containing the CARD database + pattern: "*/" +output: + - db: + - card_database_processed: + type: directory + description: Directory containing the processed CARD database files + pattern: "*/" + - tool_version: + - RGI_VERSION: + type: string + description: The version of the tool in string format (useful for downstream + tools such as hAMRronization) + - db_version: + - DB_VERSION: + type: string + description: The version of the used database in string format (useful for downstream + tools such as hAMRronization) + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" +maintainers: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" diff --git a/modules/nf-core/rgi/cardannotation/tests/main.nf.test b/modules/nf-core/rgi/cardannotation/tests/main.nf.test new file mode 100644 index 00000000..fa51142a --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process RGI_CARDANNOTATION" + script "../main.nf" + process "RGI_CARDANNOTATION" + + tag "modules" + tag "modules_nfcore" + tag "rgi" + tag "rgi/cardannotation" + tag "untar" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('data.tar.gz') + + input[0] = [ + [ ], + file("data.tar.gz") + ] + """ + } + } + } + + test("rgi/cardannotation") { + + when { + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("rgi/cardannotation - stub") { + + options "-stub" + + when { + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rgi/cardannotation/tests/main.nf.test.snap b/modules/nf-core/rgi/cardannotation/tests/main.nf.test.snap new file mode 100644 index 00000000..5d58124d --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/tests/main.nf.test.snap @@ -0,0 +1,118 @@ +{ + "rgi/cardannotation - stub": { + "content": [ + { + "0": [ + [ + "card.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "card_all.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "6.0.3" + ], + "2": [ + "stub_version" + ], + "3": [ + "versions.yml:md5,ff6d0eeef874d3a3cb6e823cd4610e2d" + ], + "db": [ + [ + "card.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "card_all.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "db_version": [ + "stub_version" + ], + "tool_version": [ + "6.0.3" + ], + "versions": [ + "versions.yml:md5,ff6d0eeef874d3a3cb6e823cd4610e2d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:33:34.376943812" + }, + "rgi/cardannotation": { + "content": [ + { + "0": [ + [ + "CARD-Download-README.txt:md5,ca330e1d89e3a97ac6f50c86a8ca5c34", + "aro_categories.tsv:md5,ba2f33c43b199cd62ae5663125ce316e", + "aro_categories_index.tsv:md5,39f995f2356b6a0cb5fd34e3c6ffc8e1", + "aro_index.tsv:md5,b7250ed3208c8497ec2371527a689eeb", + 
"card.json:md5,e2cb53b1706a602d5265d2284a1fcdd5", + "card_database_v3.2.9.fasta:md5,0839d4447860694782a5db5cd6eae085", + "card_database_v3.2.9_all.fasta:md5,5295875faf06bef62ea954fef40958c3", + "nucleotide_fasta_protein_homolog_model.fasta:md5,ebcd48a6c9e14f339ffd9d2673eed803", + "nucleotide_fasta_protein_knockout_model.fasta:md5,ff476b358ef70da53acf4602568a9b9b", + "nucleotide_fasta_protein_overexpression_model.fasta:md5,68937e587c880153400fa8203f6a90d5", + "nucleotide_fasta_protein_variant_model.fasta:md5,1ff9cbaf0d640e2084f13751309f8176", + "nucleotide_fasta_rRNA_gene_variant_model.fasta:md5,b88fbe1d6de44b2ff2819ee63d001d75", + "protein_fasta_protein_homolog_model.fasta:md5,130a0947c60d18ef2e7d0ab886f80af3", + "protein_fasta_protein_knockout_model.fasta:md5,6b259399e3eae3f23eaa421bbba6ba25", + "protein_fasta_protein_overexpression_model.fasta:md5,758b753b821789147cdd795c654940ad", + "protein_fasta_protein_variant_model.fasta:md5,ec46ea3d9dc7ab01ec22cf265e410c88", + "shortname_antibiotics.tsv:md5,9d20abb9f6d37ed0cecc1573867ca49a", + "shortname_pathogens.tsv:md5,ae267113de686bc8f58eab5845cc343b", + "snps.txt:md5,ee6dfbe7a65f3ffdb6968822c47e4550" + ] + ], + "1": [ + "6.0.3" + ], + "2": [ + "3.2.9" + ], + "3": [ + "versions.yml:md5,43f331ec71ec01a1bae10e30f4ce4f26" + ], + "db": [ + [ + "CARD-Download-README.txt:md5,ca330e1d89e3a97ac6f50c86a8ca5c34", + "aro_categories.tsv:md5,ba2f33c43b199cd62ae5663125ce316e", + "aro_categories_index.tsv:md5,39f995f2356b6a0cb5fd34e3c6ffc8e1", + "aro_index.tsv:md5,b7250ed3208c8497ec2371527a689eeb", + "card.json:md5,e2cb53b1706a602d5265d2284a1fcdd5", + "card_database_v3.2.9.fasta:md5,0839d4447860694782a5db5cd6eae085", + "card_database_v3.2.9_all.fasta:md5,5295875faf06bef62ea954fef40958c3", + "nucleotide_fasta_protein_homolog_model.fasta:md5,ebcd48a6c9e14f339ffd9d2673eed803", + "nucleotide_fasta_protein_knockout_model.fasta:md5,ff476b358ef70da53acf4602568a9b9b", + "nucleotide_fasta_protein_overexpression_model.fasta:md5,68937e587c880153400fa8203f6a90d5", + "nucleotide_fasta_protein_variant_model.fasta:md5,1ff9cbaf0d640e2084f13751309f8176", + "nucleotide_fasta_rRNA_gene_variant_model.fasta:md5,b88fbe1d6de44b2ff2819ee63d001d75", + "protein_fasta_protein_homolog_model.fasta:md5,130a0947c60d18ef2e7d0ab886f80af3", + "protein_fasta_protein_knockout_model.fasta:md5,6b259399e3eae3f23eaa421bbba6ba25", + "protein_fasta_protein_overexpression_model.fasta:md5,758b753b821789147cdd795c654940ad", + "protein_fasta_protein_variant_model.fasta:md5,ec46ea3d9dc7ab01ec22cf265e410c88", + "shortname_antibiotics.tsv:md5,9d20abb9f6d37ed0cecc1573867ca49a", + "shortname_pathogens.tsv:md5,ae267113de686bc8f58eab5845cc343b", + "snps.txt:md5,ee6dfbe7a65f3ffdb6968822c47e4550" + ] + ], + "db_version": [ + "3.2.9" + ], + "tool_version": [ + "6.0.3" + ], + "versions": [ + "versions.yml:md5,43f331ec71ec01a1bae10e30f4ce4f26" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:33:06.962413561" + } +} \ No newline at end of file diff --git a/modules/nf-core/rgi/cardannotation/tests/tags.yml b/modules/nf-core/rgi/cardannotation/tests/tags.yml new file mode 100644 index 00000000..02c2de0b --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/tests/tags.yml @@ -0,0 +1,2 @@ +rgi/cardannotation: + - "modules/nf-core/rgi/cardannotation/**" diff --git a/modules/nf-core/rgi/main/environment.yml b/modules/nf-core/rgi/main/environment.yml new file mode 100644 index 00000000..609693fe --- /dev/null +++ b/modules/nf-core/rgi/main/environment.yml @@ -0,0 
+1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rgi=6.0.3 diff --git a/modules/nf-core/rgi/main/main.nf b/modules/nf-core/rgi/main/main.nf new file mode 100644 index 00000000..ba05358a --- /dev/null +++ b/modules/nf-core/rgi/main/main.nf @@ -0,0 +1,89 @@ +process RGI_MAIN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rgi:6.0.3--pyha8f3691_1': + 'biocontainers/rgi:6.0.3--pyha8f3691_1' }" + + input: + tuple val(meta), path(fasta) + path(card) + path(wildcard) + + output: + tuple val(meta), path("*.json"), emit: json + tuple val(meta), path("*.txt") , emit: tsv + tuple val(meta), path("temp/") , emit: tmp + env RGI_VERSION , emit: tool_version + env DB_VERSION , emit: db_version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // This customizes the command: rgi load + def args2 = task.ext.args2 ?: '' // This customizes the command: rgi main + def prefix = task.ext.prefix ?: "${meta.id}" + def load_wildcard = "" + + if (wildcard) { + load_wildcard = """ \\ + --wildcard_annotation ${wildcard}/wildcard_database_v\$DB_VERSION.fasta \\ + --wildcard_annotation_all_models ${wildcard}/wildcard_database_v\$DB_VERSION\\_all.fasta \\ + --wildcard_index ${wildcard}/wildcard/index-for-model-sequences.txt \\ + --amr_kmers ${wildcard}/wildcard/all_amr_61mers.txt \\ + --kmer_database ${wildcard}/wildcard/61_kmer_db.json \\ + --kmer_size 61 + """ + } + + """ + DB_VERSION=\$(ls ${card}/card_database_*_all.fasta | sed "s/${card}\\/card_database_v\\([0-9].*[0-9]\\).*/\\1/") + + rgi \\ + load \\ + $args \\ + --card_json ${card}/card.json \\ + --debug --local \\ + --card_annotation ${card}/card_database_v\$DB_VERSION.fasta \\ + --card_annotation_all_models ${card}/card_database_v\$DB_VERSION\\_all.fasta \\ + $load_wildcard + + rgi \\ + main \\ + $args2 \\ + --num_threads $task.cpus \\ + --output_file $prefix \\ + --input_sequence $fasta + + mkdir temp/ + for FILE in *.xml *.fsa *.{nhr,nin,nsq} *.draft *.potentialGenes *{variant,rrna,protein,predictedGenes,overexpression,homolog}.json; do [[ -e \$FILE ]] && mv \$FILE temp/; done + + RGI_VERSION=\$(rgi main --version) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ + + stub: + """ + mkdir -p temp + touch test.json + touch test.txt + + RGI_VERSION=\$(rgi main --version) + DB_VERSION=stub_version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rgi/main/meta.yml b/modules/nf-core/rgi/main/meta.yml new file mode 100644 index 00000000..9d9836c0 --- /dev/null +++ b/modules/nf-core/rgi/main/meta.yml @@ -0,0 +1,96 @@ +name: rgi_main +description: Predict antibiotic resistance from protein or nucleotide data +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - rgi: + description: This tool provides a preliminary annotation of your DNA sequence(s) + based upon the data available in The Comprehensive Antibiotic Resistance Database + (CARD). Hits to genes tagged with Antibiotic Resistance ontology terms will + be highlighted.
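RGI_MAIN consumes the directory produced by RGI_CARDANNOTATION plus an optional WildCARD directory. A minimal call-site sketch; the contig channel and `params.card_dir` are hypothetical:

```groovy
include { RGI_CARDANNOTATION } from './modules/nf-core/rgi/cardannotation/main'
include { RGI_MAIN           } from './modules/nf-core/rgi/main/main'

workflow {
    // Hypothetical input: one assembly per sample.
    ch_contigs = Channel.of( [ [ id:'sample1' ], file('contigs.fna.gz') ] )

    RGI_CARDANNOTATION( file(params.card_dir) )             // untarred CARD download (hypothetical param)
    RGI_MAIN( ch_contigs, RGI_CARDANNOTATION.out.db, [] )   // [] skips the optional WildCARD data
}
```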
As CARD expands to include more pathogens, genomes, plasmids,
+        and ontology terms, this tool will grow increasingly powerful in providing first-pass
+        detection of antibiotic resistance-associated genes. See license at the CARD website.
+      homepage: https://card.mcmaster.ca
+      documentation: https://github.com/arpcard/rgi
+      tool_dev_url: https://github.com/arpcard/rgi
+      doi: "10.1093/nar/gkz935"
+      licence: ["https://card.mcmaster.ca/about"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - fasta:
+        type: file
+        description: Nucleotide or protein sequences in FASTA format
+        pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}"
+  - - card:
+        type: directory
+        description: Directory containing the CARD database. This is expected to be
+          the unarchived but otherwise unaltered download folder (see RGI documentation
+          for download instructions).
+        pattern: "*/"
+  - - wildcard:
+        type: directory
+        description: Directory containing the WildCARD database (optional). This is
+          expected to be the unarchived but otherwise unaltered download folder (see
+          RGI documentation for download instructions).
+        pattern: "*/"
+output:
+  - json:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.json":
+          type: file
+          description: JSON formatted file with RGI results
+          pattern: "*.{json}"
+  - tsv:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.txt":
+          type: file
+          description: Tab-delimited file with RGI results
+          pattern: "*.{txt}"
+  - tmp:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g.
[ id:'test', single_end:false ]
+      - temp/:
+          type: directory
+          description: Directory containing various intermediate files
+          pattern: "temp/"
+  - tool_version:
+      - RGI_VERSION:
+          type: string
+          description: The version of the tool in string format (useful for downstream
+            tools such as hAMRronization)
+  - db_version:
+      - DB_VERSION:
+          type: string
+          description: The version of the database used, in string format (useful for downstream
+            tools such as hAMRronization)
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@rpetit3"
+  - "@jfy133"
+  - "@jasmezz"
+maintainers:
+  - "@rpetit3"
+  - "@jfy133"
+  - "@jasmezz"
diff --git a/modules/nf-core/rgi/main/tests/main.nf.test b/modules/nf-core/rgi/main/tests/main.nf.test
new file mode 100644
index 00000000..1fca563a
--- /dev/null
+++ b/modules/nf-core/rgi/main/tests/main.nf.test
@@ -0,0 +1,94 @@
+nextflow_process {
+
+    name "Test Process RGI_MAIN"
+    script "../main.nf"
+    process "RGI_MAIN"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "rgi"
+    tag "rgi/main"
+    tag "rgi/cardannotation"
+    tag "untar"
+
+    setup {
+        run("UNTAR") {
+            script "modules/nf-core/untar/main.nf"
+            process {
+                """
+                file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('card-data.tar.bz2')
+
+                input[0] = [
+                    [ ],
+                    file("card-data.tar.bz2")
+                ]
+                """
+            }
+        }
+
+        run("RGI_CARDANNOTATION") {
+            script "modules/nf-core/rgi/cardannotation/main.nf"
+            process {
+                """
+                input[0] = UNTAR.out.untar.map{ it[1] }
+                """
+            }
+        }
+    }
+
+
+    test("rgi/main - haemophilus_influenzae - genome_fna_gz") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true)
+                ]
+                input[1] = RGI_CARDANNOTATION.out.db
+                input[2] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.tsv,
+                    process.out.json,
+                    file(process.out.tmp.get(0).get(1)).list().sort(),
+                    process.out.tool_version,
+                    process.out.db_version,
+                ).match() }
+            )
+        }
+    }
+
+    test("rgi/main - haemophilus_influenzae - genome_fna_gz - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true)
+                ]
+                input[1] = RGI_CARDANNOTATION.out.db
+                input[2] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/rgi/main/tests/main.nf.test.snap b/modules/nf-core/rgi/main/tests/main.nf.test.snap
new file mode 100644
index 00000000..a8dc1d61
--- /dev/null
+++ b/modules/nf-core/rgi/main/tests/main.nf.test.snap
@@ -0,0 +1,143 @@
+{
+    "rgi/main - haemophilus_influenzae - genome_fna_gz - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+
+                        ]
+                    ]
+                ],
+                "3": [
+                    "6.0.3"
+                ],
+                "4": [
+                    "stub_version"
+                ],
+                "5": [
+                    "versions.yml:md5,f77ce9bdc8d309c9d6f7ec63bd53f5cf"
+                ],
+                "db_version": [
+                    "stub_version"
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
"tmp": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ], + "tool_version": [ + "6.0.3" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f77ce9bdc8d309c9d6f7ec63bd53f5cf" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T22:51:36.047807514" + }, + "rgi/main - haemophilus_influenzae - genome_fna_gz": { + "content": [ + [ + "versions.yml:md5,a9f89e3bebd538efa07bcbe9fe1ba37a" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,5854d6bef754d91da80980e96b6a054b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.json:md5,f9ca00ea1ff6e733c7c25bb2dfd65128" + ] + ], + [ + "genome.fna.gz.temp.uncompressed.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.blastRes.rrna.xml", + "genome.fna.gz.temp.uncompressed.fsa.temp.contig.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.contig.fsa.blastRes.xml", + "genome.fna.gz.temp.uncompressed.fsa.temp.contigToORF.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nhr", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nin", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nsq", + "genome.fna.gz.temp.uncompressed.fsa.temp.draft", + "genome.fna.gz.temp.uncompressed.fsa.temp.homolog.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.overexpression.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.potentialGenes", + "genome.fna.gz.temp.uncompressed.fsa.temp.predictedGenes.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.predictedGenes.protein.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.rrna.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.variant.json" + ], + [ + "6.0.3" + ], + [ + "3.2.9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T22:51:14.372178941" + } +} \ No newline at end of file diff --git a/modules/nf-core/rgi/main/tests/tags.yml b/modules/nf-core/rgi/main/tests/tags.yml new file mode 100644 index 00000000..e68ad8a2 --- /dev/null +++ b/modules/nf-core/rgi/main/tests/tags.yml @@ -0,0 +1,2 @@ +rgi/main: + - "modules/nf-core/rgi/main/**" diff --git a/modules/nf-core/seqkit/seq/environment.yml b/modules/nf-core/seqkit/seq/environment.yml new file mode 100644 index 00000000..4f8058a9 --- /dev/null +++ b/modules/nf-core/seqkit/seq/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::seqkit=2.8.1" diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf new file mode 100644 index 00000000..d7d38fc8 --- /dev/null +++ b/modules/nf-core/seqkit/seq/main.nf @@ -0,0 +1,63 @@ +process SEQKIT_SEQ { + tag "$meta.id" + label 'process_low' + // File IO can be a bottleneck. See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0': + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("${prefix}.*") , emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2" : '' + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + seqkit \\ + seq \\ + --threads $task.cpus \\ + $args \\ + $fastx \\ + $call_gzip \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/seq/meta.yml b/modules/nf-core/seqkit/seq/meta.yml new file mode 100644 index 00000000..7d32aba5 --- /dev/null +++ b/modules/nf-core/seqkit/seq/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "seqkit_seq" +description: Transforms sequences (extract ID, filter by length, remove gaps, reverse + complement...) +keywords: + - genomics + - fasta + - fastq + - transform + - filter + - gaps + - complement +tools: + - "seqkit": + description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation" + homepage: "https://bioinf.shenwei.me/seqkit/" + documentation: "https://bioinf.shenwei.me/seqkit/usage/" + tool_dev_url: "https://github.com/shenwei356/seqkit" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] + identifier: biotools:seqkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Input fasta/fastq file + pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}" +output: + - fastx: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - ${prefix}.*: + type: file + description: Output fasta/fastq file + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test b/modules/nf-core/seqkit/seq/tests/main.nf.test new file mode 100644 index 00000000..9fd1c085 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/main.nf.test @@ -0,0 +1,145 @@ +nextflow_process { + + name "Test Process SEQKIT_SEQ" + script "../main.nf" + process "SEQKIT_SEQ" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/seq" + + test("sarscov2-genome_fasta") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2-genome_fasta_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2-test_1_fastq_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("file_name_conflict-fail_with_error") { + when { + process { + """ + input[0] = [ + [ id:'test_1' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("file_name_conflict-fail_with_error-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test.snap b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap new file mode 100644 index 00000000..e6910966 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + 
"test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:52:18.220051903" + }, + "sarscov2-test_1_fastq_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:55.607826581" + }, + "sarscov2-genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:27.717072933" + }, + "sarscov2-genome_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:37.917560104" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/seq/tests/nextflow.config b/modules/nf-core/seqkit/seq/tests/nextflow.config new file mode 100644 index 00000000..d8e3c66a --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args2 = '-n' +} diff --git a/modules/nf-core/seqkit/seq/tests/tags.yml b/modules/nf-core/seqkit/seq/tests/tags.yml new file mode 100644 index 00000000..5eeca7e3 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/seq: + - "modules/nf-core/seqkit/seq/**" diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml new file mode 100644 index 00000000..017c259d --- /dev/null +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..67991c74 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,55 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + echo "" | gzip > ${output} + touch ${output}.gzi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 00000000..131e92cf --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,61 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${output}: + type: file + description: Output compressed/decompressed file + pattern: "*." + - gzi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${output}.gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 00000000..6b6ff55f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test new file mode 100644 index 00000000..d784aa07 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_stub") { + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 00000000..0748143f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": 
[ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:34.159992362" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:22.087769106" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:57.15091665" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:45.219404786" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml new file mode 100644 index 00000000..de0eec86 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git 
a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 00000000..f3a3c467 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..c651bdad --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..290346b3 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,49 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ]
+    - archive:
+        type: file
+        description: File to be untarred
+        pattern: "*.{tar}.{gz}"
+output:
+  - untar:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - $prefix:
+          type: directory
+          description: Directory containing contents of archive
+          pattern: "*/"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test
new file mode 100644
index 00000000..c957517a
--- /dev/null
+++ b/modules/nf-core/untar/tests/main.nf.test
@@ -0,0 +1,85 @@
+nextflow_process {
+
+    name "Test Process UNTAR"
+    script "../main.nf"
+    process "UNTAR"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "untar"
+
+    test("test_untar") {
+
+        when {
+            process {
+                """
+                input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+            )
+        }
+    }
+
+    test("test_untar_onlyfiles") {
+
+        when {
+            process {
+                """
+                input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+            )
+        }
+    }
+
+    test("test_untar - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+            )
+        }
+    }
+
+    test("test_untar_onlyfiles - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap
new file mode 100644
index 00000000..ceb91b79
--- /dev/null
+++ b/modules/nf-core/untar/tests/main.nf.test.snap
@@ -0,0 +1,158 @@
+{
+    "test_untar_onlyfiles": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+
+                        ],
+                        [
+                            "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+                ],
+                "untar": [
+                    [
+                        [
+
+                        ],
+                        [
+                            "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-10T12:04:28.231047"
+    },
+    "test_untar_onlyfiles - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+
+                        ],
+                        [
+                            "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+                ],
+                "untar": [
+                    [
+                        [
+
+                        ],
+                        [
+                            "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-10T12:04:45.773103"
+    },
+    "test_untar - stub": {
+        "content":
[ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/untar/untar.diff b/modules/nf-core/untar/untar.diff new file mode 100644 index 00000000..d020ffeb --- /dev/null +++ b/modules/nf-core/untar/untar.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/untar' +'modules/nf-core/untar/environment.yml' is unchanged +'modules/nf-core/untar/meta.yml' is unchanged +Changes in 'untar/main.nf': +--- modules/nf-core/untar/main.nf ++++ modules/nf-core/untar/main.nf +@@ -4,8 +4,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+- 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : +- 'nf-core/ubuntu:22.04' }" ++ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : ++ 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + +'modules/nf-core/untar/tests/main.nf.test' is unchanged +'modules/nf-core/untar/tests/tags.yml' is unchanged +'modules/nf-core/untar/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/nextflow.config b/nextflow.config index 31df8edd..d7f5e1d4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,46 +9,247 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null - - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + input = null // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - help_full = false - show_hidden = false - version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options - config_profile_name = null - config_profile_description = null - - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + // Config options + + // Taxonomy classification options + run_taxa_classification = false + taxa_classification_tool = 'mmseqs2' + + taxa_classification_mmseqs_db = null + taxa_classification_mmseqs_db_id = 'Kalamari' + taxa_classification_mmseqs_db_savetmp = false + + taxa_classification_mmseqs_taxonomy_savetmp = false + taxa_classification_mmseqs_taxonomy_searchtype = 2 + taxa_classification_mmseqs_taxonomy_lcaranks = 'kingdom,phylum,class,order,family,genus,species' + taxa_classification_mmseqs_taxonomy_taxlineage = 1 + taxa_classification_mmseqs_taxonomy_sensitivity = 5.0 + taxa_classification_mmseqs_taxonomy_orffilters = 2.0 + taxa_classification_mmseqs_taxonomy_lcamode = 3 + taxa_classification_mmseqs_taxonomy_votemode = 1 + + // Annotation options + annotation_tool = 'pyrodigal' + save_annotations = false + + annotation_prodigal_singlemode = false + annotation_prodigal_closed = false + annotation_prodigal_transtable = 11 + annotation_prodigal_forcenonsd = false + + annotation_pyrodigal_singlemode = false + annotation_pyrodigal_closed = false + annotation_pyrodigal_transtable = 11 + annotation_pyrodigal_forcenonsd = false + + annotation_bakta_db = null + annotation_bakta_db_downloadtype = 'full' + annotation_bakta_singlemode = false + 
annotation_bakta_mincontiglen = 1 + annotation_bakta_translationtable = 11 + annotation_bakta_gram = '?' + annotation_bakta_complete = false + annotation_bakta_renamecontigheaders = false + annotation_bakta_compliant = false + annotation_bakta_trna = false + annotation_bakta_tmrna = false + annotation_bakta_rrna = false + annotation_bakta_ncrna = false + annotation_bakta_ncrnaregion = false + annotation_bakta_crispr = false + annotation_bakta_skipcds = false + annotation_bakta_pseudo = false + annotation_bakta_skipsorf = false + annotation_bakta_gap = false + annotation_bakta_ori = false + annotation_bakta_activate_plot = false + + annotation_prokka_singlemode = false + annotation_prokka_rawproduct = false + annotation_prokka_kingdom = 'Bacteria' + annotation_prokka_gcode = 11 + annotation_prokka_cdsrnaolap = false + annotation_prokka_rnammer = false + annotation_prokka_mincontiglen = 1 + annotation_prokka_evalue = 0.000001 + annotation_prokka_coverage = 80 + annotation_prokka_compliant = true + annotation_prokka_addgenes = false + annotation_prokka_retaincontigheaders = false + + // Database downloading options + save_db = false + + // AMP options + run_amp_screening = false + + amp_skip_amplify = false + + amp_skip_macrel = false + + amp_skip_ampir = false + amp_ampir_model = 'precursor' + amp_ampir_minlength = 10 + + amp_run_hmmsearch = false + amp_hmmsearch_models = null + amp_hmmsearch_savealignments = false + amp_hmmsearch_savetargets = false + amp_hmmsearch_savedomains = false + + amp_ampcombi_db_id = 'DRAMP' + amp_ampcombi_db = null + amp_ampcombi_parsetables_cutoff = 0.6 + amp_ampcombi_parsetables_ampir = '.ampir.tsv' + amp_ampcombi_parsetables_amplify = '.amplify.tsv' + amp_ampcombi_parsetables_macrel = '.macrel.prediction' + amp_ampcombi_parsetables_hmmsearch = '.hmmer_hmmsearch.txt' + amp_ampcombi_parsetables_aalength = 120 + amp_ampcombi_parsetables_dbevalue = 5 + amp_ampcombi_parsetables_hmmevalue = 0.06 + amp_ampcombi_parsetables_windowstopcodon = 60 + amp_ampcombi_parsetables_windowtransport = 11 + amp_ampcombi_parsetables_removehitswostopcodons = false + amp_ampcombi_cluster_covmode = 0 + amp_ampcombi_cluster_mode = 1 + amp_ampcombi_cluster_coverage = 0.8 + amp_ampcombi_cluster_seqid = 0.4 + amp_ampcombi_cluster_sensitivity = 4.0 + amp_ampcombi_cluster_removesingletons = false + amp_ampcombi_cluster_minmembers = 0 + + // ARG options + run_arg_screening = false + + arg_skip_fargene = false + arg_fargene_hmmmodel = 'class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme' + arg_fargene_savetmpfiles = false + arg_fargene_minorflength = 90 + arg_fargene_score = null + arg_fargene_translationformat = 'pearson' + arg_fargene_orffinder = false + + arg_skip_rgi = false + arg_rgi_db = null + arg_rgi_savejson = false + arg_rgi_savetmpfiles = false + arg_rgi_alignmenttool = 'BLAST' + arg_rgi_includeloose = false + arg_rgi_includenudge = false + arg_rgi_lowquality = false + arg_rgi_data = 'NA' + arg_rgi_split_prodigal_jobs = true + + arg_skip_amrfinderplus = false + arg_amrfinderplus_db = null + arg_amrfinderplus_identmin = -1 + arg_amrfinderplus_coveragemin = 0.5 + arg_amrfinderplus_translationtable = 11 + arg_amrfinderplus_plus = false + arg_amrfinderplus_name = false + + arg_skip_deeparg = false + arg_deeparg_db = null + arg_deeparg_db_version = 2 + // Make sure to update on module version bump! 
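+    // Illustrative note (an editor's hedged sketch, not part of the upstream config):
+    // like any other `params.*` default in this file, the DeepARG settings below can be
+    // overridden at runtime rather than edited here, e.g. on the command line:
+    //   nextflow run nf-core/funcscan --run_arg_screening --arg_deeparg_minprob 0.9 ...
+    // or via a custom config supplied with `-c`, containing `params.arg_deeparg_minprob = 0.9`.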
+ arg_deeparg_model = 'LS' + arg_deeparg_minprob = 0.8 + arg_deeparg_alignmentidentity = 50 + arg_deeparg_alignmentevalue = 1E-10 + arg_deeparg_alignmentoverlap = 0.8 + arg_deeparg_numalignmentsperentry = 1000 + + arg_skip_abricate = false + arg_abricate_db_id = 'ncbi' + arg_abricate_db = null + arg_abricate_minid = 80 + arg_abricate_mincov = 80 + + arg_hamronization_summarizeformat = 'tsv' + + arg_skip_argnorm = false + + // BGC options + run_bgc_screening = false + + bgc_mincontiglength = 3000 + bgc_savefilteredcontigs = false + + bgc_skip_antismash = false + bgc_antismash_db = null + bgc_antismash_installdir = null + bgc_antismash_cbgeneral = false + bgc_antismash_cbknownclusters = false + bgc_antismash_cbsubclusters = false + bgc_antismash_smcogtrees = false + bgc_antismash_ccmibig = false + bgc_antismash_contigminlength = 3000 + bgc_antismash_hmmdetectionstrictness = 'relaxed' + bgc_antismash_pfam2go = false + bgc_antismash_rre = false + bgc_antismash_taxon = 'bacteria' + bgc_antismash_tfbs = false + + bgc_skip_deepbgc = false + bgc_deepbgc_db = null + bgc_deepbgc_score = 0.5 + bgc_deepbgc_prodigalsinglemode = false + bgc_deepbgc_mergemaxproteingap = 0 + bgc_deepbgc_mergemaxnuclgap = 0 + bgc_deepbgc_minnucl = 1 + bgc_deepbgc_minproteins = 1 + bgc_deepbgc_mindomains = 1 + bgc_deepbgc_minbiodomains = 0 + bgc_deepbgc_classifierscore = 0.5 + + bgc_skip_gecco = false + bgc_gecco_cds = 3 + bgc_gecco_threshold = 0.8 + bgc_gecco_pfilter = 1E-9 + bgc_gecco_edgedistance = 0 + bgc_gecco_mask = false + + bgc_run_hmmsearch = false + bgc_hmmsearch_models = null + bgc_hmmsearch_savealignments = false + bgc_hmmsearch_savetargets = false + bgc_hmmsearch_savedomains = false + + // Config options + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines @@ -56,90 +257,90 @@ includeConfig 'conf/base.config' profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id 
-u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } wave { apptainer.ociAutoPull = true @@ -149,40 +350,73 @@ profiles { wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB process { resourceLimits = [ memory: 8.GB, - cpus : 4, - time : 1.h + cpus: 4, + time: 1.h, ] } } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { + includeConfig 'conf/test.config' + } + test_bakta { + includeConfig 'conf/test_bakta.config' + } + test_prokka { + includeConfig 'conf/test_prokka.config' + } + test_bgc_bakta { + includeConfig 'conf/test_bgc_bakta.config' + } + test_bgc_prokka { + includeConfig 'conf/test_bgc_prokka.config' + } + test_bgc_pyrodigal { + includeConfig 'conf/test_bgc_pyrodigal.config' + } + test_taxonomy_bakta { + includeConfig 'conf/test_taxonomy_bakta.config' + } + test_taxonomy_prokka { + includeConfig 'conf/test_taxonomy_prokka.config' + } + test_taxonomy_pyrodigal { + includeConfig 'conf/test_taxonomy_pyrodigal.config' + } + 
test_full { + includeConfig 'conf/test_full.config' + } + test_nothing { + includeConfig 'conf/test_nothing.config' + } + test_preannotated { + includeConfig 'conf/test_preannotated.config' + } + test_preannotated_bgc { + includeConfig 'conf/test_preannotated_bgc.config' + } } // Load nf-core custom profiles from different Institutions includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/funcscan custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/funcscan.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/funcscan.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -226,48 +460,48 @@ dag { manifest { name = 'nf-core/funcscan' - author = """Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead + author = """Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates""" + // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ - // TODO nf-core: Update the field with the details of the contributors to your pipeline. 
New with Nextflow version 24.10.0
        [
            name: 'Jasmin Frangenberg',
            affiliation: 'Leibniz Institute for Natural Product Research and Infection Biology Hans Knöll Institute, Jena, Germany',
            email: 'jasmin.frangenberg@leibniz-hki.de',
            github: 'jasmezz',
-            contribution: [author, maintainer], // List of contribution types ('author', 'maintainer' or 'contributor')
-            orcid: '0009-0004-5961-4709'
+            contribution: ['author', 'maintainer'],
+            orcid: '0009-0004-5961-4709',
        ],
        [
            name: 'Anan Ibrahim',
            affiliation: 'Leibniz Institute for Natural Product Research and Infection Biology Hans Knöll Institute, Jena, Germany',
            email: 'anan.ibrahim@leibniz-hki.de',
            github: 'darcy220606',
-            contribution: [author, maintainer], // List of contribution types ('author', 'maintainer' or 'contributor')
-            orcid: '0000-0003-3719-901X'
+            contribution: ['author', 'maintainer'],
+            orcid: '0000-0003-3719-901X',
        ],
        [
            name: 'Louisa Perelo',
            affiliation: 'Quantitative Biology Center (QBiC), University of Tübingen, Tübingen, Germany',
            email: 'louisa.perelo@qbic.uni-tuebingen.de',
            github: 'louperelo',
-            contribution: [author, contributor], // List of contribution types ('author', 'maintainer' or 'contributor')
-            orcid: '0000-0002-7307-062X'
+            contribution: ['author', 'contributor'],
+            orcid: '0000-0002-7307-062X',
        ],
        [
            name: 'Moritz E. Beber',
            affiliation: '',
            email: '',
            github: 'Midnighter',
-            contribution: [author, contributor], // List of contribution types ('author', 'maintainer' or 'contributor')
-            orcid: '0000-0003-2406-1978'
+            contribution: ['author', 'contributor'],
+            orcid: '0000-0003-2406-1978',
        ],
        [
            name: 'James A. Fellows Yates',
            affiliation: 'Leibniz Institute for Natural Product Research and Infection Biology Hans Knöll Institute, Jena, Germany; Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany',
            email: 'james_fellows_yates@eva.mpg.de',
            github: 'jfy133',
-            contribution: [author, maintainer], // List of contribution types ('author', 'maintainer' or 'contributor')
-            orcid: '0000-0001-5585-6277'
+            contribution: ['author', 'maintainer'],
+            orcid: '0000-0001-5585-6277',
        ],
    ]
    homePage = 'https://github.com/nf-core/funcscan'
@@ -276,23 +510,23 @@ manifest {
    defaultBranch   = 'master'
    nextflowVersion = '!>=24.04.2'
    version         = '2.1.0dev'
-    doi             = ''
+    doi             = '10.5281/zenodo.7643099'
}

// Nextflow plugins
plugins {
-    id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-schema@2.1.1'
}

validation {
    defaultIgnoreParams = ["genomes"]
-    monochromeLogs = params.monochrome_logs
+    monochromeLogs      = params.monochrome_logs
    help {
-        enabled = true
-        command = "nextflow run nf-core/funcscan -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>"
-        fullParameter = "help_full"
+        enabled             = true
+        command             = "nextflow run nf-core/funcscan -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>"
+        fullParameter       = "help_full"
        showHiddenParameter = "show_hidden"
-        beforeText = """
+        beforeText          = """
-\033[2m----------------------------------------------------\033[0m-
\033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m
\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m
@@ -302,7 +536,7 @@ validation {
\033[0;35m nf-core/funcscan ${manifest.version}\033[0m
-\033[2m----------------------------------------------------\033[0m-
 """
-        afterText = """${manifest.doi ?
"\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${manifest.doi ? "\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -312,6 +546,6 @@ validation { } summary { beforeText = validation.help.beforeText - afterText = validation.help.afterText + afterText = validation.help.afterText } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 60a71c78..4ba539bd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,8 +19,8 @@ "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/funcscan/usage#samplesheet-input).", + "description": "Path to comma-separated file containing sample names and paths to corresponding FASTA files, and optional annotation files.", + "help_text": "Before running the pipeline, you will need to create a design file with information about the samples to be scanned by nf-core/funcscan, containing at a minimum sample names and paths to contigs. Use this parameter to specify its location. It has to be a two or four column comma-separated file with a header row (`sample,fasta` or `sample,fasta,protein,gbk`). See [usage docs](https://nf-co.re/funcscan/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -41,47 +41,1360 @@ "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" } + }, + "help_text": "" + }, + "screening_type_activation": { + "title": "Screening type activation", + "type": "object", + "description": "These parameters influence which workflow (ARG, AMP and/or BGC) to activate.", + "default": "", + "properties": { + "run_amp_screening": { + "type": "boolean", + "description": "Activate antimicrobial peptide genes screening tools.", + "fa_icon": "fas fa-check-circle" + }, + "run_arg_screening": { + "type": "boolean", + "description": "Activate antimicrobial resistance gene screening tools.", + "fa_icon": "fas fa-check-circle" + }, + "run_bgc_screening": { + "type": "boolean", + "description": "Activate biosynthetic gene cluster screening tools.", + "fa_icon": "fas fa-check-circle" + } + }, + "fa_icon": "fa fa-list-ol" + }, + "taxonomic_classification_general_options": { + "title": "Taxonomic classification: general options", + "type": "object", + "description": "These options influence whether to activate the taxonomic classification of the input nucleotide sequences.", + "default": "", + "properties": { + "run_taxa_classification": { + "type": "boolean", + "description": "Activates the taxonomic classification of input nucleotide sequences.", + "help_text": "This flag turns on the taxonomic classification of input nucleotide sequences. The taxonomic annotations should be turned on if the input metagenomes' bacterial sources are unknown, which can help identify the source of the AMP, BGC or ARG hit obtained for laboratory experiments. This flag should be turned off (which is by default) if the input nucleotide sequences represent a single known genome or *nf-core/mag* was run beforehand. 
Turning on this flag noticeably slows down the pipeline and requires >8 GB of RAM. Due to the size of the resulting table, the final summary is in a zipped format.", + "fa_icon": "fas fa-check-circle" + }, + "taxa_classification_tool": { + "type": "string", + "default": "mmseqs2", + "help_text": "This flag specifies which tool for taxonomic classification should be activated. At the moment only 'MMseqs2' is incorporated in the pipeline.", + "description": "Specifies the tool used for taxonomic classification.", + "fa_icon": "fas fa-tools" + } + }, + "fa_icon": "fas fa-tag" + }, + "taxonomic_classification_mmseqs2_databases": { + "title": "Taxonomic classification: MMseqs2 databases", + "type": "object", + "description": "These parameters influence the database to be used in classifying the taxonomy.", + "default": "", + "properties": { + "taxa_classification_mmseqs_db": { + "type": "string", + "description": "Specify a path to an MMseqs2-formatted database.", + "help_text": "Specify a path to a database that is prepared in MMseqs2 format as detailed in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\nThe contents of the directory should have files such as `.version` and `.taxonomy` in the top level.", + "fa_icon": "fas fa-database" + }, + "taxa_classification_mmseqs_db_id": { + "type": "string", + "default": "Kalamari", + "help_text": "Specify which MMseqs2-formatted database to use to classify the input contigs. This can be a nucleotide or amino acid database that includes taxonomic classifications. For example, both GTDB (an amino acid database) and SILVA (a nucleotide database) are supported by MMseqs2. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs databases ", + "description": "Specify the label of the database to be used.", + "fa_icon": "fas fa-address-card" + }, + "taxa_classification_mmseqs_db_savetmp": { + "type": "boolean", + "help_text": "This flag saves the temporary files from downloading the database and formatting it in the MMseqs2 format into the output folder. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs databases: `--remove-tmp-files`", + "description": "Specify whether the temporary files should be saved.", + "fa_icon": "fas fa-save" + } + }, + "fa_icon": "fas fa-tag" + }, + "taxonomic_classification_mmseqs2_taxonomy": { + "title": "Taxonomic classification: MMseqs2 taxonomy", + "type": "object", + "description": "These parameters influence the taxonomic classification step.", + "default": "", + "properties": { + "taxa_classification_mmseqs_taxonomy_savetmp": { + "type": "boolean", + "help_text": "This flag saves the temporary files from creating the taxonomy database and the final `tsv` file into the output folder. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--remove-tmp-files`", + "description": "Specify whether to save the temporary files.", + "fa_icon": "fas fa-save" + }, + "taxa_classification_mmseqs_taxonomy_searchtype": { + "type": "integer", + "default": 2, + "help_text": "Specify the type of alignment to be carried out between the query database and the reference MMseqs2 database.
This can be set to '0' for automatic detection, '1' for amino acid alignment, '2' for translating the inputs and running the alignment on the translated sequences, '3' for nucleotide-based alignment and '4' for the translated nucleotide sequences alignment. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--search-type`", + "description": "Specify the alignment type between database and query.", + "fa_icon": "fas fa-align-center" + }, + "taxa_classification_mmseqs_taxonomy_lcaranks": { + "type": "string", + "default": "kingdom,phylum,class,order,family,genus,species", + "help_text": "Specify the taxonomic ranks to include in the taxonomic lineage column in the final `.tsv` file. For example, 'kingdom,phylum,class,order,family,genus,species'. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--lca-ranks`", + "description": "Specify the taxonomic levels to display in the result table.", + "fa_icon": "fas fa-stream" + }, + "taxa_classification_mmseqs_taxonomy_taxlineage": { + "type": "integer", + "default": 1, + "help_text": "This flag specifies whether the taxonomic lineage should be included in the output `.tsv` file. The taxonomic lineage is obtained from the internal module of `mmseqs/taxonomy` that infers the last common ancestor to classify the taxonomy. A value of '0' writes no taxonomic lineage, a value of '1' adds a column with the full lineage names prefixed with the abbreviation of the lineage level, e.g. `k_Prokaryotes;p_Bacteroidetes;c_....;o_....;f_....;g_....;s_....`, while a value of '2' adds a column with the full NCBI taxid lineage, e.g. `1324;2345;4546;5345`. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--tax-lineage`", + "description": "Specify whether to include or remove the taxonomic lineage.", + "fa_icon": "fab fa-audible" + }, + "taxa_classification_mmseqs_taxonomy_sensitivity": { + "type": "number", + "default": 5.0, + "help_text": "This flag specifies the speed and sensitivity of the taxonomic search. It determines how many k-mers should be produced during the preliminary seeding stage. A very fast search requires a low value, e.g. '1.0', and a very sensitive search a high value, e.g. '7.0'. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `-s`", + "description": "Specify the speed and sensitivity for taxonomy assignment.", + "fa_icon": "fas fa-history" + }, + "taxa_classification_mmseqs_taxonomy_orffilters": { + "type": "number", + "default": 2.0, + "help_text": "This flag specifies the sensitivity used for prefiltering the query ORF. Before the taxonomy-assigning step, MMseqs2 searches the predicted ORFs against the provided database. This value influences the speed with which the search is carried out. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--orf-filter-s`", + "description": "Specify the ORF search sensitivity in the prefilter step.", + "fa_icon": "fas fa-history" + }, + "taxa_classification_mmseqs_taxonomy_lcamode": { + "type": "integer", + "default": 3, + "help_text": "This flag specifies the strategy used for assigning the last common ancestor (LCA).
MMseqs2 assigns taxonomy based on an accelerated approximation of the 2bLCA protocol and uses the value of '3'. In this mode, the taxonomic assignment is based not only on usual alignment parameters but also considers the taxonomic classification of the LCA. When the value '4' is used the LCA is assigned based on all the equal scoring top hits. If the value '1' is used the LCA assignment is disregarded and the taxonomic assignment is based on usual alignment parameters like E-value and coverage. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf). \n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--lca-mode`", + "description": "Specify the mode to assign the taxonomy.", + "fa_icon": "fas fa-broom" + }, + "taxa_classification_mmseqs_taxonomy_votemode": { + "type": "integer", + "default": 1, + "help_text": "This flag assigns the mode value with which the weights are computed. The value of '0' stands for uniform weights of taxonomy assignments, the value of '1' uses the minus log E-value and '2' the actual score. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--vote-mode`", + "description": "Specify the weights of the taxonomic assignment.", + "fa_icon": "fas fa-balance-scale-right" + } + }, + "fa_icon": "fas fa-tag" + }, + "annotation_general_options": { + "title": "Annotation: general options", + "type": "object", + "description": "These options influence the generation of annotation files required for downstream steps in ARG, AMP, and BGC workflows.", + "default": "", + "properties": { + "annotation_tool": { + "type": "string", + "default": "pyrodigal", + "description": "Specify which annotation tool to use for some downstream tools.", + "enum": ["prodigal", "pyrodigal", "prokka", "bakta"], + "fa_icon": "fas fa-edit" + }, + "save_annotations": { + "type": "boolean", + "description": "Specify whether to save gene annotations in the results directory.", + "fa_icon": "fas fa-save" + } + }, + "fa_icon": "fas fa-file-signature", + "help_text": "" + }, + "annotation_bakta": { + "title": "Annotation: BAKTA", + "type": "object", + "description": "BAKTA is a tool developed to annotate bacterial genomes and plasmids from both isolates and MAGs. More info: https://github.com/oschwengers/bakta", + "default": "", + "properties": { + "annotation_bakta_db": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "Specify a path to a local copy of a BAKTA database.", + "help_text": "If a local copy of a BAKTA database exists, specify the path to that database which is prepared in a BAKTA format. Otherwise this will be downloaded for you.\n\nThe contents of the directory should have files such as `*.dmnd` in the top level." + }, + "annotation_bakta_db_downloadtype": { + "type": "string", + "description": "Download full or light version of the Bakta database if not supplying own database.", + "help_text": "If you want the pipeline to download the Bakta database for you, you can choose between the full (33.1 GB) and light (1.3 GB) version. 
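A hedged sketch of switching the annotation step to Bakta with the light database download; paths are placeholders and the flags are those defined in this schema:

```bash
# Hypothetical invocation: annotate with Bakta, downloading the light database
# and keeping it in the results directory for re-use (--save_db).
nextflow run nf-core/funcscan \
    -profile docker \
    --input samplesheet.csv \
    --outdir ./results \
    --run_amp_screening \
    --annotation_tool bakta \
    --annotation_bakta_db_downloadtype light \
    --save_db
```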
The full version is generally recommended for best annotation results, because it contains all of these:\n\n- UPS: unique protein sequences identified via length and MD5 hash digests (100% coverage & 100% sequence identity)\n- IPS: identical protein sequences comprising seeds of UniProt's UniRef100 protein sequence clusters\n- PSC: protein sequences clusters comprising seeds of UniProt's UniRef90 protein sequence clusters\n- PSCC: protein sequences clusters of clusters comprising annotations of UniProt's UniRef50 protein sequence clusters\n\nIf download bandwidth, storage, memory, or run duration requirements become an issue, go for the light version (which only contains PSCCs) by modifying the `annotation_bakta_db_downloadtype` flag.\n\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta#database)\n\n> Modifies tool parameter(s):\n> - BAKTA_DBDOWNLOAD: `--type`", + "fa_icon": "fas fa-database", + "enum": ["full", "light"], + "default": "full" + }, + "annotation_bakta_singlemode": { + "type": "boolean", + "description": "Use the default genome-length optimised mode (rather than the metagenome mode).", + "help_text": "By default, Bakta's `--meta` mode is used in the pipeline to improve the gene prediction of highly fragmented metagenomes.\n\nBy specifying this parameter Bakta will instead use its default mode that is optimised for singular 'complete' genome sequences.\n\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--meta`", + "fa_icon": "fas fa-dna" + }, + "annotation_bakta_mincontiglen": { + "type": "integer", + "default": 1, + "description": "Specify the minimum contig size.", + "help_text": "Specify the minimum contig size that would be annotated by BAKTA.\nIf run with '--annotation_bakta_compliant', the minimum contig length must be set to 200. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--min-contig-length`", + "minimum": 1, + "fa_icon": "fas fa-align-left" + }, + "annotation_bakta_translationtable": { + "type": "integer", + "default": 11, + "description": "Specify the genetic code translation table.", + "help_text": "Specify the genetic code translation table used for translation of nucleotides to amino acids. \nAll possible genetic codes (1-25) used for gene annotation can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--translation-table`", + "minimum": 1, + "maximum": 25, + "fa_icon": "fas fa-border-all" + }, + "annotation_bakta_gram": { + "type": "string", + "default": "?", + "enum": ["+", "-", "?"], + "description": "Specify the type of bacteria to be annotated to detect signaling peptides.", + "help_text": "Specify the type of bacteria expected in the input dataset for correct annotation of the signal peptide predictions. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--gram`", + "fa_icon": "far fa-plus-square" + }, + "annotation_bakta_complete": { + "type": "boolean", + "description": "Specify that all contigs are complete replicons.", + "help_text": "This flag expects contigs that make up complete chromosomes and/or plasmids. 
By calling it, the user ensures that the contigs are complete replicons. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--complete`", + "fa_icon": "far fa-circle" + }, + "annotation_bakta_renamecontigheaders": { + "type": "boolean", + "description": "Changes the original contig headers.", + "help_text": "This flag specifies that the contig headers should be rewritten. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--keep-contig-headers`", + "fa_icon": "fas fa-remove-format" + }, + "annotation_bakta_compliant": { + "type": "boolean", + "description": "Clean the result annotations to standardise them to Genbank/ENA conventions.", + "help_text": "The resulting annotations are cleaned up to standardise them to Genbank/ENA/DDJB conventions. CDS without any attributed hits and those without gene symbols or product descriptions different from hypothetical will be marked as 'hypothetical'.\nWhen activated the `--min-contig-length` will be set to 200. More info can be found [here](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--compliant`", + "fa_icon": "fas fa-check-circle" + }, + "annotation_bakta_trna": { + "type": "boolean", + "description": "Activate tRNA detection & annotation.", + "help_text": "This flag activates [tRNAscan-SE 2.0](http://lowelab.ucsc.edu/tRNAscan-SE/) that predicts tRNA genes. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-trna`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_tmrna": { + "type": "boolean", + "description": "Activate tmRNA detection & annotation.", + "help_text": "This flag activates [Aragorn](http://www.ansikte.se/ARAGORN/) that predicts tmRNA genes. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-tmrna`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_rrna": { + "type": "boolean", + "description": "Activate rRNA detection & annotation.", + "help_text": "This flag activates [Infernal vs. Rfam rRNA covariance models](http://eddylab.org/infernal/) that predict rRNA genes. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-rrna`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_ncrna": { + "type": "boolean", + "description": "Activate ncRNA detection & annotation.", + "help_text": "This flag activates [Infernal vs. Rfam ncRNA covariance models](http://eddylab.org/infernal/) that predict ncRNA genes.\nBAKTA distinguishes between ncRNA genes and (cis-regulatory) regions to enable the distinction of feature overlap detection.\nThis includes distinguishing between ncRNA gene types: sRNA, antisense, ribozyme and antitoxin. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-ncrna`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_ncrnaregion": { + "type": "boolean", + "description": "Activate ncRNA region detection & annotation.", + "help_text": "This flag activates [Infernal vs.
Rfam ncRNA covariance models](http://eddylab.org/infernal/) that predict ncRNA cis-regulatory regions.\nBAKTA distinguishes between ncRNA genes and (cis-regulatory) regions to enable the distinction of feature overlap detection.\nThis includes distinguishing between ncRNA (cis-regulatory) region types: riboswitch, thermoregulator, leader and frameshift element. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-ncrna-region`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_crispr": { + "type": "boolean", + "description": "Activate CRISPR array detection & annotation.", + "help_text": "This flag activates [PILER-CR](https://www.drive5.com/pilercr/) that predicts CRISPR arrays. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-crispr`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_skipcds": { + "type": "boolean", + "description": "Skip CDS detection & annotation.", + "help_text": "This flag skips CDS prediction that is done by [PYRODIGAL](https://github.com/althonos/pyrodigal) with which the distinct prediction for complete replicons and uncompleted contigs is done.\nFor more information on how BAKTA predicts CDS please refer to the BAKTA [documentation](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-cds`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_pseudo": { + "type": "boolean", + "description": "Activate pseudogene detection & annotation.", + "help_text": "This flag activates the search for reference Phytochelatin Synthase genes (PCSs) using 'hypothetical' CDS as seed sequences, then aligns the translated PCSs against up-/downstream-elongated CDS regions. More details can be found in the BAKTA [documentation](https://github.com/oschwengers/bakta). \n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-pseudo`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_skipsorf": { + "type": "boolean", + "description": "Skip sORF detection & annotation.", + "help_text": "Skip the prediction of sORFs from amino acid stretches of less than 30 aa. For more info please refer to the BAKTA [documentation](https://github.com/oschwengers/bakta). All sORFs without gene symbols or product descriptions different from hypothetical will be discarded, while only those identified hits exhibiting proper gene symbols or product descriptions different from hypothetical will still be included in the final annotation.\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-sorf`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_gap": { + "type": "boolean", + "description": "Activate gap detection & annotation.", + "help_text": "Activates any gene annotation found within contig assembly gaps. More details can be found in the BAKTA [documentation](https://github.com/oschwengers/bakta). \n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-gap`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_ori": { + "type": "boolean", + "description": "Activate oriC/oriT detection & annotation.", + "help_text": "Activates the BAKTA search for oriC/oriT genes by comparing results from Blast+ (generated by cov=0.8, id=0.8) and the [MOB-suite](https://github.com/phac-nml/mob-suite) of oriT & [DoriC](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6323995/) oriC/oriV sequences.
Annotations of ori regions take into account overlapping Blast+ hits and are conducted based on a majority vote heuristic. Region edges may be fuzzy. For more info please refer to the BAKTA [documentation](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-ori`", + "fa_icon": "fas fa-forward" + }, + "annotation_bakta_activate_plot": { + "type": "boolean", + "fa_icon": "fas fa-chart-pie", + "description": "Activate generation of circular genome plots.", + "help_text": "Activate this flag to generate genome plots (might be memory-intensive).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-plot`" + } + }, + "fa_icon": "fas fa-file-signature" + }, + "annotation_prokka": { + "title": "Annotation: Prokka", + "type": "object", + "description": "Prokka annotates genomic sequences belonging to bacterial, archaeal and viral genomes. More info: https://github.com/tseemann/prokka", + "default": "", + "properties": { + "annotation_prokka_singlemode": { + "type": "boolean", + "description": "Use the default genome-length optimised mode (rather than the metagenome mode).", + "help_text": "By default, Prokka's `--metagenome` mode is used in the pipeline to improve the gene prediction of highly fragmented metagenomes.\n\nBy specifying this parameter Prokka will instead use its default mode that is optimised for singular 'complete' genome sequences.\n\nFor more information, please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--metagenome`", + "fa_icon": "fas fa-braille" + }, + "annotation_prokka_rawproduct": { + "type": "boolean", + "description": "Suppress the default clean-up of the gene annotations.", + "help_text": "By default, annotation in Prokka is carried out by alignment to other proteins in its database, or the databases the user provides via the tool's `--proteins` flag. The resulting annotations are then cleaned up to standardise them to Genbank/ENA conventions.\n'Vague names' are set to 'hypothetical proteins', 'possible/probable/predicted' are set to 'putative' and 'EC/CPG and locus tag ids' are removed.\n\nBy supplying this flag you stop such clean-up, leaving the original annotation names.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\nThis flag suppresses this default behavior of Prokka (which is to perform the cleaning).\n\n> Modifies tool parameter(s):\n> - Prokka: `--rawproduct`", + "fa_icon": "fab fa-product-hunt" + }, + "annotation_prokka_kingdom": { + "type": "string", + "default": "Bacteria", + "fa_icon": "fas fa-crown", + "description": "Specify the kingdom that the input represents.", + "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for.\n\n> \u26a0\ufe0f Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", + "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"] + }, + "annotation_prokka_gcode": { + "type": "integer", + "default": 11, + "minimum": 0, + "maximum": 25, + "description": "Specify the translation table used to annotate the sequences.", + "help_text": "Specify the translation table used to annotate the sequences. All possible genetic codes (1-25) used for gene annotation can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes).
This flag is required if the flag `--kingdom` is assigned.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--gcode`", + "fa_icon": "fas fa-border-all" + }, + "annotation_prokka_mincontiglen": { + "type": "integer", + "default": 1, + "description": "Minimum contig size required for annotation (bp).", + "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be \u2265 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", + "fa_icon": "fas fa-ruler-horizontal" + }, + "annotation_prokka_evalue": { + "type": "number", + "default": 1e-6, + "description": "E-value cut-off.", + "help_text": "Specify the maximum E-value used for filtering the alignment hits.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`", + "fa_icon": "fas fa-sort-amount-down" + }, + "annotation_prokka_coverage": { + "type": "integer", + "default": 80, + "description": "Set the assigned minimum coverage.", + "help_text": "Specify the minimum coverage percent of the annotated genome. This must be set between 0-100.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--coverage`", + "fa_icon": "fas fa-align-right", + "minimum": 0, + "maximum": 100 + }, + "annotation_prokka_cdsrnaolap": { + "type": "boolean", + "description": "Allow transfer RNA (tRNA) to overlap coding sequences (CDS).", + "help_text": "Allow transfer RNA (tRNA) to overlap coding sequences (CDS). Transfer RNAs are short stretches of nucleotide sequences that link mRNA and the amino acid sequence of proteins. Their presence helps in the annotation of the sequences, because each tRNA can only be attached to one type of amino acid.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--cdsrnaolap`", + "fa_icon": "fas fa-align-justify" + }, + "annotation_prokka_rnammer": { + "type": "boolean", + "description": "Use RNAmmer for rRNA prediction.", + "help_text": "Activates [RNAmmer](https://services.healthtech.dtu.dk/service.php?RNAmmer-1.2) instead of the Prokka default [Barrnap](https://github.com/tseemann/barrnap) for rRNA prediction during the annotation process. RNAmmer classifies ribosomal RNA genes in genome sequences by using two levels of Hidden Markov Models. Barrnap uses the nhmmer tool that includes HMMER 3.1 for HMM searching in RNA:DNA style.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--rnammer`", + "fa_icon": "fas fa-cogs" + }, + "annotation_prokka_compliant": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Force contig name to Genbank/ENA/DDJB naming rules.", + "help_text": "Force the contig headers to conform to the Genbank/ENA/DDJB contig header standards. This is activated in combination with `--centre [X]` when contig headers supplied by the user are non-conforming and therefore need to be renamed before Prokka can start annotation. This flag activates `--genes --mincontiglen 200`.
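As a sketch under the same assumptions (placeholder paths, flags as defined in this schema), switching annotation to Prokka with the NCBI-recommended minimum contig length looks like:

```bash
# Hypothetical invocation: Prokka annotation with contigs < 200 bp excluded,
# in line with the Prokka developers' recommendation quoted above.
nextflow run nf-core/funcscan \
    -profile docker \
    --input samplesheet.csv \
    --outdir ./results \
    --run_arg_screening \
    --annotation_tool prokka \
    --annotation_prokka_mincontiglen 200
```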
For more information please check the Prokka [documentation](https://github.com/tseemann/prokka). \n\n> Modifies tool parameter(s):\n> - Prokka: `--compliant`", + "default": true + }, + "annotation_prokka_addgenes": { + "type": "boolean", + "fa_icon": "fas fa-dna", + "description": "Add the gene features for each CDS hit.", + "help_text": "For every CDS annotated, this flag adds the gene that encodes for that CDS region. For more information please check the Prokka [documentation](https://github.com/tseemann/prokka). \n\n> Modifies tool parameter(s):\n> - Prokka: `--addgenes`" + }, + "annotation_prokka_retaincontigheaders": { + "type": "boolean", + "fa_icon": "fas fa-font", + "help_text": "This parameter allows Prokka to retain the original contig names by activating `PROKKA`'s `--force` flag. If this parameter is set to `false` it activates `PROKKA`'s flags `--locus-tag PROKKA --centre CENTER` so the locus tags (contig names) will be PROKKA_# and the center tag will be CENTER. By default `PROKKA` changes contig headers to avoid errors that might arise due to long contig headers, so this must be turned on if the user has short contig names that should be retained by `PROKKA`. \n\n> Modifies tool parameter(s):\n> - Prokka: `--locus-tag PROKKA --centre CENTER`\n> - Prokka: `--force`", + "description": "Retains contig names." + } + }, + "fa_icon": "fas fa-file-signature" + }, + "annotation_prodigal": { + "title": "Annotation: Prodigal", + "type": "object", + "description": "Prodigal is a protein-coding gene prediction tool developed to run on bacterial and archaeal genomes. More info: https://github.com/hyattpd/prodigal/wiki", + "default": "", + "properties": { + "annotation_prodigal_singlemode": { + "type": "boolean", + "description": "Specify whether to use Prodigal's single-genome mode for long sequences.", + "help_text": "By default Prodigal runs in 'single genome' mode that requires sequence lengths to be equal or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default. Providing this parameter allows you to override this and run in single genome mode again.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s): \n> - PRODIGAL: `-p`", + "fa_icon": "far fa-circle" + }, + "annotation_prodigal_closed": { + "type": "boolean", + "description": "Does not allow partial genes on contig edges.", + "help_text": "Suppresses partial genes from being on contig edge, resulting in closed ends. Should only be activated for genomes where it is sure the first and last bases of the sequence(s) do not fall inside a gene. Run together with `-p normal` (former `-p single`).\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-c`", + "fa_icon": "fas fa-arrows-alt-h" + }, + "annotation_prodigal_transtable": { + "type": "integer", + "default": 11, + "description": "Specifies the translation table used for gene annotation.", + "help_text": "Specifies which translation table should be used for sequence annotation. All possible genetic code translation tables can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes).
The default is set at 11, which is used for standard Bacteria/Archaea.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-g`", + "fa_icon": "fas fa-border-all" + }, + "annotation_prodigal_forcenonsd": { + "type": "boolean", + "description": "Forces Prodigal to scan for motifs.", + "help_text": "Forces PRODIGAL to a full scan for motifs rather than activating the Shine-Dalgarno RBS finder, the default scanner for PRODIGAL to train for motifs.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-n`", + "fa_icon": "fas fa-barcode" + } + }, + "fa_icon": "fas fa-file-signature" + }, + "annotation_pyrodigal": { + "title": "Annotation: Pyrodigal", + "type": "object", + "description": "Pyrodigal is a resource-optimized wrapper around Prodigal, producing protein-coding gene predictions of bacterial and archaeal genomes. Read more at the Pyrodigal GitHub repository (https://github.com/althonos/pyrodigal) or its documentation (https://pyrodigal.readthedocs.io).", + "default": "", + "properties": { + "annotation_pyrodigal_singlemode": { + "type": "boolean", + "fa_icon": "far fa-circle", + "description": "Specify whether to use Pyrodigal's single-genome mode for long sequences.", + "help_text": "By default Pyrodigal runs in 'single genome' mode that requires sequence lengths to be equal or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows you to override this and run in single genome mode again.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s): \n> - PYRODIGAL: `-p`" + }, + "annotation_pyrodigal_closed": { + "type": "boolean", + "fa_icon": "fas fa-arrows-alt-h", + "description": "Does not allow partial genes on contig edges.", + "help_text": "Suppresses partial genes from being on contig edge, resulting in closed ends. Should only be activated for genomes where it is sure the first and last bases of the sequence(s) do not fall inside a gene. Run together with `-p single`.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-c`" + }, + "annotation_pyrodigal_transtable": { + "type": "integer", + "default": 11, + "fa_icon": "fas fa-border-all", + "description": "Specifies the translation table used for gene annotation.", + "help_text": "Specifies which translation table should be used for sequence annotation. All possible genetic code translation tables can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes).
The default is set at 11, which is used for standard Bacteria/Archaea.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-g`" + }, + "annotation_pyrodigal_forcenonsd": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "description": "Forces Pyrodigal to scan for motifs.", + "help_text": "Forces Pyrodigal to a full scan for motifs rather than activating the Shine-Dalgarno RBS finder, the default scanner for Pyrodigal to train for motifs.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-n`" + } + }, + "fa_icon": "fas fa-file-signature" + }, + "database_downloading_options": { + "title": "Database downloading options", + "type": "object", + "description": "General options for database downloading.", + "default": "", + "properties": { + "save_db": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Specify whether to save pipeline-downloaded databases in your results directory.", + "help_text": "While nf-core/funcscan can download databases for you, often these are very large and can significantly slow down pipeline runtime if the databases have to be downloaded every run.\n\nSpecifying `--save_db` will save the pipeline-downloaded databases in your results directory. This applies to: AMRFinderPlus, antiSMASH, Bakta, CARD (for RGI), DeepARG, DeepBGC, and DRAMP (for AMPcombi2).\n\nYou can then move the resulting directories/files to a central cache directory of your choice for re-use in the future.\n\nIf you do not specify these flags, the database files will remain in your `work/` directory and will be deleted if `cleanup = true` is specified in your config, or if you run `nextflow clean`.\n" + } + }, + "fa_icon": "fas fa-database" + }, + "amp_amplify": { + "title": "AMP: AMPlify", + "type": "object", + "description": "Antimicrobial Peptide detection using a deep learning model. More info: https://github.com/bcgsc/AMPlify", + "default": "", + "properties": { + "amp_skip_amplify": { + "type": "boolean", + "description": "Skip AMPlify during AMP screening.", + "fa_icon": "fas fa-ban" + } + }, + "fa_icon": "fa fa-plus-square" + }, + "amp_ampir": { + "title": "AMP: ampir", + "type": "object", + "description": "Antimicrobial Peptide detection using machine learning. ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, 'precursor' and 'mature' that have been trained on publicly available antimicrobial peptide data. More info: https://github.com/Legana/ampir", + "default": "", + "properties": { + "amp_skip_ampir": { + "type": "boolean", + "description": "Skip ampir during AMP screening.", + "fa_icon": "fas fa-ban" + }, + "amp_ampir_model": { + "type": "string", + "default": "precursor", + "description": "Specify which machine learning classification model to use.", + "help_text": "Ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, \"precursor\" and \"mature\". \n\nThe precursor module is better for predicted proteins from a translated transcriptome or translated gene models.
The alternative model (mature) is best suited for AMP sequences after post-translational processing, typically from direct proteomic sequencing.\n\nMore information can be found in the ampir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - AMPir: `model =`", + "enum": ["precursor", "mature"], + "fa_icon": "fas fa-layer-group" + }, + "amp_ampir_minlength": { + "type": "integer", + "default": 10, + "description": "Specify minimum protein length for prediction calculation.", + "help_text": "Filters result for minimum protein length.\nNote that amino acid sequences that are shorter than 10 amino acids and/or contain anything other than the standard 20 amino acids are not evaluated and will contain an NA as their \"prob_AMP\" value.\n\nMore information can be found in the ampir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - AMPir parameter: `min_length` in the `calculate_features()` function", + "fa_icon": "fas fa-ruler-horizontal" + } + }, + "fa_icon": "fa fa-plus-square" + }, + "amp_hmmsearch": { + "title": "AMP: hmmsearch", + "type": "object", + "description": "Antimicrobial Peptide detection based on predefined HMM models. This tool implements methods using probabilistic models called profile hidden Markov models (profile HMMs) to search against a sequence database. More info: http://eddylab.org/software/hmmer/Userguide.pdf", + "default": "", + "properties": { + "amp_run_hmmsearch": { + "type": "boolean", + "description": "Run hmmsearch during AMP screening.", + "help_text": "hmmsearch is not run by default because HMM model files must be provided by the user with the flag `amp_hmmsearch_models`.", + "fa_icon": "fas fa-ban" + }, + "amp_hmmsearch_models": { + "type": "string", + "description": "Specify path to the AMP hmm model file(s) to search against. Must have quotes if wildcard used.", + "help_text": "hmmsearch performs biosequence analysis using profile hidden Markov Models.\nThe models are provided in `.hmm` files that are specified with this parameter\n\ne.g. \n\n```\n--amp_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure the wildcard is expanded by Nextflow and _not_ by bash!
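A minimal sketch of the quoting rule described above; the model directory is a placeholder:

```bash
# Hypothetical invocation: run hmmsearch over several AMP HMM profiles.
# The single quotes stop bash from expanding the glob so that Nextflow can.
nextflow run nf-core/funcscan \
    -profile docker \
    --input samplesheet.csv \
    --outdir ./results \
    --run_amp_screening \
    --amp_run_hmmsearch \
    --amp_hmmsearch_models '/absolute/path/to/models/*.hmm'
```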
When using quotes, the absolute path to the HMM file(s) has to be given.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).", + "fa_icon": "fas fa-layer-group" + }, + "amp_hmmsearch_savealignments": { + "type": "boolean", + "help_text": "Save a multiple alignment of all significant hits (those satisfying inclusion thresholds) to a file.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `-A`", + "description": "Saves a multiple alignment of all significant hits to a file.", + "fa_icon": "fas fa-save" + }, + "amp_hmmsearch_savetargets": { + "type": "boolean", + "help_text": "Save a simple tabular (space-delimited) file summarizing the per-target output, with one data line per homologous target sequence found.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `--tblout`", + "description": "Save a simple tabular file summarising the per-target output.", + "fa_icon": "fas fa-save" + }, + "amp_hmmsearch_savedomains": { + "type": "boolean", + "help_text": "Save a simple tabular (space-delimited) file summarizing the per-domain output, with one data line per homologous domain detected in a query sequence for each homologous model.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `--domtblout`", + "description": "Save a simple tabular file summarising the per-domain output.", + "fa_icon": "fas fa-save" + } + }, + "fa_icon": "fa fa-plus-square", + "help_text": "HMMER/hmmsearch is used for searching sequence databases for sequence homologs, and for making sequence alignments. It implements methods using probabilistic models called profile hidden Markov models (profile HMMs). `hmmsearch` is used to search one or more profiles against a sequence database.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n" + }, + "amp_macrel": { + "title": "AMP: Macrel", + "type": "object", + "description": "Antimicrobial peptide detection from metagenomes. More info: https://github.com/BigDataBiology/macrel", + "default": "", + "properties": { + "amp_skip_macrel": { + "type": "boolean", + "description": "Skip Macrel during AMP screening.", + "fa_icon": "fas fa-ban" + } + }, + "fa_icon": "fa fa-plus-square" + }, + "amp_ampcombi2_parsetables": { + "title": "AMP: ampcombi2 parsetables", + "type": "object", + "description": "Antimicrobial peptides parsing, filtering, and annotating submodule of AMPcombi2. More info: https://github.com/Darcy220606/AMPcombi", + "default": "", + "properties": { + "amp_ampcombi_db_id": { + "type": "string", + "description": "The name of the database used to classify the AMPs.", + "help_text": "AMPcombi can use three different AMP databases to classify the recovered AMPs. These can either be: \n\n- [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/): Only general AMPs are downloaded and filtered to remove any entry that has an instance of non-amino-acid residues in their sequence.\n\n- [APD](https://aps.unmc.edu/): Only experimentally validated AMPs are present.\n\n- [UniRef100](https://academic.oup.com/bioinformatics/article/23/10/1282/197795): Combines a more general protein dataset including curated and non-curated AMPs. Helpful for identifying the clusters to remove any potential false positives.
Beware: if the thresholds for AMPcombi are not strict enough, alignment with this database can take a long time. \n\nBy default this is set to 'DRAMP'. Other valid options include 'APD' or 'UniRef100'.\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables).", + "fa_icon": "fas fa-address-book", + "default": "DRAMP", + "enum": ["DRAMP", "APD", "UniRef100"] + }, + "amp_ampcombi_db": { + "type": "string", + "description": "The path to the folder containing the reference database files.", + "help_text": "The path to the folder containing the reference database files (`*.fasta` and `*.tsv`); a fasta file and the corresponding table with structural, functional and, if reported, taxonomic classifications. AMPcombi will then generate the corresponding `mmseqs2` directory, in which all binary files are prepared for the downstream alignment of the recovered AMPs with [MMseqs2](https://github.com/soedinglab/MMseqs2). These can also be provided by the user by setting up an mmseqs2 compatible database using `mmseqs createdb *.fasta` in a directory called `mmseqs2`.\n\nExample file structure for the reference database supplied by the user:\n\n```bash\namp_DRAMP_database/\n\u251c\u2500\u2500 general_amps_2024_11_13.fasta\n\u251c\u2500\u2500 general_amps_2024_11_13.txt\n\u2514\u2500\u2500 mmseqs2\n \u251c\u2500\u2500 ref_DB\n \u251c\u2500\u2500 ref_DB.dbtype\n \u251c\u2500\u2500 ref_DB_h\n \u251c\u2500\u2500 ref_DB_h.dbtype\n \u251c\u2500\u2500 ref_DB_h.index\n \u251c\u2500\u2500 ref_DB.index\n \u251c\u2500\u2500 ref_DB.lookup\n \u2514\u2500\u2500 ref_DB.source\n```\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables)." + }, + "amp_ampcombi_parsetables_cutoff": { + "type": "number", + "default": 0.6, + "description": "Specifies the prediction tools' cut-offs.", + "help_text": "This converts any prediction score below this cut-off to '0'. By doing so only values above this value will be used in the final AMPcombi2 summary table. This applies to all prediction tools except for hmmsearch, which uses e-value. To change the e-value cut-off use instead `--amp_ampcombi_parsetables_hmmevalue`.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--amp_cutoff`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_aalength": { + "type": "integer", + "default": 120, + "description": "Filter out all amino acid fragments shorter than this number.", + "help_text": "Any AMP hit that does not satisfy this length cut-off will be removed from the final AMPcombi2 summary table.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--aminoacid_length`", + "fa_icon": "fas fa-ruler-horizontal" + }, + "amp_ampcombi_parsetables_dbevalue": { + "type": "number", + "default": 5.0, + "description": "Remove all DRAMP annotations that have an e-value greater than this value.", + "help_text": "This e-value is used as a cut-off for the annotations from the internal Diamond alignment step (against the DRAMP database by default). Any e-value above this value will only remove the DRAMP classification and not the entire hit.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--db_evalue`", + "fa_icon": "fas fa-sort-numeric-down" + }, + "amp_ampcombi_parsetables_hmmevalue": { + "type": "number", + "default": 0.06, + "description": "Retain HMM hits that have an e-value lower than this.", + "help_text": "This converts any prediction score below this cut-off to '0'.
By doing so only values above this value will be used in the final AMPcombi2 summary table. To change the prediction score cut-off for all other AMP prediction tools, use instead `--amp_ampcombi_parsetables_cutoff`.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--hmm_evalue`", + "fa_icon": "fas fa-sort-numeric-up" + }, + "amp_ampcombi_parsetables_windowstopcodon": { + "type": "integer", + "default": 60, + "description": "Assign the number of codons used to look for stop codons, upstream and downstream of the AMP hit.", + "help_text": "This assigns the length of the window size required to look for stop codons downstream and upstream of the CDS hits. In the default case, it looks 60 codons downstream and upstream of the AMP hit and reports whether a stop codon was found.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--window_size_stop_codon`", + "fa_icon": "fas fa-stop-circle" + }, + "amp_ampcombi_parsetables_windowtransport": { + "type": "integer", + "default": 11, + "description": "Assign the number of CDSs upstream and downstream of the AMP to look for a transport protein.", + "help_text": "This assigns the length of the window size required to look for a 'transporter' (e.g. ABC transporter) downstream and upstream of the CDS hits. This is done on CDS classification level.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--window_size_transporter`", + "fa_icon": "fas fa-car-side" + }, + "amp_ampcombi_parsetables_removehitswostopcodons": { + "type": "boolean", + "description": "Remove hits that have no stop codon upstream and downstream of the AMP.", + "help_text": "Removes any hits/CDSs that don't have a stop codon found in the window downstream or upstream of the CDS assigned by `--amp_ampcombi_parsetables_windowstopcodon`. We recommend turning it on if the results will be used downstream experimentally.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--remove_stop_codons`", + "fa_icon": "fas fa-ban" + }, + "amp_ampcombi_parsetables_ampir": { + "type": "string", + "default": ".ampir.tsv", + "description": "Assigns the file extension used to identify AMPIR output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--ampir_file`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_amplify": { + "type": "string", + "default": ".amplify.tsv", + "description": "Assigns the file extension used to identify AMPLIFY output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--amplify_file`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_macrel": { + "type": "string", + "default": ".macrel.prediction", + "description": "Assigns the file extension used to identify MACREL output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--macrel_file`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_hmmsearch": { + "type": "string", + "default": ".hmmer_hmmsearch.txt", + "description": "Assigns the file extension used to identify HMMER/HMMSEARCH output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI:
`--hmmsearch_file`", + "fa_icon": "fas fa-address-card" + } + }, + "fa_icon": "fa fa-plus-square" + }, + "amp_ampcombi2_cluster": { + "title": "AMP: ampcombi2 cluster", + "type": "object", + "description": "Clusters the AMP candidates identified with AMPcombi. More info: https://github.com/Darcy220606/AMPcombi", + "default": "", + "properties": { + "amp_ampcombi_cluster_covmode": { + "type": "number", + "default": 0.0, + "description": "MMseqs2 coverage mode.", + "help_text": "This assigns the coverage mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More details can be found in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_cov_mode`", + "fa_icon": "far fa-circle" + }, + "amp_ampcombi_cluster_sensitivity": { + "type": "number", + "default": 4.0, + "description": "MMseqs2 alignment sensitivity.", + "help_text": "This assigns the sensitivity of alignment to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_sensitivity`", + "fa_icon": "fas fa-arrows-alt-h" + }, + "amp_ampcombi_cluster_minmembers": { + "type": "integer", + "default": 0, + "description": "Remove clusters that don't have more AMP hits than this number.", + "help_text": "Removes all clusters with this number of AMP hits and less.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_min_member`", + "fa_icon": "fas fa-book-dead" + }, + "amp_ampcombi_cluster_mode": { + "type": "number", + "default": 1.0, + "description": "MMseqs2 clustering mode.", + "help_text": "This assigns the cluster mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_mode`", + "fa_icon": "fas fa-circle" + }, + "amp_ampcombi_cluster_coverage": { + "type": "number", + "default": 0.8, + "description": "MMseqs2 alignment coverage.", + "help_text": "This assigns the coverage to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_coverage`", + "fa_icon": "far fa-arrow-alt-circle-right" + }, + "amp_ampcombi_cluster_seqid": { + "type": "number", + "default": 0.4, + "description": "MMseqs2 sequence identity.", + "help_text": "This assigns the cluster sequence identity to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters.
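For illustration, a hedged sketch of tightening these clustering thresholds (placeholder paths; flags as defined in this schema, with values chosen arbitrarily for the example):

```bash
# Hypothetical invocation: stricter AMPcombi2 clustering of AMP hits.
nextflow run nf-core/funcscan \
    -profile docker \
    --input samplesheet.csv \
    --outdir ./results \
    --run_amp_screening \
    --amp_ampcombi_cluster_seqid 0.8 \
    --amp_ampcombi_cluster_coverage 0.9 \
    --amp_ampcombi_cluster_removesingletons
```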
More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_seq_id`", + "fa_icon": "far fa-address-card" + }, + "amp_ampcombi_cluster_removesingletons": { + "type": "boolean", + "description": "Remove any hits that form a single member cluster.", + "help_text": "Removes any AMP hits that form a single-member cluster.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_remove_singletons`", + "fa_icon": "fas fa-book-dead" + } + }, + "fa_icon": "fa fa-plus-square" + }, + "arg_amrfinderplus": { + "title": "ARG: AMRFinderPlus", + "type": "object", + "description": "Antimicrobial resistance gene detection based on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models. It identifies AMR genes, resistance-associated point mutations, and select other classes of genes using protein annotations and/or assembled nucleotide sequences. More info: https://github.com/ncbi/amr/wiki", + "default": "", + "fa_icon": "fas fa-bacteria", + "properties": { + "arg_skip_amrfinderplus": { + "type": "boolean", + "description": "Skip AMRFinderPlus during the ARG screening.", + "fa_icon": "fas fa-ban" + }, + "arg_amrfinderplus_db": { + "type": "string", + "fa_icon": "fas fa-layer-group", + "help_text": "Specify the path to a local version of the AMRFinderPlus database.\n\nYou must give the `latest` directory to the pipeline, and the contents of the directory should include files such as `*.nbd`, `*.nhr`, `versions.txt` etc. in the top level.\n\nIf no input is given, the pipeline will download the database for you.\n\nSee the nf-core/funcscan usage [documentation](https://nf-co.re/funcscan/usage) for more information.\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--database`", + "description": "Specify the path to a local version of the AMRFinderPlus database." + }, + "arg_amrfinderplus_identmin": { + "type": "number", + "default": -1.0, + "help_text": "Specify the minimum percentage amino-acid identity to the reference protein, or nucleotide identity for a nucleotide reference, that a hit must have if a BLAST alignment (based on methods: BLAST or PARTIAL) was detected, otherwise NA.\n\n If you specify `-1`, this means use a curated threshold if it exists and `0.9` otherwise.\n\nSetting this value to something other than `-1` will override any curated similarity cutoffs. For BLAST: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database.
For PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--ident_min`", + "description": "Minimum percent identity to reference sequence.", + "fa_icon": "fas fa-angle-left" + }, + "arg_amrfinderplus_coveragemin": { + "type": "number", + "default": 0.5, + "description": "Minimum coverage of the reference protein.", + "help_text": "Minimum proportion of reference gene covered for a BLAST-based hit analysis if a BLAST alignment was detected, otherwise NA.\n\nFor BLAST-based hit analysis: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database or for PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--coverage_min`", + "fa_icon": "fas fa-arrow-alt-circle-down", + "minimum": 0, + "maximum": 1 + }, + "arg_amrfinderplus_translationtable": { + "type": "integer", + "default": 11, + "description": "Specify which NCBI genetic code to use for translated BLAST.", + "help_text": "NCBI genetic code for translated BLAST. Number from 1 to 33 to represent the translation table used for BLASTX.\n\nSee [translation table](https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for more details on which table to use. \n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--translation_table`", + "fa_icon": "fas fa-border-all", + "minimum": 1, + "maximum": 33 + }, + "arg_amrfinderplus_plus": { + "type": "boolean", + "description": "Add the plus genes to the report.", + "help_text": "Provide results from \"Plus\" genes in the output files.\n\nMostly the `plus` genes are an expanded set of genes that are of interest in pathogens. This set includes stress response (biocide, metal, and heat resistance), virulence factors, some antigens, and porins. These \"plus\" proteins have primarily been added to the database with curated BLAST cutoffs, and are generally identified by BLAST searches. Some of these may not be acquired genes or mutations, but may be intrinsic in some organisms. See [AMRFinderPlus database](https://github.com/ncbi/amr/wiki/AMRFinderPlus-database#types-of-proteins-covered) for more details.\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--plus`", + "fa_icon": "far fa-plus-square" + }, + "arg_amrfinderplus_name": { + "type": "boolean", + "description": "Add identified column to AMRFinderPlus output.", + "help_text": "Prepend a column containing an identifier for this run of AMRFinderPlus. For example this can be used to add a sample name column to the AMRFinderPlus results. If set to `true`, the `--name ` is the sample name. 
\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--name`", + "fa_icon": "far fa-address-card" + } } }, - "reference_genome_options": { - "title": "Reference genome options", + "arg_deeparg": { + "title": "ARG: DeepARG", "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", + "description": "Antimicrobial resistance gene detection using a deep learning model. DeepARG is composed of two models for two types of input: short sequence reads and gene-like sequences. In this pipeline we use the `ls` model, which is suitable for annotating full sequence genes and to discover novel antibiotic resistance genes from assembled samples. The tool `Diamond` is used as an aligner. More info: https://bitbucket.org/gusphdproj/deeparg-ss/src/master", + "default": "", "properties": { - "genome": { + "arg_skip_deeparg": { + "type": "boolean", + "description": "Skip DeepARG during the ARG screening.", + "fa_icon": "fas fa-ban" + }, + "arg_deeparg_db": { "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "fa_icon": "fas fa-database", + "description": "Specify the path to the DeepARG database.", + "help_text": "Specify the path to a local version of the DeepARG database (see the pipelines' usage [documentation](https://nf-co.re/funcscan/dev/docs/usage#databases-and-reference-files)).\n\nThe contents of the directory should include directories such as `database`, `moderl`, and files such as `deeparg.gz` etc. in the top level.\n\nIf no input is given, the module will download the database for you, however this is not recommended, as the database is large and this will take time.\n\n> Modifies tool parameter(s):\n> - DeepARG: `--data-path`" + }, + "arg_deeparg_db_version": { + "type": "integer", + "default": 2, + "description": "Specify the numeric version number of a user supplied DeepaRG database.", + "fa_icon": "fas fa-code-branch", + "help_text": "The DeepARG tool itself does not report explicitly the database version it uses. We assume the latest version (as downloaded by the tool's database download module), however if you supply a different database, you must supply the version with this parameter for use with the downstream hAMRonization tool.\n\nThe version number must be without any leading `v` etc." }, - "fasta": { + "arg_deeparg_model": { "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "default": "LS", + "enum": ["LS", "SS"], + "description": "Specify which model to use (short or long sequences).", + "help_text": "Specify which model to use: short sequences for reads (`SS`), or long sequences for genes (`LS`). 
In the vast majority of cases we recommend using the `LS` model when using funcscan\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--model`", + "fa_icon": "fas fa-layer-group" + }, + "arg_deeparg_minprob": { + "type": "number", + "default": 0.8, + "description": "Specify minimum probability cutoff under which hits are discarded.", + "help_text": "Sets the minimum probability cutoff below which hits are discarded.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--min-prob`", + "fa_icon": "fas fa-dice" + }, + "arg_deeparg_alignmentevalue": { + "type": "number", + "default": 1e-10, + "description": "Specify E-value cutoff under which hits are discarded.", + "help_text": "Sets the cutoff value for Evalue below which hits are discarded.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-evalue`", + "fa_icon": "fas fa-align-center" + }, + "arg_deeparg_alignmentidentity": { + "type": "integer", + "default": 50, + "description": "Specify percent identity cutoff for sequence alignment under which hits are discarded.", + "help_text": "Sets the value for Identity cutoff for sequence alignment.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-identity`", + "fa_icon": "fas fa-align-center" + }, + "arg_deeparg_alignmentoverlap": { + "type": "number", + "default": 0.8, + "description": "Specify alignment read overlap.", + "help_text": "Sets the value for the allowed alignment read overlap.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-overlap`", + "fa_icon": "fas fa-align-center" + }, + "arg_deeparg_numalignmentsperentry": { + "type": "integer", + "default": 1000, + "description": "Specify minimum number of alignments per entry for DIAMOND step of DeepARG.", + "help_text": "Sets the value of minimum number of alignments per entry for DIAMOND.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-num-alignments-per-entry`", + "fa_icon": "far fa-gem" + } + }, + "fa_icon": "fas fa-bacteria" + }, + "arg_fargene": { + "title": "ARG: fARGene", + "type": "object", + "description": "Antimicrobial resistance gene detection using a deep learning model. The tool includes developed and optimised models for a number or resistance gene types, and the functionality to create and optimize models of your own choice of resistance genes. 
More info: https://github.com/fannyhb/fargene", + "default": "", + "properties": { + "arg_skip_fargene": { + "type": "boolean", + "description": "Skip fARGene during the ARG screening.", + "fa_icon": "fas fa-ban" + }, + "arg_fargene_hmmmodel": { + "type": "string", + "default": "class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme", + "pattern": "^(class_a|class_b_1_2|class_b_3|class_c|class_d_1|class_d_2|qnr|tet_efflux|tet_rpg|tet_enzyme)(,(class_a|class_b_1_2|class_b_3|class_c|class_d_1|class_d_2|qnr|tet_efflux|tet_rpg|tet_enzyme))*$", + "description": "Specify comma-separated list of which pre-defined HMM models to screen against", + "help_text": "Specify via a comma separated list any of the hmm-models of the pre-defined models:\n- Class A beta-lactamases: `class_a`\n- Subclass B1 and B2 beta-lactamases: `class_b_1_2`\n- Subclass B3 beta-lactamases: `class_b_3`\n- Class C beta-lactamases: `class_c`\n- Class D beta-lactamases: `class_d_1`, `class_d_2`\n- qnr: `qnr`\n- Tetracycline resistance genes `tet_efflux`, `tet_rpg`, `tet_enzyme`\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n For example: `--arg_fargenemodel 'class_a,qnr,tet_enzyme'`\n\n>Modifies tool parameter(s):\n> - fARGene: `--hmm-model`", + "fa_icon": "fas fa-layer-group" + }, + "arg_fargene_savetmpfiles": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Specify to save intermediate temporary files to results directory.", + "help_text": "fARGene generates many additional temporary files which in most cases won't be useful and thus by default are not saved to the pipeline's result directory.\n\nBy specifying this parameter, the directories `tmpdir/`, `hmmsearchresults/` and `spades_assemblies/` will be also saved in the output directory for closer inspection by the user, if necessary." + }, + "arg_fargene_score": { + "type": "number", + "help_text": "The threshold score for a sequence to be classified as a (almost) complete gene. If not pre-assigned, it is assigned by the hmm_model used based on the trade-off between sensitivity and specificity.\n\nFor more details see code [documentation](https://github.com/fannyhb/fargene/blob/master/fargene_analysis/fargene_analysis.py).\n\n> Modifies tool parameter(s):\n> - fARGene: `--score`", + "description": "The threshold score for a sequence to be classified as a (almost) complete gene.", + "fa_icon": "fab fa-creative-commons-zero" + }, + "arg_fargene_minorflength": { + "type": "integer", + "default": 90, + "help_text": "The minimum length of a predicted ORF retrieved from annotating the nucleotide sequences. By default the pipeline assigns this to 90% of the assigned hmm_model sequence length. \n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--min-orf-length`", + "description": "The minimum length of a predicted ORF retrieved from annotating the nucleotide sequences.", + "fa_icon": "fas fa-ruler-horizontal", + "minimum": 1, + "maximum": 100 + }, + "arg_fargene_orffinder": { + "type": "boolean", + "description": "Defines which ORF finding algorithm to use.", + "help_text": "By default, pipeline uses prodigal/prokka for the prediction of ORFs from nucleotide sequences. 
Another option is the NCBI ORFfinder tool that is built into fARGene, the use of which is activated by this flag.\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--orf-finder`", + "fa_icon": "fas fa-project-diagram" + }, + "arg_fargene_translationformat": { + "type": "string", + "default": "pearson", + "description": "The translation table/format to use for sequence annotation.", + "help_text": "The translation format that transeq should use for amino acid annotation from the nucleotide sequences. More sequence formats can be found in [transeq 'input sequence formats'](https://emboss.sourceforge.net/docs/themes/SequenceFormats.html).\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--translation-format`", + "fa_icon": "fas fa-border-none" + } + }, + "fa_icon": "fas fa-bacteria" + }, + "arg_rgi": { + "title": "ARG: RGI", + "type": "object", + "description": "Antimicrobial resistance gene detection, based on alignment to the CARD database based on homology and SNP models. More info: https://github.com/arpcard/rgi", + "default": "", + "properties": { + "arg_skip_rgi": { + "type": "boolean", + "description": "Skip RGI during the ARG screening.", + "fa_icon": "fas fa-ban" + }, + "arg_rgi_db": { + "type": "string", + "description": "Path to user-defined local CARD database.", + "fa_icon": "fas fa-database", + "help_text": "You can pre-download the CARD database to your machine and pass the path of it to this parameter.\n\nThe contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#rgi) for details on how to download this.\n\n> Modifies tool parameter(s):\n> - RGI_CARDANNOTATION: `--input`" + }, + "arg_rgi_savejson": { + "type": "boolean", + "description": "Save RGI output .json file.", + "help_text": "When activated, this flag saves the `.json` file in the RGI output directory. The `.json` file contains the ARG predictions in a format that can be can be uploaded to the CARD website for visualization. See [RGI documentation](https://github.com/arpcard/rgi) for more details. By default, the `.json` file is generated in the working directory but not saved in the results directory to save disk space (`.json` file is quite large and not required downstream in the pipeline). ", + "fa_icon": "fas fa-save" + }, + "arg_rgi_savetmpfiles": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Specify to save intermediate temporary files in the results directory.", + "help_text": "RGI generates many additional temporary files which in most cases won't be useful, thus are not saved by default.\n\nBy specifying this parameter, files including `temp` in their name will be also saved in the output directory for closer inspection by the user." + }, + "arg_rgi_alignmenttool": { + "type": "string", + "default": "BLAST", + "description": "Specify the alignment tool to be used.", + "help_text": "Specifies the alignment tool to be used. By default RGI runs BLAST and this is also set as default in the nf-core/funcscan pipeline. 
With this flag the user can choose between BLAST and DIAMOND for the alignment step.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--alignment_tool`", + "enum": ["BLAST", "DIAMOND"], + "fa_icon": "fas fa-align-justify" + }, + "arg_rgi_includeloose": { + "type": "boolean", + "description": "Include all of loose, strict and perfect hits (i.e. \u2265 95% identity) found by RGI.", + "help_text": "When activated RGI output will include 'Loose' hits in addition to 'Strict' and 'Perfect' hits. The 'Loose' algorithm works outside of the detection model cut-offs to provide detection of new, emergent threats and more distant homologs of AMR genes, but will also catalog homologous sequences and spurious partial matches that may not have a role in AMR.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_loose`", + "fa_icon": "far fa-hand-scissors" + }, + "arg_rgi_includenudge": { + "type": "boolean", + "description": "Suppresses the default behaviour of RGI with `--arg_rgi_includeloose`.", + "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of \u2265 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`", + "fa_icon": "fas fa-hand-scissors" + }, + "arg_rgi_lowquality": { + "type": "boolean", + "description": "Include screening of low quality contigs for partial genes.", + "help_text": "This flag should be used only when the contigs are of poor quality (e.g. short) to predict partial genes.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--low_quality`", + "fa_icon": "fas fa-angle-double-down" + }, + "arg_rgi_data": { + "type": "string", + "default": "NA", + "description": "Specify a more specific data-type of input (e.g. plasmid, chromosome).", + "help_text": "This flag is used to specify the data type used as input to RGI. By default this is set as 'NA', which makes no assumptions on input data.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--data`", + "enum": ["NA", "wgs", "plasmid", "chromosome"], + "fa_icon": "fas fa-dna" }, - "igenomes_ignore": { + "arg_rgi_split_prodigal_jobs": { + "type": "boolean", + "description": "Run multiple prodigal jobs simultaneously for contigs in a fasta file.", + "help_text": "For more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\nModifies tool parameter:\n> - RGI_MAIN: `--split_prodigal_jobs`", + "fa_icon": "fas fa-angle-double-down", + "default": true + } + }, + "fa_icon": "fas fa-bacteria" + }, + "arg_abricate": { + "title": "ARG: ABRicate", + "type": "object", + "description": "Antimicrobial resistance gene detection based on alignment to CBI, CARD, ARG-ANNOT, ResFinder, MEGARES, EcOH, PlasmidFinder, Ecoli_VF and VFDB. More info: https://github.com/tseemann/abricate", + "default": "", + "fa_icon": "fas fa-bacteria", + "properties": { + "arg_skip_abricate": { "type": "boolean", - "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. 
You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "description": "Skip ABRicate during the ARG screening." }, - "igenomes_base": { + "arg_abricate_db_id": { "type": "string", - "format": "directory-path", - "description": "The base path to the igenomes reference files", + "default": "ncbi", + "fa_icon": "fas fa-database", + "description": "Specify the name of the ABRicate database to use. Names of non-default databases can be supplied if `--arg_abricate_db` provided.", + "help_text": "Specifies which database to use from dedicated list of databases available by ABRicate.\n\nDefault supported are one of: `argannot`, `card`, `ecoh`, `ecoli_vf`, `megares`, `ncbi`, `plasmidfinder`, `resfinder`, `vfdb`. Other options can be supplied if you have installed a custom one within the directory you have supplied to `--arg_abricate_db`.\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--db`" + }, + "arg_abricate_db": { + "type": "string", + "description": "Path to user-defined local ABRicate database directory for using custom databases.", + "fa_icon": "far fa-folder-open", + "help_text": "Supply this only if you want to use additional custom databases you yourself have added to your ABRicate installation following the instructions [here](https://github.com/tseemann/abricate?tab=readme-ov-file#making-your-own-database).\n\nThe contents of the directory should have a directory named with the database name in the top level (e.g. `bacmet2/`).\n\nYou must also specify the name of the custom database with `--arg_abricate_db_id`.\n\n> Modifies tool parameter(s):\n> - ABRicate: `--datadir`" + }, + "arg_abricate_minid": { + "type": "integer", + "default": 80, + "description": "Minimum percent identity of alignment required for a hit to be considered.", + "help_text": "Specifies the minimum percent identity used to classify an ARG hit using BLAST alignment.\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--minid`", + "fa_icon": "far fa-arrow-alt-circle-down", + "minimum": 1, + "maximum": 100 + }, + "arg_abricate_mincov": { + "type": "integer", + "default": 80, + "description": "Minimum percent coverage of alignment required for a hit to be considered.", + "help_text": "Specifies the minimum coverage of the nucleotide sequence to be assigned an ARG hit using BLAST alignment. 
In the ABRicate matrix, an absent gene is assigned (`.`) and if present, it is assigned the estimated coverage (`#`).\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--mincov`", + "fa_icon": "far fa-arrow-alt-circle-down", + "minimum": 1, + "maximum": 100 + } + } + }, + "arg_hamronization": { + "title": "ARG: hAMRonization", + "type": "object", + "description": "Influences parameters required for the ARG summary by hAMRonization.", + "default": "", + "properties": { + "arg_hamronization_summarizeformat": { + "type": "string", + "default": "tsv", + "enum": ["interactive", "tsv", "json"], + "help_text": "Specifies which summary report format to apply with `hamronize summarize`: tsv, json or interactive (html)\n\n> Modifies tool parameter(s)\n> - hamronize summarize: `-t`, `--summary_type`", + "description": "Specifies summary output format.", + "fa_icon": "far fa-file-code" + } + }, + "fa_icon": "fas fa-bacteria", + "help_text": "" + }, + "arg_argnorm": { + "title": "ARG: argNorm", + "type": "object", + "description": "Influences parameters required for the normalization of ARG annotations by argNorm. More info: https://github.com/BigDataBiology/argNorm", + "default": "", + "properties": { + "arg_skip_argnorm": { + "type": "boolean", "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" + "description": "Skip argNorm during ARG screening." + } + }, + "fa_icon": "fas fa-bacteria" + }, + "bgc_general_options": { + "title": "BGC: general options", + "type": "object", + "description": "These parameters influence general BGC settings like minimum input sequence length.", + "default": "", + "fa_icon": "fa fa-sliders", + "properties": { + "bgc_mincontiglength": { + "type": "integer", + "default": 3000, + "fa_icon": "fas fa-ruler-horizontal", + "description": "Specify the minimum length of contigs that go into BGC screening.", + "help_text": "Specify the minimum length of contigs that go into BGC screening.\n\nIf BGC screening is turned on, nf-core/funcscan will generate for each input sample a second FASTA file of only contigs that are longer than the specified minimum length.\nThis is due to an (approximate) 'biological' minimum length that nucleotide sequences would need to have to code for a valid BGC (e.g. not on the edge of a contig), as well as to speeding up BGC screening sections of the pipeline by screening only meaningful contigs.\n\nNote this only affects BGCs. For ARG and AMPs no filtering is performed and all contigs are screened." + }, + "bgc_savefilteredcontigs": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Specify to save the length-filtered (unannotated) FASTAs used for BGC screening." } } }, + "bgc_antismash": { + "title": "BGC: antiSMASH", + "type": "object", + "description": "Biosynthetic gene cluster detection. 
More info: https://docs.antismash.secondarymetabolites.org", + "default": "", + "properties": { + "bgc_skip_antismash": { + "type": "boolean", + "description": "Skip antiSMASH during the BGC screening.", + "fa_icon": "fas fa-ban" + }, + "bgc_antismash_db": { + "type": "string", + "description": "Path to user-defined local antiSMASH database.", + "fa_icon": "fas fa-database", + "help_text": "It is recommend to pre-download the antiSMASH databases to your machine and pass the path of it to this parameter, as this can take a long time to download - particularly when running lots of pipeline runs.\n\nThe contents of the database directory should include directories such as `as-js/`, `clusterblast/`, `clustercompare/` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information." + }, + "bgc_antismash_installdir": { + "type": "string", + "description": "Path to user-defined local antiSMASH directory. Only required when running with docker/singularity.", + "fa_icon": "far fa-folder-open", + "help_text": "This is required when running with **docker and singularity** (not required for conda), due to attempted 'modifications' of files during database checks in the installation directory, something that cannot be done in immutable docker/singularity containers.\n\nTherefore, a local installation directory needs to be mounted (including all modified files from the downloading step) to the container as a workaround.\n\nThe contents of the installation directory should include directories such as `common/` `config/` and files such as `custom_typing.py` `custom_typing.pyi` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information." + }, + "bgc_antismash_contigminlength": { + "type": "integer", + "default": 3000, + "description": "Minimum length a contig must have to be screened with antiSMASH.", + "fa_icon": "fas fa-align-center", + "help_text": "This specifies the minimum length that a contig must have for the contig to be screened by antiSMASH.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\nThis will only apply to samples that are screened with antiSMASH (i.e., those samples that have not been removed by `--bgc_antismash_sampleminlength`).\n\nYou may wish to increase this value compared to that of `--bgc_antismash_sampleminlength`, in cases where you wish to screen higher-quality (i.e. 
longer) contigs, or speed up runs by not screening lower quality/less informative contigs.\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--minlength`" + }, + "bgc_antismash_cbgeneral": { + "type": "boolean", + "description": "Turn on clusterblast comparison against database of antiSMASH-predicted clusters.", + "help_text": "Compare identified clusters against a database of antiSMASH-predicted clusters using the clusterblast algorithm.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-general`", + "fa_icon": "fab fa-affiliatetheme" + }, + "bgc_antismash_cbknownclusters": { + "type": "boolean", + "description": "Turn on clusterblast comparison against known gene clusters from the MIBiG database.", + "fa_icon": "fas fa-puzzle-piece", + "help_text": "This will turn on comparing identified clusters against known gene clusters from the MIBiG database using the clusterblast algorithm.\n\n[MIBiG](https://mibig.secondarymetabolites.org/) is a curated database of experimentally characterised gene clusters and with rich associated metadata.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-knownclusters`" + }, + "bgc_antismash_cbsubclusters": { + "type": "boolean", + "description": "Turn on clusterblast comparison against known subclusters responsible for synthesising precursors.", + "fa_icon": "fas fa-adjust", + "help_text": "Turn on additional screening for operons involved in the biosynthesis of early secondary metabolites components using the clusterblast algorithm.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-subclusters`" + }, + "bgc_antismash_ccmibig": { + "type": "boolean", + "description": "Turn on ClusterCompare comparison against known gene clusters from the MIBiG database.", + "fa_icon": "fab fa-affiliatetheme", + "help_text": "Turn on comparison of detected genes against the MIBiG database using the ClusterCompare algorithm - an alternative to clusterblast.\n\nNote there will not be a dedicated ClusterCompare output in the antiSMASH results directory, but is present in the HTML.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cc-mibig`" + }, + "bgc_antismash_smcogtrees": { + "type": "boolean", + "description": "Generate phylogenetic trees of secondary metabolite group orthologs.", + "fa_icon": "fas fa-tree", + "help_text": "Turning this on will activate the generation of additional functional and phylogenetic analysis of genes, via comparison against databases of protein orthologs.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-smcog-trees`" + }, + "bgc_antismash_hmmdetectionstrictness": { + "type": "string", + "default": "relaxed", + "description": "Defines which level of strictness to use for HMM-based cluster detection.", + "help_text": "Levels of strictness correspond to screening different groups of 'how well-defined' clusters are. For example, `loose` will include screening for 'poorly defined' clusters (e.g. saccharides), `relaxed` for partially present clusters (e.g. 
certain types of NRPS), whereas `strict` will screen for well-defined clusters such as Ketosynthases.\n\nYou can see the rules for the levels of strictness [here](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/cluster_rules).\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--hmmdetection-strictness`", + "fa_icon": "fas fa-search", + "enum": ["relaxed", "strict", "loose"] + }, + "bgc_antismash_pfam2go": { + "type": "boolean", + "description": "Run Pfam to Gene Ontology mapping module.", + "help_text": "This maps the proteins to Pfam database to annotate BGC modules with functional information based on the protein families they contain. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--pfam2go`", + "fa_icon": "fas fa-search" + }, + "bgc_antismash_rre": { + "type": "boolean", + "description": "Run RREFinder precision mode on all RiPP gene clusters.", + "help_text": "This enables the prediction of regulatory elements on the BGC that help in the control of protein expression. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--rre`", + "fa_icon": "fas fa-search" + }, + "bgc_antismash_taxon": { + "type": "string", + "default": "bacteria", + "description": "Specify which taxonomic classification of input sequence to use.", + "help_text": "This specifies which set of secondary metabolites to screen for, based on the taxon type the secondary metabolites are from.\n\nThis will run different pipelines depending on whether the input sequences are from bacteria or fungi.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--taxon`", + "fa_icon": "fas fa-bacteria", + "enum": ["bacteria", "fungi"] + }, + "bgc_antismash_tfbs": { + "type": "boolean", + "description": "Run TFBS finder on all gene clusters.", + "help_text": "This enables the prediction of transcription factor binding sites which control the gene expression. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--tfbs`", + "fa_icon": "fas fa-search" + } + }, + "fa_icon": "fa fa-sliders" + }, + "bgc_deepbgc": { + "title": "BGC: DeepBGC", + "type": "object", + "description": "A deep learning genome-mining strategy for biosynthetic gene cluster prediction. More info: https://github.com/Merck/deepbgc/tree/master/deepbgc", + "default": "", + "properties": { + "bgc_skip_deepbgc": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "Skip DeepBGC during the BGC screening." 
+                },
+                "bgc_deepbgc_db": {
+                    "type": "string",
+                    "fa_icon": "fas fa-database",
+                    "description": "Path to local DeepBGC database folder.",
+                    "help_text": "The contents of the database directory should include directories such as `common`, `0.1.0` in the top level.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: environment variable `DEEPBGC_DOWNLOADS_DIR`"
+                },
+                "bgc_deepbgc_score": {
+                    "type": "number",
+                    "default": 0.5,
+                    "description": "Average protein-wise DeepBGC score threshold for extracting BGC regions from Pfam sequences.",
+                    "fa_icon": "fas fa-list-ol",
+                    "help_text": "The DeepBGC score threshold for extracting BGC regions from Pfam sequences based on average protein-wise value.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--score`"
+                },
+                "bgc_deepbgc_prodigalsinglemode": {
+                    "type": "boolean",
+                    "description": "Run DeepBGC's internal Prodigal step in `single` mode to restrict detecting genes to long contigs.",
+                    "help_text": "By default DeepBGC's Prodigal runs in 'single genome' mode, which requires sequence lengths to be equal to or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows you to override this and run in single genome mode again.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--prodigal-meta-mode`",
+                    "fa_icon": "fas fa-compress-alt"
+                },
+                "bgc_deepbgc_mergemaxproteingap": {
+                    "type": "integer",
+                    "default": 0,
+                    "description": "Merge detected BGCs within given number of proteins.",
+                    "fa_icon": "fas fa-angle-double-up",
+                    "help_text": "Merge detected BGCs within given number of proteins.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--merge-max-protein-gap`"
+                },
+                "bgc_deepbgc_mergemaxnuclgap": {
+                    "type": "integer",
+                    "default": 0,
+                    "description": "Merge detected BGCs within given number of nucleotides.",
+                    "fa_icon": "fas fa-angle-double-up",
+                    "help_text": "Merge detected BGCs within given number of nucleotides.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--merge-max-nucl-gap`"
+                },
+                "bgc_deepbgc_minnucl": {
+                    "type": "integer",
+                    "default": 1,
+                    "description": "Minimum BGC nucleotide length.",
+                    "fa_icon": "fas fa-angle-double-down",
+                    "help_text": "Minimum length a BGC must have (in bp) to be reported as detected.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--min-nucl`"
+                },
+                "bgc_deepbgc_minproteins": {
+                    "type": "integer",
+                    "default": 1,
+                    "description": "Minimum number of proteins in a BGC.",
+                    "fa_icon": "fas fa-angle-double-down",
+                    "help_text": "Minimum number of proteins a BGC must have to be reported as 'detected'.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--min-proteins`"
+                },
+                "bgc_deepbgc_mindomains": {
+                    "type": "integer",
+                    "default": 1,
+                    "description": "Minimum number of protein domains in a BGC.",
+                    "fa_icon": "fas fa-minus-square",
+                    "help_text": "Minimum number of domains a BGC must have to be reported as 'detected'.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--min-domains`"
+                },
+                "bgc_deepbgc_minbiodomains": {
+                    "type": "integer",
+                    "default": 0,
+                    "description": "Minimum number of known biosynthetic (as defined by antiSMASH) protein domains in a BGC.",
+                    "fa_icon": "fas fa-minus-square",
+                    "help_text": "Minimum number of biosynthetic protein domains a BGC must have to be reported as 'detected'. This is based on antiSMASH definitions.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--min-bio-domains`"
+                },
+                "bgc_deepbgc_classifierscore": {
+                    "type": "number",
+                    "default": 0.5,
+                    "description": "DeepBGC classification score threshold for assigning classes to BGCs.",
+                    "fa_icon": "fas fa-sort-amount-down",
+                    "help_text": "DeepBGC classification score threshold for assigning classes to BGCs.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s):\n> - DeepBGC: `--classifier-score`"
+                }
+            },
+            "fa_icon": "fa fa-sliders"
+        },
+        "bgc_gecco": {
+            "title": "BGC: GECCO",
+            "type": "object",
+            "description": "Biosynthetic gene cluster detection using Conditional Random Fields (CRFs). More info: https://gecco.embl.de",
+            "default": "",
+            "properties": {
+                "bgc_skip_gecco": {
+                    "type": "boolean",
+                    "description": "Skip GECCO during the BGC screening.",
+                    "fa_icon": "fas fa-ban"
+                },
+                "bgc_gecco_mask": {
+                    "type": "boolean",
+                    "description": "Enable unknown region masking to prevent genes from stretching across unknown nucleotides.",
+                    "fa_icon": "fas fa-mask",
+                    "help_text": "Enable unknown region masking to prevent genes from stretching across unknown nucleotides during ORF detection based on P(y)rodigal.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--mask`"
+                },
+                "bgc_gecco_cds": {
+                    "type": "integer",
+                    "default": 3,
+                    "description": "The minimum number of coding sequences a valid cluster must contain.",
+                    "fa_icon": "fas fa-align-right",
+                    "help_text": "Specify the number of consecutive genes a hit must have to be considered as part of a possible BGC region during BGC extraction.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--cds`"
+                },
+                "bgc_gecco_pfilter": {
+                    "type": "number",
+                    "description": "The p-value cutoff for protein domains to be included.",
+                    "fa_icon": "fas fa-filter",
+                    "default": 1e-9,
+                    "help_text": "The p-value cutoff for protein domains to be included.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--pfilter`"
+                },
+                "bgc_gecco_threshold": {
+                    "type": "number",
+                    "default": 0.8,
+                    "description": "The probability threshold for cluster detection.",
+                    "fa_icon": "fas fa-angle-double-up",
+                    "help_text": "Specify the minimum probability a predicted gene must have to be considered as part of a BGC during BGC extraction.\n\nReducing this value may increase the number and length of hits, but will reduce the accuracy of the predictions.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--threshold`"
+                },
+                "bgc_gecco_edgedistance": {
+                    "type": "integer",
+                    "default": 0,
+                    "description": "The minimum number of annotated genes that must separate a cluster from the edge.",
+                    "help_text": "The minimum number of annotated genes that must separate a possible BGC cluster from the contig edge. Edge clusters will still be included if they are longer. A lower number will increase the number of false positives on small contigs. Used during BGC extraction.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--edge-distance`",
+                    "fa_icon": "fas fa-ruler-horizontal"
+                }
+            },
+            "fa_icon": "fa fa-sliders"
+        },
+        "bgc_hmmsearch": {
+            "title": "BGC: hmmsearch",
+            "type": "object",
+            "description": "Biosynthetic gene cluster detection based on predefined HMM models. This tool implements methods using probabilistic models called profile hidden Markov models (profile HMMs) to search against a sequence database. More info: http://eddylab.org/software/hmmer/Userguide.pdf",
+            "default": "",
+            "properties": {
+                "bgc_run_hmmsearch": {
+                    "type": "boolean",
+                    "description": "Run hmmsearch during BGC screening.",
+                    "help_text": "hmmsearch is not run by default because HMM model files must be provided by the user with the flag `--bgc_hmmsearch_models`.",
+                    "fa_icon": "fas fa-ban"
+                },
+                "bgc_hmmsearch_models": {
+                    "type": "string",
+                    "description": "Specify path to the BGC hmm model file(s) to search against. Must be wrapped in quotes if a wildcard is used.",
+                    "help_text": "hmmsearch performs biosequence analysis using profile hidden Markov models.\nThe models are specified in `.hmm` files that are specified with this parameter, e.g.:\n\n```\n--bgc_hmmsearch_models '/<path>/<to>/<models>/*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion, _not_ bash! When using quotes, the absolute path to the HMM file(s) has to be given.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).",
+                    "fa_icon": "fas fa-layer-group"
+                },
+                "bgc_hmmsearch_savealignments": {
+                    "type": "boolean",
+                    "help_text": "Save a multiple alignment of all significant hits (those satisfying inclusion thresholds) to a file.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `-A`",
+                    "description": "Save a multiple alignment of all significant hits to a file.",
+                    "fa_icon": "fas fa-save"
+                },
+                "bgc_hmmsearch_savetargets": {
+                    "type": "boolean",
+                    "help_text": "Save a simple tabular (space-delimited) file summarizing the per-target output, with one data line per homologous target sequence found.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `--tblout`",
+                    "description": "Save a simple tabular file summarising the per-target output.",
+                    "fa_icon": "fas fa-save"
+                },
+                "bgc_hmmsearch_savedomains": {
+                    "type": "boolean",
+                    "help_text": "Save a simple tabular (space-delimited) file summarizing the per-domain output, with one data line per homologous domain detected in a query sequence for each homologous model.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `--domtblout`",
+                    "description": "Save a simple tabular file summarising the per-domain output.",
+                    "fa_icon": "fas fa-save"
+                }
+            },
+            "fa_icon": "fa fa-sliders"
+        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -233,7 +1546,88 @@
             "$ref": "#/$defs/input_output_options"
         },
         {
-            "$ref": "#/$defs/reference_genome_options"
+            "$ref": "#/$defs/screening_type_activation"
+        },
+        {
+            "$ref": "#/$defs/taxonomic_classification_general_options"
+        },
+        {
+            "$ref": "#/$defs/taxonomic_classification_mmseqs2_databases"
+        },
+        {
+            "$ref": "#/$defs/taxonomic_classification_mmseqs2_taxonomy"
+        },
+        {
+            "$ref": "#/$defs/annotation_general_options"
+        },
+        {
+            "$ref": "#/$defs/annotation_bakta"
+        },
+        {
+            "$ref": "#/$defs/annotation_prokka"
+        },
+        {
+            "$ref": "#/$defs/annotation_prodigal"
+        },
+        {
+            "$ref": "#/$defs/annotation_pyrodigal"
+        },
+        {
+            "$ref": "#/$defs/database_downloading_options"
+        },
+        {
+            "$ref": "#/$defs/amp_amplify"
+        },
+        {
+            "$ref": "#/$defs/amp_ampir"
+        },
+        {
+            "$ref": "#/$defs/amp_hmmsearch"
+        },
+        {
+            "$ref": "#/$defs/amp_macrel"
+        },
+        {
+            "$ref": "#/$defs/amp_ampcombi2_parsetables"
+        },
+        {
+            "$ref": "#/$defs/amp_ampcombi2_cluster"
+        },
+        {
+            "$ref": "#/$defs/arg_amrfinderplus"
+        },
+        {
+            "$ref": "#/$defs/arg_deeparg"
+        },
+        {
+            "$ref": "#/$defs/arg_fargene"
+        },
+        {
+            "$ref": "#/$defs/arg_rgi"
+        },
+        {
+            "$ref": "#/$defs/arg_abricate"
+        },
+        {
+            "$ref": "#/$defs/arg_hamronization"
+        },
+        {
+            "$ref": "#/$defs/arg_argnorm"
+        },
+        {
+            "$ref": "#/$defs/bgc_general_options"
+        },
+        {
+            "$ref": "#/$defs/bgc_antismash"
+        },
+        {
+            "$ref": "#/$defs/bgc_deepbgc"
+        },
+        {
+            "$ref": "#/$defs/bgc_gecco"
+        },
+        {
+            "$ref": "#/$defs/bgc_hmmsearch"
         },
         {
             "$ref": "#/$defs/institutional_config_options"
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 00000000..03cfcfa8
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,19 @@
+config {
+    // location for all nf-tests
+    testsDir "tests/"
+
+    // nf-test directory including temporary files for each test
+    workDir ".nf-test"
+
+    // location of library folder that is added automatically to the classpath
+    libDir "tests/pipeline/lib/"
+
+    // location of an optional nextflow.config file specific for executing tests
+    configFile "nextflow.config"
+
+    // run all tests with the defined docker profile from the main nextflow.config
+    profile ""
+
+    // add Nextflow options
+    //options "-resume"
+}
diff --git a/subworkflows/local/amp.nf b/subworkflows/local/amp.nf
new file mode 100644
index 00000000..293692a1
--- /dev/null
+++ b/subworkflows/local/amp.nf
@@ -0,0 +1,156 @@
+/*
+    Run AMP screening tools
+*/
+
+include { MACREL_CONTIGS } from '../../modules/nf-core/macrel/contigs/main'
+include { HMMER_HMMSEARCH as AMP_HMMER_HMMSEARCH } from '../../modules/nf-core/hmmer/hmmsearch/main'
+include { AMPLIFY_PREDICT } from '../../modules/nf-core/amplify/predict/main'
+include { AMPIR } from '../../modules/nf-core/ampir/main'
+include { AMP_DATABASE_DOWNLOAD } from '../../modules/local/amp_database_download'
+include { AMPCOMBI2_PARSETABLES } from '../../modules/nf-core/ampcombi2/parsetables'
+include { AMPCOMBI2_COMPLETE } from '../../modules/nf-core/ampcombi2/complete'
+include { AMPCOMBI2_CLUSTER } from '../../modules/nf-core/ampcombi2/cluster'
+include { GUNZIP as GUNZIP_MACREL_PRED ; GUNZIP as GUNZIP_MACREL_ORFS } from '../../modules/nf-core/gunzip/main'
+include { GUNZIP as AMP_GUNZIP_HMMER_HMMSEARCH } from '../../modules/nf-core/gunzip/main'
+include { TABIX_BGZIP as AMP_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main'
+include { MERGE_TAXONOMY_AMPCOMBI } from '../../modules/local/merge_taxonomy_ampcombi'
+
+workflow AMP {
+    take:
+    fastas // tuple val(meta), path(contigs)
+    faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa)
+    tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv)
+    gbks // tuple val(meta), path(ANNOTATION_ANNOTATION_TOOL.out.gbk)
+
+    main:
+    ch_versions = Channel.empty()
+    ch_ampresults_for_ampcombi = Channel.empty()
+    ch_ampcombi_summaries = Channel.empty()
+    ch_macrel_faa = Channel.empty()
+    ch_ampcombi_complete = Channel.empty()
+    ch_ampcombi_for_cluster = Channel.empty()
+
+    // When adding a new tool that requires FAA, make sure to update the conditions
+    // in funcscan.nf around annotation and AMP subworkflow execution
+    // to ensure annotation is executed!
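+    // For illustration only: each of the channel aliases below carries the
+    // standard nf-core [ meta, file ] tuple shape as emitted by the annotation
+    // subworkflow, e.g. [ [id:'sample1'], 'sample1.faa' ]
+    // (the sample id here is a hypothetical example, not a pipeline value).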
+ ch_faa_for_amplify = faas + ch_faa_for_amp_hmmsearch = faas + ch_faa_for_ampir = faas + ch_faa_for_ampcombi = faas + ch_gbk_for_ampcombi = gbks + + // AMPLIFY + if ( !params.amp_skip_amplify ) { + AMPLIFY_PREDICT ( ch_faa_for_amplify, [] ) + ch_versions = ch_versions.mix( AMPLIFY_PREDICT.out.versions ) + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( AMPLIFY_PREDICT.out.tsv ) + } + + // MACREL + if ( !params.amp_skip_macrel ) { + MACREL_CONTIGS ( fastas ) + ch_versions = ch_versions.mix( MACREL_CONTIGS.out.versions ) + GUNZIP_MACREL_PRED ( MACREL_CONTIGS.out.amp_prediction ) + GUNZIP_MACREL_ORFS ( MACREL_CONTIGS.out.all_orfs ) + ch_versions = ch_versions.mix( GUNZIP_MACREL_PRED.out.versions ) + ch_versions = ch_versions.mix( GUNZIP_MACREL_ORFS.out.versions ) + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( GUNZIP_MACREL_PRED.out.gunzip ) + ch_macrel_faa = ch_macrel_faa.mix( GUNZIP_MACREL_ORFS.out.gunzip ) + ch_faa_for_ampcombi = ch_faa_for_ampcombi.mix( ch_macrel_faa ) + } + + // AMPIR + if ( !params.amp_skip_ampir ) { + AMPIR ( ch_faa_for_ampir, params.amp_ampir_model, params.amp_ampir_minlength, 0.0 ) + ch_versions = ch_versions.mix( AMPIR.out.versions ) + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( AMPIR.out.amps_tsv ) + } + + // HMMSEARCH + if ( params.amp_run_hmmsearch ) { + if ( params.amp_hmmsearch_models ) { ch_amp_hmm_models = Channel.fromPath( params.amp_hmmsearch_models, checkIfExists: true ) } else { error('[nf-core/funcscan] error: HMM model files not found for --amp_hmmsearch_models! Please check input.') } + + ch_amp_hmm_models_meta = ch_amp_hmm_models + .map { + file -> + def meta = [:] + meta['id'] = file.extension == 'gz' ? file.name - '.hmm.gz' : file.name - '.hmm' + [ meta, file ] + } + + ch_in_for_amp_hmmsearch = ch_faa_for_amp_hmmsearch + .combine( ch_amp_hmm_models_meta ) + .map { + meta_faa, faa, meta_hmm, hmm -> + def meta_new = [:] + meta_new['id'] = meta_faa['id'] + meta_new['hmm_id'] = meta_hmm['id'] + [ meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains ] + } + + AMP_HMMER_HMMSEARCH ( ch_in_for_amp_hmmsearch ) + ch_versions = ch_versions.mix( AMP_HMMER_HMMSEARCH.out.versions ) + AMP_GUNZIP_HMMER_HMMSEARCH ( AMP_HMMER_HMMSEARCH.out.output ) + ch_versions = ch_versions.mix( AMP_GUNZIP_HMMER_HMMSEARCH.out.versions ) + ch_AMP_GUNZIP_HMMER_HMMSEARCH = AMP_GUNZIP_HMMER_HMMSEARCH.out.gunzip + .map { meta, file -> + [ [id: meta.id], file ] + } + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( ch_AMP_GUNZIP_HMMER_HMMSEARCH ) + } + + // AMPCOMBI2 + ch_input_for_ampcombi = ch_ampresults_for_ampcombi + .groupTuple() + .join( ch_faa_for_ampcombi ) + .join( ch_gbk_for_ampcombi ) + .multiMap{ + input: [ it[0], it[1] ] + faa: it[2] + gbk: it[3] + } + + if ( params.amp_ampcombi_db != null ) { + AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, params.amp_ampcombi_db, [] ) + } else { + AMP_DATABASE_DOWNLOAD( params.amp_ampcombi_db_id ) + ch_versions = ch_versions.mix( AMP_DATABASE_DOWNLOAD.out.versions ) + ch_ampcombi_input_db = AMP_DATABASE_DOWNLOAD.out.db + AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, ch_ampcombi_input_db, [] ) + } + ch_versions = ch_versions.mix( AMPCOMBI2_PARSETABLES.out.versions ) + + ch_ampcombi_summaries = AMPCOMBI2_PARSETABLES.out.tsv.map{ it[1] 
}.collect() + + AMPCOMBI2_COMPLETE ( ch_ampcombi_summaries ) + ch_versions = ch_versions.mix( AMPCOMBI2_COMPLETE.out.versions ) + + ch_ampcombi_complete = AMPCOMBI2_COMPLETE.out.tsv + .filter { file -> file.countLines() > 1 } + + if ( ch_ampcombi_complete != null ) { + AMPCOMBI2_CLUSTER ( ch_ampcombi_complete ) + ch_versions = ch_versions.mix( AMPCOMBI2_CLUSTER.out.versions ) + } else { + log.warn("[nf-core/funcscan] No AMP hits were found in the samples and so no clustering will be applied.") + } + + // MERGE_TAXONOMY + if ( params.run_taxa_classification && ch_ampcombi_complete == null ) { + log.warn("[nf-core/funcscan] No AMP hits were found in the samples, therefore no Taxonomy will be merged ") + } else if ( params.run_taxa_classification && ch_ampcombi_complete != null ) { + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + + MERGE_TAXONOMY_AMPCOMBI( AMPCOMBI2_CLUSTER.out.cluster_tsv, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_AMPCOMBI.out.versions ) + + ch_tabix_input = Channel.of( [ 'id':'ampcombi_complete_summary_taxonomy' ] ) + .combine( MERGE_TAXONOMY_AMPCOMBI.out.tsv ) + + AMP_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( AMP_TABIX_BGZIP.out.versions ) + } + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf new file mode 100644 index 00000000..c1c8e332 --- /dev/null +++ b/subworkflows/local/annotation.nf @@ -0,0 +1,95 @@ +/* + Run annotation tools +*/ + +include { PROKKA } from '../../modules/nf-core/prokka/main' +include { PRODIGAL } from '../../modules/nf-core/prodigal/main' +include { PYRODIGAL } from '../../modules/nf-core/pyrodigal/main' +include { BAKTA_BAKTADBDOWNLOAD } from '../../modules/nf-core/bakta/baktadbdownload/main' +include { BAKTA_BAKTA } from '../../modules/nf-core/bakta/bakta/main' +include { GUNZIP as GUNZIP_PRODIGAL_FNA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRODIGAL_FAA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRODIGAL_GBK } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_FNA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_FAA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_GBK } from '../../modules/nf-core/gunzip/main' + +workflow ANNOTATION { + take: + fasta // tuple val(meta), path(contigs) + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( params.annotation_tool == "pyrodigal" || ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) || ( params.annotation_tool == "prodigal" && params.run_amp_screening == true ) ) { // Need to use Pyrodigal for most BGC tools and AMPcombi because Prodigal GBK annotation format is incompatible with them. + + if ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) { + log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with antiSMASH, DeepBGC, and GECCO. 
If you specifically wish to run Prodigal instead, please skip antiSMASH, DeepBGC, and GECCO or provide a pre-annotated GBK file in the samplesheet.") + } else if ( params.annotation_tool == "prodigal" && params.run_amp_screening == true ) { + log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with AMPcombi. If you specifically wish to run Prodigal instead, please skip AMP workflow or provide a pre-annotated GBK file in the samplesheet.") + } + + PYRODIGAL ( fasta, "gbk" ) + GUNZIP_PYRODIGAL_FAA ( PYRODIGAL.out.faa ) + GUNZIP_PYRODIGAL_FNA ( PYRODIGAL.out.fna) + GUNZIP_PYRODIGAL_GBK ( PYRODIGAL.out.annotations ) + ch_versions = ch_versions.mix(PYRODIGAL.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FAA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FNA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_GBK.out.versions) + ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip + ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip + ch_annotation_gbk = GUNZIP_PYRODIGAL_GBK.out.gunzip + + } else if ( params.annotation_tool == "prodigal" ) { + + PRODIGAL ( fasta, "gbk" ) + GUNZIP_PRODIGAL_FAA ( PRODIGAL.out.amino_acid_fasta ) + GUNZIP_PRODIGAL_FNA ( PRODIGAL.out.nucleotide_fasta) + GUNZIP_PRODIGAL_GBK ( PRODIGAL.out.gene_annotations ) + ch_versions = ch_versions.mix(PRODIGAL.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FAA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FNA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_GBK.out.versions) + ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip + ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip + ch_annotation_gbk = GUNZIP_PRODIGAL_GBK.out.gunzip + + } else if ( params.annotation_tool == "prokka" ) { + + PROKKA ( fasta, [], [] ) + ch_versions = ch_versions.mix(PROKKA.out.versions) + ch_multiqc_files = PROKKA.out.txt.collect{it[1]}.ifEmpty([]) + ch_annotation_faa = PROKKA.out.faa + ch_annotation_fna = PROKKA.out.fna + ch_annotation_gbk = PROKKA.out.gbk + + } else if ( params.annotation_tool == "bakta" ) { + + // BAKTA prepare download + if ( params.annotation_bakta_db ) { + ch_bakta_db = Channel + .fromPath( params.annotation_bakta_db ) + .first() + } else { + BAKTA_BAKTADBDOWNLOAD ( ) + ch_versions = ch_versions.mix( BAKTA_BAKTADBDOWNLOAD.out.versions ) + ch_bakta_db = ( BAKTA_BAKTADBDOWNLOAD.out.db ) + } + + BAKTA_BAKTA ( fasta, ch_bakta_db, [], [] ) + ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions) + ch_multiqc_files = BAKTA_BAKTA.out.txt.collect{it[1]}.ifEmpty([]) + ch_annotation_faa = BAKTA_BAKTA.out.faa + ch_annotation_fna = BAKTA_BAKTA.out.fna + ch_annotation_gbk = BAKTA_BAKTA.out.gbff + } + + emit: + versions = ch_versions + multiqc_files = ch_multiqc_files + faa = ch_annotation_faa // [ [meta], path(faa) ] + fna = ch_annotation_fna // [ [meta], path(fna) ] + gbk = ch_annotation_gbk // [ [meta], path(gbk) ] +} diff --git a/subworkflows/local/arg.nf b/subworkflows/local/arg.nf new file mode 100644 index 00000000..81dffb72 --- /dev/null +++ b/subworkflows/local/arg.nf @@ -0,0 +1,211 @@ +/* + Run ARG screening tools +*/ + +include { ABRICATE_RUN } from '../../modules/nf-core/abricate/run/main' +include { AMRFINDERPLUS_UPDATE } from '../../modules/nf-core/amrfinderplus/update/main' +include { AMRFINDERPLUS_RUN } from '../../modules/nf-core/amrfinderplus/run/main' +include { DEEPARG_DOWNLOADDATA } from '../../modules/nf-core/deeparg/downloaddata/main' +include { 
DEEPARG_PREDICT } from '../../modules/nf-core/deeparg/predict/main' +include { FARGENE } from '../../modules/nf-core/fargene/main' +include { RGI_CARDANNOTATION } from '../../modules/nf-core/rgi/cardannotation/main' +include { RGI_MAIN } from '../../modules/nf-core/rgi/main/main' +include { UNTAR as UNTAR_CARD } from '../../modules/nf-core/untar/main' +include { TABIX_BGZIP as ARG_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { MERGE_TAXONOMY_HAMRONIZATION } from '../../modules/local/merge_taxonomy_hamronization' +include { HAMRONIZATION_RGI } from '../../modules/nf-core/hamronization/rgi/main' +include { HAMRONIZATION_FARGENE } from '../../modules/nf-core/hamronization/fargene/main' +include { HAMRONIZATION_SUMMARIZE } from '../../modules/nf-core/hamronization/summarize/main' +include { HAMRONIZATION_ABRICATE } from '../../modules/nf-core/hamronization/abricate/main' +include { HAMRONIZATION_DEEPARG } from '../../modules/nf-core/hamronization/deeparg/main' +include { HAMRONIZATION_AMRFINDERPLUS } from '../../modules/nf-core/hamronization/amrfinderplus/main' +include { ARGNORM as ARGNORM_DEEPARG } from '../../modules/nf-core/argnorm/main' +include { ARGNORM as ARGNORM_ABRICATE } from '../../modules/nf-core/argnorm/main' +include { ARGNORM as ARGNORM_AMRFINDERPLUS } from '../../modules/nf-core/argnorm/main' + +workflow ARG { + take: + fastas // tuple val(meta), path(contigs) + annotations + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + + main: + ch_versions = Channel.empty() + + // Prepare HAMRONIZATION reporting channel + ch_input_to_hamronization_summarize = Channel.empty() + + // AMRfinderplus run + // Prepare channel for database + if ( !params.arg_skip_amrfinderplus && params.arg_amrfinderplus_db ) { + ch_amrfinderplus_db = Channel + .fromPath( params.arg_amrfinderplus_db ) + .first() + } else if ( !params.arg_skip_amrfinderplus && !params.arg_amrfinderplus_db ) { + AMRFINDERPLUS_UPDATE( ) + ch_versions = ch_versions.mix( AMRFINDERPLUS_UPDATE.out.versions ) + ch_amrfinderplus_db = AMRFINDERPLUS_UPDATE.out.db + } + + if ( !params.arg_skip_amrfinderplus ) { + AMRFINDERPLUS_RUN ( fastas, ch_amrfinderplus_db ) + ch_versions = ch_versions.mix( AMRFINDERPLUS_RUN.out.versions ) + + // Reporting + HAMRONIZATION_AMRFINDERPLUS ( AMRFINDERPLUS_RUN.out.report, 'tsv', AMRFINDERPLUS_RUN.out.tool_version, AMRFINDERPLUS_RUN.out.db_version ) + ch_versions = ch_versions.mix( HAMRONIZATION_AMRFINDERPLUS.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_AMRFINDERPLUS.out.tsv ) + + if ( !params.arg_skip_argnorm ) { + ch_input_to_argnorm_amrfinderplus = HAMRONIZATION_AMRFINDERPLUS.out.tsv.filter{ meta, file -> !file.isEmpty() } + ARGNORM_AMRFINDERPLUS ( ch_input_to_argnorm_amrfinderplus, 'amrfinderplus', 'ncbi' ) + ch_versions = ch_versions.mix( ARGNORM_AMRFINDERPLUS.out.versions ) + } + } + + // fARGene run + if ( !params.arg_skip_fargene ) { + ch_fargene_classes = Channel.fromList( params.arg_fargene_hmmmodel.tokenize(',') ) + + ch_fargene_input = fastas + .combine( ch_fargene_classes ) + .map { + meta, fastas, hmm_class -> + def meta_new = meta.clone() + meta_new['hmm_class'] = hmm_class + [ meta_new, fastas, hmm_class ] + } + .multiMap { + fastas: [ it[0], it[1] ] + hmmclass: it[2] + } + + FARGENE ( ch_fargene_input.fastas, ch_fargene_input.hmmclass ) + ch_versions = ch_versions.mix( FARGENE.out.versions ) + + // Reporting + // Note: currently hardcoding versions, has to be updated with every fARGene-update + 
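+            // Illustrative note (assumed channel shape): FARGENE.out.hmm_genes emits [ meta, [ genes_1.fasta, genes_2.fasta, ... ] ];
+            // transpose() below unnests these per-class file lists so that hAMRonization receives one [ meta, file ] pair per predicted gene file.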
HAMRONIZATION_FARGENE( FARGENE.out.hmm_genes.transpose(), 'tsv', '0.1', '0.1' ) + ch_versions = ch_versions.mix( HAMRONIZATION_FARGENE.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_FARGENE.out.tsv ) + } + + // RGI run + if ( !params.arg_skip_rgi ) { + + if ( !params.arg_rgi_db ) { + + // Download and untar CARD + UNTAR_CARD ( [ [], file('https://card.mcmaster.ca/latest/data', checkIfExists: true) ] ) + ch_versions = ch_versions.mix( UNTAR_CARD.out.versions ) + rgi_db = UNTAR_CARD.out.untar.map{ it[1] } + + } else { + + // Use user-supplied database + rgi_db = params.arg_rgi_db + + } + + RGI_CARDANNOTATION ( rgi_db ) + ch_versions = ch_versions.mix( RGI_CARDANNOTATION.out.versions ) + + RGI_MAIN ( fastas, RGI_CARDANNOTATION.out.db, [] ) + ch_versions = ch_versions.mix( RGI_MAIN.out.versions ) + + // Reporting + HAMRONIZATION_RGI ( RGI_MAIN.out.tsv, 'tsv', RGI_MAIN.out.tool_version, RGI_MAIN.out.db_version ) + ch_versions = ch_versions.mix( HAMRONIZATION_RGI.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_RGI.out.tsv ) + } + + // DeepARG prepare download + if ( !params.arg_skip_deeparg && params.arg_deeparg_db ) { + ch_deeparg_db = Channel + .fromPath( params.arg_deeparg_db ) + .first() + } else if ( !params.arg_skip_deeparg && !params.arg_deeparg_db ) { + DEEPARG_DOWNLOADDATA( ) + ch_versions = ch_versions.mix( DEEPARG_DOWNLOADDATA.out.versions ) + ch_deeparg_db = DEEPARG_DOWNLOADDATA.out.db + } + + // DeepARG run + if ( !params.arg_skip_deeparg ) { + + annotations + .map { + it -> + def meta = it[0] + def anno = it[1] + def model = params.arg_deeparg_model + + [ meta, anno, model ] + } + .set { ch_input_for_deeparg } + + DEEPARG_PREDICT ( ch_input_for_deeparg, ch_deeparg_db ) + ch_versions = ch_versions.mix( DEEPARG_PREDICT.out.versions ) + + // Reporting + // Note: currently hardcoding versions as unreported by DeepARG + // Make sure to update on version bump. + ch_input_to_hamronization_deeparg = DEEPARG_PREDICT.out.arg.mix( DEEPARG_PREDICT.out.potential_arg ) + HAMRONIZATION_DEEPARG ( ch_input_to_hamronization_deeparg, 'tsv', '1.0.4', params.arg_deeparg_db_version ) + ch_versions = ch_versions.mix( HAMRONIZATION_DEEPARG.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_DEEPARG.out.tsv ) + + if ( !params.arg_skip_argnorm ) { + ch_input_to_argnorm_deeparg = HAMRONIZATION_DEEPARG.out.tsv.filter{ meta, file -> !file.isEmpty() } + ARGNORM_DEEPARG ( ch_input_to_argnorm_deeparg, 'deeparg', 'deeparg' ) + ch_versions = ch_versions.mix( ARGNORM_DEEPARG.out.versions ) + } + } + + // ABRicate run + if ( !params.arg_skip_abricate ) { + abricate_dbdir = params.arg_abricate_db ? 
file(params.arg_abricate_db, checkIfExists: true) : [] + ABRICATE_RUN ( fastas, abricate_dbdir ) + ch_versions = ch_versions.mix( ABRICATE_RUN.out.versions ) + + HAMRONIZATION_ABRICATE ( ABRICATE_RUN.out.report, 'tsv', '1.0.1', '2021-Mar-27' ) + ch_versions = ch_versions.mix( HAMRONIZATION_ABRICATE.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_ABRICATE.out.tsv ) + + if ( ( params.arg_abricate_db_id == 'ncbi' || + params.arg_abricate_db_id == 'resfinder' || + params.arg_abricate_db_id == 'argannot' || + params.arg_abricate_db_id == 'megares') && !params.arg_skip_argnorm ) { + ch_input_to_argnorm_abricate = HAMRONIZATION_ABRICATE.out.tsv.filter{ meta, file -> !file.isEmpty() } + ARGNORM_ABRICATE ( ch_input_to_argnorm_abricate, 'abricate', params.arg_abricate_db_id ) + ch_versions = ch_versions.mix( ARGNORM_ABRICATE.out.versions ) + } + } + + ch_input_to_hamronization_summarize + .map{ + it[1] + } + .collect() + .set { ch_input_for_hamronization_summarize } + + HAMRONIZATION_SUMMARIZE( ch_input_for_hamronization_summarize, params.arg_hamronization_summarizeformat ) + ch_versions = ch_versions.mix( HAMRONIZATION_SUMMARIZE.out.versions ) + + // MERGE_TAXONOMY + if ( params.run_taxa_classification ) { + + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + MERGE_TAXONOMY_HAMRONIZATION( HAMRONIZATION_SUMMARIZE.out.tsv, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_HAMRONIZATION.out.versions ) + + ch_tabix_input = Channel.of( [ 'id':'hamronization_combined_report' ] ) + .combine(MERGE_TAXONOMY_HAMRONIZATION.out.tsv) + + ARG_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( ARG_TABIX_BGZIP.out.versions ) + } + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf new file mode 100644 index 00000000..0130205d --- /dev/null +++ b/subworkflows/local/bgc.nf @@ -0,0 +1,202 @@ +/* + Run BGC screening tools +*/ + +include { UNTAR as UNTAR_CSS } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_DETECTION } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_MODULES } from '../../modules/nf-core/untar/main' +include { ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES } from '../../modules/nf-core/antismash/antismashlitedownloaddatabases/main' +include { ANTISMASH_ANTISMASHLITE } from '../../modules/nf-core/antismash/antismashlite/main' +include { GECCO_RUN } from '../../modules/nf-core/gecco/run/main' +include { HMMER_HMMSEARCH as BGC_HMMER_HMMSEARCH } from '../../modules/nf-core/hmmer/hmmsearch/main' +include { DEEPBGC_DOWNLOAD } from '../../modules/nf-core/deepbgc/download/main' +include { DEEPBGC_PIPELINE } from '../../modules/nf-core/deepbgc/pipeline/main' +include { COMBGC } from '../../modules/local/combgc' +include { TABIX_BGZIP as BGC_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/merge_taxonomy_combgc' + +workflow BGC { + + take: + fastas // tuple val(meta), path(PREPPED_INPUT.out.fna) + faas // tuple val(meta), path(.out.faa) + gbks // tuple val(meta), path(.out.gbk) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + + main: + ch_versions = Channel.empty() + ch_bgcresults_for_combgc = Channel.empty() + + // When adding new tool that requires FAA, make sure to update conditions + // in funcscan.nf around annotation and AMP subworkflow execution + // to ensure annotation is executed! 
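+    // Assumed channel shape (illustrative): faas = [ [ id:'sample_1' ], sample_1.faa ],
+    // i.e. one annotated protein FASTA per sample as emitted by the ANNOTATION subworkflow.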
+ ch_faa_for_bgc_hmmsearch = faas + + // ANTISMASH + if ( !params.bgc_skip_antismash ) { + // Check whether user supplies database and/or antismash directory. If not, obtain them via the module antismashlite/antismashlitedownloaddatabases. + // Important for future maintenance: For CI tests, only the "else" option below is used. Both options should be tested locally whenever the antiSMASH module gets updated. + if ( params.bgc_antismash_db && params.bgc_antismash_installdir ) { + + ch_antismash_databases = Channel + .fromPath( params.bgc_antismash_db ) + .first() + + ch_antismash_directory = Channel + .fromPath( params.bgc_antismash_installdir ) + .first() + + } else if ( params.bgc_antismash_db && ( session.config.conda && session.config.conda.enabled ) ) { + + ch_antismash_databases = Channel + .fromPath( params.bgc_antismash_db ) + .first() + + ch_antismash_directory = [] + + } else { + + // May need to update on each new version of antismash-lite due to changes to scripts inside these tars + ch_css_for_antismash = "https://github.com/nf-core/test-datasets/raw/724737e23a53085129cd5e015acafbf7067822ca/data/delete_me/antismash/css.tar.gz" + ch_detection_for_antismash = "https://github.com/nf-core/test-datasets/raw/c3174c50bf654e477bf329dbaf72acc8345f9b7a/data/delete_me/antismash/detection.tar.gz" + ch_modules_for_antismash = "https://github.com/nf-core/test-datasets/raw/c3174c50bf654e477bf329dbaf72acc8345f9b7a/data/delete_me/antismash/modules.tar.gz" + + UNTAR_CSS ( [ [], ch_css_for_antismash ] ) + ch_versions = ch_versions.mix( UNTAR_CSS.out.versions ) + + UNTAR_DETECTION ( [ [], ch_detection_for_antismash ] ) + ch_versions = ch_versions.mix( UNTAR_DETECTION.out.versions ) + + UNTAR_MODULES ( [ [], ch_modules_for_antismash ] ) + ch_versions = ch_versions.mix( UNTAR_MODULES.out.versions ) + + ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES ( UNTAR_CSS.out.untar.map{ it[1] }, UNTAR_DETECTION.out.untar.map{ it[1] }, UNTAR_MODULES.out.untar.map{ it[1] } ) + ch_versions = ch_versions.mix( ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.versions ) + ch_antismash_databases = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database + + ch_antismash_directory = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir + } + + ANTISMASH_ANTISMASHLITE ( gbks, ch_antismash_databases, ch_antismash_directory, [] ) + + ch_versions = ch_versions.mix( ANTISMASH_ANTISMASHLITE.out.versions ) + ch_antismashresults = ANTISMASH_ANTISMASHLITE.out.knownclusterblast_dir + .mix( ANTISMASH_ANTISMASHLITE.out.gbk_input ) + .groupTuple() + .map{ + meta, files -> + [ meta, files.flatten() ] + } + + // Filter out samples with no BGC hits + ch_antismashresults_for_combgc = ch_antismashresults + .join(fastas, remainder: false) + .join(ANTISMASH_ANTISMASHLITE.out.gbk_results, remainder: false) + .map { + meta, gbk_input, fasta, gbk_results -> + [ meta, gbk_input ] + } + + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_antismashresults_for_combgc ) + } + + // DEEPBGC + if ( !params.bgc_skip_deepbgc ) { + if ( params.bgc_deepbgc_db ) { + + ch_deepbgc_database = Channel + .fromPath( params.bgc_deepbgc_db ) + .first() + } else { + DEEPBGC_DOWNLOAD() + ch_deepbgc_database = DEEPBGC_DOWNLOAD.out.db + ch_versions = ch_versions.mix( DEEPBGC_DOWNLOAD.out.versions ) + } + + DEEPBGC_PIPELINE ( gbks, ch_deepbgc_database ) + ch_versions = ch_versions.mix( DEEPBGC_PIPELINE.out.versions ) + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( DEEPBGC_PIPELINE.out.bgc_tsv ) + } + + // GECCO + if ( !params.bgc_skip_gecco ) { + 
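+        // GECCO_RUN takes [ meta, input, hmm ]; the empty third element is assumed to
+        // pass no custom HMM file, in which case GECCO runs with its built-in models.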
ch_gecco_input = gbks.groupTuple() + .multiMap { + fastas: [ it[0], it[1], [] ] + } + + GECCO_RUN ( ch_gecco_input, [] ) + ch_versions = ch_versions.mix( GECCO_RUN.out.versions ) + ch_geccoresults_for_combgc = GECCO_RUN.out.gbk + .mix( GECCO_RUN.out.clusters ) + .groupTuple() + .map{ + meta, files -> + [ meta, files.flatten() ] + } + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_geccoresults_for_combgc ) + } + + // HMMSEARCH + if ( params.bgc_run_hmmsearch ) { + if ( params.bgc_hmmsearch_models ) { ch_bgc_hmm_models = Channel.fromPath( params.bgc_hmmsearch_models, checkIfExists: true ) } else { error('[nf-core/funcscan] error: hmm model files not found for --bgc_hmmsearch_models! Please check input.') } + + ch_bgc_hmm_models_meta = ch_bgc_hmm_models + .map { + file -> + def meta = [:] + meta['id'] = file.extension == 'gz' ? file.name - '.hmm.gz' : file.name - '.hmm' + + [ meta, file ] + } + + ch_in_for_bgc_hmmsearch = ch_faa_for_bgc_hmmsearch.combine(ch_bgc_hmm_models_meta) + .map { + meta_faa, faa, meta_hmm, hmm -> + def meta_new = [:] + meta_new['id'] = meta_faa['id'] + meta_new['hmm_id'] = meta_hmm['id'] + [ meta_new, hmm, faa, params.bgc_hmmsearch_savealignments, params.bgc_hmmsearch_savetargets, params.bgc_hmmsearch_savedomains ] + } + + BGC_HMMER_HMMSEARCH ( ch_in_for_bgc_hmmsearch ) + ch_versions = ch_versions.mix( BGC_HMMER_HMMSEARCH.out.versions ) + } + + // COMBGC + + ch_bgcresults_for_combgc + .join(fastas, remainder: true) + .filter { + meta, bgcfile, fasta -> + if ( !bgcfile ) { log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample: ${meta.id}") } + return [meta, bgcfile, fasta] + } + + COMBGC ( ch_bgcresults_for_combgc ) + ch_versions = ch_versions.mix( COMBGC.out.versions ) + + // COMBGC concatenation + if ( !params.run_taxa_classification ) { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true ) + } else { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', keepHeader:true ) + } + + // MERGE_TAXONOMY + if ( params.run_taxa_classification ) { + + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) + + ch_tabix_input = Channel.of( [ 'id':'combgc_complete_summary_taxonomy' ] ) + .combine(MERGE_TAXONOMY_COMBGC.out.tsv) + + BGC_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) + } + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/taxa_class.nf b/subworkflows/local/taxa_class.nf new file mode 100644 index 00000000..d76e1dff --- /dev/null +++ b/subworkflows/local/taxa_class.nf @@ -0,0 +1,62 @@ +/* + TAXONOMIC CLASSIFICATION +*/ + +include { MMSEQS_CREATEDB } from '../../modules/nf-core/mmseqs/createdb/main' +include { MMSEQS_DATABASES } from '../../modules/nf-core/mmseqs/databases/main' +include { MMSEQS_TAXONOMY } from '../../modules/nf-core/mmseqs/taxonomy/main' +include { MMSEQS_CREATETSV } from '../../modules/nf-core/mmseqs/createtsv/main' + +workflow TAXA_CLASS { + take: + contigs // tuple val(meta), path(contigs) + + main: + ch_versions = Channel.empty() + ch_mmseqs_db = Channel.empty() + ch_taxonomy_querydb = Channel.empty() + ch_taxonomy_querydb_taxdb = Channel.empty() + ch_taxonomy_tsv = Channel.empty() + + if ( 
params.taxa_classification_tool == 'mmseqs2') { + + // Download the ref db if not supplied by user + // MMSEQS_DATABASE + if ( params.taxa_classification_mmseqs_db != null ) { + ch_mmseqs_db = Channel + .fromPath( params.taxa_classification_mmseqs_db ) + .first() + } else { + MMSEQS_DATABASES ( params.taxa_classification_mmseqs_db_id ) + ch_versions = ch_versions.mix( MMSEQS_DATABASES.out.versions ) + ch_mmseqs_db = ( MMSEQS_DATABASES.out.database ) + } + + // Create db for query contigs, assign taxonomy and convert to table format + // MMSEQS_CREATEDB + MMSEQS_CREATEDB ( contigs ) + ch_versions = ch_versions.mix( MMSEQS_CREATEDB.out.versions ) + + // MMSEQS_TAXONOMY + MMSEQS_TAXONOMY ( MMSEQS_CREATEDB.out.db, ch_mmseqs_db ) + ch_versions = ch_versions.mix( MMSEQS_TAXONOMY.out.versions ) + ch_taxonomy_querydb_taxdb = MMSEQS_TAXONOMY.out.db_taxonomy + + // Join together to ensure in sync + ch_taxonomy_input_for_createtsv = MMSEQS_CREATEDB.out.db + .join(MMSEQS_TAXONOMY.out.db_taxonomy) + .multiMap { meta, db, db_taxonomy -> + db: [ meta,db ] + taxdb: [ meta, db_taxonomy ] + } + + // MMSEQS_CREATETSV + MMSEQS_CREATETSV ( ch_taxonomy_input_for_createtsv.taxdb, [[:],[]], ch_taxonomy_input_for_createtsv.db ) + ch_versions = ch_versions.mix( MMSEQS_CREATETSV.out.versions ) + ch_taxonomy_tsv = MMSEQS_CREATETSV.out.tsv + } + + emit: + versions = ch_versions + sample_taxonomy = ch_taxonomy_tsv // channel: [ val(meta), tsv ] +} diff --git a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf index 5693c827..d2093d01 100644 --- a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf @@ -8,14 +8,14 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -24,7 +24,6 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin */ workflow PIPELINE_INITIALISATION { - take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime @@ -40,7 +39,7 @@ workflow PIPELINE_INITIALISATION { // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, 
true, outdir, @@ -50,7 +49,7 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, null @@ -59,7 +58,7 @@ workflow PIPELINE_INITIALISATION { // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( + UTILS_NFCORE_PIPELINE( nextflow_cli_args ) @@ -73,23 +72,7 @@ workflow PIPELINE_INITIALISATION { // Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } + .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) .set { ch_samplesheet } emit: @@ -104,7 +87,6 @@ workflow PIPELINE_INITIALISATION { */ workflow PIPELINE_COMPLETION { - take: email // string: email address email_on_fail // string: email address sent on pipeline failure @@ -141,7 +123,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -154,30 +136,46 @@ workflow PIPELINE_COMPLETION { // Check and validate pipeline parameters // def validateInputParameters() { - genomeExistsError() + // Validate antiSMASH inputs for containers + // 1. Make sure that either both or none of the antiSMASH directories are supplied + if (['docker', 'singularity'].contains(workflow.containerEngine) && ((params.run_bgc_screening && !params.bgc_antismash_db && params.bgc_antismash_installdir && !params.bgc_skip_antismash) || (params.run_bgc_screening && params.bgc_antismash_db && !params.bgc_antismash_installdir && !params.bgc_skip_antismash))) { + error("[nf-core/funcscan] ERROR: You supplied either the antiSMASH database or its installation directory, but not both. Please either supply both directories or none (letting the pipeline download them instead).") + } + else if (['docker', 'singularity'].contains(workflow.containerEngine) && (params.run_bgc_screening && params.bgc_antismash_db && params.bgc_antismash_installdir && !params.bgc_skip_antismash)) { + antismash_database_dir = new File(params.bgc_antismash_db) + antismash_install_dir = new File(params.bgc_antismash_installdir) + if (antismash_database_dir.name == antismash_install_dir.name) { + error("[nf-core/funcscan] ERROR: Your supplied antiSMASH database and installation directories have identical names: " + antismash_install_dir.name + ".\nPlease make sure to name them differently, for example:\n - Database directory: " + antismash_database_dir.parent + "/antismash_db\n - Installation directory: " + antismash_install_dir.parent + "/antismash_dir") + } + } + + // 3. Give warning if not using container system assuming conda + if (params.run_bgc_screening && (!params.bgc_antismash_db) && !params.bgc_skip_antismash && (session.config.conda && session.config.conda.enabled)) { + log.warn("[nf-core/funcscan] Running antiSMASH download database module, and detected conda has been enabled. Assuming using conda for pipeline run. 
Check config if this is not expected!") + } } // // Validate channels from input samplesheet // def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] + def (metas, fastas) = input[1..2] // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + def endedness_ok = metas.collect { meta -> meta.single_end }.unique().size == 1 if (!endedness_ok) { error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") } - return [ metas[0], fastqs ] + return [metas[0], fastas] } // // Get attribute from genome config file e.g. fasta // def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] } } return null @@ -188,11 +186,7 @@ def getGenomeAttribute(attribute) { // def genomeExistsError() { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" error(error_string) } } @@ -200,27 +194,112 @@ def genomeExistsError() { // Generate methods description for MultiQC // def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report + def preprocessing_text = "The pipeline used the following tools: preprocessing included SeqKit2 (Shen et al. 2024)." + + def annotation_text = [ + "Annotation was carried out with:", + params.annotation_tool == 'prodigal' ? "Prodigal (Hyatt et al. 2010)." : "", + params.annotation_tool == 'pyrodigal' ? "Pyrodigal (Larralde 2022)." : "", + params.annotation_tool == 'bakta' ? "BAKTA (Schwengers et al. 2021)." : "", + params.annotation_tool == 'prokka' ? "PROKKA (Seemann 2014)." : "" + ].join(' ').trim() + + def amp_text = [ + "The following antimicrobial peptide screening tools were used:", + !params.amp_skip_amplify ? "AMPlify (Li et al. 2022)," : "", + !params.amp_skip_macrel ? "Macrel (Santos-Júnior et al. 2020)," : "", + !params.amp_skip_ampir ? "ampir (Fingerhut et al. 2021)," : "", + params.amp_run_hmmsearch ? "HMMER (Eddy 2011)," : "", + ". The output from the antimicrobial peptide screening tools were standardised and summarised with AMPcombi (Ibrahim and Perelo 2023)." 
+        ].join(' ').trim().replaceAll(', .', ".")
+
+    def arg_text = [
+            "The following antimicrobial resistance gene screening tools were used:",
+            !params.arg_skip_fargene ? "fARGene (Berglund et al. 2019)," : "",
+            !params.arg_skip_rgi ? "RGI (Alcock et al. 2020)," : "",
+            !params.arg_skip_amrfinderplus ? "AMRfinderplus (Feldgarden et al. 2021)," : "",
+            !params.arg_skip_deeparg ? "deepARG (Arango-Argoty 2018)," : "",
+            !params.arg_skip_abricate ? "ABRicate (Seemann 2020)," : "",
+            !params.arg_skip_argnorm ? ". The outputs from ARG screening tools were normalized to the antibiotic resistance ontology using argNorm (Perovic et al. 2024)," : "",
+            ". The output from the antimicrobial resistance gene screening tools were standardised and summarised with hAMRonization (Maguire et al. 2023)."
+        ].join(' ').trim().replaceAll(', +.', ".")
+
+    def bgc_text = [
+            "The following biosynthetic gene cluster screening tools were used:",
+            !params.bgc_skip_antismash ? "antiSMASH (Blin et al. 2021)," : "",
+            !params.bgc_skip_deepbgc ? "deepBGC (Hannigan et al. 2019)," : "",
+            !params.bgc_skip_gecco ? "GECCO (Carroll et al. 2021)," : "",
+            params.bgc_run_hmmsearch ? "HMMER (Eddy 2011)," : "",
+            ". The output from the biosynthetic gene cluster screening tools were standardised and summarised with comBGC (Frangenberg et al. 2023)."
+        ].join(' ').replaceAll(', +.', ".").trim()
+
+    def postprocessing_text = "Run statistics were reported using MultiQC (Ewels et al. 2016)."
+
     def citation_text = [
-            "Tools used in the workflow included:",
-            "FastQC (Andrews 2010),",
-            "MultiQC (Ewels et al. 2016)",
-            "."
-        ].join(' ').trim()
+            preprocessing_text,
+            annotation_text,
+            params.run_amp_screening ? amp_text : "",
+            params.run_arg_screening ? arg_text : "",
+            params.run_bgc_screening ? bgc_text : "",
+            postprocessing_text
+        ].join(' ').trim()

     return citation_text
 }

 def toolBibliographyText() {
-    // TODO nf-core: Optionally add bibliographic entries to this list.
-    // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+    // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? '<li>Author (2023) Pub name, Journal, DOI</li>' : "",
     // Uncomment function in methodsDescriptionText to render in MultiQC report
+    def preprocessing_text = '<li>Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. iMeta, e191. https://doi.org/10.1002/imt2.191</li>'
+
+    def annotation_text = [
+            params.annotation_tool == 'prodigal' ? '<li>Hyatt, D., Chen, G. L., Locascio, P. F., Land, M. L., Larimer, F. W., & Hauser, L. J. (2010). Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC bioinformatics, 11, 119. DOI: 10.1186/1471-2105-11-119</li>' : "",
+            params.annotation_tool == 'pyrodigal' ? '<li>Larralde, M. (2022). Pyrodigal: Python bindings and interface to Prodigal, an efficient method for gene prediction in prokaryotes. Journal of Open Source Software, 7(72), 4296. DOI: 10.21105/joss.04296</li>' : "",
+            params.annotation_tool == 'bakta' ? '<li>Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). DOI: 10.1099/mgen.0.000685</li>' : "",
+            params.annotation_tool == 'prokka' ? '<li>Seemann, T. (2014). Prokka: rapid prokaryotic genome annotation. Bioinformatics (Oxford, England), 30(14), 2068–2069. DOI: 10.1093/bioinformatics/btu153</li>' : ""
+        ].join(' ').trim()
+
+    def amp_text = [
+            !params.amp_skip_amplify ? '<li>Li, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. DOI: 10.1186/s12864-022-08310-4</li>' : "",
+            !params.amp_skip_macrel ? '<li>Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. DOI: 10.7717/peerj.10555</li>' : "",
+            !params.amp_skip_ampir ? '<li>Fingerhut, L., Miller, D. J., Strugnell, J. M., Daly, N. L., & Cooke, I. R. (2021). ampir: an R package for fast genome-wide prediction of antimicrobial peptides. Bioinformatics (Oxford, England), 36(21), 5262–5263. DOI: 10.1093/bioinformatics/btaa653</li>' : "",
+            '<li>Ibrahim, A. & Perelo, L. (2023). Darcy220606/AMPcombi. DOI: 10.5281/zenodo.7639121</li>'
+        ].join(' ').trim().replaceAll(', .', ".")
+
+    def arg_text = [
+            !params.arg_skip_fargene ? '<li>Berglund, F., Österlund, T., Boulund, F., Marathe, N. P., Larsson, D., & Kristiansson, E. (2019). Identification and reconstruction of novel antibiotic resistance genes from metagenomes. Microbiome, 7(1), 52. DOI: 10.1186/s40168-019-0670-1</li>' : "",
+            !params.arg_skip_rgi ? '<li>Alcock, B. P., Raphenya, A. R., Lau, T., Tsang, K. K., Bouchard, M., Edalatmand, A., Huynh, W., Nguyen, A. V., Cheng, A. A., Liu, S., Min, S. Y., Miroshnichenko, A., Tran, H. K., Werfalli, R. E., Nasir, J. A., Oloni, M., Speicher, D. J., Florescu, A., Singh, B., Faltyn, M., … McArthur, A. G. (2020). CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database. Nucleic acids research, 48(D1), D517–D525. DOI: 10.1093/nar/gkz935</li>' : "",
+            !params.arg_skip_amrfinderplus ? '<li>Feldgarden, M., Brover, V., Gonzalez-Escalona, N., Frye, J. G., Haendiges, J., Haft, D. H., Hoffmann, M., Pettengill, J. B., Prasad, A. B., Tillman, G. E., Tyson, G. H., & Klimke, W. (2021). AMRFinderPlus and the Reference Gene Catalog facilitate examination of the genomic links among antimicrobial resistance, stress response, and virulence. Scientific reports, 11(1), 12728. DOI: 10.1038/s41598-021-91456-0</li>' : "",
+            !params.arg_skip_deeparg ? '<li>Arango-Argoty, G., Garner, E., Pruden, A., Heath, L. S., Vikesland, P., & Zhang, L. (2018). DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data. Microbiome, 6(1), 23. DOI: 10.1186/s40168-018-0401-z</li>' : "",
+            !params.arg_skip_abricate ? '<li>Seemann, T. (2020). ABRicate. Github https://github.com/tseemann/abricate.</li>' : "",
+            !params.arg_skip_argnorm ? '<li>Perovic, S. U., Ramji, V., Chong, H., Duan, Y., Maguire, F., Coelho, L. P. (2024). argNorm. DOI: .</li>' : "",
+            '<li>Public Health Alliance for Genomic Epidemiology (pha4ge). (2022). Parse multiple Antimicrobial Resistance Analysis Reports into a common data structure. Github. Retrieved October 5, 2022, from https://github.com/pha4ge/hAMRonization</li>'
+        ].join(' ').trim().replaceAll(', +.', ".")
+
+    def bgc_text = [
+            !params.bgc_skip_antismash ? '<li>Blin, K., Shaw, S., Kloosterman, A. M., Charlop-Powers, Z., van Wezel, G. P., Medema, M. H., & Weber, T. (2021). antiSMASH 6.0: improving cluster detection and comparison capabilities. Nucleic acids research, 49(W1), W29–W35. DOI:</li>' : "",
+            !params.bgc_skip_deepbgc ? '<li>Hannigan, G. D., Prihoda, D., Palicka, A., Soukup, J., Klempir, O., Rampula, L., Durcak, J., Wurst, M., Kotowski, J., Chang, D., Wang, R., Piizzi, G., Temesi, G., Hazuda, D. J., Woelk, C. H., & Bitton, D. A. (2019). A deep learning genome-mining strategy for biosynthetic gene cluster prediction. Nucleic acids research, 47(18), e110. DOI: 10.1093/nar/gkz654</li>' : "",
+            !params.bgc_skip_gecco ? '<li>Carroll, L. M., Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv DOI: 10.1101/2021.05.03.442509</li>' : "",
+            '<li>Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. https://doi.org/10.5281/zenodo.7643100</li>'
+        ].join(' ').replaceAll(', +.', ".").trim()
+
+    def postprocessing_text = '<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354</li>'
+
+    // Special as reused in multiple subworkflows, and we don't want to cause duplicates
+    def hmmsearch_text = (params.run_amp_screening && params.amp_run_hmmsearch) || (params.run_bgc_screening && params.bgc_run_hmmsearch) ? '<li>Eddy S. R. (2011). Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. DOI: 10.1371/journal.pcbi.1002195</li>' : ""
+
     def reference_text = [
-            "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
-            "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
-        ].join(' ').trim()
+            preprocessing_text,
+            annotation_text,
+            params.run_amp_screening ? amp_text : "",
+            params.run_arg_screening ? arg_text : "",
+            params.run_bgc_screening ? bgc_text : "",
+            hmmsearch_text,
+            postprocessing_text
+        ].join(' ').trim()

     return reference_text
 }
@@ -242,23 +321,24 @@ def methodsDescriptionText(mqc_methods_yaml) {
             temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), "
         }
         meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
-    } else meta["doi_text"] = ""
+    }
+    else {
+        meta["doi_text"] = ""
+    }
     meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"

     // Tool references
     meta["tool_citations"] = ""
     meta["tool_bibliography"] = ""
-    // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
-    // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
-    // meta["tool_bibliography"] = toolBibliographyText()
+    meta["tool_citations"] = toolCitationText().replaceAll(', .', ".").replaceAll('. .', ".").replaceAll(', .', ".")
+    meta["tool_bibliography"] = toolBibliographyText()

     def methods_text = mqc_methods_yaml.text
-    def engine = new groovy.text.SimpleTemplateEngine()
+    def engine       = new groovy.text.SimpleTemplateEngine()
     def description_html = engine.createTemplate(methods_text).make(meta)

     return description_html.toString()
 }
-
diff --git a/tests/test.nf.test b/tests/test.nf.test
new file mode 100644
index 00000000..28e088b5
--- /dev/null
+++ b/tests/test.nf.test
@@ -0,0 +1,112 @@
+nextflow_pipeline {
+
+    name "Test pipeline: NFCORE_FUNCSCAN"
+    script "main.nf"
+    tag "pipeline"
+    tag "nfcore_funcscan"
+    tag "test"
+
+    test("test_profile") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() },
+                { assert new File("$outputDir/multiqc/multiqc_report.html").exists() },
+
+                // AMPir
+                { assert snapshot(
+                    file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                ).match("ampir") },
+
+                // AMPlify
+                { assert snapshot(
+                    file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1")
+                ).match("amplify") },
+
+                // HMMsearch
+                { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() },
+                { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() },
+
+                // Macrel
+                { assert snapshot(
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/README.md"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/README.md"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt")
+                ).match("macrel") },
+
+                // AMPcombi
+                { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("NODE_515831_length_303_cov_1.532258_1") },
+                { assert path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log").text.contains("amp_DRAMP_database is found and will be used")
}, + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log") + ).match("ampcombi_logfiles") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG") + ).match("deeparg_tsv_ARG") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv").text.contains("rifampin_monooxygenase|rifamycin|rifampin_monooxygenase") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv").text.contains("rifampin_monooxygenase|rifamycin|rifampin_monooxygenase") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt") + ).match("fargene") + }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") } + ) + } + } +} diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap new file mode 100644 index 00000000..c09747ac --- /dev/null +++ b/tests/test.nf.test.snap @@ -0,0 +1,126 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,864466b0fb1acfc0e6b3425271f78ecb" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.641163055" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,69af3321b0bc808b7ef85f102395736f", + "sample_2.txt:md5,69af3321b0bc808b7ef85f102395736f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.590739146" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-07-23T15:33:45.625398198" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.614473482" + }, + "deeparg_tsv_ARG": { + "content": [ + "sample_1.align.daa.tsv:md5,21822364379fe8f991d27cdb52a33d1d", + "sample_2.align.daa.tsv:md5,f448465df58785a87cdee53691a77bfe", + "sample_1.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9", + "sample_2.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T12:41:33.325286058" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T12:41:33.055416682" + }, + "ampcombi_logfiles": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + "Ampcombi_parse_tables.log:md5,ff27d0c3657ce99d2c29a136f598e4f8" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T12:41:33.230701016" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T12:41:33.1312123" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,844bb10e2f84e1a2b2db56eb36391dcf", + "sample_2.macrel.all_orfs.faa.gz:md5,9c0b8b1c3b03d7b20aee0b57103861ab", + "sample_1.macrel.prediction.gz:md5,9553e1dae8a5b912da8d74fa3f1cd9eb", + "sample_2.macrel.prediction.gz:md5,ae155e454eb7abd7c48c06aad9261603", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T10:35:54.749106433" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe", + "sample_2.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.603392278" + } +} \ No newline at end of file diff --git a/tests/test_bakta.nf.test b/tests/test_bakta.nf.test new file mode 100644 index 00000000..b1913b04 --- /dev/null +++ b/tests/test_bakta.nf.test @@ -0,0 +1,109 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bakta" + + test("test_bakta_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMPir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + 
file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("amplify") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // AMPcombi + { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("KKEJHB_00100") }, + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log") + ).match("ampcombi_logfiles") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG") + ).match("deeparg_tsv_ARG") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt") + ).match("fargene") + }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert 
path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") }, + ) + } + } +} diff --git a/tests/test_bakta.nf.test.snap b/tests/test_bakta.nf.test.snap new file mode 100644 index 00000000..2a81f001 --- /dev/null +++ b/tests/test_bakta.nf.test.snap @@ -0,0 +1,126 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,864466b0fb1acfc0e6b3425271f78ecb" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.320498194" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,69af3321b0bc808b7ef85f102395736f", + "sample_2.txt:md5,69af3321b0bc808b7ef85f102395736f" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.261118633" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.30812705" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.292595392" + }, + "deeparg_tsv_ARG": { + "content": [ + "sample_1.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.align.daa.tsv:md5,4a86ca69defa4c861fabf236609afe8a", + "sample_1.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9", + "sample_2.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.245901486" + }, + "ampir": { + "content": [ + false, + false, + false, + false + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.19218768" + }, + "ampcombi_logfiles": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + "Ampcombi_parse_tables.log:md5,ff27d0c3657ce99d2c29a136f598e4f8" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T11:04:21.067236601" + }, + "amplify": { + "content": [ + false, + false + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.204985783" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,844bb10e2f84e1a2b2db56eb36391dcf", + "sample_2.macrel.all_orfs.faa.gz:md5,9c0b8b1c3b03d7b20aee0b57103861ab", + "sample_1.macrel.prediction.gz:md5,9553e1dae8a5b912da8d74fa3f1cd9eb", + "sample_2.macrel.prediction.gz:md5,ae155e454eb7abd7c48c06aad9261603", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T11:04:20.948791843" + 
}, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe", + "sample_2.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.276435739" + } +} \ No newline at end of file diff --git a/tests/test_bgc_bakta.nf.test b/tests/test_bgc_bakta.nf.test new file mode 100644 index 00000000..37a0a0b1 --- /dev/null +++ b/tests/test_bgc_bakta.nf.test @@ -0,0 +1,45 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bgc_bakta" + + test("test_bgc_bakta_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert snapshot(path("$outputDir/bgc/antismash/sample_2/css")).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('NODE_861_length_4516_cov') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot(path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk")).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to: sample_2') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_bgc_bakta.nf.test.snap b/tests/test_bgc_bakta.nf.test.snap new file mode 100644 index 00000000..9bae9f24 --- /dev/null +++ b/tests/test_bgc_bakta.nf.test.snap @@ -0,0 +1,35 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:32:18.349501125" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:32:18.378687548" + }, + "gecco": { + "content": [ + "sample_2.genes.tsv:md5,66e3724c7e7da102bf58acd564211e8b", + "sample_2.features.tsv:md5,2ef146213836ca80d3079776f17c7cb2" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:32:18.404694725" + } +} \ No newline at end of file diff --git a/tests/test_bgc_prokka.nf.test b/tests/test_bgc_prokka.nf.test 
new file mode 100644 index 00000000..0fe53cd5 --- /dev/null +++ b/tests/test_bgc_prokka.nf.test @@ -0,0 +1,45 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bgc_prokka" + + test("test_bgc_prokka_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert snapshot(path("$outputDir/bgc/antismash/sample_2/css")).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('PROKKA_1') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot(path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk")).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to: sample_2') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_bgc_prokka.nf.test.snap b/tests/test_bgc_prokka.nf.test.snap new file mode 100644 index 00000000..4894afa1 --- /dev/null +++ b/tests/test_bgc_prokka.nf.test.snap @@ -0,0 +1,35 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:39:33.879464917" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:39:33.920624113" + }, + "gecco": { + "content": [ + "sample_2.genes.tsv:md5,050b82ca462430ecc0635acb2e297531", + "sample_2.features.tsv:md5,79354868ee3de6fdc419195b8fa8edb6" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:39:33.944935473" + } +} \ No newline at end of file diff --git a/tests/test_bgc_pyrodigal.nf.test b/tests/test_bgc_pyrodigal.nf.test new file mode 100644 index 00000000..cab97577 --- /dev/null +++ b/tests/test_bgc_pyrodigal.nf.test @@ -0,0 +1,45 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bgc_pyrodigal" + + test("test_bgc_pyrodigal_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + 
assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert snapshot(path("$outputDir/bgc/antismash/sample_2/css")).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('NODE_861_length_4516_cov') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot(path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk")).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("NODE_861_length_4516_cov_2.736606") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to: sample_2') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_bgc_pyrodigal.nf.test.snap b/tests/test_bgc_pyrodigal.nf.test.snap new file mode 100644 index 00000000..67089772 --- /dev/null +++ b/tests/test_bgc_pyrodigal.nf.test.snap @@ -0,0 +1,35 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:45:25.720352923" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:45:44.435766452" + }, + "gecco": { + "content": [ + "sample_2.genes.tsv:md5,66e3724c7e7da102bf58acd564211e8b", + "sample_2.features.tsv:md5,2ef146213836ca80d3079776f17c7cb2" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:45:25.732866237" + } +} \ No newline at end of file diff --git a/tests/test_full.nf.test b/tests/test_full.nf.test new file mode 100644 index 00000000..b5d53e6d --- /dev/null +++ b/tests/test_full.nf.test @@ -0,0 +1,373 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_full" + + test("test_full_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMP workflow + + // AMPir + { assert snapshot( + path("$outputDir/amp/ampir/ERZ1664501/ERZ1664501.ampir.tsv"), + 
path("$outputDir/amp/ampir/ERZ1664503/ERZ1664503.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664504/ERZ1664504.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664505/ERZ1664505.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664506/ERZ1664506.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664507/ERZ1664507.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664508/ERZ1664508.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664509/ERZ1664509.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664510/ERZ1664510.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664511/ERZ1664511.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664515/ERZ1664515.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664516/ERZ1664516.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664517/ERZ1664517.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664518/ERZ1664518.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664520/ERZ1664520.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664521/ERZ1664521.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664523/ERZ1664523.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664524/ERZ1664524.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664528/ERZ1664528.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664501/ERZ1664501.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664503/ERZ1664503.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664504/ERZ1664504.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664505/ERZ1664505.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664506/ERZ1664506.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664507/ERZ1664507.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664508/ERZ1664508.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664509/ERZ1664509.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664510/ERZ1664510.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664511/ERZ1664511.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664515/ERZ1664515.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664516/ERZ1664516.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664517/ERZ1664517.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664518/ERZ1664518.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664520/ERZ1664520.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664521/ERZ1664521.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664523/ERZ1664523.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664524/ERZ1664524.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664528/ERZ1664528.ampir.faa") + ).match("ampir") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664501/ERZ1664501_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664503/ERZ1664503_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664504/ERZ1664504_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664506/ERZ1664506_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664507/ERZ1664507_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664508/ERZ1664508_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664509/ERZ1664509_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664510/ERZ1664510_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664511/ERZ1664511_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new 
File("$outputDir/amp/hmmer_hmmsearch/ERZ1664515/ERZ1664515_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664516/ERZ1664516_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664517/ERZ1664517_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664518/ERZ1664518_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664520/ERZ1664520_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664521/ERZ1664521_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664523/ERZ1664523_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664523/ERZ1664523_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664524/ERZ1664524_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664528/ERZ1664528_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/ERZ1664501.macrel/ERZ1664501.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664503.macrel/ERZ1664503.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664504.macrel/ERZ1664504.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664505.macrel/ERZ1664505.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664506.macrel/ERZ1664506.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664507.macrel/ERZ1664507.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664508.macrel/ERZ1664508.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664509.macrel/ERZ1664509.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664510.macrel/ERZ1664510.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664511.macrel/ERZ1664511.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664515.macrel/ERZ1664515.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664516.macrel/ERZ1664516.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664517.macrel/ERZ1664517.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664518.macrel/ERZ1664518.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664520.macrel/ERZ1664520.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664521.macrel/ERZ1664521.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664523.macrel/ERZ1664523.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664524.macrel/ERZ1664524.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664528.macrel/ERZ1664528.macrel.prediction.gz") + ).match("macrel") }, + + // AMPcombi + { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("ERZ1664515.11560-NODE-11560-length-551-cov-1.403226_2") }, + + // ARG workflow + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/ERZ1664501/ERZ1664501.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664503/ERZ1664503.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664504/ERZ1664504.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664505/ERZ1664505.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664506/ERZ1664506.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664507/ERZ1664507.mapping.ARG"), + 
path("$outputDir/arg/deeparg/ERZ1664508/ERZ1664508.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664509/ERZ1664509.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664510/ERZ1664510.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664511/ERZ1664511.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664515/ERZ1664515.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664516/ERZ1664516.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664517/ERZ1664517.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664518/ERZ1664518.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664520/ERZ1664520.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664521/ERZ1664521.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664523/ERZ1664523.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664524/ERZ1664524.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664528/ERZ1664528.mapping.ARG") + ).match("deeparg") }, + + { assert new File("$outputDir/arg/deeparg/ERZ1664501/ERZ1664501.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664503/ERZ1664503.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664504/ERZ1664504.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664505/ERZ1664505.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664506/ERZ1664506.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664507/ERZ1664507.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664508/ERZ1664508.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664509/ERZ1664509.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664510/ERZ1664510.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664511/ERZ1664511.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664515/ERZ1664515.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664516/ERZ1664516.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664517/ERZ1664517.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664518/ERZ1664518.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664520/ERZ1664520.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664521/ERZ1664521.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664523/ERZ1664523.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664524/ERZ1664524.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664528/ERZ1664528.align.daa").exists() }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/ERZ1664501/ERZ1664501.txt"), + path("$outputDir/arg/abricate/ERZ1664503/ERZ1664503.txt"), + path("$outputDir/arg/abricate/ERZ1664504/ERZ1664504.txt"), + path("$outputDir/arg/abricate/ERZ1664505/ERZ1664505.txt"), + path("$outputDir/arg/abricate/ERZ1664506/ERZ1664506.txt"), + path("$outputDir/arg/abricate/ERZ1664507/ERZ1664507.txt"), + path("$outputDir/arg/abricate/ERZ1664508/ERZ1664508.txt"), + path("$outputDir/arg/abricate/ERZ1664509/ERZ1664509.txt"), + path("$outputDir/arg/abricate/ERZ1664510/ERZ1664510.txt"), + path("$outputDir/arg/abricate/ERZ1664511/ERZ1664511.txt"), + path("$outputDir/arg/abricate/ERZ1664515/ERZ1664515.txt"), + path("$outputDir/arg/abricate/ERZ1664516/ERZ1664516.txt"), + path("$outputDir/arg/abricate/ERZ1664517/ERZ1664517.txt"), + path("$outputDir/arg/abricate/ERZ1664518/ERZ1664518.txt"), + 
path("$outputDir/arg/abricate/ERZ1664520/ERZ1664520.txt"), + path("$outputDir/arg/abricate/ERZ1664521/ERZ1664521.txt"), + path("$outputDir/arg/abricate/ERZ1664523/ERZ1664523.txt"), + path("$outputDir/arg/abricate/ERZ1664524/ERZ1664524.txt"), + path("$outputDir/arg/abricate/ERZ1664528/ERZ1664528.txt") + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/ERZ1664501/ERZ1664501.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664503/ERZ1664503.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664504/ERZ1664504.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664505/ERZ1664505.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664506/ERZ1664506.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664507/ERZ1664507.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664508/ERZ1664508.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664509/ERZ1664509.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664510/ERZ1664510.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664511/ERZ1664511.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664515/ERZ1664515.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664516/ERZ1664516.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664517/ERZ1664517.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664518/ERZ1664518.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664520/ERZ1664520.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664521/ERZ1664521.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664523/ERZ1664523.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664524/ERZ1664524.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664528/ERZ1664528.tsv") + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/ERZ1664501/ERZ1664501.txt"), + path("$outputDir/arg/rgi/ERZ1664503/ERZ1664503.txt"), + path("$outputDir/arg/rgi/ERZ1664504/ERZ1664504.txt"), + path("$outputDir/arg/rgi/ERZ1664505/ERZ1664505.txt"), + path("$outputDir/arg/rgi/ERZ1664506/ERZ1664506.txt"), + path("$outputDir/arg/rgi/ERZ1664507/ERZ1664507.txt"), + path("$outputDir/arg/rgi/ERZ1664508/ERZ1664508.txt"), + path("$outputDir/arg/rgi/ERZ1664509/ERZ1664509.txt"), + path("$outputDir/arg/rgi/ERZ1664510/ERZ1664510.txt"), + path("$outputDir/arg/rgi/ERZ1664511/ERZ1664511.txt"), + path("$outputDir/arg/rgi/ERZ1664515/ERZ1664515.txt"), + path("$outputDir/arg/rgi/ERZ1664516/ERZ1664516.txt"), + path("$outputDir/arg/rgi/ERZ1664517/ERZ1664517.txt"), + path("$outputDir/arg/rgi/ERZ1664518/ERZ1664518.txt"), + path("$outputDir/arg/rgi/ERZ1664520/ERZ1664520.txt"), + path("$outputDir/arg/rgi/ERZ1664521/ERZ1664521.txt"), + path("$outputDir/arg/rgi/ERZ1664523/ERZ1664523.txt"), + path("$outputDir/arg/rgi/ERZ1664524/ERZ1664524.txt"), + path("$outputDir/arg/rgi/ERZ1664528/ERZ1664528.txt") + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/ERZ1664501/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664503/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664504/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664505/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664506/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664507/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664508/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664509/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664510/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664511/class_a/results_summary.txt"), + 
path("$outputDir/arg/fargene/ERZ1664515/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664516/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664517/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664518/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664520/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664521/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664523/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664524/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664528/class_a/results_summary.txt") + ).match("fargene") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") }, + + // argNorm + { assert snapshot ( + path("$outputDir/arg/argnorm/deeparg/ERZ1664501.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664503.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664504.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664505.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664506.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664507.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664508.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664509.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664510.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664511.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664515.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664516.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664517.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664518.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664520.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664521.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664523.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664524.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664528.ARG.normalized.tsv"), + ).match("argnorm_deeparg") }, + + { assert snapshot ( + path("$outputDir/arg/argnorm/abricate/ERZ1664501.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664503.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664504.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664505.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664506.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664507.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664508.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664509.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664510.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664511.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664515.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664516.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664517.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664518.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664520.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664521.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664523.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664524.normalized.tsv"), + 
path("$outputDir/arg/argnorm/abricate/ERZ1664528.normalized.tsv"), + ).match("argnorm_abricate") }, + + { assert snapshot ( + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664501.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664503.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664504.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664505.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664506.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664507.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664508.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664509.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664510.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664511.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664515.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664516.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664517.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664518.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664520.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664521.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664523.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664524.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664528.normalized.tsv"), + ).match("argnorm_amrfinderplus") }, + + // BGC workflow + + // antiSMASH + { assert snapshot( + path("$outputDir/bgc/antismash/ERZ1664501/ERZ1664501.gbk").text.contains("ccgcccatat cctttctgtc accgactcgg"), + path("$outputDir/bgc/antismash/ERZ1664503/ERZ1664503.gbk").text.contains("agaaggaacc gagcttgata aaacctatgc"), + path("$outputDir/bgc/antismash/ERZ1664504/ERZ1664504.gbk").text.contains("aggcaatacc ggctccaaca acagcagatt"), + path("$outputDir/bgc/antismash/ERZ1664505/ERZ1664505.gbk").text.contains("agggccacgc acacgggctc ggtgcacccc"), + path("$outputDir/bgc/antismash/ERZ1664506/ERZ1664506.gbk").text.contains("acaggatgga gcattgacta cattctggat"), + path("$outputDir/bgc/antismash/ERZ1664507/ERZ1664507.gbk").text.contains("aaagcaggaa aaagctgata acgcccgccc"), + path("$outputDir/bgc/antismash/ERZ1664508/ERZ1664508.gbk").text.contains("gggccgtttc gcggtaggcc tggttcatat"), + path("$outputDir/bgc/antismash/ERZ1664509/ERZ1664509.gbk").text.contains("aagcagtggg tctaaggcga agtcataccc"), + path("$outputDir/bgc/antismash/ERZ1664510/ERZ1664510.gbk").text.contains("ttcgcgataa agcgttccaa tggggatgag"), + path("$outputDir/bgc/antismash/ERZ1664511/ERZ1664511.gbk").text.contains("tttttgggaa cggcgtccgt tctcaaagag"), + path("$outputDir/bgc/antismash/ERZ1664515/ERZ1664515.gbk").text.contains("tattgcaaac atatcaagca ccttcccttc"), + path("$outputDir/bgc/antismash/ERZ1664516/ERZ1664516.gbk").text.contains("aactccctgg ttgaaccggc cgtaatactt"), + path("$outputDir/bgc/antismash/ERZ1664517/ERZ1664517.gbk").text.contains("gacgctttct ttcagaaacg ttttcccctt"), + path("$outputDir/bgc/antismash/ERZ1664518/ERZ1664518.gbk").text.contains("cagcgcataa gtctgcgtca cctgtcccag"), + path("$outputDir/bgc/antismash/ERZ1664520/ERZ1664520.gbk").text.contains("ggagtttttg cgctttgacc gccacgggga"), + path("$outputDir/bgc/antismash/ERZ1664521/ERZ1664521.gbk").text.contains("ggtaaaggaa accatccggc ggccaatctg"), + path("$outputDir/bgc/antismash/ERZ1664523/ERZ1664523.gbk").text.contains("gctgtgggga tgggtaagcg aggatgatgc"), + 
path("$outputDir/bgc/antismash/ERZ1664524/ERZ1664524.gbk").text.contains("ataatggctt cttttataaa tgcataaatt"), + path("$outputDir/bgc/antismash/ERZ1664528/ERZ1664528.gbk").text.contains("cagaaagaag aaaaacgcct gacttgggcg") + ).match("antismash") }, + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/ERZ1664504/ERZ1664504.220-NODE-220-length-4587-cov-2.552957_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664504/ERZ1664504.48-NODE-48-length-9582-cov-5.239425_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664506/ERZ1664506.42-NODE-42-length-11967-cov-6.006380_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664508/ERZ1664508.3061-NODE-3061-length-1263-cov-3.647351_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664510/ERZ1664510.123-NODE-123-length-8863-cov-8.649410_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664510/ERZ1664510.210-NODE-210-length-5173-cov-7.860688_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664511/ERZ1664511.16-NODE-16-length-49668-cov-9.810473_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664515/ERZ1664515.9-NODE-9-length-49063-cov-10.926196_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664516/ERZ1664516.158-NODE-158-length-6232-cov-9.863850_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664517/ERZ1664517.38-NODE-38-length-19981-cov-8.613771_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664518/ERZ1664518.217-NODE-217-length-4457-cov-6.415947_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664521/ERZ1664521.1871-NODE-1871-length-1473-cov-1.858251_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664521/ERZ1664521.895-NODE-895-length-1964-cov-2.221058_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664523/ERZ1664523.16-NODE-16-length-15072-cov-6.654591_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664524/ERZ1664524.1150-NODE-1150-length-2386-cov-3.450879_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664524/ERZ1664524.54-NODE-54-length-9607-cov-5.345582_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664528/ERZ1664528.138-NODE-138-length-5805-cov-4.599304_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664528/ERZ1664528.1641-NODE-1641-length-2049-cov-4.697091_cluster_1.gbk") + ).match("gecco") }, + + // comBGC + { assert snapshot("$outputDir/reports/combgc/combgc_complete_summary.tsv") } + ) + } + } +} diff --git a/tests/test_full.nf.test.snap b/tests/test_full.nf.test.snap new file mode 100644 index 00000000..296fd4b5 --- /dev/null +++ b/tests/test_full.nf.test.snap @@ -0,0 +1,366 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,69e71df9685cbd70579ac0030f624ca4" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:15.219521746" + }, + "deeparg": { + "content": [ + "ERZ1664501.mapping.ARG:md5,a4641ca28291c73f4ce664af575a4811", + "ERZ1664503.mapping.ARG:md5,c217c176a55170cf7f342dd6b082bec6", + "ERZ1664504.mapping.ARG:md5,2f2a6297dc8752766f65f2b3e966158d", + "ERZ1664505.mapping.ARG:md5,be741b562796026b46f649c8fbe6e73d", + "ERZ1664506.mapping.ARG:md5,17122078c5b1821ea9d841eb1775e987", + "ERZ1664507.mapping.ARG:md5,6dbd85abafa0f892c4b557eb8f93b788", + "ERZ1664508.mapping.ARG:md5,9a824269207740c926eb7d1ade69cd89", + "ERZ1664509.mapping.ARG:md5,fa8ffd39d8405bd167bb1676b5a29db7", + "ERZ1664510.mapping.ARG:md5,16f2e41c2378645dbbdf9867f9000acf", + "ERZ1664511.mapping.ARG:md5,7f2bd09ed161162a82c3c10c06bf1ee8", + "ERZ1664515.mapping.ARG:md5,a4bb295a414b9a26c4e2d032bb25624f", + 
"ERZ1664516.mapping.ARG:md5,e3ec14da3e206782e1151593d801015d", + "ERZ1664517.mapping.ARG:md5,9f22fec9df39231f0a52865ca9245451", + "ERZ1664518.mapping.ARG:md5,821fd592c54795e2666277e3a2c84169", + "ERZ1664520.mapping.ARG:md5,24942a7fadad6af031c116e8f9ea848e", + "ERZ1664521.mapping.ARG:md5,d61ee33f0395ab5dbb6b65f816186d77", + "ERZ1664523.mapping.ARG:md5,2ba512cfd091a9ab18825cd4d9560a83", + "ERZ1664524.mapping.ARG:md5,d3fd9b70a667f37478c901c4ec5c69be", + "ERZ1664528.mapping.ARG:md5,1da3f34f173fabe34ff5bc122d9ec7e8" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:11.887062467" + }, + "ampir": { + "content": [ + "ERZ1664501.ampir.tsv:md5,ef78b10c6f4c6a555eb9ef8d93291aad", + "ERZ1664503.ampir.tsv:md5,7354b3c583443d9d3cab33ce3cb6327e", + "ERZ1664504.ampir.tsv:md5,506a55b7dc97b33b5533febabde8b0cf", + "ERZ1664505.ampir.tsv:md5,27f72ca9213aac5b0d857df638db692b", + "ERZ1664506.ampir.tsv:md5,1c5df79369a889b136cdef0e89f3f120", + "ERZ1664507.ampir.tsv:md5,a44751ce33a3384583dd43795d729245", + "ERZ1664508.ampir.tsv:md5,a9267c72360b01116bc61ab360f01ab4", + "ERZ1664509.ampir.tsv:md5,c0d8115529d6d8ee4989bd1e9dfe5766", + "ERZ1664510.ampir.tsv:md5,2a6d6563be682769a83208fe025ed946", + "ERZ1664511.ampir.tsv:md5,b96317516b603ea796d58366693e6b96", + "ERZ1664515.ampir.tsv:md5,9fbeb531294e951336e4c91257d44e30", + "ERZ1664516.ampir.tsv:md5,44dcbd1371c1fcfe0e98e756d6a74996", + "ERZ1664517.ampir.tsv:md5,35a42d7aabc1edef65a0c0f2129530bc", + "ERZ1664518.ampir.tsv:md5,c7c9157000642e158b6746c719d65a85", + "ERZ1664520.ampir.tsv:md5,62f2e109031048fc593aa525405a19b4", + "ERZ1664521.ampir.tsv:md5,91bebaf23d2a63192359178af8ae1d42", + "ERZ1664523.ampir.tsv:md5,1e01f9649dc2e9bebd8ce635e051e3df", + "ERZ1664524.ampir.tsv:md5,8ea8ca6483c416695ad2307e7a939f8d", + "ERZ1664528.ampir.tsv:md5,a239169a2cd41265693442694bb5e329", + "ERZ1664501.ampir.faa:md5,88d04f76764566e029f1a0eb7481bd50", + "ERZ1664503.ampir.faa:md5,754b00982b99d20d24ddd2c39e3db060", + "ERZ1664504.ampir.faa:md5,c6e84c9ee141c097decb89def230a70b", + "ERZ1664505.ampir.faa:md5,7519e8f28ca3c3e8b33e65a672b6f418", + "ERZ1664506.ampir.faa:md5,39162c25303085463d893acee70c2921", + "ERZ1664507.ampir.faa:md5,8119bbc3daa1fc93cf3760b359001212", + "ERZ1664508.ampir.faa:md5,369131964554c5d7b7b56a99a4eeb851", + "ERZ1664509.ampir.faa:md5,2594cd39d2d0cf96d303955528e9c962", + "ERZ1664510.ampir.faa:md5,9bf556234e1a9843d7155118cb8b6afb", + "ERZ1664511.ampir.faa:md5,5ddc4c648af3db91d1aba27527c13622", + "ERZ1664515.ampir.faa:md5,a7830a1af51b290793af9ac83d8c3296", + "ERZ1664516.ampir.faa:md5,6c5b07f03f6f1bc55a44e0a8cbc18cb3", + "ERZ1664517.ampir.faa:md5,2c59abb9b9bfc690f97fefe10a6bc4ce", + "ERZ1664518.ampir.faa:md5,7f5519edb85db37903f3665541219c69", + "ERZ1664520.ampir.faa:md5,f3314a405c3c33e05722a8ab6021cb64", + "ERZ1664521.ampir.faa:md5,139303c88f5f5a4041ee059519ba7f98", + "ERZ1664523.ampir.faa:md5,fb34351d27a405e4a9968664878a0fd4", + "ERZ1664524.ampir.faa:md5,e4660c8d3ac00779a26ee2f0105bba2a", + "ERZ1664528.ampir.faa:md5,36fd7ea6830c3068015105d20b4404a7" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:05.572253729" + }, + "argnorm_amrfinderplus": { + "content": [ + "ERZ1664501.normalized.tsv:md5,ef07ef517f4e73b6cfd4155d14f1a459", + "ERZ1664503.normalized.tsv:md5,4a6be3b2878c77c351581283a3c6cb92", + "ERZ1664504.normalized.tsv:md5,8e984c4365df778d75e80c2928bad20d", + "ERZ1664505.normalized.tsv:md5,cbe3ce3a810cc6c628268617d2924f51", + 
"ERZ1664506.normalized.tsv:md5,14225d75f1af11a6b667d1a80a14e9d4", + "ERZ1664507.normalized.tsv:md5,8febe711ddd369571c5dd071d77fdbeb", + "ERZ1664508.normalized.tsv:md5,973d098a82e9d67e87a1bd7a2684299a", + "ERZ1664509.normalized.tsv:md5,e0a387b6727320a712e204af4776bd79", + "ERZ1664510.normalized.tsv:md5,949b8524b11e281d53fa67037a346497", + "ERZ1664511.normalized.tsv:md5,810ff27b0c8664f2350ade9e76095574", + "ERZ1664515.normalized.tsv:md5,64847a921608f2b37ecfbc324fec1cb1", + "ERZ1664516.normalized.tsv:md5,d25d1d43562344b463802dc5dfaccf52", + "ERZ1664517.normalized.tsv:md5,4d8e73eccd1001ebc6225167df6a2374", + "ERZ1664518.normalized.tsv:md5,f8b744ae41b1d0ba101ae9a228529d05", + "ERZ1664520.normalized.tsv:md5,f036a7211ad6df9b874bad4c99c5ddda", + "ERZ1664521.normalized.tsv:md5,e41c50f9524dfdde17bf782dfc6c7eea", + "ERZ1664523.normalized.tsv:md5,8590e4c5437121a93f527f55125291c5", + "ERZ1664524.normalized.tsv:md5,951d29c42bd2890bc1a28d91a3f9bb84", + "ERZ1664528.normalized.tsv:md5,52495202c208557c2c9ee0c7b7ef5497" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-19T17:43:22.768320462" + }, + "argnorm_abricate": { + "content": [ + "ERZ1664501.normalized.tsv:md5,618aa19dcaed5d3a5909cb84393d90cb", + "ERZ1664503.normalized.tsv:md5,f2ee4aeafc929e3893677c271b3a04d4", + "ERZ1664504.normalized.tsv:md5,88b9a76d726402b95b2cd348459f0666", + "ERZ1664505.normalized.tsv:md5,817218f39d51d6f327623b26512e2e2d", + "ERZ1664506.normalized.tsv:md5,66806a70c95b2186f085f27661639738", + "ERZ1664507.normalized.tsv:md5,47e109f815e4b8e8d28aaeb75e4947b7", + "ERZ1664508.normalized.tsv:md5,60990fe382f0b43a288a8f66bcbde19f", + "ERZ1664509.normalized.tsv:md5,9710235350f4ff66c06b4abb78c23f80", + "ERZ1664510.normalized.tsv:md5,520f6eff7afdc9c52b9a1a8bb363fe85", + "ERZ1664511.normalized.tsv:md5,893ce88576218cd6acb246046eadb1af", + "ERZ1664515.normalized.tsv:md5,f88c35c590379f3a8a62664679d7404b", + "ERZ1664516.normalized.tsv:md5,b0499afcad11e34f3224e58431fd1aff", + "ERZ1664517.normalized.tsv:md5,79d79caa0a5a87a8dfb48eb67e4bf3f1", + "ERZ1664518.normalized.tsv:md5,8cee92e968b380c2c1ab6b5707608092", + "ERZ1664520.normalized.tsv:md5,4ba7e4daeeaf7f5d064131a742225152", + "ERZ1664521.normalized.tsv:md5,b724f087cc957400a5ff404a11535e29", + "ERZ1664523.normalized.tsv:md5,6cbe41ccfb7660e70aec3b711a33e18d", + "ERZ1664524.normalized.tsv:md5,658d14b9cfd773bc0ada2da2f44252cd", + "ERZ1664528.normalized.tsv:md5,a3db7b884b5fe91a59cf55c332fd0337" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:44.957353948" + }, + "gecco": { + "content": [ + "ERZ1664504.220-NODE-220-length-4587-cov-2.552957_cluster_1.gbk:md5,261a667aef6a1fed0aab0a1e6c4c396f", + "ERZ1664504.48-NODE-48-length-9582-cov-5.239425_cluster_1.gbk:md5,1b0d356ed26b09b3c62e7bf06b003c1a", + "ERZ1664506.42-NODE-42-length-11967-cov-6.006380_cluster_1.gbk:md5,1c2e2d8033286841b412399039bbfd24", + "ERZ1664508.3061-NODE-3061-length-1263-cov-3.647351_cluster_1.gbk:md5,fc75b2afba00fa0c08fc677920c3dab5", + "ERZ1664510.123-NODE-123-length-8863-cov-8.649410_cluster_1.gbk:md5,1ebcc9f338bf402483f671b9e641b9f3", + "ERZ1664510.210-NODE-210-length-5173-cov-7.860688_cluster_1.gbk:md5,e11a7207dae596faa24f3ccc3bd4078d", + "ERZ1664511.16-NODE-16-length-49668-cov-9.810473_cluster_1.gbk:md5,611d4b75a1206df0ced28fb49de5d970", + "ERZ1664515.9-NODE-9-length-49063-cov-10.926196_cluster_1.gbk:md5,b4c89821cb6f28be3408e88490d38ae9", + 
"ERZ1664516.158-NODE-158-length-6232-cov-9.863850_cluster_1.gbk:md5,110480bf384c530e7aff3ad42be5e9fd", + "ERZ1664517.38-NODE-38-length-19981-cov-8.613771_cluster_1.gbk:md5,e2adfe2599cc481c84ef41167ef0192e", + "ERZ1664518.217-NODE-217-length-4457-cov-6.415947_cluster_1.gbk:md5,2c34b0b6e3611bba535afdea3b5d8f5a", + "ERZ1664521.1871-NODE-1871-length-1473-cov-1.858251_cluster_1.gbk:md5,9b91e8a5adc522ffa4a5fc47a2fbb570", + "ERZ1664521.895-NODE-895-length-1964-cov-2.221058_cluster_1.gbk:md5,f39ce0627a18c84feba727596b5e9b69", + "ERZ1664523.16-NODE-16-length-15072-cov-6.654591_cluster_1.gbk:md5,4e1c5e95f7d4c6e1e61a8ceddfa3137e", + "ERZ1664524.1150-NODE-1150-length-2386-cov-3.450879_cluster_1.gbk:md5,78b7101cad30b392a7bbf6d9be7c5152", + "ERZ1664524.54-NODE-54-length-9607-cov-5.345582_cluster_1.gbk:md5,56a8f6598d928e7514ab2a5ab663f076", + "ERZ1664528.138-NODE-138-length-5805-cov-4.599304_cluster_1.gbk:md5,096bf5dc83df18507982bd9b3dc0cf72", + "ERZ1664528.1641-NODE-1641-length-2049-cov-4.697091_cluster_1.gbk:md5,c122763612b7cbe1967d98784cb11273" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-19T17:43:36.771956416" + }, + "antismash": { + "content": [ + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:59.889179408" + }, + "argnorm_deeparg": { + "content": [ + "ERZ1664501.ARG.normalized.tsv:md5,b1112b8509e77e01b6810b71c9ab4cd2", + "ERZ1664503.ARG.normalized.tsv:md5,b736252e5dafbea27284d73650a1fae2", + "ERZ1664504.ARG.normalized.tsv:md5,6e8daa74f53ed59b99e2e281153a2a1b", + "ERZ1664505.ARG.normalized.tsv:md5,1ca35eca67e9d8cb61acaf80a0b27425", + "ERZ1664506.ARG.normalized.tsv:md5,2bba2c688159baff5b48d7547d330444", + "ERZ1664507.ARG.normalized.tsv:md5,63e96001b1ab9a64724f4c3c38c21004", + "ERZ1664508.ARG.normalized.tsv:md5,59764d22d08c34e3a5cefd682257b5f6", + "ERZ1664509.ARG.normalized.tsv:md5,86730f7950d84ef4a48e2042d92d9abc", + "ERZ1664510.ARG.normalized.tsv:md5,361e6f9a96d923f97d685df86492068a", + "ERZ1664511.ARG.normalized.tsv:md5,87628e85f45fd91c51c4fa1fe40a4150", + "ERZ1664515.ARG.normalized.tsv:md5,4e38fecd8c8ad0242e1b1907072af64b", + "ERZ1664516.ARG.normalized.tsv:md5,79ef10afc7673dcc633861d1e5871b24", + "ERZ1664517.ARG.normalized.tsv:md5,abed6aef4acab35851fb2e12f276a9e0", + "ERZ1664518.ARG.normalized.tsv:md5,6d8c2154cad737d01eceb497ee3482b3", + "ERZ1664520.ARG.normalized.tsv:md5,fd60cd7748be9074357033907053a0b0", + "ERZ1664521.ARG.normalized.tsv:md5,d4a368c0125cad652e07065516da794b", + "ERZ1664523.ARG.normalized.tsv:md5,6473552807041db9b4fd0cd17a81659c", + "ERZ1664524.ARG.normalized.tsv:md5,03840f3b0030f196bd890fb1e576d952", + "ERZ1664528.ARG.normalized.tsv:md5,473d63c133be0c8d402af3bcf0fbfda9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-19T17:43:22.511288416" + }, + "macrel": { + "content": [ + "ERZ1664501.macrel.prediction.gz:md5,a553cb5d0745a01816c7b9c75822df29", + "ERZ1664503.macrel.prediction.gz:md5,3ca2cd9a6d999725b4a487c13ddb7fd9", + "ERZ1664504.macrel.prediction.gz:md5,52e9c1ec54f486765dea07d97b1c97b7", + "ERZ1664505.macrel.prediction.gz:md5,94fa17cce88549aab16555ee598c02bb", + "ERZ1664506.macrel.prediction.gz:md5,732be08d0236cf63641ef445a02cf1ee", + "ERZ1664507.macrel.prediction.gz:md5,eab2d426cf31a47597e61ddb25bf3d49", + "ERZ1664508.macrel.prediction.gz:md5,a0e40483e0136c3bb4abea9c9cba7d4b", + 
"ERZ1664509.macrel.prediction.gz:md5,47ca65c261cf402b390f6951bb1ed9dc", + "ERZ1664510.macrel.prediction.gz:md5,3e6ca785e579757616fe515efef1537e", + "ERZ1664511.macrel.prediction.gz:md5,df36fa0448591690fc6b7ded2517256e", + "ERZ1664515.macrel.prediction.gz:md5,b896ac50f6ebec1e725cff67bdff8fed", + "ERZ1664516.macrel.prediction.gz:md5,5dcbc87c6a44e8783dfe48b6385dfca8", + "ERZ1664517.macrel.prediction.gz:md5,02373e1b4383dc7501e7e142c9443b7a", + "ERZ1664518.macrel.prediction.gz:md5,7290477960af29a76563e8ded5d4a623", + "ERZ1664520.macrel.prediction.gz:md5,bdf1a379ee49e34b9a448762e5301926", + "ERZ1664521.macrel.prediction.gz:md5,91bd81f6c4e5c8ff4cc684ec04fa0a30", + "ERZ1664523.macrel.prediction.gz:md5,cee5ae9ba4a8a3879ab245b767815394", + "ERZ1664524.macrel.prediction.gz:md5,73f21254f4e4056a71ebf43851af3698", + "ERZ1664528.macrel.prediction.gz:md5,2e957f217c570a58ee61d97c690c1424" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:08.697655199" + }, + "abricate": { + "content": [ + "ERZ1664501.txt:md5,1ace32d2d44194d635db31daa89a6fae", + "ERZ1664503.txt:md5,da890e2cef1b1a34ec035f6198f0a60b", + "ERZ1664504.txt:md5,3db1864a6efe5321379e89dcee34d505", + "ERZ1664505.txt:md5,317354f6091bad44ab5852399d48eb4a", + "ERZ1664506.txt:md5,8fcc568d6a15d1c3ac889169ce884093", + "ERZ1664507.txt:md5,0be8f76b35aca900f8f7fa6d2e7fc1f9", + "ERZ1664508.txt:md5,357da5e192d9a17b501446e181f41942", + "ERZ1664509.txt:md5,c14f4aef2c96e8c4f6688af35fe07a2c", + "ERZ1664510.txt:md5,0d88060e28b267a308271a2a2af38b12", + "ERZ1664511.txt:md5,46adf11d5d3952e3709ba05ec76b5e8a", + "ERZ1664515.txt:md5,4b7a0db47ac6e9baf723e6b2ef31bfc4", + "ERZ1664516.txt:md5,1ccfd94077fe329f7b30351aa846b327", + "ERZ1664517.txt:md5,8137ab84373a5300c9626a9459a2c935", + "ERZ1664518.txt:md5,db514f4bef8de0d4799f478e1807adc6", + "ERZ1664520.txt:md5,a3afa2368c941cdb0c4abd8efa855f0e", + "ERZ1664521.txt:md5,2849a78188c4793d4608ba1775da1d58", + "ERZ1664523.txt:md5,507e1710e7220965010ad8375b4c434a", + "ERZ1664524.txt:md5,b7d380fe3fbcb0fe2ac23823cb6b35e8", + "ERZ1664528.txt:md5,64aff1aaaab8b3d009edd40527446c08" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:25.215727223" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,3061d75086b3d25605eda0ea05e1c927", + "results_summary.txt:md5,ea00de6524c521e06583ee13ffbcf338", + "results_summary.txt:md5,c5a6205a249cb6112b6235bbab51c60d", + "results_summary.txt:md5,c5a6205a249cb6112b6235bbab51c60d", + "results_summary.txt:md5,cc647b7810c7335edb6aa939f9f0fbde", + "results_summary.txt:md5,fa58a7f510100be20ce22fe3f6b036e3", + "results_summary.txt:md5,33b51ce0c8ba7c65bdb8bfe1480d85cb", + "results_summary.txt:md5,fa58a7f510100be20ce22fe3f6b036e3", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,657d012f697a1a9e3ce7f8a0f675aed0", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,c5a6205a249cb6112b6235bbab51c60d", + "results_summary.txt:md5,33b51ce0c8ba7c65bdb8bfe1480d85cb", + "results_summary.txt:md5,54ba6a1a657fea6b78abac50820c2c24", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,cc647b7810c7335edb6aa939f9f0fbde" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-07-24T00:54:34.896363945" + }, + "rgi": { + "content": [ + "ERZ1664501.txt:md5,631580295a01dfa9942a84ec6daddb7e", + "ERZ1664503.txt:md5,19b414d2f84d99e7da99558f13ddc3e5", + "ERZ1664504.txt:md5,35cfd6af482966669d98a65b56331a3d", + "ERZ1664505.txt:md5,d9f983090909140617bc784635220c4b", + "ERZ1664506.txt:md5,7a52f37f5672b06b05741ee058391f8f", + "ERZ1664507.txt:md5,721b11a0d9a16cbcbfd9004478b00600", + "ERZ1664508.txt:md5,b216d24eb67e17b00176fd0e9fddee2d", + "ERZ1664509.txt:md5,a83a12f5eee2817adde168ceea2918c5", + "ERZ1664510.txt:md5,648ff158c4eb26a5ea32d784f035919e", + "ERZ1664511.txt:md5,9bae24f90a3ec78bf949a98fdf22a497", + "ERZ1664515.txt:md5,2d0d0b2f048fa6c28840b1b6a2c9454d", + "ERZ1664516.txt:md5,eb69d148d8dad471c8d9a36dd915f4a4", + "ERZ1664517.txt:md5,79b0f80950eb5f0f51542b394a77a173", + "ERZ1664518.txt:md5,887de51b7632b0c635b0fe6deda75266", + "ERZ1664520.txt:md5,3caf2e1b0afcbfb73522bfa1cee1d06e", + "ERZ1664521.txt:md5,19334a653a98bbced73f1f2ec92e4eb8", + "ERZ1664523.txt:md5,0e47ce5c41d4d0d39d270a18ce62773a", + "ERZ1664524.txt:md5,8ca49d7dee9c7de25910de130de93859", + "ERZ1664528.txt:md5,6a10752196f2f33bcee972d15d669803" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:31.62460906" + }, + "amrfinderplus": { + "content": [ + "ERZ1664501.tsv:md5,dd81ffbf4ceddfd08df9c93d350d19fa", + "ERZ1664503.tsv:md5,7b1e3c4be2e369a2ca04fcd63da1acaa", + "ERZ1664504.tsv:md5,34f751f50617c9612b2e84ee61182ace", + "ERZ1664505.tsv:md5,195ee9875c095324bf9da03627551f71", + "ERZ1664506.tsv:md5,ab833bb2d72e4165130c590feeb81abc", + "ERZ1664507.tsv:md5,a416a831bcc9f2334064c45b04b65893", + "ERZ1664508.tsv:md5,7bf5af85e96374b92dec02986f55cd29", + "ERZ1664509.tsv:md5,90090405b63b9e4e6b115ad4d7658681", + "ERZ1664510.tsv:md5,5cf184c3f55fca8b2ab74fd8e2c68c8b", + "ERZ1664511.tsv:md5,caac6335c1ef383c33173a8a627c0a95", + "ERZ1664515.tsv:md5,cc8b7e5d2df434729a08b0aabefba91c", + "ERZ1664516.tsv:md5,1a92c5bec7ff819a6f830a1726894f7c", + "ERZ1664517.tsv:md5,d8c4989f198d6853e35820da21feffe2", + "ERZ1664518.tsv:md5,709d6bfb280c509b74f3c1b4d8a1c4bc", + "ERZ1664520.tsv:md5,2367abb0f961e00bf8dcdfe7e6083c2c", + "ERZ1664521.tsv:md5,12f6aee5987e86669534d3b64a62a840", + "ERZ1664523.tsv:md5,ba69795aaea671108bfa1e48c509dd79", + "ERZ1664524.tsv:md5,b0aa6f732ca2b922d2291deaba0d1312", + "ERZ1664528.tsv:md5,93249b05df4a0587db305684da8a1f8e" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:28.386682111" + } +} \ No newline at end of file diff --git a/tests/test_nothing.nf.test b/tests/test_nothing.nf.test new file mode 100644 index 00000000..a141d401 --- /dev/null +++ b/tests/test_nothing.nf.test @@ -0,0 +1,25 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_nothing" + + test("test_nothing_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + ) + } + } +} diff --git a/tests/test_preannotated.nf.test b/tests/test_preannotated.nf.test new file mode 100644 index 00000000..32a86ac4 --- /dev/null +++ b/tests/test_preannotated.nf.test @@ -0,0 +1,150 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_preannotated" + + 
test("test_preannotated_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMPir + { assert snapshot( + path("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa"), + path("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa"), + path("$outputDir/amp/ampir/sample_3/sample_3.ampir.tsv").text.contains("IPELEMRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/ampir/sample_3/sample_3.ampir.faa") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + path("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/amplify/sample_3/sample_3.amplify.tsv").text.contains("IPELEMRWGYPLSLVLMALSVAAPMIYFRRKGWLR") + ).match("amplify") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_3/sample_3_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_3.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel_log.txt") + ).match("macrel") }, + + // AMPcombi + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("NODE_515831_length_303_cov_1.532258_1"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log").text.contains(" \$\$\$\$\$\$\\ \$\$\\ \$\$\\") + ).match("ampcombi") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_3/sample_3.align.daa.tsv"), + 
path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG").text.contains("#ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG").text.contains("#ARG"), + path("$outputDir/arg/deeparg/sample_3/sample_3.mapping.ARG").text.contains("#ARG") + ).match("deeparg") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_3/sample_3.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_3/sample_3.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + path("$outputDir/arg/abricate/sample_3/sample_3.txt") + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + path("$outputDir/arg/amrfinderplus/sample_3/sample_3.tsv") + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + path("$outputDir/arg/rgi/sample_3/sample_3.txt") + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_3/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_3/class_b_1_2/results_summary.txt") + ).match("fargene") + }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_3/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv").readLines().size()).match("hamronization") }, + + // argNorm + { assert snapshot( + path("$outputDir/arg/argnorm/amrfinderplus/sample_1.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/sample_2.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/sample_3.normalized.tsv") + ).match("argnorm_amrfinderplus") }, + + { assert snapshot( + path("$outputDir/arg/argnorm/deeparg/sample_1.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_1.potential.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_2.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_2.potential.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_3.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_3.potential.ARG.normalized.tsv") + ).match("argnorm_deeparg") }, + + { assert snapshot( + path("$outputDir/arg/argnorm/abricate/sample_1.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/sample_2.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/sample_3.normalized.tsv") + 
).match("argnorm_abricate") } + ) + } + } +} diff --git a/tests/test_preannotated.nf.test.snap b/tests/test_preannotated.nf.test.snap new file mode 100644 index 00000000..7f957b19 --- /dev/null +++ b/tests/test_preannotated.nf.test.snap @@ -0,0 +1,181 @@ +{ + "deeparg": { + "content": [ + "sample_1.align.daa.tsv:md5,0e71c37318bdc6cba792196d0455293d", + "sample_2.align.daa.tsv:md5,1092ecd3cd6931653168b46c7afeb9e3", + "sample_3.align.daa.tsv:md5,b79070fe26acd1a10ae3aaf06b0d5901", + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.751995878" + }, + "ampir": { + "content": [ + true, + "sample_1.ampir.faa:md5,ab02c6e9c5f36ba9c31af97f95f9c317", + true, + "sample_2.ampir.faa:md5,12826875bd18623da78770187a7bbd2c", + true, + "sample_3.ampir.faa:md5,0a36691485930a1b77c4b68a738fd98d" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.436374797" + }, + "argnorm_amrfinderplus": { + "content": [ + "sample_1.normalized.tsv:md5,0a7f76ceb606ac46730a51dd57290768", + "sample_2.normalized.tsv:md5,602afce3ee0ee179855c848bd87208fe", + "sample_3.normalized.tsv:md5,d4fb8fbd890217eb4d667d7a4dd80c9b" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:25.55764618" + }, + "argnorm_abricate": { + "content": [ + "sample_1.normalized.tsv:md5,ddd8d454672c57b798f477ca32504a42", + "sample_2.normalized.tsv:md5,0323fc890a8f698ac4b0ac25f5e65964", + "sample_3.normalized.tsv:md5,f71490c27790071bd5974ecc5502cf73" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:23:32.486921338" + }, + "amplify": { + "content": [ + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.483855968" + }, + "argnorm_deeparg": { + "content": [ + "sample_1.ARG.normalized.tsv:md5,26aa409bfd0fc9096f2ac404760cc492", + "sample_1.potential.ARG.normalized.tsv:md5,d6732b4b9765bfa47e27ba673e24b6a4", + "sample_2.ARG.normalized.tsv:md5,1a19b894a7315aaae5f799e4539e6619", + "sample_2.potential.ARG.normalized.tsv:md5,b241e22f9116d8f518ba8526d52ac4dc", + "sample_3.ARG.normalized.tsv:md5,d40d387176649ce80827420fef6a0169", + "sample_3.potential.ARG.normalized.tsv:md5,f331efd21ea143c180a15ae56a5210d3" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:23:32.446555281" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,a4f853b560c6a8c215e0d243c24ec056", + "sample_2.macrel.smorfs.faa.gz:md5,83ae7b9808d7183d87b41c10253c9c9e", + "sample_3.macrel.smorfs.faa.gz:md5,a4f853b560c6a8c215e0d243c24ec056", + "sample_1.macrel.all_orfs.faa.gz:md5,d1ae1cadc3770994b2ed4982aadd5406", + "sample_2.macrel.all_orfs.faa.gz:md5,d9612a4275a912cabdae13b1ccc1857e", + "sample_3.macrel.all_orfs.faa.gz:md5,d1ae1cadc3770994b2ed4982aadd5406", + "sample_1.macrel.prediction.gz:md5,62146cf9f759c9c6c2c2f9e5ba816119", + "sample_2.macrel.prediction.gz:md5,1b479d31bb7dbf636a2028ddef72f5cc", + "sample_3.macrel.prediction.gz:md5,62146cf9f759c9c6c2c2f9e5ba816119", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_3.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + 
"nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T11:50:30.926088397" + }, + "hamronization": { + "content": [ + 246 + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-09-05T10:17:06.711064611" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,427cec26e354ac6b0ab6047ec6621202", + "sample_2.txt:md5,4c140c932a48a22bcd8ae911bda8f4c7", + "sample_3.txt:md5,d6534efe3d03173749d003bf9e624e68" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.87794287" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,2c8a073d2a7938e8aedcc097e6df2aa5", + "results_summary.txt:md5,3b86a5513e89e22a4c8b9279678ce0c0", + "results_summary.txt:md5,2c8a073d2a7938e8aedcc097e6df2aa5", + "results_summary.txt:md5,59f2e69c670d72f0c0a401e0dc90cbeb", + "results_summary.txt:md5,59f2e69c670d72f0c0a401e0dc90cbeb", + "results_summary.txt:md5,59f2e69c670d72f0c0a401e0dc90cbeb" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:25.248986515" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,dde77ae2dc240ee4717d8d33a92dfb66", + "sample_2.txt:md5,0e652d35ef6e9272aa194b55db609e75", + "sample_3.txt:md5,dde77ae2dc240ee4717d8d33a92dfb66" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:25.117843821" + }, + "ampcombi": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.639509225" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,29cfb6f34f420d802eda95c6d9daa361", + "sample_2.tsv:md5,d9b6565167d603a1f07cff2374db8eb2", + "sample_3.tsv:md5,29cfb6f34f420d802eda95c6d9daa361" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.994284774" + } +} \ No newline at end of file diff --git a/tests/test_preannotated_bgc.nf.test b/tests/test_preannotated_bgc.nf.test new file mode 100644 index 00000000..0e9ca618 --- /dev/null +++ b/tests/test_preannotated_bgc.nf.test @@ -0,0 +1,73 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_preannotated_bgc" + + test("test_preannotated_bgc_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert snapshot( + path("$outputDir/bgc/antismash/sample_1/css"), + path("$outputDir/bgc/antismash/sample_2/css"), + path("$outputDir/bgc/antismash/sample_3/css") + ).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_1/sample_1.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert path("$outputDir/bgc/antismash/sample_1/sample_1.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_1/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_1/regions.js").text.contains('PROKKA_859') }, // channel: json_sideloading + { assert 
path("$outputDir/bgc/antismash/sample_1/sample_1.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('PROKKA_859') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + { assert path("$outputDir/bgc/antismash/sample_3/sample_3.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert path("$outputDir/bgc/antismash/sample_3/sample_3.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_3/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_3/regions.js").text.contains('NODE_1328_length_3730_cov_3.647347') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_3/sample_3.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot( + path("$outputDir/bgc/deepbgc/sample_1/sample_1.bgc.gbk"), + path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk"), + path("$outputDir/bgc/deepbgc/sample_3/sample_3.bgc.gbk") + ).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_1/sample_1.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_1/LOG.txt").text.contains('Saved DeepBGC result to:') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_1/sample_1.full.gbk").text.contains('1 ttcgccagga gtggcgaagc gatgcgaggt') }, // channel: full_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to:') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + { assert path("$outputDir/bgc/deepbgc/sample_3/sample_3.antismash.json").text.contains("NODE_1328_length_3730_cov_3.647347") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_3/LOG.txt").text.contains('Saved DeepBGC result to:') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_3/sample_3.full.gbk").text.contains('1 tgaatctgtt ttaaagcaaa ttgatctcgc') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_1/sample_1.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_1/sample_1.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_3/sample_3.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_3/sample_3.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_preannotated_bgc.nf.test.snap b/tests/test_preannotated_bgc.nf.test.snap new 
file mode 100644 index 00000000..b05b7921 --- /dev/null +++ b/tests/test_preannotated_bgc.nf.test.snap @@ -0,0 +1,47 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ], + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ], + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:49:00.343547789" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_1.bgc.gbk:md5,e50e429959e9c4bf0c4b97d9dcd54a08", + "sample_2.bgc.gbk:md5,effe3cfc91772eb4e4b50ac46f13a941", + "sample_3.bgc.gbk:md5,c9028aca1282b314d296091e1f0b8e52" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T11:06:00.388012579" + }, + "gecco": { + "content": [ + "sample_1.genes.tsv:md5,804af8236a7148baf8919e3acf30947d", + "sample_1.features.tsv:md5,a84d59fd63e2593dc5872b4f9bb268b2", + "sample_2.genes.tsv:md5,5a2b20c5c1cd821a2af405229c4c0f78", + "sample_2.features.tsv:md5,579a27490188f5bc47a4deb4d1d1b8dc", + "sample_3.genes.tsv:md5,6874723404b3326f0f73e59f03b96837", + "sample_3.features.tsv:md5,490f98655089b3c73f88b93347cca465" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:49:00.44526019" + } +} \ No newline at end of file diff --git a/tests/test_prokka.nf.test b/tests/test_prokka.nf.test new file mode 100644 index 00000000..94e65ae2 --- /dev/null +++ b/tests/test_prokka.nf.test @@ -0,0 +1,108 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_prokka" + + test("test_prokka_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMPir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("amplify") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + 
path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // AMPcombi + { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("PROKKA_00019") }, + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log") + ).match("ampcombi_logfiles") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG") + ).match("deeparg_tsv_ARG") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt") + ).match("fargene") }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") }, + ) + } + } +} diff --git a/tests/test_prokka.nf.test.snap b/tests/test_prokka.nf.test.snap new file mode 100644 index 00000000..1ac682b3 --- /dev/null +++ b/tests/test_prokka.nf.test.snap @@ -0,0 +1,126 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,864466b0fb1acfc0e6b3425271f78ecb" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.990722943" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,69af3321b0bc808b7ef85f102395736f", + "sample_2.txt:md5,69af3321b0bc808b7ef85f102395736f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.941179691" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + 
"results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.978326519" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.964420301" + }, + "deeparg_tsv_ARG": { + "content": [ + "sample_1.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.align.daa.tsv:md5,06648de08caca0b7f42eab9576615226", + "sample_1.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9", + "sample_2.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.928505029" + }, + "ampir": { + "content": [ + false, + false, + false, + false + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.879791305" + }, + "ampcombi_logfiles": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + "Ampcombi_parse_tables.log:md5,a00a87b772b12c0d712f3e524cbe3bb2" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T11:10:28.28666354" + }, + "amplify": { + "content": [ + false, + false + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.889521968" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,844bb10e2f84e1a2b2db56eb36391dcf", + "sample_2.macrel.all_orfs.faa.gz:md5,9c0b8b1c3b03d7b20aee0b57103861ab", + "sample_1.macrel.prediction.gz:md5,9553e1dae8a5b912da8d74fa3f1cd9eb", + "sample_2.macrel.prediction.gz:md5,ae155e454eb7abd7c48c06aad9261603", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T11:10:28.238554892" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe", + "sample_2.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.952983609" + } +} \ No newline at end of file diff --git a/tests/test_taxonomy_bakta.nf.test b/tests/test_taxonomy_bakta.nf.test new file mode 100644 index 00000000..5a412fa9 --- /dev/null +++ b/tests/test_taxonomy_bakta.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_taxonomy_bakta" + + test("test_taxonomy_bakta") { + + when { + params { + outdir = "$outputDir" + run_taxa_classification = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // ampir + { assert snapshot( + 
file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("KKEJHB_00005"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("KKEJHB_00005"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("KDEMFK_00005"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("KDEMFK_00005") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("KKEJHB_00005"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("KDEMFK_00005") + ).match("amplify") }, + + // Macrel + { assert snapshot ( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // hmmsearch (AMP) + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // AMPcombi + { assert snapshot ( + file("$outputDir/reports/ampcombi2/sample_2/sample_2_ampcombi.tsv").text.contains("KDEMFK_00575"), + ).match("ampcombi") }, + { assert new File("$outputDir/reports/ampcombi2/ampcombi_complete_summary_taxonomy.tsv.gz").exists() }, + + // RGI + { assert snapshot ( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt") + ).match("rgi") }, + + // ABRicate + { assert snapshot ( + file("$outputDir/arg/abricate/sample_1/sample_1.txt").text.contains("COVERAGE_MAP"), + file("$outputDir/arg/abricate/sample_2/sample_2.txt").text.contains("COVERAGE_MAP") + ).match("abricate") }, + + // fARGene + { assert snapshot ( + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + file("$outputDir/arg/fargene/sample_2/class_b_3/results_summary.txt").text.contains("class_B_3.hmm"), + file("$outputDir/arg/fargene/sample_2/tet_efflux/results_summary.txt").text.contains("tet_efflux.hmm") + ).match("fargene") }, + + // hAMRonization + { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() }, + + // antiSMASH + { assert snapshot ( + file("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##"), + file("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") + ).match("antismash") }, + + // GECCO + { assert snapshot ( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv").text.contains("sequence_id"), + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv").text.contains("cluster_probability") + ).match("gecco") }, + + // hmmsearch (BGC) + { assert new File("$outputDir/bgc/hmmer_hmmsearch/sample_2/sample_2_ToyB.txt.gz").exists() } + ) + } + } +} diff --git a/tests/test_taxonomy_bakta.nf.test.snap 
b/tests/test_taxonomy_bakta.nf.test.snap new file mode 100644 index 00000000..c406c942 --- /dev/null +++ b/tests/test_taxonomy_bakta.nf.test.snap @@ -0,0 +1,111 @@ +{ + "abricate": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "gecco": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "ampcombi": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "antismash": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:06.451885596" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,844bb10e2f84e1a2b2db56eb36391dcf", + "sample_2.macrel.all_orfs.faa.gz:md5,9c0b8b1c3b03d7b20aee0b57103861ab", + "sample_1.macrel.prediction.gz:md5,9553e1dae8a5b912da8d74fa3f1cd9eb", + "sample_2.macrel.prediction.gz:md5,ae155e454eb7abd7c48c06aad9261603", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + } +} \ No newline at end of file diff --git a/tests/test_taxonomy_prokka.nf.test b/tests/test_taxonomy_prokka.nf.test new file mode 100644 index 00000000..e0992dbf --- /dev/null +++ b/tests/test_taxonomy_prokka.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_taxonomy_prokka" + + test("test_taxonomy_prokka") { + + when { + params { + outdir = "$outputDir" + run_taxa_classification = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // ampir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("PROKKA_00001"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("PROKKA_00001"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("PROKKA_00001"), + 
file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("PROKKA_00001") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("PROKKA_00001"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("PROKKA_00001") + ).match("amplify") }, + + // Macrel + { assert snapshot ( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // hmmsearch (AMP) + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // AMPcombi + { assert snapshot ( + file("$outputDir/reports/ampcombi2/sample_2/sample_2_ampcombi.tsv").text.contains("PROKKA_00109"), + ).match("ampcombi") }, + { assert new File("$outputDir/reports/ampcombi2/ampcombi_complete_summary_taxonomy.tsv.gz").exists() }, + + // RGI + { assert snapshot ( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt") + ).match("rgi") }, + + // ABRicate + { assert snapshot ( + file("$outputDir/arg/abricate/sample_1/sample_1.txt").text.contains("COVERAGE_MAP"), + file("$outputDir/arg/abricate/sample_2/sample_2.txt").text.contains("COVERAGE_MAP") + ).match("abricate") }, + + // fARGene + { assert snapshot ( + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + file("$outputDir/arg/fargene/sample_2/class_b_3/results_summary.txt").text.contains("class_B_3.hmm"), + file("$outputDir/arg/fargene/sample_2/tet_efflux/results_summary.txt").text.contains("tet_efflux.hmm") + ).match("fargene") }, + + // hAMRonization + { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() }, + + // antiSMASH + { assert snapshot ( + file("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##"), + file("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") + ).match("antismash") }, + + // GECCO + { assert snapshot ( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv").text.contains("sequence_id"), + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv").text.contains("cluster_probability") + ).match("gecco") }, + + // hmmsearch (BGC) + { assert new File("$outputDir/bgc/hmmer_hmmsearch/sample_2/sample_2_ToyB.txt.gz").exists() } + ) + } + } +} diff --git a/tests/test_taxonomy_prokka.nf.test.snap b/tests/test_taxonomy_prokka.nf.test.snap new file mode 100644 index 00000000..c00c3286 --- /dev/null +++ b/tests/test_taxonomy_prokka.nf.test.snap @@ -0,0 +1,111 @@ +{ + "abricate": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + 
"timestamp": "2024-07-24T13:05:30.025771" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "gecco": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "ampcombi": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "antismash": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:06.451885596" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,844bb10e2f84e1a2b2db56eb36391dcf", + "sample_2.macrel.all_orfs.faa.gz:md5,9c0b8b1c3b03d7b20aee0b57103861ab", + "sample_1.macrel.prediction.gz:md5,9553e1dae8a5b912da8d74fa3f1cd9eb", + "sample_2.macrel.prediction.gz:md5,ae155e454eb7abd7c48c06aad9261603", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + } +} \ No newline at end of file diff --git a/tests/test_taxonomy_pyrodigal.nf.test b/tests/test_taxonomy_pyrodigal.nf.test new file mode 100644 index 00000000..3cc5535e --- /dev/null +++ b/tests/test_taxonomy_pyrodigal.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_taxonomy_pyrodigal" + + test("test_taxonomy_pyrodigal") { + + when { + params { + outdir = "$outputDir" + run_taxa_classification = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // ampir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + 
file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("amplify") }, + + // Macrel + { assert snapshot ( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // hmmsearch (AMP) + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // AMPcombi + { assert snapshot ( + file("$outputDir/reports/ampcombi2/sample_2/sample_2_ampcombi.tsv").text.contains("NODE_515831_length_303_cov_1.532258_1"), + ).match("ampcombi") }, + { assert new File("$outputDir/reports/ampcombi2/ampcombi_complete_summary_taxonomy.tsv.gz").exists() }, + + // RGI + { assert snapshot ( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt") + ).match("rgi") }, + + // ABRicate + { assert snapshot ( + file("$outputDir/arg/abricate/sample_1/sample_1.txt").text.contains("COVERAGE_MAP"), + file("$outputDir/arg/abricate/sample_2/sample_2.txt").text.contains("COVERAGE_MAP") + ).match("abricate") }, + + // fARGene + { assert snapshot ( + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + file("$outputDir/arg/fargene/sample_2/class_b_3/results_summary.txt").text.contains("class_B_3.hmm"), + file("$outputDir/arg/fargene/sample_2/tet_efflux/results_summary.txt").text.contains("tet_efflux.hmm") + ).match("fargene") }, + + // hAMRonization + { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() }, + + // antiSMASH + { assert snapshot ( + file("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##"), + file("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") + ).match("antismash") }, + + // GECCO + { assert snapshot ( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv").text.contains("sequence_id"), + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv").text.contains("cluster_probability") + ).match("gecco") }, + + // hmmsearch (BGC) + { assert new File("$outputDir/bgc/hmmer_hmmsearch/sample_2/sample_2_ToyB.txt.gz").exists() } + ) + } + } +} diff --git a/tests/test_taxonomy_pyrodigal.nf.test.snap b/tests/test_taxonomy_pyrodigal.nf.test.snap new file mode 100644 index 00000000..9cde9d2a --- /dev/null +++ b/tests/test_taxonomy_pyrodigal.nf.test.snap @@ -0,0 +1,111 @@ +{ + "abricate": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "fargene": { + 
"content": [ + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "gecco": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "ampcombi": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "antismash": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:06.451885596" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,844bb10e2f84e1a2b2db56eb36391dcf", + "sample_2.macrel.all_orfs.faa.gz:md5,9c0b8b1c3b03d7b20aee0b57103861ab", + "sample_1.macrel.prediction.gz:md5,9553e1dae8a5b912da8d74fa3f1cd9eb", + "sample_2.macrel.prediction.gz:md5,ae155e454eb7abd7c48c06aad9261603", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "README.md:md5,cf088d9256ff7b7730699f17b64b4028", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-18T11:35:52.952483937" + } +} \ No newline at end of file diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index fafc69bf..e2e625f2 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -3,12 +3,39 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_funcscan_pipeline' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_funcscan_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and 
nf-core/modules
+//
+include { ANNOTATION } from '../subworkflows/local/annotation'
+include { AMP } from '../subworkflows/local/amp'
+include { ARG } from '../subworkflows/local/arg'
+include { BGC } from '../subworkflows/local/bgc'
+include { TAXA_CLASS } from '../subworkflows/local/taxa_class'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Installed directly from nf-core/modules
+//
+include { GUNZIP as GUNZIP_INPUT_PREP } from '../modules/nf-core/gunzip/main'
+include { SEQKIT_SEQ } from '../modules/nf-core/seqkit/seq/main'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -17,21 +44,281 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_func
*/
workflow FUNCSCAN {
-
    take:
    ch_samplesheet // channel: samplesheet read in from --input
+
    main:
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()
-    //
-    // MODULE: Run FastQC
-    //
-    FASTQC (
-        ch_samplesheet
-    )
-    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
-    ch_versions = ch_versions.mix(FASTQC.out.versions.first())
+
+    /*
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        CONFIG FILES
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    */
+
+    ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true)
+    ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
+    ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
+    ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true)
+
+
+    // Some tools require uncompressed input
+    ch_input_prep = ch_samplesheet
+        .map { meta, fasta, faa, gbk -> [meta + [category: 'all'], [fasta, faa, gbk]] }
+        .transpose()
+        .branch {
+            compressed: it[1].toString().endsWith('.gz')
+            uncompressed: it[1]
+        }
+
+    GUNZIP_INPUT_PREP(ch_input_prep.compressed)
+    ch_versions = ch_versions.mix(GUNZIP_INPUT_PREP.out.versions)
+
+    // Merge the already-uncompressed and the newly decompressed files back into
+    // a single input channel for downstream processing
+    ch_intermediate_input = GUNZIP_INPUT_PREP.out.gunzip
+        .mix(ch_input_prep.uncompressed)
+        .groupTuple()
+        .map { meta, files ->
+            def fasta_found = files.find { it.toString().tokenize('.').last().matches('fasta|fas|fna|fa') }
+            def faa_found = files.find { it.toString().endsWith('.faa') }
+            def gbk_found = files.find { it.toString().tokenize('.').last().matches('gbk|gbff') }
+            def fasta = fasta_found != null ? fasta_found : []
+            def faa = faa_found != null ? faa_found : []
+            def gbk = gbk_found != null ? gbk_found : []
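+
+            // File types that were not provided for a sample stay as empty lists ([]),
+            // so every emitted tuple keeps the fixed shape [meta, fasta, faa, gbk]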
+            [meta, fasta, faa, gbk]
+        }
+        .branch { meta, fasta, faa, gbk ->
+            preannotated: gbk != []
+            fastas: true
+        }
+
+    // Duplicate the contig FASTAs and keep only long contigs in the duplicate, for BGC screening.
+    // This speeds up the BGC run and prevents 'no hits found' failures on short contigs.
+    if (params.run_bgc_screening) {
+        SEQKIT_SEQ(ch_intermediate_input.fastas.map { meta, fasta, faa, gbk -> [meta, fasta] })
+        ch_input_for_annotation = ch_intermediate_input.fastas
+            .map { meta, fasta, protein, gbk -> [meta, fasta] }
+            .mix(SEQKIT_SEQ.out.fastx.map { meta, fasta -> [meta + [category: 'long'], fasta] })
+            .filter { meta, fasta ->
+                if (fasta != [] && fasta.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Sample ${meta.id} does not have contigs longer than ${params.bgc_mincontiglength} bp. Will not be screened for BGCs.")
+                }
+                !fasta.isEmpty()
+            }
+        ch_versions = ch_versions.mix(SEQKIT_SEQ.out.versions)
+    }
+    else {
+        ch_input_for_annotation = ch_intermediate_input.fastas.map { meta, fasta, protein, gbk -> [meta, fasta] }
+    }
+
+    /*
+        ANNOTATION
+    */
+
+    // Some tools require annotated FASTAs
+    if ((params.run_arg_screening && !params.arg_skip_deeparg) || params.run_amp_screening || params.run_bgc_screening) {
+        ANNOTATION(ch_input_for_annotation)
+        ch_versions = ch_versions.mix(ANNOTATION.out.versions)
+
+        ch_new_annotation = ch_input_for_annotation
+            .join(ANNOTATION.out.faa)
+            .join(ANNOTATION.out.gbk)
+    }
+    else {
+        ch_new_annotation = ch_intermediate_input.fastas
+    }
+
+    // Mix back the preannotated samples with the newly annotated ones
+    ch_prepped_input = ch_new_annotation
+        .filter { meta, fasta, faa, gbk -> meta.category != 'long' }
+        .mix(ch_intermediate_input.preannotated)
+        .multiMap { meta, fasta, faa, gbk ->
+            fastas: [meta, fasta]
+            faas: [meta, faa]
+            gbks: [meta, gbk]
+        }
+
+    if (params.run_bgc_screening) {
+
+        ch_prepped_input_long = ch_new_annotation
+            .filter { meta, fasta, faa, gbk -> meta.category == 'long' }
+            .mix(ch_intermediate_input.preannotated)
+            .multiMap { meta, fasta, faa, gbk ->
+                fastas: [meta, fasta]
+                faas: [meta, faa]
+                gbks: [meta, gbk]
+            }
+    }
+
+    /*
+        TAXONOMIC CLASSIFICATION
+    */
+
+    // The final subworkflow reports need taxonomic classification.
+    // This can be either on NT or AA level, depending on the annotation.
+    // TODO: Only NT at the moment. AA tax. classification will be added only when its PR is merged.
+    if (params.run_taxa_classification) {
+        TAXA_CLASS(ch_prepped_input.fastas)
+        ch_versions = ch_versions.mix(TAXA_CLASS.out.versions)
+        ch_taxonomy_tsv = TAXA_CLASS.out.sample_taxonomy
+    }
+    else {
+
+        ch_mmseqs_db = Channel.empty()
+        ch_taxonomy_querydb = Channel.empty()
+        ch_taxonomy_querydb_taxdb = Channel.empty()
+        ch_taxonomy_tsv = Channel.empty()
+    }
+
+    /*
+        SCREENING
+    */
+
+    /*
+        AMPs
+    */
+    if (params.run_amp_screening && !params.run_taxa_classification) {
+        AMP(
+            ch_prepped_input.fastas,
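+            // A [] entry is the placeholder for 'no file of this type'; since [].isEmpty()
+            // is also true, the filter below drops placeholders and genuinely empty files
+            // alike, while the `file != []` guard warns only about the latter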
+            ch_prepped_input.faas.filter { meta, file ->
+                if (file != [] && file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+            ch_taxonomy_tsv,
+            ch_prepped_input.gbks,
+        )
+        ch_versions = ch_versions.mix(AMP.out.versions)
+    }
+    else if (params.run_amp_screening && params.run_taxa_classification) {
+        AMP(
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas.filter { meta, file ->
+                if (file != [] && file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+            ch_taxonomy_tsv.filter { meta, file ->
+                if (file != [] && file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+            ch_prepped_input.gbks,
+        )
+        ch_versions = ch_versions.mix(AMP.out.versions)
+    }
+
+    /*
+        ARGs
+    */
+    if (params.run_arg_screening && !params.run_taxa_classification) {
+        // DeepARG is the only ARG screening tool that needs the annotated proteins,
+        // so an empty placeholder is passed instead of the FAA channel when it is skipped
+        if (params.arg_skip_deeparg) {
+            ARG(
+                ch_prepped_input.fastas,
+                [],
+                ch_taxonomy_tsv,
+            )
+        }
+        else {
+            ARG(
+                ch_prepped_input.fastas,
+                ch_prepped_input.faas.filter { meta, file ->
+                    if (file.isEmpty()) {
+                        log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. ARG screening tools requiring this file will not be executed: ${meta.id}")
+                    }
+                    !file.isEmpty()
+                },
+                ch_taxonomy_tsv,
+            )
+        }
+        ch_versions = ch_versions.mix(ARG.out.versions)
+    }
+    else if (params.run_arg_screening && params.run_taxa_classification) {
+        if (params.arg_skip_deeparg) {
+            ARG(
+                ch_prepped_input.fastas,
+                [],
+                ch_taxonomy_tsv.filter { meta, file ->
+                    if (file.isEmpty()) {
+                        log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                    }
+                    !file.isEmpty()
+                },
+            )
+        }
+        else {
+            ARG(
+                ch_prepped_input.fastas,
+                ch_prepped_input.faas.filter { meta, file ->
+                    if (file.isEmpty()) {
+                        log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. ARG screening tools requiring this file will not be executed: ${meta.id}")
+                    }
+                    !file.isEmpty()
+                },
+                ch_taxonomy_tsv.filter { meta, file ->
+                    if (file.isEmpty()) {
+                        log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                    }
+                    !file.isEmpty()
+                },
+            )
+        }
+        ch_versions = ch_versions.mix(ARG.out.versions)
+    }
+
+    /*
+        BGCs
+    */
+    if (params.run_bgc_screening && !params.run_taxa_classification) {
+        BGC(
+            ch_prepped_input_long.fastas,
+            ch_prepped_input_long.faas.filter { meta, file ->
+                if (file != [] && file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+            ch_prepped_input_long.gbks.filter { meta, file ->
+                if (file != [] && file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+            ch_taxonomy_tsv,
+        )
+        ch_versions = ch_versions.mix(BGC.out.versions)
+    }
+    else if (params.run_bgc_screening && params.run_taxa_classification) {
+        BGC(
+            ch_prepped_input_long.fastas,
+            ch_prepped_input_long.faas.filter { meta, file ->
+                if (file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+            ch_prepped_input_long.gbks.filter { meta, file ->
+                if (file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
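+            // As in the AMP and ARG arms above, an empty taxonomy table is dropped with
+            // a warning so that taxonomy merging is not attempted on it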
+            ch_taxonomy_tsv.filter { meta, file ->
+                if (file.isEmpty()) {
+                    log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                }
+                !file.isEmpty()
+            },
+        )
+        ch_versions = ch_versions.mix(BGC.out.versions)
+    }

    //
    // Collate and save software versions
    //
    softwareVersionsToYAML(ch_versions)
        .collectFile(
            storeDir: "${params.outdir}/pipeline_info",
-            name: 'nf_core_' + 'funcscan_software_' + 'mqc_' + 'versions.yml',
+            name: 'nf_core_' + 'funcscan_software_' + 'mqc_' + 'versions.yml',
            sort: true,
-            newLine: true
-        ).set { ch_collated_versions }
+            newLine: true,
+        )
+        .set { ch_collated_versions }

    //
    // MODULE: MultiQC
    //
-    ch_multiqc_config = Channel.fromPath(
-        "$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-    ch_multiqc_custom_config = params.multiqc_config ?
-        Channel.fromPath(params.multiqc_config, checkIfExists: true) :
-        Channel.empty()
-    ch_multiqc_logo = params.multiqc_logo ?
-        Channel.fromPath(params.multiqc_logo, checkIfExists: true) :
-        Channel.empty()
-
-    summary_params = paramsSummaryMap(
-        workflow, parameters_schema: "nextflow_schema.json")
+    ch_multiqc_config = Channel.fromPath(
+        "${projectDir}/assets/multiqc_config.yml",
+        checkIfExists: true
+    )
+    ch_multiqc_custom_config = params.multiqc_config
+        ? Channel.fromPath(params.multiqc_config, checkIfExists: true)
+        : Channel.empty()
+    ch_multiqc_logo = params.multiqc_logo
+        ? Channel.fromPath(params.multiqc_logo, checkIfExists: true)
+        : Channel.fromPath("${workflow.projectDir}/docs/images/nf-core-funcscan_logo_light.png", checkIfExists: true)
+
+    summary_params = paramsSummaryMap(
+        workflow,
+        parameters_schema: "nextflow_schema.json"
+    )
    ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
    ch_multiqc_files = ch_multiqc_files.mix(
-        ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_custom_methods_description = params.multiqc_methods_description ?
-        file(params.multiqc_methods_description, checkIfExists: true) :
-        file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-    ch_methods_description = Channel.value(
-        methodsDescriptionText(ch_multiqc_custom_methods_description))
+        ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')
+    )
+    ch_multiqc_custom_methods_description = params.multiqc_methods_description
+        ?
file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) - MULTIQC ( + if ((params.run_arg_screening && !params.arg_skip_deeparg) || (params.run_amp_screening && (params.amp_run_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir)) || params.run_bgc_screening) { + ch_multiqc_files = ch_multiqc_files.mix(ANNOTATION.out.multiqc_files.collect { it[1] }) + } + + MULTIQC( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/