Skip to content

Commit

Permalink
Blastdbcmd new module (nf-core#5482)
Browse files Browse the repository at this point in the history
* starting blastdbcmd

* carry on

* Making it work with simple entry version

* Upgrade to make entry_batch work

* Upgrade to make it work with tests

* adding missing tag

* Removed versions to make it pass the tests in Github

* Make it work with versions and so on

* Update modules/nf-core/blast/blastdbcmd/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/nf-core/blast/blastdbcmd/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Move module into two and upgrade according to comments

* upgrade with outfmt forced

* Turn back into one module

* Update modules/nf-core/blast/blastdbcmd/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* update tags and stub test

* fix stub

* making it work more widely

* editorcheck error

* addressing comments

* Update modules/nf-core/blast/blastdbcmd/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

---------

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
  • Loading branch information
toniher and jfy133 authored Apr 30, 2024
1 parent 84efd2f commit b00ebb6
Show file tree
Hide file tree
Showing 9 changed files with 428 additions and 1 deletion.
7 changes: 7 additions & 0 deletions modules/nf-core/blast/blastdbcmd/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the blast/blastdbcmd module.
# main.nf points its `conda` directive at this file via ${moduleDir}/environment.yml.
name: "blast_blastdbcmd"
# Channels are listed in lookup-priority order.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Pinned to the same BLAST+ release (2.15.0) as the container images referenced in main.nf.
- "bioconda::blast=2.15.0"
64 changes: 64 additions & 0 deletions modules/nf-core/blast/blastdbcmd/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Retrieve one or more entries from a BLAST database with `blastdbcmd`.
//
// Inputs:
//   meta        - Groovy map with sample information (meta.id is used as tag and default prefix)
//   entry       - identifier string passed to `-entry`; mutually exclusive with entry_batch
//   entry_batch - file with identifiers passed to `-entry_batch`; mutually exclusive with entry
//   meta2       - Groovy map with database information (not used in the script)
//   db          - staged BLAST database; index files are located with `find` below
//
// Outputs:
//   fasta    - `${prefix}.fasta`, produced unless a non-`%f` `-outfmt` is given in ext.args
//   text     - `${prefix}.txt`, produced when `-outfmt` other than `%f` is given in ext.args
//   versions - versions.yml with the blastdbcmd version
process BLAST_BLASTDBCMD {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1':
        'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }"

    input:
    tuple val(meta) , val(entry), path(entry_batch)
    tuple val(meta2), path(db)

    output:
    tuple val(meta), path("*.fasta"), optional: true, emit: fasta
    tuple val(meta), path("*.txt")  , optional: true, emit: text
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Exactly one of entry / entry_batch must be set (fails for both and for neither).
    assert (!entry && entry_batch) || (entry && !entry_batch) : "ERROR: You must use either entry or entry_batch, not both at the same time"
    // The identifier is single-quoted because BLAST ids may contain `|` (e.g. `sp|P12345|NAME`),
    // which an unquoted interpolation would hand to the shell as a pipe.
    def input = entry ? "-entry '${entry}'" : "-entry_batch ${entry_batch}"
    // Any -outfmt other than the literal `-outfmt %f` is treated as generic text output.
    def extension = args.contains("-outfmt") && !args.contains("-outfmt %f") ? "txt" : "fasta"
    """
    # Locate the database base name from its index files. Alias files (*.nal / *.pal)
    # are checked first so that a multi-volume database resolves to its alias name
    # instead of the individual volume names; nucleotide is tried before protein.
    DB=`find -L ./ -name "*.nal" | sed 's/\\.nal\$//'`
    if test -z "\$DB"
    then
        DB=`find -L ./ -name "*.nhr" | sed 's/\\.nhr\$//'`
    fi
    if test -z "\$DB"
    then
        DB=`find -L ./ -name "*.pal" | sed 's/\\.pal\$//'`
    fi
    if test -z "\$DB"
    then
        DB=`find -L ./ -name "*.phr" | sed 's/\\.phr\$//'`
    fi
    if test -z "\$DB"
    then
        echo "ERROR: no BLAST database index (*.nal, *.nhr, *.pal or *.phr) found in the staged input" >&2
        exit 1
    fi

    blastdbcmd \\
        -db \$DB \\
        ${args} \\
        -out ${prefix}.${extension} \\
        ${input}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blast: \$(blastdbcmd -version 2>&1 | head -n1 | sed 's/^.*blastdbcmd: //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Same extension rule as the real script so the stub emits on the matching output channel.
    def extension = args.contains("-outfmt") && !args.contains("-outfmt %f") ? "txt" : "fasta"
    """
    touch ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blast: \$(blastdbcmd -version 2>&1 | head -n1 | sed 's/^.*blastdbcmd: //; s/ .*\$//')
    END_VERSIONS
    """
}
61 changes: 61 additions & 0 deletions modules/nf-core/blast/blastdbcmd/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Module metadata for blast/blastdbcmd: describes the tool plus the input and
# output channels declared in main.nf.
name: blast_blastdbcmd
description: Retrieve entries from a BLAST database
keywords:
  - fasta
  - blast
  - database
  - retrieval
  - identifier
tools:
  - blast:
      description: |
        BLAST finds regions of similarity between biological sequences.
      homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi
      documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs
      doi: 10.1016/S0022-2836(05)80360-2
      licence: ["US-Government-Work"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - entry:
      type: string
      description: Entry identifier of sequence in database. It cannot be used along with entry_batch
  - entry_batch:
      type: file
      description: |
        File with a list of entry identifiers of sequences in database (one identifier per line). It cannot be used along with entry
  - meta2:
      type: map
      description: |
        Groovy Map containing db information
        e.g. [ id:'test2' ]
  - db:
      type: file
      description: Input BLAST-indexed database
      pattern: "*.{fa.*,fasta.*}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: Output fasta file (default format)
      pattern: "*.fasta"
  - text:
      type: file
      description: |
        Output text file (generic format if fasta not used, i.e. `-outfmt` with a format other than `%f` is supplied to `ext.args`)
      pattern: "*.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@toniher"
maintainers:
  - "@toniher"
122 changes: 122 additions & 0 deletions modules/nf-core/blast/blastdbcmd/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// nf-test specification for the BLAST_BLASTDBCMD process.
// Every test takes its database from the BLAST_MAKEBLASTDB setup step and
// compares the process outputs against a stored snapshot.
nextflow_process {

name "Test Process BLAST_BLASTDBCMD"
script "../main.nf"
process "BLAST_BLASTDBCMD"
// Default config for all tests; the last test overrides it with its own config file.
config "./nextflow.config"
tag "modules"
tag "modules_nfcore"
tag "blast"
tag "blast/blastdbcmd"
// Tagged with makeblastdb as well because the setup block below runs that module.
tag "blast/makeblastdb"

// Build the BLAST database from the SARS-CoV-2 proteome FASTA; tests read it via BLAST_MAKEBLASTDB.out.db.
setup {
run("BLAST_MAKEBLASTDB") {
script "../../makeblastdb/main.nf"
process {
"""
input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
"""
}
}
}


// Single identifier passed as `entry`; `entry_batch` is given as an empty list.
test("Should query with a protein identifier against a FASTA DB") {

when {
params {
outdir = "$outputDir"
}
process {
"""
input[0] = [ [id: 'test'], 'ENSSASP00005000002.1', [] ]
input[1] = BLAST_MAKEBLASTDB.out.db
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Same inputs as the previous test, run with the `-stub` option so the stub block is executed.
test("Should query with a protein identifier against a FASTA DB - stub") {

options '-stub'

when {
params {
outdir = "$outputDir"
}
process {
"""
input[0] = [ [id: 'test'], 'ENSSASP00005000002.1', [] ]
input[1] = BLAST_MAKEBLASTDB.out.db
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Identifier list passed as `entry_batch`; `entry` is given as an empty string.
test("Should query with a file containing a list of protein identifiers against a FASTA DB") {

when {
params {
outdir = "$outputDir"
}
process {
"""
input[0] = [ [id:'test'], '', file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/blast/proteome.list', checkIfExists: true) ]
input[1] = BLAST_MAKEBLASTDB.out.db
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Same identifier list, but run with ./nextflow.txt.config instead of the default config.
// NOTE(review): that config is not visible here; presumably it sets an `-outfmt` in ext.args
// so the process emits the text output — confirm against the config file.
test("Should query with a file containing a list of protein identifiers against a FASTA DB - text file") {

config "./nextflow.txt.config"

when {
params {
outdir = "$outputDir"
}
process {

"""
input[0] = [ [id:'test'], '', file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/blast/proteome.list', checkIfExists: true) ]
input[1] = BLAST_MAKEBLASTDB.out.db
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Loading

0 comments on commit b00ebb6

Please sign in to comment.