Skip to content

Commit

Permalink
Adding seqkit sliding command (nf-core#3637)
Browse files Browse the repository at this point in the history
* Adding seqkit sliding command

* Linting fix

* Updating for better matching

* reverted to regex using end line markers for better matching, otherwise fastq gets caught by fa, error in other modules

* Added a test for fastq too

---------

Co-authored-by: Matthieu Muffato <mm49@sanger.ac.uk>
  • Loading branch information
2 people authored and limrp committed Jul 28, 2023
1 parent 4592ece commit e335edf
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 0 deletions.
40 changes: 40 additions & 0 deletions modules/nf-core/seqkit/sliding/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Runs `seqkit sliding` on a single FASTA/FASTQ file and emits the resulting
// window sequences together with a versions.yml recording the seqkit version.
//
// Inputs : tuple val(meta), path(fastx)  - meta map (meta.id is used as the
//          default output prefix) and the sequence file to slide over.
// Outputs: fastx    - tuple of meta and the "<prefix>.<fasta|fastq>" file
//          versions - versions.yml
// Window/step sizes and any other seqkit options are supplied via task.ext.args.
process SEQKIT_SLIDING {
    tag "$meta.id"
    label 'process_low'

    conda "bioconda::seqkit=2.1.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0':
        'biocontainers/seqkit:2.1.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(fastx)

    output:
    tuple val(meta), path("*.fast*"), emit: fastx
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The output extension follows the input format: FASTQ unless the input
    // file name carries a FASTA suffix. The match is anchored on the end of the
    // name so that ".fastq" is never mistaken for ".fa"/".fas", and an optional
    // ".gz" is accepted so gzip-compressed FASTA is not mislabelled as FASTQ.
    def extension = "fastq"
    if ("$fastx" ==~ /.+\.(fasta|fa|fas|fna|faa)(\.gz)?$/) {
        extension = "fasta"
    }

    // Writing to the same name as the staged input would clobber it.
    if ("$fastx" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    seqkit \\
        sliding \\
        ${fastx} \\
        ${args} \\
        --threads ${task.cpus} \\
        -o ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit | sed '3!d; s/Version: //' )
    END_VERSIONS
    """
}
42 changes: 42 additions & 0 deletions modules/nf-core/seqkit/sliding/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Module metadata for SEQKIT_SLIDING: describes the wrapped tool and the
# process's input/output channels.
name: seqkit_sliding
description: Use seqkit to generate sliding windows of input fasta/q sequences
keywords:
  - seqkit
  - sliding
  - windows
tools:
  - seqkit:
      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
      homepage: https://bioinf.shenwei.me/seqkit/usage/
      documentation: https://bioinf.shenwei.me/seqkit/usage/
      tool_dev_url: https://github.com/shenwei356/seqkit/
      doi: "10.1371/journal.pone.0163962"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastx:
      type: file
      description: fasta/q file
      pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # Key matches the `emit: fastx` channel name declared in main.nf
  - fastx:
      type: file
      description: fasta/q window file
      pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}*"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@DLBPointon"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3259,6 +3259,10 @@ seqkit/replace:
- modules/nf-core/seqkit/replace/**
- tests/modules/nf-core/seqkit/replace/**

# Path globs for the seqkit/sliding tag: the module sources and their tests
seqkit/sliding:
- modules/nf-core/seqkit/sliding/**
- tests/modules/nf-core/seqkit/sliding/**

seqkit/split2:
- modules/nf-core/seqkit/split2/**
- tests/modules/nf-core/seqkit/split2/**
Expand Down
25 changes: 25 additions & 0 deletions tests/modules/nf-core/seqkit/sliding/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { SEQKIT_SLIDING } from '../../../../../modules/nf-core/seqkit/sliding/main.nf'

// Runs SEQKIT_SLIDING on the 'genome_fasta' entry of the sarscov2 test data.
workflow test_seqkit_sliding_fasta {

    // Resolve the test file up front; fail early if it does not exist
    def genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Channel element layout: [ meta map, [ sequence file ] ]
    SEQKIT_SLIDING (
        [
            [ id:'test' ],
            [ genome_fasta ]
        ]
    )

}

// Runs SEQKIT_SLIDING on the 'test_1_fastq_gz' entry of the sarscov2 illumina test data.
workflow test_seqkit_sliding_fastq {

    // Resolve the test file up front; fail early if it does not exist
    def reads_fastq = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // Channel element layout: [ meta map, [ sequence file ] ]
    SEQKIT_SLIDING (
        [
            [ id:'test' ],
            [ reads_fastq ]
        ]
    )

}
7 changes: 7 additions & 0 deletions tests/modules/nf-core/seqkit/sliding/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Test-only settings applied to the SEQKIT_SLIDING process.
process {

    withName: 'SEQKIT_SLIDING' {
        // Extra command-line options handed to `seqkit sliding`
        // (-s: step size, -W: window size)
        ext.args   = '-s 2 -W 5'

        // Publish under <outdir>/<tool>, where <tool> is the lower-cased first
        // '_'-separated token of the last ':'-separated part of the process name
        publishDir = { "${params.outdir}/${task.process.tokenize(':').last().tokenize('_').first().toLowerCase()}" }
    }
}
18 changes: 18 additions & 0 deletions tests/modules/nf-core/seqkit/sliding/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Test case for the test_seqkit_sliding_fasta entry workflow: expects a FASTA
# window file with a fixed checksum plus the versions.yml file.
- name: seqkit sliding test_seqkit_sliding_fasta
  command: nextflow run ./tests/modules/nf-core/seqkit/sliding -entry test_seqkit_sliding_fasta -c ./tests/config/nextflow.config
  tags:
    - seqkit/sliding
    - seqkit
  files:
    - path: output/seqkit/test.fasta
      md5sum: 'd5e112b55c37bef1f16f28ba16b323dd'
    # No md5sum is pinned for versions.yml
    - path: output/seqkit/versions.yml

# Test case for the test_seqkit_sliding_fastq entry workflow: expects a FASTQ
# window file with a fixed checksum plus the versions.yml file.
- name: seqkit sliding test_seqkit_sliding_fastq
  command: nextflow run ./tests/modules/nf-core/seqkit/sliding -entry test_seqkit_sliding_fastq -c ./tests/config/nextflow.config
  tags:
    - seqkit/sliding
    - seqkit
  files:
    - path: output/seqkit/test.fastq
      md5sum: '59f475cac6d2c372ce2d3a1e3c40305b'
    # No md5sum is pinned for versions.yml
    - path: output/seqkit/versions.yml

0 comments on commit e335edf

Please sign in to comment.