Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Cellsnp to modules #5518

Merged
merged 22 commits into from
May 8, 2024
9 changes: 9 additions & 0 deletions modules/nf-core/cellsnp/modea/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the cellsnp/modea module.
name: cellsnp_modea
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Exact version pin, resolved explicitly from the bioconda channel.
  - 'bioconda::cellsnp-lite=1.2.3'
68 changes: 68 additions & 0 deletions modules/nf-core/cellsnp/modea/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
 * CELLSNP_MODEA: run cellsnp-lite on one alignment file plus a barcode list.
 *
 * Input tuple:
 *   meta       - Groovy map; meta.id is used as the task tag and the default output prefix
 *   bam        - alignment file, passed via `-s`
 *   bai        - index for `bam`; only staged into the work dir, never named on the command line
 *   region_vcf - optional; when non-empty it is passed via `-R`, otherwise the flag is omitted
 *   barcode    - barcode list, passed via `-b`
 *
 * cellsnp-lite writes fixed `cellSNP.*` file names into the work dir (`-O .`);
 * the script renames them to `${prefix}.*` so they match the output globs below.
 * The `cell` output is optional because cellSNP.cells.vcf.gz is not produced by
 * every invocation (the original script tied it to `--genotype` in ext.args).
 */
process CELLSNP_MODEA {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/cellsnp-lite:1.2.3--h6141fd1_2' :
        'biocontainers/cellsnp-lite:1.2.3--h6141fd1_2' }"

    input:
    tuple val(meta), path(bam), path(bai), path(region_vcf), path(barcode)

    output:
    tuple val(meta), path('*.base.vcf.gz') , emit: base
    tuple val(meta), path('*.cells.vcf.gz'), emit: cell          , optional: true
    tuple val(meta), path('*.samples.tsv') , emit: sample
    tuple val(meta), path('*.tag.AD.mtx')  , emit: allele_depth
    tuple val(meta), path('*.tag.DP.mtx')  , emit: depth_coverage
    tuple val(meta), path('*.tag.OTH.mtx') , emit: depth_other
    path 'versions.yml'                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // An empty region_vcf (e.g. `[]`) is falsy, so the -R flag is dropped entirely.
    def region_file = region_vcf ? "-R $region_vcf" : ''
    """
    cellsnp-lite -s $bam \\
        -b $barcode \\
        $region_file \\
        -O . \\
        --gzip \\
        --nproc $task.cpus \\
        $args

    mv cellSNP.base.vcf.gz ${prefix}.base.vcf.gz
    # Rename the per-cell VCF only when the tool actually produced it. Testing for the
    # file avoids splicing ext.args into a quoted bash string, which breaks as soon as
    # the arguments themselves contain a double quote.
    if [[ -f cellSNP.cells.vcf.gz ]]; then
        mv cellSNP.cells.vcf.gz ${prefix}.cells.vcf.gz
    fi
    mv cellSNP.tag.AD.mtx ${prefix}.tag.AD.mtx
    mv cellSNP.tag.DP.mtx ${prefix}.tag.DP.mtx
    mv cellSNP.tag.OTH.mtx ${prefix}.tag.OTH.mtx
    mv cellSNP.samples.tsv ${prefix}.samples.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        cellsnp: \$(cellsnp-lite --v | awk '{print \$2}')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Write a real (empty) gzip stream so consumers that decompress the stub do not fail.
    echo "" | gzip > ${prefix}.base.vcf.gz
    touch ${prefix}.samples.tsv
    touch ${prefix}.tag.AD.mtx
    touch ${prefix}.tag.DP.mtx
    touch ${prefix}.tag.OTH.mtx

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        cellsnp: \$(cellsnp-lite --v | awk '{print \$2}')
    END_VERSIONS
    """
}
80 changes: 80 additions & 0 deletions modules/nf-core/cellsnp/modea/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for cellsnp/modea: describes the tool and every input/output channel.
name: "cellsnp_modea"
description: Cellsnp-lite is a C/C++ tool for efficient genotyping bi-allelic SNPs on single cells. You can use the mode A of cellsnp-lite after read alignment to obtain the snp x cell pileup UMI or read count matrices for each alleles of given or detected SNPs for droplet based single cell data.
keywords:
  - genotyping
  - single cell
  - SNP
  - droplet based single cells
tools:
  - "cellsnp":
      description: "Efficient genotyping bi-allelic SNPs on single cells"
      homepage: "https://github.com/single-cell-genetics/cellsnp-lite"
      documentation: "https://cellsnp-lite.readthedocs.io"
      tool_dev_url: "https://github.com/single-cell-genetics/cellsnp-lite"
      doi: "10.1093/bioinformatics/btab358"
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - bam:
      type: file
      description: A single BAM/SAM/CRAM file, e.g., from CellRanger.
      pattern: "*.{bam,cram,sam}"
  - bai:
      type: file
      description: The index of the BAM/CRAM file.
      pattern: "*.{bai,crai}"
  - region_vcf:
      type: file
      description: An optional VCF file listing all candidate SNPs for genotyping.
      # No whitespace inside the braces: a space would become part of the second alternative.
      pattern: "*.{vcf,vcf.gz}"
  - barcode:
      type: file
      description: A plain file listing all effective cell barcodes.
      pattern: "*.tsv"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - base:
      type: file
      description: A VCF file listing genotyped SNPs and aggregated AD & DP information (without GT).
      pattern: "*.base.vcf.gz"
  # Declared `optional: true` in the process, so this file may be absent.
  - cell:
      type: file
      description: A VCF file listing genotyped SNPs and aggregated AD & DP information & genotype (GT) information for each cell or sample.
      pattern: "*.cells.vcf.gz"
  - sample:
      type: file
      description: A TSV file listing cell barcodes or sample IDs.
      # Matches the process output glob, which is narrower than "*.tsv".
      pattern: "*.samples.tsv"
  - allele_depth:
      type: file
      description: A file in “Matrix Market exchange formats”, containing the allele depths of the alternative (ALT) alleles.
      pattern: "*.tag.AD.mtx"
  - depth_coverage:
      type: file
      description: A file in “Matrix Market exchange formats”, containing the sum of allele depths of the reference and alternative alleles (REF + ALT).
      pattern: "*.tag.DP.mtx"
  - depth_other:
      type: file
      description: A file in “Matrix Market exchange formats”, containing the sum of allele depths of all the alleles other than REF and ALT.
      pattern: "*.tag.OTH.mtx"

authors:
  - "@wxicu"
maintainers:
  - "@wxicu"
91 changes: 91 additions & 0 deletions modules/nf-core/cellsnp/modea/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// nf-core modules test cellsnp/modea
// nf-test specification for CELLSNP_MODEA: one real run (indexing the BAM with
// SAMTOOLS_INDEX in a setup step) and one stub run.
nextflow_process {

    name "Test Process CELLSNP_MODEA"
    script "../main.nf"
    process "CELLSNP_MODEA"

    tag "modules"
    tag "modules_nfcore"
    tag "cellsnp"
    tag "cellsnp/modea"
    tag "samtools/index"

    test("genotyping") {
        // Produce the BAM index that CELLSNP_MODEA expects alongside the BAM.
        setup {
            run("SAMTOOLS_INDEX") {
                script "../../../samtools/index/main.nf"
                process {
                    """
                    input[0] = [
                        [ id:'sample1' ],
                        file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/chr21.bam', checkIfExists: true)
                    ]
                    """
                }
            }
        }

        when {
            // The index is only available as a channel, and a channel cannot be placed
            // inside a plain list (reported failure: "Not a valid path value type:
            // groovyx.gpars.dataflow.DataflowVariable"). The whole input tuple is
            // therefore built by mapping over the SAMTOOLS_INDEX output channel.
            process {
                """
                input[0] = SAMTOOLS_INDEX.out.bai.map { meta, bai ->
                    [
                        meta,
                        file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/chr21.bam', checkIfExists: true),
                        bai,
                        file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/donor_genotype_chr21.vcf', checkIfExists: true),
                        file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/barcodes.tsv', checkIfExists: true)
                    ]
                }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out.versions).match("versions") },
                // The base VCF is only checked for existence; it is not part of the snapshot.
                { assert path(process.out.base.get(0).get(1)).exists() },
                { assert snapshot(
                    process.out.sample,
                    process.out.allele_depth,
                    process.out.depth_coverage,
                    process.out.depth_other).match()
                }
            )
        }

    }

    test("genotyping - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'sample1' ],
                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/chr21.bam', checkIfExists: true),
                    [],
                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/donor_genotype_chr21.vcf', checkIfExists: true),
                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/barcodes.tsv', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.sample,
                    process.out.allele_depth,
                    process.out.depth_coverage,
                    process.out.depth_other).match("stub")
                },
                { assert path(process.out.base.get(0).get(1)).exists() }
            )
        }
    }

}
96 changes: 96 additions & 0 deletions modules/nf-core/cellsnp/modea/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"versions": {
"content": [
[
"versions.yml:md5,965121af3dc48657c2128c404589fa6b"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-05T18:34:17.845506428"
},
"genotyping": {
"content": [
[
[
{
"id": "sample1"
},
"sample1.samples.tsv:md5,9e488782c1bcd63c37ee3d1c4c0a9217"
]
],
[
[
{
"id": "sample1"
},
"sample1.tag.AD.mtx:md5,dbe2d13dca2717749554d1f2a8b85650"
]
],
[
[
{
"id": "sample1"
},
"sample1.tag.DP.mtx:md5,29fbb6241c6c7b0a0fa31021e622c415"
]
],
[
[
{
"id": "sample1"
},
"sample1.tag.OTH.mtx:md5,1e3429950c59edec58a80a9b4ecda552"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-05T18:34:17.88339067"
},
"stub": {
"content": [
[
[
{
"id": "sample1"
},
"sample1.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
[
[
{
"id": "sample1"
},
"sample1.tag.AD.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
[
[
{
"id": "sample1"
},
"sample1.tag.DP.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
[
[
{
"id": "sample1"
},
"sample1.tag.OTH.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-05T18:34:29.159643061"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/cellsnp/modea/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the cellsnp/modea tag with every file under the module directory.
cellsnp/modea:
  - 'modules/nf-core/cellsnp/modea/**'
Loading