diff --git a/modules/nf-core/cellsnp/modea/environment.yml b/modules/nf-core/cellsnp/modea/environment.yml
new file mode 100644
index 00000000000..2a6f6cab965
--- /dev/null
+++ b/modules/nf-core/cellsnp/modea/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "cellsnp_modea"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::cellsnp-lite=1.2.3"
diff --git a/modules/nf-core/cellsnp/modea/main.nf b/modules/nf-core/cellsnp/modea/main.nf
new file mode 100644
index 00000000000..048a2449225
--- /dev/null
+++ b/modules/nf-core/cellsnp/modea/main.nf
@@ -0,0 +1,73 @@
+// Runs cellsnp-lite on a single BAM with a cell-barcode list and renames the
+// tool's fixed cellSNP.* output files to <prefix>.* so they match the output globs.
+process CELLSNP_MODEA {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/cellsnp-lite:1.2.3--h6141fd1_2' :
+        'biocontainers/cellsnp-lite:1.2.3--h6141fd1_2' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai), path(region_vcf), path(barcode)
+
+    output:
+    tuple val(meta), path('*.base.vcf.gz') , emit: base
+    tuple val(meta), path('*.cells.vcf.gz'), emit: cell , optional: true
+    tuple val(meta), path('*.samples.tsv') , emit: sample
+    tuple val(meta), path('*.tag.AD.mtx')  , emit: allele_depth
+    tuple val(meta), path('*.tag.DP.mtx')  , emit: depth_coverage
+    tuple val(meta), path('*.tag.OTH.mtx') , emit: depth_other
+    path 'versions.yml'                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Only pass -R when region_vcf is non-empty
+    def region_file = region_vcf ? "-R $region_vcf" : ''
+    // Results are written to the task directory (-O .) and then renamed to <prefix>.*;
+    // the per-cell VCF is only renamed when '--genotype' appears in ext.args.
+    """
+    cellsnp-lite -s $bam \\
+        -b $barcode \\
+        $region_file \\
+        -O . \\
+        --gzip \\
+        --nproc $task.cpus \\
+        $args
+
+    mv cellSNP.base.vcf.gz ${prefix}.base.vcf.gz
+    if [[ "$args" == *"--genotype"* ]]; then
+        mv cellSNP.cells.vcf.gz ${prefix}.cells.vcf.gz
+    fi
+    mv cellSNP.tag.AD.mtx ${prefix}.tag.AD.mtx
+    mv cellSNP.tag.DP.mtx ${prefix}.tag.DP.mtx
+    mv cellSNP.tag.OTH.mtx ${prefix}.tag.OTH.mtx
+    mv cellSNP.samples.tsv ${prefix}.samples.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellsnp: \$(cellsnp-lite --v | awk '{print \$2}')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Stub: create placeholder files for the required outputs only (no cells VCF).
+    """
+    echo "" | gzip > ${prefix}.base.vcf.gz
+    touch ${prefix}.samples.tsv
+    touch ${prefix}.tag.AD.mtx
+    touch ${prefix}.tag.DP.mtx
+    touch ${prefix}.tag.OTH.mtx
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellsnp: \$(cellsnp-lite --v | awk '{print \$2}')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cellsnp/modea/meta.yml b/modules/nf-core/cellsnp/modea/meta.yml
new file mode 100644
index 00000000000..bed1bc482bd
--- /dev/null
+++ b/modules/nf-core/cellsnp/modea/meta.yml
@@ -0,0 +1,80 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "cellsnp_modea"
+description: Cellsnp-lite is a C/C++ tool for efficient genotyping bi-allelic SNPs on single cells. You can use the mode A of cellsnp-lite after read alignment to obtain the snp x cell pileup UMI or read count matrices for each allele of given or detected SNPs for droplet based single cell data.
+keywords:
+  - genotyping
+  - single cell
+  - SNP
+  - droplet based single cells
+tools:
+  - "cellsnp":
+      description: "Efficient genotyping bi-allelic SNPs on single cells"
+      homepage: "https://github.com/single-cell-genetics/cellsnp-lite"
+      documentation: "https://cellsnp-lite.readthedocs.io"
+      tool_dev_url: "https://github.com/single-cell-genetics/cellsnp-lite"
+      doi: "10.1093/bioinformatics/btab358"
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - bam:
+      type: file
+      description: A single BAM/SAM/CRAM file, e.g., from CellRanger.
+      pattern: "*.{bam,cram,sam}"
+  - bai:
+      type: file
+      description: The index of the BAM/CRAM file.
+      pattern: "*.{bai,crai}"
+  - region_vcf:
+      type: file
+      description: An optional VCF file listing all candidate SNPs for genotyping.
+      pattern: "*.{vcf,vcf.gz}"
+  - barcode:
+      type: file
+      description: A plain file listing all effective cell barcodes.
+      pattern: "*.tsv"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - base:
+      type: file
+      description: A VCF file listing genotyped SNPs and aggregated AD & DP information (without GT).
+      pattern: "*.base.vcf.gz"
+  - cell:
+      type: file
+      description: A VCF file listing genotyped SNPs and aggregated AD & DP information & genotype (GT) information for each cell or sample.
+      pattern: "*.cells.vcf.gz"
+  - sample:
+      type: file
+      description: A TSV file listing cell barcodes or sample IDs.
+      pattern: "*.samples.tsv"
+  - allele_depth:
+      type: file
+      description: A file in “Matrix Market exchange formats”, containing the allele depths of the alternative (ALT) alleles.
+      pattern: "*.tag.AD.mtx"
+  - depth_coverage:
+      type: file
+      description: A file in “Matrix Market exchange formats”, containing the sum of allele depths of the reference and alternative alleles (REF + ALT).
+      pattern: "*.tag.DP.mtx"
+  - depth_other:
+      type: file
+      description: A file in “Matrix Market exchange formats”, containing the sum of allele depths of all the alleles other than REF and ALT.
+      pattern: "*.tag.OTH.mtx"
+
+authors:
+  - "@wxicu"
+maintainers:
+  - "@wxicu"
diff --git a/modules/nf-core/cellsnp/modea/tests/main.nf.test b/modules/nf-core/cellsnp/modea/tests/main.nf.test
new file mode 100644
index 00000000000..8f67d265153
--- /dev/null
+++ b/modules/nf-core/cellsnp/modea/tests/main.nf.test
@@ -0,0 +1,85 @@
+// nf-core modules test cellsnp/modea
+nextflow_process {
+
+    name "Test Process CELLSNP_MODEA"
+    script "../main.nf"
+    process "CELLSNP_MODEA"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cellsnp"
+    tag "cellsnp/modea"
+    tag "samtools/index" // the "genotyping" test runs SAMTOOLS_INDEX in its setup block
+
+    test("genotyping") {
+        setup {
+            run("SAMTOOLS_INDEX") {
+                script "../../../samtools/index/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [ id:'sample1' ],
+                        file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/chr21.bam', checkIfExists: true) ]
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+
+                input[0] = SAMTOOLS_INDEX.out.bai.collect{ meta, bai -> bai }.map{
+                    bai -> [[ id: 'sample1'],
+                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/chr21.bam', checkIfExists: true),
+                    bai,
+                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/donor_genotype_chr21.vcf', checkIfExists: true),
+                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/barcodes.tsv', checkIfExists: true) ]}
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert path(process.out.base.get(0).get(1)).exists() },
+                { assert snapshot(
+                    process.out.sample,
+                    process.out.allele_depth,
+                    process.out.depth_coverage,
+                    process.out.depth_other).match()
+                }
+            )
+        }
+
+    }
+
+    test("genotyping - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+
+                input[0] = [
+                    [ id:'sample1'],
+                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/chr21.bam', checkIfExists: true),
+                    [],
+                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/donor_genotype_chr21.vcf', checkIfExists: true),
+                    file(params.modules_testdata_base_path + '/genomics/homo_sapiens/demultiplexing/barcodes.tsv', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match("stub")})
+
+        }
+    }
+
+}
diff --git a/modules/nf-core/cellsnp/modea/tests/main.nf.test.snap b/modules/nf-core/cellsnp/modea/tests/main.nf.test.snap
new file mode 100644
index 00000000000..07997e3f8d9
--- /dev/null
+++ b/modules/nf-core/cellsnp/modea/tests/main.nf.test.snap
@@ -0,0 +1,158 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,965121af3dc48657c2128c404589fa6b"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-08T22:02:32.670061197"
+    },
+    "genotyping": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "sample1"
+                    },
+                    "sample1.samples.tsv:md5,9e488782c1bcd63c37ee3d1c4c0a9217"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "sample1"
+                    },
+                    "sample1.tag.AD.mtx:md5,dbe2d13dca2717749554d1f2a8b85650"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "sample1"
+                    },
+                    "sample1.tag.DP.mtx:md5,29fbb6241c6c7b0a0fa31021e622c415"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "sample1"
+                    },
+                    "sample1.tag.OTH.mtx:md5,1e3429950c59edec58a80a9b4ecda552"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-08T22:02:32.689525045"
+    },
+    "stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.base.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.tag.AD.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.tag.DP.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.tag.OTH.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "6": [
+                    "versions.yml:md5,965121af3dc48657c2128c404589fa6b"
+                ],
+                "allele_depth": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.tag.AD.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "base": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.base.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "cell": [
+                    
+                ],
+                "depth_coverage": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.tag.DP.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "depth_other": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.tag.OTH.mtx:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "sample": [
+                    [
+                        {
+                            "id": "sample1"
+                        },
+                        "sample1.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,965121af3dc48657c2128c404589fa6b"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-08T22:02:39.591564384"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cellsnp/modea/tests/tags.yml b/modules/nf-core/cellsnp/modea/tests/tags.yml
new file mode 100644
index 00000000000..7dd3b88a3af
--- /dev/null
+++ b/modules/nf-core/cellsnp/modea/tests/tags.yml
@@ -0,0 +1,2 @@
+cellsnp/modea:
+  - "modules/nf-core/cellsnp/modea/**"