-
Notifications
You must be signed in to change notification settings - Fork 720
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add module fgbio/collectduplexseqmetrics (#5960)
* Initial commit * Rerun nf-test * Change assertion for duplex_qc output * Add ggplot2 to version.yml * Update snapshot * Correct conda packages version
- Loading branch information
1 parent
6494138
commit af50683
Showing
6 changed files
with
355 additions
and
0 deletions.
There are no files selected for viewing
10 changes: 10 additions & 0 deletions
10
modules/nf-core/fgbio/collectduplexseqmetrics/environment.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
name: "fgbio_collectduplexseqmetrics" | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- "bioconda::fgbio=2.0.2" | ||
- "conda-forge::r-ggplot2=3.4.4" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// Runs `fgbio CollectDuplexSeqMetrics` on a grouped BAM and collects the
// metrics text files plus the QC PDF that the tool writes under `${prefix}`.
//
// Inputs:
//   meta          - sample map; `meta.id` is used as the tag and default output prefix
//   grouped_bam   - BAM passed to `--input`
//   interval_list - optional file passed to `--intervals`; pass `[]` to omit the flag
//
// Outputs (all matched recursively below the task directory):
//   family_sizes, duplex_family_sizes, duplex_yield_metrics, umi_counts, duplex_qc,
//   duplex_umi_counts (optional), and versions.yml.
process FGBIO_COLLECTDUPLEXSEQMETRICS {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-51891ad0b60843e4aade9cde2eb5d40c5ae92b80:72c944cdea5caff7f03b96034968ce2a4f1737bc-0':
        'biocontainers/mulled-v2-51891ad0b60843e4aade9cde2eb5d40c5ae92b80:72c944cdea5caff7f03b96034968ce2a4f1737bc-0' }"

    input:
    tuple val(meta), path(grouped_bam)
    path interval_list

    output:
    tuple val(meta), path("**.family_sizes.txt")        , emit: family_sizes
    tuple val(meta), path("**.duplex_family_sizes.txt") , emit: duplex_family_sizes
    tuple val(meta), path("**.duplex_yield_metrics.txt"), emit: duplex_yield_metrics
    tuple val(meta), path("**.umi_counts.txt")          , emit: umi_counts
    tuple val(meta), path("**.duplex_qc.pdf")           , emit: duplex_qc
    // Only emitted when the tool is asked to write duplex UMI counts, hence optional.
    tuple val(meta), path("**.duplex_umi_counts.txt")   , emit: duplex_umi_counts, optional: true
    path "versions.yml"                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Fix: the original interpolated `${bed}`, which is not declared anywhere in this
    // process; the staged input is `interval_list`. An empty input (`[]`) is falsy,
    // so the flag is omitted entirely in that case.
    def intervals = interval_list ? "--intervals ${interval_list}" : ""
    def mem_gb = 8

    // Size the JVM heap: keep the 8 GB default when the task memory is unknown or
    // at least 8 GB; otherwise use (task memory - 1) GB, with a floor of 1 GB.
    if (!task.memory) {
        log.info '[fgbio CollectDuplexSeqMetrics] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
    } else if (mem_gb > task.memory.giga) {
        if (task.memory.giga < 2) {
            mem_gb = 1
        } else {
            mem_gb = task.memory.giga - 1
        }
    }

    """
    fgbio \\
        -Xmx${mem_gb}g \\
        --tmp-dir=. \\
        --async-io=true \\
        --compression=1 \\
        CollectDuplexSeqMetrics \\
        --input $grouped_bam \\
        --output ${prefix} \\
        $intervals \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
        ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Mirror the optional output: only create the duplex UMI counts file when the
    // corresponding flag appears in the user-supplied args.
    def touch_duplex_umi = args.contains("--duplex-umi-counts") || args.contains("-u") ? "touch ${prefix}.duplex_umi_counts.txt" : ""

    """
    touch ${prefix}.family_sizes.txt
    touch ${prefix}.duplex_family_sizes.txt
    touch ${prefix}.duplex_yield_metrics.txt
    touch ${prefix}.umi_counts.txt
    touch ${prefix}.duplex_qc.pdf
    $touch_duplex_umi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
        ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
--- | ||
name: "fgbio_collectduplexseqmetrics" | ||
description: Collects a suite of metrics to QC duplex sequencing data. | ||
keywords: | ||
- UMIs | ||
- QC | ||
- bam | ||
- duplex | ||
tools: | ||
- "fgbio": | ||
description: "A set of tools for working with genomic and high throughput sequencing data, including UMIs" | ||
homepage: "http://fulcrumgenomics.github.io/fgbio/" | ||
documentation: "http://fulcrumgenomics.github.io/fgbio/" | ||
tool_dev_url: "https://github.com/fulcrumgenomics/fgbio" | ||
licence: ["MIT"] | ||
- "r-ggplot2": | ||
description: "ggplot2 is a system for declaratively creating graphics, based on The Grammar of Graphics. " | ||
homepage: "https://ggplot2.tidyverse.org/" | ||
documentation: "https://ggplot2.tidyverse.org/" | ||
tool_dev_url: "https://github.com/tidyverse/ggplot2" | ||
licence: ["MIT"] | ||
|
||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- grouped_bam: | ||
type: file | ||
description: Must be either 1) the exact BAM output by the GroupReadsByUmi tool (in the sort order it was produced in), or 2) a BAM file that has MI tags present on all reads (usually set by GroupReadsByUmi) and has been sorted with SortBam into TemplateCoordinate order. | ||
pattern: "*.bam" | ||
|
||
- interval_list: | ||
type: file | ||
description: Calculation of metrics may be restricted to a set of regions using the --intervals parameter. The file format is described here https://samtools.github.io/htsjdk/javadoc/htsjdk/index.html?htsjdk/samtools/util/Interval.html | ||
pattern: "*.{tsv,txt,interval_list}" | ||
|
||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
- family_sizes: | ||
type: file | ||
description: Metrics on the frequency of different types of families of different sizes | ||
pattern: "*.txt" | ||
- duplex_family_sizes: | ||
type: file | ||
description: Metrics on the frequency of duplex tag families by the number of observations from each strand | ||
pattern: "*.txt" | ||
- duplex_yield_metrics: | ||
type: file | ||
description: Summary QC metrics produced using 5%, 10%, 15%...100% of the data | ||
pattern: "*.txt" | ||
- umi_counts: | ||
type: file | ||
description: Metrics on the frequency of observations of UMIs within reads and tag families | ||
pattern: "*.txt" | ||
- duplex_qc: | ||
type: file | ||
description: A series of plots generated from the preceding metrics files for visualization | ||
pattern: "*.pdf" | ||
- duplex_umi_counts: | ||
type: file | ||
description: Metrics on the frequency of observations of duplex UMIs within reads and tag families. | ||
pattern: "*.txt" | ||
|
||
authors: | ||
- "@georgiakes" | ||
maintainers: | ||
- "@georgiakes" |
79 changes: 79 additions & 0 deletions
79
modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// nf-test specification for the FGBIO_COLLECTDUPLEXSEQMETRICS process defined in ../main.nf. | ||
// Contains two cases: a real run and a `-stub` run, both on the same test BAM. | ||
nextflow_process { | ||
|
||
name "Test Process FGBIO_COLLECTDUPLEXSEQMETRICS" | ||
script "../main.nf" | ||
process "FGBIO_COLLECTDUPLEXSEQMETRICS" | ||
|
||
tag "modules" | ||
tag "modules_nfcore" | ||
tag "fgbio" | ||
tag "fgbio/collectduplexseqmetrics" | ||
|
||
|
||
// Real run: grouped duplex-UMI BAM as input[0], empty list as input[1] (no interval file). | ||
test("homo_sapiens - bam") { | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test', single_end:false ], // meta map | ||
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true) | ||
] | ||
input[1]=[] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
// The text outputs and versions are snapshotted as channels; the duplex_qc PDF | ||
// contributes only its file name to the snapshot. | ||
{ assert snapshot(process.out.family_sizes, | ||
process.out.duplex_family_sizes, | ||
process.out.duplex_yield_metrics, | ||
process.out.umi_counts, | ||
process.out.duplex_umi_counts, | ||
process.out.versions, | ||
file(process.out.duplex_qc[0][1]).name) | ||
.match() } | ||
|
||
) | ||
} | ||
|
||
} | ||
|
||
// Stub run: same inputs as above, executed with the `-stub` option. | ||
test("homo_sapiens - stub") { | ||
|
||
options "-stub" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test', single_end:false ], // meta map | ||
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true) | ||
] | ||
input[1] = [] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
// Same snapshot layout as the real run, so both entries in the .snap file share one shape. | ||
{ assert snapshot(process.out.family_sizes, | ||
process.out.duplex_family_sizes, | ||
process.out.duplex_yield_metrics, | ||
process.out.umi_counts, | ||
process.out.duplex_umi_counts, | ||
process.out.versions, | ||
file(process.out.duplex_qc[0][1]).name) | ||
.match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
} |
106 changes: 106 additions & 0 deletions
106
modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
{ | ||
"homo_sapiens - stub": { | ||
"content": [ | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.family_sizes.txt:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.duplex_family_sizes.txt:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.duplex_yield_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.umi_counts.txt:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
[ | ||
|
||
], | ||
[ | ||
"versions.yml:md5,637a7384cd910f0e0541a631c52b95e1" | ||
], | ||
"test.duplex_qc.pdf" | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "24.04.3" | ||
}, | ||
"timestamp": "2024-07-17T19:26:23.325859809" | ||
}, | ||
"homo_sapiens - bam": { | ||
"content": [ | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.family_sizes.txt:md5,a49de49bd587440c316fec830f502620" | ||
] | ||
], | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.duplex_family_sizes.txt:md5,129e41170b9f5f2f8edce62a686c8548" | ||
] | ||
], | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.duplex_yield_metrics.txt:md5,237e4e4ee713fdf672b0ee796827fb9d" | ||
] | ||
], | ||
[ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.umi_counts.txt:md5,9fe38b2a49ca80492b3a1c6a55679155" | ||
] | ||
], | ||
[ | ||
|
||
], | ||
[ | ||
"versions.yml:md5,637a7384cd910f0e0541a631c52b95e1" | ||
], | ||
"test.duplex_qc.pdf" | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "24.04.3" | ||
}, | ||
"timestamp": "2024-07-17T19:26:03.1373243" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
fgbio/collectduplexseqmetrics: | ||
- "modules/nf-core/fgbio/collectduplexseqmetrics/**" |