From d9cb16e47041166d89c6afd898142e47e710f5a9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:28:23 +0200 Subject: [PATCH 1/4] wgsmetrics --- .github/workflows/test.yml | 2 + modules/nf-core/sentieon/wgsmetrics/main.nf | 81 +++++++++++++++++++ modules/nf-core/sentieon/wgsmetrics/meta.yml | 73 +++++++++++++++++ tests/config/pytest_modules.yml | 4 + .../nf-core/sentieon/wgsmetrics/main.nf | 24 ++++++ .../sentieon/wgsmetrics/nextflow.config | 10 +++ .../nf-core/sentieon/wgsmetrics/test.yml | 8 ++ 7 files changed, 202 insertions(+) create mode 100644 modules/nf-core/sentieon/wgsmetrics/main.nf create mode 100644 modules/nf-core/sentieon/wgsmetrics/meta.yml create mode 100644 tests/modules/nf-core/sentieon/wgsmetrics/main.nf create mode 100644 tests/modules/nf-core/sentieon/wgsmetrics/nextflow.config create mode 100644 tests/modules/nf-core/sentieon/wgsmetrics/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ae42931b6fa..aa9c56b60df 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -228,6 +228,8 @@ jobs: tags: sentieon/readwriter - profile: "conda" tags: sentieon/varcal + - profile: "conda" + tags: sentieon/wgmetrics - profile: "conda" tags: spaceranger/count - profile: "conda" diff --git a/modules/nf-core/sentieon/wgsmetrics/main.nf b/modules/nf-core/sentieon/wgsmetrics/main.nf new file mode 100644 index 00000000000..b325e38fd42 --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/main.nf @@ -0,0 +1,81 @@ +process SENTIEON_WGSMETRICS { + tag "$meta.id" + label 'process_medium' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta3), path(intervals_list) + + + output: + tuple val(meta), path('*.txt'), emit: wgs_metrics + path "versions.yml" , emit: versions + + 
when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def input = bam.sort().collect{"-i $it"}.join(' ') + def prefix = task.ext.prefix ?: "${meta.id}" + def interval = intervals_list ? "--interval ${intervals_list}" : "" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is a base64-encoded url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat /path/to/license_file | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license. 
+ export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + sentieon \\ + driver \\ + -t $task.cpus \\ + -r $fasta \\ + $input \\ + $interval \\ + $args \\ + --algo WgsMetricsAlgo ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/wgsmetrics/meta.yml b/modules/nf-core/sentieon/wgsmetrics/meta.yml new file mode 100644 index 00000000000..85e39faa8d7 --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/meta.yml @@ -0,0 +1,73 @@ +name: "sentieon_datametrics" +description: Collects multiple quality metrics from a bam file +keywords: + - metrics + - bam + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index of the sorted BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + - intervals_list: + type: file + description: bed or interval_list file containing the intervals in the reference that will be used in the analysis + pattern: "*.{bed,interval_list}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - wgs_metrics: + type: file + description: File containing the whole genome quality metrics collected from the bam file + pattern: "*.txt" + +authors: + - "@ramprasadn" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 2f32193488f..8a11a58ea8b 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -3287,6 +3287,10 @@ sentieon/readwriter: - modules/nf-core/sentieon/readwriter/** - tests/modules/nf-core/sentieon/readwriter/** +sentieon/wgsmetrics: + - modules/nf-core/sentieon/wgsmetrics/** + - tests/modules/nf-core/sentieon/wgsmetrics/** + sentieon/varcal: - modules/nf-core/sentieon/varcal/** - tests/modules/nf-core/sentieon/varcal/** diff --git a/tests/modules/nf-core/sentieon/wgsmetrics/main.nf b/tests/modules/nf-core/sentieon/wgsmetrics/main.nf new file mode 100644 index 00000000000..b5f3e1a0f54 --- /dev/null +++ b/tests/modules/nf-core/sentieon/wgsmetrics/main.nf @@ -0,0 +1,24 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SENTIEON_WGSMETRICS } from '../../../../../modules/nf-core/sentieon/wgsmetrics/main.nf' + +workflow test_sentieon_wgsmetrics { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + fasta = [ + [id:'genome'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + fai = [ + [id:'genome'], + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + + SENTIEON_WGSMETRICS ( input, fasta, fai, [[:],[]] ) +} diff --git a/tests/modules/nf-core/sentieon/wgsmetrics/nextflow.config b/tests/modules/nf-core/sentieon/wgsmetrics/nextflow.config 
new file mode 100644 index 00000000000..ccf30410f8d --- /dev/null +++ b/tests/modules/nf-core/sentieon/wgsmetrics/nextflow.config @@ -0,0 +1,10 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withLabel: 'sentieon' { + ext.sentieon_auth_mech_base64 = secrets.SENTIEON_AUTH_MECH_BASE64 + ext.sentieon_auth_data_base64 = secrets.SENTIEON_AUTH_DATA_BASE64 + } + +} diff --git a/tests/modules/nf-core/sentieon/wgsmetrics/test.yml b/tests/modules/nf-core/sentieon/wgsmetrics/test.yml new file mode 100644 index 00000000000..257bcec1a94 --- /dev/null +++ b/tests/modules/nf-core/sentieon/wgsmetrics/test.yml @@ -0,0 +1,8 @@ +- name: "sentieon wgsmetrics" + command: nextflow run ./tests/modules/nf-core/sentieon/wgsmetrics -entry test_sentieon_wgsmetrics -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/sentieon/wgsmetrics/nextflow.config + tags: + - "sentieon" + - "sentieon/wgsmetrics" + files: + - path: "output/sentieon/test.txt" + - path: "output/sentieon/versions.yml" From e03f2e4d94cea632883818cf1949418f4ae191ce Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:32:27 +0200 Subject: [PATCH 2/4] fix lint error --- modules/nf-core/sentieon/wgsmetrics/meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/sentieon/wgsmetrics/meta.yml b/modules/nf-core/sentieon/wgsmetrics/meta.yml index 85e39faa8d7..09fb9ecaaf6 100644 --- a/modules/nf-core/sentieon/wgsmetrics/meta.yml +++ b/modules/nf-core/sentieon/wgsmetrics/meta.yml @@ -1,5 +1,5 @@ -name: "sentieon_datametrics" -description: Collects multiple quality metrics from a bam file +name: "sentieon_wgsmetrics" +description: Collects whole genome quality metrics from a bam file keywords: - metrics - bam From 2244e3de9ef9186181a892a36bfefcaa0d533dc6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj 
<20065894+ramprasadn@users.noreply.github.com> Date: Fri, 11 Aug 2023 12:15:45 +0200 Subject: [PATCH 3/4] typo --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index aa9c56b60df..3b62ffebbe6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -229,7 +229,7 @@ jobs: - profile: "conda" tags: sentieon/varcal - profile: "conda" - tags: sentieon/wgmetrics + tags: sentieon/wgsmetrics - profile: "conda" tags: spaceranger/count - profile: "conda" From 741a6a043d8feb3e6dd64ab4f5083c64f0d87f00 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:51:17 +0200 Subject: [PATCH 4/4] Update modules/nf-core/sentieon/wgsmetrics/main.nf Co-authored-by: Anders Sune Pedersen <37172585+asp8200@users.noreply.github.com> --- modules/nf-core/sentieon/wgsmetrics/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/sentieon/wgsmetrics/main.nf b/modules/nf-core/sentieon/wgsmetrics/main.nf index b325e38fd42..fd7fdae5f1e 100644 --- a/modules/nf-core/sentieon/wgsmetrics/main.nf +++ b/modules/nf-core/sentieon/wgsmetrics/main.nf @@ -11,7 +11,7 @@ process SENTIEON_WGSMETRICS { tuple val(meta), path(bam), path(bai) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) - tuple val(meta3), path(intervals_list) + tuple val(meta4), path(intervals_list) output: