diff --git a/CHANGELOG.md b/CHANGELOG.md index de991758..7a0bff35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New parameters to skip fastqc and haplocheck (`--skip_fastqc` and `--skip_haplocheck`) [#438](https://github.com/nf-core/raredisease/pull/438) - CNVnator for copy number variant calling [#438](https://github.com/nf-core/raredisease/pull/434) - A new parameter `svdb_query_bedpedbs` to provide bedpe files as databases for SVDB query [#449](https://github.com/nf-core/raredisease/pull/449) +- ngsbits samplegender to check sex [#453](https://github.com/nf-core/raredisease/pull/453) ### `Changed` diff --git a/CITATIONS.md b/CITATIONS.md index 5f404de8..71414414 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -80,6 +80,8 @@ > Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699 +- [ngs-bits-samplegender](https://github.com/imgag/ngs-bits/tree/master) + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config index cc744773..48e7d3c1 100644 --- a/conf/modules/qc_bam.config +++ b/conf/modules/qc_bam.config @@ -53,6 +53,17 @@ process { ext.prefix = { "${meta.id}_mosdepth" } } + withName: '.*QC_BAM:NGSBITS_SAMPLEGENDER' { + // NGSBITS_SAMPLEGENDER needs a chrX and chrY in order to run so we skip it for the test profiles listed below (unless this is a stub run) + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample', 'test_sentieon']).size() >= 1) || workflow.stubRun } + ext.prefix = { "${meta.id}_ngsbits_sex" } + publishDir = [ + path: { "${params.outdir}/ngsbits_samplegender" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*QC_BAM:PICARD_COLLECTWGSMETRICS' { ext.when = { params.analysis_type.equals("wgs") && params.aligner.equals("bwamem2") } ext.prefix = { "${meta.id}_wgsmetrics" } diff --git a/main.nf b/main.nf index 12cdadc0..ee02c024 100644 --- a/main.nf +++ b/main.nf @@ -35,6 +35,7 @@ params.known_dbsnp = WorkflowMain.getGenomeAttribute(params, params.known_dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp_tbi') params.ml_model = WorkflowMain.getGenomeAttribute(params, 'ml_model') params.mt_fasta = WorkflowMain.getGenomeAttribute(params, 'mt_fasta') +params.ngsbits_samplegender_method = WorkflowMain.getGenomeAttribute(params, 'ngsbits_samplegender_method') params.ploidy_model = WorkflowMain.getGenomeAttribute(params, 'ploidy_model') params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance') params.readcount_intervals = WorkflowMain.getGenomeAttribute(params, 'readcount_intervals') diff --git a/modules.json b/modules.json index 1c4709dd..72c3e200 100644 --- a/modules.json +++ b/modules.json @@ -277,6 +277,11 @@ "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] }, + "ngsbits/samplegender": { + "branch": "master", + "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "peddy": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/nf-core/ngsbits/samplegender/environment.yml b/modules/nf-core/ngsbits/samplegender/environment.yml new file mode 100644 index 00000000..486c233c --- /dev/null +++ b/modules/nf-core/ngsbits/samplegender/environment.yml @@ -0,0 +1,7 @@ +name: ngsbits_samplegender +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ngs-bits=2023_02 diff --git a/modules/nf-core/ngsbits/samplegender/main.nf b/modules/nf-core/ngsbits/samplegender/main.nf new file mode 100644 index 00000000..3562238d --- /dev/null +++ b/modules/nf-core/ngsbits/samplegender/main.nf @@ -0,0 +1,51 @@ +process NGSBITS_SAMPLEGENDER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ngs-bits:2023_02--py311ha0b7adc_2': + 'biocontainers/ngs-bits:2023_02--py311ha0b7adc_2' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta2), path(fai) + val method + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def ref = fasta ? 
"-ref ${fasta}" : "" + """ + SampleGender \\ + -in ${bam} \\ + -method ${method} \\ + -out ${prefix}.tsv \\ + ${ref} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngs-bits: \$(echo \$(SampleGender --version 2>&1) | sed 's/SampleGender //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngs-bits: \$(echo \$(SampleGender --version 2>&1) | sed 's/SampleGender //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ngsbits/samplegender/meta.yml b/modules/nf-core/ngsbits/samplegender/meta.yml new file mode 100644 index 00000000..997bc06e --- /dev/null +++ b/modules/nf-core/ngsbits/samplegender/meta.yml @@ -0,0 +1,69 @@ +--- +name: "ngsbits_samplegender" +description: Determines the gender of a sample from the BAM/CRAM file. +keywords: + - gender + - cram + - bam + - short reads +tools: + - "ngsbits": + description: "Short-read sequencing tools" + homepage: "https://github.com/imgag/ngs-bits" + documentation: "https://github.com/imgag/ngs-bits" + tool_dev_url: "https://github.com/imgag/ngs-bits" + licence: "['MIT license']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: One or more BAM/CRAM files to determine the gender of + pattern: "*.{bam,cram}" + - bai: + type: file + description: The index file(s) from the input BAM/CRAM file(s) + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference fasta information + e.g. [ id:'test' ] + - fasta: + type: file + description: The reference FASTA to use (mandatory when CRAM files are used) + pattern: "*.{fasta,fa,fna}" + - meta3: + type: map + description: | + Groovy Map containing reference fasta index information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: The index of the reference FASTA to use (mandatory when CRAM files are used) + pattern: "*.fai" + - method: + type: string + description: The method to use to define the gender (posibilities are 'xy', 'hetx' and 'sry') + pattern: "(xy|hetx|sry)" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: An output TSV file containing the results of the gender prediction + pattern: "*.tsv" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/nextflow.config b/nextflow.config index 7309d888..6a58436a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,7 @@ params { // Required options - input = null + input = null // References genome = 'GRCh38' @@ -37,6 +37,9 @@ params { cadd_resources = null platform = 'illumina' + // Bam_qc + ngsbits_samplegender_method = 'xy' + // File params svdb_query_bedpedbs = null svdb_query_dbs = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 82618fca..a5690fbb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -386,6 +386,13 @@ "fa_icon": "fas fa-align-center", "enum": ["illumina"] }, + "ngsbits_samplegender_method": { + "type": "string", + "default": "xy", + "enum": ["xy", "hetx", "sry"], + "description": "Method selection for ngs-bits samplegender", + "fa_icon": "fas fa-align-center" + }, "save_mapped_as_cram": { "type": "boolean", "description": "Specifies whether to generate and publish alignment files as cram instead of bam", @@ -473,7 +480,7 @@ "cnvnator_binsize": { "type": "number", "description": "Bin size for CNVnator", - "default": "1000", + "default": 1000, "fa_icon": "fas fa-align-center" }, "sentieon_dnascope_pcr_indel_model": { diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index 
11439a7b..d4ae2842 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -12,20 +12,22 @@ include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../m include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main' include { SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' +include { NGSBITS_SAMPLEGENDER } from '../../modules/nf-core/ngsbits/samplegender/main' workflow QC_BAM { take: - ch_bam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bai // channel: [mandatory] [ val(meta), path(bai) ] - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_bait_intervals // channel: [mandatory] [ path(intervals_list) ] - ch_target_intervals // channel: [mandatory] [ path(intervals_list) ] - ch_chrom_sizes // channel: [mandatory] [ path(sizes) ] - ch_intervals_wgs // channel: [mandatory] [ path(intervals) ] - ch_intervals_y // channel: [mandatory] [ path(intervals) ] + ch_bam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bai // channel: [mandatory] [ val(meta), path(bai) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_bait_intervals // channel: [mandatory] [ path(intervals_list) ] + ch_target_intervals // channel: [mandatory] [ path(intervals_list) ] + ch_chrom_sizes // channel: [mandatory] [ path(sizes) ] + ch_intervals_wgs // channel: [mandatory] [ path(intervals) ] + ch_intervals_y // channel: [mandatory] [ path(intervals) ] + ngsbits_samplegender_method // channel: [mandatory] [ val(method) ] main: ch_versions = 
Channel.empty() @@ -59,6 +61,9 @@ workflow QC_BAM { SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs.map{ interval -> [[:], interval]} ) SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y.map{ interval -> [[:], interval]} ) + // Check sex + NGSBITS_SAMPLEGENDER(ch_bam_bai, ch_genome_fasta, ch_genome_fai, ngsbits_samplegender_method) + ch_cov = Channel.empty().mix(PICARD_COLLECTWGSMETRICS.out.metrics, SENTIEON_WGSMETRICS.out.wgs_metrics) ch_cov_y = Channel.empty().mix(PICARD_COLLECTWGSMETRICS_Y.out.metrics, SENTIEON_WGSMETRICS_Y.out.wgs_metrics) @@ -67,6 +72,7 @@ workflow QC_BAM { ch_versions = ch_versions.mix(TIDDIT_COV.out.versions.first()) ch_versions = ch_versions.mix(UCSC_WIGTOBIGWIG.out.versions.first()) ch_versions = ch_versions.mix(MOSDEPTH.out.versions.first()) + ch_versions = ch_versions.mix(NGSBITS_SAMPLEGENDER.out.versions.first()) ch_versions = ch_versions.mix(PICARD_COLLECTWGSMETRICS.out.versions.first(), SENTIEON_WGSMETRICS.out.versions.first()) ch_versions = ch_versions.mix(PICARD_COLLECTWGSMETRICS_Y.out.versions.first(), SENTIEON_WGSMETRICS_Y.out.versions.first()) @@ -78,6 +84,7 @@ workflow QC_BAM { bigwig = UCSC_WIGTOBIGWIG.out.bw // channel: [ val(meta), path(bw) ] d4 = MOSDEPTH.out.per_base_d4 // channel: [ val(meta), path(d4) ] global_dist = MOSDEPTH.out.global_txt // channel: [ val(meta), path(txt) ] + sex_check = NGSBITS_SAMPLEGENDER.out.tsv // channel: [val(meta), path(tsv) ] cov = ch_cov // channel: [ val(meta), path(metrics) ] cov_y = ch_cov_y // channel: [ val(meta), path(metrics) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 4d1e51e7..c3acb0c7 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -339,7 +339,8 @@ workflow RAREDISEASE { ch_target_intervals, ch_genome_chrsizes, ch_intervals_wgs, - ch_intervals_y + ch_intervals_y, + 
Channel.value(params.ngsbits_samplegender_method) ) ch_versions = ch_versions.mix(QC_BAM.out.versions)