From a6adba9a9f5ef90814cc5b94fa2801543d240953 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 May 2022 13:19:46 +0200 Subject: [PATCH 1/5] params.sequencing_center -> params.seq_center + add params.seq_platform --- nextflow.config | 3 ++- nextflow_schema.json | 14 +++++++++++--- workflows/sarek.nf | 8 ++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8dbbb69937..a3510b18f3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,7 +48,8 @@ params { markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default save_bam_mapped = false // Mapped BAMs not saved - sequencing_center = null // No sequencing center to be written in BAM header by aligner + seq_center = null // No sequencing center to be written in read group CN field by aligner + seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner // Variant Calling only_paired_variant_calling = false //if true, skips germline variant calling for normal-paired samples diff --git a/nextflow_schema.json b/nextflow_schema.json index 5995db9b7a..4d45b2d727 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -607,11 +607,19 @@ "hidden": true, "fa_icon": "fas fa-users-cog" }, - "sequencing_center": { + "seq_center": { "type": "string", "fa_icon": "fas fa-university", - "description": "Name of sequencing center to be displayed in BAM file", - "help_text": "It will be in the CN field", + "description": "Sequencing center information to be added to read group (CN field).", + "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK", + "hidden": true + }, + "seq_platform": { + "type": "string", + "fa_icon": "fas fa-university", + "default": "ILLUMINA", + 
"description": "Sequencing platform information to be added to read group (PL field).", + "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK", "hidden": true } } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index d793c6f76d..66fd89d021 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -842,8 +842,8 @@ def extract_csv(csv_file) { meta.id = "${row.sample}-${row.lane}".toString() def fastq_1 = file(row.fastq_1, checkIfExists: true) def fastq_2 = file(row.fastq_2, checkIfExists: true) - def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\"" + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\" meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "fastq" @@ -853,8 +853,8 @@ def extract_csv(csv_file) { } else if (row.lane && row.bam) { meta.id = "${row.sample}-${row.lane}".toString() def bam = file(row.bam, checkIfExists: true) - def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\"" + def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\" meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "bam" From 14f5b8f2ad99e8e12634801066dcf6ba10bbb848 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 May 2022 13:25:55 +0200 Subject: [PATCH 2/5] update CHANGELOG --- CHANGELOG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02fb7a5385..e9e716b7f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - [#388](https://github.com/nf-core/sarek/pull/388) - Add cram support + read splitting with `SeqKit` for speedup +- [#394](https://github.com/nf-core/sarek/pull/394) - Add `DeepVariant` - [#411](https://github.com/nf-core/sarek/pull/411) - cram in csv samplesheet - [#448](https://github.com/nf-core/sarek/pull/448) - Allow to skip base quality recalibration with `--skip_bqsr` -- [#449](https://github.com/nf-core/sarek/pull/449) - @FriederikeHanssen is now a `CODEOWNERS` +- [#449](https://github.com/nf-core/sarek/pull/449) - [@FriederikeHanssen](https://github.com/FriederikeHanssen) is now a `CODEOWNERS` - [#460](https://github.com/nf-core/sarek/pull/460) - Add posters - [#463](https://github.com/nf-core/sarek/pull/463) - Add dark/light logo versions - [#464](https://github.com/nf-core/sarek/pull/464), [#514](https://github.com/nf-core/sarek/pull/514) - Add `DRAGMAP` as a possible aligner @@ -22,12 +23,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#512](https://github.com/nf-core/sarek/pull/512), [#531](https://github.com/nf-core/sarek/pull/531), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for pipeline - [#522](https://github.com/nf-core/sarek/pull/522) - Add QC for vcf files & MultiQC 
- [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples +- [#538](https://github.com/nf-core/sarek/pull/538) - Add param `--seq_platform`, default: `ILLUMINA` ### Changed - [#383](https://github.com/nf-core/sarek/pull/383), [#528](https://github.com/nf-core/sarek/pull/528) - Update `CHANGELOG` - [#390](https://github.com/nf-core/sarek/pull/390) - Update `nextflow_schema.json` -- [#394](https://github.com/nf-core/sarek/pull/394) - Add `DeepVariant` - [#408](https://github.com/nf-core/sarek/pull/408) - Sync `TEMPLATE` with `tools` `2.0.1` - [#416](https://github.com/nf-core/sarek/pull/416) - Sync `TEMPLATE` with `tools` `2.1` - [#417](https://github.com/nf-core/sarek/pull/417) - Merge `dsl2` and `dev` branches @@ -40,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#466](https://github.com/nf-core/sarek/pull/466), [#485](https://github.com/nf-core/sarek/pull/485), [#492](https://github.com/nf-core/sarek/pull/492), [#494](https://github.com/nf-core/sarek/pull/494), [#515](https://github.com/nf-core/sarek/pull/515) - Improve preprocessing subworkflows - [#474](https://github.com/nf-core/sarek/pull/474), [#475](https://github.com/nf-core/sarek/pull/475) - Sync `TEMPLATE` with `tools` `2.2` - [#487](https://github.com/nf-core/sarek/pull/487), [#489](https://github.com/nf-core/sarek/pull/489), [#492](https://github.com/nf-core/sarek/pull/492), [#497](https://github.com/nf-core/sarek/pull/497), [#522](https://github.com/nf-core/sarek/pull/522) - Improve variant calling subworkflows +- [#498](https://github.com/nf-core/sarek/pull/498) - Update docs - [#501](https://github.com/nf-core/sarek/pull/501) - Sync `TEMPLATE` with `tools` `2.3` - [#511](https://github.com/nf-core/sarek/pull/511) - Sync `TEMPLATE` with `tools` `2.3.2` - [#520](https://github.com/nf-core/sarek/pull/520) - Improve annotation subworkflows @@ -73,6 +75,7 @@ 
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools` +- [#538](https://github.com/nf-core/sarek/pull/538) - `--sequencing_center` is now `--seq_center` ## [2.7.1](https://github.com/nf-core/sarek/releases/tag/2.7.1) - Pårtejekna From c548b4bcc4bed820223408f2712ccd1b19570bca Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 May 2022 13:38:02 +0200 Subject: [PATCH 3/5] typo --- workflows/sarek.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 66fd89d021..755565c931 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -843,9 +843,12 @@ def extract_csv(csv_file) { def fastq_1 = file(row.fastq_1, checkIfExists: true) def fastq_2 = file(row.fastq_2, checkIfExists: true) def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\" + def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() + + println read_group + meta.data_type = "fastq" meta.size = 1 // default number of splitted fastq return [meta, [fastq_1, fastq_2]] @@ -854,7 +857,7 @@ def extract_csv(csv_file) { meta.id = "${row.sample}-${row.lane}".toString() def bam = file(row.bam, checkIfExists: true) def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\" + def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "bam" From 643187793700a73ce94a4a60f0c305aa970c3ddd Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Wed, 4 May 2022 13:59:27 +0200 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: FriederikeHanssen --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4d45b2d727..77d21a5a7c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -611,7 +611,6 @@ "type": "string", "fa_icon": "fas fa-university", "description": "Sequencing center information to be added to read group (CN field).", - "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK", "hidden": true }, "seq_platform": { From 5eb1df46dfcfb7d96798f93bdad16bd0b21f8bae Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 May 2022 14:34:27 +0200 Subject: [PATCH 5/5] remove --markdup_java_options --- CHANGELOG.md | 1 + nextflow.config | 1 - nextflow_schema.json | 10 +--------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9e716b7f9..12dde941b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools` - [#538](https://github.com/nf-core/sarek/pull/538) - `--sequencing_center` is now `--seq_center` +- [#538](https://github.com/nf-core/sarek/pull/538) - `--markdup_java_options` has been removed ## 
[2.7.1](https://github.com/nf-core/sarek/releases/tag/2.7.1) - Pårtejekna diff --git a/nextflow.config b/nextflow.config index a3510b18f3..32c35ad057 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,7 +45,6 @@ params { // Preprocessing aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too - markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default save_bam_mapped = false // Mapped BAMs not saved seq_center = null // No sequencing center to be written in read group CN field by aligner diff --git a/nextflow_schema.json b/nextflow_schema.json index 77d21a5a7c..d9b208744d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -203,14 +203,6 @@ "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> Use `--bwa=false` to have `Sarek` build them automatically.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes", "hidden": true }, - "markdup_java_options": { - "type": "string", - "default": "\"-Xms4000m -Xmx7g\"", - "fa_icon": "fas fa-memory", - "description": "Establish values for GATK MarkDuplicates memory consumption", - "help_text": "See [SciLifeLab/Sarek/pull/689](https://github.com/SciLifeLab/Sarek/pull/689)", - "hidden": true - }, "use_gatk_spark": { "type": "string", "fa_icon": "fas fa-forward", @@ -618,7 +610,7 @@ "fa_icon": "fas fa-university", "default": "ILLUMINA", "description": "Sequencing platform information to be added to read group (PL field).", - "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK", + "help_text": "Default: ILLUMINA. 
Will be used to create a proper header for further GATK4 downstream analysis", "hidden": true } }