From a6adba9a9f5ef90814cc5b94fa2801543d240953 Mon Sep 17 00:00:00 2001
From: MaxUlysse <max.u.garcia@gmail.com>
Date: Wed, 4 May 2022 13:19:46 +0200
Subject: [PATCH 1/5] params.sequencing_center -> params.seq_center + add
 params.seq_platform

---
 nextflow.config      |  3 ++-
 nextflow_schema.json | 14 +++++++++++---
 workflows/sarek.nf   |  8 ++++----
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 8dbbb69937..a3510b18f3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -48,7 +48,8 @@ params {
     markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details
     use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default
     save_bam_mapped = false // Mapped BAMs not saved
-    sequencing_center = null // No sequencing center to be written in BAM header by aligner
+    seq_center = null // No sequencing center to be written in read group CN field by aligner
+    seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner
 
     // Variant Calling
     only_paired_variant_calling = false //if true, skips germline variant calling for normal-paired samples
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5995db9b7a..4d45b2d727 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -607,11 +607,19 @@
                     "hidden": true,
                     "fa_icon": "fas fa-users-cog"
                 },
-                "sequencing_center": {
+                "seq_center": {
                     "type": "string",
                     "fa_icon": "fas fa-university",
-                    "description": "Name of sequencing center to be displayed in BAM file",
-                    "help_text": "It will be in the CN field",
+                    "description": "Sequencing center information to be added to read group (CN field).",
+                    "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK",
+                    "hidden": true
+                },
+                "seq_platform": {
+                    "type": "string",
+                    "fa_icon": "fas fa-university",
+                    "default": "ILLUMINA",
+                    "description": "Sequencing platform information to be added to read group (PL field).",
+                    "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK",
                     "hidden": true
                 }
             }
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index d793c6f76d..66fd89d021 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -842,8 +842,8 @@ def extract_csv(csv_file) {
             meta.id         = "${row.sample}-${row.lane}".toString()
             def fastq_1     = file(row.fastq_1, checkIfExists: true)
             def fastq_2     = file(row.fastq_2, checkIfExists: true)
-            def CN          = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ''
-            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\""
+            def CN          = params.seq_center ? "CN:${params.seq_center}\\t" : ''
+            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\"
             meta.numLanes   = numLanes.toInteger()
             meta.read_group = read_group.toString()
             meta.data_type  = "fastq"
@@ -853,8 +853,8 @@ def extract_csv(csv_file) {
         } else if (row.lane && row.bam) {
             meta.id         = "${row.sample}-${row.lane}".toString()
             def bam         = file(row.bam,   checkIfExists: true)
-            def CN          = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ''
-            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\""
+            def CN          = params.seq_center ? "CN:${params.seq_center}\\t" : ''
+            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\"
             meta.numLanes   = numLanes.toInteger()
             meta.read_group = read_group.toString()
             meta.data_type  = "bam"

From 14f5b8f2ad99e8e12634801066dcf6ba10bbb848 Mon Sep 17 00:00:00 2001
From: MaxUlysse <max.u.garcia@gmail.com>
Date: Wed, 4 May 2022 13:25:55 +0200
Subject: [PATCH 2/5] update CHANGELOG

---
 CHANGELOG.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 02fb7a5385..e9e716b7f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,9 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - [#388](https://github.com/nf-core/sarek/pull/388) - Add cram support + read splitting with `SeqKit` for speedup
+- [#394](https://github.com/nf-core/sarek/pull/394) - Add `DeepVariant`
 - [#411](https://github.com/nf-core/sarek/pull/411) - cram in csv samplesheet
 - [#448](https://github.com/nf-core/sarek/pull/448) - Allow to skip base quality recalibration with `--skip_bqsr`
-- [#449](https://github.com/nf-core/sarek/pull/449) - @FriederikeHanssen is now a `CODEOWNERS`
+- [#449](https://github.com/nf-core/sarek/pull/449) - [@FriederikeHanssen](https://github.com/FriederikeHanssen) is now a `CODEOWNERS`
 - [#460](https://github.com/nf-core/sarek/pull/460) - Add posters
 - [#463](https://github.com/nf-core/sarek/pull/463) - Add dark/light logo versions
 - [#464](https://github.com/nf-core/sarek/pull/464), [#514](https://github.com/nf-core/sarek/pull/514) - Add `DRAGMAP` as a possible aligner
@@ -22,12 +23,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#512](https://github.com/nf-core/sarek/pull/512), [#531](https://github.com/nf-core/sarek/pull/531), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for pipeline
 - [#522](https://github.com/nf-core/sarek/pull/522) - Add QC for vcf files & MultiQC
 - [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples
+- [#538](https://github.com/nf-core/sarek/pull/538) - Add param `--seq_platform`, default: `ILLUMINA`
 
 ### Changed
 
 - [#383](https://github.com/nf-core/sarek/pull/383), [#528](https://github.com/nf-core/sarek/pull/528) - Update `CHANGELOG`
 - [#390](https://github.com/nf-core/sarek/pull/390) - Update `nextflow_schema.json`
-- [#394](https://github.com/nf-core/sarek/pull/394) - Add `DeepVariant`
 - [#408](https://github.com/nf-core/sarek/pull/408) - Sync `TEMPLATE` with `tools` `2.0.1`
 - [#416](https://github.com/nf-core/sarek/pull/416) - Sync `TEMPLATE` with `tools` `2.1`
 - [#417](https://github.com/nf-core/sarek/pull/417) - Merge `dsl2` and `dev` branches
@@ -40,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#466](https://github.com/nf-core/sarek/pull/466), [#485](https://github.com/nf-core/sarek/pull/485), [#492](https://github.com/nf-core/sarek/pull/492), [#494](https://github.com/nf-core/sarek/pull/494), [#515](https://github.com/nf-core/sarek/pull/515) - Improve preprocessing subworkflows
 - [#474](https://github.com/nf-core/sarek/pull/474), [#475](https://github.com/nf-core/sarek/pull/475) - Sync `TEMPLATE` with `tools` `2.2`
 - [#487](https://github.com/nf-core/sarek/pull/487), [#489](https://github.com/nf-core/sarek/pull/489), [#492](https://github.com/nf-core/sarek/pull/492), [#497](https://github.com/nf-core/sarek/pull/497), [#522](https://github.com/nf-core/sarek/pull/522) - Improve variant calling subworkflows
+- [#498](https://github.com/nf-core/sarek/pull/498) - Update docs
 - [#501](https://github.com/nf-core/sarek/pull/501) - Sync `TEMPLATE` with `tools` `2.3`
 - [#511](https://github.com/nf-core/sarek/pull/511) - Sync `TEMPLATE` with `tools` `2.3.2`
 - [#520](https://github.com/nf-core/sarek/pull/520) - Improve annotation subworkflows
@@ -73,6 +75,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Removed
 
 - [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools`
+- [#538](https://github.com/nf-core/sarek/pull/538) - `--sequencing_center` is now `--seq_center`
 
 ## [2.7.1](https://github.com/nf-core/sarek/releases/tag/2.7.1) - Pårtejekna
 

From c548b4bcc4bed820223408f2712ccd1b19570bca Mon Sep 17 00:00:00 2001
From: MaxUlysse <max.u.garcia@gmail.com>
Date: Wed, 4 May 2022 13:38:02 +0200
Subject: [PATCH 3/5] fix typo in read_group closing quote, print read_group

---
 workflows/sarek.nf | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index 66fd89d021..755565c931 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -843,9 +843,12 @@ def extract_csv(csv_file) {
             def fastq_1     = file(row.fastq_1, checkIfExists: true)
             def fastq_2     = file(row.fastq_2, checkIfExists: true)
             def CN          = params.seq_center ? "CN:${params.seq_center}\\t" : ''
-            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\"
+            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\""
             meta.numLanes   = numLanes.toInteger()
             meta.read_group = read_group.toString()
+
+            println read_group
+
             meta.data_type  = "fastq"
             meta.size       = 1 // default number of splitted fastq
             return [meta, [fastq_1, fastq_2]]
@@ -854,7 +857,7 @@ def extract_csv(csv_file) {
             meta.id         = "${row.sample}-${row.lane}".toString()
             def bam         = file(row.bam,   checkIfExists: true)
             def CN          = params.seq_center ? "CN:${params.seq_center}\\t" : ''
-            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\\"
+            def read_group  = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\""
             meta.numLanes   = numLanes.toInteger()
             meta.read_group = read_group.toString()
             meta.data_type  = "bam"

From 643187793700a73ce94a4a60f0c305aa970c3ddd Mon Sep 17 00:00:00 2001
From: "Maxime U. Garcia" <maxime.garcia@scilifelab.se>
Date: Wed, 4 May 2022 13:59:27 +0200
Subject: [PATCH 4/5] Apply suggestions from code review

Co-authored-by: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
---
 nextflow_schema.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4d45b2d727..77d21a5a7c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -611,7 +611,6 @@
                     "type": "string",
                     "fa_icon": "fas fa-university",
                     "description": "Sequencing center information to be added to read group (CN field).",
-                    "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK",
                     "hidden": true
                 },
                 "seq_platform": {

From 5eb1df46dfcfb7d96798f93bdad16bd0b21f8bae Mon Sep 17 00:00:00 2001
From: MaxUlysse <max.u.garcia@gmail.com>
Date: Wed, 4 May 2022 14:34:27 +0200
Subject: [PATCH 5/5] remove --markdup_java_options

---
 CHANGELOG.md         |  1 +
 nextflow.config      |  1 -
 nextflow_schema.json | 10 +---------
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e9e716b7f9..12dde941b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -76,6 +76,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools`
 - [#538](https://github.com/nf-core/sarek/pull/538) - `--sequencing_center` is now `--seq_center`
+- [#538](https://github.com/nf-core/sarek/pull/538) - `--markdup_java_options` has been removed
 
 ## [2.7.1](https://github.com/nf-core/sarek/releases/tag/2.7.1) - Pårtejekna
 
diff --git a/nextflow.config b/nextflow.config
index a3510b18f3..32c35ad057 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -45,7 +45,6 @@ params {
 
     // Preprocessing
     aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too
-    markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details
     use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default
     save_bam_mapped = false // Mapped BAMs not saved
     seq_center = null // No sequencing center to be written in read group CN field by aligner
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 77d21a5a7c..d9b208744d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -203,14 +203,6 @@
                     "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> Use `--bwa=false` to have `Sarek` build them automatically.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes",
                     "hidden": true
                 },
-                "markdup_java_options": {
-                    "type": "string",
-                    "default": "\"-Xms4000m -Xmx7g\"",
-                    "fa_icon": "fas fa-memory",
-                    "description": "Establish values for GATK MarkDuplicates memory consumption",
-                    "help_text": "See [SciLifeLab/Sarek/pull/689](https://github.com/SciLifeLab/Sarek/pull/689)",
-                    "hidden": true
-                },
                 "use_gatk_spark": {
                     "type": "string",
                     "fa_icon": "fas fa-forward",
@@ -618,7 +610,7 @@
                     "fa_icon": "fas fa-university",
                     "default": "ILLUMINA",
                     "description": "Sequencing platform information to be added to read group (PL field).",
-                    "help_text": "This parameter is required for creating a proper header to use in the downstream analysis of GATK",
+                    "help_text": "Default: ILLUMINA. Written to the PL field of the read group so that the header is valid for downstream GATK4 analysis.",
                     "hidden": true
                 }
             }