From 9cc0f73d839537763f0f0cb6cfe14ca8050a4a33 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Mon, 24 Jul 2023 15:29:57 -0700
Subject: [PATCH 01/20] update submodules

---
 external/pipeline-Nextflow-config | 2 +-
 external/pipeline-Nextflow-module | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/external/pipeline-Nextflow-config b/external/pipeline-Nextflow-config
index eb52a2c..cb9a702 160000
--- a/external/pipeline-Nextflow-config
+++ b/external/pipeline-Nextflow-config
@@ -1 +1 @@
-Subproject commit eb52a2c1f7237917bc90944c58c962c5ff88cc58
+Subproject commit cb9a702a2e4f04162529315b87cc19149866df65
diff --git a/external/pipeline-Nextflow-module b/external/pipeline-Nextflow-module
index 5e315ab..4bc4336 160000
--- a/external/pipeline-Nextflow-module
+++ b/external/pipeline-Nextflow-module
@@ -1 +1 @@
-Subproject commit 5e315ab002aae48df64d1ecb0689f290744aae22
+Subproject commit 4bc43369bee35170132b6ff3374a0156773ecca1

From 97723bc4ad9d11d10ca17ec6cd2d8107fd3b7a14 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Mon, 24 Jul 2023 15:31:55 -0700
Subject: [PATCH 02/20] remove CSV and add YAML input

---
 input/call-sSV-input.csv  | 2 --
 input/call-sSV-input.yaml | 8 ++++++++
 2 files changed, 8 insertions(+), 2 deletions(-)
 delete mode 100644 input/call-sSV-input.csv
 create mode 100644 input/call-sSV-input.yaml

diff --git a/input/call-sSV-input.csv b/input/call-sSV-input.csv
deleted file mode 100644
index cfe7233..0000000
--- a/input/call-sSV-input.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-normal_bam,tumor_bam
-/path/to/normal_sample.bam,/path/to/tumor_sample.bam
diff --git a/input/call-sSV-input.yaml b/input/call-sSV-input.yaml
new file mode 100644
index 0000000..1c15bc0
--- /dev/null
+++ b/input/call-sSV-input.yaml
@@ -0,0 +1,8 @@
+---
+patient_id: "patient_id"
+input:
+  BAM:
+    normal:
+      - "/absolute/path/to/BAM"
+    tumor:
+      - "/abosolute/path/to/BAM"
\ No newline at end of file

From f539666cbe585ce9137dbaaa96bf3e436426f82e Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:02:44 -0700
Subject: [PATCH 03/20] add custom schema

---
 config/custom_schema_types.config | 91 +++++++++++++++++++++++++++++++
 config/schema.yaml                | 27 +++++++--
 2 files changed, 113 insertions(+), 5 deletions(-)
 create mode 100644 config/custom_schema_types.config

diff --git a/config/custom_schema_types.config b/config/custom_schema_types.config
new file mode 100644
index 0000000..25cf64f
--- /dev/null
+++ b/config/custom_schema_types.config
@@ -0,0 +1,91 @@
+/**
+* This custom schema namespace implements a custom type for checking input BAMs for call-gSNP
+*/
+custom_schema_types {
+    allowed_input_types = [
+        'BAM'
+    ]
+    allowed_bam_types = [
+        'normal',
+        'tumor'
+    ]
+
+    /**
+    * Check that every key in `given` is one of `choices`; throw an Exception naming `name` otherwise
+    */
+    check_input_type_keys = { List given, String name, List choices=custom_schema_types.allowed_input_types ->
+        for (elem in given) {
+            if (!(elem in choices)) {
+                throw new Exception("Invalid parameter ${name}. Valid types: ${choices}.")
+            }
+        }
+    }
+
+    /**
+    * Throw an Exception unless `val` is a Map (Namespace); `name` is only used in the error message
+    */
+    check_if_namespace = { val, String name ->
+        if (!(val in Map)) {
+            throw new Exception("${name} should be a Namespace, not ${val.getClass()}.")
+        }
+    }
+
+    /**
+    * Throw an Exception unless `val` is a List or a Set; `name` is only used in the error message
+    */
+    check_if_list = { val, String name ->
+        if (!(val in List || val in Set)) {
+            throw new Exception("${name} should be a List, not ${val.getClass()}.")
+        }
+    }
+
+    /**
+    * Check that options[name] is a Namespace keyed only by allowed input types, then validate each entry
+    */
+    check_input_namespace = { Map options, String name, Map properties ->
+        // Check that the value is a Namespace and that all of its keys are in allowed_input_types
+        custom_schema_types.check_if_namespace(options[name], name)
+        def given_keys = options[name].keySet() as ArrayList
+        custom_schema_types.check_input_type_keys(given_keys, name)
+
+        options[name].each { entry ->
+            def entry_as_map = [:]
+            entry_as_map[entry.key] = entry.value
+            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
+        }
+    }
+
+    /**
+    * Check that options[name] is a non-empty Namespace keyed only by allowed BAM types, then validate each entry
+    */
+    check_bam_namespace = { Map options, String name, Map properties ->
+        custom_schema_types.check_if_namespace(options[name], name)
+        def given_keys = options[name].keySet() as ArrayList
+        if (given_keys.isEmpty()) {
+            throw new Exception("No inputs provided! Please provide inputs in the YAML.")
+        }
+        custom_schema_types.check_input_type_keys(given_keys, name, custom_schema_types.allowed_bam_types)
+
+        options[name].each { entry ->
+            def entry_as_map = [:]
+            entry_as_map[entry.key] = entry.value
+            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
+        }
+    }
+
+    /**
+    * Check that options[name] is a List or Set and that every item passes schema.check_path in 'r' mode
+    */
+    check_bam_list = { Map options, String name, Map properties ->
+        custom_schema_types.check_if_list(options[name], name)
+        for (item in options[name]) {
+            schema.check_path(item, 'r')
+        }
+    }
+
+    types = [
+        'InputNamespace': custom_schema_types.check_input_namespace,
+        'InputBAMNamespace': custom_schema_types.check_bam_namespace,
+        'BAMEntryList': custom_schema_types.check_bam_list
+    ]
+}
diff --git a/config/schema.yaml b/config/schema.yaml
index e971065..d9cff64 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -1,9 +1,8 @@
 ---
-input_csv:
-    type: 'Path'
-    mode: 'r'
-    required: true
-    help: 'Absolute path to the input CSV file'
+sample_id:
+  type: 'String'
+  required: true
+  help: 'Sample ID'
 reference_fasta:
     type: 'Path'
     mode: 'r'
@@ -49,3 +48,21 @@ filter_condition:
     type: 'String'
     required: true
     default: "FILTER=='PASS'"
+input:
+  type: 'InputNamespace'
+  required: true
+  help: 'Input samples'
+  elements:
+    BAM:
+      type: 'InputBAMNamespace'
+      required: true
+      help: 'Input BAMs for calling'
+      elements:
+        normal:
+          type: 'BAMEntryList'
+          required: false
+          help: 'Input normal BAMs'
+        tumor:
+          type: 'BAMEntryList'
+          required: false
+          help: 'Input tumor BAMs'

From 177e03558e9a557a3772f04ca26eefc0ef1eafb8 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:03:16 -0700
Subject: [PATCH 04/20] update output dir structure set up

---
 config/methods.config | 44 ++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/config/methods.config b/config/methods.config
index a8ae92d..f51e12d 100644
--- a/config/methods.config
+++ b/config/methods.config
@@ -1,6 +1,8 @@
-includeConfig "../external/pipeline-Nextflow-config/config/retry/retry.config"
+import nextflow.util.SysHelper
+includeConfig "../external/pipeline-Nextflow-config/config/bam/bam_parser.config"
+includeConfig "../external/pipeline-Nextflow-config/config/methods/common_methods.config"
 includeConfig "../external/pipeline-Nextflow-config/config/schema/schema.config"
-
+includeConfig "../external/pipeline-Nextflow-config/config/retry/retry.config"
 
 methods {
     check_permissions = { path ->
@@ -19,15 +21,26 @@ methods {
             }
         }
 
+    set_ids_from_bams = {
+        params.samples_to_process = [] as Set
+        params.input.BAM.each { k, v ->
+            v.each { bam_path ->
+                def bam_header = bam_parser.parse_bam_header(bam_path)
+                def sm_tags = bam_header['read_group'].collect{ it['SM'] }.unique()
 
-    set_output_dir = {
-        def sample
+                if (sm_tags.size() != 1) {
+                    throw new Exception("${bam_path} contains multiple samples! Please run pipeline with single sample BAMs.")
+                }
+                params.samples_to_process.add(['id': sm_tags[0], 'path': bam_path, 'sample_type': k])
+            }
+        }
+    }
 
-        // assumes that project and samples name are in the pipeline.config
-        def reader = new FileReader(params.input_csv)
-        reader.splitEachLine(',') { parts -> [sample = parts[1].split('/')[-1].split('.bam')[0]] }
+    set_output_dir = {
+        sample_header = bam_parser.parse_bam_header(params.input.BAM.tumor)
+        sample_tag = sample_header['read_group'].collect{ it['SM'] }.unique()
 
-        params.sample = "${sample}"
+        params.sample = sample_tag[0]
 
         params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample}"
         }
@@ -98,7 +111,7 @@ methods {
 
     set_resources_allocation = {
         // Function to ensure that resource requirements don't go beyond
-        // a maximum limit        
+        // a maximum limit
         node_cpus = params.max_cpus
         node_memory_GB = params.max_memory.toGiga()
         // Load base.config by default for all pipelines
@@ -185,7 +198,7 @@ methods {
 
         timeline.enabled = true
         timeline.file = "${params.log_output_dir}/nextflow-log/timeline.html"
-        
+
         report.enabled = true
         report.file = "${params.log_output_dir}/nextflow-log/report.html"
     }
@@ -202,16 +215,17 @@ methods {
 
     // Set up env, timeline, trace, and report above.
     setup = {
+        methods.set_env()
+        schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
+        schema.validate()
+        methods.set_ids_from_bams()
+        methods.set_resources_allocation()
         methods.set_output_dir()
         methods.set_log_output_dir()
         methods.check_permissions(params.log_output_dir)
-        methods.set_env()
-        methods.set_resources_allocation()
+        methods.set_pipeline_logs()
         methods.set_process()
         methods.set_docker_sudo()
-        methods.set_pipeline_logs()
         retry.setup_retry()
-        schema.validate()
         }
     }
-

From 84ed7de8032c173a388f73b93c68bc5cf6ddf55d Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:03:42 -0700
Subject: [PATCH 05/20] change patient_id to sample_id

---
 input/call-sSV-input.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/input/call-sSV-input.yaml b/input/call-sSV-input.yaml
index 1c15bc0..5d4130f 100644
--- a/input/call-sSV-input.yaml
+++ b/input/call-sSV-input.yaml
@@ -1,5 +1,5 @@
 ---
-patient_id: "patient_id"
+sample_id: "sample_id"
 input:
   BAM:
     normal:

From 677736fdd70fc347df7e199ab13412e7b1360ce6 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:04:07 -0700
Subject: [PATCH 06/20] update pipeval

---
 config/default.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/default.config b/config/default.config
index 7ae1604..6d7e43b 100644
--- a/config/default.config
+++ b/config/default.config
@@ -28,7 +28,7 @@ params {
     delly_version = '1.1.3'
     manta_version = '1.6.0'
     bcftools_version = '1.15.1'
-    pipeval_version = '3.0.0'
+    pipeval_version = '4.0.0-rc.2'
 
     // Docker tool versions
     docker_image_delly = "${-> params.docker_container_registry}/delly:${params.delly_version}"

From d4735ab1f8015a1de3823fc54c5f17559a2c3fa3 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:04:48 -0700
Subject: [PATCH 07/20] remove empty line

---
 module/delly.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/module/delly.nf b/module/delly.nf
index c2fb243..61629a5 100644
--- a/module/delly.nf
+++ b/module/delly.nf
@@ -82,7 +82,7 @@ process filter_sSV_Delly {
         path "${output_filename}.bcf", emit: somatic_bcf
         path "${output_filename}.bcf.csi", emit: somatic_bcf_csi
         path ".command.*"
- 
+
     script:
         output_filename = generate_standard_filename(
             "DELLY-${params.delly_version}",

From 6d11b7bb3b4d1dbd0518e5963ded83cf22707df8 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:05:41 -0700
Subject: [PATCH 08/20] update channels to parse input from YAML

---
 main.nf | 96 ++++++++++++++++++++-------------------------------------
 1 file changed, 33 insertions(+), 63 deletions(-)

diff --git a/main.nf b/main.nf
index b3f31b0..08d9f35 100644
--- a/main.nf
+++ b/main.nf
@@ -61,77 +61,47 @@ include { generate_sha512 as generate_sha512_Manta } from './module/sha512' addP
     workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}"
     )
 
-/**
-* The input file params.input_csv looks as below:
-* normal_bam, tumor_bam
-* /hot/users/ybugh/A-mini/0/output/HG002.N-0.bam, /hot/users/ybugh/A-mini/0/output/S2.T-0.bam
-*
-* Later, calling "delly call -g hg19.fa -v t1.pre.bcf -o geno.bcf -x hg19.excl tumor1.bam normal1.bam ... normalN.bam" needs all the normal samples, 
-* which will be collected from params.input_normal_bams.
-*/
-
-/**
-* Create input_validation to validate the input bams
-*/
-validation_mode = Channel.of("file-input")
-
-input_files = Channel
-    .fromPath(params.input_csv, checkIfExists:true)
-    .splitCsv(header:true)
-    .map {
-        row -> [
-            row.tumor_bam,
-            "${row.tumor_bam}.bai",
-            row.normal_bam,
-            "${row.normal_bam}.bai"
-            ]
-        }
-    .flatten()
+// Returns the index file for the given bam or vcf
+def indexFile(bam_or_vcf) {
+    if (bam_or_vcf.endsWith('.bam')) {
+        return "${bam_or_vcf}.bai"
+    } else if (bam_or_vcf.endsWith('vcf.gz')) {
+        return "${bam_or_vcf}.tbi"
+    } else {
+        throw new Exception("Index file for ${bam_or_vcf} file type not supported. Use .bam or .vcf.gz files.")
+    }
+}
 
-validation_mode
-     .combine(input_files)
-     .set { input_validation }
+Channel.from(params.samples_to_process)
+    .map{ sample -> ['index': indexFile(sample.path)] + sample }
+    .set{ input_ch_samples_with_index }
 
+input_ch_samples_with_index
+    .map{ sample -> [sample.path, sample.index] }
+    .flatten()
+    .set{ input_validation }
 if (params.verbose){
     input_validation.view()
     }
 
-/**
-* Create input_paired_bams_ch to get the paired turmor sample and normal sample
-*/
-input_paired_bams_ch = Channel
-    .fromPath(params.input_csv, checkIfExists:true)
-    .splitCsv(header:true)
-    .map{
-        row -> tuple(
-            Paths.get(row.tumor_bam).getFileName().toString().split('.bam')[0],
-            row.tumor_bam,
-            "${row.tumor_bam}.bai",
-            row.normal_bam,
-            "${row.normal_bam}.bai"
-            )
-        }
+tumor_id = input_ch_samples_with_index
+    .filter{ it.sample_type == 'tumor' }
+    .map{ it -> [it.id] }
+    .flatten()
+
+tumor_id_bam_bai = input_ch_samples_with_index
+    .filter{ it.sample_type == 'tumor' }
+    .map{ it -> [it.id, it.path, it.index] }
+normal_bam_bai = input_ch_samples_with_index
+    .filter{ it.sample_type == 'normal' }
+    .map{ it -> [it.path, it.index] }
 
+input_paired_bams_ch = tumor_id_bam_bai.combine(normal_bam_bai)
 if (params.verbose){
     input_paired_bams_ch.view()
     }
 
-/**
-* Create tumor_bams_ch to only get the turmor samples.
-* I tried to reuse input_paired_bams_ch, however, in that way, I have to filter the paired normal sample out of the all_normal_samples_bams_list,
-* otherwise, nextflow complains a same normal sample is declared twice.
-*/
-tumor_bams_ch = Channel
-    .fromPath(params.input_csv, checkIfExists:true)
-    .splitCsv(header:true)
-    .map{
-        row -> tuple(
-            Paths.get(row.tumor_bam).getFileName().toString().split('.bam')[0],
-            row.tumor_bam,
-            "${row.tumor_bam}.bai"
-            )
-        }
-
+tumor_bams_ch = tumor_id_bam_bai
 if (params.verbose){
     tumor_bams_ch.view()
     }
@@ -166,11 +136,11 @@ workflow {
         * sample_name   sample_type
         * S2_v1.1.5	tumor
         * HG002.N	normal
-        * 
+        *
         * Use bcftools query -l to get the sample names out of call_sSV_Delly.out.nt_call_bcf
         * Further generate BCFtools_${params.bcftools_version}_${params.dataset_id}_${tumor_id}_query-tumor-normal-name.tsv which will be used by delly filter
         * Note, the order of samples in call_sSV_Delly.out.nt_call_bcf is determined by the order of samples in delly call.
-        * For example, 
+        * For example,
         *    delly call \
         *    -g /tmp/ref/genome/genome.fa \
         *    -x /tmp/ref/delly/human.hg38.excl.tsv \
@@ -181,7 +151,7 @@ workflow {
         * HG002.N
         * S2_v1.1.5
         * If you put /tmp/bams/S2.T-0.bam in front of /tmp/bams/HG002.N-0.bam, bcftools query -l /tmp/output/output.bcf will yield
-        * S2_v1.1.5 
+        * S2_v1.1.5
         * HG002.N
         */
         query_SampleName_BCFtools(

From f04f118a839ed7e907e4e09bec40a5d4c0fcd1a1 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 01:11:18 -0700
Subject: [PATCH 09/20] Update CHANGELOG.md

---
 CHANGELOG.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3a99613..d21d41c 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,15 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 ---
 
 ## [Unreleased]
-- Update `README.md` to clarify adjustable parameters and note lab default values.
+### Added
+- YAML input
+
+### Changed
+- Parse sample ID from tumor BAM for output directory naming
+- Update `README.md` to clarify adjustable parameters and note lab default values
+
+### Removed
+- CSV input
 
 ---
 

From d3553bae8c3eb787891670167c8db78a42633f8c Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 02:16:55 -0700
Subject: [PATCH 10/20] fix YAML linting

---
 config/schema.yaml        | 72 +++++++++++++++++++--------------------
 input/call-sSV-input.yaml |  2 +-
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/config/schema.yaml b/config/schema.yaml
index d9cff64..fa0cc32 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -4,50 +4,50 @@ sample_id:
   required: true
   help: 'Sample ID'
 reference_fasta:
-    type: 'Path'
-    mode: 'r'
-    required: true
-    help: 'Absolute path to a reference FASTA file'
+  type: 'Path'
+  mode: 'r'
+  required: true
+  help: 'Absolute path to a reference FASTA file'
 exclusion_file:
-    type: 'Path'
-    mode: 'r'
-    required: true
-    help: 'Absoulte path to an exclusion file'
+  type: 'Path'
+  mode: 'r'
+  required: true
+  help: 'Absolute path to an exclusion file'
 algorithm:
-    type: 'List'
-    required: true
-    help: 'List of SV caller(s) for calling'
-    default:
-        - delly
-        - manta
-    choices:
-        - delly
-        - manta
+  type: 'List'
+  required: true
+  help: 'List of SV caller(s) for calling'
+  default:
+      - delly
+      - manta
+  choices:
+      - delly
+      - manta
 output_dir:
-    type: 'Path'
-    mode: 'w'
-    required: true
-    help: 'Absolute path to output directory'
+  type: 'Path'
+  mode: 'w'
+  required: true
+  help: 'Absolute path to output directory'
 dataset_id:
-    type: 'String'
-    required: true
-    help: 'Dataset identifier'
+  type: 'String'
+  required: true
+  help: 'Dataset identifier'
 map_qual:
-    type: 'Integer'
-    required: true
-    default: 20
+  type: 'Integer'
+  required: true
+  default: 20
 min_clique_size:
-    type: 'Integer'
-    required: true
-    default: 5
+  type: 'Integer'
+  required: true
+  default: 5
 mad_cutoff:
-    type: 'Integer'
-    required: true
-    default: 15
+  type: 'Integer'
+  required: true
+  default: 15
 filter_condition:
-    type: 'String'
-    required: true
-    default: "FILTER=='PASS'"
+  type: 'String'
+  required: true
+  default: "FILTER=='PASS'"
 input:
   type: 'InputNamespace'
   required: true
diff --git a/input/call-sSV-input.yaml b/input/call-sSV-input.yaml
index 5d4130f..04d9d54 100644
--- a/input/call-sSV-input.yaml
+++ b/input/call-sSV-input.yaml
@@ -5,4 +5,4 @@ input:
     normal:
       - "/absolute/path/to/BAM"
     tumor:
-      - "/abosolute/path/to/BAM"
\ No newline at end of file
+      - "/absolute/path/to/BAM"

From 847eba2fc61a07295bf11f004231a47cd94507e2 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 09:18:24 -0700
Subject: [PATCH 11/20] fix YAML linting

---
 config/schema.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/schema.yaml b/config/schema.yaml
index fa0cc32..753964d 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -18,11 +18,11 @@ algorithm:
   required: true
   help: 'List of SV caller(s) for calling'
   default:
-      - delly
-      - manta
+    - delly
+    - manta
   choices:
-      - delly
-      - manta
+    - delly
+    - manta
 output_dir:
   type: 'Path'
   mode: 'w'

From 63e9e2c1ede7cc551d18b4e26f8acfeb5efe4a8c Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 15:49:05 -0700
Subject: [PATCH 12/20] update help comments

---
 config/schema.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/schema.yaml b/config/schema.yaml
index 753964d..63cfac5 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -16,7 +16,7 @@ exclusion_file:
 algorithm:
   type: 'List'
   required: true
-  help: 'List of SV caller(s) for calling'
+  help: 'List of available somatic SV callers'
   default:
     - delly
     - manta
@@ -56,7 +56,7 @@ input:
     BAM:
       type: 'InputBAMNamespace'
       required: true
-      help: 'Input BAMs for calling'
+      help: 'Input BAMs for somatic structural variant calling'
       elements:
         normal:
           type: 'BAMEntryList'

From c8e016e71f3d3b0387d7204428553735fe5cd49d Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 15:49:26 -0700
Subject: [PATCH 13/20] remove redundant code

---
 main.nf | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/main.nf b/main.nf
index 08d9f35..be42ad5 100644
--- a/main.nf
+++ b/main.nf
@@ -61,14 +61,12 @@ include { generate_sha512 as generate_sha512_Manta } from './module/sha512' addP
     workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}"
     )
 
-// Returns the index file for the given bam or vcf
-def indexFile(bam_or_vcf) {
-    if (bam_or_vcf.endsWith('.bam')) {
-        return "${bam_or_vcf}.bai"
-    } else if (bam_or_vcf.endsWith('vcf.gz')) {
-        return "${bam_or_vcf}.tbi"
+// Returns the index file for the given bam
+def indexFile(bam) {
+    if (bam.endsWith('.bam')) {
+        return "${bam}.bai"
     } else {
-        throw new Exception("Index file for ${bam_or_vcf} file type not supported. Use .bam or .vcf.gz files.")
+        throw new Exception("Index file for ${bam} file type not supported. Use .bam!")
     }
 }
 
@@ -84,11 +82,6 @@ if (params.verbose){
     input_validation.view()
     }
 
-tumor_id = input_ch_samples_with_index
-    .filter{ it.sample_type == 'tumor' }
-    .map{ it -> [it.id] }
-    .flatten()
-
 tumor_id_bam_bai = input_ch_samples_with_index
     .filter{ it.sample_type == 'tumor' }
     .map{ it -> [it.id, it.path, it.index] }

From 41e8a0b110501995e7c64edf17890dcc9354635d Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 16:43:48 -0700
Subject: [PATCH 14/20] update comment in config

---
 config/custom_schema_types.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/custom_schema_types.config b/config/custom_schema_types.config
index 25cf64f..ced9942 100644
--- a/config/custom_schema_types.config
+++ b/config/custom_schema_types.config
@@ -1,5 +1,5 @@
 /**
-* This custom schema namespace implements a custom type for checking input BAMs for call-gSNP
+* This custom schema namespace implements a custom type for checking input BAMs for call-sSV
 */
 custom_schema_types {
     allowed_input_types = [

From c852722388005ce7b01e09efe052f55fed1b9f12 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 16:44:23 -0700
Subject: [PATCH 15/20] update logging

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index be42ad5..6df461b 100644
--- a/main.nf
+++ b/main.nf
@@ -16,7 +16,7 @@ Current Configuration:
     version: ${workflow.manifest.version}
 
 - input:
-    input_csv: "${params.input_csv}"
+    samples: ${params.samples_to_process}
     reference_fasta: "${params.reference_fasta}"
     reference_fasta_index: "${params.reference_fasta}.fai"
     exclusion_file: "${params.exclusion_file}"

From a2e86de607ae819ccd1c67cb180667cb5ac49474 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 16:45:08 -0700
Subject: [PATCH 16/20] use params.samples_to_process to parse sample id

---
 config/methods.config | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/methods.config b/config/methods.config
index f51e12d..612906b 100644
--- a/config/methods.config
+++ b/config/methods.config
@@ -37,10 +37,10 @@ methods {
     }
 
     set_output_dir = {
-        sample_header = bam_parser.parse_bam_header(params.input.BAM.tumor)
-        sample_tag = sample_header['read_group'].collect{ it['SM'] }.unique()
-
-        params.sample = sample_tag[0]
+        params.sample = Channel.from(params.samples_to_process)
+            .filter{ it.sample_type == 'tumor' }
+            .map{ it -> [it.id] }
+            .flatten()
 
         params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample}"
         }

From 13059c4c76a0ac7ccfc68e6cb30628f19c90eaf1 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 22:18:03 -0700
Subject: [PATCH 17/20] set sample ID from tumor ID extracted using
 params.samples_to_process

---
 config/methods.config | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/methods.config b/config/methods.config
index 612906b..de497ea 100644
--- a/config/methods.config
+++ b/config/methods.config
@@ -37,10 +37,10 @@ methods {
     }
 
     set_output_dir = {
-        params.sample = Channel.from(params.samples_to_process)
-            .filter{ it.sample_type == 'tumor' }
-            .map{ it -> [it.id] }
-            .flatten()
+        params.sample = params.samples_to_process
+            .findAll{ it.sample_type == 'tumor' }
+            .collect{ it.id }
+            .join()
 
         params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample}"
         }

From 5854a759911eef94751859d7d4f6bb5f385b204e Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 25 Jul 2023 22:30:14 -0700
Subject: [PATCH 18/20] remove redundant code

---
 main.nf | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/main.nf b/main.nf
index 6df461b..57654f5 100644
--- a/main.nf
+++ b/main.nf
@@ -94,11 +94,6 @@ if (params.verbose){
     input_paired_bams_ch.view()
     }
 
-tumor_bams_ch = tumor_id_bam_bai
-if (params.verbose){
-    tumor_bams_ch.view()
-    }
-
 reference_fasta_index = "${params.reference_fasta}.fai"
 
 workflow {

From f3747be7769be32e6589f5f5f40c9d4d51939666 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Thu, 27 Jul 2023 16:46:16 -0700
Subject: [PATCH 19/20] check for multiple tumor samples before setting sample
 param

---
 config/methods.config | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/config/methods.config b/config/methods.config
index de497ea..1fd3bf4 100644
--- a/config/methods.config
+++ b/config/methods.config
@@ -37,10 +37,15 @@ methods {
     }
 
     set_output_dir = {
-        params.sample = params.samples_to_process
+        sample = params.samples_to_process
             .findAll{ it.sample_type == 'tumor' }
             .collect{ it.id }
-            .join()
+
+        if (sample.size() != 1) {
+            throw new Exception("${params.samples_to_process}\n\n Multile Tumor BAMs found in the input! Please run pipeline one Tumor sample at a time.")
+        }
+
+        params.sample = sample[0]
 
         params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample}"
         }
@@ -138,7 +143,6 @@ methods {
             }
         }
 
-
     /**
      * Check the permissions and existence of workDir.
      * If it doesn't exist, recursively find first existing directory and check write permission.

From 06a0c6796f7c1749bc55a2cd87cbbf0ac2702841 Mon Sep 17 00:00:00 2001
From: Mootor
 <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
Date: Tue, 1 Aug 2023 10:52:34 -0700
Subject: [PATCH 20/20] fix typo in error

---
 config/methods.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/methods.config b/config/methods.config
index 1fd3bf4..56240e1 100644
--- a/config/methods.config
+++ b/config/methods.config
@@ -42,7 +42,7 @@ methods {
             .collect{ it.id }
 
         if (sample.size() != 1) {
-            throw new Exception("${params.samples_to_process}\n\n Multile Tumor BAMs found in the input! Please run pipeline one Tumor sample at a time.")
+            throw new Exception("${params.samples_to_process}\n\n Multiple Tumor BAMs found in the input! Please run pipeline one Tumor sample at a time.")
         }
 
         params.sample = sample[0]