From e62878e14a6f77ed3b489cdf6d180c776e87fca3 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:44:55 +0100
Subject: [PATCH 01/24] Delete conf/biomina.config

---
 conf/biomina.config | 55 ---------------------------------------------
 1 file changed, 55 deletions(-)
 delete mode 100644 conf/biomina.config

diff --git a/conf/biomina.config b/conf/biomina.config
deleted file mode 100644
index b3a9c3b..0000000
--- a/conf/biomina.config
+++ /dev/null
@@ -1,55 +0,0 @@
-executor {
-    /* https://www.nextflow.io/docs/latest/config.html?highlight=polling#scope-executor */
-    /* https://www.nextflow.io/blog/2021/5_tips_for_hpc_users.html */
-
-    queueSize = 10
-    // pollInterval = '10sec'
-    // submitRateLimit = '50/2min'
-}
-
-
-docker {
-    enabled = true
-    runOptions = "-u root"
-}
-
-process {
-
-    cache = 'lenient'
-    errorStrategy = { task.attempt < 3 ? 'retry' : 'ignore' }
-
-    // SLURM
-    beforeScript = "source /home/bratbuser/mambaforge/etc/profile.d/conda.sh"
-    afterScript = 'conda deactivate'
-    executor = "slurm"
-    queue = "batch"
-    clusterOptions = "--nodelist=oncovm-n002 "
-
-
-
-    cpus = 4
-    memory = 8.GB
-
-    withName: '.*GATK_VARIANT_RECALIBRATOR.*' {
-        memory = 48.GB
-    }
-
-    withName: '.*GATK_MARK_DUPLICATES.*' {
-        memory = 16.GB
-    }
-
-    withName: '.*GATK_HAPLOTYPE_CALLER.*' {
-        memory = 16.GB
-    }
-
-    withName: '.*SAMTOOLS_MERGE.*' {
-        memory = 16.GB
-    }
-
-    withName: 'IQTREE.*' {
-        cpus = 2
-    }
-
-}
-
-

From 9e2d7fd46e3dc0b1c58c8345dee5332243621677 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:45:09 +0100
Subject: [PATCH 02/24] Delete conf/singularity.config

---
 conf/singularity.config | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 conf/singularity.config

diff --git a/conf/singularity.config b/conf/singularity.config
deleted file mode 100644
index b3197ad..0000000
--- a/conf/singularity.config
+++ /dev/null
@@ -1,27 +0,0 @@
-process {
-
-    withName:
-    'TBPROFILER.*' {
-        container = "lcerdeira/bratb/biocontainer-tbprofiler:6.3.0"
-    }
-
-    withName:
-    'NTMPROFILER.*' {
-        container = "lcerdeira/bratb/biocontainer-ntmprofiler:0.4.0"
-    }
-
-    withName:
-    'ISMAPPER.*|GATK.*|LOFREQ.*|DELLY.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' {
-        container = "lcerdeira/bratb/bratb-container:1.0.0"
-    }
-
-    withName:
-    'BWA.*|IQTREE.*|SNPDISTS.*|SNPSITES.*|BCFTOOLS.*|BGZIP.*|SAMTOOLS.*|SNPEFF.*|CLUSTERPICKER.*' {
-        container = "lcerdeira/bratb/mapping-container:1.0.0"
-    }
-
-}
-
-singularity {
-    enabled = true
-}
\ No newline at end of file

From 4146c5a458aeb00c854ecff12ac4c952363eadc9 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:48:12 +0100
Subject: [PATCH 03/24] Update bratb-test.yml

---
 bratb-test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bratb-test.yml b/bratb-test.yml
index b3b5f99..c0cea91 100644
--- a/bratb-test.yml
+++ b/bratb-test.yml
@@ -1,6 +1,6 @@
 
-# Sample contents of my_parameters_1.yml file
+# Sample contents of params.yml file
 
-input_samplesheet: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/data/input-data/input_test.csv
+input_samplesheet: /home/lcerdeira/BraSeqTB/data/input-data/input_test.csv
 only_validate_fastqs: true
-conda_envs_location: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/conda_envs
\ No newline at end of file
+conda_envs_location: /home/lcerdeira/BraSeqTB/conda_envs

From 92ea64e857d25a8fd3bb0eba62f25625971303eb Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:49:46 +0100
Subject: [PATCH 04/24] Update bratb.yml

---
 bratb.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bratb.yml b/bratb.yml
index 9c4bfbc..471bc93 100644
--- a/bratb.yml
+++ b/bratb.yml
@@ -1,6 +1,6 @@
 
-# Sample contents of my_parameters_1.yml file
+# Sample contents of params_1.yml file
 
-input_samplesheet: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/data/input-data/ialbratb-input.csv
+input_samplesheet: /home/lcerdeira/BraSeqTB/data/input-data/input_test.csv
 only_validate_fastqs: true
-conda_envs_location: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/conda_envs
\ No newline at end of file
+conda_envs_location: /home/lcerdeira/BraSeqTB/conda_envs

From 3ecd6a9f964b3025d230f26710ca335552195a78 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:51:06 +0100
Subject: [PATCH 05/24] Update mapping-env.yml

---
 conda_envs/mapping-env.yml | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/conda_envs/mapping-env.yml b/conda_envs/mapping-env.yml
index aa5e78b..946e64d 100644
--- a/conda_envs/mapping-env.yml
+++ b/conda_envs/mapping-env.yml
@@ -4,14 +4,7 @@ channels:
   - bioconda
   - defaults
 dependencies:
-#NOTE: Not natively. Python 2.7 was sunsetted prior to release of the osx-arm64 platform, so there isn't any such build. One could try requesting such a build on the Conda Forge Python feedstock, but even if someone did that you'd still face the issue that most Python packages will also lack osx-arm64 builds for Python 2.7.
-#Emulate through Rosetta. Apple provides an x86_64 emulator, Rosetta 2, which will run x86_64 binaries, such as what would be installed with Conda environments using an osx-64 subdir. One can create environments with such a subdir setting with something like:
-#CONDA_SUBDIR=osx-64 conda create -n py27 python=2.7  # include other packages here
-# ensure that future package installs in this env stick to 'osx-64'
-#conda activate py27
-#conda config --env --set subdir osx-64
-
-#  - python=2.7 
+  - python=2.7 
   - bwa=0.7.17
   - samtools=1.9
   - iqtree=2.1.2

From 3310ef42e2d4e3411b791a04a00f7a32eba4d471 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:54:33 +0100
Subject: [PATCH 06/24] Update setup_conda_envs.sh

---
 conda_envs/setup_conda_envs.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/conda_envs/setup_conda_envs.sh b/conda_envs/setup_conda_envs.sh
index ec05317..b61efa0 100644
--- a/conda_envs/setup_conda_envs.sh
+++ b/conda_envs/setup_conda_envs.sh
@@ -3,7 +3,7 @@
 set -e
 
 # NOTE: Please replace `conda` with `mamba` if it is installed for faster installs.
-resolverCondaBinary="mamba" # pick either conda OR mamba
+resolverCondaBinary="conda" # pick either conda OR mamba
 
 #===========================================================
 #
@@ -17,15 +17,16 @@ $resolverCondaBinary env create -p bratb-env --file conda_envs/bratb-env.yml
 
 $resolverCondaBinary env create -p bratb-tbprofiler-env --file conda_envs/bratb-tbprofiler-env.yml
 
-echo "INFO: Activate mamba env with tb-profiler and setup the WHO database"
-eval "$(mamba shell.bash hook)"
-mamba activate "./conda_envs/bratb-tbprofiler-env"
+echo "INFO: Activate conda env with tb-profiler and setup the WHO database"
+eval "$(conda shell.bash hook)"
+# NOTE: Installing mamba can leave the conda path misconfigured; if the command below does not work, use the full path to the env or fix the conda path.
+conda activate "./conda_envs/bratb-tbprofiler-env"
 
 #echo "INFO: Use WHO-v2 database in bratb-tbprofiler-env"
 #tb-profiler update_tbdb --commit bdace1f82d948ce0001e1dade6eb93d2da9c47e5 --logging DEBUG
 
-#echo "INFO: Use BRATB branch from tbdb database in bratb-tbprofiler-env"
+#echo "INFO: Use BraTB branch from tbdb database in bratb-tbprofiler-env"
 tb-profiler update_tbdb --commit 30f8bc37df15affa378ebbfbd3e1eb4c5903056e --logging DEBUG
 
 echo "INFO: Deactivate the bratb-tbprofiler-env "
-mamba deactivate
\ No newline at end of file
+conda deactivate

From 06d13a829890211a0e375ebf068f8f7abb29b4ef Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 18:56:06 +0100
Subject: [PATCH 07/24] Update template_noconda.config

---
 conf/template_noconda.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/template_noconda.config b/conf/template_noconda.config
index 1c05d14..a898339 100644
--- a/conf/template_noconda.config
+++ b/conf/template_noconda.config
@@ -22,7 +22,7 @@
 
 params {
 
-    input_samplesheet = "${projectDir}/resources/reference_set/bratb.pbs.test.csv"
+    input_samplesheet = "${projectDir}/data/input-data/bratb.csv"
     outdir = "${projectDir}/results"
 
 }

From 02bc7287f653a6851d9e8b596756192274398763 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:10:00 +0100
Subject: [PATCH 08/24] Update nextflow.config

---
 nextflow.config | 109 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 83 insertions(+), 26 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index c80df84..0fecd18 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,39 +1,96 @@
 /*
- * Copyright (c) 2024 LAPAM.
+ * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648
  *
+ * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA
+ *
+ * For quick overview of GPL-3 license, please refer
+ * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3
+ *
+ * - You MUST keep this license with original authors in your copy
+ * - You MUST acknowledge the original source of this software
+ * - You MUST state significant changes made to the original software
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program . If not, see <http://www.gnu.org/licenses/>.
  */
 
-manifest {
-  description = 'BRATB Nextflow'
-  author = 'Louise Cerdeira'
+manifest 
+  name = 'BraTB'
+  description = 'Draft version 1.0.0'
+  defaultBranch = 'master'
+  homePage = 'https://github.com/LaPAM-USP/BraSeqTB'
 }
 
-/*
- * defines execution profiles for different environments
- */
+params { includeConfig 'default_params.config' }
+
+process {
+
+    //Default values if a label hasn't been specified within a process
+    cpus = { 4 * task.attempt }
+    memory = { 4.GB * task.attempt }
+
+    //Default action is to retry a failed task twice and ignore the process if the third attempt fails
+    errorStrategy = { task.attempt < 3 ? 'retry' : 'ignore' }
+    maxRetries = 3
+
+    //NOTE: These labels are ordered by number of cpus allocated and then the memory
+    withLabel: 'cpu_2_memory_2' {
+        cpus = 2
+        memory = 2.GB
+    }
+
+    withLabel: 'cpu_4_memory_8' {
+        cpus = 4
+        memory = 8.GB
+    }
+
+    withLabel: 'cpu_4_memory_16' {
+        cpus = 4
+        memory = 16.GB
+    }
+
+    withLabel: 'cpu_8_memory_4' {
+        cpus = 8
+        memory = 4.GB
+    }
+
+    withLabel: 'cpu_8_memory_8' {
+        cpus = 8
+        memory = 8.GB
+    }
+
+    withLabel: 'cpu_8_memory_16' {
+        cpus = 8
+        memory = 16.GB
+    }
 
-params {
-  trim_galore_args = ""
-  bwa_args = ""       
-  multiqc_args = ""              
-  kaiju_args = ""           
-  lofreq_args = ""                
-  gatk_args = ""         
-  tbprofile_args = ""              
-  snpeff_args = ""         
-  delly_args = ""
-  trim = false
-  help = false
 }
 
 profiles {
 
-  standard {
-    process.executor = 'local'
-    includeConfig 'conda.config'
-  }
+    // Package management specific settings
+    conda_local { includeConfig 'conf/conda_local.config' }
+    docker { includeConfig 'conf/docker.config' }
+
+    // Executor specific settings
+    pbs { includeConfig 'conf/pbs.config' }
+    server { includeConfig 'conf/server.config' }
+    low_memory { includeConfig 'conf/low_memory.config' }
+    laptop { includeConfig 'conf/laptop.config' }
+
+    //NOTE: Test profile - DO NOT USE
+    test { includeConfig 'conf/test.config' }
 
-  slurm {
-    includeConfig 'slurm.config'
-  }
+    //NOTE: Frequent settings needed for analysis
+    bwa_k66 { includeConfig 'conf/bwa_k66.config' }
 }

From 39cf14e88e7e1031e63504656d589ab85944dd4a Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:12:47 +0100
Subject: [PATCH 09/24] Update build.sh

---
 containers/biocontainer-tbprofiler/build.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/containers/biocontainer-tbprofiler/build.sh b/containers/biocontainer-tbprofiler/build.sh
index 334ae9b..4275ea7 100644
--- a/containers/biocontainer-tbprofiler/build.sh
+++ b/containers/biocontainer-tbprofiler/build.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
 set -uex
 
-# NOTE: Make sure you've set the environment correctly and are logged in to the registry.
+# NOTE: Make sure you've set the environment correctly, are logged in to the registry, and have permission to run docker without sudo; otherwise, you will need to run this script using sudo.
 
 TBPROFILER_VERSION=6.3.0
-DOCKER_NAMESPACE="lcerdeira/bratb"
+DOCKER_NAMESPACE="lcerdeira/bratb-tbprofiler"
 
 CONTAINER_NAME="$DOCKER_NAMESPACE/biocontainer-tbprofiler:$TBPROFILER_VERSION"
 

From 9a71dac4d931ae8ddcfa2712e41a523765d4547a Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:25:42 +0100
Subject: [PATCH 10/24] Update build.sh

---
 containers/bratb-container/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/containers/bratb-container/build.sh b/containers/bratb-container/build.sh
index 366d80c..43372eb 100644
--- a/containers/bratb-container/build.sh
+++ b/containers/bratb-container/build.sh
@@ -3,7 +3,7 @@ set -uex
 
 # NOTE: Make sure you've set the environment correctly and are logged in to the registry.
 
-CONTAINER_TAG=2.0.0
+CONTAINER_TAG=1.0.0
 CONTAINER_DIR=bratb-container
 DOCKER_NAMESPACE="lcerdeira/bratb"
 

From 753bccdeed20f420ea8d385e015159bec62c94b3 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:27:10 +0100
Subject: [PATCH 11/24] Update build.sh

---
 containers/misc/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/containers/misc/build.sh b/containers/misc/build.sh
index cf5dcbd..de8403d 100644
--- a/containers/misc/build.sh
+++ b/containers/misc/build.sh
@@ -3,7 +3,7 @@ set -uex
 
 # NOTE: Make sure you've set the environment correctly and are logged in to the registry.
 
-CONTAINER_TAG=2.0.0-theta
+CONTAINER_TAG=1.0.0-theta
 DOCKER_NAMESPACE="lcerdeira/bratb"
 CONTAINER_DIR=misc
 

From c6869a4aeb40381858d22bdc1366a3ea10aef203 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:28:57 +0100
Subject: [PATCH 12/24] Update build.sh

---
 containers/mapping-container/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/containers/mapping-container/build.sh b/containers/mapping-container/build.sh
index edd57f3..bfbb0c5 100644
--- a/containers/mapping-container/build.sh
+++ b/containers/mapping-container/build.sh
@@ -4,7 +4,7 @@ set -uex
 # NOTE: Make sure you've set the environment correctly and are logged in to the registry.
 #
 
-CONTAINER_TAG=2.0.0
+CONTAINER_TAG=1.0.0
 CONTAINER_DIR=mapping-container
 DOCKER_NAMESPACE="lcerdeira/bratb"
 

From 86ad03de7824e784ae04f308f2f18556252fb176 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:29:39 +0100
Subject: [PATCH 13/24] Delete containers/Dockerfile

---
 containers/Dockerfile | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 containers/Dockerfile

diff --git a/containers/Dockerfile b/containers/Dockerfile
deleted file mode 100644
index 91827ae..0000000
--- a/containers/Dockerfile
+++ /dev/null
@@ -1,13 +0,0 @@
-FROM quay.io/biocontainers/tb-profiler:6.3.0--pyhdfd78af_0 AS base
-
-FROM base AS tbprofiler
-
-#NOTE: Just update the tb-profiler databaes to rely upon the relevant branch.
-
-# WHO-v2 specific tag https://github.com/jodyphelan/tbdb/releases/tag/who-v2-strict
-# COMMIT bdace1f82d948ce0001e1dade6eb93d2da9c47e5 
-
-# bratb branch 
-#RUN tb-profiler update_tbdb --branch bratb --logging DEBUG
-
-RUN tb-profiler update_tbdb --commit 30f8bc37df15affa378ebbfbd3e1eb4c5903056e --logging DEBUG

From 3e307a440ac9fac16131453f1085f0b85772e6e4 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 19:29:49 +0100
Subject: [PATCH 14/24] Delete containers/build.sh

---
 containers/build.sh | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 containers/build.sh

diff --git a/containers/build.sh b/containers/build.sh
deleted file mode 100644
index 334ae9b..0000000
--- a/containers/build.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-set -uex
-
-# NOTE: Make sure you've set the environment correctly and are logged in to the registry.
-
-TBPROFILER_VERSION=6.3.0
-DOCKER_NAMESPACE="lcerdeira/bratb"
-
-CONTAINER_NAME="$DOCKER_NAMESPACE/biocontainer-tbprofiler:$TBPROFILER_VERSION"
-
-echo "Building container : $CONTAINER_NAME "
-
-docker build -t $CONTAINER_NAME .
-CONTAINER_ID=$(docker run -d $CONTAINER_NAME)
-docker commit $CONTAINER_ID $CONTAINER_NAME
-docker push $CONTAINER_NAME
-docker stop $CONTAINER_ID

From 440f54778773e40a8eafef15a64426005e1ba20b Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 21:50:06 +0100
Subject: [PATCH 15/24] Update nextflow.config

---
 nextflow.config | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 0fecd18..8b8236f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -24,8 +24,8 @@
  * along with this program . If not, see <http://www.gnu.org/licenses/>.
  */
 
-manifest 
-  name = 'BraTB'
+manifest {
+  name = 'Bratb'
   description = 'Draft version 1.0.0'
   defaultBranch = 'master'
   homePage = 'https://github.com/LaPAM-USP/BraSeqTB'
@@ -73,7 +73,6 @@ process {
         cpus = 8
         memory = 16.GB
     }
-
 }
 
 profiles {

From d9810555b147db729c8a49557cacdcedba697317 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 22:00:29 +0100
Subject: [PATCH 16/24] template input

---
 samplesheet/template_samplesheet.csv | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/samplesheet/template_samplesheet.csv b/samplesheet/template_samplesheet.csv
index ef96662..d80445e 100644
--- a/samplesheet/template_samplesheet.csv
+++ b/samplesheet/template_samplesheet.csv
@@ -1,5 +1,3 @@
 Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence
-Study_Name,S0001,1,1,full_path_to_directory_of_fastq_files/S0001_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0001_01_R1.fastq.gz,1,1,1
-Study_Name,S0002,1,1,full_path_to_directory_of_fastq_files/S0002_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0002_01_R2.fastq.gz,1,1,1
-Study_Name,S0003,1,1,full_path_to_directory_of_fastq_files/S0003_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0003_01_R2.fastq.gz,1,1,1
-Study_Name,S0004,1,1,full_path_to_directory_of_fastq_files/S0004_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0004_01_R2.fastq.gz,1,1,1
\ No newline at end of file
+ialbratb,ERR4813741,1,1,/home/lcerdeira/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813741_2.fastq.gz,1,1,1
+ialbratb,ERR4813742,1,1,/home/lcerdeira/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813742_2.fastq.gz,1,1,1
\ No newline at end of file

From f8e9d552810f2716c0832d2ba3c7c0afe491db27 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 22:22:42 +0100
Subject: [PATCH 17/24] fix quality check

---
 bin/generate_merged_cohort_stats.py         |  4 +-
 bin/sample_stats.py                         | 14 +-----
 bin/summarize_resistance_mixed_infection.py |  2 +-
 conf/laptop.config                          |  4 --
 conf/low_memory.config                      |  4 --
 conf/server.config                          |  8 ----
 default_params.config                       | 27 -----------
 main.nf                                     | 11 +----
 modules/lofreq/call__ntm.nf                 | 50 ++++++++++-----------
 modules/lofreq/indelqual.nf                 |  1 -
 modules/utils/cohort_stats.nf               |  2 +-
 modules/utils/sample_stats.nf               |  6 +--
 params/params.yaml                          |  3 --
 workflows/call_wf.nf                        | 17 -------
 workflows/quality_check_wf.nf               | 28 ++++++------
 15 files changed, 47 insertions(+), 134 deletions(-)

diff --git a/bin/generate_merged_cohort_stats.py b/bin/generate_merged_cohort_stats.py
index 8ad80a4..2fab1b7 100755
--- a/bin/generate_merged_cohort_stats.py
+++ b/bin/generate_merged_cohort_stats.py
@@ -33,21 +33,19 @@
 
     # Reorder the columns
     df_joint_cohort_stats.columns = df_joint_cohort_stats.columns.str.strip()
-    new_cols = ['AVG_INSERT_SIZE', 'MAPPED_PERCENTAGE', 'RAW_TOTAL_SEQS', 'AVERAGE_BASE_QUALITY', 'MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X', 'LINEAGES', 'FREQUENCIES', 'MAPPED_NTM_FRACTION_16S', 'MAPPED_NTM_FRACTION_16S_THRESHOLD_MET', 'COVERAGE_THRESHOLD_MET', 'BREADTH_OF_COVERAGE_THRESHOLD_MET', 'RELABUNDANCE_THRESHOLD_MET', 'ALL_THRESHOLDS_MET']
+    new_cols = ['AVG_INSERT_SIZE', 'MAPPED_PERCENTAGE', 'RAW_TOTAL_SEQS', 'AVERAGE_BASE_QUALITY', 'MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X', 'LINEAGES', 'FREQUENCIES', 'COVERAGE_THRESHOLD_MET', 'BREADTH_OF_COVERAGE_THRESHOLD_MET', 'RELABUNDANCE_THRESHOLD_MET', 'ALL_THRESHOLDS_MET']
     df_final_cohort_stats = df_joint_cohort_stats[new_cols]
 
     # Impute the NaN value after join
     df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'] = df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].fillna(0)
 
     # Prepare for boolean operation
-    df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'] = df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'].fillna(0).astype('Int64')
     df_final_cohort_stats['COVERAGE_THRESHOLD_MET'] = df_final_cohort_stats['COVERAGE_THRESHOLD_MET'].fillna(0).astype('Int64')
     df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'] = df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'].fillna(0).astype('Int64')
     df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'] = df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].fillna(0).astype('Int64')
 
     # Derive the final threshold using Boolean operations
     df_final_cohort_stats['ALL_THRESHOLDS_MET'] = (
-        df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'].apply(lambda x: bool(x) if pd.notna(x) else False) &
         df_final_cohort_stats['COVERAGE_THRESHOLD_MET'].astype('bool') &
         df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'].astype('bool') &
         df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].astype('bool')
diff --git a/bin/sample_stats.py b/bin/sample_stats.py
index 7fcbb68..6a5ac4e 100755
--- a/bin/sample_stats.py
+++ b/bin/sample_stats.py
@@ -14,11 +14,8 @@
     parser.add_argument('--flagstat_file', dest='flagstat_file', required=True, metavar='flagstat_file', type=str, help='The flag stats file')
     parser.add_argument('--samtoolsstats_file', dest='samtoolsstats_file', required=True, metavar='samtoolsstats_file', type=str, help='The samtools stats file')
     parser.add_argument('--wgsmetrics_file', dest='wgsmetrics_file', required=True, metavar='wgsmetrics_file', type=str, help='The WGS metrics file')
-    parser.add_argument('--ntmfraction_file', dest='ntmfraction_file', required=True, metavar='ntmfraction_file', type=str, help='The NTM fraction file')
-
     parser.add_argument('--cutoff_median_coverage', metavar='cutoff_median_coverage', default=10, type=float, help='The median coverage cutoff threshold')
     parser.add_argument('--cutoff_breadth_of_coverage', metavar='cutoff_breadth_of_coverage', default=0.9, type=float, help='The breadth of coverage cutoff threshold')
-    parser.add_argument('--cutoff_ntm_fraction', metavar='cutoff_ntm_fraction', default=0.2, type=float, help='The NTM fraction cutoff threshold')
 
 ## NOTE: This is computed by the multiple_infection_filter script
 #    parser.add_argument('--cutoff_rel_abundance', metavar='cutoff_rel_abundance', default=0.8, type=float, help='The relative abundance cutoff threshold')
@@ -30,8 +27,6 @@
             if '## METRICS CLASS' in line:
                 rows = [f.readline().strip(), f.readline().strip()]
                 wgsmetrics = pd.DataFrame([rows[1].split('\t')], columns=rows[0].split('\t'))
-    with open(args['ntmfraction_file']) as f:
-        ntm_fraction = float(f.read().strip())
     with open(args['samtoolsstats_file']) as f:
         for line in f:
             if 'insert size average' in line:
@@ -56,16 +51,11 @@
     else:
         breadth_of_coverage_threshold_met = 0
 
-    if ntm_fraction <= args['cutoff_ntm_fraction']:
-        ntm_fraction_threshold_met = 1
-    else:
-        ntm_fraction_threshold_met = 0
-
-    if coverage_threshold_met and breadth_of_coverage_threshold_met and ntm_fraction_threshold_met:
+    if coverage_threshold_met and breadth_of_coverage_threshold_met:
         all_thresholds_met = 1
     else:
         all_thresholds_met = 0
 
     with open('{}.stats.tsv'.format(args['sample_name']), 'w') as f:
-        f.write('\t'.join([str(i) for i in [args['sample_name'], ins_size, mapped_p, total_seqs, avg_qual] + list(wgsmetrics.loc[0, ['MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X']]) + [ntm_fraction, ntm_fraction_threshold_met, coverage_threshold_met, breadth_of_coverage_threshold_met, all_thresholds_met]]))
+        f.write('\t'.join([str(i) for i in [args['sample_name'], ins_size, mapped_p, total_seqs, avg_qual] + list(wgsmetrics.loc[0, ['MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X']]) + [coverage_threshold_met, breadth_of_coverage_threshold_met, all_thresholds_met]]))
         f.write('\n')
diff --git a/bin/summarize_resistance_mixed_infection.py b/bin/summarize_resistance_mixed_infection.py
index 434e5bc..86662bf 100755
--- a/bin/summarize_resistance_mixed_infection.py
+++ b/bin/summarize_resistance_mixed_infection.py
@@ -141,7 +141,7 @@ def create_resistance_df(sample_res, method):
 # ADD FILTER FOR SAMPLES FAILING ONLY << RELABUNDANCE THRESHOLD_MET >>
 #===============
     stats_df = pd.read_csv(args["merged_cohort_stats_file"], sep="\t")
-    filtered_stats_df = stats_df.loc[ (stats_df["RELABUNDANCE_THRESHOLD_MET"]==0) & (stats_df["MAPPED_NTM_FRACTION_16S_THRESHOLD_MET"]==1) & (stats_df["COVERAGE_THRESHOLD_MET"]==1) & (stats_df["BREADTH_OF_COVERAGE_THRESHOLD_MET"]==1)]
+    filtered_stats_df = stats_df.loc[ (stats_df["RELABUNDANCE_THRESHOLD_MET"]==0) & (stats_df["COVERAGE_THRESHOLD_MET"]==1) & (stats_df["BREADTH_OF_COVERAGE_THRESHOLD_MET"]==1)]
 
     samples_df = pd.DataFrame(list(samples), columns=['full_sample'])
     filtered_samples_df = samples_df[samples_df["full_sample"].isin(filtered_stats_df["SAMPLE"].to_list())]
diff --git a/conf/laptop.config b/conf/laptop.config
index c212812..01416c4 100644
--- a/conf/laptop.config
+++ b/conf/laptop.config
@@ -38,10 +38,6 @@ process {
     cpus = 4
     memory = 1.GB
   }
-  withName: 'LOFREQ_CALL__NTM' {
-    cpus = 2
-    memory = 1.GB
-  }
   withName: 'LOFREQ_FILTER' {
     cpus = 2
     memory = 1.GB
diff --git a/conf/low_memory.config b/conf/low_memory.config
index c814442..05771ac 100644
--- a/conf/low_memory.config
+++ b/conf/low_memory.config
@@ -38,10 +38,6 @@ process {
     cpus = 6
     memory = 1.GB
   }
-  withName: 'LOFREQ_CALL__NTM' {
-    cpus = 2
-    memory = 1.GB
-  }
   withName: 'LOFREQ_FILTER' {
     cpus = 2
     memory = 1.GB
diff --git a/conf/server.config b/conf/server.config
index bbf52c1..9428551 100644
--- a/conf/server.config
+++ b/conf/server.config
@@ -38,10 +38,6 @@ process {
     cpus = 8
     memory = 1.GB
   }
-  withName: 'CALL_WF:LOFREQ_CALL__NTM' {
-    cpus = 2
-    memory = 1.GB
-  }
   withName: 'CALL_WF:LOFREQ_FILTER' {
     cpus = 2
     memory = 1.GB
@@ -245,10 +241,6 @@ process {
     cpus = 2
     memory = 1.GB
   }
-  withName: 'QUALITY_CHECK_WF:FASTQC' {
-    cpus = 3
-    memory = 1.GB
-  }
   withName: 'REPORTS_WF:MULTIQC' {
     cpus = 1
     memory = 4.GB
diff --git a/default_params.config b/default_params.config
index a973dc6..c224b8f 100644
--- a/default_params.config
+++ b/default_params.config
@@ -29,12 +29,6 @@ cutoff_median_coverage = 10
 //The breadth of coverage required to process the sample
 cutoff_breadth_of_coverage = 0.90
 
-//The relative abundunce of the majority strain required to process the sample
-// cutoff_rel_abundance = 0.80
-
-// //The maximum fraction of NTM DNA allowed to process the sample
-// cutoff_ntm_fraction = 0.20
-
 // The minimum fraction of samples that need to have a call at a site before the site is considered in phylogeny
 cutoff_site_representation = 0.95
 
@@ -149,7 +143,6 @@ snpdists_path = "snp-dists"
 snpsites_path = "snp-sites"
 bgzip_path = "bgzip"
 tbprofiler_path = "tb-profiler"
-// ntmprofiler_path = "ntm-profiler"
 iqtree_path = "iqtree"
 fastq_validator_path = "fastq_validator.sh"
 
@@ -301,15 +294,6 @@ GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS {
                     --output-mode EMIT_ALL_ACTIVE_SITES "
 }
 
-// LOFREQ_CALL__NTM {
-//     results_dir = "${params.outdir}/non-tuberculous_mycobacteria/vcf_files/variants"
-
-//     region = "1472307-1472307"
-//     arguments = " -m 60 -Q 20 -a 1 "
-
-//     should_publish = false
-// }
-
 LOFREQ_INDELQUAL {
     results_dir = "${params.outdir}/vcf_files/per_sample/minor_variants/"
 
@@ -344,11 +328,6 @@ DELLY_CALL {
     arguments = "-u 30"
 }
 
-// NTMPROFILER_PROFILE {
-//     results_dir = "${params.outdir}/non-tuberculous_mycobacteria/per_sample/"
-// }
-
-
 BCFTOOLS_VIEW__ISMAPPER {
     results_dir = "${params.outdir}/vcf_files/per_sample/structural_variants/ismapper"
 }
@@ -416,12 +395,6 @@ UTILS_MERGE_COHORT_STATS {
 // Processes used in MERGE_WF
 //-----------------------
 
-// NTMPROFILER_COLLATE {
-//     results_dir = "${params.outdir}/non-tuberculous_mycobacteria/cohort"
-
-//     prefix = "ntmprofiler.collate"
-// }
-
 GATK_COMBINE_GVCFS {
     results_dir = "${params.outdir}/vcf_files/cohort/raw_variant_files/combined"
 
diff --git a/main.nf b/main.nf
index cbe6f1a..8156963 100644
--- a/main.nf
+++ b/main.nf
@@ -12,7 +12,6 @@ include { MAP_WF } from './workflows/map_wf.nf'
 include { MERGE_WF } from './workflows/merge_wf.nf'
 include { MINOR_VARIANTS_ANALYSIS_WF } from './workflows/minor_variants_analysis_wf.nf'
 // include { MULTIQC AS MULTIQC_FASTQS } from '../modules/multiqc/multiqc.nf' addParams (params.MULTIQC_FASTQS)
-include { QUALITY_CHECK_WF } from './workflows/quality_check_wf.nf'
 include { REPORTS_WF } from './workflows/reports_wf.nf'
 include { SAMPLESHEET_VALIDATION } from './modules/utils/samplesheet_validation.nf'  addParams ( params.SAMPLESHEET_VALIDATION )
 include { STRUCTURAL_VARIANTS_ANALYSIS_WF } from './workflows/structural_variants_analysis_wf.nf'
@@ -30,10 +29,6 @@ workflow {
 
         VALIDATE_FASTQS_WF( SAMPLESHEET_VALIDATION.out.validated_samplesheet , SAMPLESHEET_VALIDATION.out.status )
 
-        QUALITY_CHECK_WF( VALIDATE_FASTQS_WF.out.approved_fastqs_ch )
-
-        //MULTIQC_FASTQS( QUALITY_CHECK_WF.out.reports_fastqc_ch )
-
     } else  {
 
         SAMPLESHEET_VALIDATION(params.input_samplesheet)
@@ -41,9 +36,6 @@ workflow {
 
         VALIDATE_FASTQS_WF( SAMPLESHEET_VALIDATION.out.validated_samplesheet , SAMPLESHEET_VALIDATION.out.status )
 
-        QUALITY_CHECK_WF( VALIDATE_FASTQS_WF.out.approved_fastqs_ch )
-
-
         MAP_WF( VALIDATE_FASTQS_WF.out.approved_fastqs_ch  )
 
         CALL_WF( MAP_WF.out.sorted_reads_ch )
@@ -88,8 +80,7 @@ workflow {
                       approved_samples_ch )
 
 
-            REPORTS_WF( QUALITY_CHECK_WF.out.reports_fastqc_ch,
-                        UTILS_MERGE_COHORT_STATS.out.merged_cohort_stats_ch,
+            REPORTS_WF( UTILS_MERGE_COHORT_STATS.out.merged_cohort_stats_ch,
                         MERGE_WF.out.major_variants_results_ch,
                         MINOR_VARIANTS_ANALYSIS_WF.out.minor_variants_results_ch,
                         STRUCTURAL_VARIANTS_ANALYSIS_WF.out.structural_variants_results_ch )
diff --git a/modules/lofreq/call__ntm.nf b/modules/lofreq/call__ntm.nf
index 6c08ccf..174aecd 100644
--- a/modules/lofreq/call__ntm.nf
+++ b/modules/lofreq/call__ntm.nf
@@ -1,35 +1,35 @@
-process LOFREQ_CALL__NTM {
-    tag "${sampleName}"
-    publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
+// process LOFREQ_CALL__NTM {
+//     tag "${sampleName}"
+//     publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
 
-    input:
-        tuple val(sampleName), path(bamIndex), path(recalibratedBam)
-        path(reference)
-        path("*")
+//     input:
+//         tuple val(sampleName), path(bamIndex), path(recalibratedBam)
+//         path(reference)
+//         path("*")
 
-    output:
-        tuple val(sampleName), path("*.potential_NTM_fraction.txt")
+//     output:
+//         tuple val(sampleName), path("*.potential_NTM_fraction.txt")
 
-    shell:
+//     shell:
 
-        '''
+//         '''
 
-        if [[ $(!{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=') ]]
-        then
-            !{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=' | awk '{Total=Total+$1} END{print Total}' > !{sampleName}.potential_NTM_fraction.txt
-        else
-            echo "0" > !{sampleName}.potential_NTM_fraction.txt
-        fi
-        '''
+//         if [[ $(!{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=') ]]
+//         then
+//             !{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=' | awk '{Total=Total+$1} END{print Total}' > !{sampleName}.potential_NTM_fraction.txt
+//         else
+//             echo "0" > !{sampleName}.potential_NTM_fraction.txt
+//         fi
+//         '''
 
-    stub:
+//     stub:
 
-        """
-        echo "${reference} -- ${reference.getBaseName()} -- ${params.region} -- ${sampleName} -- ${recalibratedBam}"
+//         """
+//         echo "${reference} -- ${reference.getBaseName()} -- ${params.region} -- ${sampleName} -- ${recalibratedBam}"
 
-        echo "${params.arguments}"
+//         echo "${params.arguments}"
 
-        touch ${sampleName}.potential_NTM_fraction.txt
-        """
+//         touch ${sampleName}.potential_NTM_fraction.txt
+//         """
 
-}
+// }
diff --git a/modules/lofreq/indelqual.nf b/modules/lofreq/indelqual.nf
index 3c08ec0..2cab809 100644
--- a/modules/lofreq/indelqual.nf
+++ b/modules/lofreq/indelqual.nf
@@ -29,7 +29,6 @@ process LOFREQ_INDELQUAL {
             -o ${sampleName}.dindel.bam \\
             ${recalibratedBam} "
 
-        touch ${sampleName}.potential_NTM_fraction.txt
         touch ${sampleName}.dindel.bam
         """
 
diff --git a/modules/utils/cohort_stats.nf b/modules/utils/cohort_stats.nf
index b048653..be4de12 100644
--- a/modules/utils/cohort_stats.nf
+++ b/modules/utils/cohort_stats.nf
@@ -11,7 +11,7 @@ process UTILS_COHORT_STATS {
 
     shell:
         '''
-        echo -e "SAMPLE\tAVG_INSERT_SIZE\tMAPPED_PERCENTAGE\tRAW_TOTAL_SEQS\tAVERAGE_BASE_QUALITY\tMEAN_COVERAGE\tSD_COVERAGE\tMEDIAN_COVERAGE\tMAD_COVERAGE\tPCT_EXC_ADAPTER\tPCT_EXC_MAPQ\tPCT_EXC_DUPE\tPCT_EXC_UNPAIRED\tPCT_EXC_BASEQ\tPCT_EXC_OVERLAP\tPCT_EXC_CAPPED\tPCT_EXC_TOTAL\tPCT_1X\tPCT_5X\tPCT_10X\tPCT_30X\tPCT_50X\tPCT_100X\tMAPPED_NTM_FRACTION_16S\tMAPPED_NTM_FRACTION_16S_THRESHOLD_MET\tCOVERAGE_THRESHOLD_MET\tBREADTH_OF_COVERAGE_THRESHOLD_MET\tALL_THRESHOLDS_MET" > !{params.vcf_name}.cohort_stats.tsv
+        echo -e "SAMPLE\tAVG_INSERT_SIZE\tMAPPED_PERCENTAGE\tRAW_TOTAL_SEQS\tAVERAGE_BASE_QUALITY\tMEAN_COVERAGE\tSD_COVERAGE\tMEDIAN_COVERAGE\tMAD_COVERAGE\tPCT_EXC_ADAPTER\tPCT_EXC_MAPQ\tPCT_EXC_DUPE\tPCT_EXC_UNPAIRED\tPCT_EXC_BASEQ\tPCT_EXC_OVERLAP\tPCT_EXC_CAPPED\tPCT_EXC_TOTAL\tPCT_1X\tPCT_5X\tPCT_10X\tPCT_30X\tPCT_50X\tPCT_100X\tCOVERAGE_THRESHOLD_MET\tBREADTH_OF_COVERAGE_THRESHOLD_MET\tALL_THRESHOLDS_MET" > !{params.vcf_name}.cohort_stats.tsv
         cat sample_stats/*tsv >> !{params.vcf_name}.cohort_stats.tsv
         '''
 }
diff --git a/modules/utils/sample_stats.nf b/modules/utils/sample_stats.nf
index 472ced6..7c48518 100644
--- a/modules/utils/sample_stats.nf
+++ b/modules/utils/sample_stats.nf
@@ -3,7 +3,7 @@ process UTILS_SAMPLE_STATS {
     publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
 
     input:
-        tuple val(sampleName), path(samtoolsStats), path(wgsMetrics), path(flagStats), path(ntmFraction)
+        tuple val(sampleName), path(samtoolsStats), path(wgsMetrics), path(flagStats)
 
     output:
         path("*.stats.tsv")
@@ -15,10 +15,8 @@ process UTILS_SAMPLE_STATS {
             --flagstat_file ${flagStats}  \\
             --samtoolsstats_file ${samtoolsStats} \\
             --wgsmetrics_file ${wgsMetrics} \\
-            --ntmfraction_file ${ntmFraction} \\
             --cutoff_median_coverage ${params.cutoff_median_coverage} \\
-            --cutoff_breadth_of_coverage ${params.cutoff_breadth_of_coverage} \\
-            --cutoff_ntm_fraction ${params.cutoff_ntm_fraction}
+            --cutoff_breadth_of_coverage ${params.cutoff_breadth_of_coverage}
         """
 
 }
diff --git a/params/params.yaml b/params/params.yaml
index 6efb954..1cb29e6 100644
--- a/params/params.yaml
+++ b/params/params.yaml
@@ -20,9 +20,6 @@ cutoff_breadth_of_coverage : 0.90
 #The relative abundunce of the majority strain required to process the sample
 cutoff_rel_abundance : 0.80
 
-# #The maximum fraction of NTM DNA allowed to process the sample
-# cutoff_ntm_fraction : 0.20
-
 # The minimum fraction of samples that need to have a call at a site before the site is considered in phylogeny
 cutoff_site_representation : 0.95
 
diff --git a/workflows/call_wf.nf b/workflows/call_wf.nf
index b6e7376..8e9c5dc 100644
--- a/workflows/call_wf.nf
+++ b/workflows/call_wf.nf
@@ -5,7 +5,6 @@ include { GATK_BASE_RECALIBRATOR } from "../modules/gatk/base_recalibrator.nf" a
 include { GATK_APPLY_BQSR } from "../modules/gatk/apply_bqsr.nf" addParams ( params.GATK_APPLY_BQSR )
 include { GATK_HAPLOTYPE_CALLER } from "../modules/gatk/haplotype_caller.nf" addParams ( params.GATK_HAPLOTYPE_CALLER )
 include { GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS } from "../modules/gatk/haplotype_caller__minor_variants.nf" addParams ( params.GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS )
-// include { LOFREQ_CALL__NTM } from "../modules/lofreq/call__ntm.nf" addParams ( params.LOFREQ_CALL__NTM )
 include { LOFREQ_INDELQUAL } from "../modules/lofreq/indelqual.nf" addParams ( params.LOFREQ_INDELQUAL )
 include { SAMTOOLS_INDEX } from "../modules/samtools/index.nf" addParams ( params.SAMTOOLS_INDEX )
 include { SAMTOOLS_INDEX__LOFREQ } from "../modules/samtools/index__lofreq.nf" addParams ( params.SAMTOOLS_INDEX__LOFREQ )
@@ -87,19 +86,6 @@ workflow CALL_WF {
                                                 [params.ref_fasta_fai, params.ref_fasta_dict])
         }
 
-        //----------------------------------------------------------------------------------
-        // Infer potential NTM contamination
-        //----------------------------------------------------------------------------------
-
-
-        // call_ntm
-        // LOFREQ_CALL__NTM(SAMTOOLS_INDEX.out,
-        //                  params.ref_fasta,
-        //                  [params.ref_fasta_fai])
-
-        //----------------------------------------------------------------------------------
-        // Infer minor variants with LoFreq
-        //----------------------------------------------------------------------------------
 
         // call_lofreq
         LOFREQ_INDELQUAL(recalibrated_bam_ch, params.ref_fasta)
@@ -131,9 +117,6 @@ workflow CALL_WF {
         sample_stats_ch = (SAMTOOLS_STATS.out)
             .join(GATK_COLLECT_WGS_METRICS.out)
             .join(GATK_FLAG_STAT.out)
-            // .join(LOFREQ_CALL__NTM.out)
-            //.dump(tag: "CALL_WF sample_stats_ch : ", pretty: true)
-
 
         UTILS_SAMPLE_STATS(sample_stats_ch)
 
diff --git a/workflows/quality_check_wf.nf b/workflows/quality_check_wf.nf
index 59f7933..11f667d 100644
--- a/workflows/quality_check_wf.nf
+++ b/workflows/quality_check_wf.nf
@@ -1,23 +1,23 @@
-include { FASTQC              } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC)
-// include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE)
-// include { NTMPROFILER_COLLATE } from '../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE)
+// include { FASTQC              } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC)
+// // include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE)
+// // include { NTMPROFILER_COLLATE } from '../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE)
 
-workflow QUALITY_CHECK_WF {
+// workflow QUALITY_CHECK_WF {
 
-    take:
-        reads_ch
+//     take:
+//         reads_ch
 
-    main:
+//     main:
 
-        FASTQC(reads_ch)
+//         FASTQC(reads_ch)
 
-        NTMPROFILER_PROFILE( reads_ch )
+//         NTMPROFILER_PROFILE( reads_ch )
 
-        NTMPROFILER_COLLATE( params.vcf_name,
-                             NTMPROFILER_PROFILE.out.profile_json.collect() )
+//         NTMPROFILER_COLLATE( params.vcf_name,
+//                              NTMPROFILER_PROFILE.out.profile_json.collect() )
 
 
-    emit:
-        reports_fastqc_ch =  FASTQC.out.collect()
+//     emit:
+//         reports_fastqc_ch =  FASTQC.out.collect()
 
-}
+// }

From 6f6ababe18a14eb9bed0d10e0e9d504f1e750c57 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 22:37:07 +0100
Subject: [PATCH 18/24] Delete commented-out LOFREQ_CALL__NTM module and QUALITY_CHECK_WF workflow

---
 main.nf                       |  1 -
 modules/lofreq/call__ntm.nf   | 35 -----------------------------------
 workflows/quality_check_wf.nf | 23 -----------------------
 3 files changed, 59 deletions(-)
 delete mode 100644 modules/lofreq/call__ntm.nf
 delete mode 100644 workflows/quality_check_wf.nf

diff --git a/main.nf b/main.nf
index 8156963..26eab65 100644
--- a/main.nf
+++ b/main.nf
@@ -47,7 +47,6 @@ workflow {
                                   MINOR_VARIANTS_ANALYSIS_WF.out.rejected_samples_ch,
                                   CALL_WF.out.cohort_stats_tsv )
 
-
         all_samples_ch = UTILS_MERGE_COHORT_STATS.out.merged_cohort_stats_ch
                                 .splitCsv(header: false, skip: 1, sep: '\t' )
                                 .map { row -> [
diff --git a/modules/lofreq/call__ntm.nf b/modules/lofreq/call__ntm.nf
deleted file mode 100644
index 174aecd..0000000
--- a/modules/lofreq/call__ntm.nf
+++ /dev/null
@@ -1,35 +0,0 @@
-// process LOFREQ_CALL__NTM {
-//     tag "${sampleName}"
-//     publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
-
-//     input:
-//         tuple val(sampleName), path(bamIndex), path(recalibratedBam)
-//         path(reference)
-//         path("*")
-
-//     output:
-//         tuple val(sampleName), path("*.potential_NTM_fraction.txt")
-
-//     shell:
-
-//         '''
-
-//         if [[ $(!{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=') ]]
-//         then
-//             !{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=' | awk '{Total=Total+$1} END{print Total}' > !{sampleName}.potential_NTM_fraction.txt
-//         else
-//             echo "0" > !{sampleName}.potential_NTM_fraction.txt
-//         fi
-//         '''
-
-//     stub:
-
-//         """
-//         echo "${reference} -- ${reference.getBaseName()} -- ${params.region} -- ${sampleName} -- ${recalibratedBam}"
-
-//         echo "${params.arguments}"
-
-//         touch ${sampleName}.potential_NTM_fraction.txt
-//         """
-
-// }
diff --git a/workflows/quality_check_wf.nf b/workflows/quality_check_wf.nf
deleted file mode 100644
index 11f667d..0000000
--- a/workflows/quality_check_wf.nf
+++ /dev/null
@@ -1,23 +0,0 @@
-// include { FASTQC              } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC)
-// // include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE)
-// // include { NTMPROFILER_COLLATE } from '../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE)
-
-// workflow QUALITY_CHECK_WF {
-
-//     take:
-//         reads_ch
-
-//     main:
-
-//         FASTQC(reads_ch)
-
-//         NTMPROFILER_PROFILE( reads_ch )
-
-//         NTMPROFILER_COLLATE( params.vcf_name,
-//                              NTMPROFILER_PROFILE.out.profile_json.collect() )
-
-
-//     emit:
-//         reports_fastqc_ch =  FASTQC.out.collect()
-
-// }

From abcadbaf1e6e60e4e2f6adcb5fbfb51cfd995670 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Fri, 4 Oct 2024 22:54:31 +0100
Subject: [PATCH 19/24] Delete modules/gatk/collect_wgs_metrics.nf

---
 modules/gatk/collect_wgs_metrics.nf | 36 -----------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 modules/gatk/collect_wgs_metrics.nf

diff --git a/modules/gatk/collect_wgs_metrics.nf b/modules/gatk/collect_wgs_metrics.nf
deleted file mode 100644
index de351b4..0000000
--- a/modules/gatk/collect_wgs_metrics.nf
+++ /dev/null
@@ -1,36 +0,0 @@
-process GATK_COLLECT_WGS_METRICS {
-    tag "${sampleName}"
-    label 'cpu_2_memory_2'
-    publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
-
-    input:
-        tuple val(sampleName), path(bam)
-        path(reference)
-
-    output:
-        tuple val(sampleName), path("*.WgsMetrics.txt")
-
-
-    script:
-
-        """
-        ${params.gatk_path} CollectWgsMetrics --java-options "-Xmx${task.memory.giga}G" \\
-            -R ${reference} \\
-            -I ${bam} \\
-            ${params.arguments} \\
-            -O ${sampleName}.WgsMetrics.txt
-        """
-
-    stub:
-
-        """
-        echo "gatk CollectWgsMetrics -Xmx${task.memory.giga}G \\
-            -R ${reference} \\
-            -I ${bam} \\
-            ${params.arguments} \\
-            -O ${sampleName}.WgsMetrics.txt"
-
-        touch ${sampleName}.WgsMetrics.txt
-        """
-}
-

From b9f0272834a3c9dd78b0fd13540a5fcb1d34c30f Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Mon, 7 Oct 2024 04:46:11 +0100
Subject: [PATCH 20/24] Update low_memory.config

---
 conf/low_memory.config | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/conf/low_memory.config b/conf/low_memory.config
index 05771ac..0ea9aa7 100644
--- a/conf/low_memory.config
+++ b/conf/low_memory.config
@@ -241,10 +241,6 @@ process {
     cpus = 2
     memory = 1.GB
   }
-  withName: 'FASTQC' {
-    cpus = 3
-    memory = 1.GB
-  }
   withName: 'MULTIQC' {
     cpus = 1
     memory = 4.GB

From 3f6fbbcbf62632d8d7563030246a01671d42bbe6 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Mon, 7 Oct 2024 05:10:01 +0100
Subject: [PATCH 21/24] Update template_samplesheet.csv

---
 samplesheet/template_samplesheet.csv | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/samplesheet/template_samplesheet.csv b/samplesheet/template_samplesheet.csv
index d80445e..14f0fea 100644
--- a/samplesheet/template_samplesheet.csv
+++ b/samplesheet/template_samplesheet.csv
@@ -1,3 +1,3 @@
-Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence
-ialbratb,ERR4813741,1,1,/home/lcerdeira/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813741_2.fastq.gz,1,1,1
-ialbratb,ERR4813742,1,1,/home/lcerdeira/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813742_2.fastq.gz,1,1,1
\ No newline at end of file
+Sample,R1,R2
+ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz,1,1,1
+ERR4813742,/home/lcerdeira/BraSeqTBdata/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz,1,1,1

From 5f4ec2586303e8dbcb46e46532e4e6fa1322661c Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Mon, 7 Oct 2024 05:10:53 +0100
Subject: [PATCH 22/24] Update test.samples.csv

---
 samplesheet/test.samples.csv | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/samplesheet/test.samples.csv b/samplesheet/test.samples.csv
index de1a6b7..8f35a3a 100644
--- a/samplesheet/test.samples.csv
+++ b/samplesheet/test.samples.csv
@@ -1,4 +1,3 @@
 Sample,R1,R2
-SRR26331590,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/090/SRR26331590/SRR26331590_1.fastq.gz,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/090/SRR26331590/SRR26331590_2.fastq.gz
-SRR26331595,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/095/SRR26331595/SRR26331595_1.fastq.gz,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/095/SRR26331595/SRR26331595_2.fastq.gz
-SRR26331599,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/099/SRR26331599/SRR26331599_1.fastq.gz,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/099/SRR26331599/SRR26331599_2.fastq.gz
+ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz
+ERR4813742,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz

From 5368079f7df3ba0b63e9fbf9533d463ea1b69c34 Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Mon, 7 Oct 2024 05:11:15 +0100
Subject: [PATCH 23/24] Update template_samplesheet.csv

---
 samplesheet/template_samplesheet.csv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samplesheet/template_samplesheet.csv b/samplesheet/template_samplesheet.csv
index 14f0fea..c08ef85 100644
--- a/samplesheet/template_samplesheet.csv
+++ b/samplesheet/template_samplesheet.csv
@@ -1,3 +1,3 @@
 Sample,R1,R2
-ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz,1,1,1
-ERR4813742,/home/lcerdeira/BraSeqTBdata/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz,1,1,1
+ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz
+ERR4813742,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz

From b696c6a399a0c9152a607b613af040b6baf6428c Mon Sep 17 00:00:00 2001
From: Louise Cerdeira <lcerdeira@gmail.com>
Date: Mon, 7 Oct 2024 05:12:35 +0100
Subject: [PATCH 24/24] Update default_params.config

---
 default_params.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/default_params.config b/default_params.config
index c224b8f..4cc4845 100644
--- a/default_params.config
+++ b/default_params.config
@@ -4,7 +4,7 @@
 //NOTE: The samplesheet should have the following fields [study, sample, library, attempt, flowcell, lane, index_sequence, r1, r2]
 //NOTE: Most of these parameters are used to create unique_id in XBS_main.py
 
-input_samplesheet = "./data/input-data/ialbratb-input.csv"
+input_samplesheet = "./samplesheet/test.samples.csv"
 
 // The directory to which all output files should be written
 outdir = "bratb-results"