From e62878e14a6f77ed3b489cdf6d180c776e87fca3 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:44:55 +0100 Subject: [PATCH 01/24] Delete conf/biomina.config --- conf/biomina.config | 55 --------------------------------------------- 1 file changed, 55 deletions(-) delete mode 100644 conf/biomina.config diff --git a/conf/biomina.config b/conf/biomina.config deleted file mode 100644 index b3a9c3b..0000000 --- a/conf/biomina.config +++ /dev/null @@ -1,55 +0,0 @@ -executor { - /* https://www.nextflow.io/docs/latest/config.html?highlight=polling#scope-executor */ - /* https://www.nextflow.io/blog/2021/5_tips_for_hpc_users.html */ - - queueSize = 10 - // pollInterval = '10sec' - // submitRateLimit = '50/2min' -} - - -docker { - enabled = true - runOptions = "-u root" -} - -process { - - cache = 'lenient' - errorStrategy = { task.attempt < 3 ? 'retry' : 'ignore' } - - // SLURM - beforeScript = "source /home/bratbuser/mambaforge/etc/profile.d/conda.sh" - afterScript = 'conda deactivate' - executor = "slurm" - queue = "batch" - clusterOptions = "--nodelist=oncovm-n002 " - - - - cpus = 4 - memory = 8.GB - - withName: '.*GATK_VARIANT_RECALIBRATOR.*' { - memory = 48.GB - } - - withName: '.*GATK_MARK_DUPLICATES.*' { - memory = 16.GB - } - - withName: '.*GATK_HAPLOTYPE_CALLER.*' { - memory = 16.GB - } - - withName: '.*SAMTOOLS_MERGE.*' { - memory = 16.GB - } - - withName: 'IQTREE.*' { - cpus = 2 - } - -} - - From 9e2d7fd46e3dc0b1c58c8345dee5332243621677 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:45:09 +0100 Subject: [PATCH 02/24] Delete conf/singularity.config --- conf/singularity.config | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 conf/singularity.config diff --git a/conf/singularity.config b/conf/singularity.config deleted file mode 100644 index b3197ad..0000000 --- a/conf/singularity.config +++ /dev/null @@ -1,27 +0,0 @@ -process { - - withName: - 'TBPROFILER.*' { - container = 
"lcerdeira/bratb/biocontainer-tbprofiler:6.3.0" - } - - withName: - 'NTMPROFILER.*' { - container = "lcerdeira/bratb/biocontainer-ntmprofiler:0.4.0" - } - - withName: - 'ISMAPPER.*|GATK.*|LOFREQ.*|DELLY.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' { - container = "lcerdeira/bratb/bratb-container:1.0.0" - } - - withName: - 'BWA.*|IQTREE.*|SNPDISTS.*|SNPSITES.*|BCFTOOLS.*|BGZIP.*|SAMTOOLS.*|SNPEFF.*|CLUSTERPICKER.*' { - container = "lcerdeira/bratb/mapping-container:1.0.0" - } - -} - -singularity { - enabled = true -} \ No newline at end of file From 4146c5a458aeb00c854ecff12ac4c952363eadc9 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:48:12 +0100 Subject: [PATCH 03/24] Update bratb-test.yml --- bratb-test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bratb-test.yml b/bratb-test.yml index b3b5f99..c0cea91 100644 --- a/bratb-test.yml +++ b/bratb-test.yml @@ -1,6 +1,6 @@ -# Sample contents of my_parameters_1.yml file +# Sample contents of paramns.yml file -input_samplesheet: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/data/input-data/input_test.csv +input_samplesheet: /home/lcerdeira/BraSeqTB/data/input-data/input_test.csv only_validate_fastqs: true -conda_envs_location: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/conda_envs \ No newline at end of file +conda_envs_location: /home/lcerdeira/BraSeqTB/conda_envs From 92ea64e857d25a8fd3bb0eba62f25625971303eb Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:49:46 +0100 Subject: [PATCH 04/24] Update bratb.yml --- bratb.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bratb.yml b/bratb.yml index 9c4bfbc..471bc93 100644 --- a/bratb.yml +++ b/bratb.yml @@ -1,6 +1,6 @@ -# Sample contents of my_parameters_1.yml file +# Sample contents of paramns_1.yml file -input_samplesheet: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/data/input-data/ialbratb-input.csv +input_samplesheet: /home/lcerdeira/BraSeqTB/data/input-data/input_test.csv 
only_validate_fastqs: true -conda_envs_location: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/conda_envs \ No newline at end of file +conda_envs_location: /home/lcerdeira/BraSeqTB/conda_envs From 3ecd6a9f964b3025d230f26710ca335552195a78 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:51:06 +0100 Subject: [PATCH 05/24] Update mapping-env.yml --- conda_envs/mapping-env.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/conda_envs/mapping-env.yml b/conda_envs/mapping-env.yml index aa5e78b..946e64d 100644 --- a/conda_envs/mapping-env.yml +++ b/conda_envs/mapping-env.yml @@ -4,14 +4,7 @@ channels: - bioconda - defaults dependencies: -#NOTE: Not natively. Python 2.7 was sunsetted prior to release of the osx-arm64 platform, so there isn't any such build. One could try requesting such a build on the Conda Forge Python feedstock, but even if someone did that you'd still face the issue that most Python packages will also lack osx-arm64 builds for Python 2.7. -#Emulate through Rosetta. Apple provides an x86_64 emulator, Rosetta 2, which will run x86_64 binaries, such as what would be installed with Conda environments using an osx-64 subdir. 
One can create environments with such a subdir setting with something like: -#CONDA_SUBDIR=osx-64 conda create -n py27 python=2.7 # include other packages here -# ensure that future package installs in this env stick to 'osx-64' -#conda activate py27 -#conda config --env --set subdir osx-64 - -# - python=2.7 + - python=2.7 - bwa=0.7.17 - samtools=1.9 - iqtree=2.1.2 From 3310ef42e2d4e3411b791a04a00f7a32eba4d471 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:54:33 +0100 Subject: [PATCH 06/24] Update setup_conda_envs.sh --- conda_envs/setup_conda_envs.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/conda_envs/setup_conda_envs.sh b/conda_envs/setup_conda_envs.sh index ec05317..b61efa0 100644 --- a/conda_envs/setup_conda_envs.sh +++ b/conda_envs/setup_conda_envs.sh @@ -3,7 +3,7 @@ set -e # NOTE: Please replace `conda` with `mamba` if it is installed for faster installs. -resolverCondaBinary="mamba" # pick either conda OR mamba +resolverCondaBinary="conda" # pick either conda OR mamba #=========================================================== # @@ -17,15 +17,16 @@ $resolverCondaBinary env create -p bratb-env --file conda_envs/bratb-env.yml $resolverCondaBinary env create -p bratb-tbprofiler-env --file conda_envs/bratb-tbprofiler-env.yml -echo "INFO: Activate mamba env with tb-profiler and setup the WHO database" -eval "$(mamba shell.bash hook)" -mamba activate "./conda_envs/bratb-tbprofiler-env" +echo "INFO: Activate conda env with tb-profiler and setup the WHO database" +eval "$(conda shell.bash hook)" +#Note after mamba installation peharps the conda envs messy the conda path so one tip, if not works the command below, added the full PATH or fix the conda path +conda activate "./conda_envs/bratb-tbprofiler-env" #echo "INFO: Use WHO-v2 database in bratb-tbprofiler-env" #tb-profiler update_tbdb --commit bdace1f82d948ce0001e1dade6eb93d2da9c47e5 --logging DEBUG -#echo "INFO: Use BRATB branch from tbdb database 
in bratb-tbprofiler-env" +#echo "INFO: Use BraTB branch from tbdb database in bratb-tbprofiler-env" tb-profiler update_tbdb --commit 30f8bc37df15affa378ebbfbd3e1eb4c5903056e --logging DEBUG echo "INFO: Deactivate the bratb-tbprofiler-env " -mamba deactivate \ No newline at end of file +conda deactivate From 06d13a829890211a0e375ebf068f8f7abb29b4ef Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 18:56:06 +0100 Subject: [PATCH 07/24] Update template_noconda.config --- conf/template_noconda.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/template_noconda.config b/conf/template_noconda.config index 1c05d14..a898339 100644 --- a/conf/template_noconda.config +++ b/conf/template_noconda.config @@ -22,7 +22,7 @@ params { - input_samplesheet = "${projectDir}/resources/reference_set/bratb.pbs.test.csv" + input_samplesheet = "${projectDir}/data/input-data/bratb.csv" outdir = "${projectDir}/results" } From 02bc7287f653a6851d9e8b596756192274398763 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:10:00 +0100 Subject: [PATCH 08/24] Update nextflow.config --- nextflow.config | 109 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 26 deletions(-) diff --git a/nextflow.config b/nextflow.config index c80df84..0fecd18 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,39 +1,96 @@ /* - * Copyright (c) 2024 LAPAM. 
+ * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648 * + * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA + * + * For quick overview of GPL-3 license, please refer + * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3 + * + * - You MUST keep this license with original authors in your copy + * - You MUST acknowledge the original source of this software + * - You MUST state significant changes made to the original software + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program . If not, see . */ -manifest { - description = 'BRATB Nextflow' - author = 'Louise Cerdeira' +manifest + name = 'BraTB' + description = 'Draft version 1.0.0' + defaultBranch = 'master' + homePage = 'https://github.com/LaPAM-USP/BraSeqTB' } -/* - * defines execution profiles for different environments - */ +params { includeConfig 'default_params.config' } + +process { + + //Default values if a label hasn't been specified within a process + cpus = { 4 * task.attempt } + memory = { 4.GB * task.attempt } + + //Default action is to ignore the process if the second attempt fails + errorStrategy = { task.attempt < 3 ? 
'retry' : 'ignore' } + maxRetries = 3 + + //NOTE: These labels are ordered by number of cpus allocated and then the memory + withLabel: 'cpu_2_memory_2' { + cpus = 2 + memory = 2.GB + } + + withLabel: 'cpu_4_memory_8' { + cpus = 4 + memory = 8.GB + } + + withLabel: 'cpu_4_memory_16' { + cpus = 4 + memory = 16.GB + } + + withLabel: 'cpu_8_memory_4' { + cpus = 8 + memory = 4.GB + } + + withLabel: 'cpu_8_memory_8' { + cpus = 8 + memory = 8.GB + } + + withLabel: 'cpu_8_memory_16' { + cpus = 8 + memory = 16.GB + } -params { - trim_galore_args = "" - bwa_args = "" - multiqc_args = "" - kaiju_args = "" - lofreq_args = "" - gatk_args = "" - tbprofile_args = "" - snpeff_args = "" - delly_args = "" - trim = false - help = false } profiles { - standard { - process.executor = 'local' - includeConfig 'conda.config' - } + // Package management specific settings + conda_local { includeConfig 'conf/conda_local.config' } + docker { includeConfig 'conf/docker.config' } + + // Executor specific settings + pbs { includeConfig 'conf/pbs.config' } + server { includeConfig 'conf/server.config' } + low_memory { includeConfig 'conf/low_memory.config' } + laptop { includeConfig 'conf/laptop.config' } + + //NOTE: Test profile - DO NOT USE + test { includeConfig 'conf/test.config' } - slurm { - includeConfig 'slurm.config' - } + //NOTE: Frequent settings needed for analysis + bwa_k66 { includeConfig 'conf/bwa_k66.config' } } From 39cf14e88e7e1031e63504656d589ab85944dd4a Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:12:47 +0100 Subject: [PATCH 09/24] Update build.sh --- containers/biocontainer-tbprofiler/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/containers/biocontainer-tbprofiler/build.sh b/containers/biocontainer-tbprofiler/build.sh index 334ae9b..4275ea7 100644 --- a/containers/biocontainer-tbprofiler/build.sh +++ b/containers/biocontainer-tbprofiler/build.sh @@ -1,10 +1,10 @@ #!/bin/bash set -uex -# NOTE: Make sure you've set 
the environment correctly and are logged in to the registry. +# NOTE: Make sure you've set the environment correctly and are logged in to the registry along with the sudo permission adjustment; otherwise, you will need to run using sudo. TBPROFILER_VERSION=6.3.0 -DOCKER_NAMESPACE="lcerdeira/bratb" +DOCKER_NAMESPACE="lcerdeira/bratb-tbprofiler" CONTAINER_NAME="$DOCKER_NAMESPACE/biocontainer-tbprofiler:$TBPROFILER_VERSION" From 9a71dac4d931ae8ddcfa2712e41a523765d4547a Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:25:42 +0100 Subject: [PATCH 10/24] Update build.sh --- containers/bratb-container/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/bratb-container/build.sh b/containers/bratb-container/build.sh index 366d80c..43372eb 100644 --- a/containers/bratb-container/build.sh +++ b/containers/bratb-container/build.sh @@ -3,7 +3,7 @@ set -uex # NOTE: Make sure you've set the environment correctly and are logged in to the registry. -CONTAINER_TAG=2.0.0 +CONTAINER_TAG=1.0.0 CONTAINER_DIR=bratb-container DOCKER_NAMESPACE="lcerdeira/bratb" From 753bccdeed20f420ea8d385e015159bec62c94b3 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:27:10 +0100 Subject: [PATCH 11/24] Update build.sh --- containers/misc/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/misc/build.sh b/containers/misc/build.sh index cf5dcbd..de8403d 100644 --- a/containers/misc/build.sh +++ b/containers/misc/build.sh @@ -3,7 +3,7 @@ set -uex # NOTE: Make sure you've set the environment correctly and are logged in to the registry. 
-CONTAINER_TAG=2.0.0-theta +CONTAINER_TAG=1.0.0-theta DOCKER_NAMESPACE="lcerdeira/bratb" CONTAINER_DIR=misc From c6869a4aeb40381858d22bdc1366a3ea10aef203 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:28:57 +0100 Subject: [PATCH 12/24] Update build.sh --- containers/mapping-container/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/mapping-container/build.sh b/containers/mapping-container/build.sh index edd57f3..bfbb0c5 100644 --- a/containers/mapping-container/build.sh +++ b/containers/mapping-container/build.sh @@ -4,7 +4,7 @@ set -uex # NOTE: Make sure you've set the environment correctly and are logged in to the registry. # -CONTAINER_TAG=2.0.0 +CONTAINER_TAG=1.0.0 CONTAINER_DIR=mapping-container DOCKER_NAMESPACE="lcerdeira/bratb" From 86ad03de7824e784ae04f308f2f18556252fb176 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:29:39 +0100 Subject: [PATCH 13/24] Delete containers/Dockerfile --- containers/Dockerfile | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 containers/Dockerfile diff --git a/containers/Dockerfile b/containers/Dockerfile deleted file mode 100644 index 91827ae..0000000 --- a/containers/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM quay.io/biocontainers/tb-profiler:6.3.0--pyhdfd78af_0 AS base - -FROM base AS tbprofiler - -#NOTE: Just update the tb-profiler databaes to rely upon the relevant branch. 
- -# WHO-v2 specific tag https://github.com/jodyphelan/tbdb/releases/tag/who-v2-strict -# COMMIT bdace1f82d948ce0001e1dade6eb93d2da9c47e5 - -# bratb branch -#RUN tb-profiler update_tbdb --branch bratb --logging DEBUG - -RUN tb-profiler update_tbdb --commit 30f8bc37df15affa378ebbfbd3e1eb4c5903056e --logging DEBUG From 3e307a440ac9fac16131453f1085f0b85772e6e4 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 19:29:49 +0100 Subject: [PATCH 14/24] Delete containers/build.sh --- containers/build.sh | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 containers/build.sh diff --git a/containers/build.sh b/containers/build.sh deleted file mode 100644 index 334ae9b..0000000 --- a/containers/build.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -uex - -# NOTE: Make sure you've set the environment correctly and are logged in to the registry. - -TBPROFILER_VERSION=6.3.0 -DOCKER_NAMESPACE="lcerdeira/bratb" - -CONTAINER_NAME="$DOCKER_NAMESPACE/biocontainer-tbprofiler:$TBPROFILER_VERSION" - -echo "Building container : $CONTAINER_NAME " - -docker build -t $CONTAINER_NAME . -CONTAINER_ID=$(docker run -d $CONTAINER_NAME) -docker commit $CONTAINER_ID $CONTAINER_NAME -docker push $CONTAINER_NAME -docker stop $CONTAINER_ID From 440f54778773e40a8eafef15a64426005e1ba20b Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 21:50:06 +0100 Subject: [PATCH 15/24] Update nextflow.config --- nextflow.config | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index 0fecd18..8b8236f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,8 +24,8 @@ * along with this program . If not, see . 
*/ -manifest - name = 'BraTB' +manifest { + name = 'Bratb' description = 'Draft version 1.0.0' defaultBranch = 'master' homePage = 'https://github.com/LaPAM-USP/BraSeqTB' @@ -73,7 +73,6 @@ process { cpus = 8 memory = 16.GB } - } profiles { From d9810555b147db729c8a49557cacdcedba697317 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 22:00:29 +0100 Subject: [PATCH 16/24] template input --- samplesheet/template_samplesheet.csv | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samplesheet/template_samplesheet.csv b/samplesheet/template_samplesheet.csv index ef96662..d80445e 100644 --- a/samplesheet/template_samplesheet.csv +++ b/samplesheet/template_samplesheet.csv @@ -1,5 +1,3 @@ Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence -Study_Name,S0001,1,1,full_path_to_directory_of_fastq_files/S0001_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0001_01_R1.fastq.gz,1,1,1 -Study_Name,S0002,1,1,full_path_to_directory_of_fastq_files/S0002_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0002_01_R2.fastq.gz,1,1,1 -Study_Name,S0003,1,1,full_path_to_directory_of_fastq_files/S0003_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0003_01_R2.fastq.gz,1,1,1 -Study_Name,S0004,1,1,full_path_to_directory_of_fastq_files/S0004_01_R1.fastq.gz,full_path_to_directory_of_fastq_files/S0004_01_R2.fastq.gz,1,1,1 \ No newline at end of file +ialbratb,ERR4813741,1,1,/home/lcerdeira/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813741_2.fastq.gz,1,1,1 +ialbratb,ERR4813742,1,1,/home/lcerdeira/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813742_2.fastq.gz,1,1,1 \ No newline at end of file From f8e9d552810f2716c0832d2ba3c7c0afe491db27 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 22:22:42 +0100 Subject: [PATCH 17/24] fix quality check --- bin/generate_merged_cohort_stats.py | 4 +- bin/sample_stats.py | 14 +----- 
bin/summarize_resistance_mixed_infection.py | 2 +- conf/laptop.config | 4 -- conf/low_memory.config | 4 -- conf/server.config | 8 ---- default_params.config | 27 ----------- main.nf | 11 +---- modules/lofreq/call__ntm.nf | 50 ++++++++++----------- modules/lofreq/indelqual.nf | 1 - modules/utils/cohort_stats.nf | 2 +- modules/utils/sample_stats.nf | 6 +-- params/params.yaml | 3 -- workflows/call_wf.nf | 17 ------- workflows/quality_check_wf.nf | 28 ++++++------ 15 files changed, 47 insertions(+), 134 deletions(-) diff --git a/bin/generate_merged_cohort_stats.py b/bin/generate_merged_cohort_stats.py index 8ad80a4..2fab1b7 100755 --- a/bin/generate_merged_cohort_stats.py +++ b/bin/generate_merged_cohort_stats.py @@ -33,21 +33,19 @@ # Reorder the columns df_joint_cohort_stats.columns = df_joint_cohort_stats.columns.str.strip() - new_cols = ['AVG_INSERT_SIZE', 'MAPPED_PERCENTAGE', 'RAW_TOTAL_SEQS', 'AVERAGE_BASE_QUALITY', 'MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X', 'LINEAGES', 'FREQUENCIES', 'MAPPED_NTM_FRACTION_16S', 'MAPPED_NTM_FRACTION_16S_THRESHOLD_MET', 'COVERAGE_THRESHOLD_MET', 'BREADTH_OF_COVERAGE_THRESHOLD_MET', 'RELABUNDANCE_THRESHOLD_MET', 'ALL_THRESHOLDS_MET'] + new_cols = ['AVG_INSERT_SIZE', 'MAPPED_PERCENTAGE', 'RAW_TOTAL_SEQS', 'AVERAGE_BASE_QUALITY', 'MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X', 'LINEAGES', 'FREQUENCIES', 'COVERAGE_THRESHOLD_MET', 'BREADTH_OF_COVERAGE_THRESHOLD_MET', 'RELABUNDANCE_THRESHOLD_MET', 'ALL_THRESHOLDS_MET'] df_final_cohort_stats = df_joint_cohort_stats[new_cols] # Impute the NaN 
value after join df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'] = df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].fillna(0) # Prepare for boolean operation - df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'] = df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'].fillna(0).astype('Int64') df_final_cohort_stats['COVERAGE_THRESHOLD_MET'] = df_final_cohort_stats['COVERAGE_THRESHOLD_MET'].fillna(0).astype('Int64') df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'] = df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'].fillna(0).astype('Int64') df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'] = df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].fillna(0).astype('Int64') # Derive the final threshold using Boolean operations df_final_cohort_stats['ALL_THRESHOLDS_MET'] = ( - df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'].apply(lambda x: bool(x) if pd.notna(x) else False) & df_final_cohort_stats['COVERAGE_THRESHOLD_MET'].astype('bool') & df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'].astype('bool') & df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].astype('bool') diff --git a/bin/sample_stats.py b/bin/sample_stats.py index 7fcbb68..6a5ac4e 100755 --- a/bin/sample_stats.py +++ b/bin/sample_stats.py @@ -14,11 +14,8 @@ parser.add_argument('--flagstat_file', dest='flagstat_file', required=True, metavar='flagstat_file', type=str, help='The flag stats file') parser.add_argument('--samtoolsstats_file', dest='samtoolsstats_file', required=True, metavar='samtoolsstats_file', type=str, help='The samtools stats file') parser.add_argument('--wgsmetrics_file', dest='wgsmetrics_file', required=True, metavar='wgsmetrics_file', type=str, help='The WGS metrics file') - parser.add_argument('--ntmfraction_file', dest='ntmfraction_file', required=True, metavar='ntmfraction_file', type=str, help='The NTM fraction file') - parser.add_argument('--cutoff_median_coverage', metavar='cutoff_median_coverage', default=10, 
type=float, help='The median coverage cutoff threshold') parser.add_argument('--cutoff_breadth_of_coverage', metavar='cutoff_breadth_of_coverage', default=0.9, type=float, help='The breadth of coverage cutoff threshold') - parser.add_argument('--cutoff_ntm_fraction', metavar='cutoff_ntm_fraction', default=0.2, type=float, help='The NTM fraction cutoff threshold') ## NOTE: This is computed by the multiple_infection_filter script # parser.add_argument('--cutoff_rel_abundance', metavar='cutoff_rel_abundance', default=0.8, type=float, help='The relative abundance cutoff threshold') @@ -30,8 +27,6 @@ if '## METRICS CLASS' in line: rows = [f.readline().strip(), f.readline().strip()] wgsmetrics = pd.DataFrame([rows[1].split('\t')], columns=rows[0].split('\t')) - with open(args['ntmfraction_file']) as f: - ntm_fraction = float(f.read().strip()) with open(args['samtoolsstats_file']) as f: for line in f: if 'insert size average' in line: @@ -56,16 +51,11 @@ else: breadth_of_coverage_threshold_met = 0 - if ntm_fraction <= args['cutoff_ntm_fraction']: - ntm_fraction_threshold_met = 1 - else: - ntm_fraction_threshold_met = 0 - - if coverage_threshold_met and breadth_of_coverage_threshold_met and ntm_fraction_threshold_met: + if coverage_threshold_met and breadth_of_coverage_threshold_met: all_thresholds_met = 1 else: all_thresholds_met = 0 with open('{}.stats.tsv'.format(args['sample_name']), 'w') as f: - f.write('\t'.join([str(i) for i in [args['sample_name'], ins_size, mapped_p, total_seqs, avg_qual] + list(wgsmetrics.loc[0, ['MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X']]) + [ntm_fraction, ntm_fraction_threshold_met, coverage_threshold_met, breadth_of_coverage_threshold_met, all_thresholds_met]])) + f.write('\t'.join([str(i) for i in 
[args['sample_name'], ins_size, mapped_p, total_seqs, avg_qual] + list(wgsmetrics.loc[0, ['MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X']]) + [coverage_threshold_met, breadth_of_coverage_threshold_met, all_thresholds_met]])) f.write('\n') diff --git a/bin/summarize_resistance_mixed_infection.py b/bin/summarize_resistance_mixed_infection.py index 434e5bc..86662bf 100755 --- a/bin/summarize_resistance_mixed_infection.py +++ b/bin/summarize_resistance_mixed_infection.py @@ -141,7 +141,7 @@ def create_resistance_df(sample_res, method): # ADD FILTER FOR SAMPLES FAILING ONLY << RELABUNDANCE THRESHOLD_MET >> #=============== stats_df = pd.read_csv(args["merged_cohort_stats_file"], sep="\t") - filtered_stats_df = stats_df.loc[ (stats_df["RELABUNDANCE_THRESHOLD_MET"]==0) & (stats_df["MAPPED_NTM_FRACTION_16S_THRESHOLD_MET"]==1) & (stats_df["COVERAGE_THRESHOLD_MET"]==1) & (stats_df["BREADTH_OF_COVERAGE_THRESHOLD_MET"]==1)] + filtered_stats_df = stats_df.loc[ (stats_df["RELABUNDANCE_THRESHOLD_MET"]==0) & (stats_df["COVERAGE_THRESHOLD_MET"]==1) & (stats_df["BREADTH_OF_COVERAGE_THRESHOLD_MET"]==1)] samples_df = pd.DataFrame(list(samples), columns=['full_sample']) filtered_samples_df = samples_df[samples_df["full_sample"].isin(filtered_stats_df["SAMPLE"].to_list())] diff --git a/conf/laptop.config b/conf/laptop.config index c212812..01416c4 100644 --- a/conf/laptop.config +++ b/conf/laptop.config @@ -38,10 +38,6 @@ process { cpus = 4 memory = 1.GB } - withName: 'LOFREQ_CALL__NTM' { - cpus = 2 - memory = 1.GB - } withName: 'LOFREQ_FILTER' { cpus = 2 memory = 1.GB diff --git a/conf/low_memory.config b/conf/low_memory.config index c814442..05771ac 100644 --- a/conf/low_memory.config +++ b/conf/low_memory.config @@ -38,10 +38,6 @@ process { cpus = 6 memory = 
1.GB } - withName: 'LOFREQ_CALL__NTM' { - cpus = 2 - memory = 1.GB - } withName: 'LOFREQ_FILTER' { cpus = 2 memory = 1.GB diff --git a/conf/server.config b/conf/server.config index bbf52c1..9428551 100644 --- a/conf/server.config +++ b/conf/server.config @@ -38,10 +38,6 @@ process { cpus = 8 memory = 1.GB } - withName: 'CALL_WF:LOFREQ_CALL__NTM' { - cpus = 2 - memory = 1.GB - } withName: 'CALL_WF:LOFREQ_FILTER' { cpus = 2 memory = 1.GB @@ -245,10 +241,6 @@ process { cpus = 2 memory = 1.GB } - withName: 'QUALITY_CHECK_WF:FASTQC' { - cpus = 3 - memory = 1.GB - } withName: 'REPORTS_WF:MULTIQC' { cpus = 1 memory = 4.GB diff --git a/default_params.config b/default_params.config index a973dc6..c224b8f 100644 --- a/default_params.config +++ b/default_params.config @@ -29,12 +29,6 @@ cutoff_median_coverage = 10 //The breadth of coverage required to process the sample cutoff_breadth_of_coverage = 0.90 -//The relative abundunce of the majority strain required to process the sample -// cutoff_rel_abundance = 0.80 - -// //The maximum fraction of NTM DNA allowed to process the sample -// cutoff_ntm_fraction = 0.20 - // The minimum fraction of samples that need to have a call at a site before the site is considered in phylogeny cutoff_site_representation = 0.95 @@ -149,7 +143,6 @@ snpdists_path = "snp-dists" snpsites_path = "snp-sites" bgzip_path = "bgzip" tbprofiler_path = "tb-profiler" -// ntmprofiler_path = "ntm-profiler" iqtree_path = "iqtree" fastq_validator_path = "fastq_validator.sh" @@ -301,15 +294,6 @@ GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS { --output-mode EMIT_ALL_ACTIVE_SITES " } -// LOFREQ_CALL__NTM { -// results_dir = "${params.outdir}/non-tuberculous_mycobacteria/vcf_files/variants" - -// region = "1472307-1472307" -// arguments = " -m 60 -Q 20 -a 1 " - -// should_publish = false -// } - LOFREQ_INDELQUAL { results_dir = "${params.outdir}/vcf_files/per_sample/minor_variants/" @@ -344,11 +328,6 @@ DELLY_CALL { arguments = "-u 30" } -// NTMPROFILER_PROFILE { -// 
results_dir = "${params.outdir}/non-tuberculous_mycobacteria/per_sample/" -// } - - BCFTOOLS_VIEW__ISMAPPER { results_dir = "${params.outdir}/vcf_files/per_sample/structural_variants/ismapper" } @@ -416,12 +395,6 @@ UTILS_MERGE_COHORT_STATS { // Processes used in MERGE_WF //----------------------- -// NTMPROFILER_COLLATE { -// results_dir = "${params.outdir}/non-tuberculous_mycobacteria/cohort" - -// prefix = "ntmprofiler.collate" -// } - GATK_COMBINE_GVCFS { results_dir = "${params.outdir}/vcf_files/cohort/raw_variant_files/combined" diff --git a/main.nf b/main.nf index cbe6f1a..8156963 100644 --- a/main.nf +++ b/main.nf @@ -12,7 +12,6 @@ include { MAP_WF } from './workflows/map_wf.nf' include { MERGE_WF } from './workflows/merge_wf.nf' include { MINOR_VARIANTS_ANALYSIS_WF } from './workflows/minor_variants_analysis_wf.nf' // include { MULTIQC AS MULTIQC_FASTQS } from '../modules/multiqc/multiqc.nf' addParams (params.MULTIQC_FASTQS) -include { QUALITY_CHECK_WF } from './workflows/quality_check_wf.nf' include { REPORTS_WF } from './workflows/reports_wf.nf' include { SAMPLESHEET_VALIDATION } from './modules/utils/samplesheet_validation.nf' addParams ( params.SAMPLESHEET_VALIDATION ) include { STRUCTURAL_VARIANTS_ANALYSIS_WF } from './workflows/structural_variants_analysis_wf.nf' @@ -30,10 +29,6 @@ workflow { VALIDATE_FASTQS_WF( SAMPLESHEET_VALIDATION.out.validated_samplesheet , SAMPLESHEET_VALIDATION.out.status ) - QUALITY_CHECK_WF( VALIDATE_FASTQS_WF.out.approved_fastqs_ch ) - - //MULTIQC_FASTQS( QUALITY_CHECK_WF.out.reports_fastqc_ch ) - } else { SAMPLESHEET_VALIDATION(params.input_samplesheet) @@ -41,9 +36,6 @@ workflow { VALIDATE_FASTQS_WF( SAMPLESHEET_VALIDATION.out.validated_samplesheet , SAMPLESHEET_VALIDATION.out.status ) - QUALITY_CHECK_WF( VALIDATE_FASTQS_WF.out.approved_fastqs_ch ) - - MAP_WF( VALIDATE_FASTQS_WF.out.approved_fastqs_ch ) CALL_WF( MAP_WF.out.sorted_reads_ch ) @@ -88,8 +80,7 @@ workflow { approved_samples_ch ) - REPORTS_WF( 
QUALITY_CHECK_WF.out.reports_fastqc_ch, - UTILS_MERGE_COHORT_STATS.out.merged_cohort_stats_ch, + REPORTS_WF( UTILS_MERGE_COHORT_STATS.out.merged_cohort_stats_ch, MERGE_WF.out.major_variants_results_ch, MINOR_VARIANTS_ANALYSIS_WF.out.minor_variants_results_ch, STRUCTURAL_VARIANTS_ANALYSIS_WF.out.structural_variants_results_ch ) diff --git a/modules/lofreq/call__ntm.nf b/modules/lofreq/call__ntm.nf index 6c08ccf..174aecd 100644 --- a/modules/lofreq/call__ntm.nf +++ b/modules/lofreq/call__ntm.nf @@ -1,35 +1,35 @@ -process LOFREQ_CALL__NTM { - tag "${sampleName}" - publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish +// process LOFREQ_CALL__NTM { +// tag "${sampleName}" +// publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish - input: - tuple val(sampleName), path(bamIndex), path(recalibratedBam) - path(reference) - path("*") +// input: +// tuple val(sampleName), path(bamIndex), path(recalibratedBam) +// path(reference) +// path("*") - output: - tuple val(sampleName), path("*.potential_NTM_fraction.txt") +// output: +// tuple val(sampleName), path("*.potential_NTM_fraction.txt") - shell: +// shell: - ''' +// ''' - if [[ $(!{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=') ]] - then - !{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=' | awk '{Total=Total+$1} END{print Total}' > !{sampleName}.potential_NTM_fraction.txt - else - echo "0" > !{sampleName}.potential_NTM_fraction.txt - fi - ''' +// if [[ $(!{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=') ]] +// then +// !{params.lofreq_path} call -f !{reference} -r 
!{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=' | awk '{Total=Total+$1} END{print Total}' > !{sampleName}.potential_NTM_fraction.txt +// else +// echo "0" > !{sampleName}.potential_NTM_fraction.txt +// fi +// ''' - stub: +// stub: - """ - echo "${reference} -- ${reference.getBaseName()} -- ${params.region} -- ${sampleName} -- ${recalibratedBam}" +// """ +// echo "${reference} -- ${reference.getBaseName()} -- ${params.region} -- ${sampleName} -- ${recalibratedBam}" - echo "${params.arguments}" +// echo "${params.arguments}" - touch ${sampleName}.potential_NTM_fraction.txt - """ +// touch ${sampleName}.potential_NTM_fraction.txt +// """ -} +// } diff --git a/modules/lofreq/indelqual.nf b/modules/lofreq/indelqual.nf index 3c08ec0..2cab809 100644 --- a/modules/lofreq/indelqual.nf +++ b/modules/lofreq/indelqual.nf @@ -29,7 +29,6 @@ process LOFREQ_INDELQUAL { -o ${sampleName}.dindel.bam \\ ${recalibratedBam} " - touch ${sampleName}.potential_NTM_fraction.txt touch ${sampleName}.dindel.bam """ diff --git a/modules/utils/cohort_stats.nf b/modules/utils/cohort_stats.nf index b048653..be4de12 100644 --- a/modules/utils/cohort_stats.nf +++ b/modules/utils/cohort_stats.nf @@ -11,7 +11,7 @@ process UTILS_COHORT_STATS { shell: ''' - echo -e "SAMPLE\tAVG_INSERT_SIZE\tMAPPED_PERCENTAGE\tRAW_TOTAL_SEQS\tAVERAGE_BASE_QUALITY\tMEAN_COVERAGE\tSD_COVERAGE\tMEDIAN_COVERAGE\tMAD_COVERAGE\tPCT_EXC_ADAPTER\tPCT_EXC_MAPQ\tPCT_EXC_DUPE\tPCT_EXC_UNPAIRED\tPCT_EXC_BASEQ\tPCT_EXC_OVERLAP\tPCT_EXC_CAPPED\tPCT_EXC_TOTAL\tPCT_1X\tPCT_5X\tPCT_10X\tPCT_30X\tPCT_50X\tPCT_100X\tMAPPED_NTM_FRACTION_16S\tMAPPED_NTM_FRACTION_16S_THRESHOLD_MET\tCOVERAGE_THRESHOLD_MET\tBREADTH_OF_COVERAGE_THRESHOLD_MET\tALL_THRESHOLDS_MET" > !{params.vcf_name}.cohort_stats.tsv + echo -e 
"SAMPLE\tAVG_INSERT_SIZE\tMAPPED_PERCENTAGE\tRAW_TOTAL_SEQS\tAVERAGE_BASE_QUALITY\tMEAN_COVERAGE\tSD_COVERAGE\tMEDIAN_COVERAGE\tMAD_COVERAGE\tPCT_EXC_ADAPTER\tPCT_EXC_MAPQ\tPCT_EXC_DUPE\tPCT_EXC_UNPAIRED\tPCT_EXC_BASEQ\tPCT_EXC_OVERLAP\tPCT_EXC_CAPPED\tPCT_EXC_TOTAL\tPCT_1X\tPCT_5X\tPCT_10X\tPCT_30X\tPCT_50X\tPCT_100X\tCOVERAGE_THRESHOLD_MET\tBREADTH_OF_COVERAGE_THRESHOLD_MET\tALL_THRESHOLDS_MET" > !{params.vcf_name}.cohort_stats.tsv cat sample_stats/*tsv >> !{params.vcf_name}.cohort_stats.tsv ''' } diff --git a/modules/utils/sample_stats.nf b/modules/utils/sample_stats.nf index 472ced6..7c48518 100644 --- a/modules/utils/sample_stats.nf +++ b/modules/utils/sample_stats.nf @@ -3,7 +3,7 @@ process UTILS_SAMPLE_STATS { publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish input: - tuple val(sampleName), path(samtoolsStats), path(wgsMetrics), path(flagStats), path(ntmFraction) + tuple val(sampleName), path(samtoolsStats), path(wgsMetrics), path(flagStats) output: path("*.stats.tsv") @@ -15,10 +15,8 @@ process UTILS_SAMPLE_STATS { --flagstat_file ${flagStats} \\ --samtoolsstats_file ${samtoolsStats} \\ --wgsmetrics_file ${wgsMetrics} \\ - --ntmfraction_file ${ntmFraction} \\ --cutoff_median_coverage ${params.cutoff_median_coverage} \\ - --cutoff_breadth_of_coverage ${params.cutoff_breadth_of_coverage} \\ - --cutoff_ntm_fraction ${params.cutoff_ntm_fraction} + --cutoff_breadth_of_coverage ${params.cutoff_breadth_of_coverage} """ } diff --git a/params/params.yaml b/params/params.yaml index 6efb954..1cb29e6 100644 --- a/params/params.yaml +++ b/params/params.yaml @@ -20,9 +20,6 @@ cutoff_breadth_of_coverage : 0.90 #The relative abundunce of the majority strain required to process the sample cutoff_rel_abundance : 0.80 -# #The maximum fraction of NTM DNA allowed to process the sample -# cutoff_ntm_fraction : 0.20 - # The minimum fraction of samples that need to have a call at a site before the site is considered in phylogeny 
cutoff_site_representation : 0.95 diff --git a/workflows/call_wf.nf b/workflows/call_wf.nf index b6e7376..8e9c5dc 100644 --- a/workflows/call_wf.nf +++ b/workflows/call_wf.nf @@ -5,7 +5,6 @@ include { GATK_BASE_RECALIBRATOR } from "../modules/gatk/base_recalibrator.nf" a include { GATK_APPLY_BQSR } from "../modules/gatk/apply_bqsr.nf" addParams ( params.GATK_APPLY_BQSR ) include { GATK_HAPLOTYPE_CALLER } from "../modules/gatk/haplotype_caller.nf" addParams ( params.GATK_HAPLOTYPE_CALLER ) include { GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS } from "../modules/gatk/haplotype_caller__minor_variants.nf" addParams ( params.GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS ) -// include { LOFREQ_CALL__NTM } from "../modules/lofreq/call__ntm.nf" addParams ( params.LOFREQ_CALL__NTM ) include { LOFREQ_INDELQUAL } from "../modules/lofreq/indelqual.nf" addParams ( params.LOFREQ_INDELQUAL ) include { SAMTOOLS_INDEX } from "../modules/samtools/index.nf" addParams ( params.SAMTOOLS_INDEX ) include { SAMTOOLS_INDEX__LOFREQ } from "../modules/samtools/index__lofreq.nf" addParams ( params.SAMTOOLS_INDEX__LOFREQ ) @@ -87,19 +86,6 @@ workflow CALL_WF { [params.ref_fasta_fai, params.ref_fasta_dict]) } - //---------------------------------------------------------------------------------- - // Infer potential NTM contamination - //---------------------------------------------------------------------------------- - - - // call_ntm - // LOFREQ_CALL__NTM(SAMTOOLS_INDEX.out, - // params.ref_fasta, - // [params.ref_fasta_fai]) - - //---------------------------------------------------------------------------------- - // Infer minor variants with LoFreq - //---------------------------------------------------------------------------------- // call_lofreq LOFREQ_INDELQUAL(recalibrated_bam_ch, params.ref_fasta) @@ -131,9 +117,6 @@ workflow CALL_WF { sample_stats_ch = (SAMTOOLS_STATS.out) .join(GATK_COLLECT_WGS_METRICS.out) .join(GATK_FLAG_STAT.out) - // .join(LOFREQ_CALL__NTM.out) - //.dump(tag: "CALL_WF 
sample_stats_ch : ", pretty: true) - UTILS_SAMPLE_STATS(sample_stats_ch) diff --git a/workflows/quality_check_wf.nf b/workflows/quality_check_wf.nf index 59f7933..11f667d 100644 --- a/workflows/quality_check_wf.nf +++ b/workflows/quality_check_wf.nf @@ -1,23 +1,23 @@ -include { FASTQC } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC) -// include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE) -// include { NTMPROFILER_COLLATE } from '../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE) +// include { FASTQC } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC) +// // include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE) +// // include { NTMPROFILER_COLLATE } from '../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE) -workflow QUALITY_CHECK_WF { +// workflow QUALITY_CHECK_WF { - take: - reads_ch +// take: +// reads_ch - main: +// main: - FASTQC(reads_ch) +// FASTQC(reads_ch) - NTMPROFILER_PROFILE( reads_ch ) +// NTMPROFILER_PROFILE( reads_ch ) - NTMPROFILER_COLLATE( params.vcf_name, - NTMPROFILER_PROFILE.out.profile_json.collect() ) +// NTMPROFILER_COLLATE( params.vcf_name, +// NTMPROFILER_PROFILE.out.profile_json.collect() ) - emit: - reports_fastqc_ch = FASTQC.out.collect() +// emit: +// reports_fastqc_ch = FASTQC.out.collect() -} +// } From 6f6ababe18a14eb9bed0d10e0e9d504f1e750c57 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 22:37:07 +0100 Subject: [PATCH 18/24] tes 2 --- main.nf | 1 - modules/lofreq/call__ntm.nf | 35 ----------------------------------- workflows/quality_check_wf.nf | 23 ----------------------- 3 files changed, 59 deletions(-) delete mode 100644 modules/lofreq/call__ntm.nf delete mode 100644 workflows/quality_check_wf.nf diff --git a/main.nf b/main.nf index 8156963..26eab65 100644 --- a/main.nf +++ b/main.nf @@ -47,7 +47,6 @@ workflow { 
MINOR_VARIANTS_ANALYSIS_WF.out.rejected_samples_ch, CALL_WF.out.cohort_stats_tsv ) - all_samples_ch = UTILS_MERGE_COHORT_STATS.out.merged_cohort_stats_ch .splitCsv(header: false, skip: 1, sep: '\t' ) .map { row -> [ diff --git a/modules/lofreq/call__ntm.nf b/modules/lofreq/call__ntm.nf deleted file mode 100644 index 174aecd..0000000 --- a/modules/lofreq/call__ntm.nf +++ /dev/null @@ -1,35 +0,0 @@ -// process LOFREQ_CALL__NTM { -// tag "${sampleName}" -// publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish - -// input: -// tuple val(sampleName), path(bamIndex), path(recalibratedBam) -// path(reference) -// path("*") - -// output: -// tuple val(sampleName), path("*.potential_NTM_fraction.txt") - -// shell: - -// ''' - -// if [[ $(!{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=') ]] -// then -// !{params.lofreq_path} call -f !{reference} -r !{reference.getBaseName()}:!{params.region} !{params.arguments} !{recalibratedBam} | grep -v "#" | cut -f 2 -d ";" | tr -d 'AF=' | awk '{Total=Total+$1} END{print Total}' > !{sampleName}.potential_NTM_fraction.txt -// else -// echo "0" > !{sampleName}.potential_NTM_fraction.txt -// fi -// ''' - -// stub: - -// """ -// echo "${reference} -- ${reference.getBaseName()} -- ${params.region} -- ${sampleName} -- ${recalibratedBam}" - -// echo "${params.arguments}" - -// touch ${sampleName}.potential_NTM_fraction.txt -// """ - -// } diff --git a/workflows/quality_check_wf.nf b/workflows/quality_check_wf.nf deleted file mode 100644 index 11f667d..0000000 --- a/workflows/quality_check_wf.nf +++ /dev/null @@ -1,23 +0,0 @@ -// include { FASTQC } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC) -// // include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE) -// // include { NTMPROFILER_COLLATE } from 
'../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE) - -// workflow QUALITY_CHECK_WF { - -// take: -// reads_ch - -// main: - -// FASTQC(reads_ch) - -// NTMPROFILER_PROFILE( reads_ch ) - -// NTMPROFILER_COLLATE( params.vcf_name, -// NTMPROFILER_PROFILE.out.profile_json.collect() ) - - -// emit: -// reports_fastqc_ch = FASTQC.out.collect() - -// } From abcadbaf1e6e60e4e2f6adcb5fbfb51cfd995670 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Fri, 4 Oct 2024 22:54:31 +0100 Subject: [PATCH 19/24] test3 --- modules/gatk/collect_wgs_metrics.nf | 36 ----------------------------- 1 file changed, 36 deletions(-) delete mode 100644 modules/gatk/collect_wgs_metrics.nf diff --git a/modules/gatk/collect_wgs_metrics.nf b/modules/gatk/collect_wgs_metrics.nf deleted file mode 100644 index de351b4..0000000 --- a/modules/gatk/collect_wgs_metrics.nf +++ /dev/null @@ -1,36 +0,0 @@ -process GATK_COLLECT_WGS_METRICS { - tag "${sampleName}" - label 'cpu_2_memory_2' - publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish - - input: - tuple val(sampleName), path(bam) - path(reference) - - output: - tuple val(sampleName), path("*.WgsMetrics.txt") - - - script: - - """ - ${params.gatk_path} CollectWgsMetrics --java-options "-Xmx${task.memory.giga}G" \\ - -R ${reference} \\ - -I ${bam} \\ - ${params.arguments} \\ - -O ${sampleName}.WgsMetrics.txt - """ - - stub: - - """ - echo "gatk CollectWgsMetrics -Xmx${task.memory.giga}G \\ - -R ${reference} \\ - -I ${bam} \\ - ${params.arguments} \\ - -O ${sampleName}.WgsMetrics.txt" - - touch ${sampleName}.WgsMetrics.txt - """ -} - From b9f0272834a3c9dd78b0fd13540a5fcb1d34c30f Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Mon, 7 Oct 2024 04:46:11 +0100 Subject: [PATCH 20/24] Update low_memory.config --- conf/low_memory.config | 4 ---- 1 file changed, 4 deletions(-) diff --git a/conf/low_memory.config b/conf/low_memory.config index 05771ac..0ea9aa7 100644 --- a/conf/low_memory.config +++ 
b/conf/low_memory.config @@ -241,10 +241,6 @@ process { cpus = 2 memory = 1.GB } - withName: 'FASTQC' { - cpus = 3 - memory = 1.GB - } withName: 'MULTIQC' { cpus = 1 memory = 4.GB From 3f6fbbcbf62632d8d7563030246a01671d42bbe6 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Mon, 7 Oct 2024 05:10:01 +0100 Subject: [PATCH 21/24] Update template_samplesheet.csv --- samplesheet/template_samplesheet.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samplesheet/template_samplesheet.csv b/samplesheet/template_samplesheet.csv index d80445e..14f0fea 100644 --- a/samplesheet/template_samplesheet.csv +++ b/samplesheet/template_samplesheet.csv @@ -1,3 +1,3 @@ -Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence -ialbratb,ERR4813741,1,1,/home/lcerdeira/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813741_2.fastq.gz,1,1,1 -ialbratb,ERR4813742,1,1,/home/lcerdeira/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/data/input-data/ERR4813742_2.fastq.gz,1,1,1 \ No newline at end of file +Sample,R1,R2 +ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz,1,1,1 +ERR4813742,/home/lcerdeira/BraSeqTBdata/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz,1,1,1 From 5f4ec2586303e8dbcb46e46532e4e6fa1322661c Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Mon, 7 Oct 2024 05:10:53 +0100 Subject: [PATCH 22/24] Update test.samples.csv --- samplesheet/test.samples.csv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/samplesheet/test.samples.csv b/samplesheet/test.samples.csv index de1a6b7..8f35a3a 100644 --- a/samplesheet/test.samples.csv +++ b/samplesheet/test.samples.csv @@ -1,4 +1,3 @@ Sample,R1,R2 
-SRR26331590,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/090/SRR26331590/SRR26331590_1.fastq.gz,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/090/SRR26331590/SRR26331590_2.fastq.gz -SRR26331595,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/095/SRR26331595/SRR26331595_1.fastq.gz,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/095/SRR26331595/SRR26331595_2.fastq.gz -SRR26331599,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/099/SRR26331599/SRR26331599_1.fastq.gz,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR263/099/SRR26331599/SRR26331599_2.fastq.gz +ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz +ERR4813742,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz From 5368079f7df3ba0b63e9fbf9533d463ea1b69c34 Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Mon, 7 Oct 2024 05:11:15 +0100 Subject: [PATCH 23/24] Update template_samplesheet.csv --- samplesheet/template_samplesheet.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samplesheet/template_samplesheet.csv b/samplesheet/template_samplesheet.csv index 14f0fea..c08ef85 100644 --- a/samplesheet/template_samplesheet.csv +++ b/samplesheet/template_samplesheet.csv @@ -1,3 +1,3 @@ Sample,R1,R2 -ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz,1,1,1 -ERR4813742,/home/lcerdeira/BraSeqTBdata/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz,1,1,1 +ERR4813741,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813741_2.fastq.gz +ERR4813742,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_1.fastq.gz,/home/lcerdeira/BraSeqTB/data/input-data/ERR4813742_2.fastq.gz From b696c6a399a0c9152a607b613af040b6baf6428c Mon Sep 17 00:00:00 2001 From: Louise Cerdeira Date: Mon, 7 Oct 2024
05:12:35 +0100 Subject: [PATCH 24/24] Update default_params.config --- default_params.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/default_params.config b/default_params.config index c224b8f..4cc4845 100644 --- a/default_params.config +++ b/default_params.config @@ -4,7 +4,7 @@ //NOTE: The samplesheet should have the following fields [study, sample, library, attempt, flowcell, lane, index_sequence, r1, r2] //NOTE: Most of these parameters are used to create unique_id in XBS_main.py -input_samplesheet = "./data/input-data/ialbratb-input.csv" +input_samplesheet = "./samplesheet/test.samples.csv" // The directory to which all output files should be written outdir = "bratb-results"