Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] main from LaPAM-USP:main #5

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e655358
Merge pull request #4 from lcerdeira/main
lcerdeira Oct 2, 2024
e62878e
Delete conf/biomina.config
lcerdeira Oct 4, 2024
9e2d7fd
Delete conf/singularity.config
lcerdeira Oct 4, 2024
4146c5a
Update bratb-test.yml
lcerdeira Oct 4, 2024
92ea64e
Update bratb.yml
lcerdeira Oct 4, 2024
3ecd6a9
Update mapping-env.yml
lcerdeira Oct 4, 2024
3310ef4
Update setup_conda_envs.sh
lcerdeira Oct 4, 2024
06d13a8
Update template_noconda.config
lcerdeira Oct 4, 2024
02bc728
Update nextflow.config
lcerdeira Oct 4, 2024
39cf14e
Update build.sh
lcerdeira Oct 4, 2024
9a71dac
Update build.sh
lcerdeira Oct 4, 2024
753bccd
Update build.sh
lcerdeira Oct 4, 2024
c6869a4
Update build.sh
lcerdeira Oct 4, 2024
86ad03d
Delete containers/Dockerfile
lcerdeira Oct 4, 2024
3e307a4
Delete containers/build.sh
lcerdeira Oct 4, 2024
440f547
Update nextflow.config
lcerdeira Oct 4, 2024
d981055
template input
lcerdeira Oct 4, 2024
612610f
Merge pull request #5 from LaPAM-USP/teste
lcerdeira Oct 4, 2024
f8e9d55
fix quality check
lcerdeira Oct 4, 2024
181f238
Merge pull request #6 from LaPAM-USP/teste
lcerdeira Oct 4, 2024
6f6abab
tes 2
lcerdeira Oct 4, 2024
8f51a65
Merge pull request #7 from LaPAM-USP/teste
lcerdeira Oct 4, 2024
abcadba
test3
lcerdeira Oct 4, 2024
3a305da
Merge pull request #8 from LaPAM-USP/teste
lcerdeira Oct 4, 2024
b9f0272
Update low_memory.config
lcerdeira Oct 7, 2024
cb185b8
Merge pull request #9 from LaPAM-USP/teste
lcerdeira Oct 7, 2024
3f6fbbc
Update template_samplesheet.csv
lcerdeira Oct 7, 2024
5f4ec25
Update test.samples.csv
lcerdeira Oct 7, 2024
5368079
Update template_samplesheet.csv
lcerdeira Oct 7, 2024
b696c6a
Update default_params.config
lcerdeira Oct 7, 2024
b020c9d
Merge pull request #10 from LaPAM-USP/teste
lcerdeira Oct 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions bin/generate_merged_cohort_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,19 @@

# Reorder the columns
df_joint_cohort_stats.columns = df_joint_cohort_stats.columns.str.strip()
new_cols = ['AVG_INSERT_SIZE', 'MAPPED_PERCENTAGE', 'RAW_TOTAL_SEQS', 'AVERAGE_BASE_QUALITY', 'MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X', 'LINEAGES', 'FREQUENCIES', 'MAPPED_NTM_FRACTION_16S', 'MAPPED_NTM_FRACTION_16S_THRESHOLD_MET', 'COVERAGE_THRESHOLD_MET', 'BREADTH_OF_COVERAGE_THRESHOLD_MET', 'RELABUNDANCE_THRESHOLD_MET', 'ALL_THRESHOLDS_MET']
new_cols = ['AVG_INSERT_SIZE', 'MAPPED_PERCENTAGE', 'RAW_TOTAL_SEQS', 'AVERAGE_BASE_QUALITY', 'MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X', 'LINEAGES', 'FREQUENCIES', 'COVERAGE_THRESHOLD_MET', 'BREADTH_OF_COVERAGE_THRESHOLD_MET', 'RELABUNDANCE_THRESHOLD_MET', 'ALL_THRESHOLDS_MET']
df_final_cohort_stats = df_joint_cohort_stats[new_cols]

# Impute the NaN value after join
df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'] = df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].fillna(0)

# Prepare for boolean operation
df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'] = df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'].fillna(0).astype('Int64')
df_final_cohort_stats['COVERAGE_THRESHOLD_MET'] = df_final_cohort_stats['COVERAGE_THRESHOLD_MET'].fillna(0).astype('Int64')
df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'] = df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'].fillna(0).astype('Int64')
df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'] = df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].fillna(0).astype('Int64')

# Derive the final threshold using Boolean operations
df_final_cohort_stats['ALL_THRESHOLDS_MET'] = (
df_final_cohort_stats['MAPPED_NTM_FRACTION_16S_THRESHOLD_MET'].apply(lambda x: bool(x) if pd.notna(x) else False) &
df_final_cohort_stats['COVERAGE_THRESHOLD_MET'].astype('bool') &
df_final_cohort_stats['BREADTH_OF_COVERAGE_THRESHOLD_MET'].astype('bool') &
df_final_cohort_stats['RELABUNDANCE_THRESHOLD_MET'].astype('bool')
Expand Down
14 changes: 2 additions & 12 deletions bin/sample_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@
parser.add_argument('--flagstat_file', dest='flagstat_file', required=True, metavar='flagstat_file', type=str, help='The flag stats file')
parser.add_argument('--samtoolsstats_file', dest='samtoolsstats_file', required=True, metavar='samtoolsstats_file', type=str, help='The samtools stats file')
parser.add_argument('--wgsmetrics_file', dest='wgsmetrics_file', required=True, metavar='wgsmetrics_file', type=str, help='The WGS metrics file')
parser.add_argument('--ntmfraction_file', dest='ntmfraction_file', required=True, metavar='ntmfraction_file', type=str, help='The NTM fraction file')

parser.add_argument('--cutoff_median_coverage', metavar='cutoff_median_coverage', default=10, type=float, help='The median coverage cutoff threshold')
parser.add_argument('--cutoff_breadth_of_coverage', metavar='cutoff_breadth_of_coverage', default=0.9, type=float, help='The breadth of coverage cutoff threshold')
parser.add_argument('--cutoff_ntm_fraction', metavar='cutoff_ntm_fraction', default=0.2, type=float, help='The NTM fraction cutoff threshold')

## NOTE: This is computed by the multiple_infection_filter script
# parser.add_argument('--cutoff_rel_abundance', metavar='cutoff_rel_abundance', default=0.8, type=float, help='The relative abundance cutoff threshold')
Expand All @@ -30,8 +27,6 @@
if '## METRICS CLASS' in line:
rows = [f.readline().strip(), f.readline().strip()]
wgsmetrics = pd.DataFrame([rows[1].split('\t')], columns=rows[0].split('\t'))
with open(args['ntmfraction_file']) as f:
ntm_fraction = float(f.read().strip())
with open(args['samtoolsstats_file']) as f:
for line in f:
if 'insert size average' in line:
Expand All @@ -56,16 +51,11 @@
else:
breadth_of_coverage_threshold_met = 0

if ntm_fraction <= args['cutoff_ntm_fraction']:
ntm_fraction_threshold_met = 1
else:
ntm_fraction_threshold_met = 0

if coverage_threshold_met and breadth_of_coverage_threshold_met and ntm_fraction_threshold_met:
if coverage_threshold_met and breadth_of_coverage_threshold_met:
all_thresholds_met = 1
else:
all_thresholds_met = 0

with open('{}.stats.tsv'.format(args['sample_name']), 'w') as f:
f.write('\t'.join([str(i) for i in [args['sample_name'], ins_size, mapped_p, total_seqs, avg_qual] + list(wgsmetrics.loc[0, ['MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X']]) + [ntm_fraction, ntm_fraction_threshold_met, coverage_threshold_met, breadth_of_coverage_threshold_met, all_thresholds_met]]))
f.write('\t'.join([str(i) for i in [args['sample_name'], ins_size, mapped_p, total_seqs, avg_qual] + list(wgsmetrics.loc[0, ['MEAN_COVERAGE', 'SD_COVERAGE', 'MEDIAN_COVERAGE', 'MAD_COVERAGE', 'PCT_EXC_ADAPTER', 'PCT_EXC_MAPQ', 'PCT_EXC_DUPE', 'PCT_EXC_UNPAIRED', 'PCT_EXC_BASEQ', 'PCT_EXC_OVERLAP', 'PCT_EXC_CAPPED', 'PCT_EXC_TOTAL', 'PCT_1X', 'PCT_5X', 'PCT_10X', 'PCT_30X', 'PCT_50X', 'PCT_100X']]) + [coverage_threshold_met, breadth_of_coverage_threshold_met, all_thresholds_met]]))
f.write('\n')
2 changes: 1 addition & 1 deletion bin/summarize_resistance_mixed_infection.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def create_resistance_df(sample_res, method):
# ADD FILTER FOR SAMPLES FAILING ONLY << RELABUNDANCE THRESHOLD_MET >>
#===============
stats_df = pd.read_csv(args["merged_cohort_stats_file"], sep="\t")
filtered_stats_df = stats_df.loc[ (stats_df["RELABUNDANCE_THRESHOLD_MET"]==0) & (stats_df["MAPPED_NTM_FRACTION_16S_THRESHOLD_MET"]==1) & (stats_df["COVERAGE_THRESHOLD_MET"]==1) & (stats_df["BREADTH_OF_COVERAGE_THRESHOLD_MET"]==1)]
filtered_stats_df = stats_df.loc[ (stats_df["RELABUNDANCE_THRESHOLD_MET"]==0) & (stats_df["COVERAGE_THRESHOLD_MET"]==1) & (stats_df["BREADTH_OF_COVERAGE_THRESHOLD_MET"]==1)]

samples_df = pd.DataFrame(list(samples), columns=['full_sample'])
filtered_samples_df = samples_df[samples_df["full_sample"].isin(filtered_stats_df["SAMPLE"].to_list())]
Expand Down
6 changes: 3 additions & 3 deletions bratb-test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

# Sample contents of my_parameters_1.yml file
# Sample contents of params.yml file

input_samplesheet: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/data/input-data/input_test.csv
input_samplesheet: /home/lcerdeira/BraSeqTB/data/input-data/input_test.csv
only_validate_fastqs: true
conda_envs_location: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/conda_envs
conda_envs_location: /home/lcerdeira/BraSeqTB/conda_envs
6 changes: 3 additions & 3 deletions bratb.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

# Sample contents of my_parameters_1.yml file
# Sample contents of params_1.yml file

input_samplesheet: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/data/input-data/ialbratb-input.csv
input_samplesheet: /home/lcerdeira/BraSeqTB/data/input-data/input_test.csv
only_validate_fastqs: true
conda_envs_location: /Users/lshlt19/GitHub/BRATBLC/BraSeqTB/conda_envs
conda_envs_location: /home/lcerdeira/BraSeqTB/conda_envs
9 changes: 1 addition & 8 deletions conda_envs/mapping-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,7 @@ channels:
- bioconda
- defaults
dependencies:
#NOTE: Not natively. Python 2.7 was sunsetted prior to release of the osx-arm64 platform, so there isn't any such build. One could try requesting such a build on the Conda Forge Python feedstock, but even if someone did that you'd still face the issue that most Python packages will also lack osx-arm64 builds for Python 2.7.
#Emulate through Rosetta. Apple provides an x86_64 emulator, Rosetta 2, which will run x86_64 binaries, such as what would be installed with Conda environments using an osx-64 subdir. One can create environments with such a subdir setting with something like:
#CONDA_SUBDIR=osx-64 conda create -n py27 python=2.7 # include other packages here
# ensure that future package installs in this env stick to 'osx-64'
#conda activate py27
#conda config --env --set subdir osx-64

# - python=2.7
- python=2.7
- bwa=0.7.17
- samtools=1.9
- iqtree=2.1.2
Expand Down
13 changes: 7 additions & 6 deletions conda_envs/setup_conda_envs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

# NOTE: Please replace `conda` with `mamba` if it is installed for faster installs.
resolverCondaBinary="mamba" # pick either conda OR mamba
resolverCondaBinary="conda" # pick either conda OR mamba

#===========================================================
#
Expand All @@ -17,15 +17,16 @@ $resolverCondaBinary env create -p bratb-env --file conda_envs/bratb-env.yml

$resolverCondaBinary env create -p bratb-tbprofiler-env --file conda_envs/bratb-tbprofiler-env.yml

echo "INFO: Activate mamba env with tb-profiler and setup the WHO database"
eval "$(mamba shell.bash hook)"
mamba activate "./conda_envs/bratb-tbprofiler-env"
echo "INFO: Activate conda env with tb-profiler and setup the WHO database"
eval "$(conda shell.bash hook)"
# NOTE: Installing mamba may leave the conda path in an inconsistent state. If the command below does not work, use the full path to the environment or fix the conda path.
conda activate "./conda_envs/bratb-tbprofiler-env"

#echo "INFO: Use WHO-v2 database in bratb-tbprofiler-env"
#tb-profiler update_tbdb --commit bdace1f82d948ce0001e1dade6eb93d2da9c47e5 --logging DEBUG

#echo "INFO: Use BRATB branch from tbdb database in bratb-tbprofiler-env"
#echo "INFO: Use BraTB branch from tbdb database in bratb-tbprofiler-env"
tb-profiler update_tbdb --commit 30f8bc37df15affa378ebbfbd3e1eb4c5903056e --logging DEBUG

echo "INFO: Deactivate the bratb-tbprofiler-env "
mamba deactivate
conda deactivate
55 changes: 0 additions & 55 deletions conf/biomina.config

This file was deleted.

4 changes: 0 additions & 4 deletions conf/laptop.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ process {
cpus = 4
memory = 1.GB
}
withName: 'LOFREQ_CALL__NTM' {
cpus = 2
memory = 1.GB
}
withName: 'LOFREQ_FILTER' {
cpus = 2
memory = 1.GB
Expand Down
8 changes: 0 additions & 8 deletions conf/low_memory.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ process {
cpus = 6
memory = 1.GB
}
withName: 'LOFREQ_CALL__NTM' {
cpus = 2
memory = 1.GB
}
withName: 'LOFREQ_FILTER' {
cpus = 2
memory = 1.GB
Expand Down Expand Up @@ -245,10 +241,6 @@ process {
cpus = 2
memory = 1.GB
}
withName: 'FASTQC' {
cpus = 3
memory = 1.GB
}
withName: 'MULTIQC' {
cpus = 1
memory = 4.GB
Expand Down
8 changes: 0 additions & 8 deletions conf/server.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ process {
cpus = 8
memory = 1.GB
}
withName: 'CALL_WF:LOFREQ_CALL__NTM' {
cpus = 2
memory = 1.GB
}
withName: 'CALL_WF:LOFREQ_FILTER' {
cpus = 2
memory = 1.GB
Expand Down Expand Up @@ -245,10 +241,6 @@ process {
cpus = 2
memory = 1.GB
}
withName: 'QUALITY_CHECK_WF:FASTQC' {
cpus = 3
memory = 1.GB
}
withName: 'REPORTS_WF:MULTIQC' {
cpus = 1
memory = 4.GB
Expand Down
27 changes: 0 additions & 27 deletions conf/singularity.config

This file was deleted.

2 changes: 1 addition & 1 deletion conf/template_noconda.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

params {

input_samplesheet = "${projectDir}/resources/reference_set/bratb.pbs.test.csv"
input_samplesheet = "${projectDir}/data/input-data/bratb.csv"
outdir = "${projectDir}/results"

}
Expand Down
13 changes: 0 additions & 13 deletions containers/Dockerfile

This file was deleted.

4 changes: 2 additions & 2 deletions containers/biocontainer-tbprofiler/build.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/bash
set -uex

# NOTE: Make sure you've set the environment correctly and are logged in to the registry.
# NOTE: Make sure you've set the environment correctly, are logged in to the registry, and have adjusted permissions so that sudo is not required; otherwise, you will need to run this script using sudo.

TBPROFILER_VERSION=6.3.0
DOCKER_NAMESPACE="lcerdeira/bratb"
DOCKER_NAMESPACE="lcerdeira/bratb-tbprofiler"

CONTAINER_NAME="$DOCKER_NAMESPACE/biocontainer-tbprofiler:$TBPROFILER_VERSION"

Expand Down
2 changes: 1 addition & 1 deletion containers/bratb-container/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -uex

# NOTE: Make sure you've set the environment correctly and are logged in to the registry.

CONTAINER_TAG=2.0.0
CONTAINER_TAG=1.0.0
CONTAINER_DIR=bratb-container
DOCKER_NAMESPACE="lcerdeira/bratb"

Expand Down
17 changes: 0 additions & 17 deletions containers/build.sh

This file was deleted.

2 changes: 1 addition & 1 deletion containers/mapping-container/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -uex
# NOTE: Make sure you've set the environment correctly and are logged in to the registry.
#

CONTAINER_TAG=2.0.0
CONTAINER_TAG=1.0.0
CONTAINER_DIR=mapping-container
DOCKER_NAMESPACE="lcerdeira/bratb"

Expand Down
2 changes: 1 addition & 1 deletion containers/misc/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -uex

# NOTE: Make sure you've set the environment correctly and are logged in to the registry.

CONTAINER_TAG=2.0.0-theta
CONTAINER_TAG=1.0.0-theta
DOCKER_NAMESPACE="lcerdeira/bratb"
CONTAINER_DIR=misc

Expand Down
Loading
Loading