
Commit

Add working run command
farchaab authored Aug 2, 2024
2 parents 0ed9c4b + 1e7f6be commit a0dc44d
Showing 48 changed files with 2,249 additions and 932 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,2 +1,3 @@
zamp.egg-info/
zamp/__pycache__
zamp/__pycache__
zamp/ressources/
1 change: 1 addition & 0 deletions setup.py
@@ -58,6 +58,7 @@ def get_data_files():
"Click>=8.1.3",
"attrmap>=0.0.7",
"snaketool-utils>=0.0.5",
"metasnek>=0.0.8",
"biopython>=1.83",
],
entry_points={"console_scripts": ["zamp=zamp.__main__:main"]},
2 changes: 1 addition & 1 deletion zamp/Insilico_quality_control.Snakefile
@@ -25,5 +25,5 @@ rule check_amplicon_quality:
input:
expand(
"QualityControl/{denoiser}/compare_quality_table.tsv",
denoiser=config["denoiser"],
denoiser=DENOISER,
),
12 changes: 5 additions & 7 deletions zamp/Insilico_taxa_assign.Snakefile
@@ -5,9 +5,7 @@ import os
## When using singularity
if "--use-singularity" in sys.argv:
### Bind the directory of the database to the singularity containers.
workflow.deployment_settings.apptainer_args += (
f' -B {config["tax_DB_path"]}:{config["tax_DB_path"]}'
)
workflow.deployment_settings.apptainer_args += f" -B {os.path.abspath(config.args.database)}:{os.path.abspath(config.args.database)}"
#### Load a dictionary of singularity containers that will be called from each rule

singularity_envs = yaml.safe_load(
@@ -35,12 +33,12 @@ rule insilico_validation:
input:
expand(
"InSilico/3_classified/{classifier}_{tax_DB}/dna-sequences_tax_assignments.txt",
classifier=config["classifier"],
tax_DB=config["tax_DB_name"],
classifier=CLASSIFIER,
tax_DB=DBNAME,
),
"InSilico/2_denoised/count_table.tsv",
expand(
"InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax.tsv",
classifier=config["classifier"],
tax_DB=config["tax_DB_name"],
classifier=CLASSIFIER,
tax_DB=DBNAME,
),
146 changes: 127 additions & 19 deletions zamp/Snakefile
@@ -2,16 +2,126 @@ import re
import yaml
import os
from snakemake.utils import min_version
from attrmap import AttrMap
import attrmap.utils as au
import pandas as pd
import glob
from metasnek import fastq_finder
import warnings

min_version("8.10.6")

warnings.filterwarnings(
"ignore",
category=UserWarning,
)


# Concatenate Snakemake's own log file with the master log file
def copy_log_file():
files = glob.glob(os.path.join(".snakemake", "log", "*.snakemake.log"))
if not files:
return None
current_log = max(files, key=os.path.getmtime)
shell("cat " + current_log + " >> " + LOG)


onsuccess:
copy_log_file()


onerror:
copy_log_file()


# config file
configfile: os.path.join(workflow.basedir, "config", "config.yaml")


config = AttrMap(config)
config = au.convert_state(config, read_only=True)
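
AttrMap exposes the nested config keys as attributes and convert_state freezes them read-only, which is why the rest of this Snakefile reads settings as config.args.<name>. A tiny illustration with made-up values (the real ones come from config/config.yaml and the CLI):

from attrmap import AttrMap
import attrmap.utils as au

cfg = AttrMap({"args": {"output": "zamp_out", "denoiser": "DADA2"}})  # illustrative values only
cfg = au.convert_state(cfg, read_only=True)
print(cfg.args.output)  # zamp_out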


include: os.path.join("rules", "0_preprocessing", "directories.smk")
include: os.path.join("rules", "0_preprocessing", "functions.smk")


OUTPUT = config.args.output
LOG = os.path.join(OUTPUT, "zamp.log")


# Read run args
## Inputs args
INPUT = config.args.input
METADATA = config.args.metadata
if os.path.isdir(INPUT) and METADATA:

reads_df = (
pd.DataFrame.from_dict(fastq_finder.parse_samples_to_dictionary(INPUT))
.T.reset_index()
.rename(columns={"index": "fastq"})
)
meta_df = pd.read_csv(METADATA, sep="\t")
SAMPLES = reads_df.merge(meta_df, on="fastq")
SAMPLES.set_index("sample", inplace=True)
elif os.path.isfile(INPUT) and not METADATA:
SAMPLES = pd.read_csv(INPUT, sep="\t", index_col="sample")


# Add read pairing
SAMPLES["paired"] = [True if R2 else False for R2 in SAMPLES.R2]
PAIRING = {
sample: ("paired" if SAMPLES.loc[sample, "paired"] else "single")
for sample in SAMPLES.index
}
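
For illustration, a hypothetical two-sample table and the pairing map the block above would derive from it (sample names, paths and run labels are made up; the columns match those this Snakefile expects):

import pandas as pd

SAMPLES = pd.DataFrame(
    {
        "sample": ["s1", "s2"],
        "R1": ["fastq/s1_R1.fastq.gz", "fastq/s2.fastq.gz"],
        "R2": ["fastq/s1_R2.fastq.gz", None],
        "run": ["run1", "run1"],
    }
).set_index("sample")

SAMPLES["paired"] = [True if R2 else False for R2 in SAMPLES.R2]
PAIRING = {
    sample: ("paired" if SAMPLES.loc[sample, "paired"] else "single")
    for sample in SAMPLES.index
}
# PAIRING == {"s1": "paired", "s2": "single"}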
## Denoiser
DENOISER = config.args.denoiser
## Primers args
TRIM = config.args.trim
AMPLICON = config.args.amplicon
OVERLAP = config.args.min_overlap
FW_PRIMER = config.args.fw_primer
RV_PRIMER = config.args.rv_primer

## Merged sequences args
MINLEN = config.args.minlen
MAXLEN = config.args.maxlen

## Denoising args
FW_TRIM = config.args.fw_trim
RV_TRIM = config.args.rv_trim
FW_ERRORS = config.args.fw_errors
RV_ERRORS = config.args.rv_errors

## Database args
DBPATH = os.path.dirname(os.path.abspath(config.args.database))
DBNAME = os.listdir(DBPATH)
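
DBPATH is therefore the parent directory of the database path given on the command line, and DBNAME its directory listing. With a hypothetical path:

import os

database = "/refs/silva138"  # hypothetical value of config.args.database
DBPATH = os.path.dirname(os.path.abspath(database))  # "/refs"
DBNAME = os.listdir(DBPATH)  # e.g. ["silva138"] if it is the only entry there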

## Classifier
CLASSIFIER = config.args.classifier


## Post-processing args
RAREFACTION = config.args.rarefaction.split(",")
MIN_PREV = config.args.min_prev
MIN_COUNT = config.args.min_count
NORM = config.args.normalization.split(",")
REPL_EMPTY = config.args.replace_empty
KEEP_RANK = config.args.keep_rank
KEEP_TAXA = config.args.keep_taxa.split(",")
EXCL_RANK = config.args.exclude_rank.split(",")
EXCL_TAXA = config.args.exclude_taxa.split(",")

## Special out args
MELTED = config.args.melted
PHYSEQ_RANK = config.args.physeq_rank.split(",")
TRANSPOSED = config.args.transposed
QIIME_VIZ = config.args.qiime_viz

## When using singularity
if "--use-singularity" in sys.argv:
### Bind the directory of the database to the singularity containers.
workflow.deployment_settings.apptainer_args += (
f' -B {config["tax_DB_path"]}:{config["tax_DB_path"]}'
)
### Mount Database path to singularity containers.
workflow.deployment_settings.apptainer_args += f" -B {os.path.abspath(config.args.database)}:{os.path.abspath(config.args.database)}"
#### Load a dictionary of singularity containers that will be called from each rule
singularity_envs = yaml.safe_load(
open(os.path.join(workflow.basedir, "envs/singularity/sing_envs.yml"), "r")
@@ -20,22 +130,20 @@ singularity_envs = yaml.safe_load(

## Include the pipeline rules
### Sets of script to handle logs, input files and list of output:
include: "rules/0_preprocessing/scripts/logging.py"
include: "rules/0_preprocessing/scripts/make_input_lists.py"
include: "rules/0_preprocessing/scripts/make_output_lists.py"
include: os.path.join("rules", "0_preprocessing", "scripts", "make_output_lists.py")
### Snakemake rules to do the job:
include: "rules/0_preprocessing/get_inputs.rules"
include: "rules/0_preprocessing/QC_raw_reads.rules"
include: "rules/1_2_DADA2_ASVs/1_cutadapt_trim.rules"
include: "rules/1_2_DADA2_ASVs/2_DADA2_denoising.rules"
include: "rules/1_2_vsearch_OTUs/1_PANDAseq_trim_filter_merge.rules"
include: "rules/1_2_vsearch_OTUs/2_vsearch_denoising.rules"
include: "rules/3_tax_assignment/tax_assign.rules"
include: "rules/4_post_processing/physeq_processing.rules"
include: "rules/4_post_processing/TreeShrink.rules"
include: "rules/5_visualization/General_plotting.rules"
include: "rules/5_visualization/QIIME2_import.rules"
include: "rules/PICRUSt2/picrust.rules"
include: os.path.join("rules", "0_preprocessing", "get_inputs.rules")
include: os.path.join("rules", "0_preprocessing", "QC_raw_reads.rules")
include: os.path.join("rules", "1_2_DADA2_ASVs", "1_cutadapt_trim.rules")
include: os.path.join("rules", "1_2_DADA2_ASVs", "2_DADA2_denoising.rules")
include: os.path.join("rules", "1_2_vsearch_OTUs", "1_PANDAseq_trim_filter_merge.rules")
include: os.path.join("rules", "1_2_vsearch_OTUs", "2_vsearch_denoising.rules")
include: os.path.join("rules", "3_tax_assignment", "tax_assign.rules")
include: os.path.join("rules", "4_post_processing", "physeq_processing.rules")
include: os.path.join("rules", "4_post_processing", "TreeShrink.rules")
include: os.path.join("rules", "5_visualization", "General_plotting.rules")
include: os.path.join("rules", "5_visualization", "QIIME2_import.rules")
include: os.path.join("rules", "PICRUSt2", "picrust.rules")


report: "workflow.rst"
2 changes: 2 additions & 0 deletions zamp/__main__.py
@@ -10,6 +10,7 @@
print_citation,
common_options,
db_options,
run_options,
run_snakemake,
)

@@ -52,6 +53,7 @@ def db(**kwargs):
help_option_names=["-h", "--help"], ignore_unknown_options=True
),
)
@run_options
@common_options
def run(**kwargs):
"""
103 changes: 70 additions & 33 deletions zamp/rules/0_preprocessing/QC_raw_reads.rules
@@ -1,57 +1,82 @@
## Generate MultiQC reports


rule assess_quality_raw_reads_with_fastqc:
rule fastQC_raw_reads:
conda:
"../../envs/FastQC.yml"
os.path.join(dir.envs, "FastQC.yml")
container:
singularity_envs["fastqc"]
input:
"raw_reads/{sample}_{suffix}.fastq.gz",
os.path.join(dir.out.fastq, "{sample}_{suffix}.fastq.gz"),
output:
temp("QC/FastQC/raw_reads/{RUN}/{sample}_{suffix}_fastqc.zip"),
temp(
os.path.join(
dir.out.qc,
"FastQC",
"raw_reads",
"{run}",
"{sample}_{suffix}_fastqc.zip",
)
),
log:
logging_folder + "QC/FastQC/raw_reads/{RUN}/{sample}_{suffix}_fastqc.txt",
os.path.join(
dir.logs,
"QC",
"FastQC",
"raw_reads",
"{run}",
"{sample}_{suffix}_fastqc.txt",
),
threads: 1
shell:
"""
fastqc -t {threads} {input} -o $(dirname {output[0]}) --nogroup 2> {log[0]}
"""


def sample_list_run_QC(RUN):
def sample_list_run_QC(run):
file_list = []
samples = list(all_samples.index.values[all_samples[config["run_column"]] == RUN])
for s in samples:
suffix_s = reads_ext[s]
samples = list(SAMPLES[SAMPLES["run"] == run].index)
for sample in samples:
if SAMPLES.loc[sample, "paired"]:
suffix = ["R1", "R2"]
else:
suffix = ["single"]
combined_values = expand(
"QC/FastQC/raw_reads/{RUN}/{sample}_{suffix}_fastqc.zip",
RUN=RUN,
sample=s,
suffix=suffix_s,
os.path.join(
"{qc}", "FastQC", "raw_reads", "{run}", "{sample}_{suffix}_fastqc.zip"
),
qc=dir.out.qc,
run=run,
sample=sample,
suffix=suffix,
)
file_list = file_list + combined_values

return file_list
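
With the hypothetical two-sample table sketched earlier and dir.out.qc assumed to be "results/QC", sample_list_run_QC("run1") would expand to targets of this shape:

results/QC/FastQC/raw_reads/run1/s1_R1_fastqc.zip
results/QC/FastQC/raw_reads/run1/s1_R2_fastqc.zip
results/QC/FastQC/raw_reads/run1/s2_single_fastqc.zip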


rule create_per_run_multiqc_report:
rule multiQC_per_run:
conda:
"../../envs/MultiQC.yml"
os.path.join(dir.envs, "MultiQC.yml")
container:
singularity_envs["multiqc"]
input:
lambda wildcards: sample_list_run_QC(wildcards.RUN),
lambda wildcards: sample_list_run_QC(wildcards.run),
output:
report(
"QC/{RUN}_multiqc_raw_reads_report.html",
os.path.join(dir.out.qc, "{run}_multiqc_raw_reads_report.html"),
caption="report/MultiQC.rst",
category="MultiQC",
subcategory="{RUN}",
subcategory="{run}",
),
os.path.join(
dir.out.qc,
"{run}_multiqc_raw_reads_report_data",
"multiqc_general_stats.txt",
),
"QC/{RUN}_multiqc_raw_reads_report_data/multiqc_general_stats.txt",
log:
logging_folder + "QC/{RUN}_multiqc_raw_reads_report.txt",
os.path.join(dir.logs, "QC", "{run}_multiqc_raw_reads_report.txt"),
priority: 100
shell:
"""
@@ -62,38 +87,50 @@ def sample_list_overall_QC():
def sample_list_overall_QC():
file_list = []

for i in set(all_samples[config["run_column"]]):
samples = list(all_samples.index.values[all_samples[config["run_column"]] == i])
for s in samples:
suffix_s = reads_ext[s]
for run in set(SAMPLES["run"]):
samples = list(SAMPLES[SAMPLES["run"] == run].index)
for sample in samples:
if SAMPLES.loc[sample, "paired"]:
suffix = ["R1", "R2"]
else:
suffix = ["single"]
combined_values = expand(
"QC/FastQC/raw_reads/{RUN}/{sample}_{suffix}_fastqc.zip",
RUN=i,
sample=s,
suffix=suffix_s,
os.path.join(
"{qc}",
"FastQC",
"raw_reads",
"{run}",
"{sample}_{suffix}_fastqc.zip",
),
qc=dir.out.qc,
run=run,
sample=sample,
suffix=suffix,
)
file_list = file_list + combined_values

return file_list


rule create_overall_raw_reads_multiqc_report:
rule multiQC_all:
conda:
"../../envs/MultiQC.yml"
os.path.join(dir.envs, "MultiQC.yml")
container:
singularity_envs["multiqc"]
input:
sample_list_overall_QC(),
output:
report(
"QC/multiqc_raw_reads_report.html",
os.path.join(dir.out.qc, "multiqc_raw_reads_report.html"),
caption="report/MultiQC.rst",
category="MultiQC",
subcategory="All run",
),
"QC/multiqc_raw_reads_report_data/multiqc_general_stats.txt",
os.path.join(
dir.out.qc, "multiqc_raw_reads_report_data/multiqc_general_stats.txt"
),
log:
logging_folder + "QC/multiqc_raw_reads_report.txt",
os.path.join(dir.logs, "QC", "multiqc_raw_reads_report.txt"),
priority: 100
shell:
"""