diff --git a/zamp/rules/In_silico/amplicons_reference.rules b/zamp/rules/In_silico/amplicons_reference.rules index af2aeea..f030157 100644 --- a/zamp/rules/In_silico/amplicons_reference.rules +++ b/zamp/rules/In_silico/amplicons_reference.rules @@ -20,15 +20,14 @@ rule Import_ReferenceSequences: def get_DADA2_samples_path(sample): - RUN = all_samples.at[sample, config["run_column"]] - sample_layout = layout[sample] + run = SAMPLES.at[sample, "run"] path = ( "DADA2/2_denoised/" - + RUN + + run + "/" + sample + "_" - + sample_layout + + PAIRING[sample] + "_infer_seq_tab.rds" ) return path @@ -37,7 +36,7 @@ def get_DADA2_samples_path(sample): ### Take the squences after denoising rule export_DADA2_QuerySequences: conda: - "../../envs/DADA2_in_R.yml" + os.path.join(dir.envs, "DADA2_in_R.yml") container: singularity_envs["dada2"] input: @@ -49,10 +48,10 @@ rule export_DADA2_QuerySequences: count_table="QualityControl/DADA2/{sample}/count_table.tsv", length_histo="QualityControl/DADA2/{sample}/merged_reads_length.png", params: - merged_min_length=config["merged_min_length"], - merged_max_length=config["merged_max_length"], + merged_min_length=MINLEN, + merged_max_length=MAXLEN, log: - logging_folder + "QualityControl/DADA2/{sample}/dna-sequences.log", + dir.logs + "/" + "QualityControl/DADA2/{sample}/dna-sequences.log", script: "scripts/extract_DADA2_sequences.R" @@ -60,7 +59,7 @@ rule export_DADA2_QuerySequences: ### Count the number of occurences of the representative sequences in the samples. I takes the sequences before clustering, i.e. their "raw sequence" rule vsearch_count_occurences_for_QC: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: @@ -68,7 +67,7 @@ rule vsearch_count_occurences_for_QC: output: count_table="QualityControl/vsearch/{sample}/count_table.tsv", log: - logging_folder + "QualityControl/vsearch/{sample}/count_table.log", + dir.logs + "/" + "QualityControl/vsearch/{sample}/count_table.log", script: "scripts/extract_vsearch_counts.R" @@ -201,14 +200,14 @@ rule Export_quality_Control_raw: """ -def get_RUN(sample): - RUN = all_samples.at[sample, config["run_column"]] - return RUN +def get_run(sample): + run = SAMPLES.at[sample, "run"] + return run rule mismatch_table_per_sample: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: @@ -218,18 +217,18 @@ rule mismatch_table_per_sample: merged_mismatch_table_path="QualityControl/{denoiser}/export_{sample}/with_counts.tsv", missmatch_plot_path="QualityControl/{denoiser}/export_{sample}/with_counts_plot.png", params: - RUN=lambda wildcards: get_RUN(wildcards.sample), + run=lambda wildcards: get_run(wildcards.sample), log: - logging_folder + "QualityControl/{denoiser}/export_{sample}/with_counts.log", + dir.logs + "/" + "QualityControl/{denoiser}/export_{sample}/with_counts.log", threads: 1 script: "scripts/abundance_to_mismatches.R" -def select_samples(RUN): - selected_samples = all_samples.index.values[ - (all_samples[config["select_column"]] == config["select_value"]) - & (all_samples[config["run_column"]] == RUN) +def select_samples(run): + selected_samples = SAMPLES.index.values[ + (SAMPLES[config["select_column"]] == config["select_value"]) + & (SAMPLES["run"] == run) ] selected_paths = expand( "QualityControl/{{denoiser}}/export_{sample}/with_counts.tsv", @@ -241,16 +240,16 @@ def select_samples(RUN): rule Compare_mismatch_per_run: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: - mismatch_tables_path=lambda wildcards: select_samples(wildcards.RUN), + mismatch_tables_path=lambda wildcards: select_samples(wildcards.run), output: - missmatch_plot="QualityControl/{denoiser}/{RUN}_compare_quality.png", - merged_mismatch_table="QualityControl/{denoiser}/{RUN}_compare_quality_table.tsv", + missmatch_plot="QualityControl/{denoiser}/{run}_compare_quality.png", + merged_mismatch_table="QualityControl/{denoiser}/{run}_compare_quality_table.tsv", log: - logging_folder + "QualityControl/{denoiser}/{RUN}_compare_quality_table.log", + dir.logs + "/" + "QualityControl/{denoiser}/{run}_compare_quality_table.log", threads: 1 script: "scripts/compare_mismatches.R" @@ -258,19 +257,19 @@ rule Compare_mismatch_per_run: rule Compare_mismatch_all: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: mismatch_tables_path=expand( - "QualityControl/{{denoiser}}/{RUN}_compare_quality_table.tsv", - RUN=set(all_samples[config["run_column"]]), + "QualityControl/{{denoiser}}/{run}_compare_quality_table.tsv", + run=set(SAMPLES["run"]), ), output: missmatch_plot="QualityControl/{denoiser}/compare_quality.pdf", merged_mismatch_table="QualityControl/{denoiser}/compare_quality_table.tsv", log: - logging_folder + "QualityControl/{denoiser}/compare_quality.log", + dir.logs + "/" + "QualityControl/{denoiser}/compare_quality.log", threads: 1 script: "scripts/compare_all_mismatches.R" diff --git a/zamp/rules/In_silico/insilico_validation.rules b/zamp/rules/In_silico/insilico_validation.rules index db460d3..0fa5fa9 100644 --- a/zamp/rules/In_silico/insilico_validation.rules +++ b/zamp/rules/In_silico/insilico_validation.rules @@ -15,8 +15,8 @@ rule print_primers_to_files: output: "InSilico/PCR/Primers.fasta", params: - config["forward_primer"], - config["reverse_primer"], + FW_PRIMER, + RV_PRIMER, shell: """ printf \ @@ -34,12 +34,13 @@ rule Extract_amplicon: "InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons.fasta", temp("InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.blastout"), params: - min_length=config["merged_min_length"], - max_length=config["merged_max_length"], + min_length=MINLEN, + max_length=MAXLEN, mismatch=config["mismatch"], threeprime=config["threeprime"], log: - logging_folder + dir.logs + + "/" + "InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons.log", threads: 1 shell: @@ -67,7 +68,7 @@ rule Extract_amplicon: ## Some sequences are extracted as reverse-complemented. Two be sure to have them in the right direction, we add reverse complemented version to each fasta. Then, the next step with cutadapt w rule reverse_complement: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: @@ -75,7 +76,8 @@ rule reverse_complement: output: fasta_with_rev="InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons_complemented.fasta", log: - logging_folder + dir.logs + + "/" + "InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons_complemented.txt", threads: 1 script: @@ -84,7 +86,7 @@ rule reverse_complement: rule cutadapt_trim_in_silico: conda: - "../../envs/cutadapt.yml" + os.path.join(dir.envs, "cutadapt.yml") container: singularity_envs["cutadapt"] input: @@ -92,13 +94,13 @@ rule cutadapt_trim_in_silico: output: R1_trimmed_reads=temp("InSilico/1a_trimmed_primers/{sample}_trimmed.fasta"), log: - logging_folder + "InSilico/1a_trimmed_primers/{sample}_trimmed.txt", + dir.logs + "/" + "InSilico/1a_trimmed_primers/{sample}_trimmed.txt", params: - forward_primer=config["forward_primer"], - reverse_primer=config["reverse_primer"], + forward_primer=FW_PRIMER, + reverse_primer=RV_PRIMER, excepted_errors=config["excepted_errors"], - min_length=config["merged_min_length"], - max_length=config["merged_max_length"], + min_length=MINLEN, + max_length=MAXLEN, coverage=config["amplicon_min_coverage"], threads: 1 script: @@ -133,7 +135,7 @@ rule Insilico_merge_all_in_one_fasta: ### Again, dereplicate all identical sequences after merging. rule InSilico_derepicate_all: conda: - "../../envs/vsearch.yml" + os.path.join(dir.envs, "vsearch.yml") container: singularity_envs["vsearch"] input: @@ -141,7 +143,7 @@ rule InSilico_derepicate_all: output: "InSilico/2_denoised/dna-sequences.fasta", log: - logging_folder + "InSilico/1c_all_merged_sequences/dereplicate_all.txt", + dir.logs + "/" + "InSilico/1c_all_merged_sequences/dereplicate_all.txt", shell: """ vsearch --derep_fulllength {input} \ @@ -157,7 +159,7 @@ rule InSilico_derepicate_all: rule InSilico_count_occurences: conda: - "../../envs/vsearch.yml" + os.path.join(dir.envs, "vsearch.yml") container: singularity_envs["vsearch"] input: @@ -166,7 +168,7 @@ rule InSilico_count_occurences: output: "InSilico/2_denoised/countSeqs/{sample}_count_table.tsv", log: - logging_folder + "InSilico/2_denoised/countSeqs/{sample}_count_table.tsv", + dir.logs + "/" + "InSilico/2_denoised/countSeqs/{sample}_count_table.tsv", shell: """ if [ -s "{input[samples]}" ] @@ -199,7 +201,7 @@ def list_samples_counts(wildcards): ### Format count table from InSilico rule create_InSilico_count_table: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: @@ -207,7 +209,7 @@ rule create_InSilico_count_table: output: count_table="InSilico/2_denoised/count_table.tsv", log: - logging_folder + "InSilico/2_denoised/count_table.tsv", + dir.logs + "/" + "InSilico/2_denoised/count_table.tsv", script: "scripts/create_count_table_from_insilico.R" @@ -215,7 +217,7 @@ rule create_InSilico_count_table: ### Create a table to compare tax assignment rule In_silico_tax_compare: conda: - "../../envs/amplicons_r_utils.yml" + os.path.join(dir.envs, "amplicons_r_utils.yml") container: singularity_envs["r_utils"] input: @@ -226,9 +228,10 @@ rule In_silico_tax_compare: output_table="InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax.tsv", output_table_long="InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax_long.tsv", params: - viz_replace_empty_tax=config["viz_replace_empty_tax"], + viz_replace_empty_tax=REPL_EMPTY, log: - logging_folder + dir.logs + + "/" + "InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax.log", script: "scripts/In_silico_tax_comparison.R" diff --git a/zamp/rules/PICRUSt2/picrust.rules b/zamp/rules/PICRUSt2/picrust.rules index 7b99891..9419700 100644 --- a/zamp/rules/PICRUSt2/picrust.rules +++ b/zamp/rules/PICRUSt2/picrust.rules @@ -3,7 +3,7 @@ rule picrust2_custom_tree: conda: - "../../envs/picrust2.yml" + os.path.join(dir.envs, "picrust2.yml") container: singularity_envs["picrust2"] input: @@ -14,7 +14,8 @@ rule picrust2_custom_tree: "{denoiser}/6_picrust2/{classifier}_{tax_DB}/{filtering_or_not}/{raref_or_not}/picrust/" ), log: - logging_folder + dir.logs + + "/" + "{denoiser}/5_visualization/{classifier}_{tax_DB}/{filtering_or_not}/{raref_or_not}/picrust/picrust.txt", threads: 4 shell: