Skip to content

Commit

Permalink
updated some paths
Browse files Browse the repository at this point in the history
  • Loading branch information
farchaab committed Aug 2, 2024
1 parent 9e1031e commit 1e7f6be
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 53 deletions.
57 changes: 28 additions & 29 deletions zamp/rules/In_silico/amplicons_reference.rules
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,14 @@ rule Import_ReferenceSequences:


def get_DADA2_samples_path(sample):
RUN = all_samples.at[sample, config["run_column"]]
sample_layout = layout[sample]
run = SAMPLES.at[sample, "run"]
path = (
"DADA2/2_denoised/"
+ RUN
+ run
+ "/"
+ sample
+ "_"
+ sample_layout
+ PAIRING[sample]
+ "_infer_seq_tab.rds"
)
return path
Expand All @@ -37,7 +36,7 @@ def get_DADA2_samples_path(sample):
### Take the sequences after denoising
rule export_DADA2_QuerySequences:
conda:
"../../envs/DADA2_in_R.yml"
os.path.join(dir.envs, "DADA2_in_R.yml")
container:
singularity_envs["dada2"]
input:
Expand All @@ -49,26 +48,26 @@ rule export_DADA2_QuerySequences:
count_table="QualityControl/DADA2/{sample}/count_table.tsv",
length_histo="QualityControl/DADA2/{sample}/merged_reads_length.png",
params:
merged_min_length=config["merged_min_length"],
merged_max_length=config["merged_max_length"],
merged_min_length=MINLEN,
merged_max_length=MAXLEN,
log:
logging_folder + "QualityControl/DADA2/{sample}/dna-sequences.log",
dir.logs + "/" + "QualityControl/DADA2/{sample}/dna-sequences.log",
script:
"scripts/extract_DADA2_sequences.R"


### Count the number of occurrences of the representative sequences in the samples. It takes the sequences before clustering, i.e. their "raw sequence".
rule vsearch_count_occurences_for_QC:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
uc="vsearch/1c_derep/{sample}_derep.uc",
output:
count_table="QualityControl/vsearch/{sample}/count_table.tsv",
log:
logging_folder + "QualityControl/vsearch/{sample}/count_table.log",
dir.logs + "/" + "QualityControl/vsearch/{sample}/count_table.log",
script:
"scripts/extract_vsearch_counts.R"

Expand Down Expand Up @@ -201,14 +200,14 @@ rule Export_quality_Control_raw:
"""


def get_RUN(sample):
RUN = all_samples.at[sample, config["run_column"]]
return RUN
def get_run(sample):
run = SAMPLES.at[sample, "run"]
return run


rule mismatch_table_per_sample:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
Expand All @@ -218,18 +217,18 @@ rule mismatch_table_per_sample:
merged_mismatch_table_path="QualityControl/{denoiser}/export_{sample}/with_counts.tsv",
missmatch_plot_path="QualityControl/{denoiser}/export_{sample}/with_counts_plot.png",
params:
RUN=lambda wildcards: get_RUN(wildcards.sample),
run=lambda wildcards: get_run(wildcards.sample),
log:
logging_folder + "QualityControl/{denoiser}/export_{sample}/with_counts.log",
dir.logs + "/" + "QualityControl/{denoiser}/export_{sample}/with_counts.log",
threads: 1
script:
"scripts/abundance_to_mismatches.R"


def select_samples(RUN):
selected_samples = all_samples.index.values[
(all_samples[config["select_column"]] == config["select_value"])
& (all_samples[config["run_column"]] == RUN)
def select_samples(run):
selected_samples = SAMPLES.index.values[
(SAMPLES[config["select_column"]] == config["select_value"])
& (SAMPLES["run"] == run)
]
selected_paths = expand(
"QualityControl/{{denoiser}}/export_{sample}/with_counts.tsv",
Expand All @@ -241,36 +240,36 @@ def select_samples(RUN):

rule Compare_mismatch_per_run:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
mismatch_tables_path=lambda wildcards: select_samples(wildcards.RUN),
mismatch_tables_path=lambda wildcards: select_samples(wildcards.run),
output:
missmatch_plot="QualityControl/{denoiser}/{RUN}_compare_quality.png",
merged_mismatch_table="QualityControl/{denoiser}/{RUN}_compare_quality_table.tsv",
missmatch_plot="QualityControl/{denoiser}/{run}_compare_quality.png",
merged_mismatch_table="QualityControl/{denoiser}/{run}_compare_quality_table.tsv",
log:
logging_folder + "QualityControl/{denoiser}/{RUN}_compare_quality_table.log",
dir.logs + "/" + "QualityControl/{denoiser}/{run}_compare_quality_table.log",
threads: 1
script:
"scripts/compare_mismatches.R"


rule Compare_mismatch_all:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
mismatch_tables_path=expand(
"QualityControl/{{denoiser}}/{RUN}_compare_quality_table.tsv",
RUN=set(all_samples[config["run_column"]]),
"QualityControl/{{denoiser}}/{run}_compare_quality_table.tsv",
run=set(SAMPLES["run"]),
),
output:
missmatch_plot="QualityControl/{denoiser}/compare_quality.pdf",
merged_mismatch_table="QualityControl/{denoiser}/compare_quality_table.tsv",
log:
logging_folder + "QualityControl/{denoiser}/compare_quality.log",
dir.logs + "/" + "QualityControl/{denoiser}/compare_quality.log",
threads: 1
script:
"scripts/compare_all_mismatches.R"
47 changes: 25 additions & 22 deletions zamp/rules/In_silico/insilico_validation.rules
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ rule print_primers_to_files:
output:
"InSilico/PCR/Primers.fasta",
params:
config["forward_primer"],
config["reverse_primer"],
FW_PRIMER,
RV_PRIMER,
shell:
"""
printf \
Expand All @@ -34,12 +34,13 @@ rule Extract_amplicon:
"InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons.fasta",
temp("InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.blastout"),
params:
min_length=config["merged_min_length"],
max_length=config["merged_max_length"],
min_length=MINLEN,
max_length=MAXLEN,
mismatch=config["mismatch"],
threeprime=config["threeprime"],
log:
logging_folder
dir.logs
+ "/"
+ "InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons.log",
threads: 1
shell:
Expand Down Expand Up @@ -67,15 +68,16 @@ rule Extract_amplicon:
## Some sequences are extracted as reverse-complemented. To be sure to have them in the right direction, we add a reverse-complemented version to each fasta. Then, the next step with cutadapt w
rule reverse_complement:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
simple_fasta="InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons.fasta",
output:
fasta_with_rev="InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons_complemented.fasta",
log:
logging_folder
dir.logs
+ "/"
+ "InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons_complemented.txt",
threads: 1
script:
Expand All @@ -84,21 +86,21 @@ rule reverse_complement:

rule cutadapt_trim_in_silico:
conda:
"../../envs/cutadapt.yml"
os.path.join(dir.envs, "cutadapt.yml")
container:
singularity_envs["cutadapt"]
input:
R1_raw_reads="InSilico/PCR/{sample}/Primers.fasta.pair.{sample}.fna.amplicons_complemented.fasta",
output:
R1_trimmed_reads=temp("InSilico/1a_trimmed_primers/{sample}_trimmed.fasta"),
log:
logging_folder + "InSilico/1a_trimmed_primers/{sample}_trimmed.txt",
dir.logs + "/" + "InSilico/1a_trimmed_primers/{sample}_trimmed.txt",
params:
forward_primer=config["forward_primer"],
reverse_primer=config["reverse_primer"],
forward_primer=FW_PRIMER,
reverse_primer=RV_PRIMER,
excepted_errors=config["excepted_errors"],
min_length=config["merged_min_length"],
max_length=config["merged_max_length"],
min_length=MINLEN,
max_length=MAXLEN,
coverage=config["amplicon_min_coverage"],
threads: 1
script:
Expand Down Expand Up @@ -133,15 +135,15 @@ rule Insilico_merge_all_in_one_fasta:
### Again, dereplicate all identical sequences after merging.
rule InSilico_derepicate_all:
conda:
"../../envs/vsearch.yml"
os.path.join(dir.envs, "vsearch.yml")
container:
singularity_envs["vsearch"]
input:
"InSilico/1c_derep/merged_all.fasta",
output:
"InSilico/2_denoised/dna-sequences.fasta",
log:
logging_folder + "InSilico/1c_all_merged_sequences/dereplicate_all.txt",
dir.logs + "/" + "InSilico/1c_all_merged_sequences/dereplicate_all.txt",
shell:
"""
vsearch --derep_fulllength {input} \
Expand All @@ -157,7 +159,7 @@ rule InSilico_derepicate_all:

rule InSilico_count_occurences:
conda:
"../../envs/vsearch.yml"
os.path.join(dir.envs, "vsearch.yml")
container:
singularity_envs["vsearch"]
input:
Expand All @@ -166,7 +168,7 @@ rule InSilico_count_occurences:
output:
"InSilico/2_denoised/countSeqs/{sample}_count_table.tsv",
log:
logging_folder + "InSilico/2_denoised/countSeqs/{sample}_count_table.tsv",
dir.logs + "/" + "InSilico/2_denoised/countSeqs/{sample}_count_table.tsv",
shell:
"""
if [ -s "{input[samples]}" ]
Expand Down Expand Up @@ -199,23 +201,23 @@ def list_samples_counts(wildcards):
### Format count table from InSilico
rule create_InSilico_count_table:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
count_table_samples=list_samples_counts,
output:
count_table="InSilico/2_denoised/count_table.tsv",
log:
logging_folder + "InSilico/2_denoised/count_table.tsv",
dir.logs + "/" + "InSilico/2_denoised/count_table.tsv",
script:
"scripts/create_count_table_from_insilico.R"


### Create a table to compare tax assignment
rule In_silico_tax_compare:
conda:
"../../envs/amplicons_r_utils.yml"
os.path.join(dir.envs, "amplicons_r_utils.yml")
container:
singularity_envs["r_utils"]
input:
Expand All @@ -226,9 +228,10 @@ rule In_silico_tax_compare:
output_table="InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax.tsv",
output_table_long="InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax_long.tsv",
params:
viz_replace_empty_tax=config["viz_replace_empty_tax"],
viz_replace_empty_tax=REPL_EMPTY,
log:
logging_folder
dir.logs
+ "/"
+ "InSilico/3_classified/{classifier}_{tax_DB}/InSilico_compare_tax.log",
script:
"scripts/In_silico_tax_comparison.R"
5 changes: 3 additions & 2 deletions zamp/rules/PICRUSt2/picrust.rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

rule picrust2_custom_tree:
conda:
"../../envs/picrust2.yml"
os.path.join(dir.envs, "picrust2.yml")
container:
singularity_envs["picrust2"]
input:
Expand All @@ -14,7 +14,8 @@ rule picrust2_custom_tree:
"{denoiser}/6_picrust2/{classifier}_{tax_DB}/{filtering_or_not}/{raref_or_not}/picrust/"
),
log:
logging_folder
dir.logs
+ "/"
+ "{denoiser}/5_visualization/{classifier}_{tax_DB}/{filtering_or_not}/{raref_or_not}/picrust/picrust.txt",
threads: 4
shell:
Expand Down

0 comments on commit 1e7f6be

Please sign in to comment.