Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplifies plot to use phold & add --no_medaka as an option #33

Merged
merged 2 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions sphae/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ def annotate(genome, output, db_dir, temp_dir, configfile, **kwargs):
@click.command(epilog=help_msg_run, context_settings=dict(help_option_names=["-h", "--help"], ignore_unknown_options=True))
@common_options
@click.option('--sequencing', 'sequencing', help="sequencing method", default='paired', show_default=True, type=click.Choice(['paired', 'longread']))
def run(_input, output, db_dir, sequencing, temp_dir, configfile, **kwargs):
@click.option('--no_medaka', 'no_medaka', help="turns off Medaka polishing for --sequencing longread", is_flag=True, default=False)
def run(_input, output, db_dir, sequencing, no_medaka, temp_dir, configfile, **kwargs):
"""Run sphae"""
copy_config(configfile, system_config=snake_base(os.path.join('config', 'config.yaml')))

Expand All @@ -180,6 +181,7 @@ def run(_input, output, db_dir, sequencing, temp_dir, configfile, **kwargs):
"output": output,
"db_dir": db_dir,
"sequencing": sequencing,
"no_medaka": no_medaka,
"configfile": configfile,
"temp_dir": temp_dir,
}
Expand Down Expand Up @@ -217,4 +219,4 @@ def main():
cli()

if __name__ == '__main__':
main()
main()
5 changes: 4 additions & 1 deletion sphae/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ configfile: os.path.join(workflow.basedir, "..", "config", "config.yaml")
include: os.path.join("rules", "1.preflight.smk")
include: os.path.join("rules", "2.targets.smk")
include: os.path.join("rules", "3.qc_qa.smk")
include: os.path.join("rules", "4.assembly.smk")
if config["args"]["no_medaka"] is False:
include: os.path.join("rules", "4.assembly.smk")
else:
include: os.path.join("rules", "4.assembly_no_medaka.smk")
include: os.path.join("rules", "5.components.smk")
include: os.path.join("rules", "5.viralverify.smk")
include: os.path.join("rules", "5.checkv.smk")
Expand Down
3 changes: 2 additions & 1 deletion sphae/workflow/envs/phold.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ channels:
- bioconda
- defaults
dependencies:
- phold >=0.1.4
- phold >=0.1.4
- genbank_to
4 changes: 2 additions & 2 deletions sphae/workflow/rules/10.final-reporting.smk
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ rule summarize_paired:
table= os.path.join(dir_genome, "{sample}-pr", "{sample}-genome-candidates.csv"),
genome=os.path.join(dir_genome, "{sample}-pr", "{sample}.fasta"),
gbk=os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "phynteny.gbk"),
plot=os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "pharokka_plot.png"),
plot=os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "plots", "{sample}_1.png"),
ph_taxa=os.path.join(dir_annotate, "pharokka-pr", "{sample}_1_pharokka", "{sample}_1_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir_annotate, "pharokka-pr", "{sample}_1_pharokka", "{sample}_1_length_gc_cds_density.tsv"),
cds=os.path.join(dir_annotate, "pharokka-pr", "{sample}_1_pharokka", "{sample}_1_cds_functions.tsv"),
Expand Down Expand Up @@ -174,7 +174,7 @@ rule summarize_longread:
table= os.path.join(dir_genome, "{sample}-sr", "{sample}-genome-candidates.csv"),
genome=os.path.join(dir_genome, "{sample}-sr", "{sample}.fasta"),
gbk=os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "phynteny.gbk"),
plot=os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "pharokka_plot.png"),
plot=os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "plots", "{sample}_1.png"),
ph_taxa =os.path.join(dir_annotate, "pharokka-sr","{sample}_1_pharokka", "{sample}_1_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir_annotate, "pharokka-sr", "{sample}_1_pharokka", "{sample}_1_length_gc_cds_density.tsv"),
cds=os.path.join(dir_annotate, "pharokka-sr", "{sample}_1_pharokka", "{sample}_1_cds_functions.tsv"),
Expand Down
2 changes: 1 addition & 1 deletion sphae/workflow/rules/2.targets-annot.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ targets['annotate'].append(expand(os.path.join(dir_annot, "{sample}-phold", "sub
targets['annotate'].append(expand(os.path.join(dir_annot, "{sample}-phold", "sub_db_tophits", "defensefinder_cds_predictions.tsv"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annot, "{sample}-phold", "sub_db_tophits", "vfdb_cds_predictions.tsv"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annot, "{sample}-phynteny", "phynteny.gbk"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annot, "{sample}-phynteny", "pharokka_plot.png"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annot, "{sample}-phynteny", "plots", "{sample}.png"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_final, "{sample}", "{sample}_summary.txt"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_final, "{sample}", "{sample}_summary.functions"), sample=samples_names))
4 changes: 2 additions & 2 deletions sphae/workflow/rules/2.targets.smk
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ if config['args']['sequencing'] == 'paired':
targets['annotate'].append(expand(os.path.join(dir_annotate, "phold-pr", "{sample}_1_phold", "sub_db_tophits", "defensefinder_cds_predictions.tsv"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phold-pr", "{sample}_1_phold", "sub_db_tophits", "vfdb_cds_predictions.tsv"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "phynteny.gbk"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "pharokka_plot.png"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "plots", "{sample}_1.png"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_final, "{sample}-pr", "{sample}_summary.txt"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_final, "{sample}-pr", "{sample}_1_summary.functions"), sample=samples_names))
elif config['args']['sequencing']== 'longread':
Expand All @@ -62,7 +62,7 @@ elif config['args']['sequencing']== 'longread':
targets['annotate'].append(expand(os.path.join(dir_annotate, "phold-sr", "{sample}_1_phold", "sub_db_tophits", "defensefinder_cds_predictions.tsv"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phold-sr", "{sample}_1_phold", "sub_db_tophits", "vfdb_cds_predictions.tsv"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "phynteny.gbk"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "pharokka_plot.png"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "plots", "{sample}_1.png"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_final, "{sample}-sr", "{sample}_summary.txt"), sample=samples_names))
targets['annotate'].append(expand(os.path.join(dir_final, "{sample}-sr", "{sample}_1_summary.functions"), sample=samples_names))

135 changes: 135 additions & 0 deletions sphae/workflow/rules/4.assembly_no_medaka.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
Assembly rules
Illumina paired end reads - Megahit
Nanopore reads - Flye
"""

rule flye:
"""Assemble longreads with Flye"""
input:
os.path.join(dir_nanopore, "{sample}_filt.fastq.gz"),
output:
fasta = os.path.join(dir_flye, "{sample}-sr", "assembly.fasta"),
gfa = os.path.join(dir_flye, "{sample}-sr", "assembly_graph.gfa"),
path= os.path.join(dir_flye, "{sample}-sr", "assembly_info.txt"),
log=os.path.join(dir_flye, "{sample}-sr", "flye.log")
params:
out= os.path.join(dir_flye, "{sample}-sr"),
model = config['params']['flye'],
g = config['params']['genomeSize']
log:
os.path.join(dir_log, "flye.{sample}.log")
conda:
os.path.join(dir_env, "flye.yaml")
threads:
config['resources']['bigjob']['cpu']
resources:
mem_mb=config['resources']['bigjob']['mem'],
time=config['resources']['bigjob']['time']
shell:
"""
if flye \
{params.model} \
{input} \
--threads {threads} \
--asm-coverage 50 \
--genome-size {params.g} \
--out-dir {params.out} \
2> {log}; then
touch {output.fasta}
touch {output.gfa}
touch {output.path}
touch {output.log}
else
touch {output.fasta}
touch {output.gfa}
touch {output.path}
touch {output.log}
fi
"""

rule no_medaka:
"""Do not polish longread assembly with medaka - just copy the file so snakemake is happy"""
input:
fasta = os.path.join(dir_flye, "{sample}-sr", "assembly.fasta"),
output:
fasta = os.path.join(dir_flye,"{sample}-sr", "consensus.fasta")
conda:
os.path.join(dir_env, "flye.yaml")
threads:
config['resources']['smalljob']['cpu']
resources:
mem_mb=config['resources']['smalljob']['mem'],
time=config['resources']['smalljob']['time']
log:
os.path.join(dir_log, "no_medaka.{sample}.log")
shell:
"""
if [[ -s {input.fasta} ]] ; then
cp {input.fasta} {output.fasta}
touch {output.fasta}
else
touch {output.fasta}
fi
"""


rule megahit:
"""Assemble short reads with MEGAHIT"""
input:
r1 = os.path.join(dir_fastp, "{sample}_subsampled_R1.fastq.gz"),
r2 = os.path.join(dir_fastp, "{sample}_subsampled_R2.fastq.gz")
output:
contigs=os.path.join(dir_megahit, "{sample}-pr", "final.contigs.fa"),
log=os.path.join(dir_megahit, "{sample}-pr", "log")
params:
os.path.join(dir_megahit, "{sample}-pr")
log:
os.path.join(dir_log, "megahit.{sample}.log")
threads:
config['resources']['bigjob']['cpu']
resources:
mem_mb=config['resources']['bigjob']['mem'],
time=config['resources']['bigjob']['time']
conda:
os.path.join(dir_env, "megahit.yaml")
shell:
"""
if megahit \
-1 {input.r1} \
-2 {input.r2} \
-o {params} \
-t {threads} -f \
2> {log}; then
touch {output.contigs}
touch {output.log}
else
touch {output.contigs}
touch {output.log}
fi
"""

rule fastg:
"""Save the MEGAHIT graph"""
input:
os.path.join(dir_megahit, "{sample}-pr", "final.contigs.fa")
output:
fastg=os.path.join(dir_megahit, "{sample}-pr", "final.fastg"),
graph=os.path.join(dir_megahit, "{sample}-pr", "final.gfa")
conda:
os.path.join(dir_env, "megahit.yaml")
log:
os.path.join(dir_log, "fastg.{sample}.log")
shell:
"""
if [[ -s {input} ]] ; then
kmer=$(head -1 {input} | sed 's/>//' | sed 's/_.*//')
megahit_toolkit contig2fastg $kmer {input} > {output.fastg} 2> {log}
Bandage reduce {output.fastg} {output.graph} 2>> {log}
touch {output.fastg}
touch {output.graph}
else
touch {output.fastg}
touch {output.graph}
fi
"""
10 changes: 5 additions & 5 deletions sphae/workflow/rules/789.annot.smk
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,19 @@ rule phynteny_plotter:
params:
gff3=os.path.join(dir_annot, "{sample}-phynteny", "phynteny.gff3"),
prefix="phynteny",
output=os.path.join(dir_annot, "{sample}-phynteny")
output=os.path.join(dir_annot, "{sample}-phynteny", "plots")
output:
plot=os.path.join(dir_annot, "{sample}-phynteny", "pharokka_plot.png")
plot=os.path.join(dir_annot, "{sample}-phynteny", "plots", "{sample}.png")
resources:
mem =config['resources']['smalljob']['mem'],
time = config['resources']['smalljob']['time']
conda:
os.path.join(dir_env, "pharokka.yaml")
os.path.join(dir_env, "phold.yaml")
shell:
"""
if [[ -s {input.gbk} ]] ; then
genbank_to -g {input.gbk} --gff3 {params.gff3}
pharokka_plotter.py -i {input.fasta} --genbank {input.gbk} --gff {params.gff3} -f -p {params.prefix} -o {params.output}
phold plot -i {input.gbk} -f -p {wildcards.sample} -o {params.output}
touch {output.plot}
else
touch {output.plot}
Expand Down Expand Up @@ -191,7 +191,7 @@ rule summarize:
input:
genome=os.path.join(input_dir, PATTERN_LONG),
gbk=os.path.join(dir_annot, "{sample}-phynteny", "phynteny.gbk"),
plot=os.path.join(dir_annot, "{sample}-phynteny", "pharokka_plot.png"),
plot=os.path.join(dir_annot, "{sample}-phynteny", "plots", "{sample}.png"),
ph_taxa =os.path.join(dir_annot, "{sample}-pharokka", "{sample}_top_hits_mash_inphared.tsv"),
cdden=os.path.join(dir_annot, "{sample}-pharokka", "{sample}_length_gc_cds_density.tsv"),
cds=os.path.join(dir_annot, "{sample}-pharokka", "{sample}_cds_functions.tsv"),
Expand Down
14 changes: 6 additions & 8 deletions sphae/workflow/rules/9.phynteny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ rule phynteny_plotter_paired:
inputdir=os.path.join(dir_annotate, "{sample}-pr-genomes"),
idir=os.path.join(dir_annotate, "phynteny-pr"),
output:
plot=os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "pharokka_plot.png")
plot=os.path.join(dir_annotate, "phynteny-pr", "{sample}_1_phynteny", "plots", "{sample}_1.png")
resources:
mem =config['resources']['smalljob']['mem'],
time = config['resources']['smalljob']['time']
Expand All @@ -55,8 +55,7 @@ rule phynteny_plotter_paired:
for f in {params.inputdir}/*; do
data="$(basename "$f" .fasta)"
genbank_to -g {params.idir}/"$data"_phynteny/phynteny.gbk --gff3 {params.idir}/"$data"_phynteny/phynteny.gff3
pharokka_plotter.py -i {params.inputdir}/"$data".fasta --genbank {params.idir}/"$data"_phynteny/phynteny.gbk --gff {params.idir}/"$data"_phynteny/phynteny.gff3 \
-f -p "$data"_phyntney -o {params.idir}/"$data"_phynteny
phold plot -i {params.idir}/"$data"_phynteny/phynteny.gbk -f -p "$data"_phynteny -o {params.idir}/"$data"_phynteny
done
touch {output.plot}
else
Expand Down Expand Up @@ -107,23 +106,22 @@ rule phynteny_plotter_longreads:
idir=os.path.join(dir_annotate, "phynteny-sr"),
prefix="phynteny",
output:
plot=os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "pharokka_plot.png")
plot=os.path.join(dir_annotate, "phynteny-sr", "{sample}_1_phynteny", "plots", "{sample}_1.png")
resources:
mem =config['resources']['smalljob']['mem'],
time = config['resources']['smalljob']['time']
conda:
os.path.join(dir_env, "pharokka.yaml")
os.path.join(dir_env, "phold.yaml")
shell:
"""
if [[ -s {input.gbk} ]] ; then
for f in {params.inputdir}/*; do
data="$(basename "$f" .fasta)"
genbank_to -g {params.idir}/"$data"_phynteny/phynteny.gbk --gff3 {params.idir}/"$data"_phynteny/phynteny.gff3
pharokka_plotter.py -i {params.inputdir}/"$data".fasta --genbank {params.idir}/"$data"_phynteny/phynteny.gbk --gff {params.idir}/"$data"_phynteny/phynteny.gff3 \
-f -p "$data"_phytney -o {params.idir}/"$data"_phynteny
phold plot -i {params.idir}/"$data"_phynteny/phynteny.gbk -f -o {params.idir}/"$data"_phynteny/plots
done
touch {output.plot}
else
touch {output.plot}
fi
"""
"""
Loading