Skip to content

Commit

Permalink
add to scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
gbouras13 committed Aug 22, 2023
1 parent 2a67bd0 commit 16db3c8
Show file tree
Hide file tree
Showing 16 changed files with 69 additions and 141 deletions.
1 change: 0 additions & 1 deletion bin/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import requests
from alive_progress import alive_bar
from loguru import logger

from post_processing import remove_directory

# to hold information about the different DBs
Expand Down
7 changes: 1 addition & 6 deletions bin/input_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
import os
import shutil
import subprocess as sp
import sys
from argparse import RawTextHelpFormatter

from Bio import SeqIO
from loguru import logger

from util import get_version


Expand All @@ -17,10 +15,7 @@ def get_input():
formatter_class=RawTextHelpFormatter,
)
parser.add_argument(
"-i",
"--infile",
action="store",
help="Input genome file in fasta format."
"-i", "--infile", action="store", help="Input genome file in fasta format."
)
parser.add_argument(
"-o",
Expand Down
2 changes: 1 addition & 1 deletion bin/install_databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import os
import sys
from argparse import RawTextHelpFormatter
from loguru import logger

from databases import instantiate_install
from loguru import logger


def get_db_input():
Expand Down
45 changes: 12 additions & 33 deletions bin/pharokka.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,21 @@
import time
from pathlib import Path

from loguru import logger

from databases import check_db_installation
from hmm import run_pyhmmer
from input_commands import (
check_dependencies,
get_input,
instantiate_dirs,
instantiate_split_output,
validate_fasta,
validate_gene_predictor,
validate_meta,
validate_terminase,
validate_threads,
)
from input_commands import (check_dependencies, get_input, instantiate_dirs,
instantiate_split_output, validate_fasta,
validate_gene_predictor, validate_meta,
validate_terminase, validate_threads)
from loguru import logger
from post_processing import Pharok, remove_post_processing_files
from processes import (
concat_phanotate_meta,
concat_trnascan_meta,
convert_gff_to_gbk,
reorient_terminase,
run_aragorn,
run_dnaapler,
run_mash_dist,
run_mash_sketch,
run_minced,
run_mmseqs,
run_phanotate,
run_phanotate_fasta_meta,
run_phanotate_txt_meta,
run_pyrodigal,
run_trna_scan,
run_trnascan_meta,
split_input_fasta,
translate_fastas,
)
from processes import (concat_phanotate_meta, concat_trnascan_meta,
convert_gff_to_gbk, reorient_terminase, run_aragorn,
run_dnaapler, run_mash_dist, run_mash_sketch,
run_minced, run_mmseqs, run_phanotate,
run_phanotate_fasta_meta, run_phanotate_txt_meta,
run_pyrodigal, run_trna_scan, run_trnascan_meta,
split_input_fasta, translate_fastas)
from util import get_version


Expand Down
16 changes: 9 additions & 7 deletions bin/pharokka_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import os
import sys
from argparse import RawTextHelpFormatter
from loguru import logger
from pathlib import Path

from input_commands import validate_fasta
from loguru import logger
from plot import create_plot
from util import get_version

Expand Down Expand Up @@ -247,12 +247,12 @@ def get_input():
label_force_list = []

if args.label_ids != "":
logger.info(f"You have specified a file {args.label_ids} containing a list of CDS IDs to force label.")
logger.info(
f"You have specified a file {args.label_ids} containing a list of CDS IDs to force label."
)
# check if it is a file
if os.path.isfile(args.label_ids) == False:
logger.error(
f"{args.label_ids} was not found."
)
logger.error(f"{args.label_ids} was not found.")
# check if it contains text
try:
# Open the file in read mode
Expand All @@ -267,7 +267,9 @@ def get_input():
label_force_list = list(ignore_dict)

except FileNotFoundError:
logger.warning(f"{args.label_id} contains no text. No contigs will be ignored")
logger.warning(
f"{args.label_id} contains no text. No contigs will be ignored"
)

logger.info("All files checked.")
logger.info("Plotting the phage.")
Expand All @@ -285,5 +287,5 @@ def get_input():
args.label_size,
args.label_hypotheticals,
args.remove_other_features_labels,
label_force_list
label_force_list,
)
20 changes: 5 additions & 15 deletions bin/pharokka_proteins.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,13 @@
import time
from pathlib import Path

from loguru import logger

from databases import check_db_installation

from input_commands import (
check_dependencies,
instantiate_dirs,
validate_fasta,
validate_threads,
)
from input_commands import (check_dependencies, instantiate_dirs,
validate_fasta, validate_threads)
from loguru import logger
from post_processing import remove_directory, remove_file
from proteins import (
Pharok_Prot,
get_input_proteins,
run_mmseqs_proteins,
run_pyhmmer_proteins,
)
from proteins import (Pharok_Prot, get_input_proteins, run_mmseqs_proteins,
run_pyhmmer_proteins)
from util import get_version


Expand Down
26 changes: 12 additions & 14 deletions bin/plot.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import numpy as np
from loguru import logger
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from pycirclize import Circos
from pycirclize.parser import Genbank, Gff
from loguru import logger

# Load GFF file

Expand All @@ -21,7 +21,7 @@ def create_plot(
label_size,
label_hypotheticals,
remove_other_features_labels,
label_force_list
label_force_list,
):
gff = Gff(gff_file)

Expand Down Expand Up @@ -136,7 +136,7 @@ def create_plot(
r_lim=(75, 80),
fc=data_dict[key]["col"],
)
# rev
# rev
cds_track.genomic_features(
data_dict[key]["rev_list"],
plotstyle="arrow",
Expand Down Expand Up @@ -293,31 +293,30 @@ def create_plot(
id = f.qualifiers.get("ID", [""])[0]

# skip hypotheticals if the flag is false (default)
if id in label_force_list: # if in the list
if id in label_force_list: # if in the list
if len(label) > truncate:
label = label[:truncate] + "..."
pos_list.append(pos)
labels.append(label)
length_list.append(length)
id_list.append(id)
continue # to break if in the list
continue # to break if in the list
else:
if label_hypotheticals == False:
if (
label == ""
or label.startswith("hypothetical")
or label.startswith("unknown")
):
continue # if hypothetical not in the list
else: # all others
if len(label) > truncate:
label == ""
or label.startswith("hypothetical")
or label.startswith("unknown")
):
continue # if hypothetical not in the list
else: # all others
if len(label) > truncate:
label = label[:truncate] + "..."
pos_list.append(pos)
labels.append(label)
length_list.append(length)
id_list.append(id)


###################################################
#### thin out CDS annotations
###################################################
Expand Down Expand Up @@ -360,7 +359,6 @@ def create_plot(
labels = [labels[i] for i in filtered_indices]
length_list = [length_list[i] for i in filtered_indices]


# Plot CDS product labels on outer position
cds_track.xticks(
pos_list,
Expand Down
5 changes: 2 additions & 3 deletions bin/post_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from Bio import SeqIO
from Bio.SeqUtils import GC
from loguru import logger

from processes import convert_gff_to_gbk
from util import remove_directory, remove_file, touch_file

Expand Down Expand Up @@ -529,8 +528,8 @@ def create_gff(self):
locus_df["locus_tag"] = locus_df.contig + "_CDS_" + locus_df["count"]

# assign count and locus_tag to merged_df (for meta)
self.merged_df["locus_tag"] = locus_df["locus_tag"]
self.merged_df["count"] = locus_df["count"]
self.merged_df["locus_tag"] = locus_df["locus_tag"]
self.merged_df["count"] = locus_df["count"]
#################################

#########
Expand Down
3 changes: 1 addition & 2 deletions bin/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from loguru import logger

from external_tools import ExternalTool
from loguru import logger
from util import count_contigs, remove_directory

##### phanotate meta mode ########
Expand Down
19 changes: 5 additions & 14 deletions bin/proteins.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,15 @@
import pyhmmer
from Bio import SeqIO
from Bio.SeqUtils import GC
from external_tools import ExternalTool
from lib.util import (count_contigs, get_contig_headers, get_version,
remove_directory)
from loguru import logger
from post_processing import (process_card_results, process_pyhmmer_results,
process_vfdb_results)
from pyhmmer.easel import SequenceFile
from pyhmmer.plan7 import HMM, HMMFile

from external_tools import ExternalTool
from post_processing import (
process_card_results,
process_pyhmmer_results,
process_vfdb_results,
)

from lib.util import (
count_contigs,
get_contig_headers,
get_version,
remove_directory
)

Result = collections.namedtuple("Result", ["protein", "phrog", "bitscore", "evalue"])


Expand Down
3 changes: 1 addition & 2 deletions bin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
import click
import pandas as pd
from Bio import SeqIO
from loguru import logger

from citation import __citation__
from loguru import logger
from version import __version__


Expand Down
11 changes: 6 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

from setuptools import setup, find_packages
from setuptools import find_packages, setup


def get_version():
Expand Down Expand Up @@ -45,17 +45,18 @@ def package_files(directory):
"bin/citation.py",
"bin/databases.py",
"bin/external_tools.py",
"bin/input_commands.py",
"bin/hmm.py",
"bin/plot.py",
"bin/post_processing.py",
"bin/processes.py",
"bin/proteins.py",
"bin/util.py",
"bin/version.py"
"bin/version.py",
],
packages=['pharokka_runner'],
package_dir=dict(pharokka_runner='bin'),
package_data=dict(pharokka_runner=package_files('bin/')),
packages=["pharokka_runner"],
package_dir=dict(pharokka_runner="bin"),
package_data=dict(pharokka_runner=package_files("bin/")),
include_package_data=True,
license="MIT License",
platforms=["Unix"],
Expand Down
10 changes: 2 additions & 8 deletions tests/test_external_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,8 @@
import pytest
from loguru import logger

from bin.processes import (
run_aragorn,
run_mash_sketch,
run_minced,
run_phanotate,
run_pyrodigal,
)

from bin.processes import (run_aragorn, run_mash_sketch, run_minced,
run_phanotate, run_pyrodigal)
# import functions
from bin.util import remove_directory

Expand Down
Loading

0 comments on commit 16db3c8

Please sign in to comment.