diff --git a/bin/databases.py b/bin/databases.py index 4ca0847..ffc9c8b 100644 --- a/bin/databases.py +++ b/bin/databases.py @@ -26,7 +26,6 @@ import requests from alive_progress import alive_bar from loguru import logger - from post_processing import remove_directory # to hold information about the different DBs diff --git a/bin/input_commands.py b/bin/input_commands.py index 88b6adf..b473a56 100644 --- a/bin/input_commands.py +++ b/bin/input_commands.py @@ -2,12 +2,10 @@ import os import shutil import subprocess as sp -import sys from argparse import RawTextHelpFormatter from Bio import SeqIO from loguru import logger - from util import get_version @@ -17,10 +15,7 @@ def get_input(): formatter_class=RawTextHelpFormatter, ) parser.add_argument( - "-i", - "--infile", - action="store", - help="Input genome file in fasta format." + "-i", "--infile", action="store", help="Input genome file in fasta format." ) parser.add_argument( "-o", diff --git a/bin/install_databases.py b/bin/install_databases.py index e4cf2bd..32d85ab 100755 --- a/bin/install_databases.py +++ b/bin/install_databases.py @@ -3,9 +3,9 @@ import os import sys from argparse import RawTextHelpFormatter -from loguru import logger from databases import instantiate_install +from loguru import logger def get_db_input(): diff --git a/bin/pharokka.py b/bin/pharokka.py index da28d14..859deea 100755 --- a/bin/pharokka.py +++ b/bin/pharokka.py @@ -6,42 +6,21 @@ import time from pathlib import Path -from loguru import logger - from databases import check_db_installation from hmm import run_pyhmmer -from input_commands import ( - check_dependencies, - get_input, - instantiate_dirs, - instantiate_split_output, - validate_fasta, - validate_gene_predictor, - validate_meta, - validate_terminase, - validate_threads, -) +from input_commands import (check_dependencies, get_input, instantiate_dirs, + instantiate_split_output, validate_fasta, + validate_gene_predictor, validate_meta, + validate_terminase, validate_threads) +from loguru import logger from post_processing import Pharok, remove_post_processing_files -from processes import ( - concat_phanotate_meta, - concat_trnascan_meta, - convert_gff_to_gbk, - reorient_terminase, - run_aragorn, - run_dnaapler, - run_mash_dist, - run_mash_sketch, - run_minced, - run_mmseqs, - run_phanotate, - run_phanotate_fasta_meta, - run_phanotate_txt_meta, - run_pyrodigal, - run_trna_scan, - run_trnascan_meta, - split_input_fasta, - translate_fastas, -) +from processes import (concat_phanotate_meta, concat_trnascan_meta, + convert_gff_to_gbk, reorient_terminase, run_aragorn, + run_dnaapler, run_mash_dist, run_mash_sketch, + run_minced, run_mmseqs, run_phanotate, + run_phanotate_fasta_meta, run_phanotate_txt_meta, + run_pyrodigal, run_trna_scan, run_trnascan_meta, + split_input_fasta, translate_fastas) from util import get_version diff --git a/bin/pharokka_plotter.py b/bin/pharokka_plotter.py index b9b1b40..de3d206 100755 --- a/bin/pharokka_plotter.py +++ b/bin/pharokka_plotter.py @@ -3,10 +3,10 @@ import os import sys from argparse import RawTextHelpFormatter -from loguru import logger from pathlib import Path from input_commands import validate_fasta +from loguru import logger from plot import create_plot from util import get_version @@ -247,12 +247,12 @@ def get_input(): label_force_list = [] if args.label_ids != "": - logger.info(f"You have specified a file {args.label_ids} containing a list of CDS IDs to force label.") + logger.info( + f"You have specified a file {args.label_ids} containing a list of CDS IDs to force label." + ) # check if it is a file if os.path.isfile(args.label_ids) == False: - logger.error( - f"{args.label_ids} was not found." - ) + logger.error(f"{args.label_ids} was not found.") # check if it contains text try: # Open the file in read mode @@ -267,7 +267,9 @@ def get_input(): label_force_list = list(ignore_dict) except FileNotFoundError: - logger.warning(f"{args.label_id} contains no text. No contigs will be ignored") + logger.warning( + f"{args.label_id} contains no text. No contigs will be ignored" + ) logger.info("All files checked.") logger.info("Plotting the phage.") @@ -285,5 +287,5 @@ def get_input(): args.label_size, args.label_hypotheticals, args.remove_other_features_labels, - label_force_list + label_force_list, ) diff --git a/bin/pharokka_proteins.py b/bin/pharokka_proteins.py index 79470e1..d47d4a4 100755 --- a/bin/pharokka_proteins.py +++ b/bin/pharokka_proteins.py @@ -5,23 +5,13 @@ import time from pathlib import Path -from loguru import logger - from databases import check_db_installation - -from input_commands import ( - check_dependencies, - instantiate_dirs, - validate_fasta, - validate_threads, -) +from input_commands import (check_dependencies, instantiate_dirs, + validate_fasta, validate_threads) +from loguru import logger from post_processing import remove_directory, remove_file -from proteins import ( - Pharok_Prot, - get_input_proteins, - run_mmseqs_proteins, - run_pyhmmer_proteins, -) +from proteins import (Pharok_Prot, get_input_proteins, run_mmseqs_proteins, + run_pyhmmer_proteins) from util import get_version diff --git a/bin/plot.py b/bin/plot.py index 6ba1179..1417b01 100644 --- a/bin/plot.py +++ b/bin/plot.py @@ -1,9 +1,9 @@ import numpy as np +from loguru import logger from matplotlib.lines import Line2D from matplotlib.patches import Patch from pycirclize import Circos from pycirclize.parser import Genbank, Gff -from loguru import logger # Load GFF file @@ -21,7 +21,7 @@ def create_plot( label_size, label_hypotheticals, remove_other_features_labels, - label_force_list + label_force_list, ): gff = Gff(gff_file) @@ -136,7 +136,7 @@ def create_plot( r_lim=(75, 80), fc=data_dict[key]["col"], ) - # rev + # rev cds_track.genomic_features( data_dict[key]["rev_list"], plotstyle="arrow", @@ -293,31 +293,30 @@ def create_plot( id = f.qualifiers.get("ID", [""])[0] # skip hypotheticals if the flag is false (default) - if id in label_force_list: # if in the list + if id in label_force_list: # if in the list if len(label) > truncate: label = label[:truncate] + "..." pos_list.append(pos) labels.append(label) length_list.append(length) id_list.append(id) - continue # to break if in the list + continue # to break if in the list else: if label_hypotheticals == False: if ( - label == "" - or label.startswith("hypothetical") - or label.startswith("unknown") - ): - continue # if hypothetical not in the list - else: # all others - if len(label) > truncate: + label == "" + or label.startswith("hypothetical") + or label.startswith("unknown") + ): + continue # if hypothetical not in the list + else: # all others + if len(label) > truncate: label = label[:truncate] + "..." pos_list.append(pos) labels.append(label) length_list.append(length) id_list.append(id) - ################################################### #### thin out CDS annotations ################################################### @@ -360,7 +359,6 @@ def create_plot( labels = [labels[i] for i in filtered_indices] length_list = [length_list[i] for i in filtered_indices] - # Plot CDS product labels on outer position cds_track.xticks( pos_list, diff --git a/bin/post_processing.py b/bin/post_processing.py index 1c47fe9..e24bd0b 100644 --- a/bin/post_processing.py +++ b/bin/post_processing.py @@ -11,7 +11,6 @@ from Bio import SeqIO from Bio.SeqUtils import GC from loguru import logger - from processes import convert_gff_to_gbk from util import remove_directory, remove_file, touch_file @@ -529,8 +528,8 @@ def create_gff(self): locus_df["locus_tag"] = locus_df.contig + "_CDS_" + locus_df["count"] # assign count and locus_tag to merged_df (for meta) - self.merged_df["locus_tag"] = locus_df["locus_tag"] - self.merged_df["count"] = locus_df["count"] + self.merged_df["locus_tag"] = locus_df["locus_tag"] + self.merged_df["count"] = locus_df["count"] ################################# ######### diff --git a/bin/processes.py b/bin/processes.py index e27eb7e..4d5a0b1 100644 --- a/bin/processes.py +++ b/bin/processes.py @@ -11,9 +11,8 @@ from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from loguru import logger - from external_tools import ExternalTool +from loguru import logger from util import count_contigs, remove_directory ##### phanotate meta mode ######## diff --git a/bin/proteins.py b/bin/proteins.py index 97a0153..2368b9f 100644 --- a/bin/proteins.py +++ b/bin/proteins.py @@ -15,24 +15,15 @@ import pyhmmer from Bio import SeqIO from Bio.SeqUtils import GC +from external_tools import ExternalTool +from lib.util import (count_contigs, get_contig_headers, get_version, + remove_directory) from loguru import logger +from post_processing import (process_card_results, process_pyhmmer_results, + process_vfdb_results) from pyhmmer.easel import SequenceFile from pyhmmer.plan7 import HMM, HMMFile -from external_tools import ExternalTool -from post_processing import ( - process_card_results, - process_pyhmmer_results, - process_vfdb_results, -) - -from lib.util import ( - count_contigs, - get_contig_headers, - get_version, - remove_directory -) - Result = collections.namedtuple("Result", ["protein", "phrog", "bitscore", "evalue"]) diff --git a/bin/util.py b/bin/util.py index a39ac01..55fed46 100644 --- a/bin/util.py +++ b/bin/util.py @@ -4,9 +4,8 @@ import click import pandas as pd from Bio import SeqIO -from loguru import logger - from citation import __citation__ +from loguru import logger from version import __version__ diff --git a/setup.py b/setup.py index d4ade61..2456b8a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ import os -from setuptools import setup, find_packages +from setuptools import find_packages, setup def get_version(): @@ -45,17 +45,18 @@ def package_files(directory): "bin/citation.py", "bin/databases.py", "bin/external_tools.py", + "bin/input_commands.py", "bin/hmm.py", "bin/plot.py", "bin/post_processing.py", "bin/processes.py", "bin/proteins.py", "bin/util.py", - "bin/version.py" + "bin/version.py", ], - packages=['pharokka_runner'], - package_dir=dict(pharokka_runner='bin'), - package_data=dict(pharokka_runner=package_files('bin/')), + packages=["pharokka_runner"], + package_dir=dict(pharokka_runner="bin"), + package_data=dict(pharokka_runner=package_files("bin/")), include_package_data=True, license="MIT License", platforms=["Unix"], diff --git a/tests/test_external_commands.py b/tests/test_external_commands.py index c4f6a33..6ddda6c 100644 --- a/tests/test_external_commands.py +++ b/tests/test_external_commands.py @@ -14,14 +14,8 @@ import pytest from loguru import logger -from bin.processes import ( - run_aragorn, - run_mash_sketch, - run_minced, - run_phanotate, - run_pyrodigal, -) - +from bin.processes import (run_aragorn, run_mash_sketch, run_minced, + run_phanotate, run_pyrodigal) # import functions from bin.util import remove_directory diff --git a/tests/test_input_commands.py b/tests/test_input_commands.py index 2b927ae..110203c 100755 --- a/tests/test_input_commands.py +++ b/tests/test_input_commands.py @@ -6,7 +6,6 @@ """ import sys - # import import unittest from pathlib import Path @@ -15,16 +14,10 @@ import pytest from loguru import logger -from bin.input_commands import ( - instantiate_dirs, - validate_fasta, - validate_gene_predictor, - validate_meta, - validate_strand, - validate_terminase, - validate_terminase_start, - validate_threads, -) +from bin.input_commands import (instantiate_dirs, validate_fasta, + validate_gene_predictor, validate_meta, + validate_strand, validate_terminase, + validate_terminase_start, validate_threads) from bin.util import remove_directory # test data diff --git a/tests/test_overall.py b/tests/test_overall.py index cb56fab..fb4105b 100755 --- a/tests/test_overall.py +++ b/tests/test_overall.py @@ -8,7 +8,6 @@ # import import os import shutil - # import functions import subprocess import sys @@ -19,16 +18,10 @@ import pytest from loguru import logger -from bin.input_commands import ( - instantiate_dirs, - validate_fasta, - validate_gene_predictor, - validate_meta, - validate_strand, - validate_terminase, - validate_terminase_start, - validate_threads, -) +from bin.input_commands import (instantiate_dirs, validate_fasta, + validate_gene_predictor, validate_meta, + validate_strand, validate_terminase, + validate_terminase_start, validate_threads) from bin.util import remove_directory # import functions diff --git a/tests/test_proteins.py b/tests/test_proteins.py index 29993af..afc7cdb 100755 --- a/tests/test_proteins.py +++ b/tests/test_proteins.py @@ -8,7 +8,6 @@ # import import os import shutil - # import functions import subprocess import sys @@ -69,7 +68,6 @@ def test_download(tmp_dir): exec_command(cmd) - def test_proteins(tmp_dir): """test pharokka proteins""" input_fasta: Path = f"{proteins_data}/phanotate.faa" @@ -82,22 +80,20 @@ def test_proteins(tmp_dir): def test_proteins_hmm_only(tmp_dir): """test pharokka proteins hmm_only""" input_fasta: Path = f"{proteins_data}/phanotate.faa" - cmd = ( - f"pharokka_proteins.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t 1 -f --hmm_only" - ) + cmd = f"pharokka_proteins.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t 1 -f --hmm_only" exec_command(cmd) + def test_proteins_mmseqs_only(tmp_dir): """test pharokka proteins mmseqs_only""" input_fasta: Path = f"{proteins_data}/phanotate.faa" - cmd = ( - f"pharokka_proteins.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t 1 -f --mmseqs2_only" - ) + cmd = f"pharokka_proteins.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t 1 -f --mmseqs2_only" exec_command(cmd) temp_dir = Path(f"{test_data}/fake_out") + class testFails(unittest.TestCase): """Tests for fails"""