Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added auto defaults for all hmm*.py, create_dbs.py, and download_eggnog_data.py #347

Merged
merged 1 commit into from
Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions create_dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
## CPCantalapiedra 2021

import os, sys, shutil
from argparse import ArgumentParser
import argparse

from eggnogmapper.common import set_data_path, get_data_path, pexists, pjoin, existing_dir
from eggnogmapper.utils import ask, ask_name, colorify
Expand All @@ -18,6 +18,10 @@ def get_eggnog_taxid_info_file(): return pjoin(get_data_path(), "e5.taxid_info.t

BASE_URL = f'http://eggnog5.embl.de/download/eggnog_5.0'

# Help formatter handed to argparse.ArgumentParser(formatter_class=...) in the
# MAIN section of this script. It mixes two stock argparse formatters:
#   - ArgumentDefaultsHelpFormatter: appends each argument's default value to
#     its help text, so defaults need not be written by hand in help strings.
#   - RawDescriptionHelpFormatter: keeps the parser description/epilog text
#     as written instead of re-wrapping it.
# The class body is intentionally empty; all behavior comes from the bases.
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
argparse.RawDescriptionHelpFormatter):
pass

def run(cmd):
print(colorify(cmd, 'cyan'))
if not args.simulate:
Expand Down Expand Up @@ -110,15 +114,14 @@ def parse_proteins(out_file, proteins_file, taxa_set):
##
# MAIN
if __name__ == "__main__":
parser = ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=CustomFormatter)

parser.add_argument('-m', dest='mode',
choices = ['diamond', 'mmseqs'],
default='diamond',
help=(
'diamond: search seed orthologs using diamond (-i is required). '
'mmseqs: search seed orthologs using MMseqs2 (-i is required). '
'Default:diamond'
'mmseqs: search seed orthologs using MMseqs2 (-i is required).'
))

parser.add_argument('-x', action="store_true", dest='skip_mmseqs_index',
Expand Down
9 changes: 6 additions & 3 deletions download_eggnog_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3

import os, sys
from argparse import ArgumentParser
import argparse

from eggnogmapper.common import get_eggnogdb_file, get_ncbitaxadb_file, get_eggnog_dmnd_db, get_eggnog_mmseqs_dbpath, get_pfam_dbpath, get_hmmer_base_dbpath
from eggnogmapper.common import pexists, set_data_path, get_data_path, existing_dir, HMMPRESS
Expand All @@ -15,6 +15,10 @@
EGGNOG_URL = f'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level'
EGGNOG_DOWNLOADS_URL = 'http://eggnog5.embl.de/#/app/downloads'

# Help formatter handed to argparse.ArgumentParser(formatter_class=...) in the
# MAIN section of this script. It mixes two stock argparse formatters:
#   - ArgumentDefaultsHelpFormatter: appends each argument's default value to
#     its help text.
#   - RawDescriptionHelpFormatter: keeps the parser description/epilog text
#     as written instead of re-wrapping it.
# The class body is intentionally empty; all behavior comes from the bases.
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
argparse.RawDescriptionHelpFormatter):
pass

def run(cmd):
print(colorify(cmd, 'cyan'))
if not args.simulate:
Expand Down Expand Up @@ -166,11 +170,10 @@ def download_hmm_database(level, dbname, dbpath):

return


##
# MAIN
if __name__ == "__main__":
parser = ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=CustomFormatter)

parser.add_argument('-D', action="store_true", dest='skip_diamond',
help='Do not install the diamond database')
Expand Down
36 changes: 18 additions & 18 deletions hmm_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@
from eggnogmapper.search.hmmer.hmmer_search import QUERY_TYPE_SEQ, QUERY_TYPE_HMM, DB_TYPE_SEQ, DB_TYPE_HMM
from eggnogmapper.search.hmmer.hmmer_setup import DEFAULT_PORT, DEFAULT_END_PORT


__description__ = ('A program wrapping HMM in-memory searches')
__author__ = 'Jaime Huerta Cepas'
__license__ = "GPL v2"

# Help formatter used by create_arg_parser() as the parser's formatter_class.
# It mixes two stock argparse formatters:
#   - ArgumentDefaultsHelpFormatter: appends each argument's default value to
#     its help text, which is why the hand-written "Default: ..." suffixes
#     are dropped from the help strings in this change.
#   - RawDescriptionHelpFormatter: keeps the parser description/epilog text
#     as written instead of re-wrapping it.
# The class body is intentionally empty; all behavior comes from the bases.
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
argparse.RawDescriptionHelpFormatter):
pass

def create_arg_parser():

parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=CustomFormatter)

parser.add_argument('--version', action='store_true',
help="show version and exit.")
Expand All @@ -34,26 +37,25 @@ def create_arg_parser():
pg_exec = parser.add_argument_group('Execution Options')

pg_exec.add_argument('--cpu', type=int, default=1, metavar='NUM_CPU',
help="Number of CPUs to be used. --cpu 0 to run with all available CPUs. Default: 2")
help="Number of CPUs to be used. --cpu 0 to run with all available CPUs.")

pg_exec.add_argument('--mp_start_method', type=str, default=MP_START_METHOD_DEFAULT,
choices = [MP_START_METHOD_FORK, MP_START_METHOD_SPAWN, MP_START_METHOD_FORKSERVER],
help="Sets the python multiprocessing start method. Check https://docs.python.org/3/library/multiprocessing.html. Only use if the default method is not working properly in your OS. Default: "+str(MP_START_METHOD_DEFAULT))
help="Sets the python multiprocessing start method. Check https://docs.python.org/3/library/multiprocessing.html. Only use if the default method is not working properly in your OS.")

##
pg_input = parser.add_argument_group('Input Data Options')

pg_input.add_argument('-i', dest="input", metavar='FASTA_FILE', type=existing_file,
help=f'Input with queries. Either a FASTA file with sequences (proteins by default; see --translate)'
' or a HMM file with profiles (--qtype hmm)')
' or a HMM file with profiles (--qtype hmm).')

pg_input.add_argument('--translate', action="store_true",
help='Assume input sequences are CDS instead of proteins (it has effect only if --qtype seq, and also when -d is a plain FASTA file)')
help='Assume input sequences are CDS instead of proteins (it has effect only if --qtype seq, and also when -d is a plain FASTA file).')

pg_input.add_argument('--trans_table', dest='trans_table', type=str, metavar='TRANS_TABLE_CODE',
help=(
f"It is used when --translate, check https://biopython.org/docs/1.75/api/Bio.Seq.html#Bio.Seq.Seq.translate. "
))
f"It is used when --translate, check https://biopython.org/docs/1.75/api/Bio.Seq.html#Bio.Seq.Seq.translate."))

##
pg_hmmer = parser.add_argument_group('HMMER Search Options')
Expand All @@ -68,13 +70,11 @@ def create_arg_parser():
"If --servers_list is specified, host and port from -d option will be ignored.")

pg_hmmer.add_argument('--qtype', choices=[QUERY_TYPE_HMM, QUERY_TYPE_SEQ], default=QUERY_TYPE_SEQ,
help="Type of input data (-i). "
f"Default: {QUERY_TYPE_SEQ}")
help="Type of input data (-i).")

pg_hmmer.add_argument('--dbtype', dest="dbtype",
choices=[DB_TYPE_HMM, DB_TYPE_SEQ], default=DB_TYPE_HMM,
help="Type of data in DB (-d). "
f"Default: {DB_TYPE_HMM}")
help="Type of data in DB (-d).")

pg_hmmer.add_argument('--usemem', action="store_true",
help='''Use this option to allocate the whole database (-d) in memory using hmmpgmd.
Expand All @@ -97,26 +97,26 @@ def create_arg_parser():
" By default, cpus specified with --cpu will be distributed among servers and workers.")

pg_hmmer.add_argument('--hmm_maxhits', dest='maxhits', type=int, default=1, metavar='MAXHITS',
help="Max number of hits to report (0 to report all). Default=1.")
help="Max number of hits to report (0 to report all).")

pg_hmmer.add_argument('--report_no_hits', action="store_true",
help="Whether queries without hits should be included in the output table.")

pg_hmmer.add_argument('--hmm_maxseqlen', dest='maxseqlen', type=int, default=5000, metavar='MAXSEQLEN',
help="Ignore query sequences larger than `maxseqlen`. Default=5000")
help="Ignore query sequences larger than `maxseqlen`")

pg_hmmer.add_argument('--hmm_evalue', dest='evalue', default=None, type=float, metavar='MIN_E-VALUE',
help="E-value threshold. For example, -hmm_evalue 0.001. Default=10")
help="E-value threshold. For example, -hmm_evalue 0.001.")

pg_hmmer.add_argument('--hmm_score', dest='score', default=None, type=float, metavar='MIN_SCORE',
help="Bit score threshold. For example, --hmm_score 20. Default=None")
help="Bit score threshold. For example, --hmm_score 20.")

pg_hmmer.add_argument('--hmm_qcov', dest='qcov', type=float, metavar='MIN_QCOV',
help="min query coverage (from 0 to 1). Default=(disabled)")
help="min query coverage (from 0 to 1).")

pg_hmmer.add_argument('--Z', dest='Z', type=float, default=40000000, metavar='DB_SIZE',
help='Fixed database size used in phmmer/hmmscan'
' (allows comparing e-values among databases). Default=40,000,000')
' (allows comparing e-values among databases).')

pg_hmmer.add_argument('--cut_ga', action="store_true",
help="Adds the --cut_ga to hmmer commands (useful for Pfam mappings, for example). See hmmer documentation.")
Expand Down
12 changes: 7 additions & 5 deletions hmm_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@
__author__ = 'Jaime Huerta Cepas'
__license__ = "GPL v2"

# Help formatter used by create_arg_parser() as the parser's formatter_class.
# It mixes two stock argparse formatters:
#   - ArgumentDefaultsHelpFormatter: appends each argument's default value to
#     its help text.
#   - RawDescriptionHelpFormatter: keeps the parser description/epilog text
#     as written instead of re-wrapping it.
# The class body is intentionally empty; all behavior comes from the bases.
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
argparse.RawDescriptionHelpFormatter):
pass

def create_arg_parser():

parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=CustomFormatter)

parser.add_argument('--version', action='store_true',
help="show version and exit.")
Expand All @@ -33,11 +36,11 @@ def create_arg_parser():
pg_exec = parser.add_argument_group('Execution Options')

pg_exec.add_argument('--cpu', type=int, default=1, metavar='NUM_CPU',
help="Number of CPUs to be used. --cpu 0 to run with all available CPUs. Default: 2")
help="Number of CPUs to be used. --cpu 0 to run with all available CPUs.")

pg_exec.add_argument('--mp_start_method', type=str, default=MP_START_METHOD_DEFAULT,
choices = [MP_START_METHOD_FORK, MP_START_METHOD_SPAWN, MP_START_METHOD_FORKSERVER],
help="Sets the python multiprocessing start method. Check https://docs.python.org/3/library/multiprocessing.html. Only use if the default method is not working properly in your OS. Default: "+str(MP_START_METHOD_DEFAULT))
help="Sets the python multiprocessing start method. Check https://docs.python.org/3/library/multiprocessing.html. Only use if the default method is not working properly in your OS.")

##
pg_server = parser.add_argument_group('HMM Server Options')
Expand All @@ -48,8 +51,7 @@ def create_arg_parser():

pg_server.add_argument('--dbtype', dest="dbtype",
choices=[DB_TYPE_HMM, DB_TYPE_SEQ], default=DB_TYPE_HMM,
help="Type of data in DB (-db). "
f"Default: {DB_TYPE_HMM}")
help="Type of data in DB (-db).")

pg_server.add_argument('-p', '--port', dest='port', type=int, default=DEFAULT_PORT, metavar='PORT',
help=('Port used by clients to connect to this HMM master server'))
Expand Down
10 changes: 7 additions & 3 deletions hmm_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@
__author__ = 'Jaime Huerta Cepas'
__license__ = "GPL v2"

# Help formatter used by create_arg_parser() as the parser's formatter_class.
# It mixes two stock argparse formatters:
#   - ArgumentDefaultsHelpFormatter: appends each argument's default value to
#     its help text.
#   - RawDescriptionHelpFormatter: keeps the parser description/epilog text
#     as written instead of re-wrapping it.
# The class body is intentionally empty; all behavior comes from the bases.
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
argparse.RawDescriptionHelpFormatter):
pass

def create_arg_parser():

parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(formatter_class=CustomFormatter)

parser.add_argument('--version', action='store_true',
help="show version and exit.")
Expand All @@ -30,11 +34,11 @@ def create_arg_parser():
pg_exec = parser.add_argument_group('Execution Options')

pg_exec.add_argument('--cpu', type=int, default=2, metavar='NUM_CPU',
help="Number of CPUs to be used. --cpu 0 to run with all available CPUs. Default: 2")
help="Number of CPUs to be used. --cpu 0 to run with all available CPUs.")

pg_exec.add_argument('--mp_start_method', type=str, default=MP_START_METHOD_DEFAULT,
choices = [MP_START_METHOD_FORK, MP_START_METHOD_SPAWN, MP_START_METHOD_FORKSERVER],
help="Sets the python multiprocessing start method. Check https://docs.python.org/3/library/multiprocessing.html. Only use if the default method is not working properly in your OS. Default: "+str(MP_START_METHOD_DEFAULT))
help="Sets the python multiprocessing start method. Check https://docs.python.org/3/library/multiprocessing.html. Only use if the default method is not working properly in your OS.")

##
pg_master = parser.add_argument_group('HMM Master Server Options')
Expand Down