Skip to content

Commit

Permalink
Merge pull request #142 from dfornika/names-to-taxids-141
Browse files (browse the repository at this point in the history)
Add missing args to initETE3Database
  • Loading branch information
jrober84 authored Jun 30, 2023
2 parents 5126ee7 + 5a5e8af commit af3014b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
2 changes: 1 addition & 1 deletion mob_suite/mob_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def main():
new_seq_info[seq_id]['organism'] = organism

ETE3DBTAXAFILE = os.path.abspath(database_dir + "/taxa.sqlite")
taxids = NamesToTaxIDs(organisms, ETE3DBTAXAFILE)
taxids = NamesToTaxIDs(organisms, ETE3DBTAXAFILE, database_dir)
del(organisms)

for seq_id in new_seq_info:
Expand Down
2 changes: 1 addition & 1 deletion mob_suite/mob_typer.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ def main():
record['relaxase_type_accession(s)'] = ",".join(record['relaxase_type_accession(s)'])

host_range = hostrange(record['rep_type(s)'].split(','), record['relaxase_type_accession(s)'].split(','),
mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE)
mob_cluster_id, ncbi, lit, ETE3DBTAXAFILE, database_dir)

for field in host_range:
record[field] = host_range[field]
Expand Down
30 changes: 15 additions & 15 deletions mob_suite/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ def filter_invalid_taxids(taxids):
return filtered


def getHeirarchy(taxid,ETE3DBTAXAFILE):
def getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory):
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
initETE3Database()
initETE3Database(database_directory, ETE3DBTAXAFILE)

ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE)
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
Expand All @@ -103,10 +103,10 @@ def getHeirarchy(taxid,ETE3DBTAXAFILE):
return {'names': names, 'ranks': names}


def getTaxid(taxon,ETE3DBTAXAFILE):
def getTaxid(taxon,ETE3DBTAXAFILE,database_directory):
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
initETE3Database()
initETE3Database(database_directory, ETE3DBTAXAFILE)

ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE)
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
Expand All @@ -121,10 +121,10 @@ def getTaxid(taxon,ETE3DBTAXAFILE):



def NamesToTaxIDs(names,ETE3DBTAXAFILE):
def NamesToTaxIDs(names,ETE3DBTAXAFILE,database_directory):
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
initETE3Database(ETE3DBTAXAFILE)
initETE3Database(database_directory, ETE3DBTAXAFILE)

ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE)

Expand All @@ -138,10 +138,10 @@ def NamesToTaxIDs(names,ETE3DBTAXAFILE):



def getTaxonConvergence(taxids,ETE3DBTAXAFILE):
def getTaxonConvergence(taxids,ETE3DBTAXAFILE,database_directory):
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
initETE3Database(ETE3DBTAXAFILE)
initETE3Database(database_directory, ETE3DBTAXAFILE)

ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE)

Expand Down Expand Up @@ -206,7 +206,7 @@ def getTaxonConvergence(taxids,ETE3DBTAXAFILE):
return (['-', '-'])


def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE):
def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit, ETE3DBTAXAFILE, database_directory):
host_range_predictions = {
'observed_host_range_ncbi_name': '',
'observed_host_range_ncbi_rank': '',
Expand Down Expand Up @@ -251,25 +251,25 @@ def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAX
ncbi_unique_taxids = filter_invalid_taxids(
list(set(ncbi_replicon_taxids + ncbi_cluster_taxids + ncbi_relaxase_taxids)))
host_range_predictions['observed_host_range_ncbi_rank'], host_range_predictions[
'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE)
'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE,database_directory)

# Determine taxids associated with literature

lit_unique_taxids = filter_invalid_taxids(list(set(lit_replicon_taxids)))

host_range_predictions['reported_host_range_lit_rank'], host_range_predictions[
'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE)
'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE,database_directory)

# determine overall host range
overall_taxids = filter_invalid_taxids(list(set(ncbi_unique_taxids + lit_unique_taxids)))
host_range_predictions['predicted_host_range_overall_rank'], host_range_predictions[
'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE)
'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE,database_directory)

# move host-range prediction up to family when it is at genus or species level
if host_range_predictions['predicted_host_range_overall_rank'] == 'genus' or host_range_predictions[
'predicted_host_range_overall_rank'] == 'species':
taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE)
heir = getHeirarchy(taxid,ETE3DBTAXAFILE)
taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE,database_directory)
heir = getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory)
names = heir['names']
ranks = heir['ranks']

Expand Down Expand Up @@ -369,7 +369,7 @@ def isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
return True


def initETE3Database(database_directory, ETE3DBTAXAFILE, logging):
def initETE3Database(database_directory, ETE3DBTAXAFILE):
lockfilepath = os.path.join(database_directory, ".lock")

if os.path.exists(lockfilepath) == False:
Expand Down

0 comments on commit af3014b

Please sign in to comment.