Skip to content

Commit

Permalink
Merge pull request #2376 from merenlab/iss2375
Browse files Browse the repository at this point in the history
A global argument to keep all functional annotations per gene (fixes #2375)
  • Loading branch information
meren authored Nov 26, 2024
2 parents f90e2db + 18a567d commit b995ab6
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
3 changes: 3 additions & 0 deletions anvio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@
USER_KNOWS_IT_IS_NOT_A_GOOD_IDEA = '--I-know-this-is-not-a-good-idea' in sys.argv
DOCS_PATH = os.path.join(os.path.dirname(__file__), 'docs')
TMP_DIR = None
# global args that we can set internally as needed
RETURN_ALL_FUNCTIONS_FROM_SOURCE_FOR_EACH_GENE = False # set to True if you want all functional annotations from a given annotation source
# instead of the best hit per gene

# if the user wants to use a non-default tmp directory, we set it here
if '--tmp-dir' in sys.argv:
Expand Down
23 changes: 17 additions & 6 deletions anvio/dbops.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,9 @@ def init_functions(self, requested_sources=[], dont_panic=False):
If self.split_names_of_interest has a value, the dictionary only includes gene calls from those splits.
Afterwards, it sets self.gene_function_calls_initiated to True.
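Note: the global argument RETURN_ALL_FUNCTIONS_FROM_SOURCE_FOR_EACH_GENE affects the behavior of this function. If False, we keep only
the best hit per gene (lowest e-value) for a given annotation source. If True, we keep all hits for a gene from that source: the
accessions, functions, and e-values of successive hits are each joined into a single string using '!!!' as the separator.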
"""
if not self.contigs_db_path:
return
Expand Down Expand Up @@ -643,13 +646,21 @@ def init_functions(self, requested_sources=[], dont_panic=False):
if gene_callers_id not in self.gene_function_calls_dict:
self.gene_function_calls_dict[gene_callers_id] = dict([(s, None) for s in self.gene_function_call_sources])

if self.gene_function_calls_dict[gene_callers_id][source] and e_value:
if self.gene_function_calls_dict[gene_callers_id][source][2] < e_value:
# 'what we have:', self.gene_function_calls_dict[gene_callers_id][source]
# 'rejected :', ('%s :: %s' % (function if function else 'unknown', accession), e_value)
continue

entry = (accession, '%s' % (function if function else 'unknown'), e_value)

if self.gene_function_calls_dict[gene_callers_id][source]:
if anvio.RETURN_ALL_FUNCTIONS_FROM_SOURCE_FOR_EACH_GENE:
previous_entry_acc, previous_entry_func, previous_entry_evalue = self.gene_function_calls_dict[gene_callers_id][source]
combined_acc = f"{previous_entry_acc}!!!{accession}"
combined_func = f"{previous_entry_func}!!!{entry[1]}"
combined_evalue = f"{previous_entry_evalue}!!!{e_value}"
entry = (combined_acc, combined_func, combined_evalue)
else:
if e_value and self.gene_function_calls_dict[gene_callers_id][source][2] < e_value:
# 'what we have:', self.gene_function_calls_dict[gene_callers_id][source]
# 'rejected :', ('%s :: %s' % (function if function else 'unknown', accession), e_value)
continue

self.gene_function_calls_dict[gene_callers_id][source] = entry

contigs_db.disconnect()
Expand Down
3 changes: 3 additions & 0 deletions bin/anvi-summarize
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ def main(args):
if not args.contigs_db:
raise ConfigError("You must provide a contigs database when you summarize anvi'o profiles. True story.")

# set this global arg so that all functional annotations are reflected in the summary output
anvio.RETURN_ALL_FUNCTIONS_FROM_SOURCE_FOR_EACH_GENE = True

summary = summarizer.ProfileSummarizer(args)
else:
raise ConfigError("Well. '%s' is neither an anvi'o pan database, nor an anvi'o profile database. There is nothing this "
Expand Down

0 comments on commit b995ab6

Please sign in to comment.