Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved handling of xrefs #208

Merged
merged 8 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,602 changes: 860 additions & 742 deletions docs/source/tutorials/connectomics.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/vfb_connect.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: vfb_connect
Version: 2.2.7.dev7+f46d531.dirty
Version: 2.2.10.dev8+970cf66.dirty
Summary: Wrapper for querying VirtualFlyBrain servers.
Home-page: https://github.com/VirtualFlyBrain/VFB_connect
Author: David Osumi-Sutherland
Expand Down
42 changes: 39 additions & 3 deletions src/vfb_connect/cross_server_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
from colormath.color_conversions import convert_color
from scipy.spatial import KDTree

# Lookup table from the database names/aliases a caller may pass as ``db``
# to the database identifier that is substituted for it before querying.
VFB_DBS_2_SYMBOLS = {
    # Longer dataset names.
    "JRC_OpticLobe": "neuprint_JRC_OpticLobe_v1_0_1",
    "FAFB": "catmaid_fafb",
    "L1EM": "catmaid_l1em",
    "MANC": "neuprint_JRC_Manc_1_2_1",
    "FlyEM-HB": "neuprint_JRC_Hemibrain_1point1",
    # Short lower-case aliases.
    "ol": "neuprint_JRC_OpticLobe_v1_0_1",
    "fafb": "catmaid_fafb",
    "l1em": "catmaid_l1em",
    "fw": "flywire783",
    "mv": "neuprint_JRC_Manc_1_2_1",
    "hb": "neuprint_JRC_Hemibrain_1point1",
}


def gen_short_form(iri):
"""Generate short_form (string) from an IRI string.
Expand Down Expand Up @@ -811,6 +815,12 @@ def get_terms_by_xref(self, xrefs: iter, db='', summary=True, return_dataframe=T
`return_dataframe` is `True` and `summary` is `True`.
:rtype: list of dicts or pandas.DataFrame
"""
if isinstance(xrefs, str):
xrefs = [xrefs]

if db in VFB_DBS_2_SYMBOLS.keys():
db = VFB_DBS_2_SYMBOLS[db]

return self.neo_query_wrapper.get_terms_by_xref(xrefs, db=db, summary=summary, return_dataframe=False)

def xref_2_vfb_id(self, acc=None, db='', id_type='', reverse_return=False, return_just_ids=True, verbose=False):
Expand Down Expand Up @@ -843,7 +853,10 @@ def xref_2_vfb_id(self, acc=None, db='', id_type='', reverse_return=False, retur
else:
new_acc.append(xref.split(':')[-1])
acc = new_acc
if db in VFB_DBS_2_SYMBOLS.keys():
db = VFB_DBS_2_SYMBOLS[db]
result = self.neo_query_wrapper.xref_2_vfb_id(acc=acc, db=db, id_type=id_type, reverse_return=reverse_return, verbose=verbose)
print(result) if verbose else None
if return_just_ids & reverse_return:
return [x.key for x in result]
if return_just_ids and not reverse_return:
Expand Down Expand Up @@ -899,8 +912,7 @@ def get_TermInfo(self, short_forms: iter, summary=True, cache=True, return_dataf
print(short_forms) if verbose else None
return self.neo_query_wrapper.get_TermInfo(short_forms, summary=summary, cache=cache, return_dataframe=False, limit=limit, verbose=verbose)

@batch_query
def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
def vfb_id_2_xrefs(self, vfb_id, db='', id_type='', reverse_return=False, verbose=False, datasource_only=True):
"""Map a list of short_form IDs in VFB to external DB IDs

:param vfb_id: An iterable (e.g. a list) of VFB short_form IDs.
Expand All @@ -912,7 +924,31 @@ def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
Return if `reverse_return` is `True`:
dict { acc : [{ db: <db> : vfb_id : <VFB_id> }
"""
return self.neo_query_wrapper.vfb_id_2_xrefs(vfb_id=vfb_id, db=db, id_type=id_type, reverse_return=reverse_return)
if isinstance(vfb_id, str):
vfb_id = [vfb_id]
if db in VFB_DBS_2_SYMBOLS.keys():
db = VFB_DBS_2_SYMBOLS[db]
print(f"vfb_id_2_xrefs: {vfb_id}, {db}, {id_type}, {reverse_return}") if verbose else None
result = self.neo_query_wrapper.vfb_id_2_xrefs(vfb_id=vfb_id, db=db, id_type=id_type, reverse_return=False, verbose=verbose, datasource_only=datasource_only)
print(f"Returned: {result}") if verbose else None
rl = {}
if reverse_return:
for id in vfb_id:
if id not in result.keys():
print(f"No match found for {id}")
else:
for r in result[id]:
rl[":".join([r['db'], r['acc']])] = id
else:
for id in vfb_id:
if id not in result.keys():
print(f"No match found for {id}")
else:
rl[id] = []
for r in result[id]:
rl[id].append(":".join([r['db'], r['acc']]))
print(rl) if verbose else None
return rl

def get_dbs(self, include_symbols=True, data_sources_only=True, verbose=False):
"""Get all external databases in the database, optionally filtering by data sources and including symbols.
Expand Down
9 changes: 7 additions & 2 deletions src/vfb_connect/neo/query_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def get_templates(self, summary=True, return_dataframe=True, include_symbols=Fal
short_forms.extend([d['s'] for d in dc if d['s']])
return self.get_anatomical_individual_TermInfo(short_forms, summary=summary, return_dataframe=return_dataframe)

def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False, verbose=False, datasource_only=False):
"""Map a list of short_form IDs in VFB to external DB IDs

:param vfb_id: An iterable (e.g. a list) of VFB short_form IDs.
Expand All @@ -422,14 +422,19 @@ def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
clause2 = ''
if id_type:
clause2 = "AND r.id_type = '%s'" % id_type
if datasource_only:
clause2 = "AND s.is_data_source = [True]"
ret = "RETURN i.short_form as key, " \
"collect({ db: s.short_form, acc: r.accession[0]}) as mapping"
if reverse_return:
ret = "RETURN r.accession[0] as key, " \
"collect({ db: s.short_form, vfb_id: i.short_form }) as mapping"
"collect({ db: CASE WHEN s.symbol IS NOT NULL AND size(s.symbol) > 0 AND NOT s.symbol[0] = '' THEN s.symbol[0] ELSE s.short_form END, vfb_id: i.short_form }) as mapping"
q = ' '.join([match, clause1, clause2, ret])
print(q) if verbose else None
dc = self._query(q)
print(dc) if verbose else None
mapping = {d['key']: d['mapping'] for d in dc}
print(mapping) if verbose else None
unmapped = set(vfb_id)-set(mapping.keys())
if unmapped:
print("33mWarning:\033[0m The following IDs do not match DB &/or id_type constraints: %s" % str(unmapped))
Expand Down
1 change: 1 addition & 0 deletions src/vfb_connect/schema/test/vfb_term_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ def test_vfbterm_xref(self):
self.assertTrue(term.xref_id)
print(dir(term))
print(term.xref_id)
print(self.vfb.xref_2_vfb_id(term.xref_id, return_just_ids=True, verbose=True))
self.assertEqual(self.vfb.xref_2_vfb_id(term.xref_id, return_just_ids=True)[0], term.id)

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions src/vfb_connect/schema/vfb_term.py
Original file line number Diff line number Diff line change
Expand Up @@ -1667,6 +1667,7 @@ def __init__(self, id=None, term: Optional[Term] = None, related_terms: Optional
if xref.is_data_source:
self.data_source = xref.site_name
self.xref_id = xref.id
self.xref_accession = xref.accession if hasattr(xref, 'accession') else None
self.xref_url = xref.link if hasattr(xref, 'link') and xref.link else xref.homepage
self.xref_name = xref.name

Expand Down
12 changes: 12 additions & 0 deletions src/vfb_connect/test/cross_server_tools_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,18 @@ def test_nt_receptors_in_downstream_neurons(self):
print(bar)
self.assertTrue(len(bar) > 9)

def test_xref_to_id(self):
    """Check that a ``db:accession`` xref string resolves to its VFB ID.

    The ``FlyEM-HB`` prefix is one of the database names the client
    accepts; the lookup is expected to return a list holding the single
    matching VFB short form.
    """
    fu = self.vc.xref_2_vfb_id('FlyEM-HB:1353544607')
    self.assertTrue(fu)
    print(fu)
    # assertEqual reports the actual value on failure, unlike
    # assertTrue(a == b), which only reports "False is not true".
    self.assertEqual(fu, ['VFB_jrchk3bp'])

def test_id_to_xref(self):
    """Check that a VFB ID maps to its external cross-references.

    ``vfb_id_2_xrefs`` is called with a single ID string and the default
    ``reverse_return=False``, so the result is expected to be a non-empty
    mapping keyed by exactly that ID.
    """
    fu = self.vc.vfb_id_2_xrefs('VFB_jrchk3bp', verbose=True)
    self.assertTrue(fu)
    print(fu)
    # A dict keys view never compares equal to a list, so convert the keys
    # to a list before comparing; otherwise the check cannot fail.
    self.assertEqual(list(fu.keys()), ['VFB_jrchk3bp'])

def test_get_neuron_pubs(self):
fu = self.vc.get_neuron_pubs('Kenyon cell')
self.assertTrue(len(fu)> 9)
Expand Down
Loading