From 8f0dee86a0444e54cf8fe37a182e4df5effe7394 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 23 Sep 2021 13:05:17 -0400 Subject: [PATCH 1/4] Remove obsolete input/output prefixes arguments --- strider/compatibility.py | 2 -- strider/fetcher.py | 2 -- strider/util.py | 6 +----- tests/test_compatibility.py | 2 -- 4 files changed, 1 insertion(+), 11 deletions(-) diff --git a/strider/compatibility.py b/strider/compatibility.py index 3f8f2d14..2446165c 100644 --- a/strider/compatibility.py +++ b/strider/compatibility.py @@ -110,8 +110,6 @@ async def fetch( self, kp_id: str, request: dict, - input_prefixes: dict = None, - output_prefixes: dict = None, ): """Wrap fetch with CURIE mapping(s).""" request = remove_null_values(request) diff --git a/strider/fetcher.py b/strider/fetcher.py index a7c30826..2cb66ea5 100644 --- a/strider/fetcher.py +++ b/strider/fetcher.py @@ -405,8 +405,6 @@ async def setup( details, self.portal, kp_id, - self.kp_preferred_prefixes[kp_id], - self.preferred_prefixes, ) for kp_id, details in kps.items() } diff --git a/strider/util.py b/strider/util.py index ff52162c..70256460 100644 --- a/strider/util.py +++ b/strider/util.py @@ -193,22 +193,18 @@ async def post_json(url, request, logger, log_name): class KnowledgeProvider(): """Knowledge provider.""" - def __init__(self, details, portal, id, in_prefixes, out_prefixes, *args, **kwargs): + def __init__(self, details, portal, id, *args, **kwargs): """Initialize.""" self.details = details self.portal = portal # self.portal: KnowledgePortal = portal self.id = id - self.in_prefixes = in_prefixes - self.out_prefixes = out_prefixes async def solve_onehop(self, request): """Solve one-hop query.""" return await self.portal.fetch( self.id, {"message": {"query_graph": request}}, - self.in_prefixes, - self.out_prefixes, ) diff --git a/tests/test_compatibility.py b/tests/test_compatibility.py index 1861d0bf..1b9527fa 100644 --- a/tests/test_compatibility.py +++ b/tests/test_compatibility.py @@ -306,8 
+306,6 @@ async def test_fetch(): response = await portal.fetch( kp_id="ctd", request={"message": {"query_graph": query_graph}}, - input_prefixes=CTD_PREFIXES, - output_prefixes=preferred_prefixes, ) allowed_response_prefixes = [ From 5c7f01951016c4325e6faa42da860f6360ac4128 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 23 Sep 2021 13:52:01 -0400 Subject: [PATCH 2/4] Send only CURIEs with the best matching prefix --- strider/compatibility.py | 45 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/strider/compatibility.py b/strider/compatibility.py index 2446165c..d7758ee1 100644 --- a/strider/compatibility.py +++ b/strider/compatibility.py @@ -240,17 +240,17 @@ def map_curie( data: dict[str, Entity], prefixes: dict[str, list[str]], logger: logging.Logger = None, - ): + ) -> str: """Map single CURIE.""" try: categories, identifiers = data[curie] except KeyError: return [curie] - prefixes = { + prefixes = list(dict.fromkeys( prefix for category in categories for prefix in prefixes.get(category, []) - } + )) if not prefixes: # no preferred prefixes for these categories logger.debug( @@ -259,25 +259,24 @@ def map_curie( categories, ) ) - return identifiers + prefixes = identifiers[0].split(":")[0] # Find CURIEs beginning with any of prefixes - prefix_identifiers = [ - curie - for curie in identifiers - if any( - curie.startswith(prefix) - for prefix in prefixes - ) - ] - if not prefix_identifiers: - # no preferred curie with these prefixes - logger.debug( - "[{}] Cannot find identifier in {} with a preferred prefix in {}".format( - getattr(logger, "context", ""), - identifiers, - prefixes, - ), - ) - return [curie] - return prefix_identifiers + for prefix in prefixes: + curies = [ + _curie + for _curie in identifiers + if _curie.startswith(prefix) + ] + if curies: + return curies + + # no preferred curie with these prefixes + logger.debug( + "[{}] Cannot find identifier in {} with a preferred prefix in 
{}".format( + getattr(logger, "context", ""), + identifiers, + prefixes, + ), + ) + return [curie] From 73d157bad8561ea048e48119d3efd797655b802f Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 23 Sep 2021 13:52:52 -0400 Subject: [PATCH 3/4] Expedite KP comb test with big biolink --- tests/helpers/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py index 3e909fd6..96182755 100644 --- a/tests/helpers/utils.py +++ b/tests/helpers/utils.py @@ -53,7 +53,7 @@ def generate_kps(qty): ) ) - return {str(i): kp for i, kp in enumerate(kp_generator) if i < qty} + return {str(i): kp for i, kp in zip(range(qty), kp_generator)} def query_graph_from_string(s): From 59980fe0048aa9333c69c8ef71a98bddd6722bb4 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 28 Sep 2021 11:32:05 -0400 Subject: [PATCH 4/4] Improve CURIE-mapping documentation --- strider/compatibility.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/strider/compatibility.py b/strider/compatibility.py index d7758ee1..d379c510 100644 --- a/strider/compatibility.py +++ b/strider/compatibility.py @@ -241,18 +241,23 @@ def map_curie( prefixes: dict[str, list[str]], logger: logging.Logger = None, ) -> str: - """Map single CURIE.""" + """Map a single CURIE to the list of preferred equivalent CURIEs. + + 1. Find the most-preferred prefix for which the provided CURIE has synonyms. + 2. Return all synonymous CURIEs that have that prefix. 
+ """ try: categories, identifiers = data[curie] except KeyError: return [curie] + # Gather the preferred prefixes for each category, deduplicating while retaining order prefixes = list(dict.fromkeys( prefix for category in categories for prefix in prefixes.get(category, []) )) if not prefixes: - # no preferred prefixes for these categories + # There are no preferred prefixes for these categories - use the prefixes that Biolink prefers logger.debug( "[{}] Cannot not find preferred prefixes for at least one of: {}".format( getattr(logger, "context", ""), @@ -261,7 +266,7 @@ def map_curie( ) prefixes = identifiers[0].split(":")[0] - # Find CURIEs beginning with any of prefixes + # Find CURIEs beginning with the most-preferred prefix for prefix in prefixes: curies = [ _curie @@ -271,7 +276,7 @@ def map_curie( if curies: return curies - # no preferred curie with these prefixes + # There is no equivalent CURIE with any of the acceptable prefixes - return the original CURIE logger.debug( "[{}] Cannot find identifier in {} with a preferred prefix in {}".format( getattr(logger, "context", ""),