-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Don't send so many synonyms #296
Changes from all commits
8f0dee8
5c7f019
73d157b
59980fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -110,8 +110,6 @@ async def fetch( | |
self, | ||
kp_id: str, | ||
request: dict, | ||
input_prefixes: dict = None, | ||
output_prefixes: dict = None, | ||
): | ||
"""Wrap fetch with CURIE mapping(s).""" | ||
request = remove_null_values(request) | ||
|
@@ -242,44 +240,48 @@ def map_curie( | |
data: dict[str, Entity], | ||
prefixes: dict[str, list[str]], | ||
logger: logging.Logger = None, | ||
): | ||
"""Map single CURIE.""" | ||
) -> str: | ||
"""Map a single CURIE to the list of preferred equivalent CURIES. | ||
|
||
1. Find the most-preferred prefix for which the provided CURIE has synonyms. | ||
2. Return all synonymous CURIEs that have that prefix. | ||
""" | ||
try: | ||
categories, identifiers = data[curie] | ||
except KeyError: | ||
return [curie] | ||
prefixes = { | ||
# Gather the preferred prefixes for each category, deduplicating while retaining order | ||
prefixes = list(dict.fromkeys( | ||
prefix | ||
for category in categories | ||
for prefix in prefixes.get(category, []) | ||
} | ||
)) | ||
if not prefixes: | ||
# no preferred prefixes for these categories | ||
# There are no preferred prefixes for these categories - use the prefixes that Biolink prefers | ||
logger.debug( | ||
"[{}] Cannot not find preferred prefixes for at least one of: {}".format( | ||
getattr(logger, "context", ""), | ||
categories, | ||
) | ||
) | ||
return identifiers | ||
prefixes = identifiers[0].split(":")[0] | ||
|
||
# Find CURIEs beginning with any of prefixes | ||
prefix_identifiers = [ | ||
curie | ||
for curie in identifiers | ||
if any( | ||
curie.startswith(prefix) | ||
for prefix in prefixes | ||
) | ||
] | ||
if not prefix_identifiers: | ||
# no preferred curie with these prefixes | ||
logger.debug( | ||
"[{}] Cannot find identifier in {} with a preferred prefix in {}".format( | ||
getattr(logger, "context", ""), | ||
identifiers, | ||
prefixes, | ||
), | ||
) | ||
return [curie] | ||
return prefix_identifiers | ||
# Find CURIEs beginning with the most-preferred prefix | ||
for prefix in prefixes: | ||
curies = [ | ||
_curie | ||
for _curie in identifiers | ||
if _curie.startswith(prefix) | ||
] | ||
if curies: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this returns curies for the first prefix where curies exist, instead of all curies for all prefixes? If I'm understanding it right, can we document this behavior explicitly in a docstring or comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. |
||
return curies | ||
|
||
# There is no equivalent CURIE with any of the acceptable prefixes - return the original CURIE | ||
logger.debug( | ||
"[{}] Cannot find identifier in {} with a preferred prefix in {}".format( | ||
getattr(logger, "context", ""), | ||
identifiers, | ||
prefixes, | ||
), | ||
) | ||
return [curie] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this just to deduplicate? Maybe switch to
set()
if that's the case.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we need to deduplicate but also retain the order, which is why I've done this rather than using
set()
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha. Can we document this in a comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.