Skip to content

Commit b103ca8

Browse files
committed
Address review comments
1 parent 7599e82 commit b103ca8

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

common/ontology.py

+8 -3
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,10 @@ def _ontoma_udf(row, ontoma_instance):
1414
for attempt in range(1, ONTOMA_MAX_ATTEMPTS + 1):
1515
# Try to map first by disease name (because that branch of OnToma is more stable), then by disease ID.
1616
try:
17-
mappings = ontoma_instance.find_term(query=disease_name, code=False)
18-
if not mappings:
17+
mappings = []
18+
if disease_name:
19+
mappings = ontoma_instance.find_term(query=disease_name, code=False)
20+
if disease_id and not mappings:
1921
mappings = ontoma_instance.find_term(query=disease_id, code=True)
2022
return [m.id_ot_schema for m in mappings]
2123
except:
@@ -29,7 +31,10 @@ def _ontoma_udf(row, ontoma_instance):
2931
def add_efo_mapping(evidence_strings, spark_instance, ontoma_cache_dir=None):
3032
"""Given evidence strings with diseaseFromSource and diseaseFromSourceId fields, try to populate EFO mapping
3133
field diseaseFromSourceMappedId. In case there are multiple matches, the evidence strings will be exploded
32-
accordingly."""
34+
accordingly.
35+
36+
Currently, both source columns (diseaseFromSource and diseaseFromSourceId) need to be present in the original
37+
schema, although they do not have to be populated for all rows."""
3338
logging.info('Collect all distinct (disease name, disease ID) pairs.')
3439
disease_info_to_map = (
3540
evidence_strings

0 commit comments

Comments
 (0)