File tree: 1 file changed, +8 -3 lines changed
Columns: Original file line number | Diff line number | Diff line change
@@ -14,8 +14,10 @@ def _ontoma_udf(row, ontoma_instance):
14
14
for attempt in range (1 , ONTOMA_MAX_ATTEMPTS + 1 ):
15
15
# Try to map first by disease name (because that branch of OnToma is more stable), then by disease ID.
16
16
try :
17
- mappings = ontoma_instance .find_term (query = disease_name , code = False )
18
- if not mappings :
17
+ mappings = []
18
+ if disease_name :
19
+ mappings = ontoma_instance .find_term (query = disease_name , code = False )
20
+ if disease_id and not mappings :
19
21
mappings = ontoma_instance .find_term (query = disease_id , code = True )
20
22
return [m .id_ot_schema for m in mappings ]
21
23
except :
@@ -29,7 +31,10 @@ def _ontoma_udf(row, ontoma_instance):
29
31
def add_efo_mapping (evidence_strings , spark_instance , ontoma_cache_dir = None ):
30
32
"""Given evidence strings with diseaseFromSource and diseaseFromSourceId fields, try to populate EFO mapping
31
33
field diseaseFromSourceMappedId. In case there are multiple matches, the evidence strings will be exploded
32
- accordingly."""
34
+ accordingly.
35
+
36
+ Currently, both source columns (diseaseFromSource and diseaseFromSourceId) need to be present in the original
37
+ schema, although they do not have to be populated for all rows."""
33
38
logging .info ('Collect all distinct (disease name, disease ID) pairs.' )
34
39
disease_info_to_map = (
35
40
evidence_strings
You can’t perform that action at this time.
0 commit comments