diff --git a/ehr_risk_kp.yaml b/ehr_risk_kp.yaml index c379d72..ac3ed3d 100644 --- a/ehr_risk_kp.yaml +++ b/ehr_risk_kp.yaml @@ -813,7 +813,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -844,7 +844,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -877,7 +877,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -908,7 +908,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -941,7 +941,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -972,7 +972,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1005,7 +1005,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1036,7 +1036,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1079,7 +1079,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1110,7 +1110,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, 
+ association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1143,7 +1143,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1174,7 +1174,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1207,7 +1207,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1238,7 +1238,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1277,7 +1277,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1308,7 +1308,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1345,7 +1345,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1376,7 +1376,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1413,7 +1413,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1444,7 +1444,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1477,7 +1477,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1508,7 +1508,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1541,7 +1541,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1572,7 +1572,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1611,7 +1611,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1642,7 +1642,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1679,7 +1679,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1710,7 +1710,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1743,7 +1743,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1774,7 +1774,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1815,7 +1815,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1846,7 +1846,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1883,7 +1883,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1914,7 +1914,7 @@ components: parameters: fields: >- subject.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1951,7 +1951,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -1982,7 +1982,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2015,7 +2015,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2046,7 +2046,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2079,7 +2079,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2110,7 +2110,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2143,7 +2143,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2174,7 +2174,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2211,7 +2211,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2242,7 +2242,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2277,7 +2277,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2308,7 +2308,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2341,7 +2341,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2372,7 +2372,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2405,7 +2405,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2436,7 +2436,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2469,7 +2469,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2500,7 +2500,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2533,7 +2533,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2564,7 +2564,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2601,7 +2601,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2632,7 +2632,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2669,7 +2669,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2700,7 +2700,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2737,7 +2737,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2768,7 +2768,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2801,7 +2801,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2832,7 +2832,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2865,7 +2865,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2896,7 +2896,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2929,7 +2929,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2960,7 +2960,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -2995,7 +2995,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3026,7 +3026,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3059,7 +3059,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3090,7 +3090,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3125,7 +3125,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3156,7 +3156,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3189,7 +3189,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3220,7 +3220,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3253,7 +3253,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3284,7 +3284,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3317,7 +3317,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3348,7 +3348,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3381,7 +3381,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3412,7 +3412,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3445,7 +3445,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3476,7 +3476,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3511,7 +3511,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3542,7 +3542,7 @@ components: parameters: fields: >- subject.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3579,7 +3579,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3610,7 +3610,7 @@ components: parameters: fields: >- subject.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3647,7 +3647,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3678,7 +3678,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3711,7 +3711,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3742,7 +3742,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3775,7 +3775,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3806,7 +3806,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3839,7 +3839,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3870,7 +3870,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3903,7 +3903,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3934,7 +3934,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3967,7 +3967,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -3998,7 +3998,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4031,7 +4031,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4062,7 +4062,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4097,7 +4097,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4128,7 +4128,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4161,7 +4161,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4192,7 +4192,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4225,7 +4225,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4256,7 +4256,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4289,7 +4289,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4320,7 +4320,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4353,7 +4353,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4384,7 +4384,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4417,7 +4417,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4448,7 +4448,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4483,7 +4483,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4514,7 +4514,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4551,7 +4551,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4582,7 +4582,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4615,7 +4615,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4646,7 +4646,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4681,7 +4681,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4712,7 +4712,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4745,7 +4745,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4776,7 +4776,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4809,7 +4809,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4840,7 +4840,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4873,7 +4873,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4904,7 +4904,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4939,7 +4939,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -4970,7 +4970,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5003,7 +5003,7 @@ components: parameters: fields: >- object.MONDO, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5034,7 +5034,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5069,7 +5069,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5100,7 +5100,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5133,7 +5133,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5164,7 +5164,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5197,7 +5197,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5228,7 +5228,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5261,7 +5261,7 @@ components: parameters: fields: >- object.SNOMEDCT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5292,7 +5292,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5325,7 +5325,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5356,7 +5356,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + 
association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5389,7 +5389,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5420,7 +5420,7 @@ components: parameters: fields: >- subject.UNII, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5455,7 +5455,7 @@ components: parameters: fields: >- object.HP, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5486,7 +5486,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5519,7 +5519,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5550,7 +5550,7 @@ components: parameters: fields: >- subject.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5585,7 +5585,7 @@ components: parameters: fields: >- object.NCIT, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5616,7 +5616,7 @@ components: parameters: fields: >- subject.CHEBI, - association.edge_attributes, + association.edge_attributes,source.edge_sources, subject.name,object.name size: 1000 outputs: @@ -5641,58 +5641,70 @@ components: input_name: subject.name output_name: object.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources object-NCIT: NCIT: object.NCIT input_name: subject.name output_name: object.name edge-attributes: association.edge_attributes + trapi_sources: 
source.edge_sources object-SNOMEDCT: SNOMEDCT: object.SNOMEDCT input_name: subject.name output_name: object.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources object-HP: HP: object.HP input_name: subject.name output_name: object.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources object-CHEBI: CHEBI: object.CHEBI input_name: subject.name output_name: object.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources object-UNII: UNII: object.UNII input_name: subject.name output_name: object.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources subject-MONDO: MONDO: subject.MONDO input_name: object.name output_name: subject.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources subject-NCIT: NCIT: subject.NCIT input_name: object.name output_name: subject.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources subject-SNOMEDCT: SNOMEDCT: subject.SNOMEDCT input_name: object.name output_name: subject.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources subject-HP: HP: subject.HP input_name: object.name output_name: subject.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources subject-CHEBI: CHEBI: subject.CHEBI input_name: object.name output_name: subject.name edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources subject-UNII: UNII: subject.UNII input_name: object.name output_name: subject.name - edge-attributes: association.edge_attributes \ No newline at end of file + edge-attributes: association.edge_attributes + trapi_sources: source.edge_sources diff --git a/legacy_parsers/EHR_Risk_parser.ipynb b/legacy_parsers/EHR_Risk_parser.ipynb new file mode 100644 index 0000000..6e1564d --- /dev/null +++ b/legacy_parsers/EHR_Risk_parser.ipynb @@ -0,0 +1,12376 @@ +{ + "cells": [ + { + 
"cell_type": "code", + "execution_count": null, + "id": "4d2996ec", + "metadata": {}, + "outputs": [], + "source": [ + "# qualifiers to include: https://github.com/biolink/biolink-model/issues/1050" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0dc9bbef", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import json\n", + "import sys, os\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "92478b80", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecategoryxrefprovided_by
0HP:0025181Abdominal aseptic abscessbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
1HP:0002027Abdominal painbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
2HP:0003115Abnormal EKGbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
3HP:0006919Abnormal aggressive, impulsive or violent beha...biolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
4HP:0003119Abnormal circulating lipid concentrationbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
..................
801CHEBI:27300vitamin dbiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
802CHEBI:33234vitamin ebiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
803CHEBI:87732warfarinbiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
804CHEBI:36560zinc oxidebiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
805CHEBI:10125zolpidembiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
\n", + "

806 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id name \\\n", + "0 HP:0025181 Abdominal aseptic abscess \n", + "1 HP:0002027 Abdominal pain \n", + "2 HP:0003115 Abnormal EKG \n", + "3 HP:0006919 Abnormal aggressive, impulsive or violent beha... \n", + "4 HP:0003119 Abnormal circulating lipid concentration \n", + ".. ... ... \n", + "801 CHEBI:27300 vitamin d \n", + "802 CHEBI:33234 vitamin e \n", + "803 CHEBI:87732 warfarin \n", + "804 CHEBI:36560 zinc oxide \n", + "805 CHEBI:10125 zolpidem \n", + "\n", + " category xref provided_by \n", + "0 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "1 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "2 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "3 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "4 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + ".. ... ... ... \n", + "801 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "802 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "803 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "804 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "805 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "\n", + "[806 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# nodes_filepath = os.path.join(\"../../data\", \"ehr_risk_nodes_data_2023_03_09.csv\")\n", + "# edges_filepath = os.path.join(\"../../data\", \"ehr_risk_edges_data_2023_03_09.csv\")\n", + "nodes_filepath = os.path.join(\"../../data\", \"ehr_risk_nodes_data_2022_06_01.csv\")\n", + "edges_filepath = os.path.join(\"../../data\", \"ehr_risk_edges_data_2022_06_01.csv\")\n", + "nodes_data = pd.read_csv(nodes_filepath, sep = ',')\n", + "edges_data = pd.read_csv(edges_filepath, sep = ',')\n", + "\n", + "nodes_data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": 
"0cb5c158", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecategoryxrefprovided_by
511MONDO:0009421hypogonadism, malebiolink:DiseaseNaNEHR Risk Provider (Multiomics)
\n", + "
" + ], + "text/plain": [ + " id name category xref \\\n", + "511 MONDO:0009421 hypogonadism, male biolink:Disease NaN \n", + "\n", + " provided_by \n", + "511 EHR Risk Provider (Multiomics) " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nodes_data.loc[nodes_data['name'].isin(['hypogonadism, male'])]" + ] + }, + { + "cell_type": "markdown", + "id": "0a92a7a5", + "metadata": {}, + "source": [ + "# There appears to be duplicate CURIEs \n", + "## This should be fixed at the enclave level" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d9673bb6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecategoryxrefprovided_by
0HP:0025181Abdominal aseptic abscessbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
1HP:0002027Abdominal painbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
2HP:0003115Abnormal EKGbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
3HP:0006919Abnormal aggressive, impulsive or violent beha...biolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
4HP:0003119Abnormal circulating lipid concentrationbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
..................
801CHEBI:27300vitamin dbiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
802CHEBI:33234vitamin ebiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
803CHEBI:87732warfarinbiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
804CHEBI:36560zinc oxidebiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
805CHEBI:10125zolpidembiolink:ChemicalSubstanceNaNEHR Risk Provider (Multiomics)
\n", + "

804 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id name \\\n", + "0 HP:0025181 Abdominal aseptic abscess \n", + "1 HP:0002027 Abdominal pain \n", + "2 HP:0003115 Abnormal EKG \n", + "3 HP:0006919 Abnormal aggressive, impulsive or violent beha... \n", + "4 HP:0003119 Abnormal circulating lipid concentration \n", + ".. ... ... \n", + "801 CHEBI:27300 vitamin d \n", + "802 CHEBI:33234 vitamin e \n", + "803 CHEBI:87732 warfarin \n", + "804 CHEBI:36560 zinc oxide \n", + "805 CHEBI:10125 zolpidem \n", + "\n", + " category xref provided_by \n", + "0 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "1 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "2 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "3 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "4 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + ".. ... ... ... \n", + "801 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "802 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "803 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "804 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "805 biolink:ChemicalSubstance NaN EHR Risk Provider (Multiomics) \n", + "\n", + "[804 rows x 5 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# there appears to be multiple CURIEs given for a node of the same name. Get just 1 CURIE for each name\n", + "nodes_data = nodes_data.drop_duplicates(subset='id', keep=\"first\")\n", + "nodes_data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3eec3454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecategoryxrefprovided_by
0HP:0025181Abdominal aseptic abscessbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
1HP:0002027Abdominal painbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
2HP:0003115Abnormal EKGbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
3HP:0006919Abnormal aggressive, impulsive or violent beha...biolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
4HP:0003119Abnormal circulating lipid concentrationbiolink:PhenotypicFeatureNaNEHR Risk Provider (Multiomics)
..................
801CHEBI:27300vitamin dbiolink:ChemicalEntityNaNEHR Risk Provider (Multiomics)
802CHEBI:33234vitamin ebiolink:ChemicalEntityNaNEHR Risk Provider (Multiomics)
803CHEBI:87732warfarinbiolink:ChemicalEntityNaNEHR Risk Provider (Multiomics)
804CHEBI:36560zinc oxidebiolink:ChemicalEntityNaNEHR Risk Provider (Multiomics)
805CHEBI:10125zolpidembiolink:ChemicalEntityNaNEHR Risk Provider (Multiomics)
\n", + "

804 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id name \\\n", + "0 HP:0025181 Abdominal aseptic abscess \n", + "1 HP:0002027 Abdominal pain \n", + "2 HP:0003115 Abnormal EKG \n", + "3 HP:0006919 Abnormal aggressive, impulsive or violent beha... \n", + "4 HP:0003119 Abnormal circulating lipid concentration \n", + ".. ... ... \n", + "801 CHEBI:27300 vitamin d \n", + "802 CHEBI:33234 vitamin e \n", + "803 CHEBI:87732 warfarin \n", + "804 CHEBI:36560 zinc oxide \n", + "805 CHEBI:10125 zolpidem \n", + "\n", + " category xref provided_by \n", + "0 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "1 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "2 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "3 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + "4 biolink:PhenotypicFeature NaN EHR Risk Provider (Multiomics) \n", + ".. ... ... ... \n", + "801 biolink:ChemicalEntity NaN EHR Risk Provider (Multiomics) \n", + "802 biolink:ChemicalEntity NaN EHR Risk Provider (Multiomics) \n", + "803 biolink:ChemicalEntity NaN EHR Risk Provider (Multiomics) \n", + "804 biolink:ChemicalEntity NaN EHR Risk Provider (Multiomics) \n", + "805 biolink:ChemicalEntity NaN EHR Risk Provider (Multiomics) \n", + "\n", + "[804 rows x 5 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# biolink:ChemicalSubstance has been depecrated. 
Use biolink:ChemicalEntity instead (change all values ChemicalSubstance--->ChemicalEntity)\n", + "nodes_data[\"category\"].mask(nodes_data[\"category\"] == \"biolink:ChemicalSubstance\", \"biolink:ChemicalEntity\" , inplace=True )\n", + "nodes_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "af3fe28d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "804" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check if the xref column is all NaN or empty\n", + "# get count of null values \n", + "nodes_data['xref'].isna().sum()\n", + "\n", + "# since xref is empty, we won't use it" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d3bdd0e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datecategoryclassifierauc_rocp_valuefeature_importancefeature_coefficientlog_positive_patient_countlog_negative_patient_count
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000NaN8.79639957
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872NaN8.58521257
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767NaN4.55817757
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563NaN4.35752257
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959NaN3.92606457
.............................................
237099MONDO:0013600biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.245984NaN-0.79041857
237100MONDO:0016264biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.984249NaN-0.79608557
237101MONDO:0004565biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.820564NaN-0.80397357
237102CHEBI:28864biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.472603NaN-0.82257557
237103HP:0011947biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.000207NaN-0.82573157
\n", + "

237104 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 MONDO:0013600 biolink:associated_with_decreased_likelihood_of \n", + "237100 MONDO:0016264 biolink:associated_with_decreased_likelihood_of \n", + "237101 MONDO:0004565 biolink:associated_with_decreased_likelihood_of \n", + "237102 CHEBI:28864 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0011947 biolink:associated_with_decreased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... 
\n", + "237099 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " category classifier auc_roc p_value \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 \n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 \n", + "... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.956860 0.245984 \n", + "237100 biolink:Association Logistic Regression 0.956860 0.984249 \n", + "237101 biolink:Association Logistic Regression 0.956860 0.820564 \n", + "237102 biolink:Association Logistic Regression 0.956860 0.472603 \n", + "237103 biolink:Association Logistic Regression 0.956860 0.000207 \n", + "\n", + " feature_importance feature_coefficient log_positive_patient_count \\\n", + "0 NaN 8.796399 5 \n", + "1 NaN 8.585212 5 \n", + "2 NaN 4.558177 5 \n", + "3 NaN 4.357522 5 \n", + "4 NaN 3.926064 5 \n", + "... ... ... ... \n", + "237099 NaN -0.790418 5 \n", + "237100 NaN -0.796085 5 \n", + "237101 NaN -0.803973 5 \n", + "237102 NaN -0.822575 5 \n", + "237103 NaN -0.825731 5 \n", + "\n", + " log_negative_patient_count \n", + "0 7 \n", + "1 7 \n", + "2 7 \n", + "3 7 \n", + "4 7 \n", + "... ... 
\n", + "237099 7 \n", + "237100 7 \n", + "237101 7 \n", + "237102 7 \n", + "237103 7 \n", + "\n", + "[237104 rows x 14 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "edges_data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "29d182f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['biolink:associated_with_increased_likelihood_of',\n", + " 'biolink:associated_with_decreased_likelihood_of'], dtype=object)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sanity check on predicates\n", + "edges_data['predicate'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2f5092be", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datecategoryclassifierauc_rocp_valuefeature_importancefeature_coefficientnum_patients_with_conditionnum_patients_without_condition
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000NaN8.796399996699999902
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872NaN8.5852129993810000835
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767NaN4.5581779977010000939
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563NaN4.357522999109998659
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959NaN3.9260641002429998750
.............................................
237099MONDO:0013600biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.245984NaN-0.790418999229995233
237100MONDO:0016264biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.984249NaN-0.7960859982210002245
237101MONDO:0004565biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.820564NaN-0.803973999379999068
237102CHEBI:28864biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.472603NaN-0.8225759958810000517
237103HP:0011947biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.000207NaN-0.8257311000739995547
\n", + "

237104 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 MONDO:0013600 biolink:associated_with_decreased_likelihood_of \n", + "237100 MONDO:0016264 biolink:associated_with_decreased_likelihood_of \n", + "237101 MONDO:0004565 biolink:associated_with_decreased_likelihood_of \n", + "237102 CHEBI:28864 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0011947 biolink:associated_with_decreased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... 
\n", + "237099 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " category classifier auc_roc p_value \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 \n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 \n", + "... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.956860 0.245984 \n", + "237100 biolink:Association Logistic Regression 0.956860 0.984249 \n", + "237101 biolink:Association Logistic Regression 0.956860 0.820564 \n", + "237102 biolink:Association Logistic Regression 0.956860 0.472603 \n", + "237103 biolink:Association Logistic Regression 0.956860 0.000207 \n", + "\n", + " feature_importance feature_coefficient num_patients_with_condition \\\n", + "0 NaN 8.796399 99669 \n", + "1 NaN 8.585212 99938 \n", + "2 NaN 4.558177 99770 \n", + "3 NaN 4.357522 99910 \n", + "4 NaN 3.926064 100242 \n", + "... ... ... ... \n", + "237099 NaN -0.790418 99922 \n", + "237100 NaN -0.796085 99822 \n", + "237101 NaN -0.803973 99937 \n", + "237102 NaN -0.822575 99588 \n", + "237103 NaN -0.825731 100073 \n", + "\n", + " num_patients_without_condition \n", + "0 9999902 \n", + "1 10000835 \n", + "2 10000939 \n", + "3 9998659 \n", + "4 9998750 \n", + "... ... 
\n", + "237099 9995233 \n", + "237100 10002245 \n", + "237101 9999068 \n", + "237102 10000517 \n", + "237103 9995547 \n", + "\n", + "[237104 rows x 14 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "edges_data[\"num_patients_with_condition\"] = 10**(edges_data['log_positive_patient_count']) # convert log pos patient count to an actual # \n", + "edges_data[\"num_patients_without_condition\"] = 10**(edges_data['log_negative_patient_count']) # convert log neg patient count to an actual # \n", + "edges_data[\"num_patients_with_condition\"] = np.random.poisson(edges_data['num_patients_with_condition']) # add poisson noise injection\n", + "edges_data[\"num_patients_without_condition\"] = np.random.poisson(edges_data['num_patients_without_condition']) # add poisson noise injection\n", + "edges_data = edges_data.drop(['log_positive_patient_count', 'log_negative_patient_count'], axis=1)\n", + "edges_data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ef4fb895", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
subjectpredicateobjectrelationprovided_byprovided_datecategoryclassifierauc_rocp_valuefeature_importancefeature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozenset
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000NaN8.796399996699999902(HP:0000360, HP:0008629)
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872NaN8.5852129993810000835(HP:0000360, MONDO:0010643)
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767NaN4.5581779977010000939(UNII:25ADE2236L, HP:0000360)
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563NaN4.357522999109998659(HP:0000360, UNII:K16AIQ8CTM)
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959NaN3.9260641002429998750(MONDO:0007972, HP:0000360)
................................................
237099MONDO:0013600biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.245984NaN-0.790418999229995233(MONDO:0013600, HP:0033106)
237100MONDO:0016264biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.984249NaN-0.7960859982210002245(MONDO:0016264, HP:0033106)
237101MONDO:0004565biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.820564NaN-0.803973999379999068(HP:0033106, MONDO:0004565)
237102CHEBI:28864biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.472603NaN-0.8225759958810000517(CHEBI:28864, HP:0033106)
237103HP:0011947biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.000207NaN-0.8257311000739995547(HP:0011947, HP:0033106)
\n", + "

237104 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 MONDO:0013600 biolink:associated_with_decreased_likelihood_of \n", + "237100 MONDO:0016264 biolink:associated_with_decreased_likelihood_of \n", + "237101 MONDO:0004565 biolink:associated_with_decreased_likelihood_of \n", + "237102 CHEBI:28864 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0011947 biolink:associated_with_decreased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... 
\n", + "237099 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " category classifier auc_roc p_value \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 \n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 \n", + "... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.956860 0.245984 \n", + "237100 biolink:Association Logistic Regression 0.956860 0.984249 \n", + "237101 biolink:Association Logistic Regression 0.956860 0.820564 \n", + "237102 biolink:Association Logistic Regression 0.956860 0.472603 \n", + "237103 biolink:Association Logistic Regression 0.956860 0.000207 \n", + "\n", + " feature_importance feature_coefficient num_patients_with_condition \\\n", + "0 NaN 8.796399 99669 \n", + "1 NaN 8.585212 99938 \n", + "2 NaN 4.558177 99770 \n", + "3 NaN 4.357522 99910 \n", + "4 NaN 3.926064 100242 \n", + "... ... ... ... \n", + "237099 NaN -0.790418 99922 \n", + "237100 NaN -0.796085 99822 \n", + "237101 NaN -0.803973 99937 \n", + "237102 NaN -0.822575 99588 \n", + "237103 NaN -0.825731 100073 \n", + "\n", + " num_patients_without_condition nodes_frozenset \n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... 
\n", + "237099 9995233 (MONDO:0013600, HP:0033106) \n", + "237100 10002245 (MONDO:0016264, HP:0033106) \n", + "237101 9999068 (HP:0033106, MONDO:0004565) \n", + "237102 10000517 (CHEBI:28864, HP:0033106) \n", + "237103 9995547 (HP:0011947, HP:0033106) \n", + "\n", + "[237104 rows x 15 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert columns A and B to frozenset\n", + "# df['AB'] = df[['A', 'B']].apply(frozenset, axis=1)\n", + "edges_data['nodes_frozenset'] = edges_data[['subject', 'object']].apply(frozenset, axis=1)\n", + "edges_data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "610615b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datecategoryclassifierauc_rocp_valuefeature_importancefeature_coefficientnum_patients_with_conditionnum_patients_without_condition
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000NaN8.796399996699999902
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872NaN8.5852129993810000835
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767NaN4.5581779977010000939
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563NaN4.357522999109998659
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959NaN3.9260641002429998750
.............................................
237099MONDO:0013600biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.245984NaN-0.790418999229995233
237100MONDO:0016264biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.984249NaN-0.7960859982210002245
237101MONDO:0004565biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.820564NaN-0.803973999379999068
237102CHEBI:28864biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.472603NaN-0.8225759958810000517
237103HP:0011947biolink:associated_with_decreased_likelihood_ofHP:0033106RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9568600.000207NaN-0.8257311000739995547
\n", + "

237104 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 MONDO:0013600 biolink:associated_with_decreased_likelihood_of \n", + "237100 MONDO:0016264 biolink:associated_with_decreased_likelihood_of \n", + "237101 MONDO:0004565 biolink:associated_with_decreased_likelihood_of \n", + "237102 CHEBI:28864 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0011947 biolink:associated_with_decreased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... 
\n", + "237099 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0033106 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " category classifier auc_roc p_value \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 \n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 \n", + "... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.956860 0.245984 \n", + "237100 biolink:Association Logistic Regression 0.956860 0.984249 \n", + "237101 biolink:Association Logistic Regression 0.956860 0.820564 \n", + "237102 biolink:Association Logistic Regression 0.956860 0.472603 \n", + "237103 biolink:Association Logistic Regression 0.956860 0.000207 \n", + "\n", + " feature_importance feature_coefficient num_patients_with_condition \\\n", + "0 NaN 8.796399 99669 \n", + "1 NaN 8.585212 99938 \n", + "2 NaN 4.558177 99770 \n", + "3 NaN 4.357522 99910 \n", + "4 NaN 3.926064 100242 \n", + "... ... ... ... \n", + "237099 NaN -0.790418 99922 \n", + "237100 NaN -0.796085 99822 \n", + "237101 NaN -0.803973 99937 \n", + "237102 NaN -0.822575 99588 \n", + "237103 NaN -0.825731 100073 \n", + "\n", + " num_patients_without_condition \n", + "0 9999902 \n", + "1 10000835 \n", + "2 10000939 \n", + "3 9998659 \n", + "4 9998750 \n", + "... ... 
\n", + "237099 9995233 \n", + "237100 10002245 \n", + "237101 9999068 \n", + "237102 10000517 \n", + "237103 9995547 \n", + "\n", + "[237104 rows x 14 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Find duplicate rows based on frozenset of columns A and B\n", + "duplicate_rows = edges_data.duplicated(['nodes_frozenset', 'auc_roc', 'p_value', 'feature_coefficient'])\n", + "\n", + "# Filter the DataFrame to remove duplicate rows\n", + "test = edges_data[~duplicate_rows]\n", + "\n", + "test = test.drop(columns='nodes_frozenset')\n", + "\n", + "test" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "dc4252fb", + "metadata": {}, + "outputs": [], + "source": [ + "# # create confidence interval column by concatenating 'lower_confidence_bound'and 'upper_confidence_bound', then dropping those columns\n", + "# edges_data['log_odds_ratio_95_confidence_interval'] = edges_data.apply(lambda row: [row['lower_confidence_bound'], row['upper_confidence_bound']], axis=1)\n", + "# edges_data = edges_data.drop(['lower_confidence_bound', 'upper_confidence_bound'], axis=1)\n", + "# edges_data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8531ffa3", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datecategory_xclassifierauc_rocp_valuefeature_importancefeature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetidnamecategory_y
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000NaN8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
1HP:0008629biolink:associated_with_decreased_likelihood_ofHP:0000739RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8761000.985604NaN-0.530062998419999688(HP:0008629, HP:0000739)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
2HP:0008629biolink:associated_with_decreased_likelihood_ofHP:0000787RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8411020.981654NaN-1.18066910035410002196(HP:0008629, HP:0000787)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
3HP:0008629biolink:associated_with_decreased_likelihood_ofHP:0000790RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9108380.960982NaN-1.2000199983510001455(HP:0008629, HP:0000790)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
4HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000870RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8845800.988524NaN1.4326119689996545(HP:0008629, HP:0000870)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
.........................................................
237099HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0032312RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9214170.927675NaN-0.1716079972110001233(HP:0000360, HP:0032312)HP:0000360Tinnitusbiolink:PhenotypicFeature
237100HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0032372RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9900230.962012NaN-2.81756810419998402(HP:0000360, HP:0032372)HP:0000360Tinnitusbiolink:PhenotypicFeature
237101HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0032473RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9632060.989129NaN-5.438955810000998(HP:0000360, HP:0032473)HP:0000360Tinnitusbiolink:PhenotypicFeature
237102HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0033077RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9872300.541701NaN-1.01730910054810003263(HP:0000360, HP:0033077)HP:0000360Tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0033078RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9341150.807916NaN-0.92952410034510001251(HP:0000360, HP:0033078)HP:0000360Tinnitusbiolink:PhenotypicFeature
\n", + "

237104 rows × 18 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 HP:0008629 biolink:associated_with_decreased_likelihood_of \n", + "2 HP:0008629 biolink:associated_with_decreased_likelihood_of \n", + "3 HP:0008629 biolink:associated_with_decreased_likelihood_of \n", + "4 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237100 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237101 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237102 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000739 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000787 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000790 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000870 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... 
\n", + "237099 HP:0032312 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0032372 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0032473 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0033077 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0033078 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " category_x classifier auc_roc p_value \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 \n", + "1 biolink:Association Logistic Regression 0.876100 0.985604 \n", + "2 biolink:Association Logistic Regression 0.841102 0.981654 \n", + "3 biolink:Association Logistic Regression 0.910838 0.960982 \n", + "4 biolink:Association Logistic Regression 0.884580 0.988524 \n", + "... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.921417 0.927675 \n", + "237100 biolink:Association Logistic Regression 0.990023 0.962012 \n", + "237101 biolink:Association Logistic Regression 0.963206 0.989129 \n", + "237102 biolink:Association Logistic Regression 0.987230 0.541701 \n", + "237103 biolink:Association Logistic Regression 0.934115 0.807916 \n", + "\n", + " feature_importance feature_coefficient num_patients_with_condition \\\n", + "0 NaN 8.796399 99669 \n", + "1 NaN -0.530062 99841 \n", + "2 NaN -1.180669 100354 \n", + "3 NaN -1.200019 99835 \n", + "4 NaN 1.432611 968 \n", + "... ... ... ... \n", + "237099 NaN -0.171607 99721 \n", + "237100 NaN -2.817568 1041 \n", + "237101 NaN -5.438955 8 \n", + "237102 NaN -1.017309 100548 \n", + "237103 NaN -0.929524 100345 \n", + "\n", + " num_patients_without_condition nodes_frozenset id \\\n", + "0 9999902 (HP:0000360, HP:0008629) HP:0008629 \n", + "1 9999688 (HP:0008629, HP:0000739) HP:0008629 \n", + "2 10002196 (HP:0008629, HP:0000787) HP:0008629 \n", + "3 10001455 (HP:0008629, HP:0000790) HP:0008629 \n", + "4 9996545 (HP:0008629, HP:0000870) HP:0008629 \n", + "... ... ... ... 
\n", + "237099 10001233 (HP:0000360, HP:0032312) HP:0000360 \n", + "237100 9998402 (HP:0000360, HP:0032372) HP:0000360 \n", + "237101 10000998 (HP:0000360, HP:0032473) HP:0000360 \n", + "237102 10003263 (HP:0000360, HP:0033077) HP:0000360 \n", + "237103 10001251 (HP:0000360, HP:0033078) HP:0000360 \n", + "\n", + " name category_y \n", + "0 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "1 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "2 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "3 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "4 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "... ... ... \n", + "237099 Tinnitus biolink:PhenotypicFeature \n", + "237100 Tinnitus biolink:PhenotypicFeature \n", + "237101 Tinnitus biolink:PhenotypicFeature \n", + "237102 Tinnitus biolink:PhenotypicFeature \n", + "237103 Tinnitus biolink:PhenotypicFeature \n", + "\n", + "[237104 rows x 18 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kg = pd.merge(edges_data, nodes_data[['id', 'name', 'category']], left_on='subject', right_on = 'id', how=\"inner\")\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "60ab6b78", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_valuefeature_importancefeature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_category
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000NaN8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
1HP:0008629biolink:associated_with_decreased_likelihood_ofHP:0000739RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8761000.985604NaN-0.530062998419999688(HP:0008629, HP:0000739)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
2HP:0008629biolink:associated_with_decreased_likelihood_ofHP:0000787RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8411020.981654NaN-1.18066910035410002196(HP:0008629, HP:0000787)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
3HP:0008629biolink:associated_with_decreased_likelihood_ofHP:0000790RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9108380.960982NaN-1.2000199983510001455(HP:0008629, HP:0000790)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
4HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000870RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8845800.988524NaN1.4326119689996545(HP:0008629, HP:0000870)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
.........................................................
237099HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0032312RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9214170.927675NaN-0.1716079972110001233(HP:0000360, HP:0032312)HP:0000360Tinnitusbiolink:PhenotypicFeature
237100HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0032372RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9900230.962012NaN-2.81756810419998402(HP:0000360, HP:0032372)HP:0000360Tinnitusbiolink:PhenotypicFeature
237101HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0032473RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9632060.989129NaN-5.438955810000998(HP:0000360, HP:0032473)HP:0000360Tinnitusbiolink:PhenotypicFeature
237102HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0033077RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9872300.541701NaN-1.01730910054810003263(HP:0000360, HP:0033077)HP:0000360Tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_decreased_likelihood_ofHP:0033078RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9341150.807916NaN-0.92952410034510001251(HP:0000360, HP:0033078)HP:0000360Tinnitusbiolink:PhenotypicFeature
\n", + "

237104 rows × 18 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 HP:0008629 biolink:associated_with_decreased_likelihood_of \n", + "2 HP:0008629 biolink:associated_with_decreased_likelihood_of \n", + "3 HP:0008629 biolink:associated_with_decreased_likelihood_of \n", + "4 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237100 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237101 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237102 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_decreased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000739 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000787 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000790 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000870 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... 
\n", + "237099 HP:0032312 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0032372 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0032473 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0033077 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0033078 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 \n", + "1 biolink:Association Logistic Regression 0.876100 0.985604 \n", + "2 biolink:Association Logistic Regression 0.841102 0.981654 \n", + "3 biolink:Association Logistic Regression 0.910838 0.960982 \n", + "4 biolink:Association Logistic Regression 0.884580 0.988524 \n", + "... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.921417 0.927675 \n", + "237100 biolink:Association Logistic Regression 0.990023 0.962012 \n", + "237101 biolink:Association Logistic Regression 0.963206 0.989129 \n", + "237102 biolink:Association Logistic Regression 0.987230 0.541701 \n", + "237103 biolink:Association Logistic Regression 0.934115 0.807916 \n", + "\n", + " feature_importance feature_coefficient num_patients_with_condition \\\n", + "0 NaN 8.796399 99669 \n", + "1 NaN -0.530062 99841 \n", + "2 NaN -1.180669 100354 \n", + "3 NaN -1.200019 99835 \n", + "4 NaN 1.432611 968 \n", + "... ... ... ... \n", + "237099 NaN -0.171607 99721 \n", + "237100 NaN -2.817568 1041 \n", + "237101 NaN -5.438955 8 \n", + "237102 NaN -1.017309 100548 \n", + "237103 NaN -0.929524 100345 \n", + "\n", + " num_patients_without_condition nodes_frozenset subject_id \\\n", + "0 9999902 (HP:0000360, HP:0008629) HP:0008629 \n", + "1 9999688 (HP:0008629, HP:0000739) HP:0008629 \n", + "2 10002196 (HP:0008629, HP:0000787) HP:0008629 \n", + "3 10001455 (HP:0008629, HP:0000790) HP:0008629 \n", + "4 9996545 (HP:0008629, HP:0000870) HP:0008629 \n", + "... ... ... ... 
\n", + "237099 10001233 (HP:0000360, HP:0032312) HP:0000360 \n", + "237100 9998402 (HP:0000360, HP:0032372) HP:0000360 \n", + "237101 10000998 (HP:0000360, HP:0032473) HP:0000360 \n", + "237102 10003263 (HP:0000360, HP:0033077) HP:0000360 \n", + "237103 10001251 (HP:0000360, HP:0033078) HP:0000360 \n", + "\n", + " subject_name subject_category \n", + "0 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "1 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "2 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "3 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "4 Pulsatile tinnitus biolink:PhenotypicFeature \n", + "... ... ... \n", + "237099 Tinnitus biolink:PhenotypicFeature \n", + "237100 Tinnitus biolink:PhenotypicFeature \n", + "237101 Tinnitus biolink:PhenotypicFeature \n", + "237102 Tinnitus biolink:PhenotypicFeature \n", + "237103 Tinnitus biolink:PhenotypicFeature \n", + "\n", + "[237104 rows x 18 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kg.rename(columns = {'category_x':'predicate_category',\n", + " 'category_y': 'subject_category',\n", + " 'id': 'subject_id',\n", + " 'name': 'subject_name'}, inplace = True)\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "89d414ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_value...feature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_categoryidnamecategory
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000...8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHP:0000360Tinnitusbiolink:PhenotypicFeature
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872...8.5852129993810000835(HP:0000360, MONDO:0010643)MONDO:0010643acute leukemia (disease)biolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767...4.5581779977010000939(UNII:25ADE2236L, HP:0000360)UNII:25ADE2236Lthrombinbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563...4.357522999109998659(HP:0000360, UNII:K16AIQ8CTM)UNII:K16AIQ8CTMpertuzumabbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959...3.9260641002429998750(MONDO:0007972, HP:0000360)MONDO:0007972Meniere diseasebiolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
..................................................................
237099CHEBI:114785biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998503...-5.087542100710007299(HP:0008629, CHEBI:114785)CHEBI:114785erlotinibbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237100UNII:52CMI0WC3Ybiolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.999719...-5.06644510169996273(UNII:52CMI0WC3Y, HP:0008629)UNII:52CMI0WC3Yatezolizumabbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237101CHEBI:135738biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998357...-5.0796409789998030(HP:0008629, CHEBI:135738)CHEBI:135738clevidipinebiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237102MONDO:0004967biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.997631...-4.683547100610001385(HP:0008629, MONDO:0004967)MONDO:0004967acute lymphoblastic leukemia (disease)biolink:DiseaseHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_increased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.000000...7.5530039979997731(HP:0000360, HP:0008629)HP:0000360Tinnitusbiolink:PhenotypicFeatureHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
\n", + "

237104 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 CHEBI:114785 biolink:associated_with_decreased_likelihood_of \n", + "237100 UNII:52CMI0WC3Y biolink:associated_with_decreased_likelihood_of \n", + "237101 CHEBI:135738 biolink:associated_with_decreased_likelihood_of \n", + "237102 MONDO:0004967 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_increased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... \n", + "237099 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value ... \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 ... \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 ... \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 ... 
\n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 ... \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 ... \n", + "... ... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.959791 0.998503 ... \n", + "237100 biolink:Association Logistic Regression 0.959791 0.999719 ... \n", + "237101 biolink:Association Logistic Regression 0.959791 0.998357 ... \n", + "237102 biolink:Association Logistic Regression 0.959791 0.997631 ... \n", + "237103 biolink:Association Logistic Regression 0.959791 0.000000 ... \n", + "\n", + " feature_coefficient num_patients_with_condition \\\n", + "0 8.796399 99669 \n", + "1 8.585212 99938 \n", + "2 4.558177 99770 \n", + "3 4.357522 99910 \n", + "4 3.926064 100242 \n", + "... ... ... \n", + "237099 -5.087542 1007 \n", + "237100 -5.066445 1016 \n", + "237101 -5.079640 978 \n", + "237102 -4.683547 1006 \n", + "237103 7.553003 997 \n", + "\n", + " num_patients_without_condition nodes_frozenset \\\n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... \n", + "237099 10007299 (HP:0008629, CHEBI:114785) \n", + "237100 9996273 (UNII:52CMI0WC3Y, HP:0008629) \n", + "237101 9998030 (HP:0008629, CHEBI:135738) \n", + "237102 10001385 (HP:0008629, MONDO:0004967) \n", + "237103 9997731 (HP:0000360, HP:0008629) \n", + "\n", + " subject_id subject_name \\\n", + "0 HP:0008629 Pulsatile tinnitus \n", + "1 MONDO:0010643 acute leukemia (disease) \n", + "2 UNII:25ADE2236L thrombin \n", + "3 UNII:K16AIQ8CTM pertuzumab \n", + "4 MONDO:0007972 Meniere disease \n", + "... ... ... 
\n", + "237099 CHEBI:114785 erlotinib \n", + "237100 UNII:52CMI0WC3Y atezolizumab \n", + "237101 CHEBI:135738 clevidipine \n", + "237102 MONDO:0004967 acute lymphoblastic leukemia (disease) \n", + "237103 HP:0000360 Tinnitus \n", + "\n", + " subject_category id name \\\n", + "0 biolink:PhenotypicFeature HP:0000360 Tinnitus \n", + "1 biolink:Disease HP:0000360 Tinnitus \n", + "2 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "3 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "4 biolink:Disease HP:0000360 Tinnitus \n", + "... ... ... ... \n", + "237099 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237100 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237101 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237102 biolink:Disease HP:0008629 Pulsatile tinnitus \n", + "237103 biolink:PhenotypicFeature HP:0008629 Pulsatile tinnitus \n", + "\n", + " category \n", + "0 biolink:PhenotypicFeature \n", + "1 biolink:PhenotypicFeature \n", + "2 biolink:PhenotypicFeature \n", + "3 biolink:PhenotypicFeature \n", + "4 biolink:PhenotypicFeature \n", + "... ... \n", + "237099 biolink:PhenotypicFeature \n", + "237100 biolink:PhenotypicFeature \n", + "237101 biolink:PhenotypicFeature \n", + "237102 biolink:PhenotypicFeature \n", + "237103 biolink:PhenotypicFeature \n", + "\n", + "[237104 rows x 21 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# merging object info from nodes df\n", + "kg = pd.merge(kg, nodes_data[['id', 'name', 'category']], left_on='object', right_on = 'id', how=\"inner\")\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b81d60e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_value...feature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_categoryobject_idobject_nameobject_category
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000...8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHP:0000360Tinnitusbiolink:PhenotypicFeature
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872...8.5852129993810000835(HP:0000360, MONDO:0010643)MONDO:0010643acute leukemia (disease)biolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767...4.5581779977010000939(UNII:25ADE2236L, HP:0000360)UNII:25ADE2236Lthrombinbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563...4.357522999109998659(HP:0000360, UNII:K16AIQ8CTM)UNII:K16AIQ8CTMpertuzumabbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959...3.9260641002429998750(MONDO:0007972, HP:0000360)MONDO:0007972Meniere diseasebiolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
..................................................................
237099CHEBI:114785biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998503...-5.087542100710007299(HP:0008629, CHEBI:114785)CHEBI:114785erlotinibbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237100UNII:52CMI0WC3Ybiolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.999719...-5.06644510169996273(UNII:52CMI0WC3Y, HP:0008629)UNII:52CMI0WC3Yatezolizumabbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237101CHEBI:135738biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998357...-5.0796409789998030(HP:0008629, CHEBI:135738)CHEBI:135738clevidipinebiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237102MONDO:0004967biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.997631...-4.683547100610001385(HP:0008629, MONDO:0004967)MONDO:0004967acute lymphoblastic leukemia (disease)biolink:DiseaseHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_increased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.000000...7.5530039979997731(HP:0000360, HP:0008629)HP:0000360Tinnitusbiolink:PhenotypicFeatureHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
\n", + "

237104 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 CHEBI:114785 biolink:associated_with_decreased_likelihood_of \n", + "237100 UNII:52CMI0WC3Y biolink:associated_with_decreased_likelihood_of \n", + "237101 CHEBI:135738 biolink:associated_with_decreased_likelihood_of \n", + "237102 MONDO:0004967 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_increased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... \n", + "237099 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value ... \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 ... \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 ... \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 ... 
\n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 ... \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 ... \n", + "... ... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.959791 0.998503 ... \n", + "237100 biolink:Association Logistic Regression 0.959791 0.999719 ... \n", + "237101 biolink:Association Logistic Regression 0.959791 0.998357 ... \n", + "237102 biolink:Association Logistic Regression 0.959791 0.997631 ... \n", + "237103 biolink:Association Logistic Regression 0.959791 0.000000 ... \n", + "\n", + " feature_coefficient num_patients_with_condition \\\n", + "0 8.796399 99669 \n", + "1 8.585212 99938 \n", + "2 4.558177 99770 \n", + "3 4.357522 99910 \n", + "4 3.926064 100242 \n", + "... ... ... \n", + "237099 -5.087542 1007 \n", + "237100 -5.066445 1016 \n", + "237101 -5.079640 978 \n", + "237102 -4.683547 1006 \n", + "237103 7.553003 997 \n", + "\n", + " num_patients_without_condition nodes_frozenset \\\n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... \n", + "237099 10007299 (HP:0008629, CHEBI:114785) \n", + "237100 9996273 (UNII:52CMI0WC3Y, HP:0008629) \n", + "237101 9998030 (HP:0008629, CHEBI:135738) \n", + "237102 10001385 (HP:0008629, MONDO:0004967) \n", + "237103 9997731 (HP:0000360, HP:0008629) \n", + "\n", + " subject_id subject_name \\\n", + "0 HP:0008629 Pulsatile tinnitus \n", + "1 MONDO:0010643 acute leukemia (disease) \n", + "2 UNII:25ADE2236L thrombin \n", + "3 UNII:K16AIQ8CTM pertuzumab \n", + "4 MONDO:0007972 Meniere disease \n", + "... ... ... 
\n", + "237099 CHEBI:114785 erlotinib \n", + "237100 UNII:52CMI0WC3Y atezolizumab \n", + "237101 CHEBI:135738 clevidipine \n", + "237102 MONDO:0004967 acute lymphoblastic leukemia (disease) \n", + "237103 HP:0000360 Tinnitus \n", + "\n", + " subject_category object_id object_name \\\n", + "0 biolink:PhenotypicFeature HP:0000360 Tinnitus \n", + "1 biolink:Disease HP:0000360 Tinnitus \n", + "2 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "3 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "4 biolink:Disease HP:0000360 Tinnitus \n", + "... ... ... ... \n", + "237099 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237100 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237101 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237102 biolink:Disease HP:0008629 Pulsatile tinnitus \n", + "237103 biolink:PhenotypicFeature HP:0008629 Pulsatile tinnitus \n", + "\n", + " object_category \n", + "0 biolink:PhenotypicFeature \n", + "1 biolink:PhenotypicFeature \n", + "2 biolink:PhenotypicFeature \n", + "3 biolink:PhenotypicFeature \n", + "4 biolink:PhenotypicFeature \n", + "... ... \n", + "237099 biolink:PhenotypicFeature \n", + "237100 biolink:PhenotypicFeature \n", + "237101 biolink:PhenotypicFeature \n", + "237102 biolink:PhenotypicFeature \n", + "237103 biolink:PhenotypicFeature \n", + "\n", + "[237104 rows x 21 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kg.rename(columns = {'id':'object_id',\n", + " 'category': 'object_category',\n", + " 'name': 'object_name'}, inplace = True)\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d54c7830", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_value...feature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_categoryobject_idobject_nameobject_category
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000...8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHP:0000360Tinnitusbiolink:PhenotypicFeature
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872...8.5852129993810000835(HP:0000360, MONDO:0010643)MONDO:0010643acute leukemia (disease)biolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767...4.5581779977010000939(UNII:25ADE2236L, HP:0000360)UNII:25ADE2236Lthrombinbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563...4.357522999109998659(HP:0000360, UNII:K16AIQ8CTM)UNII:K16AIQ8CTMpertuzumabbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959...3.9260641002429998750(MONDO:0007972, HP:0000360)MONDO:0007972Meniere diseasebiolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
..................................................................
237099CHEBI:114785biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998503...-5.087542100710007299(HP:0008629, CHEBI:114785)CHEBI:114785erlotinibbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237100UNII:52CMI0WC3Ybiolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.999719...-5.06644510169996273(UNII:52CMI0WC3Y, HP:0008629)UNII:52CMI0WC3Yatezolizumabbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237101CHEBI:135738biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998357...-5.0796409789998030(HP:0008629, CHEBI:135738)CHEBI:135738clevidipinebiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237102MONDO:0004967biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.997631...-4.683547100610001385(HP:0008629, MONDO:0004967)MONDO:0004967acute lymphoblastic leukemia (disease)biolink:DiseaseHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_increased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.000000...7.5530039979997731(HP:0000360, HP:0008629)HP:0000360Tinnitusbiolink:PhenotypicFeatureHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
\n", + "

237104 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 CHEBI:114785 biolink:associated_with_decreased_likelihood_of \n", + "237100 UNII:52CMI0WC3Y biolink:associated_with_decreased_likelihood_of \n", + "237101 CHEBI:135738 biolink:associated_with_decreased_likelihood_of \n", + "237102 MONDO:0004967 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_increased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... \n", + "237099 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value ... \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 ... \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 ... \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 ... 
\n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 ... \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 ... \n", + "... ... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.959791 0.998503 ... \n", + "237100 biolink:Association Logistic Regression 0.959791 0.999719 ... \n", + "237101 biolink:Association Logistic Regression 0.959791 0.998357 ... \n", + "237102 biolink:Association Logistic Regression 0.959791 0.997631 ... \n", + "237103 biolink:Association Logistic Regression 0.959791 0.000000 ... \n", + "\n", + " feature_coefficient num_patients_with_condition \\\n", + "0 8.796399 99669 \n", + "1 8.585212 99938 \n", + "2 4.558177 99770 \n", + "3 4.357522 99910 \n", + "4 3.926064 100242 \n", + "... ... ... \n", + "237099 -5.087542 1007 \n", + "237100 -5.066445 1016 \n", + "237101 -5.079640 978 \n", + "237102 -4.683547 1006 \n", + "237103 7.553003 997 \n", + "\n", + " num_patients_without_condition nodes_frozenset \\\n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... \n", + "237099 10007299 (HP:0008629, CHEBI:114785) \n", + "237100 9996273 (UNII:52CMI0WC3Y, HP:0008629) \n", + "237101 9998030 (HP:0008629, CHEBI:135738) \n", + "237102 10001385 (HP:0008629, MONDO:0004967) \n", + "237103 9997731 (HP:0000360, HP:0008629) \n", + "\n", + " subject_id subject_name \\\n", + "0 HP:0008629 Pulsatile tinnitus \n", + "1 MONDO:0010643 acute leukemia (disease) \n", + "2 UNII:25ADE2236L thrombin \n", + "3 UNII:K16AIQ8CTM pertuzumab \n", + "4 MONDO:0007972 Meniere disease \n", + "... ... ... 
\n", + "237099 CHEBI:114785 erlotinib \n", + "237100 UNII:52CMI0WC3Y atezolizumab \n", + "237101 CHEBI:135738 clevidipine \n", + "237102 MONDO:0004967 acute lymphoblastic leukemia (disease) \n", + "237103 HP:0000360 Tinnitus \n", + "\n", + " subject_category object_id object_name \\\n", + "0 biolink:PhenotypicFeature HP:0000360 Tinnitus \n", + "1 biolink:Disease HP:0000360 Tinnitus \n", + "2 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "3 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "4 biolink:Disease HP:0000360 Tinnitus \n", + "... ... ... ... \n", + "237099 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237100 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237101 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237102 biolink:Disease HP:0008629 Pulsatile tinnitus \n", + "237103 biolink:PhenotypicFeature HP:0008629 Pulsatile tinnitus \n", + "\n", + " object_category \n", + "0 biolink:PhenotypicFeature \n", + "1 biolink:PhenotypicFeature \n", + "2 biolink:PhenotypicFeature \n", + "3 biolink:PhenotypicFeature \n", + "4 biolink:PhenotypicFeature \n", + "... ... 
\n", + "237099 biolink:PhenotypicFeature \n", + "237100 biolink:PhenotypicFeature \n", + "237101 biolink:PhenotypicFeature \n", + "237102 biolink:PhenotypicFeature \n", + "237103 biolink:PhenotypicFeature \n", + "\n", + "[237104 rows x 21 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# ENSURE THERE ARE UNIQUE RECORDS/ROWS\n", + "kg = kg.drop_duplicates(['subject', 'object', 'auc_roc', 'p_value', 'feature_coefficient'], keep='first')\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "08820f13", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "292\n", + "0\n" + ] + } + ], + "source": [ + "kg_NONE_subjects = kg[kg[\"subject\"].str.contains(\"NONE\")==True]\n", + "print(len(kg_NONE_subjects))\n", + "kg_NONE_objects = kg[kg[\"object\"].str.contains(\"NONE\")==True]\n", + "print(len(kg_NONE_objects))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "cc1c65fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_value...feature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_categoryobject_idobject_nameobject_category
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000...8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHP:0000360Tinnitusbiolink:PhenotypicFeature
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872...8.5852129993810000835(HP:0000360, MONDO:0010643)MONDO:0010643acute leukemia (disease)biolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767...4.5581779977010000939(UNII:25ADE2236L, HP:0000360)UNII:25ADE2236Lthrombinbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563...4.357522999109998659(HP:0000360, UNII:K16AIQ8CTM)UNII:K16AIQ8CTMpertuzumabbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959...3.9260641002429998750(MONDO:0007972, HP:0000360)MONDO:0007972Meniere diseasebiolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
..................................................................
237099CHEBI:114785biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998503...-5.087542100710007299(HP:0008629, CHEBI:114785)CHEBI:114785erlotinibbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237100UNII:52CMI0WC3Ybiolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.999719...-5.06644510169996273(UNII:52CMI0WC3Y, HP:0008629)UNII:52CMI0WC3Yatezolizumabbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237101CHEBI:135738biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998357...-5.0796409789998030(HP:0008629, CHEBI:135738)CHEBI:135738clevidipinebiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237102MONDO:0004967biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.997631...-4.683547100610001385(HP:0008629, MONDO:0004967)MONDO:0004967acute lymphoblastic leukemia (disease)biolink:DiseaseHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_increased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.000000...7.5530039979997731(HP:0000360, HP:0008629)HP:0000360Tinnitusbiolink:PhenotypicFeatureHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
\n", + "

236812 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 CHEBI:114785 biolink:associated_with_decreased_likelihood_of \n", + "237100 UNII:52CMI0WC3Y biolink:associated_with_decreased_likelihood_of \n", + "237101 CHEBI:135738 biolink:associated_with_decreased_likelihood_of \n", + "237102 MONDO:0004967 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_increased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... \n", + "237099 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value ... \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 ... \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 ... \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 ... 
\n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 ... \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 ... \n", + "... ... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.959791 0.998503 ... \n", + "237100 biolink:Association Logistic Regression 0.959791 0.999719 ... \n", + "237101 biolink:Association Logistic Regression 0.959791 0.998357 ... \n", + "237102 biolink:Association Logistic Regression 0.959791 0.997631 ... \n", + "237103 biolink:Association Logistic Regression 0.959791 0.000000 ... \n", + "\n", + " feature_coefficient num_patients_with_condition \\\n", + "0 8.796399 99669 \n", + "1 8.585212 99938 \n", + "2 4.558177 99770 \n", + "3 4.357522 99910 \n", + "4 3.926064 100242 \n", + "... ... ... \n", + "237099 -5.087542 1007 \n", + "237100 -5.066445 1016 \n", + "237101 -5.079640 978 \n", + "237102 -4.683547 1006 \n", + "237103 7.553003 997 \n", + "\n", + " num_patients_without_condition nodes_frozenset \\\n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... \n", + "237099 10007299 (HP:0008629, CHEBI:114785) \n", + "237100 9996273 (UNII:52CMI0WC3Y, HP:0008629) \n", + "237101 9998030 (HP:0008629, CHEBI:135738) \n", + "237102 10001385 (HP:0008629, MONDO:0004967) \n", + "237103 9997731 (HP:0000360, HP:0008629) \n", + "\n", + " subject_id subject_name \\\n", + "0 HP:0008629 Pulsatile tinnitus \n", + "1 MONDO:0010643 acute leukemia (disease) \n", + "2 UNII:25ADE2236L thrombin \n", + "3 UNII:K16AIQ8CTM pertuzumab \n", + "4 MONDO:0007972 Meniere disease \n", + "... ... ... 
\n", + "237099 CHEBI:114785 erlotinib \n", + "237100 UNII:52CMI0WC3Y atezolizumab \n", + "237101 CHEBI:135738 clevidipine \n", + "237102 MONDO:0004967 acute lymphoblastic leukemia (disease) \n", + "237103 HP:0000360 Tinnitus \n", + "\n", + " subject_category object_id object_name \\\n", + "0 biolink:PhenotypicFeature HP:0000360 Tinnitus \n", + "1 biolink:Disease HP:0000360 Tinnitus \n", + "2 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "3 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "4 biolink:Disease HP:0000360 Tinnitus \n", + "... ... ... ... \n", + "237099 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237100 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237101 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237102 biolink:Disease HP:0008629 Pulsatile tinnitus \n", + "237103 biolink:PhenotypicFeature HP:0008629 Pulsatile tinnitus \n", + "\n", + " object_category \n", + "0 biolink:PhenotypicFeature \n", + "1 biolink:PhenotypicFeature \n", + "2 biolink:PhenotypicFeature \n", + "3 biolink:PhenotypicFeature \n", + "4 biolink:PhenotypicFeature \n", + "... ... \n", + "237099 biolink:PhenotypicFeature \n", + "237100 biolink:PhenotypicFeature \n", + "237101 biolink:PhenotypicFeature \n", + "237102 biolink:PhenotypicFeature \n", + "237103 biolink:PhenotypicFeature \n", + "\n", + "[236812 rows x 21 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# for some reason, some subject or object ids are empty (they actually literally contain the string \"NONE\")\n", + "kg = kg.dropna(axis=0, subset=['subject'])\n", + "kg = kg.dropna(axis=0, subset=['object'])\n", + "kg = kg[~kg[\"subject\"].str.contains(\"NONE\")==True]\n", + "kg = kg[~kg[\"object\"].str.contains(\"NONE\")==True]\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c14460e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_value...feature_coefficientnum_patients_with_conditionnum_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_categoryobject_idobject_nameobject_category
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000...8.796399996699999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHP:0000360Tinnitusbiolink:PhenotypicFeature
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872...8.5852129993810000835(HP:0000360, MONDO:0010643)MONDO:0010643acute leukemia (disease)biolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767...4.5581779977010000939(UNII:25ADE2236L, HP:0000360)UNII:25ADE2236Lthrombinbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563...4.357522999109998659(HP:0000360, UNII:K16AIQ8CTM)UNII:K16AIQ8CTMpertuzumabbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeature
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959...3.9260641002429998750(MONDO:0007972, HP:0000360)MONDO:0007972Meniere diseasebiolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeature
..................................................................
237099CHEBI:114785biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998503...-5.087542100710007299(HP:0008629, CHEBI:114785)CHEBI:114785erlotinibbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237100UNII:52CMI0WC3Ybiolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.999719...-5.06644510169996273(UNII:52CMI0WC3Y, HP:0008629)UNII:52CMI0WC3Yatezolizumabbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237101CHEBI:135738biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998357...-5.0796409789998030(HP:0008629, CHEBI:135738)CHEBI:135738clevidipinebiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237102MONDO:0004967biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.997631...-4.683547100610001385(HP:0008629, MONDO:0004967)MONDO:0004967acute lymphoblastic leukemia (disease)biolink:DiseaseHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
237103HP:0000360biolink:associated_with_increased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.000000...7.5530039979997731(HP:0000360, HP:0008629)HP:0000360Tinnitusbiolink:PhenotypicFeatureHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeature
\n", + "

236812 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 CHEBI:114785 biolink:associated_with_decreased_likelihood_of \n", + "237100 UNII:52CMI0WC3Y biolink:associated_with_decreased_likelihood_of \n", + "237101 CHEBI:135738 biolink:associated_with_decreased_likelihood_of \n", + "237102 MONDO:0004967 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_increased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... \n", + "237099 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value ... \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 ... \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 ... \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 ... 
\n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 ... \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 ... \n", + "... ... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.959791 0.998503 ... \n", + "237100 biolink:Association Logistic Regression 0.959791 0.999719 ... \n", + "237101 biolink:Association Logistic Regression 0.959791 0.998357 ... \n", + "237102 biolink:Association Logistic Regression 0.959791 0.997631 ... \n", + "237103 biolink:Association Logistic Regression 0.959791 0.000000 ... \n", + "\n", + " feature_coefficient num_patients_with_condition \\\n", + "0 8.796399 99669 \n", + "1 8.585212 99938 \n", + "2 4.558177 99770 \n", + "3 4.357522 99910 \n", + "4 3.926064 100242 \n", + "... ... ... \n", + "237099 -5.087542 1007 \n", + "237100 -5.066445 1016 \n", + "237101 -5.079640 978 \n", + "237102 -4.683547 1006 \n", + "237103 7.553003 997 \n", + "\n", + " num_patients_without_condition nodes_frozenset \\\n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... \n", + "237099 10007299 (HP:0008629, CHEBI:114785) \n", + "237100 9996273 (UNII:52CMI0WC3Y, HP:0008629) \n", + "237101 9998030 (HP:0008629, CHEBI:135738) \n", + "237102 10001385 (HP:0008629, MONDO:0004967) \n", + "237103 9997731 (HP:0000360, HP:0008629) \n", + "\n", + " subject_id subject_name \\\n", + "0 HP:0008629 Pulsatile tinnitus \n", + "1 MONDO:0010643 acute leukemia (disease) \n", + "2 UNII:25ADE2236L thrombin \n", + "3 UNII:K16AIQ8CTM pertuzumab \n", + "4 MONDO:0007972 Meniere disease \n", + "... ... ... 
\n", + "237099 CHEBI:114785 erlotinib \n", + "237100 UNII:52CMI0WC3Y atezolizumab \n", + "237101 CHEBI:135738 clevidipine \n", + "237102 MONDO:0004967 acute lymphoblastic leukemia (disease) \n", + "237103 HP:0000360 Tinnitus \n", + "\n", + " subject_category object_id object_name \\\n", + "0 biolink:PhenotypicFeature HP:0000360 Tinnitus \n", + "1 biolink:Disease HP:0000360 Tinnitus \n", + "2 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "3 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "4 biolink:Disease HP:0000360 Tinnitus \n", + "... ... ... ... \n", + "237099 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237100 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237101 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237102 biolink:Disease HP:0008629 Pulsatile tinnitus \n", + "237103 biolink:PhenotypicFeature HP:0008629 Pulsatile tinnitus \n", + "\n", + " object_category \n", + "0 biolink:PhenotypicFeature \n", + "1 biolink:PhenotypicFeature \n", + "2 biolink:PhenotypicFeature \n", + "3 biolink:PhenotypicFeature \n", + "4 biolink:PhenotypicFeature \n", + "... ... 
\n", + "237099 biolink:PhenotypicFeature \n", + "237100 biolink:PhenotypicFeature \n", + "237101 biolink:PhenotypicFeature \n", + "237102 biolink:PhenotypicFeature \n", + "237103 biolink:PhenotypicFeature \n", + "\n", + "[236812 rows x 21 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# for some reason, some subject or object ids are empty (they actually literally contain the string \"NONE\")\n", + "# get rid of these rows\n", + "kg = kg[~kg[\"subject\"].str.contains(\"NONE\")==True] # subject and object are all CURIEs, not names\n", + "kg = kg[~kg[\"object\"].str.contains(\"NONE\")==True]\n", + "kg = kg[~kg[\"subject\"].str.contains(\"none\")==True]\n", + "kg = kg[~kg[\"object\"].str.contains(\"none\")==True]\n", + "kg = kg[~kg[\"subject\"].str.contains(\"None\")==True]\n", + "kg = kg[~kg[\"object\"].str.contains(\"None\")==True]\n", + "kg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5201401a", + "metadata": {}, + "outputs": [], + "source": [ + "# OPTIONAL THRESHOLD\n", + "# kg_pval_subsetted = kg[kg[\"p_value\"] < 0.2]\n", + "# kg_pval_subsetted" + ] + }, + { + "cell_type": "code", + "execution_count": 334, + "id": "e809e4a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subject_category_postfixsubject_prefixincreased_or_decreasedobject_category_postfixobject_prefixCountobject_category_postfix_abbvsubject_category_postfix_abbvjoined_to_match_xbte
0ChemicalEntityCHEBIdecreasedDiseaseMONDO34917DiseaseChemChemCHEBI_decreased_DiseaseMONDO
1ChemicalEntityCHEBIdecreasedDiseaseNCIT212DiseaseChemChemCHEBI_decreased_DiseaseNCIT
2ChemicalEntityCHEBIdecreasedDiseaseSNOMEDCT2917DiseaseChemChemCHEBI_decreased_DiseaseSNOMEDCT
3ChemicalEntityCHEBIdecreasedPhenotypicFeatureHP24505PhenoChemChemCHEBI_decreased_PhenoHP
4ChemicalEntityCHEBIdecreasedPhenotypicFeatureNCIT2254PhenoChemChemCHEBI_decreased_PhenoNCIT
5ChemicalEntityCHEBIdecreasedPhenotypicFeatureSNOMEDCT244PhenoChemChemCHEBI_decreased_PhenoSNOMEDCT
6ChemicalEntityCHEBIdecreasedProcedureNCIT222ProcedureChemChemCHEBI_decreased_ProcedureNCIT
7ChemicalEntityCHEBIincreasedDiseaseMONDO18248DiseaseChemChemCHEBI_increased_DiseaseMONDO
8ChemicalEntityCHEBIincreasedDiseaseNCIT131DiseaseChemChemCHEBI_increased_DiseaseNCIT
9ChemicalEntityCHEBIincreasedDiseaseSNOMEDCT1542DiseaseChemChemCHEBI_increased_DiseaseSNOMEDCT
10ChemicalEntityCHEBIincreasedPhenotypicFeatureHP13225PhenoChemChemCHEBI_increased_PhenoHP
11ChemicalEntityCHEBIincreasedPhenotypicFeatureNCIT1176PhenoChemChemCHEBI_increased_PhenoNCIT
12ChemicalEntityCHEBIincreasedPhenotypicFeatureSNOMEDCT99PhenoChemChemCHEBI_increased_PhenoSNOMEDCT
13ChemicalEntityCHEBIincreasedProcedureNCIT121ProcedureChemChemCHEBI_increased_ProcedureNCIT
14ChemicalEntityUNIIdecreasedDiseaseMONDO5902DiseaseChemChemUNII_decreased_DiseaseMONDO
15ChemicalEntityUNIIdecreasedDiseaseNCIT44DiseaseChemChemUNII_decreased_DiseaseNCIT
16ChemicalEntityUNIIdecreasedDiseaseSNOMEDCT505DiseaseChemChemUNII_decreased_DiseaseSNOMEDCT
17ChemicalEntityUNIIdecreasedPhenotypicFeatureHP3727PhenoChemChemUNII_decreased_PhenoHP
18ChemicalEntityUNIIdecreasedPhenotypicFeatureNCIT381PhenoChemChemUNII_decreased_PhenoNCIT
19ChemicalEntityUNIIdecreasedPhenotypicFeatureSNOMEDCT39PhenoChemChemUNII_decreased_PhenoSNOMEDCT
20ChemicalEntityUNIIdecreasedProcedureNCIT43ProcedureChemChemUNII_decreased_ProcedureNCIT
21ChemicalEntityUNIIincreasedDiseaseMONDO2158DiseaseChemChemUNII_increased_DiseaseMONDO
22ChemicalEntityUNIIincreasedDiseaseNCIT8DiseaseChemChemUNII_increased_DiseaseNCIT
23ChemicalEntityUNIIincreasedDiseaseSNOMEDCT171DiseaseChemChemUNII_increased_DiseaseSNOMEDCT
24ChemicalEntityUNIIincreasedPhenotypicFeatureHP1993PhenoChemChemUNII_increased_PhenoHP
25ChemicalEntityUNIIincreasedPhenotypicFeatureNCIT139PhenoChemChemUNII_increased_PhenoNCIT
26ChemicalEntityUNIIincreasedPhenotypicFeatureSNOMEDCT13PhenoChemChemUNII_increased_PhenoSNOMEDCT
27ChemicalEntityUNIIincreasedProcedureNCIT9ProcedureChemChemUNII_increased_ProcedureNCIT
28DiseaseMONDOdecreasedDiseaseMONDO13640DiseaseDiseaseDiseaseMONDO_decreased_DiseaseMONDO
29DiseaseMONDOdecreasedDiseaseNCIT86DiseaseDiseaseDiseaseMONDO_decreased_DiseaseNCIT
30DiseaseMONDOdecreasedDiseaseSNOMEDCT1237DiseaseDiseaseDiseaseMONDO_decreased_DiseaseSNOMEDCT
31DiseaseMONDOdecreasedPhenotypicFeatureHP10101PhenoDiseaseDiseaseMONDO_decreased_PhenoHP
32DiseaseMONDOdecreasedPhenotypicFeatureNCIT1293PhenoDiseaseDiseaseMONDO_decreased_PhenoNCIT
33DiseaseMONDOdecreasedPhenotypicFeatureSNOMEDCT88PhenoDiseaseDiseaseMONDO_decreased_PhenoSNOMEDCT
34DiseaseMONDOdecreasedProcedureNCIT113ProcedureDiseaseDiseaseMONDO_decreased_ProcedureNCIT
35DiseaseMONDOincreasedDiseaseMONDO14415DiseaseDiseaseDiseaseMONDO_increased_DiseaseMONDO
36DiseaseMONDOincreasedDiseaseNCIT96DiseaseDiseaseDiseaseMONDO_increased_DiseaseNCIT
37DiseaseMONDOincreasedDiseaseSNOMEDCT1129DiseaseDiseaseDiseaseMONDO_increased_DiseaseSNOMEDCT
38DiseaseMONDOincreasedPhenotypicFeatureHP9919PhenoDiseaseDiseaseMONDO_increased_PhenoHP
39DiseaseMONDOincreasedPhenotypicFeatureNCIT527PhenoDiseaseDiseaseMONDO_increased_PhenoNCIT
40DiseaseMONDOincreasedPhenotypicFeatureSNOMEDCT94PhenoDiseaseDiseaseMONDO_increased_PhenoSNOMEDCT
41DiseaseMONDOincreasedProcedureNCIT69ProcedureDiseaseDiseaseMONDO_increased_ProcedureNCIT
42DiseaseNCITdecreasedDiseaseMONDO73DiseaseDiseaseDiseaseNCIT_decreased_DiseaseMONDO
43DiseaseNCITdecreasedDiseaseSNOMEDCT7DiseaseDiseaseDiseaseNCIT_decreased_DiseaseSNOMEDCT
44DiseaseNCITdecreasedPhenotypicFeatureHP44PhenoDiseaseDiseaseNCIT_decreased_PhenoHP
45DiseaseNCITdecreasedPhenotypicFeatureNCIT9PhenoDiseaseDiseaseNCIT_decreased_PhenoNCIT
46DiseaseNCITdecreasedProcedureNCIT1ProcedureDiseaseDiseaseNCIT_decreased_ProcedureNCIT
47DiseaseNCITincreasedDiseaseMONDO82DiseaseDiseaseDiseaseNCIT_increased_DiseaseMONDO
48DiseaseNCITincreasedDiseaseSNOMEDCT6DiseaseDiseaseDiseaseNCIT_increased_DiseaseSNOMEDCT
49DiseaseNCITincreasedPhenotypicFeatureHP66PhenoDiseaseDiseaseNCIT_increased_PhenoHP
50DiseaseNCITincreasedPhenotypicFeatureNCIT1PhenoDiseaseDiseaseNCIT_increased_PhenoNCIT
51DiseaseNCITincreasedPhenotypicFeatureSNOMEDCT1PhenoDiseaseDiseaseNCIT_increased_PhenoSNOMEDCT
52DiseaseSNOMEDCTdecreasedDiseaseMONDO1318DiseaseDiseaseDiseaseSNOMEDCT_decreased_DiseaseMONDO
53DiseaseSNOMEDCTdecreasedDiseaseNCIT10DiseaseDiseaseDiseaseSNOMEDCT_decreased_DiseaseNCIT
54DiseaseSNOMEDCTdecreasedDiseaseSNOMEDCT101DiseaseDiseaseDiseaseSNOMEDCT_decreased_DiseaseSNOMEDCT
55DiseaseSNOMEDCTdecreasedPhenotypicFeatureHP957PhenoDiseaseDiseaseSNOMEDCT_decreased_PhenoHP
56DiseaseSNOMEDCTdecreasedPhenotypicFeatureNCIT116PhenoDiseaseDiseaseSNOMEDCT_decreased_PhenoNCIT
57DiseaseSNOMEDCTdecreasedPhenotypicFeatureSNOMEDCT8PhenoDiseaseDiseaseSNOMEDCT_decreased_PhenoSNOMEDCT
58DiseaseSNOMEDCTdecreasedProcedureNCIT13ProcedureDiseaseDiseaseSNOMEDCT_decreased_ProcedureNCIT
59DiseaseSNOMEDCTincreasedDiseaseMONDO1007DiseaseDiseaseDiseaseSNOMEDCT_increased_DiseaseMONDO
60DiseaseSNOMEDCTincreasedDiseaseNCIT5DiseaseDiseaseDiseaseSNOMEDCT_increased_DiseaseNCIT
61DiseaseSNOMEDCTincreasedDiseaseSNOMEDCT81DiseaseDiseaseDiseaseSNOMEDCT_increased_DiseaseSNOMEDCT
62DiseaseSNOMEDCTincreasedPhenotypicFeatureHP693PhenoDiseaseDiseaseSNOMEDCT_increased_PhenoHP
63DiseaseSNOMEDCTincreasedPhenotypicFeatureNCIT34PhenoDiseaseDiseaseSNOMEDCT_increased_PhenoNCIT
64DiseaseSNOMEDCTincreasedPhenotypicFeatureSNOMEDCT7PhenoDiseaseDiseaseSNOMEDCT_increased_PhenoSNOMEDCT
65DiseaseSNOMEDCTincreasedProcedureNCIT2ProcedureDiseaseDiseaseSNOMEDCT_increased_ProcedureNCIT
66PhenotypicFeatureHPdecreasedDiseaseMONDO18759DiseasePhenoPhenoHP_decreased_DiseaseMONDO
67PhenotypicFeatureHPdecreasedDiseaseNCIT113DiseasePhenoPhenoHP_decreased_DiseaseNCIT
68PhenotypicFeatureHPdecreasedDiseaseSNOMEDCT1532DiseasePhenoPhenoHP_decreased_DiseaseSNOMEDCT
69PhenotypicFeatureHPdecreasedPhenotypicFeatureHP11425PhenoPhenoPhenoHP_decreased_PhenoHP
70PhenotypicFeatureHPdecreasedPhenotypicFeatureNCIT1257PhenoPhenoPhenoHP_decreased_PhenoNCIT
71PhenotypicFeatureHPdecreasedPhenotypicFeatureSNOMEDCT121PhenoPhenoPhenoHP_decreased_PhenoSNOMEDCT
72PhenotypicFeatureHPdecreasedProcedureNCIT132ProcedurePhenoPhenoHP_decreased_ProcedureNCIT
73PhenotypicFeatureHPincreasedDiseaseMONDO12086DiseasePhenoPhenoHP_increased_DiseaseMONDO
74PhenotypicFeatureHPincreasedDiseaseNCIT86DiseasePhenoPhenoHP_increased_DiseaseNCIT
75PhenotypicFeatureHPincreasedDiseaseSNOMEDCT1055DiseasePhenoPhenoHP_increased_DiseaseSNOMEDCT
76PhenotypicFeatureHPincreasedPhenotypicFeatureHP10355PhenoPhenoPhenoHP_increased_PhenoHP
77PhenotypicFeatureHPincreasedPhenotypicFeatureNCIT733PhenoPhenoPhenoHP_increased_PhenoNCIT
78PhenotypicFeatureHPincreasedPhenotypicFeatureSNOMEDCT78PhenoPhenoPhenoHP_increased_PhenoSNOMEDCT
79PhenotypicFeatureHPincreasedProcedureNCIT67ProcedurePhenoPhenoHP_increased_ProcedureNCIT
80PhenotypicFeatureNCITdecreasedDiseaseMONDO1170DiseasePhenoPhenoNCIT_decreased_DiseaseMONDO
81PhenotypicFeatureNCITdecreasedDiseaseNCIT7DiseasePhenoPhenoNCIT_decreased_DiseaseNCIT
82PhenotypicFeatureNCITdecreasedDiseaseSNOMEDCT94DiseasePhenoPhenoNCIT_decreased_DiseaseSNOMEDCT
83PhenotypicFeatureNCITdecreasedPhenotypicFeatureHP705PhenoPhenoPhenoNCIT_decreased_PhenoHP
84PhenotypicFeatureNCITdecreasedPhenotypicFeatureNCIT56PhenoPhenoPhenoNCIT_decreased_PhenoNCIT
85PhenotypicFeatureNCITdecreasedPhenotypicFeatureSNOMEDCT9PhenoPhenoPhenoNCIT_decreased_PhenoSNOMEDCT
86PhenotypicFeatureNCITdecreasedProcedureNCIT7ProcedurePhenoPhenoNCIT_decreased_ProcedureNCIT
87PhenotypicFeatureNCITincreasedDiseaseMONDO380DiseasePhenoPhenoNCIT_increased_DiseaseMONDO
88PhenotypicFeatureNCITincreasedDiseaseNCIT3DiseasePhenoPhenoNCIT_increased_DiseaseNCIT
89PhenotypicFeatureNCITincreasedDiseaseSNOMEDCT36DiseasePhenoPhenoNCIT_increased_DiseaseSNOMEDCT
90PhenotypicFeatureNCITincreasedPhenotypicFeatureHP395PhenoPhenoPhenoNCIT_increased_PhenoHP
91PhenotypicFeatureNCITincreasedPhenotypicFeatureNCIT34PhenoPhenoPhenoNCIT_increased_PhenoNCIT
92PhenotypicFeatureNCITincreasedPhenotypicFeatureSNOMEDCT1PhenoPhenoPhenoNCIT_increased_PhenoSNOMEDCT
93PhenotypicFeatureNCITincreasedProcedureNCIT3ProcedurePhenoPhenoNCIT_increased_ProcedureNCIT
94PhenotypicFeatureSNOMEDCTdecreasedDiseaseMONDO307DiseasePhenoPhenoSNOMEDCT_decreased_DiseaseMONDO
95PhenotypicFeatureSNOMEDCTdecreasedDiseaseNCIT1DiseasePhenoPhenoSNOMEDCT_decreased_DiseaseNCIT
96PhenotypicFeatureSNOMEDCTdecreasedDiseaseSNOMEDCT26DiseasePhenoPhenoSNOMEDCT_decreased_DiseaseSNOMEDCT
97PhenotypicFeatureSNOMEDCTdecreasedPhenotypicFeatureHP217PhenoPhenoPhenoSNOMEDCT_decreased_PhenoHP
98PhenotypicFeatureSNOMEDCTdecreasedPhenotypicFeatureNCIT25PhenoPhenoPhenoSNOMEDCT_decreased_PhenoNCIT
99PhenotypicFeatureSNOMEDCTdecreasedPhenotypicFeatureSNOMEDCT2PhenoPhenoPhenoSNOMEDCT_decreased_PhenoSNOMEDCT
100PhenotypicFeatureSNOMEDCTdecreasedProcedureNCIT2ProcedurePhenoPhenoSNOMEDCT_decreased_ProcedureNCIT
101PhenotypicFeatureSNOMEDCTincreasedDiseaseMONDO158DiseasePhenoPhenoSNOMEDCT_increased_DiseaseMONDO
102PhenotypicFeatureSNOMEDCTincreasedDiseaseNCIT2DiseasePhenoPhenoSNOMEDCT_increased_DiseaseNCIT
103PhenotypicFeatureSNOMEDCTincreasedDiseaseSNOMEDCT13DiseasePhenoPhenoSNOMEDCT_increased_DiseaseSNOMEDCT
104PhenotypicFeatureSNOMEDCTincreasedPhenotypicFeatureHP113PhenoPhenoPhenoSNOMEDCT_increased_PhenoHP
105PhenotypicFeatureSNOMEDCTincreasedPhenotypicFeatureNCIT5PhenoPhenoPhenoSNOMEDCT_increased_PhenoNCIT
106PhenotypicFeatureSNOMEDCTincreasedProcedureNCIT1ProcedurePhenoPhenoSNOMEDCT_increased_ProcedureNCIT
107ProcedureNCITdecreasedDiseaseMONDO721DiseaseProcedureProcedureNCIT_decreased_DiseaseMONDO
108ProcedureNCITdecreasedDiseaseNCIT4DiseaseProcedureProcedureNCIT_decreased_DiseaseNCIT
109ProcedureNCITdecreasedDiseaseSNOMEDCT58DiseaseProcedureProcedureNCIT_decreased_DiseaseSNOMEDCT
110ProcedureNCITdecreasedPhenotypicFeatureHP502PhenoProcedureProcedureNCIT_decreased_PhenoHP
111ProcedureNCITdecreasedPhenotypicFeatureNCIT56PhenoProcedureProcedureNCIT_decreased_PhenoNCIT
112ProcedureNCITdecreasedPhenotypicFeatureSNOMEDCT4PhenoProcedureProcedureNCIT_decreased_PhenoSNOMEDCT
113ProcedureNCITdecreasedProcedureNCIT3ProcedureProcedureProcedureNCIT_decreased_ProcedureNCIT
114ProcedureNCITincreasedDiseaseMONDO209DiseaseProcedureProcedureNCIT_increased_DiseaseMONDO
115ProcedureNCITincreasedDiseaseNCIT2DiseaseProcedureProcedureNCIT_increased_DiseaseNCIT
116ProcedureNCITincreasedDiseaseSNOMEDCT20DiseaseProcedureProcedureNCIT_increased_DiseaseSNOMEDCT
117ProcedureNCITincreasedPhenotypicFeatureHP158PhenoProcedureProcedureNCIT_increased_PhenoHP
118ProcedureNCITincreasedPhenotypicFeatureNCIT4PhenoProcedureProcedureNCIT_increased_PhenoNCIT
119ProcedureNCITincreasedPhenotypicFeatureSNOMEDCT2PhenoProcedureProcedureNCIT_increased_PhenoSNOMEDCT
120ProcedureNCITincreasedProcedureNCIT2ProcedureProcedureProcedureNCIT_increased_ProcedureNCIT
\n", + "
" + ], + "text/plain": [ + " subject_category_postfix subject_prefix increased_or_decreased \\\n", + "0 ChemicalEntity CHEBI decreased \n", + "1 ChemicalEntity CHEBI decreased \n", + "2 ChemicalEntity CHEBI decreased \n", + "3 ChemicalEntity CHEBI decreased \n", + "4 ChemicalEntity CHEBI decreased \n", + "5 ChemicalEntity CHEBI decreased \n", + "6 ChemicalEntity CHEBI decreased \n", + "7 ChemicalEntity CHEBI increased \n", + "8 ChemicalEntity CHEBI increased \n", + "9 ChemicalEntity CHEBI increased \n", + "10 ChemicalEntity CHEBI increased \n", + "11 ChemicalEntity CHEBI increased \n", + "12 ChemicalEntity CHEBI increased \n", + "13 ChemicalEntity CHEBI increased \n", + "14 ChemicalEntity UNII decreased \n", + "15 ChemicalEntity UNII decreased \n", + "16 ChemicalEntity UNII decreased \n", + "17 ChemicalEntity UNII decreased \n", + "18 ChemicalEntity UNII decreased \n", + "19 ChemicalEntity UNII decreased \n", + "20 ChemicalEntity UNII decreased \n", + "21 ChemicalEntity UNII increased \n", + "22 ChemicalEntity UNII increased \n", + "23 ChemicalEntity UNII increased \n", + "24 ChemicalEntity UNII increased \n", + "25 ChemicalEntity UNII increased \n", + "26 ChemicalEntity UNII increased \n", + "27 ChemicalEntity UNII increased \n", + "28 Disease MONDO decreased \n", + "29 Disease MONDO decreased \n", + "30 Disease MONDO decreased \n", + "31 Disease MONDO decreased \n", + "32 Disease MONDO decreased \n", + "33 Disease MONDO decreased \n", + "34 Disease MONDO decreased \n", + "35 Disease MONDO increased \n", + "36 Disease MONDO increased \n", + "37 Disease MONDO increased \n", + "38 Disease MONDO increased \n", + "39 Disease MONDO increased \n", + "40 Disease MONDO increased \n", + "41 Disease MONDO increased \n", + "42 Disease NCIT decreased \n", + "43 Disease NCIT decreased \n", + "44 Disease NCIT decreased \n", + "45 Disease NCIT decreased \n", + "46 Disease NCIT decreased \n", + "47 Disease NCIT increased \n", + "48 Disease NCIT increased \n", + "49 Disease NCIT 
increased \n", + "50 Disease NCIT increased \n", + "51 Disease NCIT increased \n", + "52 Disease SNOMEDCT decreased \n", + "53 Disease SNOMEDCT decreased \n", + "54 Disease SNOMEDCT decreased \n", + "55 Disease SNOMEDCT decreased \n", + "56 Disease SNOMEDCT decreased \n", + "57 Disease SNOMEDCT decreased \n", + "58 Disease SNOMEDCT decreased \n", + "59 Disease SNOMEDCT increased \n", + "60 Disease SNOMEDCT increased \n", + "61 Disease SNOMEDCT increased \n", + "62 Disease SNOMEDCT increased \n", + "63 Disease SNOMEDCT increased \n", + "64 Disease SNOMEDCT increased \n", + "65 Disease SNOMEDCT increased \n", + "66 PhenotypicFeature HP decreased \n", + "67 PhenotypicFeature HP decreased \n", + "68 PhenotypicFeature HP decreased \n", + "69 PhenotypicFeature HP decreased \n", + "70 PhenotypicFeature HP decreased \n", + "71 PhenotypicFeature HP decreased \n", + "72 PhenotypicFeature HP decreased \n", + "73 PhenotypicFeature HP increased \n", + "74 PhenotypicFeature HP increased \n", + "75 PhenotypicFeature HP increased \n", + "76 PhenotypicFeature HP increased \n", + "77 PhenotypicFeature HP increased \n", + "78 PhenotypicFeature HP increased \n", + "79 PhenotypicFeature HP increased \n", + "80 PhenotypicFeature NCIT decreased \n", + "81 PhenotypicFeature NCIT decreased \n", + "82 PhenotypicFeature NCIT decreased \n", + "83 PhenotypicFeature NCIT decreased \n", + "84 PhenotypicFeature NCIT decreased \n", + "85 PhenotypicFeature NCIT decreased \n", + "86 PhenotypicFeature NCIT decreased \n", + "87 PhenotypicFeature NCIT increased \n", + "88 PhenotypicFeature NCIT increased \n", + "89 PhenotypicFeature NCIT increased \n", + "90 PhenotypicFeature NCIT increased \n", + "91 PhenotypicFeature NCIT increased \n", + "92 PhenotypicFeature NCIT increased \n", + "93 PhenotypicFeature NCIT increased \n", + "94 PhenotypicFeature SNOMEDCT decreased \n", + "95 PhenotypicFeature SNOMEDCT decreased \n", + "96 PhenotypicFeature SNOMEDCT decreased \n", + "97 PhenotypicFeature SNOMEDCT 
decreased \n", + "98 PhenotypicFeature SNOMEDCT decreased \n", + "99 PhenotypicFeature SNOMEDCT decreased \n", + "100 PhenotypicFeature SNOMEDCT decreased \n", + "101 PhenotypicFeature SNOMEDCT increased \n", + "102 PhenotypicFeature SNOMEDCT increased \n", + "103 PhenotypicFeature SNOMEDCT increased \n", + "104 PhenotypicFeature SNOMEDCT increased \n", + "105 PhenotypicFeature SNOMEDCT increased \n", + "106 PhenotypicFeature SNOMEDCT increased \n", + "107 Procedure NCIT decreased \n", + "108 Procedure NCIT decreased \n", + "109 Procedure NCIT decreased \n", + "110 Procedure NCIT decreased \n", + "111 Procedure NCIT decreased \n", + "112 Procedure NCIT decreased \n", + "113 Procedure NCIT decreased \n", + "114 Procedure NCIT increased \n", + "115 Procedure NCIT increased \n", + "116 Procedure NCIT increased \n", + "117 Procedure NCIT increased \n", + "118 Procedure NCIT increased \n", + "119 Procedure NCIT increased \n", + "120 Procedure NCIT increased \n", + "\n", + " object_category_postfix object_prefix Count object_category_postfix_abbv \\\n", + "0 Disease MONDO 34917 Disease \n", + "1 Disease NCIT 212 Disease \n", + "2 Disease SNOMEDCT 2917 Disease \n", + "3 PhenotypicFeature HP 24505 Pheno \n", + "4 PhenotypicFeature NCIT 2254 Pheno \n", + "5 PhenotypicFeature SNOMEDCT 244 Pheno \n", + "6 Procedure NCIT 222 Procedure \n", + "7 Disease MONDO 18248 Disease \n", + "8 Disease NCIT 131 Disease \n", + "9 Disease SNOMEDCT 1542 Disease \n", + "10 PhenotypicFeature HP 13225 Pheno \n", + "11 PhenotypicFeature NCIT 1176 Pheno \n", + "12 PhenotypicFeature SNOMEDCT 99 Pheno \n", + "13 Procedure NCIT 121 Procedure \n", + "14 Disease MONDO 5902 Disease \n", + "15 Disease NCIT 44 Disease \n", + "16 Disease SNOMEDCT 505 Disease \n", + "17 PhenotypicFeature HP 3727 Pheno \n", + "18 PhenotypicFeature NCIT 381 Pheno \n", + "19 PhenotypicFeature SNOMEDCT 39 Pheno \n", + "20 Procedure NCIT 43 Procedure \n", + "21 Disease MONDO 2158 Disease \n", + "22 Disease NCIT 8 Disease \n", + 
"23 Disease SNOMEDCT 171 Disease \n", + "24 PhenotypicFeature HP 1993 Pheno \n", + "25 PhenotypicFeature NCIT 139 Pheno \n", + "26 PhenotypicFeature SNOMEDCT 13 Pheno \n", + "27 Procedure NCIT 9 Procedure \n", + "28 Disease MONDO 13640 Disease \n", + "29 Disease NCIT 86 Disease \n", + "30 Disease SNOMEDCT 1237 Disease \n", + "31 PhenotypicFeature HP 10101 Pheno \n", + "32 PhenotypicFeature NCIT 1293 Pheno \n", + "33 PhenotypicFeature SNOMEDCT 88 Pheno \n", + "34 Procedure NCIT 113 Procedure \n", + "35 Disease MONDO 14415 Disease \n", + "36 Disease NCIT 96 Disease \n", + "37 Disease SNOMEDCT 1129 Disease \n", + "38 PhenotypicFeature HP 9919 Pheno \n", + "39 PhenotypicFeature NCIT 527 Pheno \n", + "40 PhenotypicFeature SNOMEDCT 94 Pheno \n", + "41 Procedure NCIT 69 Procedure \n", + "42 Disease MONDO 73 Disease \n", + "43 Disease SNOMEDCT 7 Disease \n", + "44 PhenotypicFeature HP 44 Pheno \n", + "45 PhenotypicFeature NCIT 9 Pheno \n", + "46 Procedure NCIT 1 Procedure \n", + "47 Disease MONDO 82 Disease \n", + "48 Disease SNOMEDCT 6 Disease \n", + "49 PhenotypicFeature HP 66 Pheno \n", + "50 PhenotypicFeature NCIT 1 Pheno \n", + "51 PhenotypicFeature SNOMEDCT 1 Pheno \n", + "52 Disease MONDO 1318 Disease \n", + "53 Disease NCIT 10 Disease \n", + "54 Disease SNOMEDCT 101 Disease \n", + "55 PhenotypicFeature HP 957 Pheno \n", + "56 PhenotypicFeature NCIT 116 Pheno \n", + "57 PhenotypicFeature SNOMEDCT 8 Pheno \n", + "58 Procedure NCIT 13 Procedure \n", + "59 Disease MONDO 1007 Disease \n", + "60 Disease NCIT 5 Disease \n", + "61 Disease SNOMEDCT 81 Disease \n", + "62 PhenotypicFeature HP 693 Pheno \n", + "63 PhenotypicFeature NCIT 34 Pheno \n", + "64 PhenotypicFeature SNOMEDCT 7 Pheno \n", + "65 Procedure NCIT 2 Procedure \n", + "66 Disease MONDO 18759 Disease \n", + "67 Disease NCIT 113 Disease \n", + "68 Disease SNOMEDCT 1532 Disease \n", + "69 PhenotypicFeature HP 11425 Pheno \n", + "70 PhenotypicFeature NCIT 1257 Pheno \n", + "71 PhenotypicFeature SNOMEDCT 121 Pheno 
\n", + "72 Procedure NCIT 132 Procedure \n", + "73 Disease MONDO 12086 Disease \n", + "74 Disease NCIT 86 Disease \n", + "75 Disease SNOMEDCT 1055 Disease \n", + "76 PhenotypicFeature HP 10355 Pheno \n", + "77 PhenotypicFeature NCIT 733 Pheno \n", + "78 PhenotypicFeature SNOMEDCT 78 Pheno \n", + "79 Procedure NCIT 67 Procedure \n", + "80 Disease MONDO 1170 Disease \n", + "81 Disease NCIT 7 Disease \n", + "82 Disease SNOMEDCT 94 Disease \n", + "83 PhenotypicFeature HP 705 Pheno \n", + "84 PhenotypicFeature NCIT 56 Pheno \n", + "85 PhenotypicFeature SNOMEDCT 9 Pheno \n", + "86 Procedure NCIT 7 Procedure \n", + "87 Disease MONDO 380 Disease \n", + "88 Disease NCIT 3 Disease \n", + "89 Disease SNOMEDCT 36 Disease \n", + "90 PhenotypicFeature HP 395 Pheno \n", + "91 PhenotypicFeature NCIT 34 Pheno \n", + "92 PhenotypicFeature SNOMEDCT 1 Pheno \n", + "93 Procedure NCIT 3 Procedure \n", + "94 Disease MONDO 307 Disease \n", + "95 Disease NCIT 1 Disease \n", + "96 Disease SNOMEDCT 26 Disease \n", + "97 PhenotypicFeature HP 217 Pheno \n", + "98 PhenotypicFeature NCIT 25 Pheno \n", + "99 PhenotypicFeature SNOMEDCT 2 Pheno \n", + "100 Procedure NCIT 2 Procedure \n", + "101 Disease MONDO 158 Disease \n", + "102 Disease NCIT 2 Disease \n", + "103 Disease SNOMEDCT 13 Disease \n", + "104 PhenotypicFeature HP 113 Pheno \n", + "105 PhenotypicFeature NCIT 5 Pheno \n", + "106 Procedure NCIT 1 Procedure \n", + "107 Disease MONDO 721 Disease \n", + "108 Disease NCIT 4 Disease \n", + "109 Disease SNOMEDCT 58 Disease \n", + "110 PhenotypicFeature HP 502 Pheno \n", + "111 PhenotypicFeature NCIT 56 Pheno \n", + "112 PhenotypicFeature SNOMEDCT 4 Pheno \n", + "113 Procedure NCIT 3 Procedure \n", + "114 Disease MONDO 209 Disease \n", + "115 Disease NCIT 2 Disease \n", + "116 Disease SNOMEDCT 20 Disease \n", + "117 PhenotypicFeature HP 158 Pheno \n", + "118 PhenotypicFeature NCIT 4 Pheno \n", + "119 PhenotypicFeature SNOMEDCT 2 Pheno \n", + "120 Procedure NCIT 2 Procedure \n", + "\n", + " 
subject_category_postfix_abbv joined_to_match_xbte \n", + "0 Chem ChemCHEBI_decreased_DiseaseMONDO \n", + "1 Chem ChemCHEBI_decreased_DiseaseNCIT \n", + "2 Chem ChemCHEBI_decreased_DiseaseSNOMEDCT \n", + "3 Chem ChemCHEBI_decreased_PhenoHP \n", + "4 Chem ChemCHEBI_decreased_PhenoNCIT \n", + "5 Chem ChemCHEBI_decreased_PhenoSNOMEDCT \n", + "6 Chem ChemCHEBI_decreased_ProcedureNCIT \n", + "7 Chem ChemCHEBI_increased_DiseaseMONDO \n", + "8 Chem ChemCHEBI_increased_DiseaseNCIT \n", + "9 Chem ChemCHEBI_increased_DiseaseSNOMEDCT \n", + "10 Chem ChemCHEBI_increased_PhenoHP \n", + "11 Chem ChemCHEBI_increased_PhenoNCIT \n", + "12 Chem ChemCHEBI_increased_PhenoSNOMEDCT \n", + "13 Chem ChemCHEBI_increased_ProcedureNCIT \n", + "14 Chem ChemUNII_decreased_DiseaseMONDO \n", + "15 Chem ChemUNII_decreased_DiseaseNCIT \n", + "16 Chem ChemUNII_decreased_DiseaseSNOMEDCT \n", + "17 Chem ChemUNII_decreased_PhenoHP \n", + "18 Chem ChemUNII_decreased_PhenoNCIT \n", + "19 Chem ChemUNII_decreased_PhenoSNOMEDCT \n", + "20 Chem ChemUNII_decreased_ProcedureNCIT \n", + "21 Chem ChemUNII_increased_DiseaseMONDO \n", + "22 Chem ChemUNII_increased_DiseaseNCIT \n", + "23 Chem ChemUNII_increased_DiseaseSNOMEDCT \n", + "24 Chem ChemUNII_increased_PhenoHP \n", + "25 Chem ChemUNII_increased_PhenoNCIT \n", + "26 Chem ChemUNII_increased_PhenoSNOMEDCT \n", + "27 Chem ChemUNII_increased_ProcedureNCIT \n", + "28 Disease DiseaseMONDO_decreased_DiseaseMONDO \n", + "29 Disease DiseaseMONDO_decreased_DiseaseNCIT \n", + "30 Disease DiseaseMONDO_decreased_DiseaseSNOMEDCT \n", + "31 Disease DiseaseMONDO_decreased_PhenoHP \n", + "32 Disease DiseaseMONDO_decreased_PhenoNCIT \n", + "33 Disease DiseaseMONDO_decreased_PhenoSNOMEDCT \n", + "34 Disease DiseaseMONDO_decreased_ProcedureNCIT \n", + "35 Disease DiseaseMONDO_increased_DiseaseMONDO \n", + "36 Disease DiseaseMONDO_increased_DiseaseNCIT \n", + "37 Disease DiseaseMONDO_increased_DiseaseSNOMEDCT \n", + "38 Disease DiseaseMONDO_increased_PhenoHP \n", + "39 Disease 
DiseaseMONDO_increased_PhenoNCIT \n", + "40 Disease DiseaseMONDO_increased_PhenoSNOMEDCT \n", + "41 Disease DiseaseMONDO_increased_ProcedureNCIT \n", + "42 Disease DiseaseNCIT_decreased_DiseaseMONDO \n", + "43 Disease DiseaseNCIT_decreased_DiseaseSNOMEDCT \n", + "44 Disease DiseaseNCIT_decreased_PhenoHP \n", + "45 Disease DiseaseNCIT_decreased_PhenoNCIT \n", + "46 Disease DiseaseNCIT_decreased_ProcedureNCIT \n", + "47 Disease DiseaseNCIT_increased_DiseaseMONDO \n", + "48 Disease DiseaseNCIT_increased_DiseaseSNOMEDCT \n", + "49 Disease DiseaseNCIT_increased_PhenoHP \n", + "50 Disease DiseaseNCIT_increased_PhenoNCIT \n", + "51 Disease DiseaseNCIT_increased_PhenoSNOMEDCT \n", + "52 Disease DiseaseSNOMEDCT_decreased_DiseaseMONDO \n", + "53 Disease DiseaseSNOMEDCT_decreased_DiseaseNCIT \n", + "54 Disease DiseaseSNOMEDCT_decreased_DiseaseSNOMEDCT \n", + "55 Disease DiseaseSNOMEDCT_decreased_PhenoHP \n", + "56 Disease DiseaseSNOMEDCT_decreased_PhenoNCIT \n", + "57 Disease DiseaseSNOMEDCT_decreased_PhenoSNOMEDCT \n", + "58 Disease DiseaseSNOMEDCT_decreased_ProcedureNCIT \n", + "59 Disease DiseaseSNOMEDCT_increased_DiseaseMONDO \n", + "60 Disease DiseaseSNOMEDCT_increased_DiseaseNCIT \n", + "61 Disease DiseaseSNOMEDCT_increased_DiseaseSNOMEDCT \n", + "62 Disease DiseaseSNOMEDCT_increased_PhenoHP \n", + "63 Disease DiseaseSNOMEDCT_increased_PhenoNCIT \n", + "64 Disease DiseaseSNOMEDCT_increased_PhenoSNOMEDCT \n", + "65 Disease DiseaseSNOMEDCT_increased_ProcedureNCIT \n", + "66 Pheno PhenoHP_decreased_DiseaseMONDO \n", + "67 Pheno PhenoHP_decreased_DiseaseNCIT \n", + "68 Pheno PhenoHP_decreased_DiseaseSNOMEDCT \n", + "69 Pheno PhenoHP_decreased_PhenoHP \n", + "70 Pheno PhenoHP_decreased_PhenoNCIT \n", + "71 Pheno PhenoHP_decreased_PhenoSNOMEDCT \n", + "72 Pheno PhenoHP_decreased_ProcedureNCIT \n", + "73 Pheno PhenoHP_increased_DiseaseMONDO \n", + "74 Pheno PhenoHP_increased_DiseaseNCIT \n", + "75 Pheno PhenoHP_increased_DiseaseSNOMEDCT \n", + "76 Pheno 
PhenoHP_increased_PhenoHP \n", + "77 Pheno PhenoHP_increased_PhenoNCIT \n", + "78 Pheno PhenoHP_increased_PhenoSNOMEDCT \n", + "79 Pheno PhenoHP_increased_ProcedureNCIT \n", + "80 Pheno PhenoNCIT_decreased_DiseaseMONDO \n", + "81 Pheno PhenoNCIT_decreased_DiseaseNCIT \n", + "82 Pheno PhenoNCIT_decreased_DiseaseSNOMEDCT \n", + "83 Pheno PhenoNCIT_decreased_PhenoHP \n", + "84 Pheno PhenoNCIT_decreased_PhenoNCIT \n", + "85 Pheno PhenoNCIT_decreased_PhenoSNOMEDCT \n", + "86 Pheno PhenoNCIT_decreased_ProcedureNCIT \n", + "87 Pheno PhenoNCIT_increased_DiseaseMONDO \n", + "88 Pheno PhenoNCIT_increased_DiseaseNCIT \n", + "89 Pheno PhenoNCIT_increased_DiseaseSNOMEDCT \n", + "90 Pheno PhenoNCIT_increased_PhenoHP \n", + "91 Pheno PhenoNCIT_increased_PhenoNCIT \n", + "92 Pheno PhenoNCIT_increased_PhenoSNOMEDCT \n", + "93 Pheno PhenoNCIT_increased_ProcedureNCIT \n", + "94 Pheno PhenoSNOMEDCT_decreased_DiseaseMONDO \n", + "95 Pheno PhenoSNOMEDCT_decreased_DiseaseNCIT \n", + "96 Pheno PhenoSNOMEDCT_decreased_DiseaseSNOMEDCT \n", + "97 Pheno PhenoSNOMEDCT_decreased_PhenoHP \n", + "98 Pheno PhenoSNOMEDCT_decreased_PhenoNCIT \n", + "99 Pheno PhenoSNOMEDCT_decreased_PhenoSNOMEDCT \n", + "100 Pheno PhenoSNOMEDCT_decreased_ProcedureNCIT \n", + "101 Pheno PhenoSNOMEDCT_increased_DiseaseMONDO \n", + "102 Pheno PhenoSNOMEDCT_increased_DiseaseNCIT \n", + "103 Pheno PhenoSNOMEDCT_increased_DiseaseSNOMEDCT \n", + "104 Pheno PhenoSNOMEDCT_increased_PhenoHP \n", + "105 Pheno PhenoSNOMEDCT_increased_PhenoNCIT \n", + "106 Pheno PhenoSNOMEDCT_increased_ProcedureNCIT \n", + "107 Procedure ProcedureNCIT_decreased_DiseaseMONDO \n", + "108 Procedure ProcedureNCIT_decreased_DiseaseNCIT \n", + "109 Procedure ProcedureNCIT_decreased_DiseaseSNOMEDCT \n", + "110 Procedure ProcedureNCIT_decreased_PhenoHP \n", + "111 Procedure ProcedureNCIT_decreased_PhenoNCIT \n", + "112 Procedure ProcedureNCIT_decreased_PhenoSNOMEDCT \n", + "113 Procedure ProcedureNCIT_decreased_ProcedureNCIT \n", + "114 Procedure 
ProcedureNCIT_increased_DiseaseMONDO \n", + "115 Procedure ProcedureNCIT_increased_DiseaseNCIT \n", + "116 Procedure ProcedureNCIT_increased_DiseaseSNOMEDCT \n", + "117 Procedure ProcedureNCIT_increased_PhenoHP \n", + "118 Procedure ProcedureNCIT_increased_PhenoNCIT \n", + "119 Procedure ProcedureNCIT_increased_PhenoSNOMEDCT \n", + "120 Procedure ProcedureNCIT_increased_ProcedureNCIT " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# get a list of meta-triples for this KP: make it look like what BTE wants as closely as possible (see yaml)\n", + "# X_BTE KNOWN OPERATIONS TO ADD\n", + "# unique combos of subject-prefix / subject-category / predicate / object-prefix / object-category \n", + "# (and qualifier-set, if applicable)\n", + "\n", + "# Use the grouped[\"joined_to_match_xbte\"] column made below to create the tags and x-bte-kgs-operations portions in ehr_risk_kg.yaml\n", + " \n", + "meta_triples = kg.copy()\n", + "\n", + "meta_triples[\"subject_prefix\"] = meta_triples[\"subject\"].str.split(\":\").str[0]\n", + "meta_triples[\"object_prefix\"] = meta_triples[\"object\"].str.split(\":\").str[0]\n", + "\n", + "meta_triples[\"subject_category_postfix\"] = meta_triples[\"subject_category\"].str.split(\":\").str[1]\n", + "meta_triples[\"object_category_postfix\"] = meta_triples[\"object_category\"].str.split(\":\").str[1]\n", + "\n", + "meta_triples['increased_or_decreased'] = None\n", + "meta_triples.loc[meta_triples['predicate'].str.contains('increased', case=False, na=False), 'increased_or_decreased'] = 'increased'\n", + "meta_triples.loc[meta_triples['predicate'].str.contains('decreased', case=False, na=False), 'increased_or_decreased'] = 'decreased'\n", + "\n", + "# unique combos of subject-prefix / subject-category / predicate / object-prefix / object-category\n", + "\n", + "# Define the columns you want to combine\n", + "columns_to_combine = ['subject_category_postfix', 'subject_prefix', 'increased_or_decreased', 
'object_category_postfix', 'object_prefix']\n", + "\n", + "# Group by the combination of columns and count the occurrences\n", + "grouped = meta_triples.groupby(columns_to_combine).size().reset_index(name='Count')\n", + "grouped['object_category_postfix_abbv'] = grouped['object_category_postfix'].str.replace('PhenotypicFeature', 'Pheno')\n", + "grouped['subject_category_postfix_abbv'] = grouped['subject_category_postfix'].str.replace('PhenotypicFeature', 'Pheno')\n", + "\n", + "grouped['object_category_postfix_abbv'] = grouped['object_category_postfix_abbv'].str.replace('ChemicalEntity', 'Chem')\n", + "grouped['subject_category_postfix_abbv'] = grouped['subject_category_postfix_abbv'].str.replace('ChemicalEntity', 'Chem')\n", + "\n", + "grouped['joined_to_match_xbte'] = grouped['subject_category_postfix_abbv'].astype(str) + grouped['subject_prefix'].astype(str) + '_' + grouped['increased_or_decreased'].astype(str) + '_' + grouped['object_category_postfix_abbv'].astype(str) + grouped['object_prefix'].astype(str)\n", + "\n", + "# grouped.to_csv('yaml_all_metatriples.tsv', sep=\"\\t\")\n", + "# # Print the DataFrame with unique combinations and their counts\n", + "\n", + "with pd.option_context(\"display.max_rows\", 30000):\n", + " display(grouped)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 335, + "id": "ac11f711", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectpredicateobjectrelationprovided_byprovided_datepredicate_categoryclassifierauc_rocp_value...num_patients_without_conditionnodes_frozensetsubject_idsubject_namesubject_categoryobject_idobject_nameobject_categorysubject_prefixobject_prefix
0HP:0008629biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.000000...9999902(HP:0000360, HP:0008629)HP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHP:0000360Tinnitusbiolink:PhenotypicFeatureHPHP
1MONDO:0010643biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.999872...10000835(HP:0000360, MONDO:0010643)MONDO:0010643acute leukemia (disease)biolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeatureMONDOHP
2UNII:25ADE2236Lbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.936767...10000939(UNII:25ADE2236L, HP:0000360)UNII:25ADE2236Lthrombinbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeatureUNIIHP
3UNII:K16AIQ8CTMbiolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.998563...9998659(HP:0000360, UNII:K16AIQ8CTM)UNII:K16AIQ8CTMpertuzumabbiolink:ChemicalEntityHP:0000360Tinnitusbiolink:PhenotypicFeatureUNIIHP
4MONDO:0007972biolink:associated_with_increased_likelihood_ofHP:0000360RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.8401320.093959...9998750(MONDO:0007972, HP:0000360)MONDO:0007972Meniere diseasebiolink:DiseaseHP:0000360Tinnitusbiolink:PhenotypicFeatureMONDOHP
..................................................................
237099CHEBI:114785biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998503...10007299(HP:0008629, CHEBI:114785)CHEBI:114785erlotinibbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureCHEBIHP
237100UNII:52CMI0WC3Ybiolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.999719...9996273(UNII:52CMI0WC3Y, HP:0008629)UNII:52CMI0WC3Yatezolizumabbiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureUNIIHP
237101CHEBI:135738biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.998357...9998030(HP:0008629, CHEBI:135738)CHEBI:135738clevidipinebiolink:ChemicalEntityHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureCHEBIHP
237102MONDO:0004967biolink:associated_with_decreased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.997631...10001385(HP:0008629, MONDO:0004967)MONDO:0004967acute lymphoblastic leukemia (disease)biolink:DiseaseHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureMONDOHP
237103HP:0000360biolink:associated_with_increased_likelihood_ofHP:0008629RO:0003308EHR Risk Provider (Multiomics)2022-05-18biolink:AssociationLogistic Regression0.9597910.000000...9997731(HP:0000360, HP:0008629)HP:0000360Tinnitusbiolink:PhenotypicFeatureHP:0008629Pulsatile tinnitusbiolink:PhenotypicFeatureHPHP
\n", + "

236812 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " subject predicate \\\n", + "0 HP:0008629 biolink:associated_with_increased_likelihood_of \n", + "1 MONDO:0010643 biolink:associated_with_increased_likelihood_of \n", + "2 UNII:25ADE2236L biolink:associated_with_increased_likelihood_of \n", + "3 UNII:K16AIQ8CTM biolink:associated_with_increased_likelihood_of \n", + "4 MONDO:0007972 biolink:associated_with_increased_likelihood_of \n", + "... ... ... \n", + "237099 CHEBI:114785 biolink:associated_with_decreased_likelihood_of \n", + "237100 UNII:52CMI0WC3Y biolink:associated_with_decreased_likelihood_of \n", + "237101 CHEBI:135738 biolink:associated_with_decreased_likelihood_of \n", + "237102 MONDO:0004967 biolink:associated_with_decreased_likelihood_of \n", + "237103 HP:0000360 biolink:associated_with_increased_likelihood_of \n", + "\n", + " object relation provided_by provided_date \\\n", + "0 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "1 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "2 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "3 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "4 HP:0000360 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "... ... ... ... ... \n", + "237099 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237100 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237101 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237102 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "237103 HP:0008629 RO:0003308 EHR Risk Provider (Multiomics) 2022-05-18 \n", + "\n", + " predicate_category classifier auc_roc p_value ... \\\n", + "0 biolink:Association Logistic Regression 0.840132 0.000000 ... \n", + "1 biolink:Association Logistic Regression 0.840132 0.999872 ... \n", + "2 biolink:Association Logistic Regression 0.840132 0.936767 ... 
\n", + "3 biolink:Association Logistic Regression 0.840132 0.998563 ... \n", + "4 biolink:Association Logistic Regression 0.840132 0.093959 ... \n", + "... ... ... ... ... ... \n", + "237099 biolink:Association Logistic Regression 0.959791 0.998503 ... \n", + "237100 biolink:Association Logistic Regression 0.959791 0.999719 ... \n", + "237101 biolink:Association Logistic Regression 0.959791 0.998357 ... \n", + "237102 biolink:Association Logistic Regression 0.959791 0.997631 ... \n", + "237103 biolink:Association Logistic Regression 0.959791 0.000000 ... \n", + "\n", + " num_patients_without_condition nodes_frozenset \\\n", + "0 9999902 (HP:0000360, HP:0008629) \n", + "1 10000835 (HP:0000360, MONDO:0010643) \n", + "2 10000939 (UNII:25ADE2236L, HP:0000360) \n", + "3 9998659 (HP:0000360, UNII:K16AIQ8CTM) \n", + "4 9998750 (MONDO:0007972, HP:0000360) \n", + "... ... ... \n", + "237099 10007299 (HP:0008629, CHEBI:114785) \n", + "237100 9996273 (UNII:52CMI0WC3Y, HP:0008629) \n", + "237101 9998030 (HP:0008629, CHEBI:135738) \n", + "237102 10001385 (HP:0008629, MONDO:0004967) \n", + "237103 9997731 (HP:0000360, HP:0008629) \n", + "\n", + " subject_id subject_name \\\n", + "0 HP:0008629 Pulsatile tinnitus \n", + "1 MONDO:0010643 acute leukemia (disease) \n", + "2 UNII:25ADE2236L thrombin \n", + "3 UNII:K16AIQ8CTM pertuzumab \n", + "4 MONDO:0007972 Meniere disease \n", + "... ... ... \n", + "237099 CHEBI:114785 erlotinib \n", + "237100 UNII:52CMI0WC3Y atezolizumab \n", + "237101 CHEBI:135738 clevidipine \n", + "237102 MONDO:0004967 acute lymphoblastic leukemia (disease) \n", + "237103 HP:0000360 Tinnitus \n", + "\n", + " subject_category object_id object_name \\\n", + "0 biolink:PhenotypicFeature HP:0000360 Tinnitus \n", + "1 biolink:Disease HP:0000360 Tinnitus \n", + "2 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "3 biolink:ChemicalEntity HP:0000360 Tinnitus \n", + "4 biolink:Disease HP:0000360 Tinnitus \n", + "... ... ... ... 
\n", + "237099 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237100 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237101 biolink:ChemicalEntity HP:0008629 Pulsatile tinnitus \n", + "237102 biolink:Disease HP:0008629 Pulsatile tinnitus \n", + "237103 biolink:PhenotypicFeature HP:0008629 Pulsatile tinnitus \n", + "\n", + " object_category subject_prefix object_prefix \n", + "0 biolink:PhenotypicFeature HP HP \n", + "1 biolink:PhenotypicFeature MONDO HP \n", + "2 biolink:PhenotypicFeature UNII HP \n", + "3 biolink:PhenotypicFeature UNII HP \n", + "4 biolink:PhenotypicFeature MONDO HP \n", + "... ... ... ... \n", + "237099 biolink:PhenotypicFeature CHEBI HP \n", + "237100 biolink:PhenotypicFeature UNII HP \n", + "237101 biolink:PhenotypicFeature CHEBI HP \n", + "237102 biolink:PhenotypicFeature MONDO HP \n", + "237103 biolink:PhenotypicFeature HP HP \n", + "\n", + "[236812 rows x 23 columns]" + ] + }, + "execution_count": 335, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# need this (dataframe called kg_yaml) to lookup examples for testExamples (qInput and oneOutput) portion of yaml\n", + "# additionally need this generate portion of x-bte-response-mapping\n", + "\n", + "kg_yaml = kg.copy()\n", + "\n", + "kg_yaml[\"subject_prefix\"] = kg_yaml[\"subject\"].str.split(\":\").str[0]\n", + "kg_yaml[\"object_prefix\"] = kg_yaml[\"object\"].str.split(\":\").str[0]\n", + "\n", + "kg_yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 339, + "id": "f5c776cc", + "metadata": {}, + "outputs": [], + "source": [ + "# the following code writes the tags and x-bte-kgs-operations of ehr_risk_kg.yaml to an output file titled 'x-bte-kgs-operations-response-mapping.txt'\n", + "# copy paste the output to the relevant section in the yaml\n", + "with open('x-bte-kgs-operations-response-mapping.txt', 'w+') as file:\n", + " file.write(\" tags:\" + \"\\n\")\n", + " file.write(\" - query:\" + \"\\n\")\n", + " file.write(f\" 
## {len(grouped)} operations based on TSV of KG\" + \"\\n\")\n", + " for index, row in grouped.iterrows():\n", + " file.write(f\" ## - {row['joined_to_match_xbte']}\" + \"\\n\")\n", + " file.write(\" x-bte-kgs-operations\" + \"\\n\")\n", + " for index, row in grouped.iterrows(): \n", + " file.write(f\" - $ref: '#/components/x-bte-kgs-operations/{row['joined_to_match_xbte']}'\" + \"\\n\")\n", + " file.write(f\" - $ref: '#/components/x-bte-kgs-operations/{row['joined_to_match_xbte']}-rev'\" + \"\\n\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 340, + "id": "232ad5c1", + "metadata": {}, + "outputs": [], + "source": [ + "# the following code writes the x-bte-kgs-operations of ehr_risk_kg.yaml to an output file titled 'x-bte-kgs-operations-response-mapping.txt'\n", + "# copy paste the output to the relevant section in the yaml\n", + "\n", + "with open('x-bte-kgs-operations-response-mapping.txt', 'a') as file:\n", + " file.write(\" \" + \"x-bte-kgs-operations:\" + \"\\n\")\n", + " for index, row in grouped.iterrows():\n", + " xbte_header = f'{row[\"joined_to_match_xbte\"]}:'\n", + " file.write(\" \" + xbte_header + \"\\n\")\n", + " \n", + " base_url = \"https://pending.biothings.io/multiomics_ehr_risk_kp/query?q=\"\n", + " url = base_url + f'''subject.type:\"biolink:{row['subject_category_postfix']}\"%20AND%20_exists_:subject.\"biolink:{row['subject_prefix']}\"%20AND%20association.predicate:associated_with_decreased_likelihood_of%20AND%20object.type:\"biolink:{row['object_category_postfix']}\"%20AND%20_exists_:object.{row['object_prefix']}'''\n", + " commented_url = \"## \" + url\n", + " file.write(\" \" + commented_url + \"\\n\")\n", + " \n", + " file.write(\" ## \" + str(row[\"Count\"]) + \" records\" + \"\\n\")\n", + " \n", + " file.write(\" -supportBatch: true\" + \"\\n\")\n", + " file.write(\" useTemplating: true\" + \"\\n\")\n", + " file.write(\" inputs:\" + \"\\n\")\n", + " \n", + " file.write(f\" - id: {row['subject_prefix']}\" + 
\"\\n\")\n", + " file.write(f\" - semantic: {row['subject_category_postfix']}\" + \"\\n\")\n", + " \n", + " file.write(\" requestBody:\" + \"\\n\")\n", + " file.write(\" body: >-\" + \"\\n\")\n", + " \n", + " file.write(\" {\" + \"\\n\")\n", + " q_string = f\"\"\" \"q\": [ {{ queryInputs | rmPrefix() | wrap ( '[\"' , '\",\"biolink:{row['subject_category_postfix']}\", \"associated_with_{row['increased_or_decreased']}_likelihood_of\", \"biolink:{row['object_category_postfix']}\"]') }} ]\"\"\"\n", + " file.write(' ' + q_string + \"\\n\") \n", + " scopes_string = f\"\"\" \"scopes\": [\"subject.{row['subject_prefix']}\", \"subject.type\", \"association.predicate\", \"object.type\"]\"\"\"\n", + " file.write(' ' + scopes_string + \"\\n\") \n", + " file.write(\" }\" + \"\\n\")\n", + " \n", + " file.write(\" parameters:\" + \"\\n\")\n", + " file.write(f\" fields: object.{row['object_prefix']}, association.edge_attributes, subject.name, object.name, source.edge_sources\" + \"\\n\")\n", + " file.write(f\" size: 1000\" + \"\\n\")\n", + " \n", + " file.write(\" outputs:\" + \"\\n\")\n", + " file.write(f\" -id: {row['object_prefix']}\" + \"\\n\")\n", + " file.write(f\" semantic: {row['object_category_postfix']}\" + \"\\n\")\n", + " \n", + " file.write(\" predicate: associated_with\" + \"\\n\")\n", + " file.write(\" qualifiers:\" + \"\\n\")\n", + " file.write(f\" object_direction_qualifier: {row['increased_or_decreased']}\" + \"\\n\")\n", + " file.write(f\" object_aspect_qualifier: likelihood\" + \"\\n\")\n", + " \n", + " file.write(\" response_mapping:\" + \"\\n\")\n", + " file.write(f' \"ref\": \"#/components/x-bte-response-mapping/object-{row[\"object_prefix\"]}\"' + \"\\n\")\n", + " \n", + " file.write(\" # testExamples:\" + \"\\n\")\n", + " sub_prefix = row[\"subject_prefix\"]\n", + " obj_prefix = row[\"object_prefix\"]\n", + " matched_rows = kg_yaml.loc[(kg_yaml['subject_prefix'] == sub_prefix) & (kg_yaml['object_prefix'] == obj_prefix)]\n", + " select_row = 
matched_rows.sample()\n", + " qInput = select_row['subject'].values[0]\n", + " qName = select_row['subject_name'].values[0]\n", + " oneOutput = select_row['object'].values[0]\n", + " oneOutputName = select_row['object_name'].values[0]\n", + " file.write(f' # - qInput: \"{qInput}\" ## {qName} ' + \"\\n\")\n", + " file.write(f' # oneOutput: \"{oneOutput}\" ## {oneOutputName} ' + \"\\n\")\n", + " \n", + " #### DO REVERSE #### \n", + " \n", + " xbte_header_rev = f'{row[\"joined_to_match_xbte\"]}_rev:'\n", + " file.write(\" \" + xbte_header_rev + \"\\n\")\n", + "\n", + " file.write(\" -supportBatch: true\" + \"\\n\")\n", + " file.write(\" useTemplating: true\" + \"\\n\")\n", + " file.write(\" inputs:\" + \"\\n\")\n", + " \n", + " file.write(f\" - id: {row['object_prefix']}\" + \"\\n\")\n", + " file.write(f\" - semantic: {row['object_category_postfix']}\" + \"\\n\")\n", + " \n", + " file.write(\" requestBody:\" + \"\\n\")\n", + " file.write(\" body: >-\" + \"\\n\")\n", + " \n", + " file.write(\" {\" + \"\\n\")\n", + " q_string = f\"\"\" \"q\": [ {{ queryInputs | rmPrefix() | wrap ( '[\"' , '\",\"biolink:{row['subject_category_postfix']}\", \"associated_with_{row['increased_or_decreased']}_likelihood_of\", \"biolink:{row['object_category_postfix']}\"]') }} ]\"\"\"\n", + " file.write(' ' + q_string + \"\\n\") \n", + " scopes_string = f\"\"\" \"scopes\": [\"object.{row['object_prefix']}\", \"subject.type\", \"association.predicate\", \"object.type\"]\"\"\"\n", + " file.write(' ' + scopes_string + \"\\n\") \n", + " file.write(\" }\" + \"\\n\")\n", + " \n", + " file.write(\" parameters:\" + \"\\n\")\n", + " file.write(f\" fields: subject.{row['subject_prefix']}, association.edge_attributes, subject.name, object.name, source.edge_sources\" + \"\\n\")\n", + " file.write(f\" size: 1000\" + \"\\n\")\n", + " \n", + " file.write(\" outputs:\" + \"\\n\")\n", + " file.write(f\" -id: {row['subject_prefix']}\" + \"\\n\")\n", + " file.write(f\" semantic: 
{row['subject_category_postfix']}\" + \"\\n\")\n", + " \n", + " file.write(\" predicate: associated_with\" + \"\\n\")\n", + " file.write(\" qualifiers:\" + \"\\n\")\n", + " file.write(f\" object_direction_qualifier: {row['increased_or_decreased']}\" + \"\\n\")\n", + " file.write(f\" object_aspect_qualifier: likelihood\" + \"\\n\")\n", + " \n", + " file.write(\" response_mapping:\" + \"\\n\")\n", + " file.write(f' \"ref\": \"#/components/x-bte-response-mapping/object-{row[\"subject_prefix\"]}\"' + \"\\n\")\n", + " \n", + " file.write(\" # testExamples:\" + \"\\n\")\n", + " sub_prefix = row[\"subject_prefix\"]\n", + " obj_prefix = row[\"object_prefix\"]\n", + " matched_rows = kg_yaml.loc[(kg_yaml['subject_prefix'] == sub_prefix) & (kg_yaml['object_prefix'] == obj_prefix)]\n", + " select_row = matched_rows.sample()\n", + " qInput = select_row['object'].values[0]\n", + " qName = select_row['object_name'].values[0]\n", + " oneOutput = select_row['subject'].values[0]\n", + " oneOutputName = select_row['subject_name'].values[0]\n", + " file.write(f' # - qInput: \"{qInput}\" ## {qName} ' + \"\\n\")\n", + " file.write(f' # oneOutput: \"{oneOutput}\" ## {oneOutputName} ' + \"\\n\")\n", + " \n", + " \n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9bbc605f", + "metadata": {}, + "outputs": [], + "source": [ + "Output example for x-bte-kgs-operations\n", + "\n", + " ChemCHEBI_decreased_PhenoNCIT:\n", + " ## https://pending.biothings.io/multiomics_ehr_risk_kp/query?q=subject.type:ChemicalEntity%20AND%20_exists_:subject.CHEBI%20AND%20association.predicate:associated_with_decreased_likelihood_of%20AND%20object.type:PhenotypicFeature%20AND%20_exists_:object.NCIT\n", + " ## 191 records\n", + " - supportBatch: true\n", + " useTemplating: true \n", + " inputs:\n", + " - id: CHEBI\n", + " semantic: SmallMolecule\n", + " requestBodyType: object\n", + " requestBody:\n", + " body: >-\n", + " {\n", + " \"q\": [ {{ queryInputs | rmPrefix() | wrap ( 
'[\"' , '\",\"biolink:ChemicalEntity\", \"associated_with_decreased_likelihood_of\", \"biolink:PhenotypicFeature\"]') }} ],\n", + " \"scopes\": [\"subject.CHEBI\", \"subject.type\", \"association.predicate\", \"object.type\"]\n", + " }\n", + " parameters:\n", + " fields: >-\n", + " object.NCIT,\n", + " association.edge_attributes,\n", + " subject.name,object.name\n", + " size: 1000\n", + " outputs:\n", + " - id: NCIT\n", + " semantic: PhenotypicFeature\n", + " predicate: associated_with\n", + " qualifiers:\n", + " object_direction_qualifier: decreased\n", + " object_aspect_qualifier: likelihood\n", + " response_mapping:\n", + " \"$ref\": \"#/components/x-bte-response-mapping/object-NCIT\"\n", + " # testExamples:\n", + " # - qInput: \"CHEBI:135866\" ## clindamycin\n", + " # oneOutput: \"NCIT:C171647\" ## SARS Coronavirus 2 Positive\n", + " ChemCHEBI_decreased_PhenoNCIT-rev:\n", + " - supportBatch: true\n", + " useTemplating: true \n", + " inputs:\n", + " - id: NCIT\n", + " semantic: PhenotypicFeature\n", + " requestBodyType: object\n", + " requestBody:\n", + " body: >-\n", + " {\n", + " \"q\": [ {{ queryInputs | rmPrefix() | wrap( '[\"' , '\",\"biolink:ChemicalEntity\", \"associated_with_decreased_likelihood_of\", \"biolink:PhenotypicFeature\"]') }} ],\n", + " \"scopes\": [\"object.NCIT\", \"subject.type\", \"association.predicate\", \"object.type\"]\n", + " }\n", + " parameters:\n", + " fields: >-\n", + " subject.CHEBI,\n", + " association.edge_attributes,\n", + " subject.name,object.name\n", + " size: 1000\n", + " outputs:\n", + " - id: CHEBI\n", + " semantic: SmallMolecule\n", + " predicate: associated_with\n", + " qualifiers:\n", + " subject_direction_qualifier: decreased\n", + " subject_aspect_qualifier: likelihood\n", + " response_mapping:\n", + " \"$ref\": \"#/components/x-bte-response-mapping/subject-CHEBI\"\n", + " # testExamples:\n", + " # - qInput: \"NCIT:C171647\" ## SARS Coronavirus 2 Positive\n", + " # oneOutput: \"CHEBI:119915\" ## fentanyl" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 341, + "id": "d834fe3e", + "metadata": {}, + "outputs": [], + "source": [ + "# the following code writes the x-bte-response-mapping of ehr_risk_kg.yaml to an output file titled 'x-bte-kgs-operations-response-mapping.txt'\n", + "# copy paste the output to the relevant section in the yaml\n", + "with open('x-bte-kgs-operations-response-mapping.txt', 'a') as file:\n", + " file.write(\" \" + \"x-bte-response-mapping:\" + \"\\n\")\n", + " for sprefix in list(kg_yaml['subject_prefix'].unique()):\n", + " file.write(f\" subject-{sprefix}:\" + \"\\n\")\n", + " file.write(f\" {sprefix}: subject.{sprefix}\" + \"\\n\")\n", + " file.write(f\" input_name: subject.name\" + \"\\n\")\n", + " file.write(f\" output_name: object.name\" + \"\\n\")\n", + " file.write(f\" edge-attributes: sources.edge_attributes\" + \"\\n\")\n", + " file.write(f\" trapi_sources: source.edge_sources\" + \"\\n\")\n", + " for oprefix in list(kg_yaml['object_prefix'].unique()):\n", + " file.write(f\" object-{oprefix}:\" + \"\\n\")\n", + " file.write(f\" {oprefix}: object.{oprefix}\" + \"\\n\")\n", + " file.write(f\" input_name: object.name\" + \"\\n\")\n", + " file.write(f\" output_name: subject.name\" + \"\\n\")\n", + " file.write(f\" edge-attributes: sources.edge_attributes\" + \"\\n\")\n", + " file.write(f\" trapi_sources: source.edge_sources\" + \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 325, + "id": "30781582", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subjectobjectpredicatecount
111072HP:0001974NCIT:C167118biolink:associated_with_decreased_likelihood_of4
211014NCIT:C167118HP:0001974biolink:associated_with_decreased_likelihood_of4
117058HP:0002900NCIT:C167118biolink:associated_with_increased_likelihood_of4
122579HP:0003124NCIT:C167118biolink:associated_with_decreased_likelihood_of4
117810HP:0002902NCIT:C167118biolink:associated_with_increased_likelihood_of3
...............
78133CHEBI:77431MONDO:0004721biolink:associated_with_decreased_likelihood_of1
78134CHEBI:77431MONDO:0004866biolink:associated_with_decreased_likelihood_of1
78135CHEBI:77431MONDO:0004868biolink:associated_with_increased_likelihood_of1
78136CHEBI:77431MONDO:0004946biolink:associated_with_increased_likelihood_of1
234248UNII:X85G7936GVSNOMEDCT:76571007biolink:associated_with_decreased_likelihood_of1
\n", + "

234249 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " subject object \\\n", + "111072 HP:0001974 NCIT:C167118 \n", + "211014 NCIT:C167118 HP:0001974 \n", + "117058 HP:0002900 NCIT:C167118 \n", + "122579 HP:0003124 NCIT:C167118 \n", + "117810 HP:0002902 NCIT:C167118 \n", + "... ... ... \n", + "78133 CHEBI:77431 MONDO:0004721 \n", + "78134 CHEBI:77431 MONDO:0004866 \n", + "78135 CHEBI:77431 MONDO:0004868 \n", + "78136 CHEBI:77431 MONDO:0004946 \n", + "234248 UNII:X85G7936GV SNOMEDCT:76571007 \n", + "\n", + " predicate count \n", + "111072 biolink:associated_with_decreased_likelihood_of 4 \n", + "211014 biolink:associated_with_decreased_likelihood_of 4 \n", + "117058 biolink:associated_with_increased_likelihood_of 4 \n", + "122579 biolink:associated_with_decreased_likelihood_of 4 \n", + "117810 biolink:associated_with_increased_likelihood_of 3 \n", + "... ... ... \n", + "78133 biolink:associated_with_decreased_likelihood_of 1 \n", + "78134 biolink:associated_with_decreased_likelihood_of 1 \n", + "78135 biolink:associated_with_increased_likelihood_of 1 \n", + "78136 biolink:associated_with_increased_likelihood_of 1 \n", + "234248 biolink:associated_with_decreased_likelihood_of 1 \n", + "\n", + "[234249 rows x 4 columns]" + ] + }, + "execution_count": 325, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# # check to see how many identical triples there are in KG (requested by BTE)\n", + "# identical_triples_check = kg.groupby(['subject','object', 'predicate']).size().reset_index().rename(columns={0:'count'}).sort_values(by=['count'], ascending=False)\n", + "# identical_triples_check" + ] + }, + { + "cell_type": "markdown", + "id": "647beb4b", + "metadata": {}, + "source": [ + "# Make a cleaned version of the parser below" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "9a68e6a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"_id\": 
\"HP:0008629_HP:0000360_0.8401321539277617_00_8796399245685702_10102731\",\n", + " \"subject\": {\n", + " \"HP\": \"0008629\",\n", + " \"id\": \"HP:0008629\",\n", + " \"name\": \"Pulsatile tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. 
A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 8.796399245685702,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10102731,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. 
Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0010643_HP:0000360_0.8401321539277617_09998721067797812_8585212287149526_10107468\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0010643\",\n", + " \"id\": \"MONDO:0010643\",\n", + " \"name\": \"acute leukemia (disease)\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " 
\"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9998721067797812,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 8.585212287149526,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10107468,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"UNII:25ADE2236L_HP:0000360_0.8401321539277617_09367666401584368_4558176672832635_10095297\",\n", + " \"subject\": {\n", + " \"UNII\": \"25ADE2236L\",\n", + " \"id\": \"UNII:25ADE2236L\",\n", + " \"name\": \"thrombin\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9367666401584368,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 4.558176672832635,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10095297,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"UNII:K16AIQ8CTM_HP:0000360_0.8401321539277617_09985626800193924_43575215395209606_10099409\",\n", + " \"subject\": {\n", + " \"UNII\": \"K16AIQ8CTM\",\n", + " \"id\": \"UNII:K16AIQ8CTM\",\n", + " \"name\": \"pertuzumab\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9985626800193924,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 4.3575215395209606,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099409,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0007972_HP:0000360_0.8401321539277617_009395878968875304_392606416950393_10100235\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0007972\",\n", + " \"id\": \"MONDO:0007972\",\n", + " \"name\": \"Meniere disease\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.09395878968875304,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 3.92606416950393,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10100235,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0004866_HP:0000360_0.8401321539277617_00_30228399397470613_10094256\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0004866\",\n", + " \"id\": \"MONDO:0004866\",\n", + " \"name\": \"eustachian tube disease\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 3.0228399397470613,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10094256,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"HP:0002321_HP:0000360_0.8401321539277617_00_297792187563902_10105748\",\n", + " \"subject\": {\n", + " \"HP\": \"0002321\",\n", + " \"id\": \"HP:0002321\",\n", + " \"name\": \"Vertigo\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.97792187563902,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10105748,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"HP:0011897_HP:0000360_0.8401321539277617_09882157516627652_29664217619814317_10103231\",\n", + " \"subject\": {\n", + " \"HP\": \"0011897\",\n", + " \"id\": \"HP:0011897\",\n", + " \"name\": \"Neutrophilia\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9882157516627652,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.9664217619814317,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10103231,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:3403_HP:0000360_0.8401321539277617_07343407676622777_26320323984271305_10094497\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"3403\",\n", + " \"id\": \"CHEBI:3403\",\n", + " \"name\": \"carboprost\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.7343407676622777,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.6320323984271305,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10094497,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:42758_HP:0000360_0.8401321539277617_041935921163506107_25903432895291627_10090857\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"42758\",\n", + " \"id\": \"CHEBI:42758\",\n", + " \"name\": \"dextrose\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.41935921163506107,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.5903432895291627,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10090857,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"HP:0030788_HP:0000360_0.8401321539277617_00_2533460251458441_10098414\",\n", + " \"subject\": {\n", + " \"HP\": \"0030788\",\n", + " \"id\": \"HP:0030788\",\n", + " \"name\": \"Impacted cerumen\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.533460251458441,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10098414,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0012883_HP:0000360_0.8401321539277617_099451727004904_23595369689943997_10098573\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0012883\",\n", + " \"id\": \"MONDO:0012883\",\n", + " \"name\": \"acute promyelocytic leukemia\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.99451727004904,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.3595369689943997,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10098573,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:15882_HP:0000360_0.8401321539277617_09962946928940472_21934060641992064_10099906\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"15882\",\n", + " \"id\": \"CHEBI:15882\",\n", + " \"name\": \"phenol\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9962946928940472,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 2.1934060641992064,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099906,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0019065_HP:0000360_0.8401321539277617_09965366860113084_1909472317873171_10094912\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0019065\",\n", + " \"id\": \"MONDO:0019065\",\n", + " \"name\": \"amyloidosis (disease)\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9965366860113084,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.909472317873171,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10094912,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"UNII:1RXS4UE564_HP:0000360_0.8401321539277617_09145182816982448_17682389618102223_10099640\",\n", + " \"subject\": {\n", + " \"UNII\": \"1RXS4UE564\",\n", + " \"id\": \"UNII:1RXS4UE564\",\n", + " \"name\": \"alteplase\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9145182816982448,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.7682389618102223,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099640,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:2637_HP:0000360_0.8401321539277617_09984753422140926_17273309005514552_10097751\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"2637\",\n", + " \"id\": \"CHEBI:2637\",\n", + " \"name\": \"amikacin\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9984753422140926,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.7273309005514552,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10097751,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:28001_HP:0000360_0.8401321539277617_0327274960009581_17001729126903409_10099157\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"28001\",\n", + " \"id\": \"CHEBI:28001\",\n", + " \"name\": \"vancomycin\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.327274960009581,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.7001729126903409,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099157,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:32142_HP:0000360_0.8401321539277617_08551368959397363_16756681953392343_10100133\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"32142\",\n", + " \"id\": \"CHEBI:32142\",\n", + " \"name\": \"sodium citrate\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.8551368959397363,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.6756681953392343,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10100133,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:5280_HP:0000360_0.8401321539277617_09843217708630094_16666971285058136_10101665\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"5280\",\n", + " \"id\": \"CHEBI:5280\",\n", + " \"name\": \"gatifloxacin\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9843217708630094,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.6666971285058136,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10101665,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:31705_HP:0000360_0.8401321539277617_09099121721955136_16359973452904688_10097430\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"31705\",\n", + " \"id\": \"CHEBI:31705\",\n", + " \"name\": \"iodixanol\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9099121721955136,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.6359973452904688,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10097430,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"HP:0002315_HP:0000360_0.8401321539277617_00_16166497779176117_10105844\",\n", + " \"subject\": {\n", + " \"HP\": \"0002315\",\n", + " \"id\": \"HP:0002315\",\n", + " \"name\": \"Headache\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.6166497779176117,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10105844,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:6709_HP:0000360_0.8401321539277617_9059419880941276e-14_1598480857062902_10100577\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"6709\",\n", + " \"id\": \"CHEBI:6709\",\n", + " \"name\": \"meclizine\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 9.059419880941276e-14,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.598480857062902,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10100577,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"UNII:1BJ477IO2L_HP:0000360_0.8401321539277617_00_15423681686921502_10101289\",\n", + " \"subject\": {\n", + " \"UNII\": \"1BJ477IO2L\",\n", + " \"id\": \"UNII:1BJ477IO2L\",\n", + " \"name\": \"gadobutrol\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.5423681686921502,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10101289,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:9421_HP:0000360_0.8401321539277617_08516479806885544_14807202351994573_10096948\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"9421\",\n", + " \"id\": \"CHEBI:9421\",\n", + " \"name\": \"tazobactam\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.8516479806885544,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.4807202351994573,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10096948,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:478164_HP:0000360_0.8401321539277617_07859646244357879_1454940814532088_10106536\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"478164\",\n", + " \"id\": \"CHEBI:478164\",\n", + " \"name\": \"cefepime\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.7859646244357879,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.454940814532088,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10106536,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:4462_HP:0000360_0.8401321539277617_08450305750308531_13343018079195257_10093475\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"4462\",\n", + " \"id\": \"CHEBI:4462\",\n", + " \"name\": \"sodium phosphate\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.8450305750308531,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.3343018079195257,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10093475,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"SNOMEDCT:127295002_HP:0000360_0.8401321539277617_05945757673388871_13294623195442723_10100698\",\n", + " \"subject\": {\n", + " \"SNOMEDCT\": \"127295002\",\n", + " \"id\": \"SNOMEDCT:127295002\",\n", + " \"name\": \"Traumatic brain injury (disorder)\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": 
[\n", + " {\n", + " \"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.5945757673388871,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.3294623195442723,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10100698,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0010030_HP:0000360_0.8401321539277617_08231940924433323_13284310019939671_10099754\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0010030\",\n", + " \"id\": \"MONDO:0010030\",\n", + " \"name\": \"Sjogren syndrome\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.8231940924433323,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.3284310019939671,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099754,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"UNII:FQ3DRG0N5K_HP:0000360_0.8401321539277617_09997741237977732_12685220499445722_10100132\",\n", + " \"subject\": {\n", + " \"UNII\": \"FQ3DRG0N5K\",\n", + " \"id\": \"UNII:FQ3DRG0N5K\",\n", + " \"name\": \"pancrelipase\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9997741237977732,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.2685220499445722,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10100132,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"HP:0032372_HP:0000360_0.8401321539277617_09739456841856716_12651078229598214_10095958\",\n", + " \"subject\": {\n", + " \"HP\": \"0032372\",\n", + " \"id\": \"HP:0032372\",\n", + " \"name\": \"Increased peripheral blast count\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + 
" \"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9739456841856716,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.2651078229598214,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10095958,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:6754_HP:0000360_0.8401321539277617_05082567984079447_11344123128490888_10097573\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"6754\",\n", + " \"id\": \"CHEBI:6754\",\n", + " \"name\": \"meperidine\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.5082567984079447,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.1344123128490888,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10097573,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0005391_HP:0000360_0.8401321539277617_019858432436236395_11125118199146522_10096019\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0005391\",\n", + " \"id\": \"MONDO:0005391\",\n", + " \"name\": \"restless legs syndrome\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.19858432436236395,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.1125118199146522,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10096019,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0018935_HP:0000360_0.8401321539277617_099536202601544_11117474952487418_10099681\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0018935\",\n", + " \"id\": \"MONDO:0018935\",\n", + " \"name\": \"hairy cell leukemia\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.99536202601544,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.1117474952487418,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099681,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0011786_HP:0000360_0.8401321539277617_00_11102871262689296_10101809\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0011786\",\n", + " \"id\": \"MONDO:0011786\",\n", + " \"name\": \"allergic rhinitis\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.1102871262689296,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10101809,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0002039_HP:0000360_0.8401321539277617_08357342064986439_10945588244442226_10102298\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0002039\",\n", + " \"id\": \"MONDO:0002039\",\n", + " \"name\": \"cognitive disorder\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.8357342064986439,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.0945588244442226,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10102298,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"HP:0001251_HP:0000360_0.8401321539277617_07111840915054395_10809198194248215_10099363\",\n", + " \"subject\": {\n", + " \"HP\": \"0001251\",\n", + " \"id\": \"HP:0001251\",\n", + " \"name\": \"Ataxia\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.7111840915054395,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.0809198194248215,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10099363,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:4031_HP:0000360_0.8401321539277617_08886902851131677_10571543050145833_10098612\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"4031\",\n", + " \"id\": \"CHEBI:4031\",\n", + " \"name\": \"cyclosporine\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.8886902851131677,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.0571543050145833,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10098612,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0001119_HP:0000360_0.8401321539277617_09257571836006121_1042593529879973_10100198\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0001119\",\n", + " \"id\": \"MONDO:0001119\",\n", + " \"name\": \"premature menopause\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.9257571836006121,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.042593529879973,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10100198,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"MONDO:0013600_HP:0000360_0.8401321539277617_00_1040152790331654_10101431\",\n", + " \"subject\": {\n", + " \"MONDO\": \"0013600\",\n", + " \"id\": \"MONDO:0013600\",\n", + " \"name\": \"insomnia (disease)\",\n", + " \"type\": \"biolink:Disease\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " \"attribute_type_id\": 
\"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.0,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.040152790331654,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10101431,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"_id\": \"CHEBI:15407_HP:0000360_0.8401321539277617_0002238827241549002_10328062673251501_10092760\",\n", + " \"subject\": {\n", + " \"CHEBI\": \"15407\",\n", + " \"id\": \"CHEBI:15407\",\n", + " \"name\": \"ephedrine\",\n", + " \"type\": \"biolink:ChemicalEntity\"\n", + " },\n", + " \"association\": {\n", + " \"predicate\": \"associated_with_increased_likelihood_of\",\n", + " \"edge_attributes\": [\n", + " {\n", + " 
\"attribute_type_id\": \"biolink:has_supporting_study_result\",\n", + " \"value\": \"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:update_date\",\n", + " \"value\": \"2022-05-18\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": 0.002238827241549002,\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": 0.8401321539277617,\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": 1.0328062673251501,\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_size\",\n", + " \"value\": 10092760,\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \"value\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_data_source\",\n", + " \"value\": \"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.\"\n", + " }\n", + " ]\n", + " },\n", + " \"object\": {\n", + " \"HP\": \"0000360\",\n", + " \"id\": \"HP:0000360\",\n", + " \"name\": \"Tinnitus\",\n", + " \"type\": \"biolink:PhenotypicFeature\"\n", + " },\n", + " \"source\": {\n", + " \"edge_sources\": [\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " },\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "Document IDs appear to be unique\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import json\n", + "import sys, os\n", + "import numpy as np\n", + "\n", + "def parse_ehr_risk(data_folder):\n", + "\n", + " edges_filename = \"ehr_risk_edges_data_2022_06_01.csv\"\n", + " nodes_filename = \"ehr_risk_nodes_data_2022_06_01.csv\"\n", + "\n", + " nodes_filepath = os.path.join(data_folder, nodes_filename)\n", + " 
edges_filepath = os.path.join(data_folder, edges_filename)\n", + " nodes_data = pd.read_csv(nodes_filepath, sep = ',')\n", + " edges_data = pd.read_csv(edges_filepath, sep = ',')\n", + " \n", + " # the nodes file has duplicate ids; fix in enclave in future\n", + " nodes_data = nodes_data.drop_duplicates(subset='id', keep=\"first\")\n", + " \n", + " # biolink category biolink:ChemicalSubstance has been deprecated. Use biolink:ChemicalEntity instead\n", + " nodes_data[\"category\"].mask(nodes_data[\"category\"] == \"biolink:ChemicalSubstance\", \"biolink:ChemicalEntity\" , inplace=True )\n", + "\n", + " # we originally provided the # of patients with condition --> log + patient count, and # of patients without condition --> log - patient count\n", + " # get the approximate total number of patients in the study and call it \"total_sample_size\"\n", + " edges_data[\"num_patients_with_condition\"] = 10**(edges_data['log_positive_patient_count']) # convert log pos patient count to an actual # \n", + " edges_data[\"num_patients_without_condition\"] = 10**(edges_data['log_negative_patient_count']) # convert log neg patient count to an actual #\n", + " edges_data = edges_data.drop(['log_positive_patient_count', 'log_negative_patient_count'], axis=1)\n", + " edges_data[\"total_sample_size\"] = edges_data[\"num_patients_with_condition\"] + edges_data[\"num_patients_without_condition\"]\n", + " edges_data = edges_data.drop(['num_patients_with_condition', 'num_patients_without_condition'], axis=1)\n", + " edges_data[\"total_sample_size\"] = np.random.poisson(edges_data[\"total_sample_size\"]) # add poisson noise injection \n", + "\n", + "# # create confidence interval column by concatenating 'lower_confidence_bound'and 'upper_confidence_bound', then dropping those columns\n", + "# edges_data['log_odds_ratio_95_confidence_interval'] = edges_data.apply(lambda row: [row['lower_confidence_bound'], row['upper_confidence_bound']], axis=1)\n", + "# edges_data = 
edges_data.drop(['lower_confidence_bound', 'upper_confidence_bound'], axis=1)\n", + " \n", + " # ----- RE-CONSTRUCT KG FROM NODES AND EDGES FILES ------ #\n", + " # merge the subject names, categories and ids from the nodes csv/table to the edges table\n", + " kg = pd.merge(edges_data, nodes_data[['id', 'name', 'category']], left_on='subject', right_on = 'id', how=\"inner\")\n", + " kg.rename(columns = {'category_x':'predicate_category',\n", + " 'category_y': 'subject_category',\n", + " 'id': 'subject_id',\n", + " 'name': 'subject_name'}, inplace = True)\n", + " # merge the object names, categories and ids from the nodes csv/table to the edges table\n", + " kg = pd.merge(kg, nodes_data[['id', 'name', 'category']], left_on='object', right_on = 'id', how=\"inner\")\n", + " kg.rename(columns = {'id':'object_id',\n", + " 'category': 'object_category',\n", + " 'name': 'object_name'}, inplace = True)\n", + " # ----- ------------------------------------------ ------ #\n", + " \n", + " # ensure there are no duplicates\n", + " kg = kg.drop_duplicates(['subject', 'object', 'auc_roc', 'p_value', 'feature_coefficient'], keep='first')\n", + " \n", + " # some of the subjects/objects contain the string literal \"NONE\" (specific culprit is COVID Negative or something) Should look into this in future \n", + " kg = kg[~kg[\"subject\"].str.contains(\"NONE\")==True] # subject and object are all CURIEs, not names\n", + " kg = kg[~kg[\"object\"].str.contains(\"NONE\")==True]\n", + " kg = kg[~kg[\"subject\"].str.contains(\"none\")==True]\n", + " kg = kg[~kg[\"object\"].str.contains(\"none\")==True]\n", + " kg = kg[~kg[\"subject\"].str.contains(\"None\")==True]\n", + " kg = kg[~kg[\"object\"].str.contains(\"None\")==True]\n", + " \n", + " id_list = [] # use this to check if your document IDs are unique. 
Collect them and see if they're all unique\n", + " \n", + " # iterate through each row in KG to yield json formatted triple\n", + " for index, row in kg[:40].iterrows(): # comment for testing \n", + " id_dict = {} # this is the outter dict that holds inner dicts: subject_dict, association_dict, object_dict, and source_dict\n", + " subject_dict = {} # inner dict\n", + " association_dict = {} # inner dict\n", + " object_dict = {} # inner dict\n", + " source_dict = {} # inner dict (provides provenance as per TRAPI 1.4 standards)\n", + "\n", + " # id generated by concatenating the following: subject_id CURIE, object_id CURIE, AUCROC (removing decimal point) and p-value (removing decimal point), feature coeffcient (removing decimal point), and total sample size\n", + " doc_id = \"{}_{}_{}_{}_{}_{}\".format(row[\"subject\"],\n", + " row[\"object\"],\n", + " str(row['auc_roc']),\n", + " str(row['p_value']).replace('.', ''),\n", + " str(row['feature_coefficient']).replace('.', ''),\n", + " str(row[\"total_sample_size\"]))\n", + "\n", + " id_list.append(doc_id)\n", + " id_dict[\"_id\"] = doc_id\n", + " subject_dict[\"{}\".format(row[\"subject\"].split(':')[0])] = \"{}\".format(row[\"subject\"].split(':')[1]) # create the subject dict from the rows of the df \n", + " subject_dict[\"id\"] = row[\"subject\"]\n", + " subject_dict[\"name\"] = row[\"subject_name\"]\n", + " subject_dict[\"type\"] = row[\"subject_category\"]\n", + "\n", + " association_dict[\"predicate\"] = \"{}\".format(row[\"predicate\"].split(':')[1]) # create the association dict from the rows of the df. Edge attributes need extra work. 
The predicate is separated out into qualified predicate by X-BTE annotation, so we don't have to worry about qualifiers here\n", + " association_dict[\"edge_attributes\"] = []\n", + "\n", + " source_dict[\"edge_sources\"] = []\n", + "\n", + " association_dict[\"edge_attributes\"].append(\n", + " {\n", + " \"attribute_type_id\":\"biolink:has_supporting_study_result\",\n", + " \"value\":\"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " \"attributes\": [\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_method_type\",\n", + " \"value\": \"STATO:0000149\",\n", + " \"description\": \"Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\":\"biolink:update_date\",\n", + " \"value\":row[\"provided_date\"]\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:p_value\",\n", + " \"value\": row[\"p_value\"],\n", + " \"description\": \"The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. 
A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"STATO:0000209\",\n", + " \"value\": row[\"auc_roc\"],\n", + " \"description\": \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:log_odds_ratio\",\n", + " \"value\": row['feature_coefficient'],\n", + " \"description\": \"The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.\"\n", + " },\n", + "# {\n", + "# \"attribute_type_id\": \"biolink:log_odds_ratio_95_confidence_interval\",\n", + "# \"value\": row['log_odds_ratio_95_confidence_interval'],\n", + "# \"description\": \"log_odds_ratio_95_confidence_interval\"\n", + "# },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_cohort\",\n", + " \"value\": \"age < 18 excluded\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:supporting_study_date_range\",\n", + " \"value\": \"2020-2022 (prediction)\"\n", + " },\n", + " {\n", + " \"attribute_type_id\": \"biolink:total_sample_size\",\n", + " \"value\": row[\"total_sample_size\"],\n", + " \"description\": \"The total number of patients or participants within a sample population.\"\n", + " }\n", + " ]\n", + " }\n", + " )\n", + " association_dict[\"edge_attributes\"].append(\n", + " {\n", + " \"attribute_type_id\":\"biolink:primary_knowledge_source\",\n", + " \"value\":\"infores:biothings-multiomics-ehr-risk\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": 
\"http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a\",\n", + " \"description\": \"The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).\",\n", + " }\n", + " )\n", + " association_dict[\"edge_attributes\"].append(\n", + " {\n", + " \"attribute_type_id\":\"biolink:supporting_data_source\",\n", + " \"value\":\"infores:providence-st-joseph-ehr\",\n", + " \"value_type_id\": \"biolink:InformationResource\",\n", + " \"value_url\": \"https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP\",\n", + " \"description\": \"A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. 
Please email data-access@isbscience.org for more information.\",\n", + " }\n", + " )\n", + "\n", + " object_dict[\"{}\".format(row[\"object\"].split(':')[0])] = \"{}\".format(row[\"object\"].split(':')[1]) # create the object dict from the rows of the df \n", + " object_dict[\"id\"] = row[\"object\"]\n", + " object_dict[\"name\"] = row[\"object_name\"]\n", + " object_dict[\"type\"] = row[\"object_category\"]\n", + "\n", + " source_dict[\"edge_sources\"].append(\n", + " {\n", + " \"resource_id\": \"infores:biothings-multiomics-ehr-risk\",\n", + " \"resource_role\": \"primary_knowledge_source\",\n", + " \"upstream_resource_ids\": \"infores:providence-st-joseph-ehr\"\n", + " }\n", + " )\n", + "\n", + " source_dict[\"edge_sources\"].append(\n", + " {\n", + " \"resource_id\": \"infores:providence-st-joseph-ehr\",\n", + " \"resource_role\": \"supporting_data_source\"\n", + " }\n", + " )\n", + "\n", + " id_dict[\"subject\"] = subject_dict # put the subject, association, object, and source dicts into the outer dict called id_dict\n", + " id_dict[\"association\"] = association_dict\n", + " id_dict[\"object\"] = object_dict\n", + " id_dict[\"source\"] = source_dict\n", + " \n", + " # throw error for any rows that are missing any relevant values, such as subject name, subject id/CURIE, subject category, p-value, etc...\n", + " try:\n", + " assert not {x for x in {row[\"total_sample_size\"],\n", + " row[\"subject\"],\n", + " row[\"subject_name\"],\n", + " row[\"subject_category\"],\n", + " row[\"object\"],\n", + " row[\"object_name\"],\n", + " row[\"object_category\"],\n", + " row[\"p_value\"],\n", + " row[\"auc_roc\"],\n", + " row['feature_coefficient']} if x in {None,\n", + " \"NONE\",\n", + " \"None\",\n", + " \"none\",\n", + " \"NA\"}}, \"Error: All values including subject and object IDs, categories, names, p-value, AUC-ROC, and feature coefficient must be non-null and not contain string literal None or NONE\"\n", + " print(json.dumps(id_dict, indent=2)) # uncomment for 
import pandas as pd
import json
import sys, os
import numpy as np

# String sentinels that mark a required field as absent. pandas' CSV reader
# turns empty cells (and, with default settings, several of these literals)
# into NaN, so NaN is checked separately in _is_missing().
_MISSING_STRINGS = frozenset({"NONE", "None", "none", "NA"})

# Row fields that must be present before a document is emitted.
_REQUIRED_FIELDS = (
    "total_sample_size",
    "subject",
    "subject_name",
    "subject_category",
    "object",
    "object_name",
    "object_category",
    "p_value",
    "auc_roc",
    "feature_coefficient",
)

_MISSING_VALUE_ERROR = "Error: All values including subject and object IDs, categories, names, p-value, AUC-ROC, and feature coefficient must be non-null and not contain string literal None or NONE"


def _is_missing(value):
    """Return True when `value` is None, NaN/NA, or one of the sentinel strings."""
    if value is None:
        return True
    if isinstance(value, str):
        return value in _MISSING_STRINGS
    return bool(pd.isna(value))


def _node_dict(curie, name, category):
    """Build the subject/object sub-document for one node CURIE (e.g. "HP:0000360")."""
    parts = curie.split(':')
    # The CURIE prefix becomes a key of its own ({"HP": "0000360"}) next to the full id.
    return {parts[0]: parts[1], "id": curie, "name": name, "type": category}


def _build_edge_attributes(row):
    """Build the three edge attributes (study result, primary source, data source) for one KG row."""
    study_result = {
        "attribute_type_id":"biolink:has_supporting_study_result",
        "value":"We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).",
        "attributes": [
            {
                "attribute_type_id": "biolink:supporting_study_method_type",
                "value": "STATO:0000149",
                "description": "Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)"
            },
            {
                "attribute_type_id":"biolink:update_date",
                "value":row["provided_date"]
            },
            {
                "attribute_type_id": "biolink:p_value",
                "value": row["p_value"],
                "description": "The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis."
            },
            {
                "attribute_type_id": "STATO:0000209",
                "value": row["auc_roc"],
                "description": "The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess."
            },
            {
                "attribute_type_id": "biolink:log_odds_ratio",
                "value": row['feature_coefficient'],
                "description": "The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group."
            },
            # Disabled: the 95% confidence-interval attribute
            # (biolink:log_odds_ratio_95_confidence_interval) is not emitted because the
            # column that would feed it is not built in parse_ehr_risk().
            {
                "attribute_type_id": "biolink:supporting_study_cohort",
                "value": "age < 18 excluded"
            },
            {
                "attribute_type_id": "biolink:supporting_study_date_range",
                "value": "2020-2022 (prediction)"
            },
            {
                "attribute_type_id": "biolink:total_sample_size",
                "value": row["total_sample_size"],
                "description": "The total number of patients or participants within a sample population."
            }
        ]
    }
    primary_source = {
        "attribute_type_id":"biolink:primary_knowledge_source",
        "value":"infores:biothings-multiomics-ehr-risk",
        "value_type_id": "biolink:InformationResource",
        "value_url": "http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a",
        "description": "The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).",
    }
    data_source = {
        "attribute_type_id":"biolink:supporting_data_source",
        "value":"infores:providence-st-joseph-ehr",
        "value_type_id": "biolink:InformationResource",
        "value_url": "https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP",
        "description": "A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.",
    }
    return [study_result, primary_source, data_source]


def _build_edge_sources():
    """Build the provenance list stored under source.edge_sources (same for every edge)."""
    return [
        {
            "resource_id": "infores:biothings-multiomics-ehr-risk",
            "resource_role": "primary_knowledge_source",
            # TRAPI's RetrievalSource defines upstream_resource_ids as a list of
            # CURIEs, so a list is emitted even though there is a single upstream.
            "upstream_resource_ids": ["infores:providence-st-joseph-ehr"]
        },
        {
            "resource_id": "infores:providence-st-joseph-ehr",
            "resource_role": "supporting_data_source"
        },
    ]


def parse_ehr_risk(data_folder, max_rows=40):
    """Rebuild the EHR Risk KG from its node/edge CSVs and print one JSON document per edge.

    This is the notebook (testing) variant of the parser: documents are printed
    rather than yielded, and only a preview of the KG is processed.

    Parameters
    ----------
    data_folder : str
        Directory containing "ehr_risk_edges_data_2022_06_01.csv" and
        "ehr_risk_nodes_data_2022_06_01.csv".
    max_rows : int or None, optional
        Number of leading KG rows to process (default 40, the preview size used
        for testing). Pass None to process every row.

    Returns
    -------
    None
        Documents, per-row validation errors and the final document-ID
        uniqueness verdict are written to stdout.
    """
    edges_filename = "ehr_risk_edges_data_2022_06_01.csv"
    nodes_filename = "ehr_risk_nodes_data_2022_06_01.csv"

    nodes_data = pd.read_csv(os.path.join(data_folder, nodes_filename), sep=',')
    edges_data = pd.read_csv(os.path.join(data_folder, edges_filename), sep=',')

    # the nodes file has duplicate ids; fix in enclave in future
    nodes_data = nodes_data.drop_duplicates(subset='id', keep="first")

    # biolink:ChemicalSubstance has been deprecated in favour of biolink:ChemicalEntity.
    # The column is re-assigned explicitly: an in-place mask on a chained column
    # selection does not reliably write back to the frame (pandas Copy-on-Write).
    category = nodes_data["category"]
    nodes_data = nodes_data.assign(
        category=category.mask(category == "biolink:ChemicalSubstance", "biolink:ChemicalEntity")
    )

    # The edges file provides log10 counts of patients with and without the
    # condition; their sum approximates the total number of patients in the study.
    with_condition = 10 ** edges_data['log_positive_patient_count']
    without_condition = 10 ** edges_data['log_negative_patient_count']
    edges_data = edges_data.drop(['log_positive_patient_count', 'log_negative_patient_count'], axis=1)
    # Poisson noise injection.
    # NOTE(review): the noise is unseeded and total_sample_size is part of the
    # document _id below, so _id values differ between runs - confirm this is intended.
    edges_data["total_sample_size"] = np.random.poisson(with_condition + without_condition)

    # ----- RE-CONSTRUCT KG FROM NODES AND EDGES FILES ------ #
    node_columns = nodes_data[['id', 'name', 'category']]
    # attach the subject's name/category/id to every edge
    kg = pd.merge(edges_data, node_columns, left_on='subject', right_on='id', how="inner")
    kg = kg.rename(columns={'category_x': 'predicate_category',
                            'category_y': 'subject_category',
                            'id': 'subject_id',
                            'name': 'subject_name'})
    # attach the object's name/category/id to every edge
    kg = pd.merge(kg, node_columns, left_on='object', right_on='id', how="inner")
    kg = kg.rename(columns={'id': 'object_id',
                            'category': 'object_category',
                            'name': 'object_name'})
    # ----- ------------------------------------------ ------ #

    # ensure there are no duplicates
    kg = kg.drop_duplicates(['subject', 'object', 'auc_roc', 'p_value', 'feature_coefficient'], keep='first')

    # Some subject/object CURIEs contain the literal NONE/none/None; drop those edges.
    # na=False keeps the boolean mask well-defined if a CURIE is NaN; such rows are
    # rejected by the required-field check in the loop below.
    for curie_column in ("subject", "object"):
        kg = kg[~kg[curie_column].str.contains("NONE|none|None", na=False)]

    id_list = []  # collected document IDs, checked for uniqueness after the loop

    for _, row in kg[:max_rows].iterrows():
        # Validate before building anything, so a row with a missing value is
        # reported and skipped instead of crashing on it or serialising NaN.
        if any(_is_missing(row[field]) for field in _REQUIRED_FIELDS):
            print(_MISSING_VALUE_ERROR)
            continue

        # _id = subject CURIE, object CURIE, AUC-ROC (as-is), p-value and feature
        # coefficient (decimal point removed), and total sample size.
        doc_id = "{}_{}_{}_{}_{}_{}".format(row["subject"],
                                            row["object"],
                                            str(row['auc_roc']),
                                            str(row['p_value']).replace('.', ''),
                                            str(row['feature_coefficient']).replace('.', ''),
                                            str(row["total_sample_size"]))
        id_list.append(doc_id)

        id_dict = {
            "_id": doc_id,
            "subject": _node_dict(row["subject"], row["subject_name"], row["subject_category"]),
            "association": {
                # Only the part after the "biolink:" prefix is stored.
                "predicate": "{}".format(row["predicate"].split(':')[1]),
                "edge_attributes": _build_edge_attributes(row),
            },
            "object": _node_dict(row["object"], row["object_name"], row["object_category"]),
            # provenance as per TRAPI 1.4 standards
            "source": {"edge_sources": _build_edge_sources()},
        }

        print(json.dumps(id_dict, indent=2))  # testing variant: print instead of yield
        # yield id_dict

    if len(id_list) != len(set(id_list)):
        print("You do not have unique document IDs for each edge in your KG. Either you have duplicate rows/edges, or you simply didn't make a unique identifer (Document ID) for each one.\n\n\n")
    else:
        print("Document IDs appear to be unique\n\n\n")
Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).', 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type', 'value': 'STATO:0000149', 'description': 'Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)'}, {'attribute_type_id': 'biolink:update_date', 'value': '2022-05-18'}, {'attribute_type_id': 'biolink:p_value', 'value': 0.0, 'description': 'The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.'}, {'attribute_type_id': 'STATO:0000209', 'value': 0.8401321539277617, 'description': \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"}, {'attribute_type_id': 'biolink:log_odds_ratio', 'value': 8.796399245685702, 'description': 'The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.'}, {'attribute_type_id': 'biolink:supporting_study_cohort', 'value': 'age < 18 excluded'}, {'attribute_type_id': 'biolink:supporting_study_date_range', 'value': '2020-2022 (prediction)'}, {'attribute_type_id': 'biolink:supporting_study_size', 'value': 10096539, 'description': 'The total number of patients or participants within a sample population.'}]}, {'attribute_type_id': 'biolink:primary_knowledge_source', 'value': 'infores:biothings-multiomics-ehr-risk', 'value_type_id': 'biolink:InformationResource', 'value_url': 'http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a', 'description': 'The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).'}, {'attribute_type_id': 'biolink:supporting_data_source', 'value': 'infores:providence-st-joseph-ehr', 'value_type_id': 'biolink:InformationResource', 'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP', 'description': 'A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.'}]}, 'object': {'HP': '0000360', 'id': 'HP:0000360', 'name': 'Tinnitus', 'type': 'biolink:PhenotypicFeature'}, 'source': {'edge_sources': [{'resource_id': 'infores:biothings-multiomics-ehr-risk', 'resource_role': 'primary_knowledge_source', 'upstream_resource_ids': 'infores:providence-st-joseph-ehr'}, {'resource_id': 'infores:providence-st-joseph-ehr', 'resource_role': 'supporting_data_source'}]}}, {'_id': 'MONDO:0010643_HP:0000360_0.8401321539277617_09998721067797812_8585212287149526_10100701', 'subject': {'MONDO': '0010643', 'id': 'MONDO:0010643', 'name': 'acute leukemia (disease)', 'type': 'biolink:Disease'}, 'association': {'predicate': 'associated_with_increased_likelihood_of', 'edge_attributes': [{'attribute_type_id': 'biolink:has_supporting_study_result', 'value': 'We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. 
Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).', 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type', 'value': 'STATO:0000149', 'description': 'Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)'}, {'attribute_type_id': 'biolink:update_date', 'value': '2022-05-18'}, {'attribute_type_id': 'biolink:p_value', 'value': 0.9998721067797812, 'description': 'The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.'}, {'attribute_type_id': 'STATO:0000209', 'value': 0.8401321539277617, 'description': \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"}, {'attribute_type_id': 'biolink:log_odds_ratio', 'value': 8.585212287149526, 'description': 'The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.'}, {'attribute_type_id': 'biolink:supporting_study_cohort', 'value': 'age < 18 excluded'}, {'attribute_type_id': 'biolink:supporting_study_date_range', 'value': '2020-2022 (prediction)'}, {'attribute_type_id': 'biolink:supporting_study_size', 'value': 10100701, 'description': 'The total number of patients or participants within a sample population.'}]}, {'attribute_type_id': 'biolink:primary_knowledge_source', 'value': 'infores:biothings-multiomics-ehr-risk', 'value_type_id': 'biolink:InformationResource', 'value_url': 'http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a', 'description': 'The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).'}, {'attribute_type_id': 'biolink:supporting_data_source', 'value': 'infores:providence-st-joseph-ehr', 'value_type_id': 'biolink:InformationResource', 'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP', 'description': 'A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.'}]}, 'object': {'HP': '0000360', 'id': 'HP:0000360', 'name': 'Tinnitus', 'type': 'biolink:PhenotypicFeature'}, 'source': {'edge_sources': [{'resource_id': 'infores:biothings-multiomics-ehr-risk', 'resource_role': 'primary_knowledge_source', 'upstream_resource_ids': 'infores:providence-st-joseph-ehr'}, {'resource_id': 'infores:providence-st-joseph-ehr', 'resource_role': 'supporting_data_source'}]}}, {'_id': 'UNII:25ADE2236L_HP:0000360_0.8401321539277617_09367666401584368_4558176672832635_10098743', 'subject': {'UNII': '25ADE2236L', 'id': 'UNII:25ADE2236L', 'name': 'thrombin', 'type': 'biolink:ChemicalSubstance'}, 'association': {'predicate': 'associated_with_increased_likelihood_of', 'edge_attributes': [{'attribute_type_id': 'biolink:has_supporting_study_result', 'value': 'We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. 
Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).', 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type', 'value': 'STATO:0000149', 'description': 'Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)'}, {'attribute_type_id': 'biolink:update_date', 'value': '2022-05-18'}, {'attribute_type_id': 'biolink:p_value', 'value': 0.9367666401584368, 'description': 'The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.'}, {'attribute_type_id': 'STATO:0000209', 'value': 0.8401321539277617, 'description': \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"}, {'attribute_type_id': 'biolink:log_odds_ratio', 'value': 4.558176672832635, 'description': 'The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.'}, {'attribute_type_id': 'biolink:supporting_study_cohort', 'value': 'age < 18 excluded'}, {'attribute_type_id': 'biolink:supporting_study_date_range', 'value': '2020-2022 (prediction)'}, {'attribute_type_id': 'biolink:supporting_study_size', 'value': 10098743, 'description': 'The total number of patients or participants within a sample population.'}]}, {'attribute_type_id': 'biolink:primary_knowledge_source', 'value': 'infores:biothings-multiomics-ehr-risk', 'value_type_id': 'biolink:InformationResource', 'value_url': 'http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a', 'description': 'The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).'}, {'attribute_type_id': 'biolink:supporting_data_source', 'value': 'infores:providence-st-joseph-ehr', 'value_type_id': 'biolink:InformationResource', 'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP', 'description': 'A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.'}]}, 'object': {'HP': '0000360', 'id': 'HP:0000360', 'name': 'Tinnitus', 'type': 'biolink:PhenotypicFeature'}, 'source': {'edge_sources': [{'resource_id': 'infores:biothings-multiomics-ehr-risk', 'resource_role': 'primary_knowledge_source', 'upstream_resource_ids': 'infores:providence-st-joseph-ehr'}, {'resource_id': 'infores:providence-st-joseph-ehr', 'resource_role': 'supporting_data_source'}]}}, {'_id': 'UNII:K16AIQ8CTM_HP:0000360_0.8401321539277617_09985626800193924_43575215395209606_10097768', 'subject': {'UNII': 'K16AIQ8CTM', 'id': 'UNII:K16AIQ8CTM', 'name': 'pertuzumab', 'type': 'biolink:ChemicalSubstance'}, 'association': {'predicate': 'associated_with_increased_likelihood_of', 'edge_attributes': [{'attribute_type_id': 'biolink:has_supporting_study_result', 'value': 'We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. 
Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).', 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type', 'value': 'STATO:0000149', 'description': 'Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)'}, {'attribute_type_id': 'biolink:update_date', 'value': '2022-05-18'}, {'attribute_type_id': 'biolink:p_value', 'value': 0.9985626800193924, 'description': 'The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.'}, {'attribute_type_id': 'STATO:0000209', 'value': 0.8401321539277617, 'description': \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"}, {'attribute_type_id': 'biolink:log_odds_ratio', 'value': 4.3575215395209606, 'description': 'The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.'}, {'attribute_type_id': 'biolink:supporting_study_cohort', 'value': 'age < 18 excluded'}, {'attribute_type_id': 'biolink:supporting_study_date_range', 'value': '2020-2022 (prediction)'}, {'attribute_type_id': 'biolink:supporting_study_size', 'value': 10097768, 'description': 'The total number of patients or participants within a sample population.'}]}, {'attribute_type_id': 'biolink:primary_knowledge_source', 'value': 'infores:biothings-multiomics-ehr-risk', 'value_type_id': 'biolink:InformationResource', 'value_url': 'http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a', 'description': 'The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).'}, {'attribute_type_id': 'biolink:supporting_data_source', 'value': 'infores:providence-st-joseph-ehr', 'value_type_id': 'biolink:InformationResource', 'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP', 'description': 'A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.'}]}, 'object': {'HP': '0000360', 'id': 'HP:0000360', 'name': 'Tinnitus', 'type': 'biolink:PhenotypicFeature'}, 'source': {'edge_sources': [{'resource_id': 'infores:biothings-multiomics-ehr-risk', 'resource_role': 'primary_knowledge_source', 'upstream_resource_ids': 'infores:providence-st-joseph-ehr'}, {'resource_id': 'infores:providence-st-joseph-ehr', 'resource_role': 'supporting_data_source'}]}}, {'_id': 'MONDO:0007972_HP:0000360_0.8401321539277617_009395878968875304_392606416950393_10102280', 'subject': {'MONDO': '0007972', 'id': 'MONDO:0007972', 'name': 'Meniere disease', 'type': 'biolink:Disease'}, 'association': {'predicate': 'associated_with_increased_likelihood_of', 'edge_attributes': [{'attribute_type_id': 'biolink:has_supporting_study_result', 'value': 'We train a large collection of multivariable, binary logistic regression models on EHR data for each specific condition/disease/outcome. Features include labs, medications, and phenotypes. 
Directed edges point from risk factors to specific outcomes (diseases, phenotype, or medication exposure).', 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type', 'value': 'STATO:0000149', 'description': 'Binomial logistic regression for analysis of dichotomous dependent variable (in this case, for having this particular condition/disease/outcome or not)'}, {'attribute_type_id': 'biolink:update_date', 'value': '2022-05-18'}, {'attribute_type_id': 'biolink:p_value', 'value': 0.09395878968875304, 'description': 'The p-value represents the probability of observing the estimated coefficient (or more extreme value) under the assumption of the null hypothesis (which assumes that there is no relationship between the independent variable and outcome variable). The p-value associated with each coefficient helps determine whether the relationship between the independent variable and the outcome is statistically significant. A low p-value suggests that the observed relationship between the independent variable and the outcome is unlikely to occur by chance alone, providing evidence against the null hypothesis.'}, {'attribute_type_id': 'STATO:0000209', 'value': 0.8401321539277617, 'description': \"The AUROC provides a way to evaluate the model's ability to discriminate between the two classes (the presenece of absence of condition/disease/outcome). 
Values range between 0-1; the higher the AUROC, the better the model's ability to discriminate between clasess.\"}, {'attribute_type_id': 'biolink:log_odds_ratio', 'value': 3.92606416950393, 'description': 'The logarithm of the odds ratio (log odds ratio), or the ratio of the odds of event Y occurring in an exposed group versus the odds of event Y occurring in a non-exposed group.'}, {'attribute_type_id': 'biolink:supporting_study_cohort', 'value': 'age < 18 excluded'}, {'attribute_type_id': 'biolink:supporting_study_date_range', 'value': '2020-2022 (prediction)'}, {'attribute_type_id': 'biolink:supporting_study_size', 'value': 10102280, 'description': 'The total number of patients or participants within a sample population.'}]}, {'attribute_type_id': 'biolink:primary_knowledge_source', 'value': 'infores:biothings-multiomics-ehr-risk', 'value_type_id': 'biolink:InformationResource', 'value_url': 'http://smart-api.info/registry?q=d86a24f6027ffe778f84ba10a7a1861a', 'description': 'The EHR Risk KP is created and maintained by the Multiomics Provider team from the Institute for Systems Biology in Seattle, WA. Through a partnership with Providence/Swedish Health Services and Institute for Systems Biology, we analyze over 26 million EHRs. 
We use these records to train a large collection of interpretable machine learning models which are integrated into a single large Knowledge Graph, with directed edges pointing from risk factors to specific outcomes (diseases, phenotype, or medication exposure).'}, {'attribute_type_id': 'biolink:supporting_data_source', 'value': 'infores:providence-st-joseph-ehr', 'value_type_id': 'biolink:InformationResource', 'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/EHR-Risk-KP', 'description': 'A partnership with Providence/Swedish Health Services and Institute for Systems Biology allows analysis of 26 million EHRs from patients in seven states in the US, including Alaska, California, Montana, Oregon, Washington, Texas, and New Mexico. Please email data-access@isbscience.org for more information.'}]}, 'object': {'HP': '0000360', 'id': 'HP:0000360', 'name': 'Tinnitus', 'type': 'biolink:PhenotypicFeature'}, 'source': {'edge_sources': [{'resource_id': 'infores:biothings-multiomics-ehr-risk', 'resource_role': 'primary_knowledge_source', 'upstream_resource_ids': 'infores:providence-st-joseph-ehr'}, {'resource_id': 'infores:providence-st-joseph-ehr', 'resource_role': 'supporting_data_source'}]}}]\n" + ] + } + ], + "source": [ + "def main():\n", + " data_folder = \"../../data\" # uncomment for testing\n", + " parse_ehr_risk(data_folder) \n", + "\n", + " \n", + "if __name__ == \"__main__\":\n", + " main()\n", + "\n", + "# def f(): return list(parse_ehr_risk(data_folder))\n", + "\n", + "# print(f())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f8e6295", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b391313d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9131832", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"7d579a7c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3deaf666", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38af4ce6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "975b39b8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee8cb830", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8da035e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36dbb201", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea4aaa3b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1993c430", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c1a5344", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2b7d630", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87af529d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d0a883a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "855f14b3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9dd7dd4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dffef3dd", + "metadata": {}, + "outputs": [], + "source": [] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a1f1b174", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14117bf5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b4f2f35", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f620ac0c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9f8ddcb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c12970b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6075e928", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b885d179", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ad7fd3a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31661afb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33b5bdf8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f2de47d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc5ba61b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfbc4354", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ed22495", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"0005db9e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04e71223", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76b3e63f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6399bef2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "508fcd23", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80e6c8da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81f51f0b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddcf59bd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2f7626b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43f24102", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b054438", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b600cab2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a26f5991", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d19cabbf", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed293615", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcff8936", + "metadata": {}, + "outputs": [], + "source": [] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "67cea542", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4683b850", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53da2865", + "metadata": {}, + "outputs": [], + "source": [ + "### from UI, we found duplicate predicate in June 6 2020 edges files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "658cef8a", + "metadata": {}, + "outputs": [], + "source": [ + "edges_data.loc[(edges_data['subject'] == 'MONDO:0011849') & (edges_data['object'] == 'MONDO:0005083')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61b5337d", + "metadata": {}, + "outputs": [], + "source": [ + "edges_data.loc[(edges_data['subject'] == 'MONDO:0005083') & (edges_data['object'] == 'MONDO:0011849')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c149a94c", + "metadata": {}, + "outputs": [], + "source": [ + "https://github.com/uhbrar/ReasonerAPI/blob/update_guide/MigrationAndImplementationGuide1-4.md" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed91434c", + "metadata": {}, + "outputs": [], + "source": [ + "variable names to use for Clinical Data Committee:\n", + "log_odds_ratio, total_sample_size, log_odds_ratio_95_confidence_interval" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cecd7f59", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1.4 output from molepro\n", + "{\n", + " \t\"attribute_type_id\": \"biolink:aggregator_knowledge_source\",\t\n", + " \t\"value\": \"infores:molepro\", \n", + " \t\"value_type_id\": \"biolink:InformationResource\", \n", + " \t\"value_url\": \"https://translator.broadinstitute.org/molepro/trapi/v1.0\",\n", + " \t\"description\": \"The Molecular Data Provider KP from NCATS Translator\",\n", + " \t\"attribute_source\": 
\"infores:molepro\"\n", + " },\n", + "\t{\n", + " \t\"attribute_type_id\": \"biolink:aggregator_knowledge_source\",\n", + " \t\"value\": \"infores:chembl\",\n", + " \t\"value_type_id\": \"biolink:InformationResource\", \n", + " \t\"value_url\": \"https://www.ebi.ac.uk/chembl\",\n", + " \t\"description\": \"ChEMBL is a manually curated database of bioactive molecules...\",\n", + " \t\"attribute_source\": \"infores:molepro\"\n", + "\t},\n", + "\t{\n", + " \t\"attribute_type_id\": \"biolink:primary_knowledge_source\",\n", + " \t\"value\": \"infores:clinical-trials-gov\", \n", + " \t\"value_type_id\": \"biolink:InformationResource\", \n", + " \t\"value_url\": \"https://www.clinicaltrials.gov\",\n", + " \t\"description\": \"ClinicalTrials.gov is...\",\n", + " \t\"attribute_source\": \"infores:chembl\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3a5ca8d", + "metadata": {}, + "outputs": [], + "source": [ + "\"edges\": [\n", + " {\n", + " \"id\": \"Association002\", \n", + " \"category\": \"biolink:FeatureVariableAssociation\",\n", + " \"subject\": \"ncit:C29886\" # Airborne Particulate Matter (PM2.5), \t \n", + " \"predicate\": \"biolink:correlates_with\",\n", + " \"object\": \"tvfo:xxxxx\" # t.b.d. 
term for 'ED Visits for Asthma',\n", + " \"subject_modifier\": \"biolink:Exposure\",\n", + " \"attributes\": [\n", + "\t{\n", + " \t\"attribute_type_id\": \"biolink:primary_knowledge_source\", \n", + " \t\"value\": \"infores:icees-asthma\", \n", + " \t\"value_type_id\": \"biolink:InformationResource\",\t \n", + " \t\"value_url\": \"https://icees.renci.org:16339\",\n", + " \t\"description\": \"The ICEES Provider ...\",\n", + " \t\"attribute_source\": \"infores:icees-asthma\"\n", + "\t},\n", + "\t{\n", + " \t\"attribute_type_id\": \"biolink:supporting_data_source\", \n", + " \t\"value\": \"infores:us-epa-airborne-pollutant-exposures-data\",\n", + " \t\"value_type_id\": \"biolink:InformationResource\",\t \n", + " \t\"description\": \"US Environmental Protection Agency Airborne Pollutant Exposure Data\",\n", + " \t\"attribute_source\": \"infores:icees-asthma\"\n", + "\t},\n", + "\t{\n", + " \t\"attribute_type_id\": \"biolink:supporting_data_source\", \n", + " \t\"value\": \"infores:unc-cdw-health\",\n", + " \t\"value_type_id\": \"biolink:InformationResource\",\t \n", + " \t\"description\": \"UNC Carolina Data Warehouse for Health Patient EHR Data\",\n", + " \t\"attribute_source\": \"infores:icees-asthma\"\n", + "\t}\n", + " ]\n", + " }\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df78b8ff", + "metadata": {}, + "outputs": [], + "source": [ + "edges_sig[\"num_patients_with_condition\"] = 10**(edges_sig['log_positive_patient_count'])\n", + "edges_sig[\"num_patients_without_condition\"] = 10**(edges_sig['log_negative_patient_count'])\n", + "edges_sig" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + 
"nbformat": 4, + "nbformat_minor": 5 +}