Skip to content

Commit

Permalink
Merge pull request #207 from abearab/main
Browse files Browse the repository at this point in the history
Improvements in the context of knowledge graph resources
  • Loading branch information
amva13 authored Mar 12, 2024
2 parents 0e14a64 + 6b73b78 commit 2ccd732
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ __pycache__/
*.so

# Distribution / packaging
.idea/
.Python
build/
develop-eggs/
Expand Down
1 change: 1 addition & 0 deletions tdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .oracles import Oracle
from .benchmark_deprecated import BenchmarkGroup
from .tdc_hf import tdc_hf_interface
from tdc.utils.knowledge_graph import KnowledgeGraph
41 changes: 37 additions & 4 deletions tdc/resource/primekg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@
This file contains a primekg dataloader.
"""

import pandas as pd
import numpy as np
import sys
import warnings

warnings.filterwarnings("ignore")

from ..utils import general_load
from ..utils.knowledge_graph import KnowledgeGraph

warnings.filterwarnings("ignore")


class PrimeKG:
Expand Down Expand Up @@ -46,3 +45,37 @@ def get_node_list(self, node_type):
df = self.df
return np.unique(df[(df.x_type == node_type)].x_id.unique().tolist() +
df[(df.y_type == node_type)].y_id.unique().tolist())


class PrimeKGDev(KnowledgeGraph):
    """PrimeKG data loader with knowledge-graph helper functions.

    The edge table is stored on ``self.df`` (set up by ``KnowledgeGraph``),
    so the helpers inherited from the base class operate on the loaded
    PrimeKG data.
    """

    def __init__(self, path="./data"):
        """Load the PrimeKG edge table via ``general_load``.

        Args:
            path: Directory passed to ``general_load`` for the PrimeKG files;
                also reused later by ``get_features``.
        """
        # Initialise the base class on *this* instance.  Assigning a fresh
        # KnowledgeGraph to the local name ``self`` would only rebind the
        # name and leave the PrimeKGDev object without ``df`` and ``path``.
        super().__init__(df=general_load("primekg", path, ","))
        self.path = path

    def get_data(self):
        """Return the edge table currently held by the graph."""
        return self.df

    def to_nx(self):
        """Build an undirected ``networkx.Graph`` from the edge table.

        Each edge connects ``x_id`` to ``y_id`` and carries a ``relation``
        attribute.  NOTE: ``nx.Graph`` stores a single edge per node pair, so
        when the same pair occurs under several relations the attribute of
        the relation processed last is the one that remains.

        Returns:
            The constructed ``networkx.Graph``.
        """
        # Imported lazily so networkx is only needed when this is called.
        import networkx as nx

        graph = nx.Graph()
        for relation in self.df.relation.unique():
            endpoints = self.df[self.df.relation == relation][["x_id", "y_id"]]
            graph.add_edges_from(endpoints.values, relation=relation)
        return graph

    def get_features(self, feature_type):
        """Load the feature table for drugs or diseases.

        Args:
            feature_type: Either ``"drug"`` or ``"disease"``.

        Returns:
            Whatever ``general_load`` returns for the
            ``primekg_<feature_type>_feature`` resource.

        Raises:
            ValueError: If ``feature_type`` is not ``"drug"`` or ``"disease"``.
        """
        if feature_type not in ["drug", "disease"]:
            raise ValueError("feature_type only supports drug/disease!")
        return general_load("primekg_" + feature_type + "_feature", self.path,
                            "\t")

    def get_node_list(self, node_type):
        """Return the unique ids of all nodes of the given type.

        Ids are collected from both edge endpoints (``x_id`` where ``x_type``
        matches, ``y_id`` where ``y_type`` matches).

        Args:
            node_type: Value to match against ``x_type`` / ``y_type``.

        Returns:
            A sorted NumPy array of unique node ids.
        """
        df = self.df
        x_ids = df[(df.x_type == node_type)].x_id.unique().tolist()
        y_ids = df[(df.y_type == node_type)].y_id.unique().tolist()
        return np.unique(x_ids + y_ids)
34 changes: 34 additions & 0 deletions tdc/utils/knowledge_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""A python module to build, handle, explore, and manipulate knowledge graphs.
"""

import pandas as pd
from copy import copy

# Column layout of a knowledge-graph edge table: one row per edge, with the
# two endpoints described by the ``x_*`` and ``y_*`` columns.
kg_columns = [
    'relation', 'display_relation', 'x_id', 'x_type', 'x_name', 'x_source',
    'y_id', 'y_type', 'y_name', 'y_source'
]


class KnowledgeGraph:
    """Thin wrapper around a pandas edge table (``self.df``)."""

    def __init__(self, df=None):
        """Create a knowledge graph.

        Args:
            df: Edge table to wrap.  When omitted, an empty table with the
                ``kg_columns`` layout is created.
        """
        if df is not None:
            self.df = df
        else:
            # A scalar fill value without an index makes the DataFrame
            # constructor raise ValueError, so build the empty frame from the
            # column list alone.
            self.df = pd.DataFrame(columns=kg_columns)

    def copy(self):
        """Return a shallow copy; the copy initially shares the same ``df``."""
        return copy(self)

    def run_query(self, query):
        """Restrict the graph to the edges matching a ``DataFrame.query`` string.

        The table as it was before the call is kept on ``self.df_raw`` and
        ``self.df`` is replaced by the filtered, re-indexed result.

        Args:
            query: Expression understood by ``pandas.DataFrame.query``.
        """
        self.df_raw = self.df
        self.df = self.df.query(query).reset_index(drop=True)

    def get_nodes_by_source(self, source):
        """Return the distinct nodes whose source equals ``source``.

        Nodes are collected from both edge endpoints.  The ``x_`` / ``y_``
        prefix is stripped from the column names so both sides share one
        layout (e.g. ``id``, ``type``, ``name``, ``source``).

        Args:
            source: Value to match against ``x_source`` / ``y_source``.

        Returns:
            A de-duplicated DataFrame with one row per node.
        """
        sides = []
        for prefix in ("x_", "y_"):
            cols = [col for col in self.df.columns if col.startswith(prefix)]
            # Boolean masks instead of a formatted query string, so a source
            # containing quote characters cannot break the expression.
            side = self.df.loc[self.df[prefix + "source"] == source, cols]
            renamed = {col: col[len(prefix):] for col in cols}
            sides.append(side.rename(columns=renamed))
        nodes = pd.concat(sides, ignore_index=True)
        return nodes.drop_duplicates().reset_index(drop=True)

0 comments on commit 2ccd732

Please sign in to comment.