-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathobo2json.py
executable file
·148 lines (114 loc) · 4.17 KB
/
obo2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
'''
Goal: From a Django view, or called by a Django view, this is attempting
to parse OBO file and pass the data for a cytoscape.js graph
Parse object data from a .obo file
From http://stackoverflow.com/q/32989776/4014959
Written by PM 2Ring 2015.10.07
'''
from __future__ import print_function, division
import json
from collections import defaultdict
import urllib.request
import io
#fname = "/home/dolley/practice_cytoscape/eco.obo"
term_head = "[Term]"
#Keep the desired object data here
all_objects = {}
def add_object(d):
# #Ignore obsolete objects
# if "is_obsolete" in d:
# return
####################################################################
# This will grab term id and set all attributes to it
# This should prevent term attributes from being missed
term_key = d['id'][0] # set object key with eco term id
# This removes 'id' because its going to be set as object's key
# and keeping it would make 'id' appear twice
#if "id" in d: # REMOVED THESE 2 LINES TO MAKE JQUERY TERM SEARCH EASIER TO PARSE
# del d["id"]
if "is_a" not in d:
d["is_a"] = []
# This 'is_a' description info. Remove it to keep descriptive info
# if "is_a" in d:
# d["is_a"] = [s.partition(' ! ')[0] for s in d["is_a"]]
all_objects[term_key] = d # set all of object's data that is present to id
#####################################################################
#####################################################################
'''
# This is for picking out specific parts of the terms
#Gather desired data into a single list,
# and store it in the main all_objects dict
key = d["id"][0]
name = d["name"]
definition = d["def"]
comment = d["comment"]
synonym = d["synonym"]
disjoint_from = d["disjoint_from"]
xref = d["xref"]
is_a = d["is_a"] #will be 'target' for Cytoscape.js data json
is_obsolete = d["is_obsolete"] # dont want to graph an obsolete term, but could have an obsolete list
created_by = d["created_by"]
creation_date = d["creation_date"]
#Remove the next line if you want to keep the is_a description info
is_a = [s.partition(' ! ')[0] for s in is_a]
# all_objects[key] = d["name"] + name + definition + is_a
all_objects[key] = {"name": name,
"def": definition,
"comment": comment,
"synonym": synonym,
"disjoint_from": disjoint_from,
"xref": xref,
"is_a": is_a,
"is_obsolete": is_obsolete,
"created_by": created_by,
"creation_date": creation_date
}
'''
####################################################################
#A temporary dict to hold object data
current = defaultdict(list)
# Will hold the different term properties that each term COULD have
term_keys = list()
# Gets the most current ECO release
eco_obo_url = 'https://raw.githubusercontent.com/evidenceontology/evidenceontology/master/eco.obo'
# Open the URL and get the response (eco.obo object)
response = urllib.request.urlopen(eco_obo_url)
# Open and convert response from bytes into string
response_string = response.read().decode('utf-8')
# set the response_string as a StringIO object
# this includes a builtin open() so 'with open(...) as f' is NOT needed
# StringIO: http://stackoverflow.com/a/7472878/2900840
# Its included open(): http://stackoverflow.com/a/33395561/2900840
fname = io.StringIO(response_string)
for line in fname:
if line.rstrip() == term_head:
break
for line in fname:
line = line.rstrip()
if not line:
#ignore blank lines
continue
if line == term_head:
#end of term
add_object(current)
current = defaultdict(list)
else:
#accumulate object data into current
key, _, val = line.partition(": ")
current[key].append(val)
# these 3 lines gather term properties.
# This allows for me to know what possible fields exist for a term
current_key = key
if current_key not in term_keys:
term_keys.append(current_key)
if current:
add_object(current)
#print("\nall_objects =")
#print(json.dumps(all_objects, indent = 4, sort_keys=True))
# print out the possible term properties
for key in term_keys:
print(key)
with open("eco_from_obo.json", "w") as jsonfile:
json.dump(all_objects, jsonfile, sort_keys = True, indent = 4)
fname.close() #clear the memory