-
Notifications
You must be signed in to change notification settings - Fork 1
/
Reactome.R
224 lines (212 loc) · 7.94 KB
/
Reactome.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#' Build a mully graph from a given pathway
#'
#' @param biopax The BioPaX object containing the parsed data from an OWL file. This can be obtained using readBiopax(filepath)
#' @param pathwayID The ID of the pathway in the biopax object
#'
#' @return A mully graph built from the given pathway
#' @export
#'
#' @note
#' This should be preceded by readBiopax(filepath) to obtain the biopax object
#' @examples
#' \dontrun{
#' biopax=readBiopax(pi3k.owl)
#' pi3kmully=pathway2mully(biopax,"pathway1")
#' }
#' @import mully
#' @import rBiopaxParser
#' @import igraph
#' @importFrom stringr str_split str_trim
#' @importFrom graph nodeData
pathway2Mully<-function(biopax,pathwayID){
#Transform created Pathway GraphNEL to igraph object
pathwaygraph=pathway2Graph(biopax,pathwayID)
pathwayigraph=graph_from_graphnel(pathwaygraph, name = TRUE, weight = TRUE,unlist.attrs = TRUE)
#Create mully Graph
properties=getInstanceProperty(biopax,pathwayID)
pathwayName=properties[1]
pathwaymully=mully(name=pathwayName,direct = T)
#Get Nodes
##Empty Graph
if(length(nodeData(pathwaygraph))==0)
return(pathwaymully)
listNodes=V(pathwayigraph)$name
#Add Layers
classes=biopax$dt$class[which(biopax$dt$id%in%listNodes)]
layers=unique(classes)
pathwaymully=addLayer(pathwaymully,layers)
#Add Nodes
for(i in 1:length(listNodes)){
nodeAttributes=getInstanceAttributes(biopax,listNodes[i])
nodeClass=getInstanceClass(biopax,listNodes[i])
pathwaymully=mully::addNode(pathwaymully,nodeName = listNodes[i],layerName = nodeClass,attributes=nodeAttributes[-1])
}
#Add Edges
edges=as_long_data_frame(pathwayigraph)[,c("from","to")]
names(edges)=c("V1","V2")
edgeAttributes=as.data.frame(edge.attributes(pathwayigraph))
for(i in 1:dim(edges)[1]){
startNode=listNodes[edges$V1[i]]
endNode=listNodes[edges$V2[i]]
attributes=edgeAttributes[i,]
names(attributes)=names(edgeAttributes)
pathwaymully=mully::addEdge(pathwaymully,startNode,endNode,attributes=attributes)
}
return(pathwaymully)
}
getInstanceAttributes<-function(biopax,name){
XRefs=getXrefAnnotations(biopax,name)
description=getInstanceProperty(biopax,name)
l=list("name"=name,id="","description"=description,"database"="")
for(i in 1:dim(XRefs)[1]){
string=unlist(str_split(XRefs$annotation[i],":"))
if(i>1){
l$id=paste(l$id,",",sep="")
l$database=paste(l$database,",",sep="")
}
l$database=paste(l$database,str_trim(string[1],"both"),sep="")
l$id=paste(l$id,str_trim(string[2],"both"),sep="")
}
return(l)
}
#' Get External Database IDs of nodes
#'
#' @param biopax The biopax object
#' @param nodes The list of internal IDs of the nodes
#' @param database The name of the database. If missing, the list of all external ids of all databases will be returned.
#'
#' @return A dataframe with the mappings between the internal and external IDs
#' @export
#'
#' @examples
#' \dontrun{
#' biopax=readBiopax(pi3k.owl)
#' getExternalIDs(wntBiopax,c("Protein1","Protein2"),"UniProt")
#' }
getExternalIDs<-function(biopax,nodes,database=NULL){
idsRes=data.frame("id"=is.character(c()),"extid"=is.character(c()),"database"=is.character(c()),stringsAsFactors = F)[-1,]
for(j in 1:length(nodes)){
atts=getInstanceAttributes(biopax,nodes[j])
ids=unlist(str_split(atts$id,","))
dbs=unlist(str_split(atts$database,","))
if(!missing(database) && !is.null(database)){
ids=ids[which(dbs==database)]
}
l=list(rep(nodes[j],length(ids)),ids,dbs)
names(l)=c("id","extid","database")
idsRes=rbind(idsRes,l)
}
return(idsRes)
}
#' Get Internal ID of a given node or list of nodes
#'
#' @param biopax The biopax object
#' @param nodes The list of external IDs of the nodes. If missing, the list of all internal IDs will be returned.
#' @param g The graph generated from the given biopax object.
#'
#' @return A dataframe with the mappings between the external and internal IDs
#' @export
#'
#' @examples
#' \dontrun{
#' biopax=readBiopax(pi3k.owl)
#' g=pathway2Mully(biopax,"pathway1")
#' getInternalIDs(wntBiopax,c("R-HSA-5368514","Q9BQB4"))
#' }
getInternalIDs<-function(g,nodes=NULL){
idsRes=data.frame("id"=is.character(c()),"intid"=is.character(c()),"database"=is.character(c()),stringsAsFactors = F)[-1,]
allNodes=getNodeAttributes(g)
for(j in 1:length(allNodes$name)){
ids=unlist(str_split(allNodes$id[j],","))
dbs=unlist(str_split(allNodes$database[j],","))
l=list(rep(allNodes$name[j],length(ids)),ids,dbs)
names(l)=c("id","extid","database")
idsRes=rbind(idsRes,l)
}
if(!missing(nodes) && !is.null(nodes))
return(idsRes[which(idsRes$extid %in% nodes),])
return(idsRes)
}
#' Get internal pathway ID in a BioPAX file
#'
#' @param biopax The biopax object
#' @param reactomeID The Reactome ID of the pathway
#'
#' @return The internal ID of the pathway in the parsed BioPAX object
#' @note
#' This should be preceded by readBiopax(filepath) to obtain the biopax object
#' @examples
#' \dontrun{
#' biopax=readBiopax(pi3k.owl)
#' id=getPathwayID(biopax,"R-HSA-167057")
#' pi3kmully=pathway2mully(biopax,id)
#' }
#' @importFrom rBiopaxParser listPathways
#' @importFrom stringr str_split
#' @export
getPathwayID<-function(biopax,reactomeID){
listPathways=listPathways(biopax)
for(pathway in listPathways$id){
annots=getInstanceAttributes(biopax,pathway)
ids=unlist(str_split(annots$id,","))
dbs=unlist(str_split(annots$database,","))
id=ids[which(dbs=="Reactome")]
if(id==reactomeID)
return(pathway)
}
return(NULL)
}
#' Download Reactome Pathways in BioPAX level 2 and 3
#'
#' @param pathwayID The Reactome ID or list of IDs of the pathways to be downloaded. The ID should start with R-HSA-.
#' @param biopaxLevel The BioPAX Level, 2 or 3. By default set to 3.
#' @param destDirectory The Directory in which the Pathway Files should be saved. If missing, the files are saved in the working directory. The Reactome IDs are used to name the files.
#' @param overwrite A Boolean whether to overwrite existing files with the same name.
#'
#' @return The Directory in which the files are saved.
#' @export
#'
#' @examples
#' \dontrun{
#' downloadPathway(c("R-HSA-195721","R-HSA-9609507"),biopaxLevel=3,overwrite=T)
#' }
#' @importFrom utils download.file
#' @importFrom RCurl url.exists
#' @importFrom stringr str_split
downloadPathway <-function(pathwayID,biopaxLevel = "3",destDirectory,overwrite = F) {
if (missing(pathwayID))
stop("Please provide a Reactome Pathway ID.")
if (!biopaxLevel %in% c("2", "3"))
stop("The given Biopax Level is wrong.")
if (missing(destDirectory)) {
message("The files will be saved in the Working directory.")
destDirectory = getwd()
}
url = paste0("https://reactome.org/ReactomeRESTfulAPI/RESTfulWS/biopaxExporter/Level",biopaxLevel,"/",sep = "")
urlcheck="https://reactome.org/content/detail/"
for (pathway in pathwayID) {
destFile = paste0(destDirectory, "/", pathway, ".owl", sep = "")
if (file.exists(destFile)) {
if (overwrite == T)
message(paste0("The file ",pathway,".owl already exists and will be overwritten.",sep = ""))
else{
message(paste0("The file ",pathway,".owl already exists and will be skipped. If you wish to overwrite it, re-call the function with the argument overwrite set to TRUE.",sep = ""))
next
}
}
spl = unlist(str_split(pathway, "R-HSA-"))
if (length(spl) != 2){
message(paste0("The following given Reactome ID is wrong and will be skipped: ",pathway,sep=""))
next
}
urlchecktmp=paste0(urlcheck,pathway,sep="")
urltmp = paste0(url, spl[2], sep = "")
if (!url.exists(urlchecktmp)){
message(paste0("The following given Reactome ID is wrong and will be skipped: ",pathway,sep=""))
next
}
download.file(urltmp, destFile)
}
message(paste0("The pathways are download and saved in the following directory: ",destDirectory,sep = ""))
return(destDirectory)
}