forked from Goodman-lab/DP5
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TreeRenum.py
executable file
·163 lines (134 loc) · 4.37 KB
/
TreeRenum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 31 14:54:17 2015
@author: ke291
"""
import sys
def FindAllPaths(molgraph, start, end, path=[]):
path = path + [start]
if start == end:
return [path]
if start not in [x[0] for x in molgraph]:
print "No such node in graph"
return []
paths = []
for node in molgraph[start-1][1:]:
if node not in path:
newpaths = FindAllPaths(molgraph, node, end, path)
for newpath in newpaths:
paths.append(newpath)
return paths
def FindTerminatingPaths(molgraph, start, trunk, path=[]):
path = path + [start]
if start not in [x[0] for x in molgraph]:
print "No such node in graph"
return []
paths = []
for node in molgraph[start-1][1:]:
if node not in path and node not in trunk:
newpaths = FindTerminatingPaths(molgraph, node, trunk, path)
for newpath in newpaths:
paths.append(newpath)
if paths == []:
return [path]
else:
return paths
def FindTreeMap(f):
#get molecular graph
molgraph = GenMolGraph(f)
#Find potential endpoints - atoms with only one neighbour
endpoints = []
for node in molgraph:
if len(node) < 3:
endpoints.append(node[0])
#get the longest paths for all endpoint combinations
maxpaths = []
for i in range(0, len(endpoints)):
for j in range(0, len(endpoints)):
if i != j:
maxpaths.append(max(FindAllPaths(molgraph, endpoints[i],
endpoints[j]), key=len))
#get the absolute longest path possible in the molecule
molmap = max(maxpaths, key=len)
#Add longest branches to the longest trunk
for atom in molmap:
for node in molgraph[atom-1]:
if node not in molmap:
maxbranch = []
branches = FindTerminatingPaths(molgraph, node, molmap)
if branches != []:
maxbranch = max(branches, key=len)
if maxbranch != []:
molmap.extend(maxbranch)
return molmap
def TreeRenumSDF(f, ExpNMR):
molmap = FindTreeMap(f)
#Read molecule from file
obconversion = OBConversion()
obconversion.SetInFormat("sdf")
obmol = OBMol()
obconversion.ReadFile(obmol, f)
temp = []
anums = []
for atom in OBMolAtomIter(obmol):
temp.append(atom)
anums.append(atom.GetAtomicNum())
newmol = OBMol()
for atom in molmap:
newmol.AddAtom(temp[atom-1])
#Restore the bonds
newmol.ConnectTheDots()
newmol.PerceiveBondOrders()
#Write renumbered molecule to file
obconversion.SetOutFormat("sdf")
obconversion.WriteFile(newmol, f[:-4] + 'r.sdf')
#Prepare NMR translation
NMRmap = []
i = 1
for atom in molmap:
anum = anums[atom-1]
if anum == 1:
NMRmap.append(['H' + str(atom), 'H' + str(i)])
if anum == 6:
NMRmap.append(['C' + str(atom), 'C' + str(i)])
i += 1
print NMRmap
RenumNMR(ExpNMR, NMRmap)
def RenumNMR(ExpNMR, NMRmap):
f = open(ExpNMR, 'r')
NMRfile = f.read(1000000)
f.close()
print '\nOld NMR file:\n' + NMRfile
#Replace old atom labels with new atom labels
#tag replacements with '_' to avoid double replacement
for atom in NMRmap:
NMRfile = NMRfile.replace(atom[0] + ')', atom[1] + '_)')
NMRfile = NMRfile.replace(atom[0] + ' ', atom[1] + '_ ')
NMRfile = NMRfile.replace(atom[0] + ',', atom[1] + '_,')
NMRfile = NMRfile.replace(atom[0] + '\n', atom[1] + '_\n')
#Strip temporary udnerscore tags
NMRfile = NMRfile.replace('_', '')
print '\nNew NMR file:\n' + NMRfile
f = open(ExpNMR + 'r', 'w')
f.write(NMRfile)
f.close()
def GenMolGraph(f):
obconversion = OBConversion()
obconversion.SetInFormat("sdf")
obmol = OBMol()
obconversion.ReadFile(obmol, f)
molgraph = []
for atom in OBMolAtomIter(obmol):
idx = atom.GetIdx()
molgraph.append([])
molgraph[idx-1].append(idx)
for NbrAtom in OBAtomAtomIter(atom):
molgraph[idx-1].append(NbrAtom.GetIdx())
return molgraph
if __name__ == '__main__':
#print sys.argv
ExpNMR = sys.argv[2]
filename = sys.argv[1] + '.sdf'
TreeRenumSDF(filename, ExpNMR)
#main()