-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbionoi.py
executable file
·287 lines (222 loc) · 9.86 KB
/
bionoi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
from scipy.spatial import Voronoi, voronoi_plot_2d
import numpy as np
import pandas as pd
import matplotlib
from biopandas.mol2 import PandasMol2
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
from sklearn.cluster import KMeans
from math import sqrt, asin, atan2, log, pi, tan
from alignment import align
def k_different_colors(k: int):
    """Pick ``k`` well-separated colors from matplotlib's CSS4 palette.

    The RGB triplets of every entry in ``mcolors.CSS4_COLORS`` are clustered
    with k-means.  For each cluster center, the palette entry with the
    smallest squared RGB distance to that center is selected.

    Parameters
    ----------
    k : int
        Number of clusters, and therefore the number of colors returned.

    Returns
    -------
    list of str
        Palette values (the values of ``CSS4_COLORS``, not the color names),
        ordered by their position in the palette.  No de-duplication is
        performed: two centers that share a nearest palette entry yield the
        same color twice.

    Notes
    -----
    ``KMeans`` is created without a ``random_state``, so the selection is not
    guaranteed to be identical between runs.
    """
    # (palette value, rgb triplet) pairs; the palette value is what callers get.
    palette = [(value, mcolors.to_rgba(value)[:3]) for value in mcolors.CSS4_COLORS.values()]
    rgb_points = np.array([rgb for _, rgb in palette])

    # Perform k-means on the RGB vectors and keep only the centroids.
    centers = KMeans(n_clusters=k).fit(rgb_points).cluster_centers_

    # Row i holds the squared distance from center i to every palette color.
    sq_dist = np.array([np.sum((rgb_points - center) ** 2, axis=1) for center in centers])

    # One palette index per center, reported in palette order.
    nearest = sorted(sq_dist.argmin(axis=1))
    return [palette[i][0] for i in nearest]
def voronoi_finite_polygons_2d(vor, radius=None):
    """
    Reconstruct infinite voronoi regions in a 2D diagram to finite
    regions.

    Parameters
    ----------
    vor : Voronoi
        Input diagram
    radius : float, optional
        Distance to 'points at infinity'.  Defaults to twice the
        peak-to-peak range of all input coordinates.

    Returns
    -------
    regions : list of lists
        Indices of vertices in each revised Voronoi region, one region
        per input point, in input-point order.
    vertices : ndarray
        Coordinates for revised Voronoi vertices. Same as coordinates
        of input vertices, with 'points at infinity' appended to the
        end.

    Raises
    ------
    ValueError
        If the diagram's input points are not 2D.

    Source
    -------
    Copied from https://gist.github.com/pv/8036995
    """
    if vor.points.shape[1] != 2:
        raise ValueError("Requires 2D input")

    new_regions = []
    new_vertices = vor.vertices.tolist()

    center = vor.points.mean(axis=0)
    if radius is None:
        # Use the np.ptp function: the ndarray.ptp method was removed in NumPy 2.0.
        radius = np.ptp(vor.points) * 2

    # Construct a map containing all ridges for a given point
    all_ridges = {}
    for (p1, p2), (v1, v2) in zip(vor.ridge_points, vor.ridge_vertices):
        all_ridges.setdefault(p1, []).append((p2, v1, v2))
        all_ridges.setdefault(p2, []).append((p1, v1, v2))

    # Reconstruct infinite regions
    for p1, region in enumerate(vor.point_region):
        vertices = vor.regions[region]

        if all(v >= 0 for v in vertices):
            # finite region
            new_regions.append(vertices)
            continue

        # reconstruct a non-finite region
        ridges = all_ridges[p1]
        new_region = [v for v in vertices if v >= 0]

        for p2, v1, v2 in ridges:
            if v2 < 0:
                # Normalise so that a missing (-1) vertex, if any, is v1.
                v1, v2 = v2, v1
            if v1 >= 0:
                # finite ridge: already in the region
                continue

            # Compute the missing endpoint of an infinite ridge
            t = vor.points[p2] - vor.points[p1]  # tangent
            t /= np.linalg.norm(t)
            n = np.array([-t[1], t[0]])  # normal

            # Orient the normal away from the centroid of the input points.
            midpoint = vor.points[[p1, p2]].mean(axis=0)
            direction = np.sign(np.dot(midpoint - center, n)) * n
            far_point = vor.vertices[v2] + direction * radius

            new_region.append(len(new_vertices))
            new_vertices.append(far_point.tolist())

        # sort region counterclockwise
        vs = np.asarray([new_vertices[v] for v in new_region])
        c = vs.mean(axis=0)
        angles = np.arctan2(vs[:, 1] - c[1], vs[:, 0] - c[0])
        new_region = np.array(new_region)[np.argsort(angles)]

        # finish
        new_regions.append(new_region.tolist())

    return new_regions, np.asarray(new_vertices)
def fig_to_numpy(fig, alpha=1) -> np.ndarray:
    '''
    Converts matplotlib figure to a numpy array.

    The figure background patch is given the requested ``alpha``, the canvas
    is drawn, and the rendered pixels are returned as a writable
    ``(height, width, 3)`` ``uint8`` RGB array.

    Parameters
    ----------
    fig : matplotlib figure
        Figure to render.  Its canvas must provide either ``tostring_rgb()``
        together with ``get_width_height()``, or ``buffer_rgba()``.
    alpha : float, optional
        Alpha applied to ``fig.patch`` before drawing.

    Source
    ------
    Adapted from https://stackoverflow.com/questions/7821518/matplotlib-save-plot-to-numpy-array
    '''
    # Setup figure
    fig.patch.set_alpha(alpha)
    canvas = fig.canvas
    canvas.draw()

    if hasattr(canvas, "tostring_rgb"):
        # Legacy canvas API.  np.frombuffer replaces the deprecated binary
        # mode of np.fromstring; it yields a read-only view, hence the copy.
        width, height = canvas.get_width_height()
        flat = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
        return flat.reshape((height, width, 3)).copy()

    # Canvases without tostring_rgb (removed in Matplotlib 3.10): take the
    # RGBA buffer and drop the alpha channel.
    rgba = np.asarray(canvas.buffer_rgba())
    return rgba[..., :3].copy()
def miller(x, y, z):
    """Map a 3D point to 2D with the Miller cylindrical formula.

    The point is converted to spherical angles about the origin; the
    latitude is then stretched with ``5/4 * ln(tan(pi/4 + 2/5 * latitude))``
    while the longitude is passed through unchanged.

    Parameters
    ----------
    x, y, z : float
        Cartesian coordinates.  The origin itself has no defined direction
        and is not handled specially.

    Returns
    -------
    tuple
        ``(projected_latitude, longitude)``, both in radians-based units.
    """
    norm = sqrt(x ** 2 + y ** 2 + z ** 2)
    # Elevation above the xy-plane.
    phi = asin(z / norm)
    stretched = 5 / 4 * log(tan(pi / 4 + 2 / 5 * phi))
    return stretched, atan2(y, x)
def alignment(pocket, proj_direction):
    """Principal-axes alignment of a pocket's atom coordinates.

    The coordinates are centered on their mean, the eigenvectors of their
    covariance matrix are ordered by decreasing eigenvalue, and the matrix
    returned by ``align`` for those axes is applied to the centered points.

    Parameters
    ----------
    pocket : object exposing ``x``, ``y`` and ``z`` sequences of equal length
    proj_direction :
        Forwarded unchanged to ``align`` (defined in the ``alignment``
        module; presumably selects the target orientation - confirm there).

    Returns
    -------
    ndarray
        Transformed coordinates, one row per atom, three columns.
    """
    # Stack into an (n_atoms, 3) array and move the centroid to the origin.
    coords = np.array([pocket.x, pocket.y, pocket.z]).T
    centered = coords - np.mean(coords, axis=0)

    # Eigen-decomposition of the covariance of the centered coordinates.
    eigenvalues, eigenvectors = np.linalg.eig(np.cov(centered.T))

    # Columns reordered so the largest-eigenvalue axis comes first.
    descending = np.argsort(eigenvalues)[::-1]
    principal_axes = eigenvectors[:, descending]

    rotation = align(principal_axes, proj_direction)
    return np.matmul(rotation, centered.T).T
def voronoi_atoms(bs, color_map, colorby, bs_out=None, size=None, dpi=None, alpha=1, save_fig=True,
                  projection=miller, proj_direction=None):
    """Render a binding site as a colored 2D Voronoi diagram.

    Atoms are read from a mol2 file, aligned with ``alignment``, projected to
    2D with ``projection``, tessellated with a Voronoi diagram, and each cell
    is filled with the color selected by ``colorby``.

    Parameters
    ----------
    bs :
        mol2 input, passed to ``PandasMol2.read_mol2``.
    color_map : dict or None
        Mapping ``type -> {"color": ...}``; only read when ``colorby`` is
        ``"atom_type"`` or ``"residue_type"``.
    colorby : {"atom_type", "residue_type", "residue_num"}
        Attribute that decides the cell color.  For ``"residue_num"`` the
        palette comes from ``k_different_colors``.
    bs_out : str, optional
        Output image path, default ``'out.jpg'``.  Only used if ``save_fig``.
    size : int, optional
        Side length of the square image in pixels, default 128.
    dpi : int, optional
        Figure resolution, default 120.
    alpha : float, optional
        Opacity of the cells and of the figure background.
    save_fig : bool, optional
        Whether to write the figure to ``bs_out``.
    projection : callable, optional
        ``(x, y, z) -> (px, py)`` mapping; called once per atom.
    proj_direction :
        Forwarded to ``alignment``.

    Returns
    -------
    atoms : pandas.DataFrame
        Atom table with the transformed coordinates and the added columns
        ``P(x)``, ``P(y)``, ``polygons`` and ``color``.
    vor : scipy.spatial.Voronoi
        Tessellation of the projected atoms.
    img : ndarray
        Rendered image as returned by ``fig_to_numpy``.

    Raises
    ------
    ValueError
        If ``colorby`` is not one of the supported values.
    """
    # Fail fast on arguments, before any file or figure is touched.
    if colorby not in ("atom_type", "residue_type", "residue_num"):
        raise ValueError(
            "colorby must be 'atom_type', 'residue_type' or 'residue_num', got {!r}".format(colorby))
    alpha = float(alpha)

    # Setting output image size and default output name
    size = 128 if size is None else size
    dpi = 120 if dpi is None else dpi
    bs_out = 'out.jpg' if bs_out is None else bs_out

    # Read molecules in mol2 format.  An explicit copy is taken so that the
    # column assignments below act on an independent frame; this replaces
    # globally disabling pandas' chained-assignment warning.
    mol2 = PandasMol2().read_mol2(bs)
    atoms = mol2.df[['subst_id', 'subst_name', 'atom_type', 'atom_name', 'x', 'y', 'z']].copy()
    atoms.columns = ['res_id', 'residue_type', 'atom_type', 'atom_name', 'x', 'y', 'z']
    # Keep only the first three characters of the residue name.
    atoms['residue_type'] = atoms['residue_type'].apply(lambda name: name[0:3])

    # Align to principal axis
    trans_coords = alignment(atoms, proj_direction)
    atoms['x'] = trans_coords[:, 0]
    atoms['y'] = trans_coords[:, 1]
    atoms['z'] = trans_coords[:, 2]

    # Convert 3D to 2D, evaluating the projection once per atom.
    projected = [projection(x, y, z) for x, y, z in atoms[['x', 'y', 'z']].values]
    atoms["P(x)"] = [point[0] for point in projected]
    atoms["P(y)"] = [point[1] for point in projected]

    # Compute Voronoi tesselation; regions come back in input-point order,
    # so polygon i belongs to atom row i.
    vor = Voronoi(atoms[['P(x)', 'P(y)']])
    regions, vertices = voronoi_finite_polygons_2d(vor)
    polygons = [vertices[reg] for reg in regions]

    # Store one polygon array per row in an explicit 1-D object column.
    cells = np.empty(len(polygons), dtype=object)
    for idx, polygon in enumerate(polygons):
        cells[idx] = polygon
    atoms['polygons'] = cells

    # Color by colorby
    if colorby in ("atom_type", "residue_type"):
        colors = [color_map[_type]["color"] for _type in atoms[colorby]]
    else:
        # residue_num: one palette color per distinct residue id.
        residue_ids = set(atoms["res_id"])
        residue_palette = dict(zip(residue_ids, k_different_colors(len(residue_ids))))
        colors = atoms["res_id"].apply(lambda res_id: residue_palette[res_id])
    atoms["color"] = colors

    # figsize is in inches, dpi is the resolution of the figure
    side = int(size) / int(dpi)
    figure = plt.figure(figsize=(side, side), dpi=int(dpi))
    try:
        ax = figure.add_subplot(111)
        ax.axis('off')
        ax.tick_params(axis='both', bottom=False, left=False, right=False,
                       labelleft=False, labeltop=False,
                       labelright=False, labelbottom=False)

        for polygon, color in zip(polygons, colors):
            ax.add_patch(matplotlib.patches.Polygon(polygon,
                                                    facecolor=color,
                                                    edgecolor=color,
                                                    alpha=alpha,
                                                    linewidth=0.2))

        # Set limits
        ax.set_xlim(vor.min_bound[0], vor.max_bound[0])
        ax.set_ylim(vor.min_bound[1], vor.max_bound[1])

        # Get image as numpy array
        figure.tight_layout(pad=0)
        img = fig_to_numpy(figure, alpha=alpha)

        if save_fig:
            figure.subplots_adjust(bottom=0, top=1, left=0, right=1)
            # The ``frameon`` keyword is not passed: Matplotlib >= 3.3 no
            # longer accepts it in savefig.
            figure.savefig(bs_out, pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig=figure)

    return atoms, vor, img
def Bionoi(mol, bs_out, size, dpi, alpha, colorby, proj_direction):
    """Build the Voronoi image of a binding site.

    For ``colorby`` equal to ``"atom_type"`` or ``"residue_type"`` the
    matching color-map file under ``./cmaps/`` (relative to the current
    working directory) is loaded; for any other value no color map is
    passed on.  The work is then delegated to ``voronoi_atoms`` with
    ``save_fig=False``, so ``bs_out`` is forwarded but no file is written
    by this call.

    Parameters
    ----------
    mol :
        mol2 input forwarded to ``voronoi_atoms``.
    bs_out, size, dpi, alpha, colorby, proj_direction :
        Forwarded unchanged to ``voronoi_atoms``.

    Returns
    -------
    tuple
        ``(atoms, vor, img)`` exactly as returned by ``voronoi_atoms``.

    Raises
    ------
    FileNotFoundError
        If the color-map file does not exist.
    ValueError
        If the color-map file cannot be parsed.
    """
    if colorby in ("atom_type", "residue_type"):
        cmap_path = "./cmaps/atom_cmap.csv" if colorby == "atom_type" else "./cmaps/res_hydro_cmap.csv"
        try:
            color_map = _load_color_map(cmap_path)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "Color mapping file not found. Be sure to specify YOURPATH/color_maps/ "
                "before the color_map basename.") from err
        except ValueError as err:
            raise ValueError(
                "Error while parsing color_map file. Check the file's delimiters and "
                "compare to examples in color_maps folder") from err
    else:
        color_map = None

    # Run
    return voronoi_atoms(mol, color_map, colorby,
                         bs_out=bs_out,
                         size=size, dpi=dpi,
                         alpha=alpha,
                         save_fig=False,
                         proj_direction=proj_direction)


def _load_color_map(path):
    """Parse a ``code;definition;color`` file into ``{code: {"color", "definition"}}``.

    Lines starting with ``#`` and blank lines are skipped.  A row that does
    not have exactly three ``;``-separated fields raises ``ValueError``.
    """
    with open(path, "rt") as handle:
        rows = [line.replace("\n", "").split(";")
                for line in handle
                if line.strip() and not line.startswith("#")]
    return {code: {"color": color, "definition": definition} for code, definition, color in rows}