-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtool.py
117 lines (98 loc) · 4.75 KB
/
tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import torch
import numpy as np
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from random import random
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
def sample(iteration, x_dim, mu_gmm, lambda_gmm, sample_num, sample_k, model_dir="./vae_gmm"):
    """Draw latent samples around one GMM component.

    Sample the random variable that will be the input to the VAE decoder
    from the posterior distribution estimated by the GMM.

    Args:
        iteration: Unused here; kept so existing callers keep working.
        x_dim: Dimensionality of the latent space (size of the identity
            covariance used for the manual sample).
        mu_gmm: Component means, indexed as ``mu_gmm[k]``.
        lambda_gmm: Component precision matrices, indexed as
            ``lambda_gmm[k]``; each is inverted to obtain a covariance.
        sample_num: Number of samples to draw for each of the two outputs.
        sample_k: Index of the component to sample from.
        model_dir: Unused here; kept so existing callers keep working.

    Returns:
        A tuple ``(manual_sample, random_sample)``, each with ``sample_num``
        rows. ``manual_sample`` uses a fixed isotropic covariance
        ``0.1 * I``; ``random_sample`` uses the component's own covariance
        ``inv(lambda_gmm[sample_k])``.
    """
    # NOTE: despite its name, this value is placed on the covariance
    # diagonal, i.e. it acts as a variance, not a standard deviation.
    sigma = 0.1
    sigma_kdd = sigma * np.identity(x_dim, dtype=float)

    component_mean = mu_gmm[sample_k]
    # The manual draw must stay first so seeded runs reproduce earlier output.
    manual_sample = np.random.multivariate_normal(
        mean=component_mean, cov=sigma_kdd, size=sample_num
    )
    random_sample = np.random.multivariate_normal(
        mean=component_mean, cov=np.linalg.inv(lambda_gmm[sample_k]), size=sample_num
    )
    return manual_sample, random_sample
def visualize_gmm(iteration, decode_k, sample_num, model_dir="./vae_gmm"):
    """Plot samples and the density contour of one GMM component in 2-D.

    Loads the GMM parameters saved for ``iteration``, draws ``sample_num``
    samples around component ``decode_k`` and plots, using only the first
    two latent dimensions: the samples, every component mean, and the
    weighted Gaussian density contour of component ``decode_k``. The figure
    is written to ``<model_dir>/graph/gause_I<iteration>k<decode_k>.png``
    and then shown.

    Args:
        iteration: Training iteration whose saved parameters are loaded.
        decode_k: Index of the GMM component to visualize.
        sample_num: Number of samples to scatter.
        model_dir: Directory containing the ``npy`` and ``graph`` folders.
    """
    mu_gmm_kd, lambda_gmm_kdd, pi_gmm_k = get_param(iteration=iteration, model_dir=model_dir)

    # Derive the latent dimensionality from the loaded means instead of
    # assuming a fixed size.
    latent_dim = mu_gmm_kd.shape[1]
    manual_sample, _ = sample(
        iteration=iteration, x_dim=latent_dim,
        mu_gmm=mu_gmm_kd, lambda_gmm=lambda_gmm_kdd,
        sample_k=decode_k, sample_num=sample_num, model_dir=model_dir,
    )

    # Restrict every component to its first two dimensions for plotting.
    mu_gmm2d_kd = mu_gmm_kd[:, :2]
    # NOTE(review): inverting the top-left 2x2 block of the precision matrix
    # is not the same as the 2-D marginal covariance (inv(Lambda)[:2, :2]) --
    # confirm which one is intended.
    lambda_2d = lambda_gmm_kdd[decode_k, :2, :2]

    # NOTE(review): the plot range is built from sqrt of the *precision*
    # diagonal, not of a variance -- kept as-is, confirm this is intended.
    x_1_line = np.linspace(
        np.min(mu_gmm_kd[:, 0] - 0.5 * np.sqrt(lambda_gmm_kdd[:, 0, 0])),
        np.max(mu_gmm_kd[:, 0] + 0.5 * np.sqrt(lambda_gmm_kdd[:, 0, 0])),
        num=900,
    )
    x_2_line = np.linspace(
        np.min(mu_gmm_kd[:, 1] - 0.5 * np.sqrt(lambda_gmm_kdd[:, 1, 1])),
        np.max(mu_gmm_kd[:, 1] + 0.5 * np.sqrt(lambda_gmm_kdd[:, 1, 1])),
        num=900,
    )
    x_1_grid, x_2_grid = np.meshgrid(x_1_line, x_2_line)
    x_point = np.stack([x_1_grid.flatten(), x_2_grid.flatten()], axis=1)
    grid_shape = x_1_grid.shape

    # Density of the selected component, weighted by that component's own
    # mixing coefficient.
    density_k = pi_gmm_k[decode_k] * multivariate_normal.pdf(
        x=x_point, mean=mu_gmm_kd[decode_k][:2], cov=np.linalg.inv(lambda_2d)
    )

    plt.figure(figsize=(12, 9))
    # The label keeps 1-based cluster numbering and follows the plotted component.
    plt.scatter(x=manual_sample[:, 0], y=manual_sample[:, 1], label='cluster:' + str(decode_k + 1))
    plt.scatter(x=mu_gmm2d_kd[:, 0], y=mu_gmm2d_kd[:, 1], color='red', s=100, marker='x')
    plt.contour(x_1_grid, x_2_grid, density_k.reshape(grid_shape), alpha=0.5, linestyles='dashed')
    plt.suptitle('Gaussian Mixture Model', fontsize=20)
    plt.title('Number of sample=' + str(len(manual_sample)) + ', K=' + str(decode_k))
    plt.xlabel('$x_1$')
    plt.ylabel('$x_2$')
    plt.colorbar()
    plt.savefig(model_dir + '/graph/gause_I' + str(iteration) + 'k' + str(decode_k) + '.png')
    plt.show()
    plt.close()
def get_param(iteration, model_dir="./vae_gmm"):
    """Load the GMM parameters saved for a given iteration.

    Reads ``mu_<iteration>.npy``, ``lambda_<iteration>.npy`` and
    ``pi_<iteration>.npy`` from ``<model_dir>/npy``.

    Args:
        iteration: Iteration number embedded in the file names.
        model_dir: Directory that contains the ``npy`` folder.

    Returns:
        A tuple ``(mu_gmm_kd, lambda_gmm_kdd, pi_gmm_k)`` of the loaded arrays.
    """
    prefix = f"{model_dir}/npy/"
    suffix = f"_{iteration}.npy"
    mu_gmm_kd, lambda_gmm_kdd, pi_gmm_k = (
        np.load(prefix + name + suffix) for name in ("mu", "lambda", "pi")
    )
    return mu_gmm_kd, lambda_gmm_kdd, pi_gmm_k
def visualize_ls(iteration, z, labels, save_dir):
    """Save a 2-D t-SNE scatter plot of latent vectors, one glyph per point.

    Each point is drawn with its label as the marker text and a color picked
    from a fixed palette. The figure is written to
    ``<save_dir>/graph/z_<iteration>.png``.

    Args:
        iteration: Iteration number embedded in the output file name.
        z: Latent vectors passed to ``TSNE.fit_transform``.
        labels: One integer label per row of ``z``; used both as the marker
            text and as the palette index.
        save_dir: Directory that contains the ``graph`` folder.
    """
    colors = ["red", "green", "blue", "orange", "purple", "yellow", "black", "cyan", '#a65628', '#f781bf']
    # PCA(n_components=2, random_state=0) can be substituted here as a
    # linear alternative to t-SNE.
    points = TSNE(n_components=2, random_state=0).fit_transform(z)

    plt.figure(figsize=(10, 10))
    # Figure-level settings are loop-invariant, so set them once.
    plt.title("Latent space on VAE")
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.tick_params(labelsize=17)

    palette_size = len(colors)
    for point, label in zip(points, labels):
        # Wrap around the palette so labels beyond its length still get a color.
        plt.scatter(
            point[0], point[1],
            marker="${}$".format(label), c=colors[label % palette_size], s=100,
        )

    plt.savefig(save_dir + '/graph/z_' + str(iteration) + '.png')
    plt.close()
def calc_acc(results, correct):
    """Compute clustering accuracy under a greedy relabeling of clusters.

    Cluster ids in ``results`` are arbitrary, so this repeatedly tries
    swapping every pair of ids ``(i, j)`` and keeps a swap whenever it
    strictly increases the fraction of entries equal to ``correct``. The
    search stops once a full sweep over all pairs yields no improvement.
    This is a greedy search, not an exhaustive one over all permutations.

    Args:
        results: 1-D sequence of non-negative integer cluster ids.
        correct: Ground-truth labels, same length as ``results``.

    Returns:
        A tuple ``(max_acc, relabeled)``. ``relabeled`` is a float array with
        the best relabeling found, or the ``results`` object itself if no
        candidate ever scored above zero.

    Raises:
        ValueError: If ``results`` is empty.
    """
    num_points = len(results)
    if num_points == 0:
        raise ValueError("calc_acc requires at least one data point")

    num_categories = np.max(results) + 1
    max_acc = 0
    changed = True
    while changed:
        changed = False
        for i in range(num_categories):
            for j in range(num_categories):
                current = np.asarray(results)
                # Swap ids i and j in one vectorized pass; the float cast
                # matches the dtype of the relabeled array callers receive.
                swapped = np.where(
                    current == i, j, np.where(current == j, i, current)
                ).astype(float)

                # Calculate accuracy of this candidate relabeling.
                acc = (swapped == correct).sum() / float(num_points)
                if acc > max_acc:
                    # Later pairs in this sweep build on the improved labels.
                    max_acc = acc
                    results = swapped
                    changed = True
    return max_acc, results