diff --git a/main_density_est.py b/main_density_est.py
index ac75944..5421342 100644
--- a/main_density_est.py
+++ b/main_density_est.py
@@ -28,7 +28,7 @@ Dy(.) - discriminator network in y space
 '''

 class RoundtripModel(object):
-    def __init__(self, g_net, h_net, dx_net, dy_net, x_sampler, y_sampler, pool, batch_size, alpha, beta, sd_y, df, scale):
+    def __init__(self, g_net, h_net, dx_net, dy_net, x_sampler, y_sampler, pool, batch_size, alpha, beta, df):
         self.model = model
         self.data = data
         self.g_net = g_net
@@ -40,10 +40,8 @@ def __init__(self, g_net, h_net, dx_net, dy_net, x_sampler, y_sampler, pool, bat
         self.batch_size = batch_size
         self.alpha = alpha
         self.beta = beta
-        self.pool = pool
-        self.sd_y = sd_y
         self.df = df
-        self.scale = scale
+        self.pool = pool
         self.x_dim = self.dx_net.input_dim
         self.y_dim = self.dy_net.input_dim
         tf.reset_default_graph()
@@ -115,12 +113,12 @@ def __init__(self, g_net, h_net, dx_net, dy_net, x_sampler, y_sampler, pool, bat
         self.d_merged_summary = tf.summary.merge([self.dx_loss_summary,self.dy_loss_summary])

         #graph path for tensorboard visualization
-        self.graph_dir = 'graph/density_est_{}_{}_x_dim={}_y_dim={}_alpha={}_beta={}_sd={}_df={}_scale={}'.format(self.timestamp,self.data,self.x_dim, self.y_dim, self.alpha, self.beta, self.sd_y, self.df, self.scale)
+        self.graph_dir = 'graph/density_est_{}_{}_x_dim={}_y_dim={}_alpha={}_beta={}'.format(self.timestamp,self.data,self.x_dim, self.y_dim, self.alpha, self.beta)
         if not os.path.exists(self.graph_dir):
             os.makedirs(self.graph_dir)

         #save path for saving predicted data
-        self.save_dir = 'data/density_est/density_est_{}_{}_x_dim={}_y_dim={}_alpha={}_beta={}_sd={}_df={}_scale={}'.format(self.timestamp,self.data,self.x_dim, self.y_dim, self.alpha, self.beta, self.sd_y, self.df, self.scale)
+        self.save_dir = 'data/density_est/density_est_{}_{}_x_dim={}_y_dim={}_alpha={}_beta={}'.format(self.timestamp,self.data,self.x_dim, self.y_dim, self.alpha, self.beta)
         if not os.path.exists(self.save_dir):
             os.makedirs(self.save_dir)

@@ -132,6 +130,7 @@ def __init__(self, g_net, h_net, dx_net, dy_net, x_sampler, y_sampler, pool, bat

         self.sess = tf.Session(config=run_config)

+
     def train(self, epochs, cv_epoch, patience):
         data_y_train = copy.copy(self.y_sampler.X_train)
         data_y_test = self.y_sampler.X_test
@@ -176,30 +175,33 @@ def train(self, epochs, cv_epoch, patience):
                         (epoch, counter, time.time() - start_time, g_loss_adv, h_loss_adv, l2_loss_x, l2_loss_y, \
                         g_loss, h_loss, g_h_loss, dx_loss, dy_loss, d_loss))
                 counter+=1

-            if epoch == cv_epoch:
-                best_sd, best_scale = self.model_selection()
+            global best_sd, best_scale
+            if use_cv:
+                best_sd, best_scale = self.model_selection()
+                f_val = open('%s/log_val.txt'%self.save_dir,'a+')
+                f_test = open('%s/log_test.txt'%self.save_dir,'a+')
+                f_val.write('epoch\taverage_likelihood\tstandard_deviation\n')
+                f_test.write('epoch\taverage_likelihood\tstandard_deviation\n')
             if epoch >= cv_epoch:
                 py_est_val = self.estimate_py_with_IS(data_y_val,epoch,sd_y=best_sd,scale=best_scale,sample_size=40000,log=True,save=False)
                 average_likelihood_val = np.mean(py_est_val)
                 sd_likelihood_val = np.std(py_est_val)/np.sqrt(len(py_est_val))
-                f=open('%s/val_likelihood.txt'%self.save_dir,'a+')
-                f.write('%d\t%f\t%f\t%f\t%f\n'%(epoch,average_likelihood_val,sd_likelihood_val))
-                f.close()
+                f_val.write('%d\t%f\t%f\n'%(epoch,average_likelihood_val,sd_likelihood_val))
                 if average_likelihood_val > best_likelihood_val:
                     best_likelihood_val = average_likelihood_val
                     wait=0
                     py_est_test = self.estimate_py_with_IS(data_y_test,epoch,sd_y=best_sd,scale=best_scale,sample_size=40000,log=True)
                     average_likelihood_test = np.mean(py_est_test)
                     sd_likelihood_test = np.std(py_est_test)/np.sqrt(len(py_est_test))
-                    f=open('%s/test_likelihood.txt'%self.save_dir,'a+')
-                    f.write('%d\t%f\t%f\t%f\t%f\n'%(epoch,average_likelihood_test,sd_likelihood_test))
-                    f.close()
+                    f_test.write('%d\t%f\t%f\n'%(epoch,average_likelihood_test,sd_likelihood_test))
                     self.save(epoch)
                 else:
                     wait+=1
                     if wait>patience or epoch+1==epochs:
                         print('Early stopping at %d with best sd:%f, best scale:%f, test average likelihood%f, test sd likelihood%f'%(epoch,best_sd,best_scale, average_likelihood_test,sd_likelihood_test))
+                        f_val.close()
+                        f_test.close()
                         sys.exit()


@@ -248,7 +250,7 @@ def predict_x(self,y,bs=256):
             x_pred[ind, :] = batch_x_
         return x_pred

-    #calculate gradient
+    #calculate Jacobian matrix
     def get_jacobian(self,x,bs=16):
         N = x.shape[0]
         jcob_pred = np.zeros(shape=(N, self.y_dim, self.x_dim))
@@ -351,7 +353,7 @@ def sample_from_qx(x_point):
         return py_est


-    #estimate pdf of y (e.g., p(y)) with Laplace approximation
+    #estimate pdf of y (e.g., p(y)) with Laplace approximation (closed-form)
     def estimate_py_with_CF(self,y_points,epoch,sd_y=0.5,log=True,save=True):
         from scipy.stats import t
         from multiprocessing.dummy import Pool as ThreadPool
@@ -407,7 +409,7 @@ def load(self, pre_trained = False, timestamp='',epoch=999):
             checkpoint_dir = 'pre_trained_models/{}/{}_{}_{}_{}}'.format(self.data, self.x_dim,self.y_dim, self.alpha, self.beta)
         else:
             if timestamp == '':
-                print('Best Timestamp not provided. Abort !')
+                print('Best Timestamp not provided.')
                 checkpoint_dir = ''
             else:
                 checkpoint_dir = 'checkpoint/{}/{}'.format(self.data, timestamp)
@@ -427,11 +429,10 @@ def load(self, pre_trained = False, timestamp='',epoch=999):
     parser.add_argument('--patience', type=int, default=20)
     parser.add_argument('--alpha', type=float, default=10.0)
     parser.add_argument('--beta', type=float, default=10.0)
-    parser.add_argument('--sd_y', type=float, default=0.5,help='sigma in model assumption')
-    parser.add_argument('--df', type=float, default=1,help='degree of freedom of student t distribution')
-    parser.add_argument('--scale', type=float, default=1,help='scale of student t distribution')
     parser.add_argument('--timestamp', type=str, default='')
+    parser.add_argument('--use_cv', type=bool, default=False)
     parser.add_argument('--train', type=str, default='False')
+    parser.add_argument('--df', type=float, default=1,help='degree of freedom of student t distribution')
     args = parser.parse_args()
     data = args.data
     model = importlib.import_module(args.model)
@@ -443,11 +444,10 @@ def load(self, pre_trained = False, timestamp='',epoch=999):
     patience = args.patience
     alpha = args.alpha
     beta = args.beta
-    sd_y = args.sd_y
     df = args.df
-    scale = args.scale
     timestamp = args.timestamp
-
+    use_cv = args.use_cv
+
     g_net = model.Generator(input_dim=x_dim,output_dim = y_dim,name='g_net',nb_layers=10,nb_units=512)
     h_net = model.Generator(input_dim=y_dim,output_dim = x_dim,name='h_net',nb_layers=10,nb_units=256)
     dx_net = model.Discriminator(input_dim=x_dim,name='dx_net',nb_layers=2,nb_units=128)
@@ -457,9 +457,14 @@ def load(self, pre_trained = False, timestamp='',epoch=999):
     xs = util.Gaussian_sampler(N=5000,mean=np.zeros(x_dim),sd=1.0)

     if data == "indep_gmm":
+        if not use_cv:
+            best_sd, best_scale = 0.05, 0.5
         ys = util.GMM_indep_sampler(N=20000, sd=0.1, dim=y_dim, n_components=3, bound=1)
+

     elif data == "eight_octagon_gmm":
+        if not use_cv:
+            best_sd, best_scale = 0.1, 0.5
         n_components = 8
         def cal_cov(theta,sx=1,sy=0.4**2):
             Scale = np.array([[sx, 0], [0, sy]])
@@ -474,6 +479,8 @@ def cal_cov(theta,sx=1,sy=0.4**2):
         ys = util.GMM_sampler(N=20000,mean=mean,cov=cov)

     elif data == "involute":
+        if not use_cv:
+            best_sd, best_scale = 0.4, 0.5
         ys = util.Swiss_roll_sampler(N=20000)

     elif data.startswith("uci"):
@@ -493,11 +500,11 @@ def cal_cov(theta,sx=1,sy=0.4**2):

     elif data.startswith("ood"):
         if data == "ood_Shuttle":
-            ys = util.Outliner_sampler('datasets/Outliner/Shuttle/data.npz')
+            ys = util.Outlier_sampler('datasets/OOD/Shuttle/data.npz')
         elif data == "ood_Mammography":
-            ys = util.Outliner_sampler('datasets/Outliner/Mammography/data.npz')
+            ys = util.Outlier_sampler('datasets/OOD/Mammography/data.npz')
         elif data == "ood_ForestCover":
-            ys = util.Outliner_sampler('datasets/Outliner/ForestCover/data.npz')
+            ys = util.Outlier_sampler('datasets/OOD/ForestCover/data.npz')
         else:
             print("Wrong OOD data name!")
             sys.exit()
@@ -507,7 +514,7 @@ def cal_cov(theta,sx=1,sy=0.4**2):
             sys.exit()


-    RTM = RoundtripModel(g_net, h_net, dx_net, dy_net, xs, ys, pool, batch_size, alpha, beta, sd_y, df, scale)
+    RTM = RoundtripModel(g_net, h_net, dx_net, dy_net, xs, ys, pool, batch_size, alpha, beta, df)

     if args.train == 'True':
         RTM.train(epochs=epochs,cv_epoch=cv_epoch,patience=patience)
@@ -517,7 +524,45 @@ def cal_cov(theta,sx=1,sy=0.4**2):

             RTM.load(pre_trained=True)
             timestamp = 'pre-trained'
         else:
-            RTM.load(pre_trained=False, timestamp = timestamp, epoch = epochs-1)
-            data_y_test = RTM.y_sampler.X_test
-            py = RTM.estimate_py_with_IS(data_y_test,0,sd_y=0.5,scale=0.5,sample_size=40000,log=True)
+            RTM.load(pre_trained=False, timestamp = timestamp, epoch = epochs)
+            #density estimate on 2D 100 x 100 grid
+            if data == "indep_gmm":
+                x1_min, x1_max, x2_min, x2_max = -1.5, 1.5, -1.5, 1.5
+                grid_x1 = np.linspace(x1_min,x1_max,100)
+                grid_x2 = np.linspace(x2_min,x2_max,100)
+                v1,v2 = np.meshgrid(grid_x1,grid_x2)
+                data_grid = np.vstack((v1.ravel(),v2.ravel())).T
+                py = RTM.estimate_py_with_IS(data_grid,0,sd_y=0.1,scale=0.5,sample_size=40000,log=False)
+                py = py.reshape((100,100))
+                import matplotlib
+                matplotlib.use('agg')
+                import matplotlib.pyplot as plt
+                plt.figure()
+                plt.rcParams.update({'font.size': 22})
+                plt.imshow(py, extent=[v1.min(), v1.max(), v2.min(), v2.max()],
+                    cmap='Blues', alpha=0.9)
+                plt.colorbar()
+                plt.savefig('%s/true_density.pdf'%(RTM.save_dir))
+                plt.close()
+            elif data == "eight_octagon_gmm":
+                x1_min, x1_max, x2_min, x2_max = -5, 5, -5, 5
+                grid_x1 = np.linspace(x1_min,x1_max,100)
+                grid_x2 = np.linspace(x2_min,x2_max,100)
+                v1,v2 = np.meshgrid(grid_x1,grid_x2)
+                data_grid = np.vstack((v1.ravel(),v2.ravel())).T
+                py = RTM.estimate_py_with_IS(data_grid,0,sd_y=0.1,scale=0.5,sample_size=40000,log=False)
+            elif data == "involute":
+                x1_min, x1_max, x2_min, x2_max = -6, 5, -5, 5
+                grid_x1 = np.linspace(x1_min,x1_max,100)
+                grid_x2 = np.linspace(x2_min,x2_max,100)
+                v1,v2 = np.meshgrid(grid_x1,grid_x2)
+                data_grid = np.vstack((v1.ravel(),v2.ravel())).T
+                py = RTM.estimate_py_with_IS(data_grid,0,sd_y=0.1,scale=0.5,sample_size=40000,log=False)
+            else:
+                print("Wrong 2D data name!")
+                sys.exit()
+
+
+
+
\ No newline at end of file
diff --git a/util.py b/util.py
index 24deef8..c738efa 100644
--- a/util.py
+++ b/util.py
@@ -71,8 +71,8 @@ def load_all(self):
         return np.concatenate((self.X_train, self.X_test)), np.concatenate((self.y_train, self.y_test))

 #outliner dataset (http://odds.cs.stonybrook.edu/)
-class Outliner_sampler(object):
-    def 
__init__(self,data_path='datasets/Outliner/Shuttle/data.npz'): +class Outlier_sampler(object): + def __init__(self,data_path='datasets/OOD/Shuttle/data.npz'): data_dic = np.load(data_path) self.X_train, self.X_val,self.X_test,self.label_test = self.normalize(data_dic) self.Y=None @@ -248,7 +248,7 @@ def load_all(self): #HEPMASS dataset class hepmass_sampler(object): - def __init__(self,data_path='/home/liuqiao/software/maf/data/hepmass/'): + def __init__(self,data_path='datasets/HEPMASS/'): self.X_train, self.X_val,self.X_test = self.normalize(data_path) self.Y=None self.nb_train = self.X_train.shape[0] @@ -480,7 +480,6 @@ def __init__(self, N, sd, dim, n_components, weights=None, bound=1): self.X_train, self.X_val,self.X_test = self.split(self.X) self.nb_train = self.X_train.shape[0] self.Y=None - self.mean=0 def generate_gmm(self,weights = None): if weights is None: weights = np.ones(self.n_components, dtype=np.float64) / float(self.n_components) @@ -685,25 +684,6 @@ def load_all(self): return self.X_c, self.X_d, self.label_idx -def sample_Z(batch, z_dim , sampler = 'one_hot', num_class = 10, n_cat = 1, label_index = None): - if sampler == 'mul_cat': - if label_index is None: - label_index = np.random.randint(low = 0 , high = num_class, size = batch) - return np.hstack((0.10 * np.random.randn(batch, z_dim-num_class*n_cat), - np.tile(np.eye(num_class)[label_index], (1, n_cat)))) - elif sampler == 'one_hot': - if label_index is None: - label_index = np.random.randint(low = 0 , high = num_class, size = batch) - #return np.hstack((0.10 * np.random.randn(batch, z_dim-num_class), np.eye(num_class)[label_index])) - return np.hstack((0.10 * np.random.normal(0,1,(batch, z_dim-num_class)), np.eye(num_class)[label_index])) - elif sampler == 'uniform': - return np.random.uniform(-1., 1., size=[batch, z_dim]) - elif sampler == 'normal': - return 0.15*np.random.randn(batch, z_dim) - elif sampler == 'mix_gauss': - if label_index is None: - label_index = np.random.randint(low = 0 , high = num_class, size = batch) - return (0.1 * np.random.randn(batch, z_dim) + np.eye(num_class, z_dim)[label_index]) #get a batch of data from previous 50 batches, add stochastic class DataPool(object): @@ -727,667 +707,7 @@ def __call__(self, data): else: return data -class Bayesian_sampler(object): - def __init__(self, N, dim1=10, dim2=5+1): - self.total_size = N - self.dim1 = dim1#y - self.dim2 = dim2#theta - np.random.seed(1024) - self.data = np.load('TS-data_block1.npy') - - self.X1 = self.data[:N,-self.dim1:] - self.X2 = self.data[:N,:self.dim2] - assert self.X2.shape[1]==self.dim2 - assert self.X1.shape[1]==self.dim1 - self.X = np.hstack((self.X1,self.X2)) - - def train(self, batch_size, label = False): - indx = np.random.randint(low = 0, high = self.total_size, size = batch_size) - return self.X1[indx, :], self.X2[indx, :] - - def get_batch(self,batch_size,weights=None): - indx = np.random.randint(low = 0, high = self.total_size, size = batch_size) - return self.X1[indx, :], self.X2[indx, :] - - def load_all(self): - return self.X1,self.X2 - - -class SV_sampler(object):#stochastic volatility model - def __init__(self, theta_init, sample_size, block_size=10,seed = 1): - np.random.seed(seed) - import matplotlib - matplotlib.use('agg') - import matplotlib.pyplot as plt - self.sample_size = sample_size - self.theta_init = theta_init - self.block_size = block_size - #self.y_true, _, self.h_true = self.generate_data(sample_size=1,time_step=1000,use_prior=True) - #print self.y_true[0,-5:] - - def 
generate_data(self,sample_size,time_step,theta=None,h_0=None,use_prior=False, seed = 1): - np.random.seed(seed) - assert len(self.theta_init)==5 - h_t = np.empty((sample_size, time_step), dtype=np.float64)#log-volatility - y_t = np.empty((sample_size, time_step), dtype=np.float64)#observation data - if use_prior: - theta = np.empty((sample_size, len(self.theta_init)), dtype=np.float64) - #theta[:,0] = self.generate_mu(sample_size) - theta[:,0] = np.random.normal(loc=0.0314,scale=1.0,size=(sample_size,)) - theta[:,1] = self.generate_phi(sample_size) - theta[:,2] = self.generate_sigma2(sample_size) - #theta[:,3] = self.generate_nu(sample_size) - #theta[:,4] = self.generate_lambda(sample_size) - #theta[0,:] = self.theta_init - theta[0,:] = [0.0314, 0.9967, 0.0107, 19.6797, -1.1528] - mu = theta[:, 0] - phi = theta[:, 1] - sigma2 = theta[:, 2] - sigma = sigma2 ** 0.5 - #nu = theta[:, 3] - #lambda_ = theta[:, 4] - - if use_prior: - #h_t[:, 0] = norm.rvs(size=sample_size) * (sigma2 / (1 - phi ** 2)) ** 0.5 + mu - h_t[:, 0] = 0 - else: - h_t[:, 0] = mu + phi * (h_0 - mu) + sigma * norm.rvs(size=sample_size) - - for t_ in range(1, time_step): - h_t[:, t_] = mu + phi * (h_t[:, t_-1] - mu) + sigma * norm.rvs(size=sample_size) - - #generate y_t - for i in range(sample_size): - #zeta, omega = self.get_zeta_omega(lambda_=lambda_[i], nu=nu[i]) - #epsilon = self.generate_skew_student_t(sample_size=time_step, zeta=zeta, omega=omega, lambda_=lambda_[i], nu=nu[i]) - epsilon = self.generate_normal(time_step) - y_t[i, :] = np.exp(h_t[i, :] / 2) * epsilon - return y_t, theta[:,:3], h_t - - def generate_normal(self,sample_size,low=0.,high=1.): - return np.random.normal(size=sample_size) - - def generate_mu(self,sample_size): - return norm.rvs(scale=1, size=sample_size) - - def generate_phi(self,sample_size): - my_mean = 0.95 - my_std = 10 - myclip_a = -1 - myclip_b = 1 - a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std - return truncnorm.rvs(a, b, loc=my_mean, scale=my_std, size=sample_size) - - def generate_sigma2(self,sample_size): - return invgamma.rvs(a=2.5, scale=0.025, size=sample_size) - - def prior_of_nu(self, nu): - first = nu / (nu + 3) - first **= 0.5 - second = polygamma(1, nu / 2) - polygamma(1, (nu + 1) / 2) - 2 * (nu + 3) / nu / (nu + 1) ** 2 - second **= 0.5 - return first * second - - def generate_nu(self, sample_size, left = 10, right = 40): - out = [] - temp = self.prior_of_nu(left) - while len(out) < sample_size: - nu = uniform.rvs() * (right - left) + left - if uniform.rvs() < self.prior_of_nu(nu) / temp: - out.append(nu) - return np.array(out, dtype=np.float64) - - def generate_lambda(self, sample_size): - # return t.rvs(df=0.5, loc=0.0, scale=pi ** 2 / 4, size=sample_size) - return norm.rvs(loc=0.0, scale=pi ** 2 / 4, size=sample_size) - - def get_zeta_omega(self,lambda_, nu): - k1 = (nu / 2) ** 0.5 * scipy.special.gamma(nu / 2 - 0.5) / scipy.special.gamma(nu / 2) - k2 = nu / (nu - 2) - delta = lambda_ / (1 + lambda_ ** 2) ** 0.5 - omega = (k2 - 2 / pi * k1 ** 2 * delta ** 2) ** (-0.5) - zeta = - (2 / pi) ** 0.5 * k1 * omega * delta - return zeta, omega - - def generate_skew_student_t(self, sample_size, zeta, omega, lambda_, nu): - delta = lambda_ / (1 + lambda_ ** 2) ** 0.5 - w = truncnorm.rvs(a=0, b=float('inf'), size=sample_size) - epsilon = norm.rvs(size=sample_size) - u = gamma.rvs(a=nu / 2, scale=2 / nu, size=sample_size) - return zeta + u ** (-0.5) * omega * (delta * w + (1 - delta ** 2) ** 0.5 * epsilon) - - def train(self, batch_size, label = False): - indx = 
np.random.randint(low = 0, high = self.total_size, size = batch_size) - return self.X1[indx, :], self.X2[indx, :] - - def get_batch(self,batch_size,weights=None): - indx = np.random.randint(low = 0, high = self.total_size, size = batch_size) - return self.X1[indx, :], self.X2[indx, :] - - def load_all(self): - return self.X1,self.X2 - -# model: y_t = A cos ( 2 pi omega t + phi ) + sigma w_t, w_t ~ N (0, 1) -# T = 1/omega -# parameters: (omega, phi, logsigma, logA) -# prior: -# logA ~ N (0, 1) -# phi ~ Unif(0, pi) -# omega ~ Unif(0, 0.1) -# logsigma ~ N (0, 1) -class Cosine_sampler(object): - def __init__(self, omega=1./10,sigma=0.3,iteration=10,block_size=10): - self.omega = omega - self.sigma = sigma - self.block_size = block_size - self.observation = np.zeros(iteration*block_size) - def generate_data(self,sample_size,time_step,seed=0):#time series data t=1,2,3... - np.random.seed(seed) - theta = np.empty((sample_size, 4), dtype=np.float64) - data = np.empty((sample_size, time_step), dtype=np.float64) - theta[:, 0] = np.random.normal(size=sample_size) - theta[:, 1] = np.random.uniform(low=0, high=np.pi, size=sample_size) - theta[:, 2] = 1./80 - theta[:, 3] = -10 - #theta[:, 0] = np.random.uniform(low=0, high=0.1, size=sample_size) - #theta[:, 1] = np.random.uniform(low=0, high=2 * np.pi, size=sample_size) - #theta[:, 2] = np.random.normal(size=sample_size) - #theta[:, 2] = np.random.uniform(low=-5, high=-2, size=sample_size) - #theta[:, 3] = np.random.normal(size=sample_size) - #theta[0, :] = self.theta_init - theta[0,:2] = [np.log(2),np.pi / 4] - #theta[0, :] = [1 / 80, np.pi / 4, 0, np.log(2)] - - logA, phi, omega, logsigma = theta.transpose() - sigma = np.exp(logsigma) - A = np.exp(logA) - - for t in range(time_step): - data[:, t] = A * np.cos(2 * np.pi * omega * (t + 1) + phi) + sigma * np.random.normal(size=sample_size) - return data, theta[:,:2] - def generate_data2(self,sample_size,time_step=10,seed=0):#time series data t is continous from [0,1] - np.random.seed(seed) - theta = np.empty((sample_size, 4), dtype=np.float64) - data = np.empty((sample_size, 2), dtype=np.float64) - theta[:, 0] = np.random.normal(size=sample_size) - theta[:, 1] = np.random.uniform(low=0, high=np.pi, size=sample_size) - theta[:, 2] = 0.5 - theta[:, 3] = -10 - #theta[:, 0] = np.random.uniform(low=0, high=0.1, size=sample_size) - #theta[:, 1] = np.random.uniform(low=0, high=2 * np.pi, size=sample_size) - #theta[:, 2] = np.random.normal(size=sample_size) - #theta[:, 2] = np.random.uniform(low=-5, high=-2, size=sample_size) - #theta[:, 3] = np.random.normal(size=sample_size) - #theta[0, :] = self.theta_init - #theta[0,:2] = [np.log(2),np.pi / 4] - #theta[0,:2] = [0.0, np.pi / 2] - theta[0,:2] = [1, 3.*np.pi / 4] - #theta[0, :] = [1 / 80, np.pi / 4, 0, np.log(2)] - - logA, phi, omega, logsigma = theta.transpose() - sigma = np.exp(logsigma) - A = np.exp(logA) - for i in range(sample_size): - if i np.log(np.random.uniform(size=sample_size)) \ - + self.cal_bayesian_posterior(data, t, para_temp) - para_temp[:, mask] = para_propose[:, mask] - return para_temp.T - - #plot posterior of both gruth truth(2D) and sampled theta - def get_posterior(self, data, t, res = 100): - #get the truth params - _,params,_ = self.generate_data3(1,0,self.block_size) - logA, phi = params[0,:] - logA_axis = np.linspace(logA-1,logA+1,res) - phi_axis = np.linspace(phi-1,phi+1,res) - X,Y = np.meshgrid(logA_axis,phi_axis) - params_stacks = np.vstack([X.ravel(), Y.ravel()]) #shape (2, N*N) - #log_posterior_list = 
map(cal_beyesian_posterior,theta_list) - bayesian_posterior_stacks = self.cal_bayesian_posterior(data, t, params_stacks) - bayesian_posterior_mat = np.reshape(bayesian_posterior_stacks,(len(phi_axis),len(logA_axis))) - params_sampled = self.sample_posterior(data,None,t) - return bayesian_posterior_mat, logA_axis, phi_axis, params_sampled - - #MCMC refinement - def refine_posterior(self, data, params, t, chain_len=50, seed=0): - np.random.seed(seed) - #starting states of Markov Chain - para_temp = params.T - for _ in tqdm(range(chain_len)): - para_propose = para_temp + np.random.normal(scale=0.1, size=para_temp.shape) - #para_propose[0, :] %= 0.1 - para_propose[1, :] += 2*np.pi - para_propose[1, :] %= (4 * np.pi) - para_propose[1, :] -= 2*np.pi - #para_propose[1, :] = para_propose[1, :]%(2 * np.pi)-np.pi - mask = self.cal_bayesian_posterior(data, t, para_propose) > np.log(np.random.uniform(size=para_temp.shape[1])) \ - + self.cal_bayesian_posterior(data, t, para_temp) - para_temp[:, mask] = para_propose[:, mask] - return para_temp.T - #MCMC resampling with N chains and get the last data point - def resampling(self, data, params, t, chain_len=100, seed=0): - np.random.seed(seed) - #t,data: time and data of a minibatch, e.g., (10,) - #params: params set as proposals - para_propose_set = copy.copy(params) - para_temp = params.T - for _ in tqdm(range(chain_len)): - label_batch_idx = np.random.choice(len(para_propose_set), size=len(para_propose_set), replace=True) - para_propose = para_propose_set[label_batch_idx].T - mask = self.cal_bayesian_likelihood(data, t, para_propose) > np.log(np.random.uniform(size=para_temp.shape[1])) \ - + self.cal_bayesian_likelihood(data, t, para_temp) - para_temp[:, mask] = para_propose[:, mask] - return para_temp.T - - #MCMC resampling with 1 chain and get the last N data points - def resampling_v2(self, data, params, t, chain_len=10000, seed=0): - np.random.seed(seed) - #t,data: time and data of a minibatch, e.g., (10,) - #params: params set as proposals - a = copy.copy(params) - para_propose_set = copy.copy(params) - para_sampled = np.zeros(params.shape) - para_temp = params[0,:] - for i in tqdm(range(chain_len+len(para_propose_set)*10)): - label_batch_idx = np.random.choice(len(para_propose_set)) - para_propose = para_propose_set[label_batch_idx,:] - #print(self.cal_bayesian_likelihood(data, t, para_propose)) - #print(self.cal_bayesian_likelihood(data, t, para_temp)) - if self.cal_bayesian_likelihood(data, t, para_propose) > np.log(np.random.uniform()) \ - + self.cal_bayesian_likelihood(data, t, para_temp): - para_temp = para_propose - if i >= chain_len and i%10==0: - para_sampled[int((i-chain_len)/10),:] = para_temp - return para_sampled - - #directly sample with multinomial distribution using likelihood as weights - def resampling_v3(self, data, params, t, chain_len=10000, seed=0): - np.random.seed(seed) - #t,data: time and data of a minibatch, e.g., (10,) - #params: params set as proposals - para_propose_set = copy.copy(params) - log_likelihood = self.cal_bayesian_likelihood(data, t, params.T) - likelihood = np.exp(log_likelihood) - likelihood /= np.sum(likelihood) - sampled_idx = np.random.choice(len(params),size=len(params),replace=True,p=likelihood) - return para_propose_set[sampled_idx] - - #adaptive mh - def adaptive_sampling(self, data, params, t, chain_len=1000, bound=0.2, seed=2): - np.random.seed(seed) - #t,data: time and data - #params: params set as proposals - para_propose_set = copy.copy(params) - sample_size,param_size = params.shape - 
para_sampled = np.zeros(params.shape) - params_sorted = np.zeros(params.shape) - para_temp = para_propose_set[0,:] - temp_idx = 0 - adjacent_list = [[] for _ in range(sample_size)] - #params with shape (N,m) - order_idx = [[] for _ in range(param_size)] - dic_order_idx = [{} for _ in range(param_size)] - for i in range(param_size): - params_with_idx = np.vstack([params[:,i],np.arange(sample_size)]).T - params_with_idx = np.array(sorted(params_with_idx,key=lambda a:a[0])) - params_sorted[:,i] = params_with_idx[:,0] - order_idx[i] = list(params_with_idx[:,1]) - dic_order_idx[i] = {item[0]:item[1] for item in zip(order_idx[i],np.arange(sample_size))} - #calculate proposal point density - def cal_proposal_density(param,idx): - neighbor_list=[] - for i in range(param_size): - param_ith_idx = dic_order_idx[i][idx] - left_idx, right_idx = find_inserted_idx(params_sorted[:,i],param[i],param_ith_idx) - neighbor_list.append(order_idx[i][left_idx:right_idx]) - return len(set(neighbor_list[0]).intersection(*neighbor_list[1:])) - def find_inserted_idx(array,value,idx): - left_idx,right_idx = 0,len(array) - isleftbreak=0 - for i in range(idx,len(array)): - if value+bound < array[i]: - right_idx = i - break - for i in range(idx,-1,-1): - if value-bound > array[i]: - left_idx = i - isleftbreak = 1 - break - if isleftbreak: - left_idx+=1 - return left_idx,right_idx - - def cal_proposal_density_v2(param,idx): - neighbor_list=[] - for i in range(param_size): - param_ith_idx = dic_order_idx[i][idx] - left_idx, right_idx = find_inserted_idx_v2(params_sorted[:,i],param[i],param_ith_idx) - neighbor_list.append(order_idx[i][left_idx:right_idx]) - return len(set(neighbor_list[0]).intersection(*neighbor_list[1:])) - #find idx with binary division search - def find_inserted_idx_v2(param_array,value,idx): - def binary_search(value,array): - if len(array)==0: - return 0 - if value>array[-1]: - return len(array) - Min, Max = 0, len(array)-1 - while Min<=Max: - mid = int((Min+Max)/2) - if array[mid]value: - Max = mid - 1 - else: - return mid - return Min - - right_idx = binary_search(value+bound,param_array[idx:]) - left_idx = binary_search(value-bound,param_array[:idx]) - return left_idx, idx+right_idx - - #neighboring points as the unnormalized density - def mixture_square_density(points,point,idx): - nb=0 - for i in adjacent_list[idx]: - neighbor_point = points[i] - if np.sum(abs(neighbor_point-point) <= bound) == points.shape[1]: - nb+=1 - return nb - - for i in tqdm(range(chain_len+len(para_propose_set)*5)): - mixture_idx = np.random.choice(len(para_propose_set)) - para_propose = para_propose_set[mixture_idx,:] + np.random.uniform(-bound,bound,size=(2,)) - - if self.cal_bayesian_posterior(data, t, para_propose) + np.log(cal_proposal_density_v2(para_temp,temp_idx)) > \ - + self.cal_bayesian_posterior(data, t, para_temp) + np.log(cal_proposal_density_v2(para_propose,mixture_idx)) \ - + np.log(np.random.uniform()): - para_temp = para_propose - temp_idx = mixture_idx - if i >= chain_len and i%5==0: - para_sampled[int((i-chain_len)/5),:] = para_temp - return para_sampled - if __name__=='__main__': - from scipy import stats - from sklearn.neighbors import KernelDensity - from sklearn.mixture import GaussianMixture - from scipy.stats import gaussian_kde - import matplotlib - matplotlib.use('agg') - import matplotlib.pyplot as plt - #import seaborn as sns - import random - np.random.seed(2) - - # x = list(range(3,11)) - # y_kde = [0.989, 0.967,0.911,0.743,0.549,0.378,0.226,0.14] - # y_made = 
[0.014,0.03,0.014,0.018,0.029,0.039,0.033,0.032] - # y_nvp = [0.741,0.785,0.812,0.803,0.764,0.709,0.618,0.669] - # y_maf = [0.879,0.847,0.796,0.744,0.69,0.663,0.654,0.595] - # y_cf = [0.841,0.776,0.745,0.703,0.612,0.566,0.481,0.505] - # y_is = [0.942,0.922,0.929,0.88,0.85,0.847,0.836,0.829] - # lw=2 - # plt.figure() - # plt.plot(x,y_kde,'*-',color=(135/255. ,135/255., 135/255.),label='KDE',linewidth=lw) - # plt.plot(x,y_made,'o-',color=(253/255. ,198/255., 122/255.),label='MADE',linewidth=lw) - # plt.plot(x,y_nvp,'s-',color=(87/255. ,104/255., 180/255.),label='Real NVP',linewidth=lw) - # plt.plot(x,y_maf,'v-',color=(240/255. ,168/255., 171/255.),label='MAF',linewidth=lw) - # plt.plot(x,y_cf,'D-',color=(215/255. ,220/255., 254/255.),label='Rountrip-CF',linewidth=lw) - # plt.plot(x,y_is,'d-',color=(169/255. ,209/255., 142/255.),label='Rountrip-IS',linewidth=lw) - # plt.legend(loc = "best") - # plt.savefig('com.png',dpi=300) - # sys.exit(0) ys = UCI_sampler('datasets/YearPredictionMSD/data.npy') - #ys = hepmass_sampler() - print ys.X_train.shape, ys.X_val.shape,ys.X_test.shape - X = np.concatenate([ys.X_train,ys.X_val]) - gkde1 = gaussian_kde(X.T,'silverman') - gkde2 = gaussian_kde(X.T,'scott') - py_gau_kernel1=gkde1.logpdf(ys.X_test.T) - py_gau_kernel2=gkde2.logpdf(ys.X_test.T) - print np.mean(py_gau_kernel1),2.*np.std(py_gau_kernel1)/np.sqrt(len(py_gau_kernel1)) - print np.mean(py_gau_kernel2),2.*np.std(py_gau_kernel2)/np.sqrt(len(py_gau_kernel2)) - - - sys.exit() - ys = Swiss_roll_sampler(N=20000) - print ys.X_train.shape, ys.X_val.shape, ys.X_test.shape - np.savez('data_swill_roll.npz',ys.X_train,ys.X_val,ys.X_test) - sys.exit() - - # for dim in range(30,100,20): - # ys = Multi_dis_sampler(N=50000,dim=dim) - # print ys.X_train.shape, ys.X_val.shape, ys.X_test.shape - # np.savez('data_multi_v2_dim%d.npz'%dim,ys.X_train,ys.X_val,ys.X_test) - # sys.exit() - # a = ys.get_all_density(ys.X_test) - # print a.shape - # sys.exit() - for dim in [5,10,30,50,100]: - ys = GMM_indep_sampler(N=50000, sd=0.1, dim=dim, n_components=3, bound=1) - #ys = GMM_sampler(N=10000,n_components=dim,dim=dim,sd=0.05) - print ys.X_train.shape, ys.X_val.shape, ys.X_test.shape - np.savez('data_indep_dim%d.npz'%dim,ys.X_train,ys.X_val,ys.X_test) - - sys.exit() - # s=miniboone_sampler() - # print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - # s=gas_sampler() - # print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - # s=power_sampler() - # print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - # s=hepmass_sampler() - # print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - - s=UCI_sampler('datasets/AReM/data.npy') - print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - print s.X_train[0,:5],s.X_val[0,:5] - a = copy.copy(s.X_train) - #a=s.X_train - np.random.shuffle(a) - print s.X_train[0,:5],s.X_val[0,:5] - sys.exit() - s=UCI_sampler('datasets/Protein/data.npy') - print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - s=UCI_sampler('datasets/Superconductivty/data.npy') - print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - s=UCI_sampler('datasets/YearPredictionMSD/data.npy') - print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - s=UCI_sampler('datasets/BankMarketing/data.npy') - print s.X_train.shape,s.X_val.shape,s.X_test.shape,s.nb_train - sys.exit() - data_list,t_list=[],[] - s = Cosine_sampler(block_size=10) - for i in range(10): - if i==0: - data, theta, t = s.generate_data3(1000,i) - else: - data, theta, t = 
s.generate_data3(100,i,prior=theta) - data_list.append(data[0,:]) - t_list.append(t) - #log_prob,axis_x,axis_y,sampled_theta = s.get_posterior(np.concatenate(data_list,axis=0),np.concatenate(t_list,axis=0)) - #theta_refine = s.refine_posterior(np.concatenate(data_list,axis=0),theta,np.concatenate(t_list,axis=0)) - theta = np.random.uniform(0,0.2,size=theta.shape) - theta_resample = s.adaptive_sampling(data[0,:],theta,t,chain_len=10000) - - print theta_resample.shape - sys.exit() - plt.figure(figsize=(5,15)) - fig, ax = plt.subplots(1, 2) - ax[0].hist(sampled_theta[:,0], bins=30,alpha=0.75) - ax[1].hist(sampled_theta[:,1], bins=30,alpha=0.75) - plt.savefig('data/bayes_infer/posterior2/iter_%d_sampled_posterior.png'%i) - plt.close('all') - prob = np.exp(log_prob) - z_min, z_max = -np.abs(prob).max(), np.abs(prob).max() - plt.imshow(prob, cmap='RdBu', vmin=z_min, vmax=z_max, - extent=[axis_x.min(), axis_x.max(), axis_y.min(), axis_y.max()], - interpolation='nearest', origin='lower') - plt.title('Beyesian posterior') - plt.colorbar() - plt.savefig('data/bayes_infer/posterior2/iter_%d_posterior_2d.png'%i) - plt.close('all') - - sys.exit() - data, theta, t = s.generate_data3(50000,0) - print data[0],t - data, theta, t = s.generate_data3(50000,1,prior=np.zeros(theta.shape)) - print data[0],t - data, theta, t = s.generate_data3(50000,2,prior=np.zeros(theta.shape)) - print data[0],t - sys.exit(0) - log_prob,axis_x,axis_y,sampled_theta = s.get_posterior(data[0,:],t,0) - fig, ax = plt.subplots(1, 2) - ax[0].hist(sampled_theta[:,0], bins=100,alpha=0.75) - ax[1].hist(sampled_theta[:,1], bins=100,alpha=0.75) - plt.savefig('sampled_posterior.png') - plt.close() - print log_prob.shape - print np.max(log_prob) - print(np.where(log_prob==np.max(log_prob))) - # sns.set(style='whitegrid', color_codes=True) - # sns.heatmap(np.exp(prob)) - prob = np.exp(log_prob) - z_min, z_max = -np.abs(prob).max(), np.abs(prob).max() - plt.imshow(prob, cmap='RdBu', vmin=z_min, vmax=z_max, - extent=[axis_x.min(), axis_x.max(), axis_y.min(), axis_y.max()], - interpolation='nearest', origin='lower') - plt.title('posterior') - plt.colorbar() - plt.savefig('posterior_map.png') - sys.exit() - print data.shape - print theta[0] - plt.plot(data[0,:50]) - plt.xlabel('t') - plt.ylabel('y_t') - plt.savefig('a.png') - sys.exit() - - X, Y = np.mgrid[-2:2:100j, -2:2:100j] - positions = np.vstack([X.ravel(), Y.ravel()]) - print positions.shape - - kernel = stats.gaussian_kde(values) - Z = kernel(positions) - #Z = np.reshape(kernel(positions).T, X.shape) - print Z.shape - Z2 = kernel.pdf(positions) - print Z[0:4],Z2[0:4] - X = np.random.normal(size=(100,3)) - kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X) - log_density = kde.score_samples(X) - log_density1 = kde.score(X) - print len(log_density) - print log_density[:3],sum(log_density) - print log_density1 - a=np.log(2) - print np.e**a - sys.exit() - a=np.ones((3,5)) - c=np.ones((2,5)) - b.append(a) - b.append(c) - print np.concatenate(b,axis=0) - sys.exit() - ys = Gaussian_sampler(N=10000,mean=np.zeros(5),sd=1.0) - print ys.get_batch(3) - print ys.get_batch(3) - print ys.get_batch(3) - a=np.array([0.0314, 0.9967, 0.0107, 19.6797, -1.1528]) - a=np.ones((4,3)) - import random - print random.sample(a,2) - sys.exit() - ys = SV_sampler(np.array([0.0314, 0.9967, 0.0107, 19.6797, -1.1528]),1) - - + print ys.X_train.shape, ys.X_val.shape,ys.X_test.shape \ No newline at end of file
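
Note on the sd_y/scale/df arguments that this patch moves off the constructor and into the estimate_py_with_IS calls: in the Roundtrip formulation, p(y) is estimated by importance sampling in the latent space, with the proposal centered at the roundtrip point H(y). The sketch below is a minimal NumPy rendering of that estimator, not the repository's implementation: it assumes the model y = G(x) + eps with eps ~ N(0, sd_y^2 I) and x ~ N(0, I), an independent Student-t proposal per latent dimension with width `scale` and `df` degrees of freedom, and stand-in callables g and h for the trained networks.

import numpy as np
from scipy.stats import multivariate_normal, t

def estimate_log_py_is(y, g, h, x_dim, sd_y=0.5, scale=0.5, df=1, sample_size=40000, rng=None):
    """Importance-sampling estimate of log p(y) for a single point y (hypothetical helper).

    g: latent x -> data space (stand-in for the trained G network)
    h: data y -> latent space (stand-in for the trained H network)
    """
    rng = np.random.default_rng() if rng is None else rng
    x_center = h(y.reshape(1, -1)).ravel()                 # proposal center: the roundtrip point H(y)
    x_samples = x_center + scale * rng.standard_t(df, size=(sample_size, x_dim))
    y_pred = g(x_samples)                                  # G(x_s), shape (sample_size, y_dim)

    # log p(y | x_s): isotropic Gaussian residual of width sd_y around G(x_s)
    log_py_given_x = (-0.5 * np.sum((y - y_pred) ** 2, axis=1) / sd_y ** 2
                      - 0.5 * y.shape[0] * np.log(2 * np.pi * sd_y ** 2))
    # log p(x_s): standard normal prior on the latent space
    log_px = multivariate_normal.logpdf(x_samples, mean=np.zeros(x_dim))
    # log q(x_s): Student-t proposal density, independent per dimension
    log_qx = np.sum(t.logpdf(x_samples, df, loc=x_center, scale=scale), axis=1)

    # log p(y) ~= logsumexp(log p(y|x) + log p(x) - log q(x)) - log(sample_size)
    w = log_py_given_x + log_px - log_qx
    m = np.max(w)
    return m + np.log(np.mean(np.exp(w - m)))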
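A separate caveat on the new command-line flag: argparse's type=bool converts via bool(str), so any non-empty string is truthy and `--use_cv False` still enables cross-validation. If that matters, a small converter (a sketch, not part of this patch) keeps the current call syntax working:

import argparse

def str2bool(v):
    # accept the usual spellings of true/false; reject anything else
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r' % v)

parser = argparse.ArgumentParser()
parser.add_argument('--use_cv', type=str2bool, default=False)
# or, if a bare switch is acceptable: parser.add_argument('--use_cv', action='store_true')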
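The three 2D branches added at the end of main_density_est.py build the same 100 x 100 evaluation grid and differ only in their bounds (and only the indep_gmm branch currently renders a heatmap). If they grow further, a small helper along the following lines could absorb the duplication; density_on_grid is hypothetical and only reuses the estimate_py_with_IS call already shown in the diff.

import numpy as np

def density_on_grid(rtm, bounds, n=100, sd_y=0.1, scale=0.5, sample_size=40000):
    # bounds = (x1_min, x1_max, x2_min, x2_max); returns the estimated density as an
    # (n, n) array plus the meshgrid coordinates, mirroring the per-dataset branches above.
    x1_min, x1_max, x2_min, x2_max = bounds
    v1, v2 = np.meshgrid(np.linspace(x1_min, x1_max, n), np.linspace(x2_min, x2_max, n))
    data_grid = np.vstack((v1.ravel(), v2.ravel())).T
    py = rtm.estimate_py_with_IS(data_grid, 0, sd_y=sd_y, scale=scale,
                                 sample_size=sample_size, log=False)
    return py.reshape((n, n)), v1, v2

# usage mirroring the indep_gmm branch:
# py, v1, v2 = density_on_grid(RTM, (-1.5, 1.5, -1.5, 1.5))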