-
Notifications
You must be signed in to change notification settings - Fork 0
/
experiments.py
349 lines (266 loc) · 10.5 KB
/
experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from utilities import *
from PrivUnitAlgs import *
import time
from SQKR import *
import os
import time
import signal
# A function that takes as input a mechanism mech and
# privacy parameter epsilon, and returns the "best" k
# for this setting. Best here is based on our simulations for
# each algorithm with different values of epsilon and k
def choose_best_k(mech,eps):
k = int(eps)
if mech in ['ProjUnit', 'FastProjUnit']:
k = 1000
elif mech == 'SQKR':
k = int(eps)
elif mech == 'PrivHS':
k = 1
elif mech == 'RePrivHS':
k = max(int(eps/2),1)
return k
# returns (color,mark) for mechnism
def color_plot(mech):
if mech == 'PrivUnitG':
return ('k','o')
if mech == 'CompPrivUnitG':
return ('y', 'o')
elif mech == 'ProjUnit':
return ('r','>')
elif mech == 'ProjUnit-corr':
return ('c','o')
elif mech == 'FastProjUnit':
return ('b','<')
elif mech == 'FastProjUnit-corr':
return ('g','x')
elif mech == 'RePrivHS':
return ('g','o')
elif mech == 'PrivHS':
return ('c','o')
elif mech == 'SQKR':
return ('m','o')
return ('k','0')
# Takes as input a vector x and epsilon, and calculates the error
# of the mechanism for num_rep trials and returns an array with the MSEs.
# The calculation is done as follows: we run the mechanism mech
# over input x for num_rep times, recording the MSE of each trial
# and returning the mean of these MSEs.
# x: input vector
# eps: privacy parameter
# mech: a mechanism to run
# num_rep: num of repetitions to run the mechanism
def find_err(x,eps,k,mech,num_rep):
y = np.zeros(len(x))
p = None,
gamma = None
sigma = None
if mech in ['PrivUnitG', 'ProjUnit', 'FastProjUnit', 'ProjUnit-corr','FastProjUnit-corr']:
# parameters for PrivG algorthms
p = priv_unit_G_get_p(eps)
gamma, sigma = get_gamma_sigma(p, eps)
err_mech_arr = np.zeros(num_rep)
for i in range(num_rep):
y = privatize_vector(x,eps,k,mech,p,gamma,sigma)
err_mech_arr[i] = np.sum((y-x)**2)
return err_mech_arr
# Plots the variance as a function of epsilon for all methods
# For each method, k is chosen using the function choose_best_k.
def exp_compare_var_eps(num_rep = 100):
mechanisms = [ 'FastProjUnit', 'ProjUnit', 'PrivUnitG', 'PrivHS','RePrivHS','SQKR']
n = 2**15 # dimension
lst_eps = list(range(16))[1:16:2]
err_dict = {}
dict_file = 'raw/err_comp_eps_dim_%d_num_rep_%d.npy' % (n,num_rep)
if os.path.exists(dict_file):
err_dict = np.load(dict_file,allow_pickle='TRUE').item()
x = np.random.normal(size=n)
x = x / np.linalg.norm(x)
for mech in mechanisms:
print(mech)
if mech in err_dict:
continue
err_dict[mech] = np.zeros((len(lst_eps),num_rep))
for i in range(len(lst_eps)):
eps = lst_eps[i]
k = choose_best_k(mech,eps)
err_dict[mech][i] = find_err(x,eps,k,mech,num_rep)
np.save(dict_file, err_dict)
# plot the errors
plt.figure()
for mech in mechanisms:
(c,m) = color_plot(mech)
q = 0.9
err_mech = [np.mean(err_dict[mech][i]) for i in range(len(lst_eps))]
err_mech_high = [np.quantile(err_dict[mech][i],q) for i in range(len(lst_eps))]
err_mech_low = [np.quantile(err_dict[mech][i],1-q) for i in range(len(lst_eps))]
(c,m) = color_plot(mech)
plt.plot(lst_eps,err_mech,color = c, marker=m,linestyle='dashed',label = mech)
plt.fill_between(lst_eps, err_mech_low, err_mech_high, color=c, alpha=.1)
plt.legend()
plt.yscale("log")
f_name = 'plots/err-comparison_lst_eps_d_%d.pdf' % n
plt.savefig(f_name)
plt.show()
# Plot the variance as a function of k for all methods
# Here we use the same HD matrices for the ProjUnit algorithms
# and only sample different S_i matrix for each user
# num_users: number of users
# num_rep: number of repetitions
def exp_compare_algs_k(eps,num_users=20,num_rep=100):
mechanisms = ['PrivUnitG', 'ProjUnit', 'FastProjUnit', 'FastProjUnit-corr']
n = 2**13 # dimension of input vectors
lst_k = [10, 50, 100, 500, 800, 1000, 1500, 2000]
f_name = 'plots/err-fixHD_num_users_%d_num_rep_%d_d_%d_eps_%d.pdf' % (num_users,num_rep,n,int(eps))
dict_file = 'raw/err-fixHD_lst_k_num_users_%d_num_rep_%d_d_%d_eps_%d.npy' % (num_users,num_rep,n,int(eps))
err_dict = {}
if os.path.exists(dict_file):
err_dict = np.load(dict_file,allow_pickle='TRUE').item()
p = priv_unit_G_get_p(eps)
gamma, sigma = get_gamma_sigma(p, eps)
v = np.random.normal(size=n)
v = v/math.sqrt(np.sum(v**2))
X = np.random.normal(size=(num_users,n))/math.sqrt(n) + v
norms = np.linalg.norm(X, axis=1)
X = X / norms[:, np.newaxis]
norms = np.linalg.norm(X, axis=1)
v_hat = np.sum(X,0)/num_users
for mech in mechanisms:
print(mech)
if mech in err_dict:
continue
err_dict[mech] = {}
for i in range(len(lst_k)):
k = lst_k[i]
err_dict[mech][k] = np.zeros(num_rep)
if mech == 'PrivUnitG' and i>0:
err_dict[mech][k] = err_dict[mech][lst_k[0]]
continue
err = np.zeros(num_rep)
W = None
for j in range(num_rep):
x_hat = np.zeros(n)
if mech == 'FastProjUnit-corr':
W = np.random.choice(a=[-1, 1], size=(n), p=[0.5, 0.5])
elif mech == 'ProjUnit-corr':
W_full = special_ortho_group.rvs(n)
for u in range(num_users):
if mech == 'ProjUnit-corr':
S = np.sort(random.choices(range(n), k=k)) # without repitition
#S = random.sample(range(n), k) # with repitition
W = math.sqrt(1.0*n/k) * W_full[S,:]
x_hat += privatize_vector(X[u],eps,k,mech,p,gamma,sigma,False,W)/num_users
err[j] = np.sum((x_hat - v_hat)**2)
err_dict[mech][k] = err
np.save(dict_file, err_dict)
# plot the errors
plt.figure()
for mech in mechanisms:
q = 0.9
err_mech = [np.mean(err_dict[mech][k]) for k in lst_k]
err_mech_high = [np.quantile(err_dict[mech][k],q) for k in lst_k]
err_mech_low = [np.quantile(err_dict[mech][k],1-q) for k in lst_k]
(c,m) = color_plot(mech)
plt.plot(lst_k[1:],err_mech[1:],color = c, marker=m,linestyle='dashed',label = mech)
plt.fill_between(lst_k[1:], err_mech_low[1:], err_mech_high[1:], color=c, alpha=.1)
#plt.xlabel('k')
plt.legend()
plt.yscale("log")
plt.xscale("log")
plt.savefig(f_name)
plt.show()
def handle_timeout(signum, frame):
raise TimeoutError
# Plot the run-time of each method as a function of the dimension
# when k is chosen as the best k for each method (using choose_best_k).
# For each dimension, we give a cut-off of 1 hour for each method
# to finish (num_rep=10) trials
def experiment_compare_time(eps,num_rep = 10):
# Run the function to create the plots
mechanisms = ['PrivUnitG', 'ProjUnit', 'FastProjUnit', 'PrivHS', 'SQKR', 'CompPrivUnitG']
file_dict = 'raw/time_dict_eps_%d.npy' % int(eps)
time_dict = {}
if os.path.exists(file_dict):
time_dict = np.load(file_dict,allow_pickle='TRUE').item()
lst_n = [10, 10**2, 10**3, 10**4, 10**5, 10**6, 10**7]
time_arr = {}
p = None
gamma = None
sigma = None
time_limit = 60*60 # set time limit to 60 minutes
for mech in mechanisms:
print(mech)
time_arr[mech] = np.zeros(len(lst_n))
if mech in ['PrivUnitG', 'FastProjUnit', 'ProjUnit', 'CompPrivUnitG']:
p = priv_unit_G_get_p(eps)
gamma, sigma = get_gamma_sigma(p, eps)
for i in range(len(lst_n)):
n = lst_n[i]
if (mech,n) in time_dict:
time_arr[mech][i] = time_dict[mech,n]
continue
x = np.random.normal(size=n)
x = x / np.linalg.norm(x)
k = choose_best_k(mech,eps)
start_time = time.process_time()
signal.signal(signal.SIGALRM, handle_timeout)
if i > 0 and time_arr[mech][i-1] == time_limit:
time_arr[mech][i] = time_limit
time_dict[mech,n] = time_limit
continue
signal.alarm(time_limit) # give limited time to each application
try:
for j in range(num_rep):
y = privatize_vector(x,eps,k,mech,p,gamma,sigma)
time_arr[mech][i] = time.process_time() - start_time
except TimeoutError:
print("------Too long-------")
time_arr[mech][i] = time_limit
time_dict[mech,n] = time_arr[mech][i]
np.save(file_dict, time_dict)
# plot the results
plt.figure()
for mech in mechanisms:
if time_arr[mech][-1] == time_limit:
idx = np.where(time_arr[mech] == time_limit)[0][0]
else:
idx = -1
if mech == 'PrivUnitG':
plt.plot(lst_n[:idx],time_arr[mech][:idx],'k-o',label = mech)
continue
plt.plot(lst_n[:idx],time_arr[mech][:idx],color_plot(mech),label = mech)
plt.legend()
plt.xscale("log")
plt.yscale("log")
f_name = 'plots/time-comparison_eps_%d.pdf' % int(eps)
plt.savefig(f_name)
plt.show()
# ---------- Experiment 1: plot error as a function of k ---------
num_rep = 30
num_users = 50
print('----experiment 1 (eps = 16)------')
eps = 16.0
exp_compare_algs_k(eps,num_users,num_rep)
print('----experiment 1 (eps = 10)------')
eps = 10.0
exp_compare_algs_k(eps,num_users,num_rep)
print('----experiment 1 (eps = 4)------')
eps = 4.0
exp_compare_algs_k(eps,num_users,num_rep)
# ---------- Experiment 2: plot error as a function of epsilon ---------
num_rep = 50
print('----experiment 2------')
exp_compare_var_eps(num_rep)
# ---------- Experiment 3: plot time for each method ---------
num_rep = 10
print('----experiment 3 (eps = 10)------')
eps = 10
experiment_compare_time(eps,num_rep)
print('----experiment 3 (eps = 16)------')
eps = 16
experiment_compare_time(eps,num_rep)