#!/usr/bin/env python3
# fit-np-hmc.py
# Bayesian fit using numpy for HMC
import os
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats
from scipy.optimize import minimize
# Load the Pima dataset, then build the binary response vector and design matrix
df = pd.read_parquet(os.path.join("..", "pima.parquet"))
print(df)
n, p = df.shape
print(n, p)
y = pd.get_dummies(df["type"])["Yes"].to_numpy(dtype='float32')
X = df.drop(columns="type").to_numpy()
X = np.hstack((np.ones((n, 1)), X))  # prepend an intercept column
print(X)
print(y)
def ll(beta):
    # Logistic regression log-likelihood, via the identity
    # log sigmoid(z) = -log(1 + exp(-z)) with z = (2y - 1) * X.beta
    return np.sum(-np.log(1 + np.exp(-(2*y - 1)*(X.dot(beta)))))
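
# A numerically safer variant (an added sketch, not in the original script):
# since -log(1 + exp(-z)) == -logaddexp(0, -z), np.logaddexp avoids overflow
# in exp(-z) when z is large and negative
def ll_stable(beta):
    return -np.sum(np.logaddexp(0., -(2*y - 1)*(X.dot(beta))))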
init = np.random.randn(p)*0.1
print(init)

print("MAP:")
pscale = np.array([10., 1., 1., 1., 1., 1., 1., 1.])  # prior standard deviations

def lprior(beta):
    # Independent zero-mean Gaussian prior on each coefficient
    return np.sum(sp.stats.norm.logpdf(beta, loc=0, scale=pscale))

def lpost(beta):
    # Unnormalised log-posterior
    return ll(beta) + lprior(beta)
def glp(beta):
    # Gradient of the log-posterior: Gaussian prior term plus the
    # logistic regression score X'(y - p), with p = sigmoid(X.beta)
    glpr = -beta/(pscale*pscale)
    gll = (X.T).dot(y - 1/(1 + np.exp(-X.dot(beta))))
    return glpr + gll
# MAP estimate via BFGS on the negative log-posterior, using the analytic gradient
res = minimize(lambda x: -lpost(x), init, jac=lambda x: -glp(x), method='BFGS')
print(res.x)
print(ll(res.x))
print(glp(res.x))  # gradient should be near zero at the mode
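
# Optional sanity check (an added sketch, not in the original script): compare
# the analytic gradient against a central finite-difference approximation of
# the log-posterior at the MAP; the step size eps_fd is an assumed value
def fd_grad(f, x, eps_fd=1e-6):
    g = np.zeros_like(x)
    for i in range(len(x)):
        e = np.zeros_like(x)
        e[i] = eps_fd
        g[i] = (f(x + e) - f(x - e))/(2*eps_fd)
    return g

print(np.max(np.abs(glp(res.x) - fd_grad(lpost, res.x))))  # should be small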
print("HMC:")
def mhKernel(lpost, rprop):
def kernel(x):
prop = rprop(x)
a = lpost(prop) - lpost(x)
if (np.log(np.random.rand()) < a):
x = prop
return x
return kernel
def hmcKernel(lpi, glpi, eps=1e-4, l=10, dmm=1):
    # HMC kernel: a leapfrog trajectory of l steps of size eps, with diagonal
    # mass matrix dmm, used as the proposal for a Metropolis-Hastings step on
    # the augmented (position, momentum) space
    sdmm = np.sqrt(dmm)

    def leapf(q, p):
        # Leapfrog integrator; the final momentum flip makes the proposal
        # reversible, as required for detailed balance
        p = p + 0.5*eps*glpi(q)
        for i in range(l):
            q = q + eps*p/dmm
            if i < l-1:
                p = p + eps*glpi(q)
            else:
                p = p + 0.5*eps*glpi(q)
        return (q, -p)

    def alpi(x):
        # Log-density on the augmented space: log target plus
        # Gaussian log kinetic energy
        (q, p) = x
        return lpi(q) - 0.5*np.sum((p**2)/dmm)

    def rprop(x):
        (q, p) = x
        return leapf(q, p)

    mhk = mhKernel(alpi, rprop)

    def kern(q):
        # Resample the momentum, take one MH step, then discard the momentum
        d = len(q)
        p = np.random.randn(d)*sdmm
        return mhk((q, p))[0]

    return kern
def mcmc(init, kernel, thin=10, iters=10000, verb=True):
    # Run the chain for iters*thin kernel applications, storing every
    # thin-th state as one row of the output matrix
    p = len(init)
    mat = np.zeros((iters, p))
    x = init
    if verb:
        print(str(iters) + " iterations")
    for i in range(iters):
        if verb:
            print(str(i), end=" ", flush=True)
        for j in range(thin):
            x = kernel(x)
        mat[i, :] = x
    if verb:
        print("\nDone.", flush=True)
    return mat
# Per-component tuning: dmm = 1/pre scales the leapfrog moves to the
# differing posterior scales of the coefficients
pre = np.array([100., 1., 1., 1., 1., 1., 25., 1.])
out = mcmc(res.x,
           hmcKernel(lpost, glp, eps=1e-3, l=50, dmm=1/pre), thin=20)
print(out)
odf = pd.DataFrame(out, columns=["b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7"])
odf.to_parquet("fit-np-hmc.parquet")
print("Posterior summaries:")
summ = scipy.stats.describe(out)
print(summ)
print("\nMean: " + str(summ.mean))
print("Variance: " + str(summ.variance))
# eof