"""
This file implements Bayesian polynomial regression.
All models assume the noise level is known.
Main components:
* `struct Joint` describes the probabilistic model for this task.
* `condition(joint::Joint, degree, y)` conditions the model on
  observations `y` and a specific polynomial degree.
  This can be done in closed form, so this function returns
  a probability distribution (`MvNormal`).
* `sample(joint::Joint, y, ...)` samples from the model given only
  observations `y`, using involutive MCMC.
See `demo.jl` for usage.
"""
using Distributions
using LinearAlgebra
"""
Joint
Parameters of the joint probability distribution over the random variables `y`, `Rβ`, and `d`:
y ~ MvNormal(ŷ, `likelihood_variance`)
ŷ = Q * Rβ
Rβ ~ MvNormal(zeros(d + 1), `prior_variance`)
d ~ `prior_degree_dist`
"""
struct Joint{D}
x::Vector{Float64}
prior_degree_dist::D
prior_variance::Float64
likelihood_variance::Float64
end
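# A minimal construction sketch (illustrative values; `Poisson` is just one
# possible choice of degree prior from Distributions):
#
#   x = collect(range(-1.0, 1.0; length=20))
#   joint = Joint(x, Poisson(2.0), 1.0, 0.1)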
"""
logpdf(joint::Joint, (Rβ, y))
Evaluate the (unnormalized) log density of the joint distribution at `Rβ` and `y`;
the degree `d` is implied by `length(Rβ) - 1`.
"""
function Distributions.logpdf(joint::Joint, (Rβ, y))
# Polynomial degree:
degree = length(Rβ) - 1
# Estimate y given polynomial coefficients and x:
# X = design_matrix(x, degree)
# ŷ = X * β = QR * β = Q * Rβ
X = design_matrix(joint.x, degree)
Q, R = qr(X) # X = Q*R
ŷ = Q * Rβ
# Prior for Rβ:
# Assume independently normally distributed around zero:
prior_coef_dist = MvNormal(zeros(degree + 1), joint.prior_variance * I)
# Gaussian likelihood around ŷ:
likelihood_dist = MvNormal(ŷ, joint.likelihood_variance * I)
# Return total log density
    return logpdf(likelihood_dist, y) + # p(y | ŷ)
        logpdf(joint.prior_degree_dist, degree) + # p(degree)
        logpdf(prior_coef_dist, Rβ) # p(Rβ)
end
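# For example, the joint density can be evaluated at a prior draw
# (`randprior` is defined below; `y` here is a stand-in for real observations):
#
#   degree, Rβ = randprior(joint)
#   y = randn(length(joint.x))
#   logpdf(joint, (Rβ, y))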
"""
SimpleAuxiliaryDistribution
Parameters of a distribution over auxiliary variables, conditioned on the
current non-auxiliary variables.
"""
struct SimpleAuxiliaryDistribution{D,C}
increase_dist::D
coef_dist::C
end
"""
SimpleAuxiliaryDistribution(joint, Rβ, y)
Create the auxiliary distribution conditioned on the current state `Rβ` and observations `y`.
"""
function SimpleAuxiliaryDistribution(joint::Joint, Rβ, y)
# Distribution for auxiliary variable(s) given current state
degree = length(Rβ) - 1
max_degree = length(y) - 1
increase_dist = if degree == 0
# if currently constant polynomial, definitely increase degree
Bernoulli(1.0)
elseif degree == max_degree
# if currently maximal possible degree, definitely decrease degree
Bernoulli(0.0)
else
# else 50:50 chance
Bernoulli(0.5)
end
# use prior for coefficients as proposal for new coefficient:
coef_dist = Normal(0.0, sqrt(joint.prior_variance))
return SimpleAuxiliaryDistribution(increase_dist, coef_dist)
end
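# Sketch of drawing and scoring an auxiliary move, continuing the example above:
#
#   aux_dist = SimpleAuxiliaryDistribution(joint, Rβ, y)
#   aux = rand(aux_dist)        # (; increase, coef)
#   logpdf(aux_dist, aux)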
"""
rand(aux_dist)
Draw a random sample from the auxiliary distribution.
"""
function Base.rand(dist::SimpleAuxiliaryDistribution)
increase = rand(dist.increase_dist)
coef = if increase
rand(dist.coef_dist)
else
        NaN # placeholder; no new coefficient is proposed when decreasing the degree
end
(; increase, coef)
end
"""
logpdf(aux_dist, sample)
Evaluate the log density of `aux_dist` at `sample`.
"""
function Distributions.logpdf(
dist::SimpleAuxiliaryDistribution, val::NamedTuple{(:increase, :coef)}
)
increase = val.increase
score = logpdf(dist.increase_dist, increase)
if increase
score += logpdf(dist.coef_dist, val.coef)
end
score
end
"""
push_pop_involution!(coefs, aux)
If `aux.increase`, push `aux.coef` onto the end of `coefs`;
otherwise pop the last coefficient (the inverse move).
Like every involution, applying this function twice returns the original state:
`isequal((coefs, aux), push_pop_involution!(push_pop_involution!(copy(coefs), aux)[1:2]...)[1:2])`
(`isequal` is used because `coef` may be `NaN`).
"""
function push_pop_involution!(coefs, aux::NamedTuple{(:increase, :coef)})
    # push/pop permutes the state coordinates, so the Jacobian has log|det| = 0:
    logabsdet = 0.0
    if aux.increase
        push!(coefs, aux.coef)
return coefs, (; increase=false, coef=NaN), logabsdet
else
coef = pop!(coefs)
return coefs, (; increase=true, coef), logabsdet
end
end
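# Round-trip sketch of the involution property, with illustrative coefficients:
#
#   coefs, aux, _ = push_pop_involution!([0.3, -1.2], (; increase=true, coef=0.5))
#   # coefs == [0.3, -1.2, 0.5] and aux == (; increase=false, coef=NaN);
#   # applying the involution again pops 0.5 back off:
#   push_pop_involution!(coefs, aux)[1] == [0.3, -1.2]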
"""
condition(joint, degree, y)
Condition `joint` on `degree` and `y`,
i.e. return the posterior distribution over the polynomial coefficients `Rβ`
given observations `y` and a fixed `degree`.
"""
function condition(joint::Joint, degree::Int, y)
# assume linear-gaussian model, i.e.
# gaussian prior and gaussian likelihood:
# Rβ ~ MvNormal(0, σ1)
# ŷ = Q * Rβ
# y ~ MvNormal(ŷ, σ2)
#
# return posterior for Rβ which is again gaussian
X = design_matrix(joint.x, degree)
    Q, R = qr(X)
    prior_precision = inv(joint.prior_variance)
    likelihood_precision = inv(joint.likelihood_variance)
    # Q has orthonormal columns (Q'Q = I), so the posterior covariance is a
    # multiple of the identity (Bishop, eq. 2.117):
    posterior_variance = inv(prior_precision + likelihood_precision)
    # Posterior mean (Bishop, eq. 2.116):
    posterior_mean = posterior_variance * likelihood_precision * (Matrix(Q)' * y)
return MvNormal(posterior_mean, posterior_variance * I)
end
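# Sketch of the closed-form conditional, e.g. for a quadratic fit:
#
#   posterior = condition(joint, 2, y)  # MvNormal over the three entries of Rβ
#   Rβ = rand(posterior)                # or mean(posterior) for a point estimate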
"""
randprior(joint)
Perform ancestral sampling from the prior and return `(degree, Rβ)`.
"""
function randprior(joint::Joint)
degree = rand(joint.prior_degree_dist)
# Prior for Rβ:
# Assume independently normally distributed around zero:
prior_coef_dist = MvNormal(zeros(degree + 1), joint.prior_variance * I)
Rβ = rand(prior_coef_dist)
return degree, Rβ
end
"""
involutive_mc_step(Rβ, target, y, aux_dist, involution)
Perform a single involutive Metropolis-Hastings step and return `(accepted, Rβ)`.
"""
function involutive_mc_step(Rβ, target, y, aux_dist, involution)
log_p_old = logpdf(target, (Rβ, y)) # logdensity of target at current Rβ
aux_dist_old = aux_dist(target, Rβ, y) # auxiliary distribution given current Rβ
aux_old = rand(aux_dist_old) # sample auxiliary state
log_q_old = logpdf(aux_dist_old, aux_old) # evaluate logpdf of auxiliary state
Rβ, aux_new, lad_jacobian = involution(Rβ, aux_old)
log_p_new = logpdf(target, (Rβ, y)) # logdensity of target at *proposed* Rβ
aux_dist_new = aux_dist(target, Rβ, y) # auxiliary distribution given *proposed* Rβ
log_q_new = logpdf(aux_dist_new, aux_new)
    # MH acceptance ratio for the involutive proposal:
    # p(x') q(a' | x') / (p(x) q(a | x)) * |det J|
    acceptance_probability = exp(
log_p_new + log_q_new - log_p_old - log_q_old + lad_jacobian
)
accepted = rand() < acceptance_probability
if !accepted
# simply call involution again to return to old Rβ
Rβ, _, _ = involution(Rβ, aux_new)
end
return accepted, Rβ
end
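# A single trans-dimensional step, using the auxiliary distribution and
# involution defined above:
#
#   accepted, Rβ = involutive_mc_step(
#       Rβ, joint, y, SimpleAuxiliaryDistribution, push_pop_involution!
#   )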
"""
sample(target::Joint, y, aux_dist, involution, n_samples, init_state=randprior(target))
Sample from `target` using involutive MCMC, alternating trans-dimensional moves
with exact Gibbs updates of the coefficients.
Return `(βs, Rβs, degrees, accepteds)`.
"""
function sample(
target::Joint, y, aux_dist, involution, n_samples::Int, init_state=randprior(target)
)
βs = Vector{Float64}[]
Rβs = Vector{Float64}[]
degrees = Int[]
accepteds = Bool[]
    # initial state (by default sampled from the prior):
    degree, Rβ = init_state
    # record the initial state as sample 1 so that `n_samples` states are returned:
    Q, R = qr(design_matrix(target.x, degree))
    push!(βs, R \ Rβ)
    push!(Rβs, copy(Rβ))
    push!(degrees, degree)
    # Gibbs-style alternation:
    # every odd step samples Rβ exactly from p(Rβ | degree, y)
    # every even step proposes a trans-dimensional move on p(Rβ, degree | y)
    for i in 2:n_samples
if mod(i, 2) == 0 # sample from full posterior
accepted, Rβ = involutive_mc_step(Rβ, target, y, aux_dist, involution)
degree = length(Rβ) - 1
push!(accepteds, accepted)
else # sample β
Rβ_dist = condition(target, degree, y) # target conditioned on degree and y
# can directly sample from this
Rβ = rand(Rβ_dist)
end
        # recover the polynomial coefficients β from Rβ via the R factor:
        Q, R = qr(design_matrix(target.x, degree))
        β = R \ Rβ
push!(βs, β)
push!(Rβs, copy(Rβ))
push!(degrees, degree)
end
βs, Rβs, degrees, accepteds
end
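# End-to-end sketch (illustrative chain length; see `demo.jl` for a full example):
#
#   βs, Rβs, degrees, accepteds = sample(
#       joint, y, SimpleAuxiliaryDistribution, push_pop_involution!, 1000
#   )
#   count(==(2), degrees) / length(degrees)  # posterior probability of degree 2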
"""
design_matrix(x, degree)
Return the [Vandermonde matrix](https://en.wikipedia.org/wiki/Vandermonde_matrix) of `x`
with columns `x .^ 0, x .^ 1, ..., x .^ degree`.
"""
function design_matrix(x, degree)
    # special-case degree 0: reduce over a single column would return a Vector
    degree == 0 ? ones(length(x), 1) : reduce(hcat, x .^ i for i in 0:degree)
end
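# For example, `design_matrix([1.0, 2.0, 3.0], 2)` returns
#
#   1.0  1.0  1.0
#   1.0  2.0  4.0
#   1.0  3.0  9.0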