Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP ZeroSumNormal example notebook #210

Draft
wants to merge 24 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9c1a603
ZeroSumNormal: initial commit
drbenvincent Aug 16, 2021
ecb4bce
typos + swap histogram for np.allclose
drbenvincent Aug 18, 2021
445cd70
latest ZeroSumNormal code, pymc3 v3, random seed for sampling
drbenvincent Aug 18, 2021
32199a1
use of coords/dims for all models now
drbenvincent Aug 19, 2021
9940a56
Added introduction, framing it in the context of categorical coding.
drbenvincent Sep 12, 2021
7f5748a
added dims="groups" in a few places
drbenvincent Sep 12, 2021
9feff77
add _unconstrained suffix in manual model
drbenvincent Sep 12, 2021
5e3af8d
create truncated regression example
drbenvincent Jan 24, 2021
33fc141
delete truncated regression example from main branch
drbenvincent Jan 25, 2021
5d6af4d
fix typo
drbenvincent Sep 18, 2021
61eb3f8
ZeroSumNormal: initial commit
drbenvincent Aug 16, 2021
dc2e3a8
typos + swap histogram for np.allclose
drbenvincent Aug 18, 2021
0ee27f5
latest ZeroSumNormal code, pymc3 v3, random seed for sampling
drbenvincent Aug 18, 2021
05e87b0
use of coords/dims for all models now
drbenvincent Aug 19, 2021
0a34ea0
added dims="groups" in a few places
drbenvincent Sep 12, 2021
b0692bc
add _unconstrained suffix in manual model
drbenvincent Sep 12, 2021
3219207
create truncated regression example
drbenvincent Jan 24, 2021
4a26d81
delete truncated regression example from main branch
drbenvincent Jan 25, 2021
3aa5bd7
fix typo
drbenvincent Sep 18, 2021
5e0b00c
re-ran notebook after rebase - do tests pass now?
drbenvincent Sep 18, 2021
788e909
make tests pass - formatting of ZeroSumNormal.py
drbenvincent Sep 18, 2021
51c6fad
add Tomás Capretto as a co-author
drbenvincent Sep 18, 2021
981f600
testing if I can commit to the PR
tomicapretto Sep 22, 2021
b9070c1
Explain something about where zero sum encoding comes from
tomicapretto Sep 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,927 changes: 3,927 additions & 0 deletions examples/generalized_linear_models/GLM-ZeroSumNormal.ipynb

Large diffs are not rendered by default.

298 changes: 298 additions & 0 deletions examples/generalized_linear_models/ZeroSumNormal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> cb0c201 (latest ZeroSumNormal code, pymc3 v3, random seed for sampling)
from typing import List

try:
import aesara.tensor as aet
except ImportError:
import theano.tensor as aet

<<<<<<< HEAD
import numpy as np
import pymc3 as pm
from scipy import stats
from pymc3.distributions.distribution import generate_samples, draw_values


def extend_axis_aet(array, axis):
n = array.shape[axis] + 1
sum_vals = array.sum(axis, keepdims=True)
norm = sum_vals / (np.sqrt(n) + n)
fill_val = norm - sum_vals / np.sqrt(n)

out = aet.concatenate([array, fill_val.astype(str(array.dtype))], axis=axis)
return out - norm.astype(str(array.dtype))


def extend_axis_rev_aet(array: np.ndarray, axis: int):
if axis < 0:
axis = axis % array.ndim
assert axis >= 0 and axis < array.ndim

n = array.shape[axis]
last = aet.take(array, [-1], axis=axis)

sum_vals = -last * np.sqrt(n)
norm = sum_vals / (np.sqrt(n) + n)
slice_before = (slice(None, None),) * axis
return array[slice_before + (slice(None, -1),)] + norm.astype(str(array.dtype))


def extend_axis(array, axis):
n = array.shape[axis] + 1
sum_vals = array.sum(axis, keepdims=True)
norm = sum_vals / (np.sqrt(n) + n)
fill_val = norm - sum_vals / np.sqrt(n)

out = np.concatenate([array, fill_val.astype(str(array.dtype))], axis=axis)
return out - norm.astype(str(array.dtype))


def extend_axis_rev(array, axis):
n = array.shape[axis]
last = np.take(array, [-1], axis=axis)

sum_vals = -last * np.sqrt(n)
norm = sum_vals / (np.sqrt(n) + n)
slice_before = (slice(None, None),) * len(array.shape[:axis])
return array[slice_before + (slice(None, -1),)] + norm.astype(str(array.dtype))


class ZeroSumTransform(pm.distributions.transforms.Transform):
name = "zerosum"

_active_dims: List[int]

def __init__(self, active_dims):
self._active_dims = active_dims

def forward(self, x):
for axis in self._active_dims:
x = extend_axis_rev_aet(x, axis=axis)
return x

def forward_val(self, x, point=None):
for axis in self._active_dims:
x = extend_axis_rev(x, axis=axis)
return x

def backward(self, z):
z = aet.as_tensor_variable(z)
for axis in self._active_dims:
z = extend_axis_aet(z, axis=axis)
return z

def jacobian_det(self, x):
return aet.constant(0.0)


class ZeroSumNormal(pm.Continuous):
def __init__(self, sigma=1, *, active_dims=None, active_axes=None, **kwargs):
shape = kwargs.get("shape", ())
dims = kwargs.get("dims", None)
if isinstance(shape, int):
shape = (shape,)

if isinstance(dims, str):
dims = (dims,)

self.mu = self.median = self.mode = aet.zeros(shape)
self.sigma = aet.as_tensor_variable(sigma)

if active_dims is None and active_axes is None:
if shape:
active_axes = (-1,)
else:
active_axes = ()

if isinstance(active_axes, int):
active_axes = (active_axes,)

if isinstance(active_dims, str):
active_dims = (active_dims,)

if active_axes is not None and active_dims is not None:
raise ValueError("Only one of active_axes and active_dims can be specified.")

if active_dims is not None:
model = pm.modelcontext(None)
print(model.RV_dims)
if dims is None:
raise ValueError("active_dims can only be used with the dims kwargs.")
active_axes = []
for dim in active_dims:
active_axes.append(dims.index(dim))

super().__init__(**kwargs, transform=ZeroSumTransform(active_axes))

def logp(self, x):
return pm.Normal.dist(sigma=self.sigma).logp(x)

@staticmethod
def _random(scale, size):
samples = stats.norm.rvs(loc=0, scale=scale, size=size)
return samples - np.mean(samples, axis=-1, keepdims=True)

def random(self, point=None, size=None):
(sigma,) = draw_values([self.sigma], point=point, size=size)
return generate_samples(self._random, scale=sigma, dist_shape=self.shape, size=size)

def _distr_parameters_for_repr(self):
return ["sigma"]

def logcdf(self, value):
raise NotImplementedError()
=======
import pymc3 as pm
=======
>>>>>>> cb0c201 (latest ZeroSumNormal code, pymc3 v3, random seed for sampling)
import numpy as np
import pymc3 as pm
from scipy import stats
from pymc3.distributions.distribution import generate_samples, draw_values

def extend_axis_aet(array, axis):
n = array.shape[axis] + 1
sum_vals = array.sum(axis, keepdims=True)
norm = sum_vals / (np.sqrt(n) + n)
fill_val = norm - sum_vals / np.sqrt(n)

out = aet.concatenate([array, fill_val.astype(str(array.dtype))], axis=axis)
return out - norm.astype(str(array.dtype))


def extend_axis_rev_aet(array: np.ndarray, axis: int):
if axis < 0:
axis = axis % array.ndim
assert axis >= 0 and axis < array.ndim

n = array.shape[axis]
last = aet.take(array, [-1], axis=axis)

sum_vals = -last * np.sqrt(n)
norm = sum_vals / (np.sqrt(n) + n)
slice_before = (slice(None, None),) * axis
return array[slice_before + (slice(None, -1),)] + norm.astype(str(array.dtype))


def extend_axis(array, axis):
n = array.shape[axis] + 1
sum_vals = array.sum(axis, keepdims=True)
norm = sum_vals / (np.sqrt(n) + n)
fill_val = norm - sum_vals / np.sqrt(n)

out = np.concatenate([array, fill_val.astype(str(array.dtype))], axis=axis)
return out - norm.astype(str(array.dtype))


<<<<<<< HEAD
def make_sum_zero_hh(N: int) -> np.ndarray:
"""
Build a householder transformation matrix that maps e_1 to a vector of all 1s.
"""
e_1 = np.zeros(N)
e_1[0] = 1
a = np.ones(N)
a /= np.sqrt(a @ a)
v = a + e_1
v /= np.sqrt(v @ v)
return np.eye(N) - 2 * np.outer(v, v)
>>>>>>> 2da3052 (ZeroSumNormal: initial commit)
=======
def extend_axis_rev(array, axis):
n = array.shape[axis]
last = np.take(array, [-1], axis=axis)

sum_vals = -last * np.sqrt(n)
norm = sum_vals / (np.sqrt(n) + n)
slice_before = (slice(None, None),) * len(array.shape[:axis])
return array[slice_before + (slice(None, -1),)] + norm.astype(str(array.dtype))


class ZeroSumTransform(pm.distributions.transforms.Transform):
name = "zerosum"

_active_dims: List[int]

def __init__(self, active_dims):
self._active_dims = active_dims

def forward(self, x):
for axis in self._active_dims:
x = extend_axis_rev_aet(x, axis=axis)
return x

def forward_val(self, x, point=None):
for axis in self._active_dims:
x = extend_axis_rev(x, axis=axis)
return x

def backward(self, z):
z = aet.as_tensor_variable(z)
for axis in self._active_dims:
z = extend_axis_aet(z, axis=axis)
return z

def jacobian_det(self, x):
return aet.constant(0.)


class ZeroSumNormal(pm.Continuous):
def __init__(self, sigma=1, *, active_dims=None, active_axes=None, **kwargs):
shape = kwargs.get("shape", ())
dims = kwargs.get("dims", None)
if isinstance(shape, int):
shape = (shape,)

if isinstance(dims, str):
dims = (dims,)

self.mu = self.median = self.mode = aet.zeros(shape)
self.sigma = aet.as_tensor_variable(sigma)

if active_dims is None and active_axes is None:
if shape:
active_axes = (-1,)
else:
active_axes = ()

if isinstance(active_axes, int):
active_axes = (active_axes,)

if isinstance(active_dims, str):
active_dims = (active_dims,)

if active_axes is not None and active_dims is not None:
raise ValueError("Only one of active_axes and active_dims can be specified.")

if active_dims is not None:
model = pm.modelcontext(None)
print(model.RV_dims)
if dims is None:
raise ValueError("active_dims can only be used with the dims kwargs.")
active_axes = []
for dim in active_dims:
active_axes.append(dims.index(dim))

super().__init__(**kwargs, transform=ZeroSumTransform(active_axes))

def logp(self, x):
return pm.Normal.dist(sigma=self.sigma).logp(x)

@staticmethod
def _random(scale, size):
samples = stats.norm.rvs(loc=0, scale=scale, size=size)
return samples - np.mean(samples, axis=-1, keepdims=True)

def random(self, point=None, size=None):
sigma, = draw_values([self.sigma], point=point, size=size)
return generate_samples(self._random, scale=sigma, dist_shape=self.shape, size=size)

def _distr_parameters_for_repr(self):
return ["sigma"]

def logcdf(self, value):
raise NotImplementedError()
>>>>>>> cb0c201 (latest ZeroSumNormal code, pymc3 v3, random seed for sampling)