Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assert ndim and number of dims match #7390

Merged
merged 2 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions pymc/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,13 +1185,6 @@ def logp(value, p):
)


class _OrderedLogistic(Categorical):
r"""
Underlying class for ordered logistic distributions.
See docs for the OrderedLogistic wrapper class for more details on how to use it in models.
"""


class OrderedLogistic:
R"""Ordered Logistic distribution.

Expand Down Expand Up @@ -1263,7 +1256,7 @@ class OrderedLogistic:
def __new__(cls, name, eta, cutpoints, compute_p=True, **kwargs):
p = cls.compute_p(eta, cutpoints)
if compute_p:
p = pm.Deterministic(f"{name}_probs", p, dims=kwargs.get("dims"))
p = pm.Deterministic(f"{name}_probs", p)
Copy link
Member Author

@ricardoV94 ricardoV94 Jun 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not trivial. p may have more or less dims than the respective Categorical variable, and these may or not be broadcasting. I decided it was not worth the trouble for the time being. In general we still need to figure out an approach for automatic dims (see #6828)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think partial dim assignment is currently supported, so you could do something like dims=(None,) * dim_diff + ('categories', ), no? The last one at least is always known iiuc

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is not sufficient either, because p could have a broadcastable dim on the same batch dimensions of the underlying categorical variable: pm.Categorical(..., p=[[0.5, 0.5]], dims="obs"). You cannot say p has dims ("obs", None) because it has shape (1, 2), not (2, 2)

Copy link
Member Author

@ricardoV94 ricardoV94 Jun 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or you're suggesting adding a "categories" dim regardless of the rest. That has another problem: what if the user creates 2 variables with different numbers of categories? Then again you have two dims that are incompatible (same name, different shape).

That can also be solved (by using the variable name) but I don't want to do it here, I just want to not break stuff for now.

Besides, that was not what the code was doing; it seems like the biggest focus was on reusing the user-provided dims, not auto-naming the extra dim.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was assuming that "categories" already was provided somewhere, not that we generate or auto-name anything.

But yes I understand this is a rabbit hole, focusing on not breaking things is fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case the PR is ready for review :)

out_rv = Categorical(name, p=p, **kwargs)
return out_rv

Expand Down Expand Up @@ -1367,7 +1360,7 @@ class OrderedProbit:
def __new__(cls, name, eta, cutpoints, sigma=1, compute_p=True, **kwargs):
p = cls.compute_p(eta, cutpoints, sigma)
if compute_p:
p = pm.Deterministic(f"{name}_probs", p, dims=kwargs.get("dims"))
p = pm.Deterministic(f"{name}_probs", p)
out_rv = Categorical(name, p=p, **kwargs)
return out_rv

Expand Down
5 changes: 5 additions & 0 deletions pymc/model/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1532,6 +1532,11 @@ def add_named_variable(self, var, dims: tuple[str | None, ...] | None = None):
raise ValueError(f"Dimension {dim} is not specified in `coords`.")
if any(var.name == dim for dim in dims if dim is not None):
raise ValueError(f"Variable `{var.name}` has the same name as its dimension label.")
# This check implicitly states that only vars with .ndim attribute can have dims
if var.ndim != len(dims):
raise ValueError(
f"{var} has {var.ndim} dims but {len(dims)} dim labels were provided."
)
self.named_vars_to_dims[var.name] = dims

self.named_vars[var.name] = var
Expand Down
16 changes: 11 additions & 5 deletions tests/distributions/test_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,19 +897,25 @@ def test_shape_inputs(self, eta, cutpoints, expected):
assert p_shape == expected

def test_compute_p(self):
with pm.Model() as m:
pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0)
pm.OrderedLogistic("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
with pm.Model(coords={"test_dim": [0]}) as m:
pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0, dims="test_dim")
pm.OrderedLogistic(
"ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False, dims="test_dim"
)
assert len(m.deterministics) == 1

x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
assert isinstance(x, TensorVariable)

# Test it works with auto-imputation
with pm.Model() as m:
with pm.Model(coords={"test_dim": [0, 1, 2]}) as m:
with pytest.warns(ImputationWarning):
pm.OrderedLogistic(
"ol", cutpoints=np.array([-2, 0, 2]), eta=0, observed=[0, np.nan, 1]
"ol",
cutpoints=np.array([[-2, 0, 2]]),
eta=0,
observed=[0, np.nan, 1],
dims=["test_dim"],
)
assert len(m.deterministics) == 2 # One from the auto-imputation, the other from compute_p

Expand Down
26 changes: 19 additions & 7 deletions tests/model/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,17 @@ def test_add_named_variable_checks_dim_name(self):
rv2.name = "yumyum"
pmodel.add_named_variable(rv2, dims=("nomnom", None))

def test_dims_type_check(self):
def test_add_named_variable_checks_number_of_dims(self):
    """Mismatched dim-label count must raise, for tuple dims and for a bare string."""
    expected_msg = "dim labels were provided"
    with pm.Model(coords={"bad": range(6)}) as model:
        # A 3-d variable with a single dim label: counts disagree, so this must fail.
        rv_a = pt.random.normal(size=(6, 6, 6), name="a")
        with pytest.raises(ValueError, match=expected_msg):
            model.add_named_variable(rv_a, dims=("bad",))

        # "bad" is an iterable with 3 elements, but we treat strings as a single dim, so it's still invalid
        rv_b = pt.random.normal(size=(6, 6, 6), name="b")
        with pytest.raises(ValueError, match=expected_msg):
            model.add_named_variable(rv_b, dims="bad")

def test_rv_dims_type_check(self):
with pm.Model(coords={"a": range(5)}) as m:
with pytest.raises(TypeError, match="Dims must be string"):
x = pm.Normal("x", shape=(10, 5), dims=(None, "a"))
Expand All @@ -899,12 +909,14 @@ def test_none_coords_autonumbering(self):
# TODO: Either allow dims without coords everywhere or nowhere
with pm.Model() as m:
m.add_coord(name="a", values=None, length=3)
m.add_coord(name="b", values=range(5))
x = pm.Normal("x", dims=("a", "b"))
m.add_coord(name="b", values=range(-5, 0))
m.add_coord(name="c", values=None, length=7)
x = pm.Normal("x", dims=("a", "b", "c"))
prior = pm.sample_prior_predictive(draws=2).prior
assert prior["x"].shape == (1, 2, 3, 5)
assert prior["x"].shape == (1, 2, 3, 5, 7)
assert list(prior.coords["a"].values) == list(range(3))
assert list(prior.coords["b"].values) == list(range(5))
assert list(prior.coords["b"].values) == list(range(-5, 0))
assert list(prior.coords["c"].values) == list(range(7))

def test_set_data_indirect_resize_without_coords(self):
with pm.Model() as pmodel:
Expand Down Expand Up @@ -1068,7 +1080,7 @@ def test_determinsitic_with_dims():
Test to check the passing of dims to the potential
"""
with pm.Model(coords={"observed": range(10)}) as model:
x = pm.Normal("x", 0, 1)
x = pm.Normal("x", 0, 1, shape=(10,))
y = pm.Deterministic("y", x**2, dims=("observed",))
assert model.named_vars_to_dims == {"y": ("observed",)}

Expand All @@ -1078,7 +1090,7 @@ def test_potential_with_dims():
Test to check the passing of dims to the potential
"""
with pm.Model(coords={"observed": range(10)}) as model:
x = pm.Normal("x", 0, 1)
x = pm.Normal("x", 0, 1, shape=(10,))
y = pm.Potential("y", x**2, dims=("observed",))
assert model.named_vars_to_dims == {"y": ("observed",)}

Expand Down
Loading