From 29821a548895866cd8ec53cd4a178605004929ea Mon Sep 17 00:00:00 2001 From: rpgoldman Date: Fri, 20 Mar 2020 07:03:51 -0500 Subject: [PATCH] Fix issue 3793 (#3843) * Demonstrate issue 3793. * Give Data model-relative names. Use Model.name_for() to relativize pm.Data names. * Move test description into docstring * mention changes to pm.Data naming closes #3793 Co-authored-by: michaelosthege --- RELEASE-NOTES.md | 1 + pymc3/data.py | 19 ++++++++++++------- pymc3/tests/test_data_container.py | 12 ++++++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 75b78ceef3b..56622eb9039 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -14,6 +14,7 @@ - Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6. - Tuning results no longer leak into sequentially sampled `Metropolis` chains (see #3733 and #3796). - Deprecated `sd` in version 3.7 has been replaced by `sigma` now raises `DepreciationWarning` on using `sd` in continuous, mixed and timeseries distributions. (see #3837 and #3688). +- In named models, `pm.Data` objects now get model-relative names (see [#3843](https://github.com/pymc-devs/pymc3/pull/3843)) ## PyMC3 3.8 (November 29 2019) diff --git a/pymc3/data.py b/pymc3/data.py index 9ad815ba861..d4572bc264b 100644 --- a/pymc3/data.py +++ b/pymc3/data.py @@ -479,13 +479,6 @@ class Data: https://docs.pymc.io/notebooks/data_container.html """ def __new__(self, name, value): - # `pm.model.pandas_to_array` takes care of parameter `value` and - # transforms it to something digestible for pymc3 - shared_object = theano.shared(pm.model.pandas_to_array(value), name) - - # To draw the node for this variable in the graphviz Digraph we need - # its shape. - shared_object.dshape = tuple(shared_object.shape.eval()) # Add data container to the named variables of the model. try: @@ -494,6 +487,18 @@ def __new__(self, name, value): raise TypeError("No model on context stack, which is needed to " "instantiate a data container. Add variable " "inside a 'with model:' block.") + + name = model.name_for(name) + + # `pm.model.pandas_to_array` takes care of parameter `value` and + # transforms it to something digestible for pymc3 + shared_object = theano.shared(pm.model.pandas_to_array(value), name) + + # To draw the node for this variable in the graphviz Digraph we need + # its shape. + shared_object.dshape = tuple(shared_object.shape.eval()) + + model.add_random_variable(shared_object) return shared_object diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index 1ece1f503ec..e49cab457af 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -143,3 +143,15 @@ def test_model_to_graphviz_for_model_with_data_container(self): assert text in g.source text = 'obs [label="obs ~ Normal" style=filled]' assert text in g.source + + +def test_data_naming(): + """ + This is a test for issue #3793 -- `Data` objects in named models are + not given model-relative names. + """ + with pm.Model("named_model") as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Normal("y") + assert y.name == "named_model_y" + assert x.name == "named_model_x"