From 653efafcbfeb1ef621cdcaa241441ad92b7d85d6 Mon Sep 17 00:00:00 2001
From: Michael Osthege <m.osthege@fz-juelich.de>
Date: Sun, 24 Jan 2021 23:09:45 +0100
Subject: [PATCH] Support imputations with ndarray data

closes #4437
---
 RELEASE-NOTES.md                  |  3 ++-
 pymc3/model.py                    | 33 ++++++++++++++++++++++---------
 pymc3/tests/test_model_helpers.py | 11 +++++++----
 3 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 65cde7cc6ed..99e35d659b3 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,10 +1,11 @@
 # Release Notes
 
-## PyMC3 vNext (on deck)
+## PyMC3 vNext (3.11.1)
 
 ### Breaking Changes
 
 ### New Features
++ Automatic imputations now also work with `ndarray` data, not just `pd.Series` or `pd.DataFrame` (see [#4439](https://github.com/pymc-devs/pymc3/pull/4439)).
 
 ### Maintenance
 - `math.log1mexp_numpy` no longer raises RuntimeWarning when given very small inputs. These were commonly observed during NUTS sampling (see [#4428](https://github.com/pymc-devs/pymc3/pull/4428)).
diff --git a/pymc3/model.py b/pymc3/model.py
index 393c4d2f6a2..0794116809c 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1695,16 +1695,31 @@ def pandas_to_array(data):
     XXX: When `data` is a generator, this will return a Theano tensor!
 
     """
-    if hasattr(data, "values"):  # pandas
-        if data.isnull().any().any():  # missing values
-            ret = np.ma.MaskedArray(data.values, data.isnull().values)
+    if hasattr(data, "to_numpy"):
+        # typically, but not limited to pandas objects
+        vals = data.to_numpy()
+        mask = np.isnan(vals)
+        if mask.any():
+            # there are missing values
+            ret = np.ma.MaskedArray(vals, mask)
         else:
-            ret = data.values
-    elif hasattr(data, "mask"):
-        if data.mask.any():
-            ret = data
-        else:  # empty mask
-            ret = data.filled()
+            ret = vals
+    elif isinstance(data, np.ndarray):
+        if isinstance(data, np.ma.MaskedArray):
+            if not data.mask.any():
+                # empty mask
+                ret = data.filled()
+            else:
+                # already masked and rightly so
+                ret = data
+        else:
+            # already a ndarray, but not masked
+            mask = np.isnan(data)
+            if np.any(mask):
+                ret = np.ma.MaskedArray(data, mask)
+            else:
+                # no masking required
+                ret = data
     elif isinstance(data, theano.graph.basic.Variable):
         ret = data
     elif sps.issparse(data):
diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py
index 7b049bac707..b0d8efc9e34 100644
--- a/pymc3/tests/test_model_helpers.py
+++ b/pymc3/tests/test_model_helpers.py
@@ -41,9 +41,12 @@ def test_pandas_to_array(self, input_dtype):
         pandas_input = pd.DataFrame(dense_input)
 
         # All the even numbers are replaced with NaN
-        missing_pandas_input = pd.DataFrame(
-            np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]])
-        )
+        missing_numpy_input = np.array([
+            [np.nan, 1, np.nan],
+            [3, np.nan, 5],
+            [np.nan, 7, np.nan]
+        ])
+        missing_pandas_input = pd.DataFrame(missing_numpy_input)
         masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0))
 
         # Create a generator object. Apparently the generator object needs to
@@ -72,7 +75,7 @@ def test_pandas_to_array(self, input_dtype):
 
         # Check function behavior when using masked array inputs and pandas
         # objects with missing data
-        for input_value in [masked_array_input, missing_pandas_input]:
+        for input_value in [missing_numpy_input, masked_array_input, missing_pandas_input]:
             func_output = func(input_value)
             assert isinstance(func_output, ma.core.MaskedArray)
             assert func_output.shape == input_value.shape