Skip to content

Commit

Permalink
Support imputations with ndarray data
Browse files Browse the repository at this point in the history
closes #4437
  • Loading branch information
michaelosthege committed Jan 25, 2021
1 parent 823906a commit 653efaf
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 14 deletions.
3 changes: 2 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Release Notes

## PyMC3 vNext (on deck)
## PyMC3 vNext (3.11.1)

### Breaking Changes

### New Features
+ Automatic imputations now also work with `ndarray` data, not just `pd.Series` or `pd.DataFrame` (see [#4439](https://github.com/pymc-devs/pymc3/pull/4439)).

### Maintenance
- `math.log1mexp_numpy` no longer raises RuntimeWarning when given very small inputs. These were commonly observed during NUTS sampling (see [#4428](https://github.com/pymc-devs/pymc3/pull/4428)).
Expand Down
33 changes: 24 additions & 9 deletions pymc3/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1695,16 +1695,31 @@ def pandas_to_array(data):
XXX: When `data` is a generator, this will return a Theano tensor!
"""
if hasattr(data, "values"): # pandas
if data.isnull().any().any(): # missing values
ret = np.ma.MaskedArray(data.values, data.isnull().values)
if hasattr(data, "to_numpy"):
# typically, but not limited to pandas objects
vals = data.to_numpy()
mask = np.isnan(vals)
if mask.any():
# there are missing values
ret = np.ma.MaskedArray(vals, mask)
else:
ret = data.values
elif hasattr(data, "mask"):
if data.mask.any():
ret = data
else: # empty mask
ret = data.filled()
ret = vals
elif isinstance(data, np.ndarray):
if isinstance(data, np.ma.MaskedArray):
if not data.mask.any():
# empty mask
ret = data.filled()
else:
# already masked and rightly so
ret = data
else:
# already a ndarray, but not masked
mask = np.isnan(data)
if np.any(mask):
ret = np.ma.MaskedArray(data, mask)
else:
# no masking required
ret = data
elif isinstance(data, theano.graph.basic.Variable):
ret = data
elif sps.issparse(data):
Expand Down
11 changes: 7 additions & 4 deletions pymc3/tests/test_model_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,12 @@ def test_pandas_to_array(self, input_dtype):
pandas_input = pd.DataFrame(dense_input)

# All the even numbers are replaced with NaN
missing_pandas_input = pd.DataFrame(
np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]])
)
missing_numpy_input = np.array([
[np.nan, 1, np.nan],
[3, np.nan, 5],
[np.nan, 7, np.nan]
])
missing_pandas_input = pd.DataFrame(missing_numpy_input)
masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0))

# Create a generator object. Apparently the generator object needs to
Expand Down Expand Up @@ -72,7 +75,7 @@ def test_pandas_to_array(self, input_dtype):

# Check function behavior when using masked array inputs and pandas
# objects with missing data
for input_value in [masked_array_input, missing_pandas_input]:
for input_value in [missing_numpy_input, masked_array_input, missing_pandas_input]:
func_output = func(input_value)
assert isinstance(func_output, ma.core.MaskedArray)
assert func_output.shape == input_value.shape
Expand Down

0 comments on commit 653efaf

Please sign in to comment.