BUG: Made SparseDataFrame.fillna() fill all NaNs

kernc · jreback · commit ee6412aee8bb · 2017-07-22T14:59:43.000-04:00
A continuation of #16178; closes #16112; closes #16178. Author: Kernc <kerncece@gmail.com>; Author: keitakurita <kris337jbn@yahoo.co.jp>. This patch had conflicts when merged, resolved by Committer: Jeff Reback <jeff@reback.net>. Closes #16892 from kernc/sparse-fillna and squashes the following commits: c1cd33e [Kernc] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs; 2974232 [Kernc] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs; 4bc01a1 [keitakurita] BUG: Made SparseDataFrame.fillna() fill all NaNs.
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -259,7 +259,7 @@ Indexing
 - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`)
 - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
 - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
- 
+
 I/O
 ^^^
 
@@ -284,7 +284,9 @@ Groupby/Resample/Rolling
 
 Sparse
 ^^^^^^
+
 - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`)
+- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when the frame was instantiated from a SciPy sparse matrix (:issue:`16112`)
 
 
 Reshaping
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
@@ -595,14 +595,11 @@ def fillna(self, value, downcast=None):
         if issubclass(self.dtype.type, np.floating):
             value = float(value)
 
-        if self._null_fill_value:
-            return self._simple_new(self.sp_values, self.sp_index,
-                                    fill_value=value)
-        else:
-            new_values = self.sp_values.copy()
-            new_values[isnull(new_values)] = value
-            return self._simple_new(new_values, self.sp_index,
-                                    fill_value=self.fill_value)
+        new_values = np.where(isnull(self.sp_values), value, self.sp_values)
+        fill_value = value if self._null_fill_value else self.fill_value
+
+        return self._simple_new(new_values, self.sp_index,
+                                fill_value=fill_value)
 
     def sum(self, axis=0, *args, **kwargs):
         """
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
@@ -1271,6 +1271,41 @@ def test_from_scipy_correct_ordering(spmatrix):
     tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
 
 
+def test_from_scipy_fillna(spmatrix):
+    # GH 16112
+    tm.skip_if_no_package('scipy')
+
+    arr = np.eye(3)
+    arr[1:, 0] = np.nan
+
+    try:
+        spm = spmatrix(arr)
+        assert spm.dtype == arr.dtype
+    except (TypeError, AssertionError):
+        # If conversion to sparse fails for this spmatrix type and arr.dtype,
+        # then the combination is not currently supported in NumPy, so we
+        # can just skip testing it thoroughly
+        return
+
+    sdf = pd.SparseDataFrame(spm).fillna(-1.0)
+
+    # The returned frame should have all NaN values filled with -1.0
+    expected = pd.SparseDataFrame({
+        0: pd.SparseSeries([1., -1, -1]),
+        1: pd.SparseSeries([np.nan, 1, np.nan]),
+        2: pd.SparseSeries([np.nan, np.nan, 1]),
+    }, default_fill_value=-1)
+
+    # fill_value is expected to be what .fillna() above was called with
+    # We don't use -1 as initial fill_value in expected SparseSeries
+    # construction because this way we obtain "compressed" SparseArrays,
+    # avoiding having to construct them ourselves
+    for col in expected:
+        expected[col].fill_value = -1
+
+    tm.assert_sp_frame_equal(sdf, expected)
+
+
 class TestSparseDataFrameArithmetic(object):
 
     def test_numeric_op_scalar(self):