BUG: Fixed GH16112 except for dia and dok formats

kristopheryahoo · kristopheryahoo · commit a532d09b615c · 2017-05-01T21:32:30.000+09:00
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
@@ -595,12 +595,13 @@ def fillna(self, value, downcast=None):
         if issubclass(self.dtype.type, np.floating):
             value = float(value)
 
+        new_values = self.sp_values.copy()
+        new_values[isnull(new_values)] = value
+
         if self._null_fill_value:
-            return self._simple_new(self.sp_values, self.sp_index,
+            return self._simple_new(new_values, self.sp_index,
                                     fill_value=value)
         else:
-            new_values = self.sp_values.copy()
-            new_values[isnull(new_values)] = value
             return self._simple_new(new_values, self.sp_index,
                                     fill_value=self.fill_value)
 
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
@@ -1245,6 +1245,53 @@ def test_from_to_scipy_object(spmatrix, fill_value):
     assert sdf.to_coo().dtype == res_dtype
 
 
+def test_from_scipy_object_fillna(spmatrix):
+    # GH 16112
+    columns = list('cd')
+    index = list('ab')
+    tm.skip_if_no_package('scipy', max_version='0.19.0')
+
+    # Explicitly convert one zero to np.nan
+    arr = np.array([[2.0, 0.0], [np.nan, 1.0]])
+    try:
+        spm = spmatrix(arr)
+        assert spm.dtype == arr.dtype
+    except (TypeError, AssertionError):
+        # If conversion to sparse fails for this spmatrix type and arr.dtype,
+        # then the combination is not currently supported in NumPy, so we
+        # return early and skip the remaining checks for it
+        return
+
+    sdf = pd.SparseDataFrame(spm, index=index, columns=columns).fillna(-1.0)
+
+    # The returned frame should have all NaN values filled with -1.0
+    expected = pd.SparseDataFrame({"c": {"a": 2.0, "b": np.nan},
+                                   "d": {"a": np.nan, "b": 1.0}}).fillna(-1.0)
+    expected_bsr = pd.SparseDataFrame({"c": {"a": 2.0, "b": np.nan},
+                                       "d": {"a": 0.0, "b": 1.0}}).fillna(-1.0)
+
+    from scipy.sparse.bsr import bsr_matrix
+    from scipy.sparse.dia import dia_matrix
+    from scipy.sparse.dok import dok_matrix
+    if spmatrix == bsr_matrix:
+        # A SparseDataFrame built from a bsr matrix keeps its 0s as they are,
+        # so only the explicit NaN value needs to be filled with -1.
+        tm.assert_frame_equal(sdf.to_dense(), expected_bsr.to_dense())
+    elif spmatrix == dia_matrix:
+        # Building a SparseDataFrame from a dia matrix hits a separate bug
+        # (GH 16179), so this case is marked as an expected failure for now.
+        pytest.xfail("Initialization of SparseDataFrame with dia_matrix has bugs (see GH 16179)")
+    elif spmatrix == dok_matrix:
+        # On Python 2, building a SparseDataFrame from a dok matrix hits a
+        # separate bug (GH 16179), so this case is marked as expected failure.
+        pytest.xfail("Initialization of SparseDataFrame with dok_matrix has bugs (see GH 16179)")
+    else:
+        # The internal representations can differ.
+        # This test is here to ensure that all nan values are filled,
+        # regardless of origin.
+        tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
+
+
 class TestSparseDataFrameArithmetic(tm.TestCase):
 
     def test_numeric_op_scalar(self):