Skip to content

Commit a532d09

Browse files
BUG: Fixed GH16112 except for dia and dok formats
1 parent 075eca1 commit a532d09

File tree

2 files changed

+51
-3
lines changed

2 files changed

+51
-3
lines changed

pandas/core/sparse/array.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -595,12 +595,13 @@ def fillna(self, value, downcast=None):
595595
if issubclass(self.dtype.type, np.floating):
596596
value = float(value)
597597

598+
new_values = self.sp_values.copy()
599+
new_values[isnull(new_values)] = value
600+
598601
if self._null_fill_value:
599-
return self._simple_new(self.sp_values, self.sp_index,
602+
return self._simple_new(new_values, self.sp_index,
600603
fill_value=value)
601604
else:
602-
new_values = self.sp_values.copy()
603-
new_values[isnull(new_values)] = value
604605
return self._simple_new(new_values, self.sp_index,
605606
fill_value=self.fill_value)
606607

pandas/tests/sparse/test_frame.py

+47
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,53 @@ def test_from_to_scipy_object(spmatrix, fill_value):
12451245
assert sdf.to_coo().dtype == res_dtype
12461246

12471247

1248+
def test_from_scipy_object_fillna(spmatrix):
1249+
# GH 16112
1250+
columns = list('cd')
1251+
index = list('ab')
1252+
tm.skip_if_no_package('scipy', max_version='0.19.0')
1253+
1254+
# Explicitly convert one zero to np.nan
1255+
arr = np.array([[2.0, 0.0], [np.nan, 1.0]])
1256+
try:
1257+
spm = spmatrix(arr)
1258+
assert spm.dtype == arr.dtype
1259+
except (TypeError, AssertionError):
1260+
# If conversion to sparse fails for this spmatrix type and arr.dtype,
1261+
# then the combination is not currently supported in NumPy, so we
1262+
# can just skip testing it thoroughly
1263+
return
1264+
1265+
sdf = pd.SparseDataFrame(spm, index=index, columns=columns).fillna(-1.0)
1266+
1267+
# Returning frame should fill all nan values with -1.0
1268+
expected = pd.SparseDataFrame({"c": {"a": 2.0, "b": np.nan},
1269+
"d": {"a": np.nan, "b": 1.0}}).fillna(-1.0)
1270+
expected_bsr = pd.SparseDataFrame({"c": {"a": 2.0, "b": np.nan},
1271+
"d": {"a": 0.0, "b": 1.0}}).fillna(-1.0)
1272+
1273+
from scipy.sparse.bsr import bsr_matrix
1274+
from scipy.sparse.dia import dia_matrix
1275+
from scipy.sparse.dok import dok_matrix
1276+
if spmatrix == bsr_matrix:
1277+
# A SparseDataFrame from a bsr matrix does not fill 0s
1278+
# Therefore, only the explicit nan value needs to be filled with -1
1279+
tm.assert_frame_equal(sdf.to_dense(), expected_bsr.to_dense())
1280+
elif spmatrix == dia_matrix:
1281+
# the dia matrix has a bug of a different nature,
1282+
# so is currently passed in this test suite
1283+
pytest.xfail("Initialization of SparseDataFrame with dia_matrix has bugs (see GH 16179)")
1284+
elif spmatrix == dok_matrix:
1285+
# the dok matrix in python2 has a bug of a different nature,
1286+
# so is currently passed in this test suite
1287+
pytest.xfail("Initialization of SparseDataFrame with dok_matrix has bugs (see GH 16179)")
1288+
else:
1289+
# The internal representations can differ.
1290+
# This test is here to ensure that all nan values are filled,
1291+
# regardless of origin.
1292+
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
1293+
1294+
12481295
class TestSparseDataFrameArithmetic(tm.TestCase):
12491296

12501297
def test_numeric_op_scalar(self):

0 commit comments

Comments
 (0)