Skip to content

Commit ee6412a

Browse files
kernc and jreback
authored and committed
BUG: Made SparseDataFrame.fillna() fill all NaNs
A continuation of #16178; closes #16112, closes #16178.
Author: Kernc <kerncece@gmail.com>
Author: keitakurita <kris337jbn@yahoo.co.jp>
This patch had conflicts when merged, resolved by Committer: Jeff Reback <jeff@reback.net>.
Closes #16892 from kernc/sparse-fillna and squashes the following commits:
c1cd33e [Kernc] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs
2974232 [Kernc] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs
4bc01a1 [keitakurita] BUG: Made SparseDataFrame.fillna() fill all NaNs
1 parent 09108fa commit ee6412a

File tree

3 files changed

+43
-9
lines changed

3 files changed

+43
-9
lines changed

doc/source/whatsnew/v0.21.0.txt

+3 -1
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ Indexing
259259
- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`)
260260
- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
261261
- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
262-
262+
263263
I/O
264264
^^^
265265

@@ -284,7 +284,9 @@ Groupby/Resample/Rolling
284284

285285
Sparse
286286
^^^^^^
287+
287288
- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`)
289+
- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`)
288290

289291

290292
Reshaping

pandas/core/sparse/array.py

+5 -8
Original file line number | Diff line number | Diff line change
@@ -595,14 +595,11 @@ def fillna(self, value, downcast=None):
595595
if issubclass(self.dtype.type, np.floating):
596596
value = float(value)
597597

598-
if self._null_fill_value:
599-
return self._simple_new(self.sp_values, self.sp_index,
600-
fill_value=value)
601-
else:
602-
new_values = self.sp_values.copy()
603-
new_values[isnull(new_values)] = value
604-
return self._simple_new(new_values, self.sp_index,
605-
fill_value=self.fill_value)
598+
new_values = np.where(isnull(self.sp_values), value, self.sp_values)
599+
fill_value = value if self._null_fill_value else self.fill_value
600+
601+
return self._simple_new(new_values, self.sp_index,
602+
fill_value=fill_value)
606603

607604
def sum(self, axis=0, *args, **kwargs):
608605
"""

pandas/tests/sparse/test_frame.py

+35
Original file line number | Diff line number | Diff line change
@@ -1271,6 +1271,41 @@ def test_from_scipy_correct_ordering(spmatrix):
12711271
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
12721272

12731273

1274+
def test_from_scipy_fillna(spmatrix):
1275+
# GH 16112
1276+
tm.skip_if_no_package('scipy')
1277+
1278+
arr = np.eye(3)
1279+
arr[1:, 0] = np.nan
1280+
1281+
try:
1282+
spm = spmatrix(arr)
1283+
assert spm.dtype == arr.dtype
1284+
except (TypeError, AssertionError):
1285+
# If conversion to sparse fails for this spmatrix type and arr.dtype,
1286+
# then the combination is not currently supported in NumPy, so we
1287+
# can just skip testing it thoroughly
1288+
return
1289+
1290+
sdf = pd.SparseDataFrame(spm).fillna(-1.0)
1291+
1292+
# Returning frame should fill all nan values with -1.0
1293+
expected = pd.SparseDataFrame({
1294+
0: pd.SparseSeries([1., -1, -1]),
1295+
1: pd.SparseSeries([np.nan, 1, np.nan]),
1296+
2: pd.SparseSeries([np.nan, np.nan, 1]),
1297+
}, default_fill_value=-1)
1298+
1299+
# fill_value is expected to be what .fillna() above was called with
1300+
# We don't use -1 as initial fill_value in expected SparseSeries
1301+
# construction because this way we obtain "compressed" SparseArrays,
1302+
# avoiding having to construct them ourselves
1303+
for col in expected:
1304+
expected[col].fill_value = -1
1305+
1306+
tm.assert_sp_frame_equal(sdf, expected)
1307+
1308+
12741309
class TestSparseDataFrameArithmetic(object):
12751310

12761311
def test_numeric_op_scalar(self):

0 commit comments

Comments
 (0)