diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 60103024909a0..a22aaa4077382 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -111,6 +111,7 @@ These changes conform sparse handling to return the correct types and work to ma s.take([1, 2, 3]) - Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`) +- Bug in ``SparseArray[]`` where indexing with tuples is not handled properly (:issue:`12966`) - Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`) - Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`) - Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`) @@ -126,6 +127,8 @@ These changes conform sparse handling to return the correct types and work to ma - Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`) - Bug in ``SparseArray.to_dense()`` incorrectly handle ``fill_value`` (:issue:`12797`) - Bug in ``pd.concat()`` of ``SparseSeries`` results in dense (:issue:`10536`) +- Bug in ``pd.concat()`` of ``SparseDataFrame`` incorrectly handles ``fill_value`` (:issue:`9765`) +- Bug in ``pd.concat()`` of ``SparseDataFrame`` may raise ``AttributeError`` (:issue:`12174`) - Bug in ``SparseArray.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`) .. 
_whatsnew_0181.api: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d47c99db1b17c..abfc5c989056e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4872,6 +4872,11 @@ def is_null(self): values = self.block.values if self.block.is_categorical: values_flat = values.categories + elif self.block.is_sparse: + # fill_value is not NaN and have holes + if not values._null_fill_value and values.sp_index.ngaps > 0: + return False + values_flat = values.ravel(order='K') else: values_flat = values.ravel(order='K') total_len = values_flat.shape[0] @@ -4904,6 +4909,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): pass elif getattr(self.block, 'is_categorical', False): pass + elif getattr(self.block, 'is_sparse', False): + pass else: missing_arr = np.empty(self.shape, dtype=empty_dtype) missing_arr.fill(fill_value) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 486dbaaa624d9..ff199276c1401 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -278,14 +278,18 @@ def __getitem__(self, key): """ if com.is_integer(key): return self._get_val_at(key) + elif isinstance(key, tuple): + data_slice = self.values[key] else: if isinstance(key, SparseArray): key = np.asarray(key) + if hasattr(key, '__len__') and len(self) != len(key): return self.take(key) else: data_slice = self.values[key] - return self._constructor(data_slice) + + return self._constructor(data_slice) def __getslice__(self, i, j): if i < 0: diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 1fe58922e85a5..032b0f18b6482 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -115,9 +115,12 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', if fastpath: # data is an ndarray, index is defined - data = SingleBlockManager(data, index, fastpath=True) + + if not isinstance(data, SingleBlockManager): + data = SingleBlockManager(data, index, fastpath=True) if copy: data = data.copy() 
+ else: if data is None: diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 1786123191866..3301bc4e00209 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -347,6 +347,26 @@ def test_getslice(self): exp = SparseArray(self.arr.values[:0]) tm.assert_sp_array_equal(result, exp) + def test_getslice_tuple(self): + dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) + + sparse = SparseArray(dense) + res = sparse[4:, ] + exp = SparseArray(dense[4:, ]) + tm.assert_sp_array_equal(res, exp) + + sparse = SparseArray(dense, fill_value=0) + res = sparse[4:, ] + exp = SparseArray(dense[4:, ], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + with tm.assertRaises(IndexError): + sparse[4:, :] + + with tm.assertRaises(IndexError): + # check numpy compat + dense[4:, :] + def test_binary_operators(self): data1 = np.random.randn(20) data2 = np.random.randn(20) diff --git a/pandas/sparse/tests/test_combine_concat.py b/pandas/sparse/tests/test_combine_concat.py new file mode 100644 index 0000000000000..fcdc6d9580dd5 --- /dev/null +++ b/pandas/sparse/tests/test_combine_concat.py @@ -0,0 +1,364 @@ +# pylint: disable-msg=E1101,W0612 + +import nose # noqa +import numpy as np +import pandas as pd +import pandas.util.testing as tm + + +class TestSparseSeriesConcat(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_concat(self): + val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) + val2 = np.array([3, np.nan, 4, 0, 0]) + + for kind in ['integer', 'block']: + sparse1 = pd.SparseSeries(val1, name='x', kind=kind) + sparse2 = pd.SparseSeries(val2, name='y', kind=kind) + + res = pd.concat([sparse1, sparse2]) + exp = pd.concat([pd.Series(val1), pd.Series(val2)]) + exp = pd.SparseSeries(exp, kind=kind) + tm.assert_sp_series_equal(res, exp) + + sparse1 = pd.SparseSeries(val1, fill_value=0, name='x', kind=kind) + sparse2 = pd.SparseSeries(val2, fill_value=0, name='y', kind=kind) + + res = 
pd.concat([sparse1, sparse2]) + exp = pd.concat([pd.Series(val1), pd.Series(val2)]) + exp = pd.SparseSeries(exp, fill_value=0, kind=kind) + tm.assert_sp_series_equal(res, exp) + + def test_concat_axis1(self): + val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) + val2 = np.array([3, np.nan, 4, 0, 0]) + + sparse1 = pd.SparseSeries(val1, name='x') + sparse2 = pd.SparseSeries(val2, name='y') + + res = pd.concat([sparse1, sparse2], axis=1) + exp = pd.concat([pd.Series(val1, name='x'), + pd.Series(val2, name='y')], axis=1) + exp = pd.SparseDataFrame(exp) + tm.assert_sp_frame_equal(res, exp) + + def test_concat_different_fill(self): + val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) + val2 = np.array([3, np.nan, 4, 0, 0]) + + for kind in ['integer', 'block']: + sparse1 = pd.SparseSeries(val1, name='x', kind=kind) + sparse2 = pd.SparseSeries(val2, name='y', kind=kind, fill_value=0) + + res = pd.concat([sparse1, sparse2]) + exp = pd.concat([pd.Series(val1), pd.Series(val2)]) + exp = pd.SparseSeries(exp, kind=kind) + tm.assert_sp_series_equal(res, exp) + + res = pd.concat([sparse2, sparse1]) + exp = pd.concat([pd.Series(val2), pd.Series(val1)]) + exp = pd.SparseSeries(exp, kind=kind, fill_value=0) + tm.assert_sp_series_equal(res, exp) + + def test_concat_axis1_different_fill(self): + val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) + val2 = np.array([3, np.nan, 4, 0, 0]) + + sparse1 = pd.SparseSeries(val1, name='x') + sparse2 = pd.SparseSeries(val2, name='y', fill_value=0) + + res = pd.concat([sparse1, sparse2], axis=1) + exp = pd.concat([pd.Series(val1, name='x'), + pd.Series(val2, name='y')], axis=1) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + def test_concat_different_kind(self): + val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) + val2 = np.array([3, np.nan, 4, 0, 0]) + + sparse1 = pd.SparseSeries(val1, name='x', kind='integer') + sparse2 = pd.SparseSeries(val2, name='y', kind='block', fill_value=0) + + res = 
pd.concat([sparse1, sparse2]) + exp = pd.concat([pd.Series(val1), pd.Series(val2)]) + exp = pd.SparseSeries(exp, kind='integer') + tm.assert_sp_series_equal(res, exp) + + res = pd.concat([sparse2, sparse1]) + exp = pd.concat([pd.Series(val2), pd.Series(val1)]) + exp = pd.SparseSeries(exp, kind='block', fill_value=0) + tm.assert_sp_series_equal(res, exp) + + def test_concat_sparse_dense(self): + # use first input's fill_value + val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) + val2 = np.array([3, np.nan, 4, 0, 0]) + + for kind in ['integer', 'block']: + sparse = pd.SparseSeries(val1, name='x', kind=kind) + dense = pd.Series(val2, name='y') + + res = pd.concat([sparse, dense]) + exp = pd.concat([pd.Series(val1), dense]) + exp = pd.SparseSeries(exp, kind=kind) + tm.assert_sp_series_equal(res, exp) + + res = pd.concat([dense, sparse, dense]) + exp = pd.concat([dense, pd.Series(val1), dense]) + exp = pd.SparseSeries(exp, kind=kind) + tm.assert_sp_series_equal(res, exp) + + sparse = pd.SparseSeries(val1, name='x', kind=kind, fill_value=0) + dense = pd.Series(val2, name='y') + + res = pd.concat([sparse, dense]) + exp = pd.concat([pd.Series(val1), dense]) + exp = pd.SparseSeries(exp, kind=kind, fill_value=0) + tm.assert_sp_series_equal(res, exp) + + res = pd.concat([dense, sparse, dense]) + exp = pd.concat([dense, pd.Series(val1), dense]) + exp = pd.SparseSeries(exp, kind=kind, fill_value=0) + tm.assert_sp_series_equal(res, exp) + + +class TestSparseDataFrameConcat(tm.TestCase): + + _multiprocess_can_split_ = True + + def setUp(self): + + self.dense1 = pd.DataFrame({'A': [0., 1., 2., np.nan], + 'B': [0., 0., 0., 0.], + 'C': [np.nan, np.nan, np.nan, np.nan], + 'D': [1., 2., 3., 4.]}) + + self.dense2 = pd.DataFrame({'A': [5., 6., 7., 8.], + 'B': [np.nan, 0., 7., 8.], + 'C': [5., 6., np.nan, np.nan], + 'D': [np.nan, np.nan, np.nan, np.nan]}) + + self.dense3 = pd.DataFrame({'E': [5., 6., 7., 8.], + 'F': [np.nan, 0., 7., 8.], + 'G': [5., 6., np.nan, np.nan], + 'H': [np.nan, 
np.nan, np.nan, np.nan]}) + + def test_concat(self): + # fill_value = np.nan + sparse = self.dense1.to_sparse() + sparse2 = self.dense2.to_sparse() + + res = pd.concat([sparse, sparse]) + exp = pd.concat([self.dense1, self.dense1]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2, sparse2]) + exp = pd.concat([self.dense2, self.dense2]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse, sparse2]) + exp = pd.concat([self.dense1, self.dense2]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2, sparse]) + exp = pd.concat([self.dense2, self.dense1]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + # fill_value = 0 + sparse = self.dense1.to_sparse(fill_value=0) + sparse2 = self.dense2.to_sparse(fill_value=0) + + res = pd.concat([sparse, sparse]) + exp = pd.concat([self.dense1, self.dense1]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2, sparse2]) + exp = pd.concat([self.dense2, self.dense2]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse, sparse2]) + exp = pd.concat([self.dense1, self.dense2]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2, sparse]) + exp = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + def test_concat_different_fill_value(self): + # 1st fill_value will be used + sparse = self.dense1.to_sparse() + sparse2 = self.dense2.to_sparse(fill_value=0) + + res = pd.concat([sparse, sparse2]) + exp = pd.concat([self.dense1, self.dense2]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2, sparse]) + exp = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, 
exp) + + def test_concat_different_columns(self): + # fill_value = np.nan + sparse = self.dense1.to_sparse() + sparse3 = self.dense3.to_sparse() + + res = pd.concat([sparse, sparse3]) + exp = pd.concat([self.dense1, self.dense3]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse3, sparse]) + exp = pd.concat([self.dense3, self.dense1]).to_sparse() + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + # fill_value = 0 + sparse = self.dense1.to_sparse(fill_value=0) + sparse3 = self.dense3.to_sparse(fill_value=0) + + res = pd.concat([sparse, sparse3]) + exp = pd.concat([self.dense1, self.dense3]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse3, sparse]) + exp = pd.concat([self.dense3, self.dense1]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + # different fill values + sparse = self.dense1.to_sparse() + sparse3 = self.dense3.to_sparse(fill_value=0) + # each columns keeps its fill_value, thus compare in dense + res = pd.concat([sparse, sparse3]) + exp = pd.concat([self.dense1, self.dense3]) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + res = pd.concat([sparse3, sparse]) + exp = pd.concat([self.dense3, self.dense1]) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + def test_concat_series(self): + # fill_value = np.nan + sparse = self.dense1.to_sparse() + sparse2 = self.dense2.to_sparse() + + for col in ['A', 'D']: + res = pd.concat([sparse, sparse2[col]]) + exp = pd.concat([self.dense1, self.dense2[col]]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2[col], sparse]) + exp = pd.concat([self.dense2[col], self.dense1]).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + # fill_value = 0 + sparse = self.dense1.to_sparse(fill_value=0) + sparse2 = 
self.dense2.to_sparse(fill_value=0) + + for col in ['C', 'D']: + res = pd.concat([sparse, sparse2[col]]) + exp = pd.concat([self.dense1, + self.dense2[col]]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse2[col], sparse]) + exp = pd.concat([self.dense2[col], + self.dense1]).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + def test_concat_axis1(self): + # fill_value = np.nan + sparse = self.dense1.to_sparse() + sparse3 = self.dense3.to_sparse() + + res = pd.concat([sparse, sparse3], axis=1) + exp = pd.concat([self.dense1, self.dense3], axis=1).to_sparse() + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse3, sparse], axis=1) + exp = pd.concat([self.dense3, self.dense1], axis=1).to_sparse() + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + # fill_value = 0 + sparse = self.dense1.to_sparse(fill_value=0) + sparse3 = self.dense3.to_sparse(fill_value=0) + + res = pd.concat([sparse, sparse3], axis=1) + exp = pd.concat([self.dense1, self.dense3], + axis=1).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + res = pd.concat([sparse3, sparse], axis=1) + exp = pd.concat([self.dense3, self.dense1], + axis=1).to_sparse(fill_value=0) + exp._default_fill_value = np.nan + tm.assert_sp_frame_equal(res, exp) + + # different fill values + sparse = self.dense1.to_sparse() + sparse3 = self.dense3.to_sparse(fill_value=0) + # each columns keeps its fill_value, thus compare in dense + res = pd.concat([sparse, sparse3], axis=1) + exp = pd.concat([self.dense1, self.dense3], axis=1) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + res = pd.concat([sparse3, sparse], axis=1) + exp = pd.concat([self.dense3, self.dense1], axis=1) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + def 
test_concat_sparse_dense(self): + sparse = self.dense1.to_sparse() + + res = pd.concat([sparse, self.dense2]) + exp = pd.concat([self.dense1, self.dense2]) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + res = pd.concat([self.dense2, sparse]) + exp = pd.concat([self.dense2, self.dense1]) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + sparse = self.dense1.to_sparse(fill_value=0) + + res = pd.concat([sparse, self.dense2]) + exp = pd.concat([self.dense1, self.dense2]) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + res = pd.concat([self.dense2, sparse]) + exp = pd.concat([self.dense2, self.dense1]) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + res = pd.concat([self.dense3, sparse], axis=1) + exp = pd.concat([self.dense3, self.dense1], axis=1) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res, exp) + + res = pd.concat([sparse, self.dense3], axis=1) + exp = pd.concat([self.dense1, self.dense3], axis=1) + self.assertIsInstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res, exp) + + +if __name__ == '__main__': + import nose # noqa + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 097bdee82a589..f8955e526b3da 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -1040,120 +1040,6 @@ def _check_results_to_coo(results, check): assert_equal(il, il_result) assert_equal(jl, jl_result) - def test_concat(self): - val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) - val2 = np.array([3, np.nan, 4, 0, 0]) - - for kind in ['integer', 'block']: - sparse1 = pd.SparseSeries(val1, name='x', kind=kind) - sparse2 = pd.SparseSeries(val2, name='y', kind=kind) - - res = pd.concat([sparse1, 
sparse2]) - exp = pd.concat([pd.Series(val1), pd.Series(val2)]) - exp = pd.SparseSeries(exp, kind=kind) - tm.assert_sp_series_equal(res, exp) - - sparse1 = pd.SparseSeries(val1, fill_value=0, name='x', kind=kind) - sparse2 = pd.SparseSeries(val2, fill_value=0, name='y', kind=kind) - - res = pd.concat([sparse1, sparse2]) - exp = pd.concat([pd.Series(val1), pd.Series(val2)]) - exp = pd.SparseSeries(exp, fill_value=0, kind=kind) - tm.assert_sp_series_equal(res, exp) - - def test_concat_axis1(self): - val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) - val2 = np.array([3, np.nan, 4, 0, 0]) - - sparse1 = pd.SparseSeries(val1, name='x') - sparse2 = pd.SparseSeries(val2, name='y') - - res = pd.concat([sparse1, sparse2], axis=1) - exp = pd.concat([pd.Series(val1, name='x'), - pd.Series(val2, name='y')], axis=1) - exp = pd.SparseDataFrame(exp) - tm.assert_sp_frame_equal(res, exp) - - def test_concat_different_fill(self): - val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) - val2 = np.array([3, np.nan, 4, 0, 0]) - - for kind in ['integer', 'block']: - sparse1 = pd.SparseSeries(val1, name='x', kind=kind) - sparse2 = pd.SparseSeries(val2, name='y', kind=kind, fill_value=0) - - res = pd.concat([sparse1, sparse2]) - exp = pd.concat([pd.Series(val1), pd.Series(val2)]) - exp = pd.SparseSeries(exp, kind=kind) - tm.assert_sp_series_equal(res, exp) - - res = pd.concat([sparse2, sparse1]) - exp = pd.concat([pd.Series(val2), pd.Series(val1)]) - exp = pd.SparseSeries(exp, kind=kind, fill_value=0) - tm.assert_sp_series_equal(res, exp) - - def test_concat_axis1_different_fill(self): - val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) - val2 = np.array([3, np.nan, 4, 0, 0]) - - sparse1 = pd.SparseSeries(val1, name='x') - sparse2 = pd.SparseSeries(val2, name='y', fill_value=0) - - res = pd.concat([sparse1, sparse2], axis=1) - exp = pd.concat([pd.Series(val1, name='x'), - pd.Series(val2, name='y')], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) - 
tm.assert_frame_equal(res.to_dense(), exp) - - def test_concat_different_kind(self): - val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) - val2 = np.array([3, np.nan, 4, 0, 0]) - - sparse1 = pd.SparseSeries(val1, name='x', kind='integer') - sparse2 = pd.SparseSeries(val2, name='y', kind='block', fill_value=0) - - res = pd.concat([sparse1, sparse2]) - exp = pd.concat([pd.Series(val1), pd.Series(val2)]) - exp = pd.SparseSeries(exp, kind='integer') - tm.assert_sp_series_equal(res, exp) - - res = pd.concat([sparse2, sparse1]) - exp = pd.concat([pd.Series(val2), pd.Series(val1)]) - exp = pd.SparseSeries(exp, kind='block', fill_value=0) - tm.assert_sp_series_equal(res, exp) - - def test_concat_sparse_dense(self): - # use first input's fill_value - val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) - val2 = np.array([3, np.nan, 4, 0, 0]) - - for kind in ['integer', 'block']: - sparse = pd.SparseSeries(val1, name='x', kind=kind) - dense = pd.Series(val2, name='y') - - res = pd.concat([sparse, dense]) - exp = pd.concat([pd.Series(val1), dense]) - exp = pd.SparseSeries(exp, kind=kind) - tm.assert_sp_series_equal(res, exp) - - res = pd.concat([dense, sparse, dense]) - exp = pd.concat([dense, pd.Series(val1), dense]) - exp = pd.SparseSeries(exp, kind=kind) - tm.assert_sp_series_equal(res, exp) - - sparse = pd.SparseSeries(val1, name='x', kind=kind, fill_value=0) - dense = pd.Series(val2, name='y') - - res = pd.concat([sparse, dense]) - exp = pd.concat([pd.Series(val1), dense]) - exp = pd.SparseSeries(exp, kind=kind, fill_value=0) - tm.assert_sp_series_equal(res, exp) - - res = pd.concat([dense, sparse, dense]) - exp = pd.concat([dense, pd.Series(val1), dense]) - exp = pd.SparseSeries(exp, kind=kind, fill_value=0) - tm.assert_sp_series_equal(res, exp) - def _dense_series_compare(s, f): result = f(s) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 671c345898ec2..862e2282bae2f 100644 --- a/pandas/tests/test_reshape.py +++ 
b/pandas/tests/test_reshape.py @@ -388,8 +388,8 @@ def test_dataframe_dummies_with_na(self): 'B_b': [1., 1, 0, 0], 'B_c': [0., 0, 1, 0], 'B_nan': [0., 0, 0, 1]}) - expected = expected[['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan' - ]] + expected = expected[['C', 'A_a', 'A_b', 'A_nan', + 'B_b', 'B_c', 'B_nan']] assert_frame_equal(result, expected) result = get_dummies(df, dummy_na=False, sparse=self.sparse) @@ -407,8 +407,8 @@ def test_dataframe_dummies_with_categorical(self): 'B_c': [0., 0, 1], 'cat_x': [1., 0, 0], 'cat_y': [0., 1, 1]}) - expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y' - ]] + expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c', + 'cat_x', 'cat_y']] assert_frame_equal(result, expected) # GH12402 Add a new parameter `drop_first` to avoid collinearity diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 84a431393b0bf..4ec98728398c5 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1030,7 +1030,8 @@ def get_result(self): if not self.copy: new_data._consolidate_inplace() - return (self.objs[0]._from_axes(new_data, self.new_axes) + cons = _concat._get_frame_result_type(new_data, self.objs) + return (cons._from_axes(new_data, self.new_axes) .__finalize__(self, method='concat')) def _get_result_dim(self): diff --git a/pandas/types/concat.py b/pandas/types/concat.py index 228c48041c0f8..eb18023d6409d 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -67,6 +67,19 @@ def _get_series_result_type(result): return Series +def _get_frame_result_type(result, objs): + """ + return appropriate class of DataFrame-like concat + if any block is SparseBlock, return SparseDataFrame + otherwise, return 1st obj + """ + if any(b.is_sparse for b in result.blocks): + from pandas.sparse.api import SparseDataFrame + return SparseDataFrame + else: + return objs[0] + + def _concat_compat(to_concat, axis=0): """ provide concatenation of an array of arrays each of which is a single