diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 4caf22357b1d3..4fbb97d11b5fa 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -296,6 +296,19 @@ API changes To insert a NaN, you must explicitly use ``np.nan``. See the :ref:`docs `. +- Previously, enlarging a mixed-dtype frame by setting a new row did not preserve dtypes, unlike ``.append``. Enlargement now preserves dtypes in the same way (related :issue:`2578`, :issue:`8176`): + + .. ipython:: python + + df = DataFrame([[True, 1],[False, 2]], columns = ["female","fitness"]) + df + df.dtypes + + # dtypes are now preserved + df.loc[2] = df.loc[1] + df + df.dtypes + .. _whatsnew_0150.dt: .dt accessor diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index dfc552e8df0d7..15bf8e8807836 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -303,12 +303,27 @@ def _setitem_with_indexer(self, indexer, value): "cannot set a frame with no defined columns" ) - index = self.obj._get_axis(0) - labels = _safe_append_to_index(index, indexer) - self.obj._data = self.obj.reindex_axis(labels, 0)._data + # append a Series + if isinstance(value, Series): + + value = value.reindex(index=self.obj.columns,copy=True) + value.name = indexer + + # a list-list + else: + + # must have conforming columns + if com.is_list_like(value): + if len(value) != len(self.obj.columns): + raise ValueError( + "cannot set a row with mismatched columns" + ) + + value = Series(value,index=self.obj.columns,name=indexer) + + self.obj._data = self.obj.append(value)._data self.obj._maybe_update_cacher(clear=True) - return getattr(self.obj, self.name).__setitem__(indexer, - value) + return self.obj # set using setitem (Panel and > dims) elif self.ndim >= 3: diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 7c55ea860f8d1..17bffcae056cf 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -2812,7 +2812,8 @@ def f(): df.loc[1] = df.loc[2] assert_frame_equal(df,expected) - expected = 
DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] }),dtype='float64') + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] })) df = df_orig.copy() df.loc[3] = df.loc[2] assert_frame_equal(df,expected) @@ -2864,6 +2865,41 @@ def f(): p.loc[:,:,'C'] = Series([30,32],index=p_orig.items) assert_panel_equal(p,expected) + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by appending + df = DataFrame([[True, 1],[False, 2]], + columns = ["female","fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = df.append(s) + + df.loc[2] = df.loc[1] + assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=['A','B']) + df.loc[0] = Series(1,index=range(4)) + assert_frame_equal(df,DataFrame(columns=['A','B'],index=[0])) + + # columns will align + df = DataFrame(columns=['A','B']) + df.loc[0] = Series(1,index=['B']) + assert_frame_equal(df,DataFrame([[np.nan, 1]], columns=['A','B'],index=[0],dtype='float64')) + + # list-like must conform + df = DataFrame(columns=['A','B']) + def f(): + df.loc[0] = [1,2,3] + self.assertRaises(ValueError, f) + + # these are coerced to float unavoidably (as it's a list-like to begin with) + df = DataFrame(columns=['A','B']) + df.loc[3] = [6,7] + assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B'],dtype='float64')) + def test_series_partial_set(self): # partial set with new index # Regression from GH4825 @@ -3013,15 +3049,6 @@ def f(): assert_frame_equal(df,DataFrame([[1]],index=['foo'],columns=[1])) assert_frame_equal(df,df2) - df = DataFrame(columns=['A','B']) - df.loc[3] = [6,7] - assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B'])) - - # no label overlap - df = DataFrame(columns=['A','B']) - df.loc[0] = Series(1,index=range(4)) - assert_frame_equal(df,DataFrame(columns=['A','B'],index=[0])) - # no index to start expected = DataFrame({ 0 : Series(1,index=range(4)) 
},columns=['A','B',0])