diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 8a48314de5f77..9e62ba22d8f96 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -362,6 +362,89 @@ New Behavior: s.index print(s.to_csv(path=None)) +Changes to dtype assignment behaviors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When a DataFrame's slice is updated with a new slice of the same +dtype, the dtype of the DataFrame will now remain the same. + +Previous Behavior: + +.. code-block:: python + + In [2]: df = pd.DataFrame({'a':[0, 1, 1], 'b':[100, 200, 300]}, dtype='uint32') + + In [3]: df.info() + + RangeIndex: 3 entries, 0 to 2 + Data columns (total 2 columns): + a 3 non-null uint32 + b 3 non-null uint32 + dtypes: uint32(2) + memory usage: 96.0 bytes + + In [4]: ix = df['a'] == 1 + + In [5]: df.loc[ix, 'b'] = df.loc[ix, 'b'] + + In [6]: df.info() + + RangeIndex: 3 entries, 0 to 2 + Data columns (total 2 columns): + a 3 non-null int64 + b 3 non-null int64 + dtypes: int64(2) + +New Behavior: + +.. ipython:: python + + df = pd.DataFrame({'a':[0, 1, 1], 'b':[100, 200, 300]}, dtype='uint32') + df.info() + ix = df['a'] == 1 + df.loc[ix, 'b'] = df.loc[ix, 'b'] + df.info() + + +When a DataFrame's integer slice is partially updated with a new slice of floats that +could be downcast to integer without losing precision, +the dtype of the slice will now be set to float instead of integer. + +Previous Behavior: + +.. code-block:: python + + In [4]: df = pd.DataFrame(np.array(range(1,10)).reshape(3,3), + ...: columns=list('abc'), + ...: index=[[4,4,8], [8,10,12]]) + + In [5]: df + Out[5]: + a b c + 4 8 1 2 3 + 10 4 5 6 + 8 12 7 8 9 + + In [6]: df.ix[4, 'c'] = np.array([0., 1.]) + + In [7]: df + Out[7]: + a b c + 4 8 1 2 0 + 10 4 5 1 + 8 12 7 8 9 + +New Behavior: + +.. 
ipython:: python + + df = pd.DataFrame(np.array(range(1,10)).reshape(3,3), + columns=list('abc'), + index=[[4,4,8], [8,10,12]]) + df + df.ix[4, 'c'] = np.array([0., 1.]) + df + .. _whatsnew_0180.enhancements.xarray: to_xarray @@ -1120,3 +1203,4 @@ Bug Fixes - Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`) - Bug when initializing categorical series with a scalar value. (:issue:`12336`) - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`) +- Bug where modifying a slice of a ``DataFrame`` with values of the same ``dtype`` could unexpectedly change the ``dtype`` of the ``DataFrame`` (:issue:`10503`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8563481c8564d..51bd9fd0e952c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -702,7 +702,10 @@ def _is_empty_indexer(indexer): values[indexer] = value # coerce and try to infer the dtypes of the result - if np.isscalar(value): + if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, + value.dtype): + dtype = value.dtype + elif np.isscalar(value): dtype, _ = _infer_dtype_from_scalar(value) else: dtype = 'infer' diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 4c7510783eda0..591ffc9a68c7a 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -361,6 +361,24 @@ def test_head_tail(self): self._compare(o.head(-3), o.head(7)) self._compare(o.tail(-3), o.tail(7)) + def test_dtype_after_slice_update(self): + # GH10503 + + # assigning the same type should not change the type + df1 = pd.DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, + dtype='uint32') + ix = df1['a'] == 1 + newb1 = df1.loc[ix, 'b'] + 1 + df1.loc[ix, 'b'] = newb1 + assert_equal(df1['a'].dtype, newb1.dtype) + + # assigning a new type should get the inferred type + df2 = pd.DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, + 
dtype='uint64') + newb2 = df2.loc[ix, 'b'] + df1.loc[ix, 'b'] = newb2 + assert_equal(df1['a'].dtype, np.dtype('int64')) + def test_sample(self): # Fixes issue: 2419 diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 1c0986b025acc..9497dbc6ddeef 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -3256,12 +3256,12 @@ def test_multiindex_assignment(self): df.ix[4, 'c'] = arr assert_series_equal(df.ix[4, 'c'], Series(arr, index=[8, 10], name='c', - dtype='int64')) + dtype='float64')) # scalar ok df.ix[4, 'c'] = 10 assert_series_equal(df.ix[4, 'c'], Series(10, index=[8, 10], name='c', - dtype='int64')) + dtype='float64')) # invalid assignments def f():