diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 902a51ab03022..e3a069960ab6b 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -412,6 +412,41 @@ Pandas will detect this and raise ``IndexError``, rather than return an empty st >>> df.iloc[:,3:6] IndexError: out-of-bounds on slice (end) +.. _indexing.basics.partial_setting: + +Setting With Enlargement +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.13 + +The ``.loc/.ix/[]`` operations can perform enlargement when setting a non-existant key for that axis. + +In the ``Series`` case this is effectively an appending operation + +.. ipython:: python + + se = Series([1,2,3]) + se + se[5] = 5. + se + +A ``DataFrame`` can be enlarged on either axis via ``.loc`` + +.. ipython:: python + + dfi = DataFrame(np.arange(6).reshape(3,2), + columns=['A','B']) + dfi + dfi.loc[:,'C'] = dfi.loc[:,'A'] + dfi + +This is like an ``append`` operation on the ``DataFrame``. + +.. ipython:: python + + dfi.loc[3] = 5 + dfi + .. _indexing.basics.get_value: Fast scalar value getting and setting @@ -431,15 +466,20 @@ Similary to ``loc``, ``at`` provides **label** based scalar lookups, while, ``ia df.at[dates[5], 'A'] df.iat[3, 0] -You can also set using these same indexers. These have the additional -capability of enlarging an object. This method *always* returns a reference to -the object it modified, which in the case of enlargement, will be a **new object**: +You can also set using these same indexers. .. ipython:: python df.at[dates[5], 'E'] = 7 df.iat[3, 0] = 7 +``at`` may enlarge the object in-place as above if the indexer is missing. + +.. ipython:: python + + df.at[6, 0] = 7 + df + Boolean indexing ~~~~~~~~~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 69647939ab0d0..557c4b293a84e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -92,6 +92,9 @@ pandas 0.13 an alias of iteritems used to get around ``2to3``'s changes). (:issue:`4384`, :issue:`4375`, :issue:`4372`) - ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`) + - allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in + the index for that axis (:issue:`2578`) + - ``at`` now will enlarge the object inplace (and return the same) (:issue:`2578`) - ``HDFStore`` @@ -296,7 +299,7 @@ See :ref:`Internal Refactoring` - ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise if code argument out of range (:issue:`4519`, :issue:`4520`) - Fix boolean indexing on an empty series loses index names (:issue:`4235`), - infer_dtype works with empty arrays. + infer_dtype works with empty arrays. - Fix reindexing with multiple axes; if an axes match was not replacing the current axes, leading to a possible lazay frequency inference issue (:issue:`3317`) - Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 74cdb1ce35441..d1decc164484d 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -134,6 +134,54 @@ API changes df1 and df2 s1 and s2 +Indexing API Changes +~~~~~~~~~~~~~~~~~~~~ + + Prior to 0.13, it was impossible to use an indexer (``.loc/.iloc/.ix``) to set a value that + was not contained in the index of a particular axis. (:issue:`2578`). See more at :ref:`here` + + In the ``Series`` case this is effectively an appending operation + + .. ipython:: python + + s = Series([1,2,3]) + s + s[5] = 5. + s + + .. 
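+    Note that the positional indexers (``.iloc``/``.iat``) still refuse to
+    enlarge; a minimal illustrative sketch of the contrast (mirroring the new
+    positional setitem validation added in this change; exact error text may differ):
+
+    .. ipython:: python
+
+       s2 = Series([1,2,3])
+       s2.loc[5] = 5.    # label-based setting enlarges
+       s2
+       # s2.iloc[5] = 5. would raise IndexError -- positional setters cannot enlarge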
ipython:: python + + dfi = DataFrame(np.arange(6).reshape(3,2), + columns=['A','B']) + dfi + + This would previously ``KeyError`` + + .. ipython:: python + + dfi.loc[:,'C'] = dfi.loc[:,'A'] + dfi + + This is like an ``append`` operation. + + .. ipython:: python + + dfi.loc[3] = 5 + dfi + + A Panel setting operation on an arbitrary axis aligns the input to the Panel + + .. ipython:: python + + p = pd.Panel(np.arange(16).reshape(2,4,2), + items=['Item1','Item2'], + major_axis=pd.date_range('2001/1/12',periods=4), + minor_axis=['A','B'],dtype='float64') + p + p.loc[:,:,'C'] = Series([30,32],index=p.items) + p + p.loc[:,:,'C'] + Enhancements ~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e9f28122b43d..d184120185955 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1731,18 +1731,12 @@ def set_value(self, index, col, value): engine.set_value(series.values, index, value) return self except KeyError: - new_index, new_columns = self._expand_axes((index, col)) - result = self.reindex(index=new_index, columns=new_columns, - copy=False) - likely_dtype, value = _infer_dtype_from_scalar(value) - made_bigger = not np.array_equal(new_columns, self.columns) + # set using a non-recursive method & reset the cache + self.loc[index,col] = value + self._item_cache.pop(col,None) - # how to make this logic simpler? - if made_bigger: - com._possibly_cast_item(result, col, likely_dtype) - - return result.set_value(index, col, value) + return self def irow(self, i, copy=False): return self._ixs(i, axis=0) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 64760cdba60ff..856e97ad163f2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,7 +1,7 @@ # pylint: disable=W0223 from datetime import datetime -from pandas.core.common import _asarray_tuplesafe +from pandas.core.common import _asarray_tuplesafe, is_list_like from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.compat import range, zip import pandas.compat as compat @@ -86,28 +86,120 @@ def __setitem__(self, key, value): if len(key) > self.ndim: raise IndexingError('only tuples of length <= %d supported', self.ndim) - indexer = self._convert_tuple(key) + indexer = self._convert_tuple(key, is_setter=True) else: - indexer = self._convert_to_indexer(key) + indexer = self._convert_to_indexer(key, is_setter=True) self._setitem_with_indexer(indexer, value) def _has_valid_tuple(self, key): pass - def _convert_tuple(self, key): + def _convert_tuple(self, key, is_setter=False): keyidx = [] for i, k in enumerate(key): - idx = self._convert_to_indexer(k, axis=i) + idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter) keyidx.append(idx) return tuple(keyidx) + def _has_valid_setitem_indexer(self, indexer): + return True + + def _has_valid_positional_setitem_indexer(self, indexer): + """ validate that an positional indexer cannot enlarge its target + will raise if needed, does not modify the indexer externally """ + if isinstance(indexer, dict): + raise IndexError("{0} cannot enlarge its target object".format(self.name)) + else: + if not isinstance(indexer, tuple): + indexer = self._tuplify(indexer) + for ax, i in zip(self.obj.axes,indexer): + if isinstance(i, slice): + # should check the stop slice? + pass + elif is_list_like(i): + # should check the elements? 
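+                    # elements of a list-like positional indexer are not
+                    # bounds-checked here; only a plain integer key is checked
+                    # below, and a dict (the missing-key marker produced by
+                    # _convert_to_indexer when is_setter=True) always raises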
+ pass + elif com.is_integer(i): + if i >= len(ax): + raise IndexError("{0} cannot enlarge its target object".format(self.name)) + elif isinstance(i, dict): + raise IndexError("{0} cannot enlarge its target object".format(self.name)) + + return True + def _setitem_with_indexer(self, indexer, value): + self._has_valid_setitem_indexer(indexer) + # also has the side effect of consolidating in-place - # mmm, spaghetti + from pandas import Panel, DataFrame, Series + + # maybe partial set + take_split_path = self.obj._is_mixed_type + if isinstance(indexer,tuple): + nindexer = [] + for i, idx in enumerate(indexer): + if isinstance(idx, dict): + + # reindex the axis to the new value + # and set inplace + key,_ = _convert_missing_indexer(idx) + + # if this is the items axes, then take the main missing path + # first; this correctly sets the dtype and avoids cache issues + # essentially this separates out the block that is needed to possibly + # be modified + if self.ndim > 1 and i == self.obj._info_axis_number: + + # add the new item, and set the value + new_indexer = _convert_from_missing_indexer_tuple(indexer) + self.obj[key] = np.nan + self.obj.loc[new_indexer] = value + return self.obj + + # reindex the axis + index = self.obj._get_axis(i) + labels = _safe_append_to_index(index, key) + self.obj._data = self.obj.reindex_axis(labels,i)._data + + nindexer.append(labels.get_loc(key)) - if self.obj._is_mixed_type: + else: + nindexer.append(idx) + + indexer = tuple(nindexer) + else: + + indexer, missing = _convert_missing_indexer(indexer) + + if missing: + + # reindex the axis to the new value + # and set inplace + if self.ndim == 1: + index = self.obj.index + if len(index) == 0: + new_index = Index([indexer]) + else: + new_index = _safe_append_to_index(index, indexer) + + new_values = np.concatenate([self.obj.values, [value]]) + self.obj._data = self.obj._constructor(new_values, index=new_index, name=self.obj.name) + return self.obj + + elif self.ndim == 2: + index = self.obj._get_axis(0) + labels = _safe_append_to_index(index, indexer) + self.obj._data = self.obj.reindex_axis(labels,0)._data + return getattr(self.obj,self.name).__setitem__(indexer,value) + + # set using setitem (Panel and > dims) + elif self.ndim >= 3: + return self.obj.__setitem__(indexer,value) + + # align and set the values + if take_split_path: if not isinstance(indexer, tuple): indexer = self._tuplify(indexer) @@ -192,14 +284,73 @@ def setter(item, v): def _align_series(self, indexer, ser): # indexer to assign Series can be tuple or scalar if isinstance(indexer, tuple): + + aligners = [ not _is_null_slice(idx) for idx in indexer ] + single_aligner = sum(aligners) == 1 + is_frame = self.obj.ndim == 2 + is_panel = self.obj.ndim >= 3 + + # are we a single alignable value on a non-primary + # dim (e.g. panel: 1,2, or frame: 0) ? 
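+            # e.g. frame: df.loc[key, :] = ser aligns ser against df.columns;
+            #      panel: p.loc[:, :, 'C'] = ser aligns ser against p.items
+            #      and is broadcast along the remaining axes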
+ # hence need to align to a single axis dimension + # rather that find all valid dims + + # frame + if is_frame: + single_aligner = single_aligner and aligners[0] + + # panel + elif is_panel: + single_aligner = single_aligner and (aligners[1] or aligners[2]) + + obj = self.obj for i, idx in enumerate(indexer): - ax = self.obj.axes[i] + ax = obj.axes[i] + + # multiple aligners (or null slices) if com._is_sequence(idx) or isinstance(idx, slice): + if single_aligner and _is_null_slice(idx): + continue new_ix = ax[idx] + if not is_list_like(new_ix): + new_ix = Index([new_ix]) if ser.index.equals(new_ix): return ser.values.copy() return ser.reindex(new_ix).values + # 2 dims + elif single_aligner and is_frame: + + # reindex along index + ax = self.obj.axes[1] + if ser.index.equals(ax): + return ser.values.copy() + return ser.reindex(ax).values + + # >2 dims + elif single_aligner: + + broadcast = [] + for n, labels in enumerate(self.obj._get_plane_axes(i)): + + # reindex along the matching dimensions + if len(labels & ser.index): + ser = ser.reindex(labels) + else: + broadcast.append((n,len(labels))) + + # broadcast along other dims + ser = ser.values.copy() + for (axis,l) in broadcast: + shape = [ -1 ] * (len(broadcast)+1) + shape[axis] = l + ser = np.tile(ser,l).reshape(shape) + + if self.obj.ndim == 3: + ser = ser.T + + return ser + elif np.isscalar(indexer): ax = self.obj._get_axis(1) @@ -521,7 +672,7 @@ def _reindex(keys, level=None): return result - def _convert_to_indexer(self, obj, axis=0): + def _convert_to_indexer(self, obj, axis=0, is_setter=False): """ Convert indexing key into something we can use to do actual fancy indexing on an ndarray @@ -634,12 +785,23 @@ def _convert_to_indexer(self, obj, axis=0): mask = check == -1 if mask.any(): + + # mi here + if isinstance(obj, tuple) and is_setter: + return { 'key' : obj } raise KeyError('%s not in index' % objarr[mask]) return indexer else: - return labels.get_loc(obj) + try: + return labels.get_loc(obj) + except (KeyError): + + # allow a not found key only if we are a setter + if not is_list_like(obj) and is_setter: + return { 'key' : obj } + raise def _tuplify(self, loc): tup = [slice(None, None) for _ in range(self.ndim)] @@ -828,6 +990,9 @@ def _has_valid_type(self, key, axis): return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key) + def _has_valid_setitem_indexer(self, indexer): + self._has_valid_positional_setitem_indexer(indexer) + def _getitem_tuple(self, tup): self._has_valid_tuple(tup) @@ -860,7 +1025,6 @@ def _get_slice_axis(self, slice_obj, axis=0): return self.obj.take(slice_obj, axis=axis) def _getitem_axis(self, key, axis=0): - if isinstance(key, slice): self._has_valid_type(key,axis) return self._get_slice_axis(key, axis=axis) @@ -877,7 +1041,7 @@ def _getitem_axis(self, key, axis=0): return self._get_loc(key,axis=axis) - def _convert_to_indexer(self, obj, axis=0): + def _convert_to_indexer(self, obj, axis=0, is_setter=False): """ much simpler as we only have to deal with our valid types """ if self._has_valid_type(obj,axis): return obj @@ -900,14 +1064,12 @@ def __getitem__(self, key): else: raise ValueError('Invalid call for scalar access (getting)!') - if len(key) != self.obj.ndim: - raise ValueError('Not enough indexers for scalar access (getting)!') key = self._convert_key(key) return self.obj.get_value(*key) def __setitem__(self, key, value): if not isinstance(key, tuple): - raise ValueError('Invalid call for scalar access (setting)!') + key = self._tuplify(key) if len(key) != self.obj.ndim: 
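+            # a bare scalar key was expanded by _tuplify above into a
+            # full-length tuple of slices, so this only rejects tuples that
+            # were passed explicitly with too few indexers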
raise ValueError('Not enough indexers for scalar access (setting)!') key = self._convert_key(key) @@ -921,6 +1083,9 @@ class _AtIndexer(_ScalarAccessIndexer): class _iAtIndexer(_ScalarAccessIndexer): """ integer based scalar accessor """ + def _has_valid_setitem_indexer(self, indexer): + self._has_valid_positional_setitem_indexer(indexer) + def _convert_key(self, key): """ require integer args (and convert to label arguments) """ ckey = [] @@ -1028,6 +1193,12 @@ def _slice(self, indexer, axis=0): return self.obj._get_values(indexer) def _setitem_with_indexer(self, indexer, value): + + # need to delegate to the super setter + if isinstance(indexer, dict): + return super(_SeriesIndexer, self)._setitem_with_indexer(indexer, value) + + # fast access self.obj._set_values(indexer, value) def _check_bool_indexer(ax, key): @@ -1053,6 +1224,34 @@ def _check_bool_indexer(ax, key): return result +def _convert_missing_indexer(indexer): + """ reverse convert a missing indexer, which is a dict + return the scalar indexer and a boolean indicating if we converted """ + + if isinstance(indexer, dict): + + # a missing key (but not a tuple indexer) + indexer = indexer['key'] + + if isinstance(indexer, bool): + raise KeyError("cannot use a single bool to index into setitem") + return indexer, True + + return indexer, False + +def _convert_from_missing_indexer_tuple(indexer): + """ create a filtered indexer that doesn't have any missing indexers """ + def get_indexer(_idx): + return _idx['key'] if isinstance(_idx,dict) else _idx + return tuple([ get_indexer(_idx) for _i, _idx in enumerate(indexer) ]) + +def _safe_append_to_index(index, key): + """ a safe append to an index, if incorrect type, then catch and recreate """ + try: + return index.insert(len(index), key) + except: + return Index(np.concatenate([index.asobject.values,np.array([key])])) + def _maybe_convert_indices(indices, n): """ if we have negative indicies, translate to postive here if have indicies that are out-of-bounds, raise an IndexError """ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c6af7e27070a9..d025c7a7fcf6d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -9,7 +9,7 @@ from pandas.core.common import (_possibly_downcast_to_dtype, isnull, notnull, _NS_DTYPE, _TD_DTYPE, ABCSeries, ABCSparseSeries, - is_list_like) + is_list_like, _infer_dtype_from_scalar) from pandas.core.index import (Index, MultiIndex, _ensure_index, _handle_legacy_indexes) from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices @@ -460,6 +460,24 @@ def _try_cast_result(self, result, dtype=None): if self.is_integer or self.is_bool or self.is_datetime: pass elif self.is_float and result.dtype == self.dtype: + + # protect against a bool/object showing up here + if isinstance(dtype,compat.string_types) and dtype == 'infer': + return result + if not isinstance(dtype,type): + dtype = dtype.type + if issubclass(dtype,(np.bool_,np.object_)): + if issubclass(dtype,np.bool_): + if isnull(result).all(): + return result.astype(np.bool_) + else: + result = result.astype(np.object_) + result[result==1] = True + result[result==0] = False + return result + else: + return result.astype(np.object_) + return result # may need to change the dtype here @@ -536,8 +554,12 @@ def setitem(self, indexer, value): values[indexer] = value # coerce and try to infer the dtypes of the result + if np.isscalar(value): + dtype,_ = _infer_dtype_from_scalar(value) + else: + dtype = 'infer' values = self._try_coerce_result(values) - values 
= self._try_cast_result(values, 'infer') + values = self._try_cast_result(values, dtype) return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)] except: pass @@ -902,7 +924,7 @@ def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) return issubclass(element.dtype.type, (np.floating, np.integer)) - return isinstance(element, (float, int)) + return isinstance(element, (float, int, np.float_, np.int_)) and not isinstance(bool,np.bool_) def _try_cast(self, element): try: @@ -2647,7 +2669,7 @@ def reindex_axis(self, new_axis, indexer=None, method=None, axis=0, fill_value=N if method is not None or limit is not None: return self.reindex_axis0_with_method(new_axis, indexer=indexer, method=method, fill_value=fill_value, limit=limit, copy=copy) - return self.reindex_items(new_axis, copy=copy, fill_value=fill_value) + return self.reindex_items(new_axis, indexer=indexer, copy=copy, fill_value=fill_value) new_axis, indexer = cur_axis.reindex( new_axis, method, copy_if_needed=True) @@ -2709,7 +2731,7 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value): return self.__class__(new_blocks, new_axes) - def reindex_items(self, new_items, copy=True, fill_value=None): + def reindex_items(self, new_items, indexer=None, copy=True, fill_value=None): """ """ @@ -2719,8 +2741,8 @@ def reindex_items(self, new_items, copy=True, fill_value=None): data = data.consolidate() return data.reindex_items(new_items, copy=copy, fill_value=fill_value) - # TODO: this part could be faster (!) - new_items, indexer = self.items.reindex(new_items, copy_if_needed=True) + if indexer is None: + new_items, indexer = self.items.reindex(new_items, copy_if_needed=True) new_axes = [new_items] + self.axes[1:] # could have so me pathological (MultiIndex) issues here diff --git a/pandas/core/series.py b/pandas/core/series.py index 3a8c0ec5e1a0f..8396de9c5997b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1204,13 +1204,10 @@ def set_value(self, label, value): self.index._engine.set_value(self.values, label, value) return self except KeyError: - if len(self.index) == 0: - new_index = Index([label]) - else: - new_index = self.index.insert(len(self), label) - new_values = np.concatenate([self.values, [value]]) - return self._constructor(new_values, index=new_index, name=self.name) + # set using a non-recursive method + self.loc[label] = value + return self def reset_index(self, level=None, drop=False, name=None, inplace=False): """ diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index ba002415c1112..91f2fe319957b 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1075,6 +1075,7 @@ def test_icol(self): type(iframe.icol(0).sp_index)) def test_set_value(self): + res = self.frame.set_value('foobar', 'B', 1.5) self.assert_(res is not self.frame) self.assert_(res.index[-1] == 'foobar') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index d72c379919e93..118672a85d3fb 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -866,9 +866,11 @@ def test_fancy_index_int_labels_exceptions(self): self.assertRaises(KeyError, self.frame.ix.__setitem__, (slice(None, None), ['E']), 1) - self.assertRaises(KeyError, - self.frame.ix.__setitem__, - (slice(None, None), 'E'), 1) + + # partial setting now allows this GH2578 + #self.assertRaises(KeyError, + # self.frame.ix.__setitem__, + # (slice(None, None), 'E'), 1) def 
test_setitem_fancy_mixed_2d(self): self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5 @@ -1481,33 +1483,54 @@ def test_set_value(self): assert_almost_equal(self.frame[col][idx], 1) def test_set_value_resize(self): + res = self.frame.set_value('foobar', 'B', 0) - self.assert_(res is not self.frame) + self.assert_(res is self.frame) self.assert_(res.index[-1] == 'foobar') self.assertEqual(res.get_value('foobar', 'B'), 0) - res2 = res.set_value('foobar', 'qux', 0) - self.assert_(res2 is not res) - self.assert_(np.array_equal(res2.columns, - list(self.frame.columns) + ['qux'])) - self.assertEqual(res2.get_value('foobar', 'qux'), 0) + self.frame.loc['foobar','qux'] = 0 + self.assertEqual(self.frame.get_value('foobar', 'qux'), 0) + res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 'sam') self.assert_(res3['baz'].dtype == np.object_) + res = self.frame.copy() res3 = res.set_value('foobar', 'baz', True) self.assert_(res3['baz'].dtype == np.object_) + res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 5) self.assert_(com.is_float_dtype(res3['baz'])) self.assert_(isnull(res3['baz'].drop(['foobar'])).values.all()) self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): - df = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) - res = df.set_value('C', 2, 1.0) - self.assert_(list(res.index) == list(df.index) + ['C']) - self.assert_(list(res.columns) == list(df.columns) + [2]) + df_orig = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) + + # this is actually ambiguous as the 2 is interpreted as a positional + # so column is not created + df = df_orig.copy() + df.set_value('C', 2, 1.0) + self.assert_(list(df.index) == list(df_orig.index) + ['C']) + #self.assert_(list(df.columns) == list(df_orig.columns) + [2]) + + df = df_orig.copy() + df.loc['C', 2] = 1.0 + self.assert_(list(df.index) == list(df_orig.index) + ['C']) + #self.assert_(list(df.columns) == list(df_orig.columns) + [2]) + + # create both new + df = df_orig.copy() + df.set_value('C', 'D', 1.0) + self.assert_(list(df.index) == list(df_orig.index) + ['C']) + self.assert_(list(df.columns) == list(df_orig.columns) + ['D']) + + df = df_orig.copy() + df.loc['C', 'D'] = 1.0 + self.assert_(list(df.index) == list(df_orig.index) + ['C']) + self.assert_(list(df.columns) == list(df_orig.columns) + ['D']) def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index da4e1e98e6b7a..66193248ffb7d 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1229,6 +1229,118 @@ def test_dups_loc(self): result = df.loc[1] assert_series_equal(result,expected) + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + ### series ### + s_orig = Series([1,2,3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1,2,3,5],index=[0,1,2,5]) + assert_series_equal(s,expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1,2,3,5],index=[0,1,2,5]) + assert_series_equal(s,expected) + + s = s_orig.copy() + s[5] = 5. + expected = Series([1,2,3,5.],index=[0,1,2,5]) + assert_series_equal(s,expected) + + s = s_orig.copy() + s.loc[5] = 5. + expected = Series([1,2,3,5.],index=[0,1,2,5]) + assert_series_equal(s,expected) + + # iloc/iat raise + s = s_orig.copy() + def f(): + s.iloc[3] = 5. + self.assertRaises(IndexError, f) + def f(): + s.iat[3] = 5. 
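+        # positional setters (iloc/iat) never enlarge; only the label-based
+        # indexers (loc/ix/at and []) append for a missing key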
+ self.assertRaises(IndexError, f) + + ### frame ### + + df_orig = DataFrame(np.arange(6).reshape(3,2),columns=['A','B']) + + # iloc/iat raise + df = df_orig.copy() + def f(): + df.iloc[4,2] = 5. + self.assertRaises(IndexError, f) + def f(): + df.iat[4,2] = 5. + self.assertRaises(IndexError, f) + + # row setting where it exists + expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] })) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + assert_frame_equal(df,expected) + + expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] })) + df = df_orig.copy() + df.loc[1] = df.loc[2] + assert_frame_equal(df,expected) + + expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] }),dtype='float64') + df = df_orig.copy() + df.loc[3] = df.loc[2] + assert_frame_equal(df,expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : [0,2,4] })) + df = df_orig.copy() + df.ix[:,'B'] = df.ix[:,'A'] + assert_frame_equal(df,expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : Series([0.,2.,4.]) })) + df = df_orig.copy() + df['B'] = df['B'].astype(np.float64) + df.ix[:,'B'] = df.ix[:,'A'] + assert_frame_equal(df,expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'].astype(np.float64) + df = df_orig.copy() + df.ix[:,'C'] = df.ix[:,'A'] + assert_frame_equal(df,expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'].astype(np.float64) + df = df_orig.copy() + df.ix[:,'C'] = df.ix[:,'A'] + assert_frame_equal(df,expected) + + ### panel ### + p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64') + + # panel setting via item + p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64') + expected = p_orig.copy() + expected['Item3'] = expected['Item1'] + p = p_orig.copy() + p.loc['Item3'] = p['Item1'] + assert_panel_equal(p,expected) + + # panel with aligned series + expected = p_orig.copy() + expected = expected.transpose(2,1,0) + expected['C'] = DataFrame({ 'Item1' : [30,30,30,30], 'Item2' : [32,32,32,32] },index=p_orig.major_axis) + expected = expected.transpose(2,1,0) + p = p_orig.copy() + p.loc[:,:,'C'] = Series([30,32],index=p_orig.items) + assert_panel_equal(p,expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c903af1860421..50d94ada7b9df 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1400,8 +1400,10 @@ def test_getitem_lowerdim_corner(self): self.assertRaises(KeyError, self.frame.ix.__getitem__, (('bar', 'three'), 'B')) - self.assertRaises(KeyError, self.frame.ix.__setitem__, - (('bar', 'three'), 'B'), 0) + + # in theory should be inserting in a sorted space???? + self.frame.ix[('bar','three'),'B'] = 0 + self.assert_(self.frame.sortlevel().ix[('bar','three'),'B'] == 0) #---------------------------------------------------------------------- # AMBIGUOUS CASES! 
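Taken together, the tests above boil down to the following contract; a minimal, self-contained sketch of that behaviour (values and upcasting details are illustrative, not copied from the test suite):

    import numpy as np
    from pandas import DataFrame, Series

    # label-based setting on a missing key now enlarges (appends) ...
    s = Series([1, 2, 3])
    s.loc[5] = 5.0                    # index becomes [0, 1, 2, 5]

    df = DataFrame(np.arange(6).reshape(3, 2), columns=['A', 'B'])
    df.loc[:, 'C'] = df.loc[:, 'A']   # adds a new column 'C'
    df.loc[3] = 5                     # adds a new row, like an append

    # ... while positional setting stays strict
    try:
        df.iloc[4, 0] = 5.0
    except IndexError:
        pass                          # iloc/iat cannot enlarge their target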
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index b2849aeb2fbe8..282dad5c0d6be 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -851,8 +851,17 @@ def test_getitem_dups_with_missing(self): def test_setitem_ambiguous_keyerror(self): s = Series(lrange(10), index=lrange(0, 20, 2)) - self.assertRaises(KeyError, s.__setitem__, 1, 5) - self.assertRaises(KeyError, s.ix.__setitem__, 1, 5) + + # equivalent of an append + s2 = s.copy() + s2[1] = 5 + expected = s.append(Series([5],index=[1])) + assert_series_equal(s2,expected) + + s2 = s.copy() + s2.ix[1] = 5 + expected = s.append(Series([5],index=[1])) + assert_series_equal(s2,expected) def test_setitem_float_labels(self): # note labels are floats @@ -954,8 +963,10 @@ def test_setitem(self): self.assert_((series[::2] == 0).all()) # set item that's not contained - self.assertRaises(Exception, self.series.__setitem__, - 'foobar', 1) + s = self.series.copy() + s['foobar'] = 1 + expected = self.series.append(Series([1],index=['foobar'])) + assert_series_equal(s,expected) def test_setitem_dtypes(self): @@ -989,11 +1000,18 @@ def test_set_value(self): self.assert_(res is self.ts) self.assertEqual(self.ts[idx], 0) - res = self.series.set_value('foobar', 0) - self.assert_(res is not self.series) + # equiv + s = self.series.copy() + res = s.set_value('foobar', 0) + self.assert_(res is s) self.assert_(res.index[-1] == 'foobar') self.assertEqual(res['foobar'], 0) + s = self.series.copy() + s.loc['foobar'] = 0 + self.assert_(s.index[-1] == 'foobar') + self.assertEqual(s['foobar'], 0) + def test_setslice(self): sl = self.ts[5:20] self.assertEqual(len(sl), len(sl.index)) @@ -4719,33 +4737,26 @@ def test_basic_indexing(self): self.assertRaises(IndexError, s.__setitem__, 5, 0) self.assertRaises(KeyError, s.__getitem__, 'c') - self.assertRaises(KeyError, s.__setitem__, 'c', 0) s = s.sort_index() self.assertRaises(IndexError, s.__getitem__, 5) self.assertRaises(IndexError, s.__setitem__, 5, 0) - self.assertRaises(KeyError, s.__getitem__, 'c') - self.assertRaises(KeyError, s.__setitem__, 'c', 0) def test_int_indexing(self): s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) self.assertRaises(KeyError, s.__getitem__, 5) - self.assertRaises(KeyError, s.__setitem__, 5, 0) self.assertRaises(KeyError, s.__getitem__, 'c') - self.assertRaises(KeyError, s.__setitem__, 'c', 0) # not monotonic s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) self.assertRaises(KeyError, s.__getitem__, 5) - self.assertRaises(KeyError, s.__setitem__, 5, 0) self.assertRaises(KeyError, s.__getitem__, 'c') - self.assertRaises(KeyError, s.__setitem__, 'c', 0) def test_datetime_indexing(self): from pandas import date_range @@ -4757,13 +4768,16 @@ def test_datetime_indexing(self): stamp = Timestamp('1/8/2000') self.assertRaises(KeyError, s.__getitem__, stamp) - self.assertRaises(KeyError, s.__setitem__, stamp, 0) + s[stamp] = 0 + self.assert_(s[stamp] == 0) # not monotonic + s = Series(len(index), index=index) s = s[::-1] self.assertRaises(KeyError, s.__getitem__, stamp) - self.assertRaises(KeyError, s.__setitem__, stamp, 0) + s[stamp] = 0 + self.assert_(s[stamp] == 0) def test_reset_index(self): df = tm.makeDataFrame()[:5] diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 01f573279fe5c..5bed7777cf439 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -109,7 +109,10 @@ def test_duplicate_dates_indexing(self): assert_series_equal(cp, 
expected) self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) - self.assertRaises(KeyError, ts.__setitem__, datetime(2000, 1, 6), 0) + + # new index + ts[datetime(2000,1,6)] = 0 + self.assert_(ts[datetime(2000,1,6)] == 0) def test_range_slice(self): idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
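As the timeseries test above shows, assigning to a timestamp that is not in the index now appends it rather than raising ``KeyError``; a minimal sketch with an illustrative index (not the duplicate-dates fixture used in the test):

    from datetime import datetime
    from pandas import Series, date_range

    ts = Series(1.0, index=date_range('2000-01-01', periods=5))
    ts[datetime(2000, 1, 8)] = 0.0    # previously KeyError; now the new timestamp is appended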