diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 7c4701b61c18d..ea2b85d983ade 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -41,6 +41,7 @@ API changes - Regression from 0.16.2 for output formatting of long floats/nan, restored in (:issue:`11302`) - Prettyprinting sets (e.g. in DataFrame cells) now uses set literal syntax (``{x, y}``) instead of Legacy Python syntax (``set([x, y])``) (:issue:`11215`) +- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`) .. _whatsnew_0171.deprecations: diff --git a/pandas/core/common.py b/pandas/core/common.py index c6e774b5077db..c2c50bce04309 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -444,14 +444,24 @@ def mask_missing(arr, values_to_mask): mask = None for x in nonna: if mask is None: - mask = arr == x + + # numpy elementwise comparison warning + if is_numeric_v_string_like(arr, x): + mask = False + else: + mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape if np.isscalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: - mask |= arr == x + + # numpy elementwise comparison warning + if is_numeric_v_string_like(arr, x): + mask |= False + else: + mask |= arr == x if na_mask.any(): if mask is None: @@ -2382,6 +2392,9 @@ def _maybe_make_list(obj): is_complex = lib.is_complex +def is_string_like(obj): + return isinstance(obj, (compat.text_type, compat.string_types)) + def is_iterator(obj): # python 3 generators have __next__ instead of next return hasattr(obj, 'next') or hasattr(obj, '__next__') @@ -2525,6 +2538,27 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): return issubclass(tipo, (np.datetime64, np.timedelta64)) +def is_numeric_v_string_like(a, b): + """ + numpy doesn't like to compare numeric arrays vs scalar string-likes + + return a boolean result if this is the case for a,b or b,a + + """ + is_a_array = 
isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and is_numeric_dtype(a) + is_b_numeric_array = is_b_array and is_numeric_dtype(b) + + is_a_scalar_string_like = not is_a_array and is_string_like(a) + is_b_scalar_string_like = not is_b_array and is_string_like(b) + + return ( + is_a_numeric_array and is_b_scalar_string_like) or ( + is_b_numeric_array and is_a_scalar_string_like + ) + def is_datetimelike_v_numeric(a, b): # return if we have an i8 convertible and numeric comparision if not hasattr(a,'dtype'): diff --git a/pandas/core/index.py b/pandas/core/index.py index b4c690fe8973b..ede848c1103ab 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -862,9 +862,10 @@ def to_int(): return self._invalid_indexer('label', key) if is_float(key): - if not self.is_floating(): - warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format( - type(self).__name__), FutureWarning, stacklevel=3) + if isnull(key): + return self._invalid_indexer('label', key) + warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format( + type(self).__name__), FutureWarning, stacklevel=3) return to_int() return key @@ -3721,9 +3722,23 @@ def astype(self, dtype): return Index(self._values, name=self.name, dtype=dtype) def _convert_scalar_indexer(self, key, kind=None): + """ + convert a scalar indexer + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + + right now we are converting + floats -> ints if the index supports it + """ + if kind == 'iloc': - return super(Float64Index, self)._convert_scalar_indexer(key, - kind=kind) + if is_integer(key): + return key + return super(Float64Index, self)._convert_scalar_indexer(key, kind=kind) + return key def _convert_slice_indexer(self, key, kind=None): @@ -4276,7 +4291,7 @@ def _reference_duplicate_name(self, name): Returns True if the 
name refered to in self.names is duplicated. """ # count the times name equals an element in self.names. - return np.sum(name == np.asarray(self.names)) > 1 + return sum(name == n for n in self.names) > 1 def _format_native_types(self, **kwargs): return self.values diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8b4528ef451ef..5eb25a53d4533 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1285,7 +1285,7 @@ def _has_valid_type(self, key, axis): def error(): if isnull(key): - raise ValueError( + raise TypeError( "cannot use label indexing with a null key") raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis))) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ed4d6a6ccd73e..d98121520b8b0 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -17,7 +17,7 @@ is_datetime64tz_dtype, is_datetimetz, is_sparse, array_equivalent, _maybe_convert_string_to_object, is_categorical, needs_i8_conversion, is_datetimelike_v_numeric, - is_internal_type) + is_numeric_v_string_like, is_internal_type) from pandas.core.dtypes import DatetimeTZDtype from pandas.core.index import Index, MultiIndex, _ensure_index @@ -1082,8 +1082,16 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): # get the result, may need to transpose the other def get_result(other): - # compute - result = func(values, other) + # avoid numpy warning of comparisons against None + if other is None: + result = not func.__name__ == 'eq' + + # avoid numpy warning of elementwise comparisons to object + elif is_numeric_v_string_like(values, other): + result = False + + else: + result = func(values, other) # mask if needed if isinstance(values_mask, np.ndarray) and values_mask.any(): @@ -3214,7 +3222,7 @@ def get(self, item, fastpath=True): else: if isnull(item): - raise ValueError("cannot label index with a null key") + raise TypeError("cannot label index with a null key") indexer = 
self.items.get_indexer_for([item]) return self.reindex_indexer(new_axis=self.items[indexer], @@ -4251,11 +4259,16 @@ def _possibly_compare(a, b, op): # numpy deprecation warning to have i8 vs integer comparisions if is_datetimelike_v_numeric(a, b): - res = False + result = False + + # numpy deprecation warning if comparing numeric vs string-like + elif is_numeric_v_string_like(a, b): + result = False + else: - res = op(a, b) + result = op(a, b) - if np.isscalar(res) and (is_a_array or is_b_array): + if lib.isscalar(result) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] if is_a_array: @@ -4265,7 +4278,7 @@ def _possibly_compare(a, b, op): type_names[1] = 'ndarray(dtype=%s)' % b.dtype raise TypeError("Cannot compare types %r and %r" % tuple(type_names)) - return res + return result def _concat_indexes(indexes): diff --git a/pandas/io/data.py b/pandas/io/data.py index 310b165101bdf..ac6f14e846bec 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -1024,7 +1024,7 @@ def _validate_expiry(self, expiry): if expiry in expiry_dates: return expiry else: - index = DatetimeIndex(expiry_dates).order() + index = DatetimeIndex(expiry_dates).sort_values() return index[index.date >= expiry][0].date() def get_forward_data(self, months, call=True, put=False, near=False, diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a45f4bf1726f2..dc0e0e2670565 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5839,7 +5839,7 @@ def check(df): def f(): df.loc[:,np.nan] - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) df = DataFrame([[1,2,3],[4,5,6]], index=[1,np.nan])