diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 16f0b9ee99909..d786711ffa6ea 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -198,6 +198,8 @@ Other Enhancements - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :func:`pandas.api.types.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, + instances of ``set`` and ``frozenset`` are no longer considered "list-like" (:issue:`23061`). - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). 
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 1453725225e7d..5108e23c53b5a 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -141,6 +141,7 @@ def lfilter(*args, **kwargs): Mapping = collections.abc.Mapping Sequence = collections.abc.Sequence Sized = collections.abc.Sized + Set = collections.abc.Set else: # Python 2 @@ -201,6 +202,7 @@ def get_range_parameters(data): Mapping = collections.Mapping Sequence = collections.Sequence Sized = collections.Sized + Set = collections.Set if PY2: def iteritems(obj, **kw): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 22da546355df6..af5e1523c7cec 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -16,10 +16,10 @@ ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, ABCDateOffset) from pandas.core.dtypes.inference import ( # noqa:F401 - is_bool, is_integer, is_hashable, is_iterator, is_float, - is_dict_like, is_scalar, is_string_like, is_list_like, is_number, - is_file_like, is_re, is_re_compilable, is_sequence, is_nested_list_like, - is_named_tuple, is_array_like, is_decimal, is_complex, is_interval) + is_bool, is_integer, is_float, is_number, is_decimal, is_complex, + is_re, is_re_compilable, is_dict_like, is_string_like, is_file_like, + is_list_like, is_nested_list_like, is_sequence, is_named_tuple, + is_hashable, is_iterator, is_array_like, is_scalar, is_interval) _POSSIBLY_CAST_DTYPES = {np.dtype(t).name for t in ['O', 'int8', 'uint8', 'int16', 'uint16', diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 67f391615eedb..7470497383064 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -5,7 +5,7 @@ from numbers import Number from pandas import compat from pandas.compat import (PY2, string_types, text_type, - string_and_binary_types, re_type) + string_and_binary_types, re_type, Set) from pandas._libs import lib is_bool = lib.is_bool 
@@ -247,7 +247,7 @@ def is_re_compilable(obj): return True -def is_list_like(obj): +def is_list_like(obj, allow_sets=True): """ Check if the object is list-like. @@ -259,6 +259,10 @@ def is_list_like(obj): Parameters ---------- obj : The object to check. + allow_sets : boolean, default True + If this parameter is False, sets will not be considered list-like + + .. versionadded:: 0.24.0 Returns ------- @@ -283,11 +287,15 @@ def is_list_like(obj): False """ - return (isinstance(obj, compat.Iterable) and + return (isinstance(obj, compat.Iterable) # we do not count strings/unicode/bytes as list-like - not isinstance(obj, string_and_binary_types) and + and not isinstance(obj, string_and_binary_types) + # exclude zero-dimensional numpy arrays, effectively scalars - not (isinstance(obj, np.ndarray) and obj.ndim == 0)) + and not (isinstance(obj, np.ndarray) and obj.ndim == 0) + + # exclude sets if allow_sets is False + and not (allow_sets is False and isinstance(obj, Set))) def is_array_like(obj): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 76cd6aabb93ae..d0dd03d6eb8df 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -47,6 +47,70 @@ def coerce(request): return request.param +# collect all objects to be tested for list-like-ness; use tuples of objects, +# whether they are list-like or not (special casing for sets), and their ID +ll_params = [ + ([1], True, 'list'), # noqa: E241 + ([], True, 'list-empty'), # noqa: E241 + ((1, ), True, 'tuple'), # noqa: E241 + (tuple(), True, 'tuple-empty'), # noqa: E241 + ({'a': 1}, True, 'dict'), # noqa: E241 + (dict(), True, 'dict-empty'), # noqa: E241 + ({'a', 1}, 'set', 'set'), # noqa: E241 + (set(), 'set', 'set-empty'), # noqa: E241 + (frozenset({'a', 1}), 'set', 'frozenset'), # noqa: E241 + (frozenset([]), 'set', 'frozenset-empty'), # noqa: E241 + (iter([1, 2]), True, 'iterator'), # noqa: E241 + (iter([]), True, 'iterator-empty'), # 
noqa: E241 + ((x for x in [1, 2]), True, 'generator'), # noqa: E241 + ((x for x in []), True, 'generator-empty'), # noqa: E241 + (Series([1]), True, 'Series'), # noqa: E241 + (Series([]), True, 'Series-empty'), # noqa: E241 + (Series(['a']).str, True, 'StringMethods'), # noqa: E241 + (Series([], dtype='O').str, True, 'StringMethods-empty'), # noqa: E241 + (Index([1]), True, 'Index'), # noqa: E241 + (Index([]), True, 'Index-empty'), # noqa: E241 + (DataFrame([[1]]), True, 'DataFrame'), # noqa: E241 + (DataFrame(), True, 'DataFrame-empty'), # noqa: E241 + (np.ndarray((2,) * 1), True, 'ndarray-1d'), # noqa: E241 + (np.array([]), True, 'ndarray-1d-empty'), # noqa: E241 + (np.ndarray((2,) * 2), True, 'ndarray-2d'), # noqa: E241 + (np.array([[]]), True, 'ndarray-2d-empty'), # noqa: E241 + (np.ndarray((2,) * 3), True, 'ndarray-3d'), # noqa: E241 + (np.array([[[]]]), True, 'ndarray-3d-empty'), # noqa: E241 + (np.ndarray((2,) * 4), True, 'ndarray-4d'), # noqa: E241 + (np.array([[[[]]]]), True, 'ndarray-4d-empty'), # noqa: E241 + (np.array(2), False, 'ndarray-0d'), # noqa: E241 + (1, False, 'int'), # noqa: E241 + (b'123', False, 'bytes'), # noqa: E241 + (b'', False, 'bytes-empty'), # noqa: E241 + ('123', False, 'string'), # noqa: E241 + ('', False, 'string-empty'), # noqa: E241 + (str, False, 'string-type'), # noqa: E241 + (object(), False, 'object'), # noqa: E241 + (np.nan, False, 'NaN'), # noqa: E241 + (None, False, 'None') # noqa: E241 +] +objs, expected, ids = zip(*ll_params) + + +@pytest.fixture(params=zip(objs, expected), ids=ids) +def maybe_list_like(request): + return request.param + + +def test_is_list_like(maybe_list_like): + obj, expected = maybe_list_like + expected = True if expected == 'set' else expected + assert inference.is_list_like(obj) == expected + + +def test_is_list_like_disallow_sets(maybe_list_like): + obj, expected = maybe_list_like + expected = False if expected == 'set' else expected + assert inference.is_list_like(obj, allow_sets=False) == 
expected + + def test_is_sequence(): is_seq = inference.is_sequence assert (is_seq((1, 2))) @@ -63,23 +127,6 @@ def __getitem__(self): assert (not is_seq(A())) -@pytest.mark.parametrize( - "ll", - [ - [], [1], (1, ), (1, 2), {'a': 1}, - {1, 'a'}, Series([1]), - Series([]), Series(['a']).str, - np.array([2])]) -def test_is_list_like_passes(ll): - assert inference.is_list_like(ll) - - -@pytest.mark.parametrize( - "ll", [1, '2', object(), str, np.array(2)]) -def test_is_list_like_fails(ll): - assert not inference.is_list_like(ll) - - def test_is_array_like(): assert inference.is_array_like(Series([])) assert inference.is_array_like(Series([1, 2]))