Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow using Iterable in Series and DataFrame constructor #21987

Merged
merged 3 commits into from
Jul 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ Other Enhancements
- :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`)
- :func:`~DataFrame.to_csv` and :func:`~DataFrame.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`)
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
-
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)

.. _whatsnew_0240.api_breaking:

Expand Down
15 changes: 8 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import collections
import itertools
import sys
import types
import warnings
from textwrap import dedent

Expand Down Expand Up @@ -75,7 +74,8 @@
from pandas.core.arrays import Categorical, ExtensionArray
import pandas.core.algorithms as algorithms
from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
OrderedDict, raise_with_traceback)
OrderedDict, raise_with_traceback,
string_and_binary_types)
from pandas import compat
from pandas.compat import PY36
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -267,7 +267,7 @@ class DataFrame(NDFrame):

Parameters
----------
data : numpy ndarray (structured or homogeneous), dict, or DataFrame
data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
Dict can contain Series, arrays, constants, or list-like objects

.. versionchanged :: 0.23.0
Expand Down Expand Up @@ -391,8 +391,11 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
else:
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
elif isinstance(data, (list, types.GeneratorType)):
if isinstance(data, types.GeneratorType):

# data is list-like or another Iterable (non-Sequence iterables are consumed into a list)
elif (isinstance(data, collections.Iterable)
and not isinstance(data, string_and_binary_types)):
if not isinstance(data, collections.Sequence):
data = list(data)
if len(data) > 0:
if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1:
Expand All @@ -417,8 +420,6 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
copy=copy)
else:
mgr = self._init_dict({}, index, columns, dtype=dtype)
elif isinstance(data, collections.Iterator):
raise TypeError("data argument can't be an iterator")
else:
try:
arr = np.array(data, dtype=dtype, copy=copy)
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# pylint: disable=E1101,E1103
# pylint: disable=W0703,W0622,W0613,W0201

import types
import collections
import warnings
from textwrap import dedent

Expand Down Expand Up @@ -144,7 +144,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):

Parameters
----------
data : array-like, dict, or scalar value
data : array-like, Iterable, dict, or scalar value
Contains data stored in Series

.. versionchanged :: 0.23.0
Expand Down Expand Up @@ -238,12 +238,13 @@ def __init__(self, data=None, index=None, dtype=None, name=None,

elif is_extension_array_dtype(data):
pass
elif (isinstance(data, types.GeneratorType) or
(compat.PY3 and isinstance(data, map))):
data = list(data)
elif isinstance(data, (set, frozenset)):
raise TypeError("{0!r} type is unordered"
"".format(data.__class__.__name__))
# If data is Iterable but not list-like, consume into list.
elif (isinstance(data, collections.Iterable)
and not isinstance(data, collections.Sized)):
data = list(data)
else:

# handle sparse passed here (and force conversion)
Expand Down
21 changes: 11 additions & 10 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,12 +865,6 @@ def test_constructor_more(self):
dm = DataFrame(index=np.arange(10))
assert dm.values.shape == (10, 0)

# corner, silly
# TODO: Fix this Exception to be better...
with tm.assert_raises_regex(ValueError, 'constructor not '
'properly called'):
DataFrame((1, 2, 3))

# can't cast
mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1)
with tm.assert_raises_regex(ValueError, 'cast'):
Expand Down Expand Up @@ -953,6 +947,17 @@ def __len__(self, n):
array.array('i', range(10))])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_iterable(self):
# GH 21987
class Iter():
def __iter__(self):
for i in range(10):
yield [1, 2, 3]

expected = DataFrame([[1, 2, 3]] * 10)
result = DataFrame(Iter())
tm.assert_frame_equal(result, expected)

Copy link
Member

@gfyoung gfyoung Jul 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference the issue number in a comment below the function definition.

Applies to all newly-added tests.

def test_constructor_iterator(self):

expected = DataFrame([list(range(10)), list(range(10))])
Expand Down Expand Up @@ -1374,10 +1379,6 @@ def test_constructor_miscast_na_int_dtype(self):
expected = DataFrame([[np.nan, 1], [1, 0]])
tm.assert_frame_equal(df, expected)

def test_constructor_iterator_failure(self):
with tm.assert_raises_regex(TypeError, 'iterator'):
DataFrame(iter([1, 2, 3]))

def test_constructor_column_duplicates(self):
# it works! #2079
df = DataFrame([[8, 5]], columns=['a', 'a'])
Expand Down
19 changes: 18 additions & 1 deletion pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,29 @@ def test_constructor_series(self):

assert_series_equal(s2, s1.sort_index())

def test_constructor_iterator(self):
def test_constructor_iterable(self):
# GH 21987
class Iter():
def __iter__(self):
for i in range(10):
yield i

expected = Series(list(range(10)), dtype='int64')
result = Series(Iter(), dtype='int64')
assert_series_equal(result, expected)

def test_constructor_sequence(self):
# GH 21987
expected = Series(list(range(10)), dtype='int64')
result = Series(range(10), dtype='int64')
assert_series_equal(result, expected)

def test_constructor_single_str(self):
# GH 21987
expected = Series(['abc'])
result = Series('abc')
assert_series_equal(result, expected)

def test_constructor_list_like(self):

# make sure that we are coercing different
Expand Down