Skip to content

Commit cd2281e

Browse files
committed
[nomerge] Amend _from_array functionality to _from_arrays
1 parent 0ebf3f4 commit cd2281e

File tree

3 files changed

+80
-80
lines changed

3 files changed

+80
-80
lines changed

pandas/core/internals/construction.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
dict_compat,
2828
maybe_cast_to_datetime,
2929
maybe_convert_platform,
30+
maybe_infer_to_datetimelike,
3031
maybe_upcast,
3132
)
3233
from pandas.core.dtypes.common import (
@@ -36,6 +37,7 @@
3637
is_integer_dtype,
3738
is_list_like,
3839
is_named_tuple,
40+
is_object_dtype,
3941
)
4042
from pandas.core.dtypes.generic import (
4143
ABCDataFrame,
@@ -55,10 +57,7 @@
5557
get_objs_combined_axis,
5658
union_indexes,
5759
)
58-
from pandas.core.internals.managers import (
59-
create_block_manager_from_array,
60-
create_block_manager_from_arrays,
61-
)
60+
from pandas.core.internals.managers import create_block_manager_from_arrays
6261

6362
if TYPE_CHECKING:
6463
from numpy.ma.mrecords import MaskedRecords
@@ -230,7 +229,23 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
230229
)
231230
values = values.T
232231

233-
return create_block_manager_from_array(values, [columns, index], dtype)
232+
# if we don't have a dtype specified, then try to convert objects
233+
# on the entire block; this is to convert if we have datetimelike's
234+
# embedded in an object type
235+
if dtype is None and is_object_dtype(values.dtype):
236+
maybe_datetime = [maybe_infer_to_datetimelike(instance) for instance in values]
237+
# don't convert (and copy) the objects if no type inference occurs
238+
if any(
239+
not is_dtype_equal(instance.dtype, values.dtype)
240+
for instance in maybe_datetime
241+
):
242+
arrays = maybe_datetime
243+
else:
244+
arrays = [values]
245+
else:
246+
arrays = [values]
247+
248+
return create_block_manager_from_arrays(arrays, columns, [columns, index])
234249

235250

236251
def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):

pandas/core/internals/managers.py

+57-73
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,13 @@
2727
from pandas.core.dtypes.cast import (
2828
find_common_type,
2929
infer_dtype_from_scalar,
30-
maybe_infer_to_datetimelike,
3130
maybe_promote,
3231
)
3332
from pandas.core.dtypes.common import (
3433
DT64NS_DTYPE,
3534
is_dtype_equal,
3635
is_extension_array_dtype,
3736
is_list_like,
38-
is_object_dtype,
3937
)
4038
from pandas.core.dtypes.concat import concat_compat
4139
from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -328,7 +326,7 @@ def _verify_integrity(self) -> None:
328326
if block.shape[1:] != mgr_shape[1:]:
329327
raise construction_error(tot_items, block.shape[1:], self.axes)
330328
if len(self.items) != tot_items:
331-
raise AssertionError(
329+
raise ValueError(
332330
"Number of manager items must equal union of "
333331
f"block items\n# manager items: {len(self.items)}, # "
334332
f"tot_items: {tot_items}"
@@ -1671,48 +1669,14 @@ def create_block_manager_from_arrays(
16711669
# ensure we dont have any PandasArrays when we call get_block_type
16721670
# Note: just calling extract_array breaks tests that patch PandasArray._typ.
16731671
arrays = [x if not isinstance(x, ABCPandasArray) else x.to_numpy() for x in arrays]
1672+
blocks = _form_blocks(arrays, names, axes)
16741673
try:
1675-
blocks = _form_blocks(arrays, names, axes)
1676-
mgr = BlockManager(blocks, axes)
1677-
mgr._consolidate_inplace()
1678-
return mgr
1679-
except ValueError as e:
1680-
raise construction_error(len(arrays), arrays[0].shape, axes, e)
1681-
1682-
1683-
def create_block_manager_from_array(
1684-
array, axes: List[Index], dtype: Optional[Dtype] = None
1685-
) -> BlockManager:
1686-
assert isinstance(axes, list)
1687-
assert all(isinstance(x, Index) for x in axes)
1688-
1689-
# ensure we dont have any PandasArrays when we call get_block_type
1690-
# Note: just calling extract_array breaks tests that patch PandasArray._typ.
1691-
array = array if not isinstance(array, ABCPandasArray) else array.to_numpy()
1692-
1693-
try:
1694-
# if we don't have a dtype specified, then try to convert objects
1695-
# on the entire block; this is to convert if we have datetimelike's
1696-
# embedded in an object type
1697-
if dtype is None and is_object_dtype(array.dtype):
1698-
maybe_datetime = [
1699-
maybe_infer_to_datetimelike(instance) for instance in array
1700-
]
1701-
# don't convert (and copy) the objects if no type conversion occurs
1702-
if any(
1703-
not is_dtype_equal(instance.dtype, array.dtype)
1704-
for instance in maybe_datetime
1705-
):
1706-
blocks = _form_blocks(maybe_datetime, axes[0], axes)
1707-
else:
1708-
blocks = [make_block(array, slice(0, len(axes[0])))]
1709-
else:
1710-
blocks = [make_block(array, slice(0, len(axes[0])), dtype=dtype)]
17111674
mgr = BlockManager(blocks, axes)
17121675
mgr._consolidate_inplace()
17131676
return mgr
17141677
except ValueError as e:
1715-
raise construction_error(array.shape[0], array.shape[1:], axes, e)
1678+
tot_items = sum(b.shape[0] for b in blocks)
1679+
raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
17161680

17171681

17181682
def construction_error(tot_items, block_shape, axes, e=None):
@@ -1743,32 +1707,41 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
17431707
# put "leftover" items in float bucket, where else?
17441708
# generalize?
17451709

1746-
if len(arrays) != len(names):
1747-
raise ValueError(
1748-
f"Number of arrays ({len(arrays)}) "
1749-
f"does not match index length ({len(names)})"
1750-
)
1751-
17521710
items_dict: DefaultDict[str, List] = defaultdict(list)
17531711
extra_locs = []
17541712

1755-
names_idx = names
1756-
if names_idx.equals(axes[0]):
1757-
names_indexer = np.arange(len(names_idx))
1713+
if len(arrays) == 1:
1714+
first = arrays[0]
1715+
block_type = get_block_type(first)
1716+
if first.ndim == 1:
1717+
end = 1
1718+
else:
1719+
end = len(first)
1720+
items_dict[block_type.__name__].append(((0, end), first))
17581721
else:
1759-
assert names_idx.intersection(axes[0]).is_unique
1760-
names_indexer = names_idx.get_indexer_for(axes[0])
1761-
1762-
for i, name_idx in enumerate(names_indexer):
1763-
if name_idx == -1:
1764-
extra_locs.append(i)
1765-
continue
1722+
names_idx = names
1723+
if names_idx.equals(axes[0]):
1724+
names_indexer = np.arange(len(names_idx))
1725+
else:
1726+
assert names_idx.intersection(axes[0]).is_unique
1727+
names_indexer = names_idx.get_indexer_for(axes[0])
1728+
1729+
i = 0
1730+
for name_idx in names_indexer:
1731+
if name_idx == -1:
1732+
extra_locs.append(i)
1733+
i += 1
1734+
continue
17661735

1767-
k = names[name_idx]
1768-
v = arrays[name_idx]
1736+
v = arrays[name_idx]
17691737

1770-
block_type = get_block_type(v)
1771-
items_dict[block_type.__name__].append((i, k, v))
1738+
if v.ndim == 2:
1739+
ei = i + v.shape[1]
1740+
else:
1741+
ei = i + 1
1742+
block_type = get_block_type(v)
1743+
items_dict[block_type.__name__].append(((i, ei), v))
1744+
i = ei
17721745

17731746
blocks: List[Block] = []
17741747
if len(items_dict["FloatBlock"]):
@@ -1789,8 +1762,8 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
17891762

17901763
if len(items_dict["DatetimeTZBlock"]):
17911764
dttz_blocks = [
1792-
make_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
1793-
for i, _, array in items_dict["DatetimeTZBlock"]
1765+
make_block(array, klass=DatetimeTZBlock, placement=slice(*i), ndim=2)
1766+
for i, array in items_dict["DatetimeTZBlock"]
17941767
]
17951768
blocks.extend(dttz_blocks)
17961769

@@ -1800,23 +1773,25 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
18001773

18011774
if len(items_dict["CategoricalBlock"]) > 0:
18021775
cat_blocks = [
1803-
make_block(array, klass=CategoricalBlock, placement=i, ndim=2)
1804-
for i, _, array in items_dict["CategoricalBlock"]
1776+
make_block(array, klass=CategoricalBlock, placement=slice(*i), ndim=2)
1777+
for i, array in items_dict["CategoricalBlock"]
18051778
]
18061779
blocks.extend(cat_blocks)
18071780

18081781
if len(items_dict["ExtensionBlock"]):
18091782
external_blocks = [
1810-
make_block(array, klass=ExtensionBlock, placement=i, ndim=2)
1811-
for i, _, array in items_dict["ExtensionBlock"]
1783+
make_block(array, klass=ExtensionBlock, placement=slice(*i), ndim=2)
1784+
for i, array in items_dict["ExtensionBlock"]
18121785
]
18131786

18141787
blocks.extend(external_blocks)
18151788

18161789
if len(items_dict["ObjectValuesExtensionBlock"]):
18171790
external_blocks = [
1818-
make_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
1819-
for i, _, array in items_dict["ObjectValuesExtensionBlock"]
1791+
make_block(
1792+
array, klass=ObjectValuesExtensionBlock, placement=slice(*i), ndim=2
1793+
)
1794+
for i, array in items_dict["ObjectValuesExtensionBlock"]
18201795
]
18211796

18221797
blocks.extend(external_blocks)
@@ -1849,10 +1824,10 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
18491824
return [block]
18501825

18511826

1852-
def _multi_blockify(tuples, dtype: Optional[Dtype] = None):
1827+
def _multi_blockify(tuples):
18531828
""" return an array of blocks that potentially have different dtypes """
18541829
# group by dtype
1855-
grouper = itertools.groupby(tuples, lambda x: x[2].dtype)
1830+
grouper = itertools.groupby(tuples, lambda x: x[1].dtype)
18561831

18571832
new_blocks = []
18581833
for dtype, tup_block in grouper:
@@ -1880,16 +1855,25 @@ def _shape_compat(x) -> Shape:
18801855
else:
18811856
return x.shape
18821857

1883-
placement, names, arrays = zip(*tuples)
1858+
placement, arrays = zip(*tuples)
18841859

18851860
first = arrays[0]
1886-
shape = (len(arrays),) + _shape_compat(first)
18871861

1862+
if len(arrays) == 1:
1863+
if dtype is None or is_dtype_equal(first.dtype, dtype):
1864+
arr = _asarray_compat(first)
1865+
else:
1866+
arr = _asarray_compat(first).astype(dtype)
1867+
if len(_shape_compat(first)) < 2:
1868+
arr = arr.reshape(1, -1)
1869+
return arr, slice(*placement[0])
1870+
1871+
shape = (len(arrays),) + _shape_compat(first)
18881872
stacked = np.empty(shape, dtype=dtype)
18891873
for i, arr in enumerate(arrays):
18901874
stacked[i] = _asarray_compat(arr)
18911875

1892-
return stacked, placement
1876+
return stacked, list(itertools.chain(*(list(range(*p)) for p in placement)))
18931877

18941878

18951879
def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]:

pandas/tests/frame/test_constructors.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1837,13 +1837,14 @@ def test_constructor_ndarray_copy(self, float_frame):
18371837
float_frame.values[6] = 6
18381838
assert not (df.values[6] == 6).all()
18391839

1840-
def test_constructor_series_copy(self, float_frame):
1840+
def test_constructor_series_not_copy(self, float_frame):
18411841
series = float_frame._series
18421842

18431843
df = DataFrame({"A": series["A"]})
1844+
18441845
df["A"][:] = 5
18451846

1846-
assert not (series["A"] == 5).all()
1847+
assert (series["A"] == 5).all()
18471848

18481849
def test_constructor_with_nas(self):
18491850
# GH 5016

0 commit comments

Comments (0)