Skip to content

Commit d2facee

Browse files
committed
[nomerge] Amend _from_array functionality to _from_arrays
1 parent 0ebf3f4 commit d2facee

File tree

3 files changed

+108
-82
lines changed

3 files changed

+108
-82
lines changed

pandas/core/internals/construction.py

+20 -5
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
dict_compat,
2828
maybe_cast_to_datetime,
2929
maybe_convert_platform,
30+
maybe_infer_to_datetimelike,
3031
maybe_upcast,
3132
)
3233
from pandas.core.dtypes.common import (
@@ -36,6 +37,7 @@
3637
is_integer_dtype,
3738
is_list_like,
3839
is_named_tuple,
40+
is_object_dtype,
3941
)
4042
from pandas.core.dtypes.generic import (
4143
ABCDataFrame,
@@ -55,10 +57,7 @@
5557
get_objs_combined_axis,
5658
union_indexes,
5759
)
58-
from pandas.core.internals.managers import (
59-
create_block_manager_from_array,
60-
create_block_manager_from_arrays,
61-
)
60+
from pandas.core.internals.managers import create_block_manager_from_arrays
6261

6362
if TYPE_CHECKING:
6463
from numpy.ma.mrecords import MaskedRecords
@@ -230,7 +229,23 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
230229
)
231230
values = values.T
232231

233-
return create_block_manager_from_array(values, [columns, index], dtype)
232+
# if we don't have a dtype specified, then try to convert objects
233+
# on the entire block; this is to convert if we have datetimelike's
234+
# embedded in an object type
235+
if dtype is None and is_object_dtype(values.dtype):
236+
maybe_datetime = [maybe_infer_to_datetimelike(instance) for instance in values]
237+
# don't convert (and copy) the objects if no type inference occurs
238+
if any(
239+
not is_dtype_equal(instance.dtype, values.dtype)
240+
for instance in maybe_datetime
241+
):
242+
arrays = maybe_datetime
243+
else:
244+
arrays = [values]
245+
else:
246+
arrays = [values]
247+
248+
return create_block_manager_from_arrays(arrays, columns, [columns, index])
234249

235250

236251
def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):

pandas/core/internals/managers.py

+77 -76
Original file line number | Diff line number | Diff line change
@@ -27,15 +27,13 @@
2727
from pandas.core.dtypes.cast import (
2828
find_common_type,
2929
infer_dtype_from_scalar,
30-
maybe_infer_to_datetimelike,
3130
maybe_promote,
3231
)
3332
from pandas.core.dtypes.common import (
3433
DT64NS_DTYPE,
3534
is_dtype_equal,
3635
is_extension_array_dtype,
3736
is_list_like,
38-
is_object_dtype,
3937
)
4038
from pandas.core.dtypes.concat import concat_compat
4139
from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -328,7 +326,7 @@ def _verify_integrity(self) -> None:
328326
if block.shape[1:] != mgr_shape[1:]:
329327
raise construction_error(tot_items, block.shape[1:], self.axes)
330328
if len(self.items) != tot_items:
331-
raise AssertionError(
329+
raise ValueError(
332330
"Number of manager items must equal union of "
333331
f"block items\n# manager items: {len(self.items)}, # "
334332
f"tot_items: {tot_items}"
@@ -1671,48 +1669,14 @@ def create_block_manager_from_arrays(
16711669
# ensure we dont have any PandasArrays when we call get_block_type
16721670
# Note: just calling extract_array breaks tests that patch PandasArray._typ.
16731671
arrays = [x if not isinstance(x, ABCPandasArray) else x.to_numpy() for x in arrays]
1672+
blocks = _form_blocks(arrays, names, axes)
16741673
try:
1675-
blocks = _form_blocks(arrays, names, axes)
16761674
mgr = BlockManager(blocks, axes)
16771675
mgr._consolidate_inplace()
16781676
return mgr
16791677
except ValueError as e:
1680-
raise construction_error(len(arrays), arrays[0].shape, axes, e)
1681-
1682-
1683-
def create_block_manager_from_array(
1684-
array, axes: List[Index], dtype: Optional[Dtype] = None
1685-
) -> BlockManager:
1686-
assert isinstance(axes, list)
1687-
assert all(isinstance(x, Index) for x in axes)
1688-
1689-
# ensure we dont have any PandasArrays when we call get_block_type
1690-
# Note: just calling extract_array breaks tests that patch PandasArray._typ.
1691-
array = array if not isinstance(array, ABCPandasArray) else array.to_numpy()
1692-
1693-
try:
1694-
# if we don't have a dtype specified, then try to convert objects
1695-
# on the entire block; this is to convert if we have datetimelike's
1696-
# embedded in an object type
1697-
if dtype is None and is_object_dtype(array.dtype):
1698-
maybe_datetime = [
1699-
maybe_infer_to_datetimelike(instance) for instance in array
1700-
]
1701-
# don't convert (and copy) the objects if no type conversion occurs
1702-
if any(
1703-
not is_dtype_equal(instance.dtype, array.dtype)
1704-
for instance in maybe_datetime
1705-
):
1706-
blocks = _form_blocks(maybe_datetime, axes[0], axes)
1707-
else:
1708-
blocks = [make_block(array, slice(0, len(axes[0])))]
1709-
else:
1710-
blocks = [make_block(array, slice(0, len(axes[0])), dtype=dtype)]
1711-
mgr = BlockManager(blocks, axes)
1712-
mgr._consolidate_inplace()
1713-
return mgr
1714-
except ValueError as e:
1715-
raise construction_error(array.shape[0], array.shape[1:], axes, e)
1678+
tot_items = sum(b.shape[0] for b in blocks)
1679+
raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
17161680

17171681

17181682
def construction_error(tot_items, block_shape, axes, e=None):
@@ -1739,36 +1703,60 @@ def construction_error(tot_items, block_shape, axes, e=None):
17391703
# -----------------------------------------------------------------------
17401704

17411705

1742-
def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1743-
# put "leftover" items in float bucket, where else?
1744-
# generalize?
1745-
1746-
if len(arrays) != len(names):
1747-
raise ValueError(
1748-
f"Number of arrays ({len(arrays)}) "
1749-
f"does not match index length ({len(names)})"
1750-
)
1751-
1706+
def _get_block_ranges(arrays, names, axes):
17521707
items_dict: DefaultDict[str, List] = defaultdict(list)
17531708
extra_locs = []
17541709

1755-
names_idx = names
1756-
if names_idx.equals(axes[0]):
1757-
names_indexer = np.arange(len(names_idx))
1710+
# GH#39263
1711+
# if one array is passed, form one contiguous block
1712+
if len(arrays) == 1:
1713+
first = arrays[0]
1714+
block_type = get_block_type(first)
1715+
1716+
if first.ndim == 1:
1717+
end = 1
1718+
else:
1719+
end = len(first)
1720+
1721+
items_dict[block_type.__name__].append(((0, end), first))
1722+
# else, form blocks column-by-column
17581723
else:
1759-
assert names_idx.intersection(axes[0]).is_unique
1760-
names_indexer = names_idx.get_indexer_for(axes[0])
1724+
names_idx = names
1725+
if names_idx.equals(axes[0]):
1726+
names_indexer = np.arange(len(names_idx))
1727+
else:
1728+
assert names_idx.intersection(axes[0]).is_unique
1729+
names_indexer = names_idx.get_indexer_for(axes[0])
1730+
1731+
i = 0
1732+
for name_idx in names_indexer:
1733+
if name_idx == -1:
1734+
extra_locs.append(i)
1735+
i += 1
1736+
continue
1737+
1738+
v = arrays[name_idx]
1739+
block_type = get_block_type(v)
17611740

1762-
for i, name_idx in enumerate(names_indexer):
1763-
if name_idx == -1:
1764-
extra_locs.append(i)
1765-
continue
1741+
if v.ndim == 1:
1742+
ei = i + 1
1743+
else:
1744+
ei = i + v.shape[1]
1745+
1746+
items_dict[block_type.__name__].append(((i, ei), v))
1747+
i = ei
17661748

1767-
k = names[name_idx]
1768-
v = arrays[name_idx]
1749+
return items_dict, extra_locs
17691750

1770-
block_type = get_block_type(v)
1771-
items_dict[block_type.__name__].append((i, k, v))
1751+
1752+
def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1753+
# put "leftover" items in float bucket, where else?
1754+
# generalize?
1755+
1756+
items_dict: DefaultDict[str, List] = defaultdict(list)
1757+
extra_locs = []
1758+
1759+
items_dict, extra_locs = _get_block_ranges(arrays, names, axes)
17721760

17731761
blocks: List[Block] = []
17741762
if len(items_dict["FloatBlock"]):
@@ -1789,8 +1777,8 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
17891777

17901778
if len(items_dict["DatetimeTZBlock"]):
17911779
dttz_blocks = [
1792-
make_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
1793-
for i, _, array in items_dict["DatetimeTZBlock"]
1780+
make_block(array, klass=DatetimeTZBlock, placement=slice(*i), ndim=2)
1781+
for i, array in items_dict["DatetimeTZBlock"]
17941782
]
17951783
blocks.extend(dttz_blocks)
17961784

@@ -1800,23 +1788,25 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
18001788

18011789
if len(items_dict["CategoricalBlock"]) > 0:
18021790
cat_blocks = [
1803-
make_block(array, klass=CategoricalBlock, placement=i, ndim=2)
1804-
for i, _, array in items_dict["CategoricalBlock"]
1791+
make_block(array, klass=CategoricalBlock, placement=slice(*i), ndim=2)
1792+
for i, array in items_dict["CategoricalBlock"]
18051793
]
18061794
blocks.extend(cat_blocks)
18071795

18081796
if len(items_dict["ExtensionBlock"]):
18091797
external_blocks = [
1810-
make_block(array, klass=ExtensionBlock, placement=i, ndim=2)
1811-
for i, _, array in items_dict["ExtensionBlock"]
1798+
make_block(array, klass=ExtensionBlock, placement=slice(*i), ndim=2)
1799+
for i, array in items_dict["ExtensionBlock"]
18121800
]
18131801

18141802
blocks.extend(external_blocks)
18151803

18161804
if len(items_dict["ObjectValuesExtensionBlock"]):
18171805
external_blocks = [
1818-
make_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
1819-
for i, _, array in items_dict["ObjectValuesExtensionBlock"]
1806+
make_block(
1807+
array, klass=ObjectValuesExtensionBlock, placement=slice(*i), ndim=2
1808+
)
1809+
for i, array in items_dict["ObjectValuesExtensionBlock"]
18201810
]
18211811

18221812
blocks.extend(external_blocks)
@@ -1849,10 +1839,10 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
18491839
return [block]
18501840

18511841

1852-
def _multi_blockify(tuples, dtype: Optional[Dtype] = None):
1842+
def _multi_blockify(tuples):
18531843
""" return an array of blocks that potentially have different dtypes """
18541844
# group by dtype
1855-
grouper = itertools.groupby(tuples, lambda x: x[2].dtype)
1845+
grouper = itertools.groupby(tuples, lambda x: x[1].dtype)
18561846

18571847
new_blocks = []
18581848
for dtype, tup_block in grouper:
@@ -1880,16 +1870,27 @@ def _shape_compat(x) -> Shape:
18801870
else:
18811871
return x.shape
18821872

1883-
placement, names, arrays = zip(*tuples)
1873+
placement, arrays = zip(*tuples)
18841874

18851875
first = arrays[0]
1886-
shape = (len(arrays),) + _shape_compat(first)
18871876

1877+
# GH#39263
1878+
# if only one array is passed, avoid copying it
1879+
if len(arrays) == 1:
1880+
arr = _asarray_compat(first)
1881+
# except if the dtype doesn't match
1882+
if dtype is not None and not is_dtype_equal(first.dtype, dtype):
1883+
arr = _asarray_compat(first).astype(dtype)
1884+
if len(_shape_compat(first)) < 2:
1885+
arr = arr.reshape(1, -1)
1886+
return arr, slice(*placement[0])
1887+
1888+
shape = (len(arrays),) + _shape_compat(first)
18881889
stacked = np.empty(shape, dtype=dtype)
18891890
for i, arr in enumerate(arrays):
18901891
stacked[i] = _asarray_compat(arr)
18911892

1892-
return stacked, placement
1893+
return stacked, list(itertools.chain(*(list(range(*p)) for p in placement)))
18931894

18941895

18951896
def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]:

pandas/tests/frame/test_constructors.py

+11 -1
Original file line number | Diff line number | Diff line change
@@ -1837,10 +1837,20 @@ def test_constructor_ndarray_copy(self, float_frame):
18371837
float_frame.values[6] = 6
18381838
assert not (df.values[6] == 6).all()
18391839

1840-
def test_constructor_series_copy(self, float_frame):
1840+
def test_constructor_series_not_copy(self, float_frame):
18411841
series = float_frame._series
18421842

18431843
df = DataFrame({"A": series["A"]})
1844+
1845+
df["A"][:] = 5
1846+
1847+
assert (series["A"] == 5).all()
1848+
1849+
def test_constructor_series_copy(self, float_frame):
1850+
series = float_frame._series
1851+
1852+
df = DataFrame({"A": series["A"], "B": series["A"]})
1853+
18441854
df["A"][:] = 5
18451855

18461856
assert not (series["A"] == 5).all()

0 commit comments

Comments
 (0)