27
27
from pandas .core .dtypes .cast import (
28
28
find_common_type ,
29
29
infer_dtype_from_scalar ,
30
- maybe_infer_to_datetimelike ,
31
30
maybe_promote ,
32
31
)
33
32
from pandas .core .dtypes .common import (
34
33
DT64NS_DTYPE ,
35
34
is_dtype_equal ,
36
35
is_extension_array_dtype ,
37
36
is_list_like ,
38
- is_object_dtype ,
39
37
)
40
38
from pandas .core .dtypes .concat import concat_compat
41
39
from pandas .core .dtypes .dtypes import ExtensionDtype
@@ -328,7 +326,7 @@ def _verify_integrity(self) -> None:
328
326
if block .shape [1 :] != mgr_shape [1 :]:
329
327
raise construction_error (tot_items , block .shape [1 :], self .axes )
330
328
if len (self .items ) != tot_items :
331
- raise AssertionError (
329
+ raise ValueError (
332
330
"Number of manager items must equal union of "
333
331
f"block items\n # manager items: { len (self .items )} , # "
334
332
f"tot_items: { tot_items } "
@@ -1671,48 +1669,14 @@ def create_block_manager_from_arrays(
1671
1669
# ensure we dont have any PandasArrays when we call get_block_type
1672
1670
# Note: just calling extract_array breaks tests that patch PandasArray._typ.
1673
1671
arrays = [x if not isinstance (x , ABCPandasArray ) else x .to_numpy () for x in arrays ]
1672
+ blocks = _form_blocks (arrays , names , axes )
1674
1673
try :
1675
- blocks = _form_blocks (arrays , names , axes )
1676
1674
mgr = BlockManager (blocks , axes )
1677
1675
mgr ._consolidate_inplace ()
1678
1676
return mgr
1679
1677
except ValueError as e :
1680
- raise construction_error (len (arrays ), arrays [0 ].shape , axes , e )
1681
-
1682
-
1683
- def create_block_manager_from_array (
1684
- array , axes : List [Index ], dtype : Optional [Dtype ] = None
1685
- ) -> BlockManager :
1686
- assert isinstance (axes , list )
1687
- assert all (isinstance (x , Index ) for x in axes )
1688
-
1689
- # ensure we dont have any PandasArrays when we call get_block_type
1690
- # Note: just calling extract_array breaks tests that patch PandasArray._typ.
1691
- array = array if not isinstance (array , ABCPandasArray ) else array .to_numpy ()
1692
-
1693
- try :
1694
- # if we don't have a dtype specified, then try to convert objects
1695
- # on the entire block; this is to convert if we have datetimelike's
1696
- # embedded in an object type
1697
- if dtype is None and is_object_dtype (array .dtype ):
1698
- maybe_datetime = [
1699
- maybe_infer_to_datetimelike (instance ) for instance in array
1700
- ]
1701
- # don't convert (and copy) the objects if no type conversion occurs
1702
- if any (
1703
- not is_dtype_equal (instance .dtype , array .dtype )
1704
- for instance in maybe_datetime
1705
- ):
1706
- blocks = _form_blocks (maybe_datetime , axes [0 ], axes )
1707
- else :
1708
- blocks = [make_block (array , slice (0 , len (axes [0 ])))]
1709
- else :
1710
- blocks = [make_block (array , slice (0 , len (axes [0 ])), dtype = dtype )]
1711
- mgr = BlockManager (blocks , axes )
1712
- mgr ._consolidate_inplace ()
1713
- return mgr
1714
- except ValueError as e :
1715
- raise construction_error (array .shape [0 ], array .shape [1 :], axes , e )
1678
+ tot_items = sum (b .shape [0 ] for b in blocks )
1679
+ raise construction_error (tot_items , blocks [0 ].shape [1 :], axes , e )
1716
1680
1717
1681
1718
1682
def construction_error (tot_items , block_shape , axes , e = None ):
@@ -1739,36 +1703,60 @@ def construction_error(tot_items, block_shape, axes, e=None):
1739
1703
# -----------------------------------------------------------------------
1740
1704
1741
1705
1742
- def _form_blocks (arrays , names : Index , axes ) -> List [Block ]:
1743
- # put "leftover" items in float bucket, where else?
1744
- # generalize?
1745
-
1746
- if len (arrays ) != len (names ):
1747
- raise ValueError (
1748
- f"Number of arrays ({ len (arrays )} ) "
1749
- f"does not match index length ({ len (names )} )"
1750
- )
1751
-
1706
def _get_block_ranges(arrays, names, axes):
    """
    Bucket the input arrays by block type and record each one's placement range.

    Parameters
    ----------
    arrays : sequence of array-likes
        Each element must expose ``ndim``; elements that are not 1-D must
        also support ``len`` (single-array case) or ``shape`` (otherwise).
    names : Index
        Labels for ``arrays``; only consulted when more than one array
        is passed.
    axes : list
        ``axes[0]`` is the item axis that ``names`` is aligned against.

    Returns
    -------
    tuple
        ``(items_dict, extra_locs)``. ``items_dict`` maps the ``__name__``
        of the class returned by ``get_block_type`` to a list of
        ``((start, stop), array)`` pairs. ``extra_locs`` holds the running
        placement offsets at which the indexer reported ``-1``.
    """
    buckets: DefaultDict[str, List] = defaultdict(list)
    missing_locs = []

    # GH#39263
    # a lone array becomes one contiguous block starting at position 0
    if len(arrays) == 1:
        only = arrays[0]
        klass_name = get_block_type(only).__name__
        stop = 1 if only.ndim == 1 else len(only)
        buckets[klass_name].append(((0, stop), only))
        return buckets, missing_locs

    # otherwise assign placements column-by-column
    if names.equals(axes[0]):
        indexer = np.arange(len(names))
    else:
        assert names.intersection(axes[0]).is_unique
        indexer = names.get_indexer_for(axes[0])

    # ``start`` is tracked by hand (not via enumerate) because an array that
    # is not 1-D advances the offset by ``shape[1]`` rather than by one.
    start = 0
    for source_pos in indexer:
        if source_pos == -1:
            missing_locs.append(start)
            start += 1
            continue

        values = arrays[source_pos]
        klass_name = get_block_type(values).__name__
        width = 1 if values.ndim == 1 else values.shape[1]
        stop = start + width

        buckets[klass_name].append(((start, stop), values))
        start = stop

    return buckets, missing_locs
1769
1750
1770
- block_type = get_block_type (v )
1771
- items_dict [block_type .__name__ ].append ((i , k , v ))
1751
+
1752
+ def _form_blocks (arrays , names : Index , axes ) -> List [Block ]:
1753
+ # put "leftover" items in float bucket, where else?
1754
+ # generalize?
1755
+
1756
+ items_dict : DefaultDict [str , List ] = defaultdict (list )
1757
+ extra_locs = []
1758
+
1759
+ items_dict , extra_locs = _get_block_ranges (arrays , names , axes )
1772
1760
1773
1761
blocks : List [Block ] = []
1774
1762
if len (items_dict ["FloatBlock" ]):
@@ -1789,8 +1777,8 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1789
1777
1790
1778
if len (items_dict ["DatetimeTZBlock" ]):
1791
1779
dttz_blocks = [
1792
- make_block (array , klass = DatetimeTZBlock , placement = i , ndim = 2 )
1793
- for i , _ , array in items_dict ["DatetimeTZBlock" ]
1780
+ make_block (array , klass = DatetimeTZBlock , placement = slice ( * i ) , ndim = 2 )
1781
+ for i , array in items_dict ["DatetimeTZBlock" ]
1794
1782
]
1795
1783
blocks .extend (dttz_blocks )
1796
1784
@@ -1800,23 +1788,25 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1800
1788
1801
1789
if len (items_dict ["CategoricalBlock" ]) > 0 :
1802
1790
cat_blocks = [
1803
- make_block (array , klass = CategoricalBlock , placement = i , ndim = 2 )
1804
- for i , _ , array in items_dict ["CategoricalBlock" ]
1791
+ make_block (array , klass = CategoricalBlock , placement = slice ( * i ) , ndim = 2 )
1792
+ for i , array in items_dict ["CategoricalBlock" ]
1805
1793
]
1806
1794
blocks .extend (cat_blocks )
1807
1795
1808
1796
if len (items_dict ["ExtensionBlock" ]):
1809
1797
external_blocks = [
1810
- make_block (array , klass = ExtensionBlock , placement = i , ndim = 2 )
1811
- for i , _ , array in items_dict ["ExtensionBlock" ]
1798
+ make_block (array , klass = ExtensionBlock , placement = slice ( * i ) , ndim = 2 )
1799
+ for i , array in items_dict ["ExtensionBlock" ]
1812
1800
]
1813
1801
1814
1802
blocks .extend (external_blocks )
1815
1803
1816
1804
if len (items_dict ["ObjectValuesExtensionBlock" ]):
1817
1805
external_blocks = [
1818
- make_block (array , klass = ObjectValuesExtensionBlock , placement = i , ndim = 2 )
1819
- for i , _ , array in items_dict ["ObjectValuesExtensionBlock" ]
1806
+ make_block (
1807
+ array , klass = ObjectValuesExtensionBlock , placement = slice (* i ), ndim = 2
1808
+ )
1809
+ for i , array in items_dict ["ObjectValuesExtensionBlock" ]
1820
1810
]
1821
1811
1822
1812
blocks .extend (external_blocks )
@@ -1849,10 +1839,10 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
1849
1839
return [block ]
1850
1840
1851
1841
1852
- def _multi_blockify (tuples , dtype : Optional [ Dtype ] = None ):
1842
+ def _multi_blockify (tuples ):
1853
1843
""" return an array of blocks that potentially have different dtypes """
1854
1844
# group by dtype
1855
- grouper = itertools .groupby (tuples , lambda x : x [2 ].dtype )
1845
+ grouper = itertools .groupby (tuples , lambda x : x [1 ].dtype )
1856
1846
1857
1847
new_blocks = []
1858
1848
for dtype , tup_block in grouper :
@@ -1880,16 +1870,27 @@ def _shape_compat(x) -> Shape:
1880
1870
else :
1881
1871
return x .shape
1882
1872
1883
- placement , names , arrays = zip (* tuples )
1873
+ placement , arrays = zip (* tuples )
1884
1874
1885
1875
first = arrays [0 ]
1886
- shape = (len (arrays ),) + _shape_compat (first )
1887
1876
1877
+ # GH#39263
1878
+ # if only one array is passed, avoid copying it
1879
+ if len (arrays ) == 1 :
1880
+ arr = _asarray_compat (first )
1881
+ # except if the dtype doesn't match
1882
+ if dtype is not None and not is_dtype_equal (first .dtype , dtype ):
1883
+ arr = _asarray_compat (first ).astype (dtype )
1884
+ if len (_shape_compat (first )) < 2 :
1885
+ arr = arr .reshape (1 , - 1 )
1886
+ return arr , slice (* placement [0 ])
1887
+
1888
+ shape = (len (arrays ),) + _shape_compat (first )
1888
1889
stacked = np .empty (shape , dtype = dtype )
1889
1890
for i , arr in enumerate (arrays ):
1890
1891
stacked [i ] = _asarray_compat (arr )
1891
1892
1892
- return stacked , placement
1893
+ return stacked , list ( itertools . chain ( * ( list ( range ( * p )) for p in placement )))
1893
1894
1894
1895
1895
1896
def _interleaved_dtype (blocks : Sequence [Block ]) -> Optional [DtypeObj ]:
0 commit comments