27
27
from pandas .core .dtypes .cast import (
28
28
find_common_type ,
29
29
infer_dtype_from_scalar ,
30
- maybe_infer_to_datetimelike ,
31
30
maybe_promote ,
32
31
)
33
32
from pandas .core .dtypes .common import (
34
33
DT64NS_DTYPE ,
35
34
is_dtype_equal ,
36
35
is_extension_array_dtype ,
37
36
is_list_like ,
38
- is_object_dtype ,
39
37
)
40
38
from pandas .core .dtypes .concat import concat_compat
41
39
from pandas .core .dtypes .dtypes import ExtensionDtype
@@ -328,7 +326,7 @@ def _verify_integrity(self) -> None:
328
326
if block .shape [1 :] != mgr_shape [1 :]:
329
327
raise construction_error (tot_items , block .shape [1 :], self .axes )
330
328
if len (self .items ) != tot_items :
331
- raise AssertionError (
329
+ raise ValueError (
332
330
"Number of manager items must equal union of "
333
331
f"block items\n # manager items: { len (self .items )} , # "
334
332
f"tot_items: { tot_items } "
@@ -1671,48 +1669,14 @@ def create_block_manager_from_arrays(
1671
1669
# ensure we don't have any PandasArrays when we call get_block_type
1672
1670
# Note: just calling extract_array breaks tests that patch PandasArray._typ.
1673
1671
arrays = [x if not isinstance (x , ABCPandasArray ) else x .to_numpy () for x in arrays ]
1672
+ blocks = _form_blocks (arrays , names , axes )
1674
1673
try :
1675
- blocks = _form_blocks (arrays , names , axes )
1676
- mgr = BlockManager (blocks , axes )
1677
- mgr ._consolidate_inplace ()
1678
- return mgr
1679
- except ValueError as e :
1680
- raise construction_error (len (arrays ), arrays [0 ].shape , axes , e )
1681
-
1682
-
1683
- def create_block_manager_from_array (
1684
- array , axes : List [Index ], dtype : Optional [Dtype ] = None
1685
- ) -> BlockManager :
1686
- assert isinstance (axes , list )
1687
- assert all (isinstance (x , Index ) for x in axes )
1688
-
1689
# ensure we don't have any PandasArrays when we call get_block_type
1690
- # Note: just calling extract_array breaks tests that patch PandasArray._typ.
1691
- array = array if not isinstance (array , ABCPandasArray ) else array .to_numpy ()
1692
-
1693
- try :
1694
- # if we don't have a dtype specified, then try to convert objects
1695
- # on the entire block; this is to convert if we have datetimelike's
1696
- # embedded in an object type
1697
- if dtype is None and is_object_dtype (array .dtype ):
1698
- maybe_datetime = [
1699
- maybe_infer_to_datetimelike (instance ) for instance in array
1700
- ]
1701
- # don't convert (and copy) the objects if no type conversion occurs
1702
- if any (
1703
- not is_dtype_equal (instance .dtype , array .dtype )
1704
- for instance in maybe_datetime
1705
- ):
1706
- blocks = _form_blocks (maybe_datetime , axes [0 ], axes )
1707
- else :
1708
- blocks = [make_block (array , slice (0 , len (axes [0 ])))]
1709
- else :
1710
- blocks = [make_block (array , slice (0 , len (axes [0 ])), dtype = dtype )]
1711
1674
mgr = BlockManager (blocks , axes )
1712
1675
mgr ._consolidate_inplace ()
1713
1676
return mgr
1714
1677
except ValueError as e :
1715
- raise construction_error (array .shape [0 ], array .shape [1 :], axes , e )
1678
+ tot_items = sum (b .shape [0 ] for b in blocks )
1679
+ raise construction_error (tot_items , blocks [0 ].shape [1 :], axes , e )
1716
1680
1717
1681
1718
1682
def construction_error (tot_items , block_shape , axes , e = None ):
@@ -1743,32 +1707,41 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1743
1707
# put "leftover" items in float bucket, where else?
1744
1708
# generalize?
1745
1709
1746
- if len (arrays ) != len (names ):
1747
- raise ValueError (
1748
- f"Number of arrays ({ len (arrays )} ) "
1749
- f"does not match index length ({ len (names )} )"
1750
- )
1751
-
1752
1710
items_dict : DefaultDict [str , List ] = defaultdict (list )
1753
1711
extra_locs = []
1754
1712
1755
- names_idx = names
1756
- if names_idx .equals (axes [0 ]):
1757
- names_indexer = np .arange (len (names_idx ))
1713
+ if len (arrays ) == 1 :
1714
+ first = arrays [0 ]
1715
+ block_type = get_block_type (first )
1716
+ if first .ndim == 1 :
1717
+ end = 1
1718
+ else :
1719
+ end = len (first )
1720
+ items_dict [block_type .__name__ ].append (((0 , end ), first ))
1758
1721
else :
1759
- assert names_idx .intersection (axes [0 ]).is_unique
1760
- names_indexer = names_idx .get_indexer_for (axes [0 ])
1761
-
1762
- for i , name_idx in enumerate (names_indexer ):
1763
- if name_idx == - 1 :
1764
- extra_locs .append (i )
1765
- continue
1722
+ names_idx = names
1723
+ if names_idx .equals (axes [0 ]):
1724
+ names_indexer = np .arange (len (names_idx ))
1725
+ else :
1726
+ assert names_idx .intersection (axes [0 ]).is_unique
1727
+ names_indexer = names_idx .get_indexer_for (axes [0 ])
1728
+
1729
+ i = 0
1730
+ for name_idx in names_indexer :
1731
+ if name_idx == - 1 :
1732
+ extra_locs .append (i )
1733
+ i += 1
1734
+ continue
1766
1735
1767
- k = names [name_idx ]
1768
- v = arrays [name_idx ]
1736
+ v = arrays [name_idx ]
1769
1737
1770
- block_type = get_block_type (v )
1771
- items_dict [block_type .__name__ ].append ((i , k , v ))
1738
+ if v .ndim == 2 :
1739
+ ei = i + v .shape [1 ]
1740
+ else :
1741
+ ei = i + 1
1742
+ block_type = get_block_type (v )
1743
+ items_dict [block_type .__name__ ].append (((i , ei ), v ))
1744
+ i = ei
1772
1745
1773
1746
blocks : List [Block ] = []
1774
1747
if len (items_dict ["FloatBlock" ]):
@@ -1789,8 +1762,8 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1789
1762
1790
1763
if len (items_dict ["DatetimeTZBlock" ]):
1791
1764
dttz_blocks = [
1792
- make_block (array , klass = DatetimeTZBlock , placement = i , ndim = 2 )
1793
- for i , _ , array in items_dict ["DatetimeTZBlock" ]
1765
+ make_block (array , klass = DatetimeTZBlock , placement = slice ( * i ) , ndim = 2 )
1766
+ for i , array in items_dict ["DatetimeTZBlock" ]
1794
1767
]
1795
1768
blocks .extend (dttz_blocks )
1796
1769
@@ -1800,23 +1773,25 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
1800
1773
1801
1774
if len (items_dict ["CategoricalBlock" ]) > 0 :
1802
1775
cat_blocks = [
1803
- make_block (array , klass = CategoricalBlock , placement = i , ndim = 2 )
1804
- for i , _ , array in items_dict ["CategoricalBlock" ]
1776
+ make_block (array , klass = CategoricalBlock , placement = slice ( * i ) , ndim = 2 )
1777
+ for i , array in items_dict ["CategoricalBlock" ]
1805
1778
]
1806
1779
blocks .extend (cat_blocks )
1807
1780
1808
1781
if len (items_dict ["ExtensionBlock" ]):
1809
1782
external_blocks = [
1810
- make_block (array , klass = ExtensionBlock , placement = i , ndim = 2 )
1811
- for i , _ , array in items_dict ["ExtensionBlock" ]
1783
+ make_block (array , klass = ExtensionBlock , placement = slice ( * i ) , ndim = 2 )
1784
+ for i , array in items_dict ["ExtensionBlock" ]
1812
1785
]
1813
1786
1814
1787
blocks .extend (external_blocks )
1815
1788
1816
1789
if len (items_dict ["ObjectValuesExtensionBlock" ]):
1817
1790
external_blocks = [
1818
- make_block (array , klass = ObjectValuesExtensionBlock , placement = i , ndim = 2 )
1819
- for i , _ , array in items_dict ["ObjectValuesExtensionBlock" ]
1791
+ make_block (
1792
+ array , klass = ObjectValuesExtensionBlock , placement = slice (* i ), ndim = 2
1793
+ )
1794
+ for i , array in items_dict ["ObjectValuesExtensionBlock" ]
1820
1795
]
1821
1796
1822
1797
blocks .extend (external_blocks )
@@ -1849,10 +1824,10 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
1849
1824
return [block ]
1850
1825
1851
1826
1852
- def _multi_blockify (tuples , dtype : Optional [ Dtype ] = None ):
1827
+ def _multi_blockify (tuples ):
1853
1828
""" return an array of blocks that potentially have different dtypes """
1854
1829
# group by dtype
1855
- grouper = itertools .groupby (tuples , lambda x : x [2 ].dtype )
1830
+ grouper = itertools .groupby (tuples , lambda x : x [1 ].dtype )
1856
1831
1857
1832
new_blocks = []
1858
1833
for dtype , tup_block in grouper :
@@ -1880,16 +1855,25 @@ def _shape_compat(x) -> Shape:
1880
1855
else :
1881
1856
return x .shape
1882
1857
1883
- placement , names , arrays = zip (* tuples )
1858
+ placement , arrays = zip (* tuples )
1884
1859
1885
1860
first = arrays [0 ]
1886
- shape = (len (arrays ),) + _shape_compat (first )
1887
1861
1862
+ if len (arrays ) == 1 :
1863
+ if dtype is None or is_dtype_equal (first .dtype , dtype ):
1864
+ arr = _asarray_compat (first )
1865
+ else :
1866
+ arr = _asarray_compat (first ).astype (dtype )
1867
+ if len (_shape_compat (first )) < 2 :
1868
+ arr = arr .reshape (1 , - 1 )
1869
+ return arr , slice (* placement [0 ])
1870
+
1871
+ shape = (len (arrays ),) + _shape_compat (first )
1888
1872
stacked = np .empty (shape , dtype = dtype )
1889
1873
for i , arr in enumerate (arrays ):
1890
1874
stacked [i ] = _asarray_compat (arr )
1891
1875
1892
- return stacked , placement
1876
+ return stacked , list ( itertools . chain ( * ( list ( range ( * p )) for p in placement )))
1893
1877
1894
1878
1895
1879
def _interleaved_dtype (blocks : Sequence [Block ]) -> Optional [DtypeObj ]:
0 commit comments