From 291ac7039de6a6e24de498fa493dcdec1482fb07 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Aug 2022 09:52:09 -0700 Subject: [PATCH 1/4] REF: remove axes from Managers --- pandas/_libs/properties.pyx | 5 +- pandas/core/arraylike.py | 6 ++ pandas/core/frame.py | 47 ++++++--- pandas/core/generic.py | 130 ++++++++++++++++++++----- pandas/core/groupby/generic.py | 10 +- pandas/core/groupby/groupby.py | 3 +- pandas/core/groupby/ops.py | 11 ++- pandas/core/internals/array_manager.py | 3 + pandas/core/internals/base.py | 5 +- pandas/core/internals/construction.py | 15 +-- pandas/core/internals/managers.py | 4 + pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/merge.py | 4 +- pandas/core/series.py | 16 ++- 14 files changed, 199 insertions(+), 62 deletions(-) diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 3354290a5f535..e6cbfb2c11017 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -61,9 +61,10 @@ cdef class AxisProperty: if obj is None: # Only instances have _mgr, not classes return self + if self.axis == 0: + return obj._index else: - axes = obj._mgr.axes - return axes[self.axis] + return obj._columns def __set__(self, obj, value): obj._set_axis(self.axis, value) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 4e8e4ea7e8d87..996bfd776f981 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -358,6 +358,12 @@ def _reconstruct(result): return result if isinstance(result, BlockManager): # we went through BlockManager.apply e.g. np.sqrt + # TODO: any cases that aren't index/columns-preserving? 
+ if self.ndim == 1: + reconstruct_kwargs["index"] = self.index + else: + reconstruct_kwargs["index"] = self.index + reconstruct_kwargs["columns"] = self.columns result = self._constructor(result, **reconstruct_kwargs, copy=False) else: # we converted an array, lost our axes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 22ccd1d763769..d8074ca18a8eb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -590,7 +590,7 @@ class DataFrame(NDFrame, OpsMixin): 2 2 3 """ - _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set + _internal_names_set = {"_columns", "columns", "_index", "index"} | NDFrame._internal_names_set _typ = "dataframe" _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray) _accessors: set[str] = {"sparse"} @@ -621,11 +621,20 @@ def __init__( dtype = self._validate_dtype(dtype) if isinstance(data, DataFrame): + if index is None and columns is None: + index = data.index + columns = data.columns data = data._mgr if isinstance(data, (BlockManager, ArrayManager)): # first check if a Manager is passed without any other arguments # -> use fastpath (without checking Manager type) + if index is None or columns is None: + assert False + if not index.equals(data.axes[-1]):#index is not data.axes[-1]: + assert False + if not columns.equals(data.axes[0]):#columns is not data.axes[0]: + assert False if index is None and columns is None and dtype is None and not copy: # GH#33357 fastpath NDFrame.__init__(self, data) @@ -751,7 +760,7 @@ def __init__( index, # type: ignore[arg-type] dtype, ) - mgr = arrays_to_mgr( + mgr, _, _ = arrays_to_mgr( arrays, columns, index, @@ -794,7 +803,7 @@ def __init__( construct_1d_arraylike_from_scalar(data, len(index), dtype) for _ in range(len(columns)) ] - mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager) + mgr, _, _ = arrays_to_mgr(values, columns, index, dtype=None, typ=manager) else: arr2d = construct_2d_arraylike_from_scalar( data, @@ -2399,9 +2408,10 @@ def 
maybe_reorder( columns = columns.drop(exclude) manager = get_option("mode.data_manager") - mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) + mgr, index, columns = arrays_to_mgr(arrays, columns, result_index, typ=manager) - return cls(mgr) + # FIXME: get axes without mgr.axes + return cls(mgr, index=index, columns=columns) def to_records( self, index: bool = True, column_dtypes=None, index_dtypes=None @@ -2603,7 +2613,7 @@ def _from_arrays( columns = ensure_index(columns) if len(columns) != len(arrays): raise ValueError("len(columns) must match len(arrays)") - mgr = arrays_to_mgr( + mgr, index, columns = arrays_to_mgr( arrays, columns, index, @@ -2611,7 +2621,7 @@ def _from_arrays( verify_integrity=verify_integrity, typ=manager, ) - return cls(mgr) + return cls(mgr, index=index, columns=columns) @doc( storage_options=_shared_docs["storage_options"], @@ -3729,7 +3739,7 @@ def _ixs(self, i: int, axis: int = 0) -> Series: # if we are a copy, mark as such copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None - result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__( + result = self._constructor_sliced(new_mgr, index=self.columns, name=self.index[i]).__finalize__( self ) result._set_is_copy(self, copy=copy) @@ -4267,7 +4277,7 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: name = self.columns[loc] klass = self._constructor_sliced # We get index=self.index bc values is a SingleDataManager - return klass(values, name=name, fastpath=True).__finalize__(self) + return klass(values, name=name, index=self.index, fastpath=True).__finalize__(self) # ---------------------------------------------------------------------- # Lookup Caching @@ -6942,8 +6952,12 @@ def sort_values( # type: ignore[override] new_data.set_axis( self._get_block_manager_axis(axis), default_index(len(indexer)) ) - - result = self._constructor(new_data) + # FIXME: get axes without mgr.axes + axes_dict = {} + 
axes_dict["index"] = new_data.axes[-1] + if self.ndim == 2: + axes_dict["columns"] = new_data.axes[0] + result = self._constructor(new_data, **axes_dict) if inplace: return self._update_inplace(result) else: @@ -7627,7 +7641,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): # i.e. scalar, faster than checking np.ndim(right) == 0 with np.errstate(all="ignore"): bm = self._mgr.apply(array_op, right=right) - return self._constructor(bm) + return self._constructor(bm, index=self.index, columns=self.columns) elif isinstance(right, DataFrame): assert self.index.equals(right.index) @@ -7648,7 +7662,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): right._mgr, # type: ignore[arg-type] array_op, ) - return self._constructor(bm) + return self._constructor(bm, index=self.index, columns=self.columns) elif isinstance(right, Series) and axis == 1: # axis=1 means we want to operate row-by-row @@ -10900,7 +10914,8 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) - out = df._constructor(res).iloc[0] + # FIXME: get axes without mgr.axes + out = df._constructor(res, index=res.axes[1], columns=res.axes[0]).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: @@ -11665,9 +11680,9 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: _info_axis_name = "columns" index = properties.AxisProperty( - axis=1, doc="The index (row labels) of the DataFrame." + axis=0, doc="The index (row labels) of the DataFrame." 
) - columns = properties.AxisProperty(axis=0, doc="The column labels of the DataFrame.") + columns = properties.AxisProperty(axis=1, doc="The column labels of the DataFrame.") @property def _AXIS_NUMBERS(self) -> dict[str, int]: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aa9845a2abb78..f92c297c77293 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -276,6 +276,13 @@ def __init__( object.__setattr__(self, "_attrs", attrs) object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) + # FIXME: get axes without data.axes + if self.ndim == 1: + object.__setattr__(self, "_index", data.axes[0]) + else: + object.__setattr__(self, "_index", data.axes[1]) + object.__setattr__(self, "_columns", data.axes[0]) + @classmethod def _init_mgr( cls, @@ -820,8 +827,29 @@ def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: labels = ensure_index(labels) - self._mgr.set_axis(axis, labels) + self._validate_set_axis(axis, labels) self._clear_item_cache() + if axis == 0: + object.__setattr__(self, "_index", labels) + else: + object.__setattr__(self, "_columns", labels) + + @final + def _validate_set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + old_len = self.shape[axis] + new_len = len(new_labels) + + if axis == 1 and len(self.columns) == 0: + # If we are setting the index on a DataFrame with no columns, + # it is OK to change the length. 
+ pass + + elif new_len != old_len: + raise ValueError( + f"Length mismatch: Expected axis has {old_len} elements, new " + f"values have {new_len} elements" + ) @final def swapaxes( @@ -1495,7 +1523,8 @@ def blk_func(values: ArrayLike): return operator.neg(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._constructor(new_data) + axes_dict = self._construct_axes_dict() + res = self._constructor(new_data, **axes_dict) return res.__finalize__(self, method="__neg__") @final @@ -1510,7 +1539,8 @@ def blk_func(values: ArrayLike): return operator.pos(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._constructor(new_data) + axes_dict = self._construct_axes_dict() + res = self._constructor(new_data, **axes_dict) return res.__finalize__(self, method="__pos__") @final @@ -1520,7 +1550,8 @@ def __invert__(self: NDFrameT) -> NDFrameT: return self new_data = self._mgr.apply(operator.invert) - return self._constructor(new_data).__finalize__(self, method="__invert__") + axes_dict = self._construct_axes_dict() + return self._constructor(new_data, **axes_dict).__finalize__(self, method="__invert__") @final def __nonzero__(self) -> NoReturn: @@ -1647,7 +1678,8 @@ def abs(self: NDFrameT) -> NDFrameT: 3 7 40 -50 """ res_mgr = self._mgr.apply(np.abs) - return self._constructor(res_mgr).__finalize__(self, name="abs") + axes_dict = self._construct_axes_dict() + return self._constructor(res_mgr, **axes_dict).__finalize__(self, name="abs") @final def __abs__(self: NDFrameT) -> NDFrameT: @@ -3891,7 +3923,10 @@ def _take( verify=True, convert_indices=convert_indices, ) - return self._constructor(new_data).__finalize__(self, method="take") + axes_dict = self._construct_axes_dict() + #axes_dict[axis] = self.axes[axis].take(indices) # FIXME: get axes without mgr.axes + axes_dict[self._get_axis_name(axis)] = new_data.axes[self._get_block_manager_axis(axis)] + return self._constructor(new_data, **axes_dict).__finalize__(self, 
method="take") def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT: """ @@ -4103,8 +4138,16 @@ def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT: Slicing with this method is *always* positional. """ assert isinstance(slobj, slice), type(slobj) - axis = self._get_block_manager_axis(axis) - result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) + + axis_name = self._get_axis_name(axis) + new_idx = self.axes[axis][slobj] + axes_dict = self._construct_axes_dict() + axes_dict[axis_name] = new_idx + + bm_axis = self._get_block_manager_axis(axis) + new_mgr = self._mgr.get_slice(slobj, axis=bm_axis) + + result = self._constructor(new_mgr, **axes_dict) result = result.__finalize__(self) # this could be a view @@ -4595,7 +4638,12 @@ def _drop_axis( allow_dups=True, only_slice=only_slice, ) - result = self._constructor(new_mgr) + # FIXME: get axes without mgr.axes + axes_dict = {} + axes_dict["index"] = new_mgr.axes[-1] + if self.ndim == 2: + axes_dict["columns"] = new_mgr.axes[0] + result = self._constructor(new_mgr, **axes_dict) if self.ndim == 1: result.name = self.name @@ -5056,7 +5104,12 @@ def sort_index( axis = 1 if isinstance(self, ABCDataFrame) else 0 new_data.set_axis(axis, default_index(len(indexer))) - result = self._constructor(new_data) + axes_dict = {}#self._construct_axes_dict() + # FIXME: get axes without mgr.axes + axes_dict["index"] = new_data.axes[-1] + if self.ndim == 2: + axes_dict["columns"] = new_data.axes[0] + result = self._constructor(new_data, **axes_dict) if inplace: return self._update_inplace(result) @@ -5393,7 +5446,13 @@ def _reindex_with_indexers( if copy and new_data is self._mgr: new_data = new_data.copy() - return self._constructor(new_data).__finalize__(self) + # FIXME: get axes without mgr.axes + if self.ndim == 1: + axes_dict = {"index": new_data.axes[0]} + else: + axes_dict = {"index": new_data.axes[1], "columns": new_data.axes[0]} + + return self._constructor(new_data, 
**axes_dict).__finalize__(self) def filter( self: NDFrameT, @@ -6018,7 +6077,8 @@ def _consolidate(self): """ f = lambda: self._mgr.consolidate() cons_data = self._protect_consolidate(f) - return self._constructor(cons_data).__finalize__(self) + axes_dict = self._construct_axes_dict() + return self._constructor(cons_data, **axes_dict).__finalize__(self) @property def _is_mixed_type(self) -> bool_t: @@ -6050,11 +6110,23 @@ def _check_inplace_setting(self, value) -> bool_t: @final def _get_numeric_data(self: NDFrameT) -> NDFrameT: - return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) + # FIXME: get axes without mgr.axes + mgr = self._mgr.get_numeric_data() + axes_dict = {} + axes_dict["index"] = mgr.axes[-1] + if self.ndim == 2: + axes_dict["columns"] = mgr.axes[0] + return self._constructor(mgr, **axes_dict).__finalize__(self) @final def _get_bool_data(self): - return self._constructor(self._mgr.get_bool_data()).__finalize__(self) + # FIXME: get axes without mgr.axes + mgr = self._mgr.get_bool_data() + axes_dict = {} + axes_dict["index"] = mgr.axes[-1] + if self.ndim == 2: + axes_dict["columns"] = mgr.axes[0] + return self._constructor(mgr, **axes_dict).__finalize__(self) # ---------------------------------------------------------------------- # Internal Interface Methods @@ -6264,7 +6336,8 @@ def astype( else: # else, only a single dtype is given new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) - return self._constructor(new_data).__finalize__(self, method="astype") + axes_dict = self._construct_axes_dict() + return self._constructor(new_data, **axes_dict).__finalize__(self, method="astype") # GH 33113: handle empty frame or series if not results: @@ -6393,7 +6466,8 @@ def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: """ data = self._mgr.copy(deep=deep) self._clear_item_cache() - return self._constructor(data).__finalize__(self, method="copy") + axes_dict = self._construct_axes_dict() + return 
self._constructor(data, **axes_dict).__finalize__(self, method="copy") @final def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT: @@ -6436,13 +6510,15 @@ def _convert( validate_bool_kwarg(datetime, "datetime") validate_bool_kwarg(numeric, "numeric") validate_bool_kwarg(timedelta, "timedelta") + axes_dict = self._construct_axes_dict() return self._constructor( self._mgr.convert( datetime=datetime, numeric=numeric, timedelta=timedelta, copy=True, - ) + ), + **axes_dict ).__finalize__(self) @final @@ -6954,7 +7030,8 @@ def fillna( else: raise ValueError(f"invalid fill value with a {type(value)}") - result = self._constructor(new_data) + axes_dict = self._construct_axes_dict() + result = self._constructor(new_data, **axes_dict) if inplace: return self._update_inplace(result) else: @@ -9615,7 +9692,12 @@ def _align_series( if copy and fdata is self._mgr: fdata = fdata.copy() - left = self._constructor(fdata) + # FIXME: get axes without mgr.axes + if self.ndim == 1: + axes_dict = {"index": fdata.axes[0]} + else: + axes_dict = {"index": fdata.axes[1], "columns": fdata.axes[0]} + left = self._constructor(fdata, **axes_dict) if ridx is None: right = other @@ -9757,7 +9839,8 @@ def _where( self._check_inplace_setting(other) new_data = self._mgr.putmask(mask=cond, new=other, align=align) - result = self._constructor(new_data) + axes_dict = self._construct_axes_dict() + result = self._constructor(new_data, **axes_dict) return self._update_inplace(result) else: @@ -9766,7 +9849,8 @@ def _where( cond=cond, align=align, ) - result = self._constructor(new_data) + axes_dict = self._construct_axes_dict() + result = self._constructor(new_data, **axes_dict) return result.__finalize__(self) @overload @@ -11245,8 +11329,8 @@ def block_accum_func(blk_values): return result result = self._mgr.apply(block_accum_func) - - return self._constructor(result).__finalize__(self, method=name) + axes_dict = self._construct_axes_dict() + return self._constructor(result, 
**axes_dict).__finalize__(self, method=name) def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): return self._accum_func( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 33f3ffa34489e..800eb94d574e0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -175,7 +175,9 @@ def _wrap_agged_manager(self, mgr: Manager) -> Series: else: mgr = cast(Manager2D, mgr) single = mgr.iget(0) - ser = self.obj._constructor(single, name=self.obj.name) + # FIXME: get axes without mgr.axes + index = single.axes[0] + ser = self.obj._constructor(single, index=index, name=self.obj.name) # NB: caller is responsible for setting ser.index return ser @@ -1654,14 +1656,16 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: rows = mgr.shape[1] if mgr.shape[0] > 0 else 0 index = Index(range(rows)) mgr.set_axis(1, index) - result = self.obj._constructor(mgr) + # FIXME: get axes without mgr.axes + result = self.obj._constructor(mgr, index=mgr.axes[1], columns=mgr.axes[0]) self._insert_inaxis_grouper_inplace(result) result = result._consolidate() else: index = self.grouper.result_index mgr.set_axis(1, index) - result = self.obj._constructor(mgr) + # FIXME: get axes without mgr.axes + result = self.obj._constructor(mgr, index=mgr.axes[1], columns=mgr.axes[0]) if self.axis == 1: result = result.T diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b963b85b93a31..63fce060ab887 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3864,7 +3864,8 @@ def blk_func(values: ArrayLike) -> ArrayLike: if is_ser: out = self._wrap_agged_manager(res_mgr) else: - out = obj._constructor(res_mgr) + # FIXME: get axes without mgr.axes + out = obj._constructor(res_mgr, index=res_mgr.axes[1], columns=res_mgr.axes[0]) return self._wrap_aggregated_output(out) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 
ba808e1f2e07f..46440f59adcfc 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1348,7 +1348,8 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - ser = sdata._constructor(mgr, name=sdata.name, fastpath=True) + index = sdata.index[slice_obj] + ser = sdata._constructor(mgr, index=index, name=sdata.name, fastpath=True) return ser.__finalize__(sdata, method="groupby") @@ -1360,7 +1361,13 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - df = sdata._constructor(mgr) + if self.axis == 0: + index = sdata.index[slice_obj] + columns = sdata.columns + else: + index = sdata.index + columns = sdata.columns[slice_obj] + df = sdata._constructor(mgr, index=index, columns=columns) return df.__finalize__(sdata, method="groupby") diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 53f8486074ef9..0fda74149fdf4 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1180,6 +1180,9 @@ def as_array( return result + def __len__(self) -> int: + return len(self.arrays) + class SingleArrayManager(BaseArrayManager, SingleDataManager): diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index ddc4495318568..c695f2c1e6ff1 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -45,7 +45,7 @@ def items(self) -> Index: @final def __len__(self) -> int: - return len(self.items) + raise AbstractMethodError(self) @property def ndim(self) -> int: @@ -160,6 +160,9 @@ class SingleDataManager(DataManager): def ndim(self) -> Literal[1]: return 1 + def __len__(self) -> int: + return len(self.arrays[0]) + @final @property def array(self) -> ArrayLike: diff --git 
a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 6aad8dbd940d4..708adc6201fe1 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -107,7 +107,7 @@ def arrays_to_mgr( verify_integrity: bool = True, typ: str | None = None, consolidate: bool = True, -) -> Manager: +) -> tuple[Manager, Index, Index]: """ Segregate Series based on type and coerce into matrices. @@ -152,13 +152,14 @@ def arrays_to_mgr( axes = [columns, index] if typ == "block": - return create_block_manager_from_column_arrays( + mgr = create_block_manager_from_column_arrays( arrays, axes, consolidate=consolidate ) elif typ == "array": - return ArrayManager(arrays, [index, columns]) + mgr = ArrayManager(arrays, [index, columns]) else: raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") + return mgr, index, columns def rec_array_to_mgr( @@ -204,7 +205,7 @@ def rec_array_to_mgr( if columns is None: columns = arr_columns - mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ) + mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ)[0] if copy: mgr = mgr.copy() @@ -242,7 +243,7 @@ def mgr_to_mgr(mgr, typ: str, copy: bool = True): new_mgr = mgr else: if mgr.ndim == 2: - new_mgr = arrays_to_mgr( + new_mgr, _, _ = arrays_to_mgr( mgr.arrays, mgr.axes[0], mgr.axes[1], typ="block" ) else: @@ -314,7 +315,7 @@ def ndarray_to_mgr( else: columns = ensure_index(columns) - return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ) + return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)[0] elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype): # i.e. Datetime64TZ, PeriodDtype @@ -491,7 +492,7 @@ def dict_to_mgr( # dtype check to exclude e.g. 
range objects, scalars arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] - return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) + return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)[0] def nested_data_to_arrays( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 61037a46f4f92..f1fd5aed7dcf4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1856,6 +1856,10 @@ def _consolidate_inplace(self) -> None: self._known_consolidated = True self._rebuild_blknos_and_blklocs() + def __len__(self) -> int: + # TODO: cache? would need to invalidate akin to blklocs + return sum(x.shape[1] for x in self.blocks) + class SingleBlockManager(BaseBlockManager, SingleDataManager): """manage a single block with""" diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3d9e4f0c69c62..ac60aaf591c27 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -621,7 +621,7 @@ def get_result(self): new_data._consolidate_inplace() cons = sample._constructor - return cons(new_data).__finalize__(self, method="concat") + return cons(new_data, index=self.new_axes[1], columns=self.new_axes[0]).__finalize__(self, method="concat") def _get_result_dim(self) -> int: if self._is_series and self.bm_axis == 1: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 524b26ff07769..3aae45e19bddf 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -746,7 +746,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - left = left._constructor(lmgr) + left = left._constructor(lmgr, index=join_index, columns=left.columns) left.index = join_index if right_indexer is not None: @@ -759,7 +759,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - right = right._constructor(rmgr) + right = right._constructor(rmgr, index=join_index, 
columns=right.columns) right.index = join_index from pandas import concat diff --git a/pandas/core/series.py b/pandas/core/series.py index fc97a8f04e0cc..022004fa469c6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -318,7 +318,7 @@ class Series(base.IndexOpsMixin, NDFrame): _name: Hashable _metadata: list[str] = ["name"] - _internal_names_set = {"index"} | NDFrame._internal_names_set + _internal_names_set = {"_index", "index"} | NDFrame._internal_names_set _accessors = {"dt", "cat", "str", "sparse"} _hidden_attrs = ( base.IndexOpsMixin._hidden_attrs @@ -351,6 +351,12 @@ def __init__( fastpath: bool = False, ) -> None: + if isinstance(data, (SingleBlockManager, SingleArrayManager)): + if index is None: + assert False + if not index.equals(data.axes[0]):#index is not data.axes[0]: + assert False + if ( isinstance(data, (SingleBlockManager, SingleArrayManager)) and index is None @@ -592,7 +598,8 @@ def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: pass # The ensure_index call above ensures we have an Index object - self._mgr.set_axis(axis, labels) + self._validate_set_axis(0, labels) + object.__setattr__(self, "_index", labels) # ndarray compatibility @property @@ -1071,7 +1078,8 @@ def _get_values_tuple(self, key: tuple): def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: new_mgr = self._mgr.getitem_mgr(indexer) - return self._constructor(new_mgr).__finalize__(self) + new_index = self.index[indexer] + return self._constructor(new_mgr, index=new_index).__finalize__(self) def _get_value(self, label, takeable: bool = False): """ @@ -1946,7 +1954,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: columns = Index([name]) mgr = self._mgr.to_2d_mgr(columns) - df = self._constructor_expanddim(mgr) + df = self._constructor_expanddim(mgr, index=self.index, columns=columns) return df.__finalize__(self, method="to_frame") def _set_name(self, name, inplace=False) -> Series: From 
87eafb4c783ffcd5ba9f148f182c725586384834 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Aug 2022 14:35:56 -0700 Subject: [PATCH 2/4] fix some tests --- pandas/core/apply.py | 3 +- pandas/core/arraylike.py | 6 +--- pandas/core/frame.py | 63 ++++++++++++++++++++++------------ pandas/core/generic.py | 58 +++++++++++++++---------------- pandas/core/groupby/generic.py | 9 ++--- pandas/core/groupby/groupby.py | 3 +- pandas/core/indexing.py | 14 ++++++++ pandas/core/series.py | 7 ++-- 8 files changed, 98 insertions(+), 65 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 48822d9d01ddb..372d932629259 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -726,7 +726,8 @@ def apply(self) -> DataFrame | Series: with np.errstate(all="ignore"): results = self.obj._mgr.apply("apply", func=self.f) # _constructor will retain self.index and self.columns - return self.obj._constructor(data=results) + axes_dict = self.obj._construct_axes_dict() + return self.obj._constructor(data=results, **axes_dict) # broadcasting if self.result_type == "broadcast": diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 996bfd776f981..871eb187802f0 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -359,11 +359,7 @@ def _reconstruct(result): if isinstance(result, BlockManager): # we went through BlockManager.apply e.g. np.sqrt # TODO: any cases that aren't index/columns-preserving? 
- if self.ndim == 1: - reconstruct_kwargs["index"] = self.index - else: - reconstruct_kwargs["index"] = self.index - reconstruct_kwargs["columns"] = self.columns + reconstruct_kwargs.update(self._construct_axes_dict()) result = self._constructor(result, **reconstruct_kwargs, copy=False) else: # we converted an array, lost our axes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d8074ca18a8eb..b3c45930ae71f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -621,8 +621,9 @@ def __init__( dtype = self._validate_dtype(dtype) if isinstance(data, DataFrame): - if index is None and columns is None: + if index is None: index = data.index + if columns is None: columns = data.columns data = data._mgr @@ -631,10 +632,14 @@ def __init__( # -> use fastpath (without checking Manager type) if index is None or columns is None: assert False - if not index.equals(data.axes[-1]):#index is not data.axes[-1]: - assert False - if not columns.equals(data.axes[0]):#columns is not data.axes[0]: - assert False + if data.axes[0] is not columns or data.axes[1] is not index: + # FIXME: without this check, json tests segfault... 
+ # nope, segfaults even with this check + data.axes = [ensure_index(columns), ensure_index(index)] + #if not index.equals(data.axes[-1]):#index is not data.axes[-1]: + # assert False + #if not columns.equals(data.axes[0]):#columns is not data.axes[0]: + # assert False if index is None and columns is None and dtype is None and not copy: # GH#33357 fastpath NDFrame.__init__(self, data) @@ -2410,7 +2415,6 @@ def maybe_reorder( manager = get_option("mode.data_manager") mgr, index, columns = arrays_to_mgr(arrays, columns, result_index, typ=manager) - # FIXME: get axes without mgr.axes return cls(mgr, index=index, columns=columns) def to_records( @@ -4164,6 +4168,7 @@ def _set_item_mgr(self, key, value: ArrayLike) -> None: except KeyError: # This item wasn't present, just insert at end self._mgr.insert(len(self._info_axis), key, value) + self._columns = self.columns.insert(len(self._info_axis), key) else: self._iset_item_mgr(loc, value) @@ -4765,7 +4770,9 @@ def predicate(arr: ArrayLike) -> bool: return True mgr = self._mgr._get_data_subset(predicate).copy(deep=None) - return type(self)(mgr).__finalize__(self) + # FIXME: get axes without mgr.axes + assert mgr.axes[1] is self.index # WTF why does passing columns/index cause segfault? 
+ return type(self)(mgr, columns=mgr.axes[0], index=mgr.axes[1]).__finalize__(self) def insert( self, @@ -5865,7 +5872,7 @@ def shift( fill_value=fill_value, allow_dups=True, ) - res_df = self._constructor(mgr) + res_df = self._constructor(mgr, columns=self.columns, index=self.index) return res_df.__finalize__(self, method="shift") return super().shift( @@ -6392,7 +6399,8 @@ class max type @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: - result = self._constructor(self._mgr.isna(func=isna)) + axes_dict = self._construct_axes_dict() + result = self._constructor(self._mgr.isna(func=isna), **axes_dict) return result.__finalize__(self, method="isna") @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) @@ -6944,19 +6952,26 @@ def sort_values( # type: ignore[override] else: return self.copy() + bm_axis = self._get_block_manager_axis(axis) + new_data = self._mgr.take( - indexer, axis=self._get_block_manager_axis(axis), verify=False + indexer, axis=bm_axis, verify=False ) - if ignore_index: - new_data.set_axis( - self._get_block_manager_axis(axis), default_index(len(indexer)) - ) - # FIXME: get axes without mgr.axes + axis_name = self._get_axis_name(axis) + axes_dict = {} - axes_dict["index"] = new_data.axes[-1] - if self.ndim == 2: - axes_dict["columns"] = new_data.axes[0] + axes_dict[axis_name] = self.axes[axis].take(indexer) + if axis == 0: + axes_dict["columns"] = self.columns + else: + axes_dict["index"] = self.index + + if ignore_index: + rng = default_index(len(indexer)) + new_data.set_axis(bm_axis, rng) + axes_dict[axis_name] = rng + result = self._constructor(new_data, **axes_dict) if inplace: return self._update_inplace(result) @@ -10913,9 +10928,12 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce - res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) - # FIXME: get axes without mgr.axes - out = df._constructor(res, 
index=res.axes[1], columns=res.axes[0]).iloc[0] + res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + index = Index([None], dtype=object) + assert index.equals(res.axes[1]) + columns = self.columns.take(indexer) + assert columns.equals(res.axes[0]) + out = df._constructor(res, index=index, columns=columns).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: @@ -11413,7 +11431,8 @@ def quantile( res = data._mgr.take(indexer[q_idx], verify=False) res.axes[1] = q - result = self._constructor(res) + # FIXME: get axes without mgr.axes + result = self._constructor(res, columns=res.axes[0], index=res.axes[1]) return result.__finalize__(self, method="quantile") @doc(NDFrame.asfreq, **_shared_doc_kwargs) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f92c297c77293..d84013e35d2d8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3924,7 +3924,7 @@ def _take( convert_indices=convert_indices, ) axes_dict = self._construct_axes_dict() - #axes_dict[axis] = self.axes[axis].take(indices) # FIXME: get axes without mgr.axes + # FIXME: get axes without mgr.axes axes_dict[self._get_axis_name(axis)] = new_data.axes[self._get_block_manager_axis(axis)] return self._constructor(new_data, **axes_dict).__finalize__(self, method="take") @@ -4113,7 +4113,7 @@ class animal locomotion new_mgr = self._mgr.fast_xs(loc) result = self._constructor_sliced( - new_mgr, name=self.index[loc] + new_mgr, name=self.index[loc], index=self.columns ).__finalize__(self) elif is_scalar(loc): result = self.iloc[:, slice(loc, loc + 1)] @@ -4156,6 +4156,14 @@ def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT: result._set_is_copy(self, copy=is_copy) return result + @staticmethod + def _get_axes_from_mgr(mgr): + axes_dict = {} + axes_dict["index"] = mgr.axes[-1] + if mgr.ndim == 2: + axes_dict["columns"] = mgr.axes[0] + return axes_dict + @final def _set_is_copy(self, ref: 
NDFrame, copy: bool_t = True) -> None: if not copy: @@ -4291,6 +4299,11 @@ def __delitem__(self, key) -> None: # exception: loc = self.axes[-1].get_loc(key) self._mgr = self._mgr.idelete(loc) + # FIXME: get axes without mgr.axes + if self.ndim == 1: + self._index = self._mgr.axes[0] + else: + self._columns = self._mgr.axes[0] # delete from the caches try: @@ -4639,10 +4652,7 @@ def _drop_axis( only_slice=only_slice, ) # FIXME: get axes without mgr.axes - axes_dict = {} - axes_dict["index"] = new_mgr.axes[-1] - if self.ndim == 2: - axes_dict["columns"] = new_mgr.axes[0] + axes_dict = self._get_axes_from_mgr(new_mgr) result = self._constructor(new_mgr, **axes_dict) if self.ndim == 1: result.name = self.name @@ -5104,11 +5114,8 @@ def sort_index( axis = 1 if isinstance(self, ABCDataFrame) else 0 new_data.set_axis(axis, default_index(len(indexer))) - axes_dict = {}#self._construct_axes_dict() # FIXME: get axes without mgr.axes - axes_dict["index"] = new_data.axes[-1] - if self.ndim == 2: - axes_dict["columns"] = new_data.axes[0] + axes_dict = self._get_axes_from_mgr(new_data) result = self._constructor(new_data, **axes_dict) if inplace: @@ -5447,10 +5454,7 @@ def _reindex_with_indexers( new_data = new_data.copy() # FIXME: get axes without mgr.axes - if self.ndim == 1: - axes_dict = {"index": new_data.axes[0]} - else: - axes_dict = {"index": new_data.axes[1], "columns": new_data.axes[0]} + axes_dict = self._get_axes_from_mgr(new_data) return self._constructor(new_data, **axes_dict).__finalize__(self) @@ -6110,22 +6114,16 @@ def _check_inplace_setting(self, value) -> bool_t: @final def _get_numeric_data(self: NDFrameT) -> NDFrameT: - # FIXME: get axes without mgr.axes mgr = self._mgr.get_numeric_data() - axes_dict = {} - axes_dict["index"] = mgr.axes[-1] - if self.ndim == 2: - axes_dict["columns"] = mgr.axes[0] + # FIXME: get axes without mgr.axes + axes_dict = self._get_axes_from_mgr(mgr) return self._constructor(mgr, **axes_dict).__finalize__(self) @final def 
_get_bool_data(self): - # FIXME: get axes without mgr.axes mgr = self._mgr.get_bool_data() - axes_dict = {} - axes_dict["index"] = mgr.axes[-1] - if self.ndim == 2: - axes_dict["columns"] = mgr.axes[0] + # FIXME: get axes without mgr.axes + axes_dict = self._get_axes_from_mgr(mgr) return self._constructor(mgr, **axes_dict).__finalize__(self) # ---------------------------------------------------------------------- @@ -6563,8 +6561,10 @@ def infer_objects(self: NDFrameT) -> NDFrameT: # numeric=False necessary to only soft convert; # python objects will still be converted to # native numpy numeric types + axes_dict = self._construct_axes_dict() return self._constructor( - self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) + self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True), + **axes_dict, ).__finalize__(self, method="infer_objects") @final @@ -9693,10 +9693,7 @@ def _align_series( fdata = fdata.copy() # FIXME: get axes without mgr.axes - if self.ndim == 1: - axes_dict = {"index": fdata.axes[0]} - else: - axes_dict = {"index": fdata.axes[1], "columns": fdata.axes[0]} + axes_dict = self._get_axes_from_mgr(fdata) left = self._constructor(fdata, **axes_dict) if ridx is None: @@ -10282,7 +10279,8 @@ def shift( new_data = self._mgr.shift( periods=periods, axis=axis, fill_value=fill_value ) - return self._constructor(new_data).__finalize__(self, method="shift") + axes_dict = self._construct_axes_dict() + return self._constructor(new_data, **axes_dict).__finalize__(self, method="shift") # when freq is given, index is shifted, data is not index = self._get_axis(axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 800eb94d574e0..6664a91eda093 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1160,7 +1160,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: for i, (item, sgb) in enumerate(self._iterate_column_groupbys(obj)): result[i] = 
sgb.aggregate(func, *args, **kwargs) - res_df = self.obj._constructor(result) + res_df = self.obj._constructor(result, columns=obj.columns) res_df.columns = obj.columns return res_df @@ -1335,7 +1335,8 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: if len(res_mgr) < orig_mgr_len: warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) - res_df = self.obj._constructor(res_mgr) + # FIXME: get axes without mgr.axes + res_df = self.obj._constructor(res_mgr, index=res_mgr.axes[1], columns=res_mgr.axes[0]) if self.axis == 1: res_df = res_df.T return res_df @@ -1657,7 +1658,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: index = Index(range(rows)) mgr.set_axis(1, index) # FIXME: get axes without mgr.axes - result = self.obj._constructor(mgr, index=mgr.axes[1], columns=mgr.axes[0]) + result = self.obj._constructor(mgr, index=index, columns=mgr.axes[0]) self._insert_inaxis_grouper_inplace(result) result = result._consolidate() @@ -1665,7 +1666,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: index = self.grouper.result_index mgr.set_axis(1, index) # FIXME: get axes without mgr.axes - result = self.obj._constructor(mgr, index=mgr.axes[1], columns=mgr.axes[0]) + result = self.obj._constructor(mgr, index=index, columns=mgr.axes[0]) if self.axis == 1: result = result.T diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 63fce060ab887..4765f0e6bd8c2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2906,7 +2906,8 @@ def blk_func(values: ArrayLike) -> ArrayLike: mgr = obj._mgr res_mgr = mgr.apply(blk_func) - new_obj = obj._constructor(res_mgr) + axes_dict = obj._construct_axes_dict() + new_obj = obj._constructor(res_mgr, **axes_dict) if isinstance(new_obj, Series): new_obj.name = obj.name diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d415cbd035cd1..ed7307debbd19 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -803,6 
+803,9 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: keys = self.obj.columns.union(key, sort=False) self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True) + assert self.obj._mgr.axes[0].equals(keys) + self.obj._columns = Index(keys) + @final def __setitem__(self, key, value) -> None: @@ -1765,8 +1768,13 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): reindexers, allow_dups=True ) self.obj._mgr = new_obj._mgr + self.obj._index = self.obj._mgr.axes[-1] + if self.ndim == 2: + # FIXME: get axes without mgr.axes + self.obj._columns = self.obj._mgr.axes[0] self.obj._maybe_update_cacher(clear=True) self.obj._is_copy = None + nindexer.append(labels.get_loc(key)) @@ -1988,6 +1996,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # falling back to casting if necessary) self.obj._mgr.column_setitem(loc, plane_indexer, value) self.obj._clear_item_cache() + return # We will not operate in-place, but will attempt to in the future. @@ -2078,6 +2087,7 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: # actually do the set self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True, inplace=True) + def _setitem_with_indexer_missing(self, indexer, value): """ @@ -2129,8 +2139,10 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._mgr = self.obj._constructor( new_values, index=new_index, name=self.obj.name )._mgr + self.obj._index = new_index self.obj._maybe_update_cacher(clear=True) + elif self.ndim == 2: if not len(self.obj.columns): @@ -2172,8 +2184,10 @@ def _setitem_with_indexer_missing(self, indexer, value): # dtype. 
But if we had a list or dict, then do inference df = df.infer_objects() self.obj._mgr = df._mgr + else: self.obj._mgr = self.obj._append(value)._mgr + self.obj._maybe_update_cacher(clear=True) def _ensure_iterable_column_indexer(self, column_indexer): diff --git a/pandas/core/series.py b/pandas/core/series.py index 022004fa469c6..f1ed825dd4f2c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -354,8 +354,11 @@ def __init__( if isinstance(data, (SingleBlockManager, SingleArrayManager)): if index is None: assert False - if not index.equals(data.axes[0]):#index is not data.axes[0]: - assert False + if data.axes[0] is not index: + # Adding check to try to avoid segfault in json tests + data.axes = [ensure_index(index)] + #if not index.equals(data.axes[0]):#index is not data.axes[0]: + # assert False if ( isinstance(data, (SingleBlockManager, SingleArrayManager)) From 7f73a89570f9775546f6376e5ff5d31f4933a7c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 19 Aug 2022 10:35:20 -0700 Subject: [PATCH 3/4] down to about 100 failing tests --- pandas/core/apply.py | 4 +- pandas/core/frame.py | 48 ++++++--- pandas/core/generic.py | 95 +++++++++++++------ pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 13 ++- pandas/core/indexes/base.py | 2 + pandas/core/indexing.py | 20 +++-- pandas/core/internals/array_manager.py | 1 + pandas/core/internals/base.py | 4 +- pandas/core/internals/managers.py | 39 +++++---- pandas/core/resample.py | 2 +- pandas/core/reshape/reshape.py | 6 +- pandas/core/series.py | 1 + pandas/tests/internals/test_internals.py | 10 +-- pandas/tests/series/methods/test_reindex.py | 1 + 15 files changed, 169 insertions(+), 79 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 372d932629259..3faac6858e7af 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1002,6 +1002,7 @@ def series_generator(self): # We create one Series object, and will swap out the data inside # of it. 
Kids: don't do this at home. ser = self.obj._ixs(0, axis=0) + index = ser.index mgr = ser._mgr if is_extension_array_dtype(ser.dtype): @@ -1013,9 +1014,10 @@ def series_generator(self): else: for (arr, name) in zip(values, self.index): - # GH#35462 re-pin mgr in case setitem changed it + # GH#35462 re-pin mgr, index in case setitem changed it ser._mgr = mgr mgr.set_values(arr) + ser._index = index object.__setattr__(ser, "_name", name) yield ser diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3c45930ae71f..76b01b069d2dd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4272,6 +4272,7 @@ def _ensure_valid_index(self, value) -> None: index_copy.name = self.index.name self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan) + self._index = index_copy def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: """ @@ -4501,6 +4502,8 @@ def query(self, expr: str, inplace: bool = False, **kwargs) -> DataFrame | None: if inplace: self._update_inplace(result) + self._index = result._index + self._columns = result._columns return None else: return result @@ -4757,8 +4760,7 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool: and not is_bool_dtype(dtype) ) - def predicate(arr: ArrayLike) -> bool: - dtype = arr.dtype + def predicate(dtype: DtypeObj) -> bool: if include: if not dtype_predicate(dtype, include): return False @@ -4769,10 +4771,16 @@ def predicate(arr: ArrayLike) -> bool: return True - mgr = self._mgr._get_data_subset(predicate).copy(deep=None) + def arr_predicate(arr: ArrayLike) -> bool: + dtype = arr.dtype + return predicate(dtype) + + mgr, taker = self._mgr._get_data_subset(arr_predicate).copy(deep=None) # FIXME: get axes without mgr.axes - assert mgr.axes[1] is self.index # WTF why does passing columns/index cause segfault? 
- return type(self)(mgr, columns=mgr.axes[0], index=mgr.axes[1]).__finalize__(self) + # FIXME: return taker from _get_data_subset, this is really slow + #taker = self.dtypes.apply(predicate).values.nonzero()[0] + columns = self.columns.take(taker) + return type(self)(mgr, columns=columns, index=self.index).__finalize__(self) def insert( self, @@ -4841,6 +4849,7 @@ def insert( value = self._sanitize_column(value) self._mgr.insert(loc, column, value) + self._columns = self.columns.insert(loc, column) def assign(self, **kwargs) -> DataFrame: r""" @@ -6605,6 +6614,8 @@ def dropna( if not inplace: return result self._update_inplace(result) + self._columns = result._columns + self._index = result._index return None @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) @@ -6703,6 +6714,8 @@ def drop_duplicates( if inplace: self._update_inplace(result) + self._index = result._index + self._columns = result._columns return None else: return result @@ -9268,7 +9281,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: axis = 0 new_data = self._mgr.diff(n=periods, axis=axis) - return self._constructor(new_data).__finalize__(self, "diff") + return self._constructor(new_data, index=self.index, columns=self.columns).__finalize__(self, "diff") # ---------------------------------------------------------------------- # Function application @@ -10879,8 +10892,9 @@ def _reduce( # cols = self.columns[~dt64_cols] # self = self[cols] predicate = lambda x: not is_datetime64_any_dtype(x.dtype) - mgr = self._mgr._get_data_subset(predicate) - self = type(self)(mgr) + mgr, taker = self._mgr._get_data_subset(predicate) + columns = self.columns[taker] + self = type(self)(mgr, index=self.index, columns=columns) # TODO: Make other agg func handle axis=None properly GH#21597 axis = self._get_axis_number(axis) @@ -10928,11 +10942,20 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use 
BlockManager.reduce - res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) index = Index([None], dtype=object) assert index.equals(res.axes[1]) - columns = self.columns.take(indexer) - assert columns.equals(res.axes[0]) + if ignore_failures: + if len(res.items) == len(df.columns): + # i.e. nothing was dropped + columns = df.columns + else: + # FIXME: get axes without mgr.axes; THIS IS WRONG TOO + columns = res.axes[0] + else: + columns = df.columns + assert columns.equals(res.axes[0]) + out = df._constructor(res, index=index, columns=columns).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) @@ -11736,8 +11759,9 @@ def _to_dict_of_blocks(self, copy: bool = True): # convert to BlockManager if needed -> this way support ArrayManager as well mgr = mgr_to_mgr(mgr, "block") mgr = cast(BlockManager, mgr) + # FIXME: get axes without mgr.axes return { - k: self._constructor(v).__finalize__(self) + k: self._constructor(v, index=self.index, columns=v.axes[0]).__finalize__(self) for k, v, in mgr.to_dict(copy=copy).items() } diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d84013e35d2d8..a4b65b4e91149 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -32,7 +32,7 @@ import numpy as np from pandas._config import config - +from pandas.core.indexers import maybe_convert_indices from pandas._libs import lib from pandas._libs.tslibs import ( Period, @@ -1503,6 +1503,10 @@ def equals(self, other: object) -> bool_t: if not (isinstance(other, type(self)) or isinstance(self, type(other))): return False other = cast(NDFrame, other) + if self.ndim != other.ndim: + return False + if not all(left.equals(right) for left, right in zip(self.axes, other.axes)): + return False return self._mgr.equals(other._mgr) # ------------------------------------------------------------------------- @@ -2150,6 +2154,11 @@ def __array_ufunc__( @final def __getstate__(self) 
-> dict[str, Any]: meta = {k: getattr(self, k, None) for k in self._metadata} + + # TODO: handle unpickling older pickles where index/columns are in mgr + meta["_index"] = self.index + if self.ndim == 2: + meta["_columns"] = self.columns return { "_mgr": self._mgr, "_typ": self._typ, @@ -3923,9 +3932,13 @@ def _take( verify=True, convert_indices=convert_indices, ) + axes_dict = self._construct_axes_dict() - # FIXME: get axes without mgr.axes - axes_dict[self._get_axis_name(axis)] = new_data.axes[self._get_block_manager_axis(axis)] + if convert_indices and isinstance(indices, np.ndarray): + # i.e. exclude slice, which in principle shouldn't be in a _take + indices = maybe_convert_indices(indices, len(self.axes[axis]), verify=True) + axes_dict[self._get_axis_name(axis)] = self.axes[axis].take(indices)#[indices] + return self._constructor(new_data, **axes_dict).__finalize__(self, method="take") def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT: @@ -4297,13 +4310,19 @@ def __delitem__(self, key) -> None: # If the above loop ran and didn't delete anything because # there was no match, this call should raise the appropriate # exception: + + # make sure we access self.shape before calling mgr.idelete + is_deleted = np.zeros(self.shape[-1], dtype=np.bool_) + loc = self.axes[-1].get_loc(key) self._mgr = self._mgr.idelete(loc) - # FIXME: get axes without mgr.axes + + is_deleted[loc] = True + new_items = self.axes[-1][~is_deleted] if self.ndim == 1: - self._index = self._mgr.axes[0] + self._index = new_items else: - self._columns = self._mgr.axes[0] + self._columns = new_items # delete from the caches try: @@ -4568,6 +4587,9 @@ def drop( if inplace: self._update_inplace(obj) + self._index = obj._index + if self.ndim > 1: + self._columns = obj._columns else: return obj @@ -4643,6 +4665,8 @@ def _drop_axis( indexer = mask.nonzero()[0] new_axis = axis.take(indexer) + axes_dict = self._construct_axes_dict() + axes_dict[self._get_axis_name(axis_num)] = new_axis 
bm_axis = self.ndim - axis_num - 1 new_mgr = self._mgr.reindex_indexer( new_axis, @@ -4651,13 +4675,12 @@ def _drop_axis( allow_dups=True, only_slice=only_slice, ) - # FIXME: get axes without mgr.axes - axes_dict = self._get_axes_from_mgr(new_mgr) result = self._constructor(new_mgr, **axes_dict) if self.ndim == 1: result.name = self.name - return result.__finalize__(self) + out = result.__finalize__(self) + return out @final def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: @@ -4675,6 +4698,9 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: self._reset_cache() self._clear_item_cache() self._mgr = result._mgr + self._index = result._index + if self.ndim == 2: + self._columns = result._columns self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) @final @@ -5083,6 +5109,7 @@ def sort_index( inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) + orig_axis = axis ascending = validate_ascending(ascending) target = self._get_axis(axis) @@ -5107,19 +5134,25 @@ def sort_index( baxis = self._get_block_manager_axis(axis) new_data = self._mgr.take(indexer, axis=baxis, verify=False) + axis_name = self._get_axis_name(axis) + axes_dict = self._construct_axes_dict() + axes_dict[axis_name] = self.axes[axis].take(indexer)._sort_levels_monotonic() + # reconstruct axis if needed - new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic()) + new_data.set_axis(baxis, axes_dict[axis_name]) if ignore_index: axis = 1 if isinstance(self, ABCDataFrame) else 0 - new_data.set_axis(axis, default_index(len(indexer))) + rng = default_index(len(indexer)) + new_data.set_axis(axis, rng) + + name = "columns" if orig_axis == 1 else "index" + axes_dict[name] = rng - # FIXME: get axes without mgr.axes - axes_dict = self._get_axes_from_mgr(new_data) result = self._constructor(new_data, **axes_dict) if inplace: - return self._update_inplace(result) + self._update_inplace(result) else: return 
result.__finalize__(self, method="sort_index") @@ -5426,6 +5459,9 @@ def _reindex_with_indexers( ) -> NDFrameT: """allow_dups indicates an internal call here""" # reindex doing multiple operations on different axes if indicated + axes_dict = self._construct_axes_dict() + axes_dict = {x: axes_dict[x].copy(deep=False) for x in axes_dict} + new_data = self._mgr for axis in sorted(reindexers.keys()): index, indexer = reindexers[axis] @@ -5449,12 +5485,13 @@ def _reindex_with_indexers( ) # If we've made a copy once, no need to make another one copy = False + axes_dict[self._get_axis_name(axis)] = index if copy and new_data is self._mgr: new_data = new_data.copy() # FIXME: get axes without mgr.axes - axes_dict = self._get_axes_from_mgr(new_data) + #axes_dict = self._get_axes_from_mgr(new_data) return self._constructor(new_data, **axes_dict).__finalize__(self) @@ -6114,16 +6151,16 @@ def _check_inplace_setting(self, value) -> bool_t: @final def _get_numeric_data(self: NDFrameT) -> NDFrameT: - mgr = self._mgr.get_numeric_data() - # FIXME: get axes without mgr.axes - axes_dict = self._get_axes_from_mgr(mgr) + mgr, taker = self._mgr.get_numeric_data() + axes_dict = self._construct_axes_dict() + axes_dict[self._get_axis_name(self.ndim-1)] = self.axes[-1].take(taker) return self._constructor(mgr, **axes_dict).__finalize__(self) @final def _get_bool_data(self): - mgr = self._mgr.get_bool_data() - # FIXME: get axes without mgr.axes - axes_dict = self._get_axes_from_mgr(mgr) + mgr, taker = self._mgr.get_bool_data() + axes_dict = self._construct_axes_dict() + axes_dict[self._get_axis_name(self.ndim-1)] = self.axes[-1].take(taker) return self._constructor(mgr, **axes_dict).__finalize__(self) # ---------------------------------------------------------------------- @@ -6465,6 +6502,8 @@ def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: data = self._mgr.copy(deep=deep) self._clear_item_cache() axes_dict = self._construct_axes_dict() + # TODO: probably need to do this 
copy elsewhere? + axes_dict = {x: axes_dict[x].copy(deep=False) for x in axes_dict} return self._constructor(data, **axes_dict).__finalize__(self, method="copy") @final @@ -7388,7 +7427,8 @@ def replace( f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' ) - result = self._constructor(new_data) + axes_dict = self._construct_axes_dict() + result = self._constructor(new_data, **axes_dict) if inplace: return self._update_inplace(result) else: @@ -7679,8 +7719,8 @@ def interpolate( downcast=downcast, **kwargs, ) - - result = self._constructor(new_data) + axes_dict = obj._construct_axes_dict() + result = self._constructor(new_data, **axes_dict) if should_transpose: result = result.T if inplace: @@ -9668,8 +9708,9 @@ def _align_series( elif lidx is None or join_index is None: left = self.copy() if copy else self else: + new_mgr = self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) left = self._constructor( - self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) + new_mgr, columns=self.columns, index=join_index ) right = other._reindex_indexer(join_index, ridx, copy) @@ -9692,8 +9733,8 @@ def _align_series( if copy and fdata is self._mgr: fdata = fdata.copy() - # FIXME: get axes without mgr.axes - axes_dict = self._get_axes_from_mgr(fdata) + axes_dict = self._construct_axes_dict() + axes_dict["columns"] = join_index left = self._constructor(fdata, **axes_dict) if ridx is None: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 6664a91eda093..128f2372f27d8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1320,7 +1320,7 @@ def _cython_transform( mgr: Manager2D = self._get_data_to_aggregate() orig_mgr_len = len(mgr) if numeric_only_bool: - mgr = mgr.get_numeric_data(copy=False) + mgr = mgr.get_numeric_data(copy=False)[0] def arr_func(bvalues: ArrayLike) -> ArrayLike: return self.grouper._cython_operation( diff --git a/pandas/core/groupby/groupby.py 
b/pandas/core/groupby/groupby.py index 4765f0e6bd8c2..43312cd9f5554 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1757,7 +1757,7 @@ def _cython_agg_general( f"{type(self).__name__}.{how} does not implement {kwd_name}." ) elif not is_ser: - data = data.get_numeric_data(copy=False) + data = data.get_numeric_data(copy=False)[0] def array_func(values: ArrayLike) -> ArrayLike: try: @@ -3372,7 +3372,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: obj = self._obj_with_exclusions is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() - data = mgr.get_numeric_data() if numeric_only_bool else mgr + data = mgr.get_numeric_data()[0] if numeric_only_bool else mgr ignore_failures = numeric_only_bool res_mgr = data.grouped_reduce(blk_func, ignore_failures=ignore_failures) @@ -3396,7 +3396,12 @@ def blk_func(values: ArrayLike) -> ArrayLike: if is_ser: res = self._wrap_agged_manager(res_mgr) else: - res = obj._constructor(res_mgr) + # FIXME: get axes without mgr.axes + axes_dict = {} + axes_dict["index"] = res_mgr.axes[-1] + if res_mgr.ndim == 2: + axes_dict["columns"] = res_mgr.axes[0] + res = obj._constructor(res_mgr, **axes_dict) if orig_scalar: # Avoid expensive MultiIndex construction @@ -3846,7 +3851,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: orig_mgr_len = len(mgr) if numeric_only_bool: - mgr = mgr.get_numeric_data() + mgr = mgr.get_numeric_data()[0] res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 52150eafd7783..ef8a3f6aef191 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -828,7 +828,9 @@ def _view(self: _IndexT) -> _IndexT: result = self._simple_new(self._values, name=self._name) result._cache = self._cache + result._id = self._id return result + # TODO: preserve _id? 
@final def _rename(self: _IndexT, name: Hashable) -> _IndexT: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ed7307debbd19..780dc8695a87b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1587,6 +1587,7 @@ def _get_list_axis(self, key, axis: int): return self.obj._take_with_is_copy(key, axis=axis) except IndexError as err: # re-raise with different error message + raise # watch out for case with wrong dtype key? raise IndexError("positional indexers are out-of-bounds") from err def _getitem_axis(self, key, axis: int): @@ -1606,6 +1607,9 @@ def _getitem_axis(self, key, axis: int): if isinstance(key, list): key = np.asarray(key) + #if len(key) == 0: + # key = key.astype(np.intp) + # TODO: if empty, do intp instead of float64? if com.is_bool_indexer(key): self._validate_key(key, axis) @@ -1768,10 +1772,11 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): reindexers, allow_dups=True ) self.obj._mgr = new_obj._mgr - self.obj._index = self.obj._mgr.axes[-1] - if self.ndim == 2: - # FIXME: get axes without mgr.axes - self.obj._columns = self.obj._mgr.axes[0] + # TODO: use update_inplace? + if i == 0: + self.obj._index = labels + else: + self.obj._columns = labels self.obj._maybe_update_cacher(clear=True) self.obj._is_copy = None @@ -2086,8 +2091,8 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: # actually do the set self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) + # FIXME: update axes? self.obj._maybe_update_cacher(clear=True, inplace=True) - def _setitem_with_indexer_missing(self, indexer, value): """ @@ -2184,9 +2189,12 @@ def _setitem_with_indexer_missing(self, indexer, value): # dtype. 
But if we had a list or dict, then do inference df = df.infer_objects() self.obj._mgr = df._mgr + self.obj._index = df.index else: - self.obj._mgr = self.obj._append(value)._mgr + new_obj = self.obj._append(value) + self.obj._mgr = new_obj._mgr + self.obj._index = new_obj.index self.obj._maybe_update_cacher(clear=True) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0fda74149fdf4..d9154aa8b212b 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -485,6 +485,7 @@ def get_bool_data(self: T, copy: bool = False) -> T: Whether to copy the blocks """ return self._get_data_subset(is_inferred_bool_dtype) + # FIXME: return indexer def get_numeric_data(self: T, copy: bool = False) -> T: """ diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index c695f2c1e6ff1..995f9423cb5cb 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -124,8 +124,8 @@ def equals(self, other: object) -> bool: self_axes, other_axes = self.axes, other.axes if len(self_axes) != len(other_axes): return False - if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): - return False + #if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): + # return False return self._equal_values(other) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f1fd5aed7dcf4..e3758dc5574c1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -353,7 +353,7 @@ def apply( result_blocks = extend_blocks(applied, result_blocks) if ignore_failures: - return self._combine(result_blocks) + return self._combine(result_blocks)[0] out = type(self).from_blocks(result_blocks, self.axes) return out @@ -524,11 +524,11 @@ def is_view(self) -> bool: return False - def _get_data_subset(self: T, predicate: Callable) -> T: + def _get_data_subset(self: T, predicate: Callable) -> tuple[T, 
npt.NDArray[np.intp]]: blocks = [blk for blk in self.blocks if predicate(blk.values)] return self._combine(blocks, copy=False) - def get_bool_data(self: T, copy: bool = False) -> T: + def get_bool_data(self: T, copy: bool = False) -> tuple[T, npt.NDArray[np.intp]]: """ Select blocks that are bool-dtype and columns from object-dtype blocks that are all-bool. @@ -553,7 +553,7 @@ def get_bool_data(self: T, copy: bool = False) -> T: return self._combine(new_blocks, copy) - def get_numeric_data(self: T, copy: bool = False) -> T: + def get_numeric_data(self: T, copy: bool = False) -> tuple[T, npt.NDArray[np.intp]]: """ Parameters ---------- @@ -563,24 +563,26 @@ def get_numeric_data(self: T, copy: bool = False) -> T: numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] if len(numeric_blocks) == len(self.blocks): # Avoid somewhat expensive _combine + taker = np.arange(len(self), dtype=np.intp) # TODO: return None to indicate no take needed? if copy: - return self.copy(deep=True) - return self + return self.copy(deep=True), taker + return self, taker return self._combine(numeric_blocks, copy) def _combine( self: T, blocks: list[Block], copy: bool = True, index: Index | None = None - ) -> T: + ) -> tuple[T, npt.NDArray[np.intp]]: """return a new manager with the blocks""" if len(blocks) == 0: + indexer = np.arange(0, dtype=np.intp) if self.ndim == 2: # retain our own Index dtype if index is not None: axes = [self.items[:0], index] else: axes = [self.items[:0]] + self.axes[1:] - return self.make_empty(axes) - return self.make_empty() + return self.make_empty(axes), indexer + return self.make_empty(), indexer # FIXME: optimization potential indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) @@ -604,7 +606,7 @@ def _combine( axes[-1] = index axes[0] = self.items.take(indexer) - return type(self).from_blocks(new_blocks, axes, new_refs) + return type(self).from_blocks(new_blocks, axes, new_refs), indexer @property def nblocks(self) -> int: @@ 
-1520,7 +1522,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: if dropped_any: # faster to skip _combine if we haven't dropped any blocks - return self._combine(result_blocks, copy=False, index=index) + return self._combine(result_blocks, copy=False, index=index)[0] return type(self).from_blocks(result_blocks, [self.axes[0], index]) @@ -1554,7 +1556,7 @@ def reduce( if ignore_failures: if res_blocks: indexer = np.concatenate([blk.mgr_locs.as_array for blk in res_blocks]) - new_mgr = self._combine(res_blocks, copy=False, index=index) + new_mgr = self._combine(res_blocks, copy=False, index=index)[0] else: indexer = [] new_mgr = type(self).from_blocks([], [self.items[:0], index]) @@ -1618,7 +1620,7 @@ def quantile( # ---------------------------------------------------------------- - def unstack(self, unstacker, fill_value) -> BlockManager: + def unstack(self, unstacker, fill_value) -> tuple[BlockManager, list[np.ndarray]]: """ Return a BlockManager with all blocks unstacked. @@ -1677,7 +1679,7 @@ def unstack(self, unstacker, fill_value) -> BlockManager: new_columns = new_columns[columns_mask] bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) - return bm + return bm, columns_mask def to_dict(self, copy: bool = True): """ @@ -1697,7 +1699,7 @@ def to_dict(self, copy: bool = True): bd.setdefault(str(b.dtype), []).append(b) # TODO(EA2D): the combine will be unnecessary with 2D EAs - return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} + return {dtype: self._combine(blocks, copy=copy)[0] for dtype, blocks in bd.items()} def as_array( self, @@ -1858,7 +1860,7 @@ def _consolidate_inplace(self) -> None: def __len__(self) -> int: # TODO: cache? 
would need to invalidate akin to blklocs - return sum(x.shape[1] for x in self.blocks) + return sum(x.shape[0] for x in self.blocks) class SingleBlockManager(BaseBlockManager, SingleDataManager): @@ -2053,8 +2055,9 @@ def array_values(self): def get_numeric_data(self, copy: bool = False): if self._block.is_numeric: - return self.copy(deep=copy) - return self.make_empty() + return self.copy(deep=copy), taker + taker = np.array([], dtype=np.intp) + return self.make_empty(), taker @property def _can_hold_na(self) -> bool: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 85731bbde6d40..3411e674afa6f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1937,7 +1937,7 @@ def _take_new_index( new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) # error: Incompatible return value type # (got "DataFrame", expected "NDFrameT") - return obj._constructor(new_mgr) # type: ignore[return-value] + return obj._constructor(new_mgr, index=new_index, columns=obj.columns) # type: ignore[return-value] else: raise ValueError("'obj' should be either a Series or a DataFrame") diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0270a5dd75952..a788bd2f0dbd7 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -500,8 +500,10 @@ def _unstack_frame(obj: DataFrame, level, fill_value=None): unstacker = _Unstacker(obj.index, level=level, constructor=obj._constructor) if not obj._can_fast_transpose: - mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) - return obj._constructor(mgr) + mgr, columns_mask = obj._mgr.unstack(unstacker, fill_value=fill_value) + new_columns = unstacker.get_new_columns(obj.columns) + new_columns = new_columns[columns_mask] + return obj._constructor(mgr, index=unstacker.new_index, columns=new_columns) else: return unstacker.get_result( obj._values, value_columns=obj.columns, fill_value=fill_value diff --git a/pandas/core/series.py 
b/pandas/core/series.py index f1ed825dd4f2c..8f5802d0b5497 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2360,6 +2360,7 @@ def drop_duplicates( result = super().drop_duplicates(keep=keep) if inplace: self._update_inplace(result) + self._index = result.index return None else: return result diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b30b27f5bae1a..ecf9aacae2c72 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -714,7 +714,7 @@ def test_consolidate_ordering_issues(self, mgr): # we have datetime/tz blocks in mgr cons = mgr.consolidate() assert cons.nblocks == 4 - cons = mgr.consolidate().get_numeric_data() + cons = mgr.consolidate().get_numeric_data()[0] assert cons.nblocks == 1 assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) tm.assert_numpy_array_equal( @@ -752,7 +752,7 @@ def test_get_numeric_data(self, using_copy_on_write): ) mgr.iset(5, np.array([1, 2, 3], dtype=np.object_)) - numeric = mgr.get_numeric_data() + numeric = mgr.get_numeric_data()[0] tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) tm.assert_almost_equal( mgr.iget(mgr.items.get_loc("float")).internal_values(), @@ -776,7 +776,7 @@ def test_get_numeric_data(self, using_copy_on_write): np.array([100.0, 200.0, 300.0]), ) - numeric2 = mgr.get_numeric_data(copy=True) + numeric2 = mgr.get_numeric_data(copy=True)[0] tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) numeric2.iset( numeric2.items.get_loc("float"), @@ -804,7 +804,7 @@ def test_get_bool_data(self, using_copy_on_write): mgr.iset(6, np.array([True, False, True], dtype=np.object_)) with tm.assert_produces_warning(FutureWarning, match=msg): - bools = mgr.get_bool_data() + bools = mgr.get_bool_data()[0] tm.assert_index_equal(bools.items, Index(["bool", "dt"])) tm.assert_almost_equal( mgr.iget(mgr.items.get_loc("bool")).internal_values(), @@ -825,7 
+825,7 @@ def test_get_bool_data(self, using_copy_on_write): # Check sharing with tm.assert_produces_warning(FutureWarning, match=msg): - bools2 = mgr.get_bool_data(copy=True) + bools2 = mgr.get_bool_data(copy=True)[0] bools2.iset(0, np.array([False, True, False])) if using_copy_on_write: tm.assert_numpy_array_equal( diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index b64c7bec6ea39..7ed3273b2e6a0 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -22,6 +22,7 @@ def test_reindex(datetime_series, string_series): identity = string_series.reindex(string_series.index) + # TODO: is the comment below still accurate for supported numpies? # __array_interface__ is not defined for older numpies # and on some pythons try: From e8ce5e8b1aaedf52687dbcd59000fcd575872835 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 16 Sep 2022 13:44:23 -0700 Subject: [PATCH 4/4] down to 322 fails --- pandas/core/frame.py | 13 ++++--------- pandas/core/generic.py | 13 +++++++++---- pandas/core/groupby/generic.py | 19 ++++++++++++++++--- pandas/core/groupby/groupby.py | 18 ++++++++++++------ pandas/core/internals/array_manager.py | 18 ++++++++++-------- pandas/core/internals/base.py | 2 +- pandas/core/internals/managers.py | 9 ++++++--- 7 files changed, 58 insertions(+), 34 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 76b01b069d2dd..9808b4def6e6f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4760,7 +4760,8 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool: and not is_bool_dtype(dtype) ) - def predicate(dtype: DtypeObj) -> bool: + def predicate(arr: ArrayLike) -> bool: + dtype = arr.dtype if include: if not dtype_predicate(dtype, include): return False @@ -4771,14 +4772,8 @@ def predicate(dtype: DtypeObj) -> bool: return True - def arr_predicate(arr: ArrayLike) -> bool: - dtype = arr.dtype - return predicate(dtype) - - mgr, 
taker = self._mgr._get_data_subset(arr_predicate).copy(deep=None) - # FIXME: get axes without mgr.axes - # FIXME: return taker from _get_data_subset, this is really slow - #taker = self.dtypes.apply(predicate).values.nonzero()[0] + mgr, taker = self._mgr._get_data_subset(predicate) + mgr = mgr.copy(deep=None) columns = self.columns.take(taker) return type(self)(mgr, columns=columns, index=self.index).__finalize__(self) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4b65b4e91149..f744d0a2cc7a6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -840,7 +840,7 @@ def _validate_set_axis(self, axis: int, new_labels: Index) -> None: old_len = self.shape[axis] new_len = len(new_labels) - if axis == 1 and len(self.columns) == 0: + if self.ndim > 1 and axis == 0 and len(self.columns) == 0: # If we are setting the index on a DataFrame with no columns, # it is OK to change the length. pass @@ -3933,6 +3933,14 @@ def _take( convert_indices=convert_indices, ) + # We have 6 tests that get here with a slice; TODO: maybe avoid? + # TODO: de-duplicate with similar inside BlockManager.take + indices = ( + np.arange(indices.start, indices.stop, indices.step, dtype=np.intp) + if isinstance(indices, slice) + else np.asanyarray(indices, dtype=np.intp) # <- converts some cases with empty float64 + ) + axes_dict = self._construct_axes_dict() if convert_indices and isinstance(indices, np.ndarray): # i.e. 
exclude slice, which in principle shouldn't be in a _take @@ -5490,9 +5498,6 @@ def _reindex_with_indexers( if copy and new_data is self._mgr: new_data = new_data.copy() - # FIXME: get axes without mgr.axes - #axes_dict = self._get_axes_from_mgr(new_data) - return self._constructor(new_data, **axes_dict).__finalize__(self) def filter( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 128f2372f27d8..7326a79a9654b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -175,6 +175,7 @@ def _wrap_agged_manager(self, mgr: Manager) -> Series: else: mgr = cast(Manager2D, mgr) single = mgr.iget(0) + #breakpoint() # FIXME: get axes without mgr.axes index = single.axes[0] ser = self.obj._constructor(single, index=index, name=self.obj.name) @@ -1329,14 +1330,26 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: # We could use `mgr.apply` here and not have to set_axis, but # we would have to do shape gymnastics for ArrayManager compat - res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True) + res_mgr, taker = mgr.grouped_reduce(arr_func, ignore_failures=True) res_mgr.set_axis(1, mgr.axes[1]) if len(res_mgr) < orig_mgr_len: warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) - # FIXME: get axes without mgr.axes - res_df = self.obj._constructor(res_mgr, index=res_mgr.axes[1], columns=res_mgr.axes[0]) + columns = mgr.axes[0] + index = res_mgr.axes[1] # FIXME: get index without res_mgr.axes + if self.axis == 0: + + pass#index = self._obj_with_exclusions.index + #columns = columns[taker] + #breakpoint() + else: + #columns = self._obj_with_exclusions.index + pass#index = self._obj_with_exclusions.columns + #breakpoint() + + columns = columns[taker] + res_df = self.obj._constructor(res_mgr, index=index, columns=columns) if self.axis == 1: res_df = res_df.T return res_df diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 43312cd9f5554..ff46904758025 100644 --- 
a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1780,7 +1780,7 @@ def array_func(values: ArrayLike) -> ArrayLike: # TypeError -> we may have an exception in trying to aggregate # continue and exclude the block - new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) + new_mgr, taker = data.grouped_reduce(array_func, ignore_failures=ignore_failures) if not is_ser and len(new_mgr) < orig_len: warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) @@ -2055,7 +2055,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: return counted[0] return counted - new_mgr = data.grouped_reduce(hfunc) + new_mgr, taker = data.grouped_reduce(hfunc) # If we are grouping on categoricals we want unobserved categories to # return zero, rather than the default of NaN which the reindexing in @@ -3374,7 +3374,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: mgr = self._get_data_to_aggregate() data = mgr.get_numeric_data()[0] if numeric_only_bool else mgr ignore_failures = numeric_only_bool - res_mgr = data.grouped_reduce(blk_func, ignore_failures=ignore_failures) + res_mgr, taker = data.grouped_reduce(blk_func, ignore_failures=ignore_failures) if ( numeric_only is lib.no_default @@ -3401,6 +3401,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: axes_dict["index"] = res_mgr.axes[-1] if res_mgr.ndim == 2: axes_dict["columns"] = res_mgr.axes[0] + #breakpoint() res = obj._constructor(res_mgr, **axes_dict) if orig_scalar: @@ -3693,7 +3694,7 @@ def cummin(self, axis=0, numeric_only=False, **kwargs) -> NDFrameT: skipna = kwargs.get("skipna", True) if axis != 0: f = lambda x: np.minimum.accumulate(x, axis) - numeric_only_bool = self._resolve_numeric_only("cummax", numeric_only, axis) + numeric_only_bool = self._resolve_numeric_only("cummax", numeric_only, axis) # TODO: "cummin"? 
obj = self._selected_obj if numeric_only_bool: obj = obj._get_numeric_data() @@ -3853,7 +3854,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: if numeric_only_bool: mgr = mgr.get_numeric_data()[0] - res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) + res_mgr, taker = mgr.grouped_reduce(blk_func, ignore_failures=True) if not is_ser and len(res_mgr.items) != orig_mgr_len: howstr = how.replace("group_", "") @@ -3871,7 +3872,12 @@ def blk_func(values: ArrayLike) -> ArrayLike: out = self._wrap_agged_manager(res_mgr) else: # FIXME: get axes without mgr.axes - out = obj._constructor(res_mgr, index=res_mgr.axes[1], columns=res_mgr.axes[0]) + if self.axis == 0 and not numeric_only_bool: + columns = self._obj_with_exclusions.columns[taker] + else: + #breakpoint() + columns = res_mgr.axes[0] + out = obj._constructor(res_mgr, index=res_mgr.axes[1], columns=columns) return self._wrap_aggregated_output(out) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index d9154aa8b212b..9e0e832b8f3a5 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -464,7 +464,7 @@ def is_view(self) -> bool: def is_single_block(self) -> bool: return len(self.arrays) == 1 - def _get_data_subset(self: T, predicate: Callable) -> T: + def _get_data_subset(self: T, predicate: Callable) -> tuple[T, npt.NDArray[np.intp]]: indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] arrays = [self.arrays[i] for i in indices] # TODO copy? 
@@ -473,9 +473,9 @@ def _get_data_subset(self: T, predicate: Callable) -> T: taker = np.array(indices, dtype="intp") new_cols = self._axes[1].take(taker) new_axes = [self._axes[0], new_cols] - return type(self)(arrays, new_axes, verify_integrity=False) + return type(self)(arrays, new_axes, verify_integrity=False), taker - def get_bool_data(self: T, copy: bool = False) -> T: + def get_bool_data(self: T, copy: bool = False) -> tuple[T, npt.NDArray[np.intp]]: """ Select columns that are bool-dtype and object-dtype columns that are all-bool. @@ -485,9 +485,8 @@ def get_bool_data(self: T, copy: bool = False) -> T: Whether to copy the blocks """ return self._get_data_subset(is_inferred_bool_dtype) - # FIXME: return indexer - def get_numeric_data(self: T, copy: bool = False) -> T: + def get_numeric_data(self: T, copy: bool = False) -> tuple[T, npt.NDArray[np.intp]]: """ Select columns that have a numeric dtype. @@ -935,7 +934,7 @@ def idelete(self, indexer) -> ArrayManager: # -------------------------------------------------------------------- # Array-wise Operation - def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: + def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> tuple[T, npt.NDArray[np.intp]]: """ Apply grouped reduction function columnwise, returning a new ArrayManager. 
@@ -948,6 +947,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: Returns ------- ArrayManager + np.ndarray[intp] """ result_arrays: list[np.ndarray] = [] result_indices: list[int] = [] @@ -975,14 +975,16 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: else: index = Index(range(result_arrays[0].shape[0])) + taker = None if ignore_failures: - columns = self.items[np.array(result_indices, dtype="int64")] + taker = np.array(result_indices, dtype=np.intp) + columns = self.items[taker] else: columns = self.items # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; # expected "List[Union[ndarray, ExtensionArray]]" - return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] + return type(self)(result_arrays, [index, columns]), taker # type: ignore[arg-type] def reduce( self: T, func: Callable, ignore_failures: bool = False diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 995f9423cb5cb..f082f2c3778d4 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -203,7 +203,7 @@ def grouped_reduce(self, func, ignore_failures: bool = False): index = default_index(len(res)) mgr = type(self).from_array(res, index) - return mgr + return mgr, np.arange(len(res), dtype=np.intp) # TODO: is taker meaningful here? 
@classmethod def from_array(cls, arr: ArrayLike, index: Index): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e3758dc5574c1..130f70fb9fb2a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1475,7 +1475,7 @@ def idelete(self, indexer) -> BlockManager: # ---------------------------------------------------------------- # Block-wise Operation - def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: + def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> tuple[T, npt.NDArray[np.intp]]: """ Apply grouped reduction function blockwise, returning a new BlockManager. @@ -1488,6 +1488,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: Returns ------- BlockManager + np.ndarray[intp] """ result_blocks: list[Block] = [] dropped_any = False @@ -1522,9 +1523,10 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: if dropped_any: # faster to skip _combine if we haven't dropped any blocks - return self._combine(result_blocks, copy=False, index=index)[0] + return self._combine(result_blocks, copy=False, index=index) - return type(self).from_blocks(result_blocks, [self.axes[0], index]) + taker = np.arange(len(self), dtype=np.intp) + return type(self).from_blocks(result_blocks, [self.axes[0], index]), taker def reduce( self: T, func: Callable, ignore_failures: bool = False @@ -2055,6 +2057,7 @@ def array_values(self): def get_numeric_data(self, copy: bool = False): if self._block.is_numeric: + taker = np.arange(len(self.items), dtype=np.intp) return self.copy(deep=copy), taker taker = np.array([], dtype=np.intp) return self.make_empty(), taker