Skip to content

POC/REF: remove axes from Managers #48126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into the base branch from the contributor's branch
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/_libs/properties.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@ cdef class AxisProperty:
if obj is None:
# Only instances have _mgr, not classes
return self
if self.axis == 0:
return obj._index
else:
axes = obj._mgr.axes
return axes[self.axis]
return obj._columns

def __set__(self, obj, value):
    # Assigning to the axis attribute (e.g. `df.index = ...`) delegates
    # to the owner object's _set_axis, keyed by this property's axis
    # number, so the owner performs any validation/cache updates itself.
    obj._set_axis(self.axis, value)
7 changes: 5 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,8 @@ def apply(self) -> DataFrame | Series:
with np.errstate(all="ignore"):
results = self.obj._mgr.apply("apply", func=self.f)
# _constructor will retain self.index and self.columns
return self.obj._constructor(data=results)
axes_dict = self.obj._construct_axes_dict()
return self.obj._constructor(data=results, **axes_dict)

# broadcasting
if self.result_type == "broadcast":
Expand Down Expand Up @@ -1001,6 +1002,7 @@ def series_generator(self):
# We create one Series object, and will swap out the data inside
# of it. Kids: don't do this at home.
ser = self.obj._ixs(0, axis=0)
index = ser.index
mgr = ser._mgr

if is_extension_array_dtype(ser.dtype):
Expand All @@ -1012,9 +1014,10 @@ def series_generator(self):

else:
for (arr, name) in zip(values, self.index):
# GH#35462 re-pin mgr in case setitem changed it
# GH#35462 re-pin mgr, index in case setitem changed it
ser._mgr = mgr
mgr.set_values(arr)
ser._index = index
object.__setattr__(ser, "_name", name)
yield ser

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ def _reconstruct(result):
return result
if isinstance(result, BlockManager):
# we went through BlockManager.apply e.g. np.sqrt
# TODO: any cases that aren't index/columns-preserving?
reconstruct_kwargs.update(self._construct_axes_dict())
result = self._constructor(result, **reconstruct_kwargs, copy=False)
else:
# we converted an array, lost our axes
Expand Down
109 changes: 81 additions & 28 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ class DataFrame(NDFrame, OpsMixin):
2 2 3
"""

_internal_names_set = {"columns", "index"} | NDFrame._internal_names_set
_internal_names_set = {"_columns", "columns", "_index", "index"} | NDFrame._internal_names_set
_typ = "dataframe"
_HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray)
_accessors: set[str] = {"sparse"}
Expand Down Expand Up @@ -621,11 +621,25 @@ def __init__(
dtype = self._validate_dtype(dtype)

if isinstance(data, DataFrame):
if index is None:
index = data.index
if columns is None:
columns = data.columns
data = data._mgr

if isinstance(data, (BlockManager, ArrayManager)):
# first check if a Manager is passed without any other arguments
# -> use fastpath (without checking Manager type)
if index is None or columns is None:
assert False
if data.axes[0] is not columns or data.axes[1] is not index:
# FIXME: without this check, json tests segfault...
# nope, segfaults even with this check
data.axes = [ensure_index(columns), ensure_index(index)]
#if not index.equals(data.axes[-1]):#index is not data.axes[-1]:
# assert False
#if not columns.equals(data.axes[0]):#columns is not data.axes[0]:
# assert False
if index is None and columns is None and dtype is None and not copy:
# GH#33357 fastpath
NDFrame.__init__(self, data)
Expand Down Expand Up @@ -751,7 +765,7 @@ def __init__(
index, # type: ignore[arg-type]
dtype,
)
mgr = arrays_to_mgr(
mgr, _, _ = arrays_to_mgr(
arrays,
columns,
index,
Expand Down Expand Up @@ -794,7 +808,7 @@ def __init__(
construct_1d_arraylike_from_scalar(data, len(index), dtype)
for _ in range(len(columns))
]
mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
mgr, _, _ = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
else:
arr2d = construct_2d_arraylike_from_scalar(
data,
Expand Down Expand Up @@ -2399,9 +2413,9 @@ def maybe_reorder(
columns = columns.drop(exclude)

manager = get_option("mode.data_manager")
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)
mgr, index, columns = arrays_to_mgr(arrays, columns, result_index, typ=manager)

return cls(mgr)
return cls(mgr, index=index, columns=columns)

def to_records(
self, index: bool = True, column_dtypes=None, index_dtypes=None
Expand Down Expand Up @@ -2603,15 +2617,15 @@ def _from_arrays(
columns = ensure_index(columns)
if len(columns) != len(arrays):
raise ValueError("len(columns) must match len(arrays)")
mgr = arrays_to_mgr(
mgr, index, columns = arrays_to_mgr(
arrays,
columns,
index,
dtype=dtype,
verify_integrity=verify_integrity,
typ=manager,
)
return cls(mgr)
return cls(mgr, index=index, columns=columns)

@doc(
storage_options=_shared_docs["storage_options"],
Expand Down Expand Up @@ -3729,7 +3743,7 @@ def _ixs(self, i: int, axis: int = 0) -> Series:

# if we are a copy, mark as such
copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None
result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__(
result = self._constructor_sliced(new_mgr, index=self.columns, name=self.index[i]).__finalize__(
self
)
result._set_is_copy(self, copy=copy)
Expand Down Expand Up @@ -4154,6 +4168,7 @@ def _set_item_mgr(self, key, value: ArrayLike) -> None:
except KeyError:
# This item wasn't present, just insert at end
self._mgr.insert(len(self._info_axis), key, value)
self._columns = self.columns.insert(len(self._info_axis), key)
else:
self._iset_item_mgr(loc, value)

Expand Down Expand Up @@ -4257,6 +4272,7 @@ def _ensure_valid_index(self, value) -> None:
index_copy.name = self.index.name

self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan)
self._index = index_copy

def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
"""
Expand All @@ -4267,7 +4283,7 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
name = self.columns[loc]
klass = self._constructor_sliced
# We get index=self.index bc values is a SingleDataManager
return klass(values, name=name, fastpath=True).__finalize__(self)
return klass(values, name=name, index=self.index, fastpath=True).__finalize__(self)

# ----------------------------------------------------------------------
# Lookup Caching
Expand Down Expand Up @@ -4486,6 +4502,8 @@ def query(self, expr: str, inplace: bool = False, **kwargs) -> DataFrame | None:

if inplace:
self._update_inplace(result)
self._index = result._index
self._columns = result._columns
return None
else:
return result
Expand Down Expand Up @@ -4754,8 +4772,10 @@ def predicate(arr: ArrayLike) -> bool:

return True

mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
return type(self)(mgr).__finalize__(self)
mgr, taker = self._mgr._get_data_subset(predicate)
mgr = mgr.copy(deep=None)
columns = self.columns.take(taker)
return type(self)(mgr, columns=columns, index=self.index).__finalize__(self)

def insert(
self,
Expand Down Expand Up @@ -4824,6 +4844,7 @@ def insert(

value = self._sanitize_column(value)
self._mgr.insert(loc, column, value)
self._columns = self.columns.insert(loc, column)

def assign(self, **kwargs) -> DataFrame:
r"""
Expand Down Expand Up @@ -5855,7 +5876,7 @@ def shift(
fill_value=fill_value,
allow_dups=True,
)
res_df = self._constructor(mgr)
res_df = self._constructor(mgr, columns=self.columns, index=self.index)
return res_df.__finalize__(self, method="shift")

return super().shift(
Expand Down Expand Up @@ -6382,7 +6403,8 @@ class max type

@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
def isna(self) -> DataFrame:
    """
    Return a boolean same-shaped DataFrame indicating missing values.

    Returns
    -------
    DataFrame
        Mask of bools with the same index/columns as ``self``.
    """
    # NOTE(review): the scraped diff interleaved the pre-change call
    # (`self._constructor(self._mgr.isna(func=isna))`) with the added
    # lines; this is the post-change version, which passes the axes
    # explicitly because the Manager no longer carries them.
    axes_dict = self._construct_axes_dict()
    result = self._constructor(self._mgr.isna(func=isna), **axes_dict)
    # Propagate metadata (attrs/flags) from self per pandas convention.
    return result.__finalize__(self, method="isna")

@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
Expand Down Expand Up @@ -6587,6 +6609,8 @@ def dropna(
if not inplace:
return result
self._update_inplace(result)
self._columns = result._columns
self._index = result._index
return None

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"])
Expand Down Expand Up @@ -6685,6 +6709,8 @@ def drop_duplicates(

if inplace:
self._update_inplace(result)
self._index = result._index
self._columns = result._columns
return None
else:
return result
Expand Down Expand Up @@ -6934,16 +6960,27 @@ def sort_values( # type: ignore[override]
else:
return self.copy()

bm_axis = self._get_block_manager_axis(axis)

new_data = self._mgr.take(
indexer, axis=self._get_block_manager_axis(axis), verify=False
indexer, axis=bm_axis, verify=False
)

axis_name = self._get_axis_name(axis)

axes_dict = {}
axes_dict[axis_name] = self.axes[axis].take(indexer)
if axis == 0:
axes_dict["columns"] = self.columns
else:
axes_dict["index"] = self.index

if ignore_index:
new_data.set_axis(
self._get_block_manager_axis(axis), default_index(len(indexer))
)
rng = default_index(len(indexer))
new_data.set_axis(bm_axis, rng)
axes_dict[axis_name] = rng

result = self._constructor(new_data)
result = self._constructor(new_data, **axes_dict)
if inplace:
return self._update_inplace(result)
else:
Expand Down Expand Up @@ -7627,7 +7664,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
# i.e. scalar, faster than checking np.ndim(right) == 0
with np.errstate(all="ignore"):
bm = self._mgr.apply(array_op, right=right)
return self._constructor(bm)
return self._constructor(bm, index=self.index, columns=self.columns)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
Expand All @@ -7648,7 +7685,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
right._mgr, # type: ignore[arg-type]
array_op,
)
return self._constructor(bm)
return self._constructor(bm, index=self.index, columns=self.columns)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
Expand Down Expand Up @@ -9239,7 +9276,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
axis = 0

new_data = self._mgr.diff(n=periods, axis=axis)
return self._constructor(new_data).__finalize__(self, "diff")
return self._constructor(new_data, index=self.index, columns=self.columns).__finalize__(self, "diff")

# ----------------------------------------------------------------------
# Function application
Expand Down Expand Up @@ -10850,8 +10887,9 @@ def _reduce(
# cols = self.columns[~dt64_cols]
# self = self[cols]
predicate = lambda x: not is_datetime64_any_dtype(x.dtype)
mgr = self._mgr._get_data_subset(predicate)
self = type(self)(mgr)
mgr, taker = self._mgr._get_data_subset(predicate)
columns = self.columns[taker]
self = type(self)(mgr, index=self.index, columns=columns)

# TODO: Make other agg func handle axis=None properly GH#21597
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -10900,7 +10938,20 @@ def _get_data() -> DataFrame:
# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
out = df._constructor(res).iloc[0]
index = Index([None], dtype=object)
assert index.equals(res.axes[1])
if ignore_failures:
if len(res.items) == len(df.columns):
# i.e. nothing was dropped
columns = df.columns
else:
# FIXME: get axes without mgr.axes; THIS IS WRONG TOO
columns = res.axes[0]
else:
columns = df.columns
assert columns.equals(res.axes[0])

out = df._constructor(res, index=index, columns=columns).iloc[0]
if out_dtype is not None:
out = out.astype(out_dtype)
if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
Expand Down Expand Up @@ -11398,7 +11449,8 @@ def quantile(
res = data._mgr.take(indexer[q_idx], verify=False)
res.axes[1] = q

result = self._constructor(res)
# FIXME: get axes without mgr.axes
result = self._constructor(res, columns=res.axes[0], index=res.axes[1])
return result.__finalize__(self, method="quantile")

@doc(NDFrame.asfreq, **_shared_doc_kwargs)
Expand Down Expand Up @@ -11665,9 +11717,9 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
_info_axis_name = "columns"

index = properties.AxisProperty(
axis=1, doc="The index (row labels) of the DataFrame."
axis=0, doc="The index (row labels) of the DataFrame."
)
columns = properties.AxisProperty(axis=0, doc="The column labels of the DataFrame.")
columns = properties.AxisProperty(axis=1, doc="The column labels of the DataFrame.")

@property
def _AXIS_NUMBERS(self) -> dict[str, int]:
Expand Down Expand Up @@ -11702,8 +11754,9 @@ def _to_dict_of_blocks(self, copy: bool = True):
# convert to BlockManager if needed -> this way support ArrayManager as well
mgr = mgr_to_mgr(mgr, "block")
mgr = cast(BlockManager, mgr)
# FIXME: get axes without mgr.axes
return {
k: self._constructor(v).__finalize__(self)
k: self._constructor(v, index=self.index, columns=v.axes[0]).__finalize__(self)
for k, v, in mgr.to_dict(copy=copy).items()
}

Expand Down
Loading