DEPR: accepting Manager objects in DataFrame/Series (#52419)
jbrockmendel authored Oct 17, 2023
1 parent 3ccdc5b commit b2a622e
Showing 46 changed files with 437 additions and 95 deletions.
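
As a quick orientation before the per-file diff: this commit makes the public ``DataFrame`` constructor warn when handed a pandas-internal Manager object. A minimal sketch of the deprecated pattern, assuming a pandas build that includes this commit and using the private ``._mgr`` attribute purely for illustration:

import warnings

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# Deprecated (GH#52419): handing the internal BlockManager back to the
# public constructor now emits a DeprecationWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pd.DataFrame(df._mgr)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Supported path: construct from public objects or arrays instead.
ok = pd.DataFrame({"a": df["a"]})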
2 changes: 2 additions & 0 deletions doc/source/user_guide/10min.rst
@@ -763,12 +763,14 @@ Parquet
 Writing to a Parquet file:

 .. ipython:: python
+   :okwarning:

    df.to_parquet("foo.parquet")

 Reading from a Parquet file Store using :func:`read_parquet`:

 .. ipython:: python
+   :okwarning:

    pd.read_parquet("foo.parquet")
2 changes: 2 additions & 0 deletions doc/source/user_guide/io.rst
@@ -2247,6 +2247,7 @@ For line-delimited json files, pandas can also return an iterator which reads in
 Line-limited json can also be read using the pyarrow reader by specifying ``engine="pyarrow"``.

 .. ipython:: python
+   :okwarning:

    from io import BytesIO
    df = pd.read_json(BytesIO(jsonl.encode()), lines=True, engine="pyarrow")
@@ -5554,6 +5555,7 @@ Read from an orc file.
 Read only certain columns of an orc file.

 .. ipython:: python
+   :okwarning:

    result = pd.read_orc(
        "example_pa.orc",
3 changes: 3 additions & 0 deletions doc/source/user_guide/pyarrow.rst
@@ -104,6 +104,7 @@ To convert a :external+pyarrow:py:class:`pyarrow.Table` to a :class:`DataFrame`,
 :external+pyarrow:py:meth:`pyarrow.Table.to_pandas` method with ``types_mapper=pd.ArrowDtype``.

 .. ipython:: python
+   :okwarning:

    table = pa.table([pa.array([1, 2, 3], type=pa.int64())], names=["a"])
@@ -164,6 +165,7 @@ functions provide an ``engine`` keyword that can dispatch to PyArrow to accelera
 * :func:`read_feather`

 .. ipython:: python
+   :okwarning:

    import io
    data = io.StringIO("""a,b,c
@@ -178,6 +180,7 @@ PyArrow-backed data by specifying the parameter ``dtype_backend="pyarrow"``. A r
 ``engine="pyarrow"`` to necessarily return PyArrow-backed data.

 .. ipython:: python
+   :okwarning:

    import io
    data = io.StringIO("""a,b,c,d,e,f,g,h,i
3 changes: 3 additions & 0 deletions doc/source/user_guide/scale.rst
@@ -51,6 +51,7 @@ To load the columns we want, we have two options.
 Option 1 loads in all the data and then filters to what we need.

 .. ipython:: python
+   :okwarning:

    columns = ["id_0", "name_0", "x_0", "y_0"]
@@ -59,6 +60,7 @@ Option 1 loads in all the data and then filters to what we need.
 Option 2 only loads the columns we request.

 .. ipython:: python
+   :okwarning:

    pd.read_parquet("timeseries_wide.parquet", columns=columns)
@@ -200,6 +202,7 @@ counts up to this point. As long as each individual file fits in memory, this wi
 work for arbitrary-sized datasets.

 .. ipython:: python
+   :okwarning:

    %%time
    files = pathlib.Path("data/timeseries/").glob("ts*.parquet")
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
@@ -152,6 +152,7 @@ When this keyword is set to ``"pyarrow"``, then these functions will return pyar
 * :meth:`Series.convert_dtypes`

 .. ipython:: python
+   :okwarning:

    import io
    data = io.StringIO("""a,b,c,d,e,f,g,h,i
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -249,6 +249,7 @@ Other Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`)
+- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
 - Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`, explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
 - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
 - Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
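
The :class:`Series` half of the new whatsnew entry behaves symmetrically; a short sketch under the same assumptions as above (``._mgr`` used only to demonstrate the deprecated input):

import warnings

import pandas as pd

ser = pd.Series([1, 2, 3])

# Passing the SingleBlockManager to the Series constructor now warns.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pd.Series(ser._mgr)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)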
1 change: 1 addition & 0 deletions pandas/conftest.py
@@ -178,6 +178,7 @@ def pytest_collection_modifyitems(items, config) -> None:
             "DataFrameGroupBy.fillna",
             "DataFrame.fillna with 'method' is deprecated",
         ),
+        ("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
     ]

     for item in items:
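
The tuple added above pairs a test-name substring with the expected warning text. The loop that consumes these pairs sits outside this hunk; the following is only a plausible sketch of the mechanism (names are illustrative, not the exact conftest code), showing how such pairs can become ``filterwarnings`` marks on matching doctest items:

import pytest

ignored_doctest_warnings = [
    ("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
]


def pytest_collection_modifyitems(items, config) -> None:
    for item in items:
        for name, message in ignored_doctest_warnings:
            if name in item.name:
                item.add_marker(
                    pytest.mark.filterwarnings(f"ignore:{message}")
                )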
7 changes: 5 additions & 2 deletions pandas/core/arraylike.py
@@ -263,7 +263,10 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
         Series,
     )
     from pandas.core.generic import NDFrame
-    from pandas.core.internals import BlockManager
+    from pandas.core.internals import (
+        ArrayManager,
+        BlockManager,
+    )

     cls = type(self)

@@ -347,7 +350,7 @@ def _reconstruct(result):
            if method == "outer":
                raise NotImplementedError
            return result
-        if isinstance(result, BlockManager):
+        if isinstance(result, (BlockManager, ArrayManager)):
            # we went through BlockManager.apply e.g. np.sqrt
            result = self._constructor_from_mgr(result, axes=result.axes)
        else:
17 changes: 14 additions & 3 deletions pandas/core/frame.py
@@ -644,7 +644,6 @@ def _constructor(self) -> Callable[..., DataFrame]:

     def _constructor_from_mgr(self, mgr, axes):
         df = self._from_mgr(mgr, axes=axes)
-
         if type(self) is DataFrame:
             # fastpath avoiding constructor call
             return df
@@ -677,17 +676,29 @@ def __init__(
         dtype: Dtype | None = None,
         copy: bool | None = None,
     ) -> None:
+        allow_mgr = False
         if dtype is not None:
             dtype = self._validate_dtype(dtype)

         if isinstance(data, DataFrame):
             data = data._mgr
+            allow_mgr = True
             if not copy:
                 # if not copying data, ensure to still return a shallow copy
                 # to avoid the result sharing the same Manager
                 data = data.copy(deep=False)

         if isinstance(data, (BlockManager, ArrayManager)):
+            if not allow_mgr:
+                # GH#52419
+                warnings.warn(
+                    f"Passing a {type(data).__name__} to {type(self).__name__} "
+                    "is deprecated and will raise in a future version. "
+                    "Use public APIs instead.",
+                    DeprecationWarning,
+                    stacklevel=find_stack_level(),
+                )
+
             if using_copy_on_write():
                 data = data.copy(deep=False)
             # first check if a Manager is passed without any other arguments
@@ -2462,7 +2473,7 @@ def maybe_reorder(
         manager = _get_option("mode.data_manager", silent=True)
         mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)

-        return cls(mgr)
+        return cls._from_mgr(mgr, axes=mgr.axes)

     def to_records(
         self, index: bool = True, column_dtypes=None, index_dtypes=None
@@ -2672,7 +2683,7 @@ def _from_arrays(
             verify_integrity=verify_integrity,
             typ=manager,
         )
-        return cls(mgr)
+        return cls._from_mgr(mgr, axes=mgr.axes)

     @doc(
         storage_options=_shared_docs["storage_options"],
3 changes: 2 additions & 1 deletion pandas/core/generic.py
@@ -829,7 +829,8 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self
             if not using_copy_on_write() and copy is not False:
                 new_mgr = new_mgr.copy(deep=True)

-            return self._constructor(new_mgr).__finalize__(self, method="swapaxes")
+            out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
+            return out.__finalize__(self, method="swapaxes")

         return self._constructor(
             new_values,
50 changes: 41 additions & 9 deletions pandas/core/series.py
@@ -390,12 +390,22 @@ def __init__(
         else:
             fastpath = False

+        allow_mgr = False
         if (
             isinstance(data, (SingleBlockManager, SingleArrayManager))
             and index is None
             and dtype is None
             and (copy is False or copy is None)
         ):
+            if not allow_mgr:
+                # GH#52419
+                warnings.warn(
+                    f"Passing a {type(data).__name__} to {type(self).__name__} "
+                    "is deprecated and will raise in a future version. "
+                    "Use public APIs instead.",
+                    DeprecationWarning,
+                    stacklevel=find_stack_level(),
+                )
             if using_copy_on_write():
                 data = data.copy(deep=False)
             # GH#33357 called with just the SingleBlockManager
@@ -423,8 +433,19 @@ def __init__(
                 data = SingleBlockManager.from_array(data, index)
             elif manager == "array":
                 data = SingleArrayManager.from_array(data, index)
+            allow_mgr = True
         elif using_copy_on_write() and not copy:
             data = data.copy(deep=False)

+        if not allow_mgr:
+            warnings.warn(
+                f"Passing a {type(data).__name__} to {type(self).__name__} "
+                "is deprecated and will raise in a future version. "
+                "Use public APIs instead.",
+                DeprecationWarning,
+                stacklevel=find_stack_level(),
+            )
+
         if copy:
             data = data.copy()
         # skips validation of the name
@@ -435,6 +456,15 @@ def __init__(
         if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy:
             data = data.copy(deep=False)

+        if not allow_mgr:
+            warnings.warn(
+                f"Passing a {type(data).__name__} to {type(self).__name__} "
+                "is deprecated and will raise in a future version. "
+                "Use public APIs instead.",
+                DeprecationWarning,
+                stacklevel=find_stack_level(),
+            )
+
         name = ibase.maybe_extract_name(name, data, type(self))

         if index is not None:
@@ -500,6 +530,16 @@ def __init__(
                     "`index` argument. `copy` must be False."
                 )

+                if not allow_mgr:
+                    warnings.warn(
+                        f"Passing a {type(data).__name__} to {type(self).__name__} "
+                        "is deprecated and will raise in a future version. "
+                        "Use public APIs instead.",
+                        DeprecationWarning,
+                        stacklevel=find_stack_level(),
+                    )
+                allow_mgr = True
+
         elif isinstance(data, ExtensionArray):
             pass
         else:
@@ -612,22 +652,14 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]:
         return DataFrame

     def _expanddim_from_mgr(self, mgr, axes) -> DataFrame:
-        # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828
-        # This is a short-term implementation that will be replaced
-        # with self._constructor_expanddim._constructor_from_mgr(...)
-        # once downstream packages (geopandas) have had a chance to implement
-        # their own overrides.
-        # error: "Callable[..., DataFrame]" has no attribute "_from_mgr" [attr-defined]
-        from pandas import DataFrame
+        from pandas.core.frame import DataFrame

         return DataFrame._from_mgr(mgr, axes=mgr.axes)

     def _constructor_expanddim_from_mgr(self, mgr, axes):
         df = self._expanddim_from_mgr(mgr, axes)
-        if type(self) is Series:
-            # fastpath avoiding constructor
-            return df
+        assert axes is mgr.axes
         return self._constructor_expanddim(df, copy=False)

     # types
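
In ``_constructor_expanddim_from_mgr``, the plain-``Series`` short-circuit is dropped, so every 1D-to-2D expansion now re-wraps through ``self._constructor_expanddim``. The subclass pairing that hook serves looks like the following sketch, built on pandas' documented subclassing properties:

import pandas as pd


class MyFrame(pd.DataFrame):
    @property
    def _constructor(self):
        return MyFrame


class MySeries(pd.Series):
    @property
    def _constructor(self):
        return MySeries

    @property
    def _constructor_expanddim(self):
        return MyFrame


s = MySeries([1, 2, 3], name="a")
# Expanding to 2D re-wraps via _constructor_expanddim, so the result is
# the paired frame subclass rather than a bare DataFrame.
assert isinstance(s.to_frame(), MyFrame)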
16 changes: 12 additions & 4 deletions pandas/tests/arrays/interval/test_interval.py
@@ -337,20 +337,26 @@ def test_arrow_table_roundtrip(breaks):

     table = pa.table(df)
     assert isinstance(table.field("a").type, ArrowIntervalType)
-    result = table.to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.to_pandas()
     assert isinstance(result["a"].dtype, pd.IntervalDtype)
     tm.assert_frame_equal(result, df)

     table2 = pa.concat_tables([table, table])
-    result = table2.to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table2.to_pandas()
     expected = pd.concat([df, df], ignore_index=True)
     tm.assert_frame_equal(result, expected)

     # GH-41040
     table = pa.table(
         [pa.chunked_array([], type=table.column(0).type)], schema=table.schema
     )
-    result = table.to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.to_pandas()
     tm.assert_frame_equal(result, expected[0:0])


@@ -371,7 +377,9 @@ def test_arrow_table_roundtrip_without_metadata(breaks):
     table = table.replace_schema_metadata()
     assert table.schema.metadata is None

-    result = table.to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.to_pandas()
     assert isinstance(result["a"].dtype, pd.IntervalDtype)
     tm.assert_frame_equal(result, df)
21 changes: 16 additions & 5 deletions pandas/tests/arrays/masked/test_arrow_compat.py
@@ -35,7 +35,10 @@ def test_arrow_roundtrip(data):
     df = pd.DataFrame({"a": data})
     table = pa.table(df)
     assert table.field("a").type == str(data.dtype.numpy_dtype)
-    result = table.to_pandas()
+
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.to_pandas()
     assert result["a"].dtype == data.dtype
     tm.assert_frame_equal(result, df)

@@ -53,7 +56,9 @@ def types_mapper(arrow_type):
     record_batch = pa.RecordBatch.from_arrays(
         [bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"]
     )
-    result = record_batch.to_pandas(types_mapper=types_mapper)
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = record_batch.to_pandas(types_mapper=types_mapper)
     bools = pd.Series([True, None, False], dtype="boolean")
     ints = pd.Series([1, None, 2], dtype="Int64")
     small_ints = pd.Series([-1, 0, 7], dtype="Int64")
@@ -70,7 +75,9 @@ def test_arrow_load_from_zero_chunks(data):
     table = pa.table(
         [pa.chunked_array([], type=table.field("a").type)], schema=table.schema
     )
-    result = table.to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.to_pandas()
     assert result["a"].dtype == data.dtype
     tm.assert_frame_equal(result, df)

@@ -91,14 +98,18 @@ def test_arrow_sliced(data):

     df = pd.DataFrame({"a": data})
     table = pa.table(df)
-    result = table.slice(2, None).to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.slice(2, None).to_pandas()
     expected = df.iloc[2:].reset_index(drop=True)
     tm.assert_frame_equal(result, expected)

     # no missing values
     df2 = df.fillna(data[0])
     table = pa.table(df2)
-    result = table.slice(2, None).to_pandas()
+    msg = "Passing a BlockManager to DataFrame is deprecated"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        result = table.slice(2, None).to_pandas()
     expected = df2.iloc[2:].reset_index(drop=True)
     tm.assert_frame_equal(result, expected)
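
These tests pin the exact message emitted when pyarrow's ``to_pandas`` hands a ``BlockManager`` to the ``DataFrame`` constructor. User code stuck on a pyarrow version that still does this can scope a filter around the call; a sketch, not a recommendation to suppress the warning long-term:

import warnings

import pyarrow as pa

table = pa.table({"a": [1, 2, 3]})

with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        message="Passing a BlockManager",
        category=DeprecationWarning,
    )
    df = table.to_pandas()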