From b2a622e6f3f5c61dae62b0ed71ebc0006188a91b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Oct 2023 14:23:27 -0700 Subject: [PATCH] DEPR: accepting Manager objects in DataFrame/Series (#52419) --- doc/source/user_guide/10min.rst | 2 + doc/source/user_guide/io.rst | 2 + doc/source/user_guide/pyarrow.rst | 3 + doc/source/user_guide/scale.rst | 3 + doc/source/whatsnew/v2.0.0.rst | 1 + doc/source/whatsnew/v2.2.0.rst | 1 + pandas/conftest.py | 1 + pandas/core/arraylike.py | 7 +- pandas/core/frame.py | 17 +++- pandas/core/generic.py | 3 +- pandas/core/series.py | 50 ++++++++-- pandas/tests/arrays/interval/test_interval.py | 16 +++- .../tests/arrays/masked/test_arrow_compat.py | 21 ++++- .../tests/arrays/period/test_arrow_compat.py | 17 +++- pandas/tests/arrays/string_/test_string.py | 8 +- pandas/tests/copy_view/test_constructors.py | 18 +++- pandas/tests/frame/test_block_internals.py | 7 +- pandas/tests/frame/test_constructors.py | 4 +- pandas/tests/internals/test_internals.py | 20 +++- pandas/tests/io/json/test_readlines.py | 4 + .../io/parser/common/test_common_basic.py | 13 ++- .../tests/io/parser/common/test_data_list.py | 4 + pandas/tests/io/parser/common/test_decimal.py | 4 + pandas/tests/io/parser/common/test_index.py | 4 + pandas/tests/io/parser/common/test_inf.py | 10 +- pandas/tests/io/parser/common/test_ints.py | 4 + .../io/parser/common/test_read_errors.py | 15 ++- .../io/parser/dtypes/test_categorical.py | 4 + .../io/parser/dtypes/test_dtypes_basic.py | 4 + pandas/tests/io/parser/test_compression.py | 4 + pandas/tests/io/parser/test_encoding.py | 4 + pandas/tests/io/parser/test_header.py | 4 + pandas/tests/io/parser/test_index_col.py | 4 + pandas/tests/io/parser/test_na_values.py | 4 + pandas/tests/io/parser/test_parse_dates.py | 93 ++++++++++++++++--- pandas/tests/io/parser/test_unsupported.py | 21 +++-- .../tests/io/parser/usecols/test_strings.py | 4 + .../io/parser/usecols/test_usecols_basic.py | 47 +++++++--- pandas/tests/io/test_common.py | 4 + pandas/tests/io/test_feather.py | 4 + pandas/tests/io/test_fsspec.py | 4 + pandas/tests/io/test_gcs.py | 4 + pandas/tests/io/test_orc.py | 48 +++++++--- pandas/tests/io/test_parquet.py | 9 +- pandas/tests/io/test_user_agent.py | 3 + pandas/tests/test_downstream.py | 4 +- 46 files changed, 437 insertions(+), 95 deletions(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index c8e67710c85a9..e9f1a073d77ef 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -763,12 +763,14 @@ Parquet Writing to a Parquet file: .. ipython:: python + :okwarning: df.to_parquet("foo.parquet") Reading from a Parquet file Store using :func:`read_parquet`: .. ipython:: python + :okwarning: pd.read_parquet("foo.parquet") diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index cc19e4aca061c..cebe58a4da62e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2247,6 +2247,7 @@ For line-delimited json files, pandas can also return an iterator which reads in Line-limited json can also be read using the pyarrow reader by specifying ``engine="pyarrow"``. .. ipython:: python + :okwarning: from io import BytesIO df = pd.read_json(BytesIO(jsonl.encode()), lines=True, engine="pyarrow") @@ -5554,6 +5555,7 @@ Read from an orc file. Read only certain columns of an orc file. .. 
ipython:: python + :okwarning: result = pd.read_orc( "example_pa.orc", diff --git a/doc/source/user_guide/pyarrow.rst b/doc/source/user_guide/pyarrow.rst index 61b383afb7c43..9012c7b591f34 100644 --- a/doc/source/user_guide/pyarrow.rst +++ b/doc/source/user_guide/pyarrow.rst @@ -104,6 +104,7 @@ To convert a :external+pyarrow:py:class:`pyarrow.Table` to a :class:`DataFrame`, :external+pyarrow:py:meth:`pyarrow.Table.to_pandas` method with ``types_mapper=pd.ArrowDtype``. .. ipython:: python + :okwarning: table = pa.table([pa.array([1, 2, 3], type=pa.int64())], names=["a"]) @@ -164,6 +165,7 @@ functions provide an ``engine`` keyword that can dispatch to PyArrow to accelera * :func:`read_feather` .. ipython:: python + :okwarning: import io data = io.StringIO("""a,b,c @@ -178,6 +180,7 @@ PyArrow-backed data by specifying the parameter ``dtype_backend="pyarrow"``. A r ``engine="pyarrow"`` to necessarily return PyArrow-backed data. .. ipython:: python + :okwarning: import io data = io.StringIO("""a,b,c,d,e,f,g,h,i diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index b262de5d71439..7b89b25f6ea38 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -51,6 +51,7 @@ To load the columns we want, we have two options. Option 1 loads in all the data and then filters to what we need. .. ipython:: python + :okwarning: columns = ["id_0", "name_0", "x_0", "y_0"] @@ -59,6 +60,7 @@ Option 1 loads in all the data and then filters to what we need. Option 2 only loads the columns we request. .. ipython:: python + :okwarning: pd.read_parquet("timeseries_wide.parquet", columns=columns) @@ -200,6 +202,7 @@ counts up to this point. As long as each individual file fits in memory, this wi work for arbitrary-sized datasets. .. ipython:: python + :okwarning: %%time files = pathlib.Path("data/timeseries/").glob("ts*.parquet") diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index be63da09846c8..07cc4aeed1272 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -152,6 +152,7 @@ When this keyword is set to ``"pyarrow"``, then these functions will return pyar * :meth:`Series.convert_dtypes` .. ipython:: python + :okwarning: import io data = io.StringIO("""a,b,c,d,e,f,g,h,i diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ea7850b6f2a5f..7a476c529d947 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -249,6 +249,7 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`) +- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`) - Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`, explicitly call ``result = result.infer_objects(copy=False)`` instead. 
To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`) - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`) - Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`) diff --git a/pandas/conftest.py b/pandas/conftest.py index 62f22921f0482..7f24b8370c8eb 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -178,6 +178,7 @@ def pytest_collection_modifyitems(items, config) -> None: "DataFrameGroupBy.fillna", "DataFrame.fillna with 'method' is deprecated", ), + ("read_parquet", "Passing a BlockManager to DataFrame is deprecated"), ] for item in items: diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 62f6737d86d51..fd585b3e19468 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -263,7 +263,10 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) Series, ) from pandas.core.generic import NDFrame - from pandas.core.internals import BlockManager + from pandas.core.internals import ( + ArrayManager, + BlockManager, + ) cls = type(self) @@ -347,7 +350,7 @@ def _reconstruct(result): if method == "outer": raise NotImplementedError return result - if isinstance(result, BlockManager): + if isinstance(result, (BlockManager, ArrayManager)): # we went through BlockManager.apply e.g. np.sqrt result = self._constructor_from_mgr(result, axes=result.axes) else: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4b5bed5d3fff8..70a5ac69011d1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -644,7 +644,6 @@ def _constructor(self) -> Callable[..., DataFrame]: def _constructor_from_mgr(self, mgr, axes): df = self._from_mgr(mgr, axes=axes) - if type(self) is DataFrame: # fastpath avoiding constructor call return df @@ -677,17 +676,29 @@ def __init__( dtype: Dtype | None = None, copy: bool | None = None, ) -> None: + allow_mgr = False if dtype is not None: dtype = self._validate_dtype(dtype) if isinstance(data, DataFrame): data = data._mgr + allow_mgr = True if not copy: # if not copying data, ensure to still return a shallow copy # to avoid the result sharing the same Manager data = data.copy(deep=False) if isinstance(data, (BlockManager, ArrayManager)): + if not allow_mgr: + # GH#52419 + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + if using_copy_on_write(): data = data.copy(deep=False) # first check if a Manager is passed without any other arguments @@ -2462,7 +2473,7 @@ def maybe_reorder( manager = _get_option("mode.data_manager", silent=True) mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) - return cls(mgr) + return cls._from_mgr(mgr, axes=mgr.axes) def to_records( self, index: bool = True, column_dtypes=None, index_dtypes=None @@ -2672,7 +2683,7 @@ def _from_arrays( verify_integrity=verify_integrity, typ=manager, ) - return cls(mgr) + return cls._from_mgr(mgr, axes=mgr.axes) @doc( storage_options=_shared_docs["storage_options"], diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f1ecc57335a51..1ae4c3cdfc458 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -829,7 +829,8 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self if not using_copy_on_write() and copy is not False: new_mgr = new_mgr.copy(deep=True) - return self._constructor(new_mgr).__finalize__(self, method="swapaxes") + out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) + return out.__finalize__(self, method="swapaxes") return self._constructor( new_values, diff --git a/pandas/core/series.py b/pandas/core/series.py index b02cee3c1fcf3..c6e1e460b336a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -390,12 +390,22 @@ def __init__( else: fastpath = False + allow_mgr = False if ( isinstance(data, (SingleBlockManager, SingleArrayManager)) and index is None and dtype is None and (copy is False or copy is None) ): + if not allow_mgr: + # GH#52419 + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) if using_copy_on_write(): data = data.copy(deep=False) # GH#33357 called with just the SingleBlockManager @@ -423,8 +433,19 @@ def __init__( data = SingleBlockManager.from_array(data, index) elif manager == "array": data = SingleArrayManager.from_array(data, index) + allow_mgr = True elif using_copy_on_write() and not copy: data = data.copy(deep=False) + + if not allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + if copy: data = data.copy() # skips validation of the name @@ -435,6 +456,15 @@ def __init__( if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy: data = data.copy(deep=False) + if not allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + name = ibase.maybe_extract_name(name, data, type(self)) if index is not None: @@ -500,6 +530,16 @@ def __init__( "`index` argument. `copy` must be False." ) + if not allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + allow_mgr = True + elif isinstance(data, ExtensionArray): pass else: @@ -612,22 +652,14 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: return DataFrame def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: - # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 - # This is a short-term implementation that will be replaced - # with self._constructor_expanddim._constructor_from_mgr(...) - # once downstream packages (geopandas) have had a chance to implement - # their own overrides. - # error: "Callable[..., DataFrame]" has no attribute "_from_mgr" [attr-defined] - from pandas import DataFrame + from pandas.core.frame import DataFrame return DataFrame._from_mgr(mgr, axes=mgr.axes) def _constructor_expanddim_from_mgr(self, mgr, axes): df = self._expanddim_from_mgr(mgr, axes) if type(self) is Series: - # fastpath avoiding constructor return df - assert axes is mgr.axes return self._constructor_expanddim(df, copy=False) # types diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 761b85287764f..678ff13d99a5f 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -337,12 +337,16 @@ def test_arrow_table_roundtrip(breaks): table = pa.table(df) assert isinstance(table.field("a").type, ArrowIntervalType) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.IntervalDtype) tm.assert_frame_equal(result, df) table2 = pa.concat_tables([table, table]) - result = table2.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -350,7 +354,9 @@ def test_arrow_table_roundtrip(breaks): table = pa.table( [pa.chunked_array([], type=table.column(0).type)], schema=table.schema ) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() tm.assert_frame_equal(result, expected[0:0]) @@ -371,7 +377,9 @@ def test_arrow_table_roundtrip_without_metadata(breaks): table = table.replace_schema_metadata() assert table.schema.metadata is None - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.IntervalDtype) tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index fc2094bd9f4a8..f11ec5f8a36a0 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -35,7 +35,10 @@ def test_arrow_roundtrip(data): df = pd.DataFrame({"a": data}) table = pa.table(df) assert table.field("a").type == str(data.dtype.numpy_dtype) - result = table.to_pandas() + + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert result["a"].dtype == data.dtype tm.assert_frame_equal(result, df) @@ -53,7 
+56,9 @@ def types_mapper(arrow_type): record_batch = pa.RecordBatch.from_arrays( [bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"] ) - result = record_batch.to_pandas(types_mapper=types_mapper) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = record_batch.to_pandas(types_mapper=types_mapper) bools = pd.Series([True, None, False], dtype="boolean") ints = pd.Series([1, None, 2], dtype="Int64") small_ints = pd.Series([-1, 0, 7], dtype="Int64") @@ -70,7 +75,9 @@ def test_arrow_load_from_zero_chunks(data): table = pa.table( [pa.chunked_array([], type=table.field("a").type)], schema=table.schema ) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert result["a"].dtype == data.dtype tm.assert_frame_equal(result, df) @@ -91,14 +98,18 @@ def test_arrow_sliced(data): df = pd.DataFrame({"a": data}) table = pa.table(df) - result = table.slice(2, None).to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.slice(2, None).to_pandas() expected = df.iloc[2:].reset_index(drop=True) tm.assert_frame_equal(result, expected) # no missing values df2 = df.fillna(data[0]) table = pa.table(df2) - result = table.slice(2, None).to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.slice(2, None).to_pandas() expected = df2.iloc[2:].reset_index(drop=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index a1e1e8efe6dee..c97a08244a9a8 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -81,12 +81,16 @@ def test_arrow_table_roundtrip(): table = pa.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) table2 = pa.concat_tables([table, table]) - result = table2.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -104,7 +108,10 @@ def test_arrow_load_from_zero_chunks(): table = pa.table( [pa.chunked_array([], type=table.column(0).type)], schema=table.schema ) - result = table.to_pandas() + + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) @@ -119,6 +126,8 @@ def test_arrow_table_roundtrip_without_metadata(): table = table.replace_schema_metadata() assert table.schema.metadata is None - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, 
df) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 451136225a612..052a013eba739 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -497,7 +497,9 @@ def test_arrow_roundtrip(dtype, string_storage2): table = pa.table(df) assert table.field("a").type == "string" with pd.option_context("string_storage", string_storage2): - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.StringDtype) expected = df.astype(f"string[{string_storage2}]") tm.assert_frame_equal(result, expected) @@ -516,7 +518,9 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2): # Instantiate the same table with no chunks at all table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema) with pd.option_context("string_storage", string_storage2): - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.StringDtype) expected = df.astype(f"string[{string_storage2}]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 31f80d300ccca..b288d51160534 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -176,19 +176,29 @@ def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): def test_series_from_block_manager_different_dtype(using_copy_on_write): ser = Series([1, 2, 3], dtype="int64") - ser2 = Series(ser._mgr, dtype="int32") + msg = "Passing a SingleBlockManager to Series" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + ser2 = Series(ser._mgr, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(ser2)) if using_copy_on_write: assert ser2._mgr._has_no_reference(0) -@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr]) +@pytest.mark.parametrize("use_mgr", [True, False]) @pytest.mark.parametrize("columns", [None, ["a"]]) -def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, func): +def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() - new_df = DataFrame(func(df)) + if use_mgr: + data = df._mgr + warn = DeprecationWarning + else: + data = df + warn = None + msg = "Passing a BlockManager to DataFrame" + with tm.assert_produces_warning(warn, match=msg): + new_df = DataFrame(data) assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) new_df.iloc[0] = 100 diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 9e8d92e832d01..8c53ffdd493d6 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -50,11 +50,14 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti[1] == ts def test_cast_internals(self, float_frame): - casted = DataFrame(float_frame._mgr, dtype=int) + msg = "Passing a BlockManager to DataFrame" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + casted = DataFrame(float_frame._mgr, dtype=int) expected = DataFrame(float_frame._series, dtype=int) tm.assert_frame_equal(casted, expected) - 
casted = DataFrame(float_frame._mgr, dtype=np.int32) + with tm.assert_produces_warning(DeprecationWarning, match=msg): + casted = DataFrame(float_frame._mgr, dtype=np.int32) expected = DataFrame(float_frame._series, dtype=np.int32) tm.assert_frame_equal(casted, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 4307cfc1e1d4e..35bc23601eaf4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1741,7 +1741,9 @@ def test_constructor_manager_resize(self, float_frame): index = list(float_frame.index[:5]) columns = list(float_frame.columns[:3]) - result = DataFrame(float_frame._mgr, index=index, columns=columns) + msg = "Passing a BlockManager to DataFrame" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = DataFrame(float_frame._mgr, index=index, columns=columns) tm.assert_index_equal(result.index, Index(index)) tm.assert_index_equal(result.columns, Index(columns)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index d1c1d72ac4afe..ae79da4bbe0d3 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -397,7 +397,10 @@ def test_duplicate_ref_loc_failure(self): def test_pickle(self, mgr): mgr2 = tm.round_trip_pickle(mgr) - tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + tm.assert_frame_equal( + DataFrame._from_mgr(mgr, axes=mgr.axes), + DataFrame._from_mgr(mgr2, axes=mgr2.axes), + ) # GH2431 assert hasattr(mgr2, "_is_consolidated") @@ -411,16 +414,25 @@ def test_pickle(self, mgr): def test_non_unique_pickle(self, mgr_string): mgr = create_mgr(mgr_string) mgr2 = tm.round_trip_pickle(mgr) - tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + tm.assert_frame_equal( + DataFrame._from_mgr(mgr, axes=mgr.axes), + DataFrame._from_mgr(mgr2, axes=mgr2.axes), + ) def test_categorical_block_pickle(self): mgr = create_mgr("a: category") mgr2 = tm.round_trip_pickle(mgr) - tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + tm.assert_frame_equal( + DataFrame._from_mgr(mgr, axes=mgr.axes), + DataFrame._from_mgr(mgr2, axes=mgr2.axes), + ) smgr = create_single_mgr("category") smgr2 = tm.round_trip_pickle(smgr) - tm.assert_series_equal(Series(smgr), Series(smgr2)) + tm.assert_series_equal( + Series()._constructor_from_mgr(smgr, axes=smgr.axes), + Series()._constructor_from_mgr(smgr2, axes=smgr2.axes), + ) def test_iget(self): cols = Index(list("abc")) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 124d6890886a8..f5342e0ab1a38 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -14,6 +14,10 @@ from pandas.io.json._json import JsonReader +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + @pytest.fixture def lines_json_df(): diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 00a26a755756f..c2f37c75aaab3 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -29,6 +29,10 @@ from pandas.io.parsers import TextFileReader from pandas.io.parsers.c_parser_wrapper import CParserWrapper +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") skip_pyarrow = 
pytest.mark.usefixtures("pyarrow_skip") @@ -85,7 +89,14 @@ def test_read_csv_local(all_parsers, csv1): parser = all_parsers fname = prefix + str(os.path.abspath(csv1)) - result = parser.read_csv(fname, index_col=0, parse_dates=True) + + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + msg = "Passing a BlockManager to DataFrame is deprecated" + + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv(fname, index_col=0, parse_dates=True) expected = DataFrame( [ diff --git a/pandas/tests/io/parser/common/test_data_list.py b/pandas/tests/io/parser/common/test_data_list.py index 8d484bba1cb9d..3b0ff9e08d349 100644 --- a/pandas/tests/io/parser/common/test_data_list.py +++ b/pandas/tests/io/parser/common/test_data_list.py @@ -12,6 +12,10 @@ from pandas.io.parsers import TextParser +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_decimal.py b/pandas/tests/io/parser/common/test_decimal.py index 72d4eb2c69845..b8a68c138eeff 100644 --- a/pandas/tests/io/parser/common/test_decimal.py +++ b/pandas/tests/io/parser/common/test_decimal.py @@ -9,6 +9,10 @@ from pandas import DataFrame import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 69afb9fe56472..1d5b0fec7a7c6 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -15,6 +15,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") # GH#43650: Some expected failures with the pyarrow engine can occasionally diff --git a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index 488b9d75affe1..e1dc87ed0071e 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -13,6 +13,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -63,7 +67,11 @@ def test_read_csv_with_use_inf_as_na(all_parsers): parser = all_parsers data = "1.0\nNaN\n3.0" msg = "use_inf_as_na option is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): with option_context("use_inf_as_na", True): result = parser.read_csv(StringIO(data), header=None) expected = DataFrame([1.0, np.nan, 3.0]) diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index e3159ef3e6a42..939fdbc159454 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -13,6 +13,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + # GH#43650: Some expected failures with the pyarrow engine can occasionally # cause a 
deadlock instead, so we skip these instead of xfailing skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index ff1af6e2dc81b..8b612c8d40994 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -148,7 +148,12 @@ def test_suppress_error_output(all_parsers): data = "a\n1\n1,2,3\n4\n5,6,7" expected = DataFrame({"a": [1, 4]}) - with tm.assert_produces_warning(None): + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + msg = "Passing a BlockManager to DataFrame" + + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv(StringIO(data), on_bad_lines="skip") tm.assert_frame_equal(result, expected) @@ -174,11 +179,13 @@ def test_warn_bad_lines(all_parsers): expected = DataFrame({"a": [1, 4]}) match_msg = "Skipping line" + expected_warning = ParserWarning if parser.engine == "pyarrow": match_msg = "Expected 1 columns, but found 3: 1,2,3" + expected_warning = (ParserWarning, DeprecationWarning) with tm.assert_produces_warning( - ParserWarning, match=match_msg, check_stacklevel=False + expected_warning, match=match_msg, check_stacklevel=False ): result = parser.read_csv(StringIO(data), on_bad_lines="warn") tm.assert_frame_equal(result, expected) @@ -282,11 +289,13 @@ def test_on_bad_lines_warn_correct_formatting(all_parsers): expected = DataFrame({"1": "a", "2": ["b"] * 2}) match_msg = "Skipping line" + expected_warning = ParserWarning if parser.engine == "pyarrow": match_msg = "Expected 2 columns, but found 3: a,b,c" + expected_warning = (ParserWarning, DeprecationWarning) with tm.assert_produces_warning( - ParserWarning, match=match_msg, check_stacklevel=False + expected_warning, match=match_msg, check_stacklevel=False ): result = parser.read_csv(StringIO(data), on_bad_lines="warn") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index 8640c17a1349f..d305c94b171f3 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -20,6 +20,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 8fb25fe3ee47e..3f3d340ab2e08 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -22,6 +22,10 @@ StringArray, ) +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + @pytest.mark.parametrize("dtype", [str, object]) @pytest.mark.parametrize("check_orig", [True, False]) diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index 1c67ec7c3066c..2ef7102543154 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -13,6 +13,10 @@ from pandas import DataFrame import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") 
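The parser test modules patched above and below handle the new warning in one of two ways: a module-level ``pytestmark`` that filters out "Passing a BlockManager to DataFrame" wholesale, or an explicit ``tm.assert_produces_warning`` context where the warning is part of the behaviour under test. A condensed sketch of both patterns follows (assuming a pandas build that includes this change; the test function name is illustrative, and in the patch itself the two patterns appear in separate modules):

    # Sketch only: combines the two test-side patterns used throughout this patch.
    import pytest

    import pandas as pd
    import pandas._testing as tm

    # Pattern 1: ignore the new DeprecationWarning for the whole test module.
    pytestmark = pytest.mark.filterwarnings(
        "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
    )


    def test_block_manager_constructor_warns():
        # Pattern 2: assert the warning explicitly. assert_produces_warning
        # resets warning filters inside its context, so it still records the
        # warning even under the module-level ignore above.
        df = pd.DataFrame({"a": [1, 2, 3]})
        msg = "Passing a BlockManager to DataFrame"
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            pd.DataFrame(df._mgr)
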
diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index c09ab7898c67c..070027860b829 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -19,6 +19,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index d6eab59074dd6..d059cc0c49db4 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -18,6 +18,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + # TODO(1.4): Change me to xfails at release time skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 8213ea006614f..83dae12b472da 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -15,6 +15,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + # TODO(1.4): Change me to xfails at release time skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 9a16ec5a50d36..86c50fe103f2c 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -16,6 +16,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index bd08dd3a0c5d2..c0977be03adef 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -35,6 +35,10 @@ from pandas.io.parsers import read_csv +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") # GH#43650: Some expected failures with the pyarrow engine can occasionally @@ -183,8 +187,11 @@ def date_parser(*date_cols): "keep_date_col": keep_date_col, "names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"], } + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), **kwds, @@ -502,7 +509,10 @@ def test_multiple_date_cols_int_cast(all_parsers): "date_parser": pd.to_datetime, } result = parser.read_csv_check_warnings( - FutureWarning, "use 'date_format' instead", StringIO(data), **kwds + (FutureWarning, DeprecationWarning), + "use 'date_format' instead", + StringIO(data), + **kwds, ) expected = DataFrame( @@ -549,8 +559,12 @@ def test_multiple_date_col_timestamp_parse(all_parsers): data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25""" + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = 
parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), parse_dates=[[0, 1]], @@ -711,8 +725,12 @@ def test_date_parser_int_bug(all_parsers): "12345,1,-1,3,invoice_InvoiceResource,search\n" ) + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), index_col=0, @@ -1270,7 +1288,14 @@ def test_bad_date_parse(all_parsers, cache_dates, value): parser = all_parsers s = StringIO((f"{value},\n") * 50000) - parser.read_csv( + warn = None + msg = "Passing a BlockManager to DataFrame" + if parser.engine == "pyarrow": + warn = DeprecationWarning + + parser.read_csv_check_warnings( + warn, + msg, s, header=None, names=["foo", "bar"], @@ -1292,16 +1317,19 @@ def test_bad_date_parse_with_warning(all_parsers, cache_dates, value): # pandas doesn't try to guess the datetime format # TODO: parse dates directly in pyarrow, see # https://github.com/pandas-dev/pandas/issues/48017 - warn = None + warn = DeprecationWarning + msg = "Passing a BlockManager to DataFrame" elif cache_dates: # Note: warning is not raised if 'cache_dates', because here there is only a # single unique date and hence no risk of inconsistent parsing. warn = None + msg = None else: warn = UserWarning + msg = "Could not infer format" parser.read_csv_check_warnings( warn, - "Could not infer format", + msg, s, header=None, names=["foo", "bar"], @@ -1331,8 +1359,12 @@ def test_parse_dates_infer_datetime_format_warning(all_parsers, reader): parser = all_parsers data = "Date,test\n2012-01-01,1\n,2" + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + getattr(parser, reader)( - FutureWarning, + warn, "The argument 'infer_datetime_format' is deprecated", StringIO(data), parse_dates=["Date"], @@ -1502,8 +1534,13 @@ def test_parse_date_time_multi_level_column_name(all_parsers): ) def test_parse_date_time(all_parsers, data, kwargs, expected): parser = all_parsers + + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), date_parser=pd.to_datetime, @@ -1519,9 +1556,14 @@ def test_parse_date_time(all_parsers, data, kwargs, expected): def test_parse_date_fields(all_parsers): parser = all_parsers + + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), header=0, @@ -1554,9 +1596,17 @@ def test_parse_date_all_fields(all_parsers, key, value, warn): 2001,01,05,10,00,0,0.0,10. 2001,01,5,10,0,00,1.,11. """ + msg = "use 'date_format' instead" + if parser.engine == "pyarrow": + if warn is None: + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = DeprecationWarning + else: + warn = (warn, DeprecationWarning) + result = parser.read_csv_check_warnings( warn, - "use 'date_format' instead", + msg, StringIO(data), header=0, parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, @@ -1590,9 +1640,17 @@ def test_datetime_fractional_seconds(all_parsers, key, value, warn): 2001,01,05,10,00,0.123456,0.0,10. 2001,01,5,10,0,0.500000,1.,11. 
""" + msg = "use 'date_format' instead" + if parser.engine == "pyarrow": + if warn is None: + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = DeprecationWarning + else: + warn = (warn, DeprecationWarning) + result = parser.read_csv_check_warnings( warn, - "use 'date_format' instead", + msg, StringIO(data), header=0, parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, @@ -1615,8 +1673,12 @@ def test_generic(all_parsers): def parse_function(yy, mm): return [date(year=int(y), month=int(m), day=1) for y, m in zip(yy, mm)] + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), header=0, @@ -2143,7 +2205,8 @@ def test_parse_dot_separated_dates(all_parsers): dtype="object", name="a", ) - warn = None + warn = DeprecationWarning + msg = "Passing a BlockManager to DataFrame" else: expected_index = DatetimeIndex( ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], @@ -2151,7 +2214,7 @@ def test_parse_dot_separated_dates(all_parsers): name="a", ) warn = UserWarning - msg = r"when dayfirst=False \(the default\) was specified" + msg = r"when dayfirst=False \(the default\) was specified" result = parser.read_csv_check_warnings( warn, msg, StringIO(data), parse_dates=True, index_col=0 ) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index f201f6c394566..468d888590cf8 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -157,15 +157,20 @@ def test_on_bad_lines_callable_python_or_pyarrow(self, all_parsers): sio = StringIO("a,b\n1,2") bad_lines_func = lambda x: x parser = all_parsers - if all_parsers.engine not in ["python", "pyarrow"]: - msg = ( - "on_bad_line can only be a callable " - "function if engine='python' or 'pyarrow'" - ) - with pytest.raises(ValueError, match=msg): + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + warn_msg = "Passing a BlockManager" + with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): + if all_parsers.engine not in ["python", "pyarrow"]: + msg = ( + "on_bad_line can only be a callable " + "function if engine='python' or 'pyarrow'" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv(sio, on_bad_lines=bad_lines_func) + else: parser.read_csv(sio, on_bad_lines=bad_lines_func) - else: - parser.read_csv(sio, on_bad_lines=bad_lines_func) def test_close_file_handle_on_invalid_usecols(all_parsers): diff --git a/pandas/tests/io/parser/usecols/test_strings.py b/pandas/tests/io/parser/usecols/test_strings.py index 22f19ec518e4a..d4ade41d38465 100644 --- a/pandas/tests/io/parser/usecols/test_strings.py +++ b/pandas/tests/io/parser/usecols/test_strings.py @@ -9,6 +9,10 @@ from pandas import DataFrame import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + def test_usecols_with_unicode_strings(all_parsers): # see gh-13219 diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 9846621b5fb0c..07c94e301b37a 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -168,9 +168,14 @@ def test_usecols_index_col_conflict2(all_parsers): expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}) expected = 
expected.set_index(["b", "c"]) - result = parser.read_csv( - StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] - ) + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] + ) tm.assert_frame_equal(result, expected) @@ -191,7 +196,12 @@ def test_usecols_index_col_middle(all_parsers): data = """a,b,c,d 1,2,3,4 """ - result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="c") + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="c") expected = DataFrame({"b": [2], "d": [4]}, index=Index([3], name="c")) tm.assert_frame_equal(result, expected) @@ -202,7 +212,12 @@ def test_usecols_index_col_end(all_parsers): data = """a,b,c,d 1,2,3,4 """ - result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="d") + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="d") expected = DataFrame({"b": [2], "c": [3]}, index=Index([4], name="d")) tm.assert_frame_equal(result, expected) @@ -268,7 +283,12 @@ def test_np_array_usecols(all_parsers): usecols = np.array(["a", "b"]) expected = DataFrame([[1, 2]], columns=usecols) - result = parser.read_csv(StringIO(data), usecols=usecols) + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv(StringIO(data), usecols=usecols) tm.assert_frame_equal(result, expected) @@ -460,11 +480,16 @@ def test_usecols_dtype(all_parsers): a,1,x b,2,y """ - result = parser.read_csv( - StringIO(data), - usecols=["col1", "col2"], - dtype={"col1": "string", "col2": "uint8", "col3": "string"}, - ) + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv( + StringIO(data), + usecols=["col1", "col2"], + dtype={"col1": "string", "col2": "uint8", "col3": "string"}, + ) expected = DataFrame( {"col1": array(["a", "b"]), "col2": np.array([1, 2], dtype="uint8")} ) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a7ece6a6d7b08..018400fcfe0cf 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -25,6 +25,10 @@ import pandas.io.common as icom +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + class CustomFSPath: """For testing fspath on unknown objects""" diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index cf43203466ef4..5ec8705251d95 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -11,6 +11,10 @@ from pandas.io.feather_format import read_feather, to_feather # isort:skip +pytestmark = 
pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + pyarrow = pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 030505f617b97..8726d44c9c3ed 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -18,6 +18,10 @@ import pandas._testing as tm from pandas.util import _test_decorators as td +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + @pytest.fixture def df1(): diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 89655e8693d7f..6e55cde12f2f9 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -18,6 +18,10 @@ import pandas._testing as tm from pandas.util import _test_decorators as td +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + @pytest.fixture def gcs_buffer(): diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index d90f803f1e607..6b713bfa42b53 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -17,6 +17,10 @@ import pyarrow as pa +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + @pytest.fixture def dirpath(datapath): @@ -66,7 +70,9 @@ def test_orc_reader_empty(dirpath): expected[colname] = pd.Series(dtype=dtype) inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") - got = read_orc(inputfile, columns=columns) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(inputfile, columns=columns) tm.assert_equal(expected, got) @@ -86,7 +92,9 @@ def test_orc_reader_basic(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.test1.orc") - got = read_orc(inputfile, columns=data.keys()) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(inputfile, columns=data.keys()) tm.assert_equal(expected, got) @@ -113,7 +121,9 @@ def test_orc_reader_decimal(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.decimal.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -154,7 +164,9 @@ def test_orc_reader_date_low(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testDate1900.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -195,7 +207,9 @@ def test_orc_reader_date_high(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testDate2038.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -236,7 +250,9 @@ def test_orc_reader_snappy_compressed(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testSnappy.orc") - got = 
read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -261,7 +277,9 @@ def test_orc_roundtrip_file(dirpath): with tm.ensure_clean() as path: expected.to_orc(path) - got = read_orc(path) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(path) tm.assert_equal(expected, got) @@ -285,7 +303,9 @@ def test_orc_roundtrip_bytesio(): expected = pd.DataFrame.from_dict(data) bytes = expected.to_orc() - got = read_orc(BytesIO(bytes)) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + got = read_orc(BytesIO(bytes)) tm.assert_equal(expected, got) @@ -323,7 +343,9 @@ def test_orc_dtype_backend_pyarrow(): ) bytes_data = df.copy().to_orc() - result = read_orc(BytesIO(bytes_data), dtype_backend="pyarrow") + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = read_orc(BytesIO(bytes_data), dtype_backend="pyarrow") expected = pd.DataFrame( { @@ -354,7 +376,9 @@ def test_orc_dtype_backend_numpy_nullable(): ) bytes_data = df.copy().to_orc() - result = read_orc(BytesIO(bytes_data), dtype_backend="numpy_nullable") + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = read_orc(BytesIO(bytes_data), dtype_backend="numpy_nullable") expected = pd.DataFrame( { @@ -383,7 +407,9 @@ def test_orc_uri_path(): with tm.ensure_clean("tmp.orc") as path: expected.to_orc(path) uri = pathlib.Path(path).as_uri() - result = read_orc(uri) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = read_orc(uri) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 1538275e6af73..9cafde240b13c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -48,9 +48,12 @@ # TODO(ArrayManager) fastparquet relies on BlockManager internals -pytestmark = pytest.mark.filterwarnings( - "ignore:DataFrame._data is deprecated:FutureWarning" -) +pytestmark = [ + pytest.mark.filterwarnings("ignore:DataFrame._data is deprecated:FutureWarning"), + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" + ), +] # setup engines & skips diff --git a/pandas/tests/io/test_user_agent.py b/pandas/tests/io/test_user_agent.py index a0656b938eaa6..a892e51f2f28d 100644 --- a/pandas/tests/io/test_user_agent.py +++ b/pandas/tests/io/test_user_agent.py @@ -23,6 +23,9 @@ is_ci_environment(), reason="GH 45651: This test can hang in our CI min_versions build", ), + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" + ), ] diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index c541c5792ec7c..735c6131ba319 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -175,7 +175,9 @@ def test_pandas_datareader(): def test_pyarrow(df): pyarrow = pytest.importorskip("pyarrow") table = pyarrow.Table.from_pandas(df) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + 
result = table.to_pandas() tm.assert_frame_equal(result, df)
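
Taken together, the core change is visible from user code: constructing a DataFrame from a BlockManager, or a Series from a SingleBlockManager, now emits a DeprecationWarning and will eventually raise, so callers should stay on the public constructors. A minimal sketch, assuming a pandas build that includes this patch (the asserted warning text matches the messages added in pandas/core/frame.py and pandas/core/series.py):

    # Sketch only: demonstrates the deprecation introduced by GH#52419.
    import warnings

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    ser = pd.Series([1, 2, 3], name="a")

    with warnings.catch_warnings(record=True) as captured:
        warnings.simplefilter("always")
        pd.DataFrame(df._mgr)   # deprecated: internal BlockManager passed directly
        pd.Series(ser._mgr)     # deprecated: internal SingleBlockManager passed directly

    assert any(
        "Passing a BlockManager to DataFrame is deprecated" in str(w.message)
        for w in captured
    )
    assert any(
        "Passing a SingleBlockManager to Series is deprecated" in str(w.message)
        for w in captured
    )

    # Preferred: public APIs only; these emit no deprecation warning.
    pd.DataFrame(df)
    pd.Series(ser)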