Skip to content

Commit

Permalink
Adapt tests from modin-project#4724
Browse files Browse the repository at this point in the history
Co-authored-by: mvashishtha <mahesh@ponder.io>
Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
  • Loading branch information
vnlitvinov and mvashishtha committed Aug 4, 2023
1 parent 665d6d0 commit 1d0b19a
Showing 1 changed file with 60 additions and 7 deletions.
67 changes: 60 additions & 7 deletions modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,6 @@

from modin.config import NPartitions

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

NPartitions.put(4)

DATASET_SIZE_DICT = {
Expand Down Expand Up @@ -269,6 +262,7 @@ def _make_parquet_dir(
IsExperimental.get() and StorageFormat.get() == "Pyarrow",
reason="Segmentation fault; see PR #2347 for details",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestCsv:
# delimiter tests
@pytest.mark.parametrize("sep", [None, "_", ",", ".", "\n"])
Expand Down Expand Up @@ -1307,6 +1301,24 @@ def test_read_csv_1930(self, usecols):
)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_csv_relative_to_user_home(make_csv_file):
    """Check that ``read_csv`` resolves a ``~/``-relative path without defaulting to pandas.

    The CSV is written to a temp directory, which is then presented as the user
    home so that ``~/<basename>`` points at the file.
    """
    with ensure_clean(".csv") as unique_filename:
        make_csv_file(filename=unique_filename)
        fake_home = os.path.dirname(unique_filename)
        # Patch both HOME (POSIX) and USERPROFILE (Windows): os.path.expanduser
        # consults USERPROFILE on Windows, so patching HOME alone would leave
        # "~" pointing at the real home directory there.
        env_patch = {"HOME": fake_home, "USERPROFILE": fake_home}
        with mock.patch.dict(os.environ, env_patch):
            # Only the pure-Python engine is expected to default to pandas here.
            expectation = (
                warns_that_defaulting_to_pandas()
                if Engine.get() == "Python"
                else _nullcontext()
            )
            with expectation:
                eval_io(
                    fn_name="read_csv",
                    filepath_or_buffer=f"~/{os.path.basename(unique_filename)}",
                )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestTable:
def test_read_table(self, make_csv_file):
with ensure_clean() as unique_filename:
Expand Down Expand Up @@ -1358,6 +1370,7 @@ def test_read_table_empty_frame(self, make_csv_file):


@pytest.mark.parametrize("engine", ["pyarrow", "fastparquet"])
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestParquet:
@pytest.mark.parametrize("columns", [None, ["col1"]])
@pytest.mark.parametrize("row_group_size", [None, 100, 1000, 10_000])
Expand Down Expand Up @@ -1729,6 +1742,24 @@ def test_read_parquet_s3_with_column_partitioning(self, engine):
)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_parquet_relative_to_user_home(make_parquet_file):
    """Check that ``read_parquet`` resolves a ``~/``-relative path without defaulting to pandas.

    The parquet file is written to a temp directory, which is then presented as
    the user home so that ``~/<basename>`` points at the file.
    """
    with ensure_clean(".parquet") as unique_filename:
        make_parquet_file(filename=unique_filename)
        fake_home = os.path.dirname(unique_filename)
        # Patch both HOME (POSIX) and USERPROFILE (Windows): os.path.expanduser
        # consults USERPROFILE on Windows, so patching HOME alone would leave
        # "~" pointing at the real home directory there.
        env_patch = {"HOME": fake_home, "USERPROFILE": fake_home}
        with mock.patch.dict(os.environ, env_patch):
            # Only the pure-Python engine is expected to default to pandas here.
            expectation = (
                warns_that_defaulting_to_pandas()
                if Engine.get() == "Python"
                else _nullcontext()
            )
            with expectation:
                eval_io(
                    fn_name="read_parquet",
                    path=f"~/{os.path.basename(unique_filename)}",
                )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestJson:
@pytest.mark.parametrize("lines", [False, True])
def test_read_json(self, make_json_file, lines):
Expand Down Expand Up @@ -1838,6 +1869,7 @@ def test_read_json_metadata(self, make_json_file):
assert parts_width_cached == parts_width_actual


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestExcel:
@check_file_leaks
def test_read_excel(self, make_excel_file):
Expand Down Expand Up @@ -2018,6 +2050,7 @@ def test_read_excel_empty_frame(self, make_excel_file):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHdf:
@pytest.mark.parametrize("format", [None, "table"])
def test_read_hdf(self, make_hdf_file, format):
Expand Down Expand Up @@ -2072,6 +2105,7 @@ def test_HDFStore_in_read_hdf(self):
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSql:
@pytest.mark.parametrize("read_sql_engine", ["Pandas", "Connectorx"])
def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
Expand Down Expand Up @@ -2246,6 +2280,7 @@ def test_to_sql(self, tmp_path, make_sql_connection, index, conn_type):
assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHtml:
def test_read_html(self, make_html_file):
eval_io(fn_name="read_html", io=make_html_file())
Expand All @@ -2262,6 +2297,7 @@ def test_to_html(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFwf:
def test_fwf_file(self, make_fwf_file):
fwf_data = (
Expand Down Expand Up @@ -2489,6 +2525,7 @@ def test_read_fwf_s3(self, storage_options):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestGbq:
@pytest.mark.skip(reason="Can not pass without GBQ access")
def test_read_gbq(self):
Expand Down Expand Up @@ -2519,6 +2556,7 @@ def test_read_gbq_mock(self):
read_gbq.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestStata:
def test_read_stata(self, make_stata_file):
eval_io(
Expand All @@ -2538,6 +2576,7 @@ def test_to_stata(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSas:
def test_read_sas(self):
eval_io(
Expand All @@ -2547,6 +2586,7 @@ def test_read_sas(self):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFeather:
def test_read_feather(self, make_feather_file):
eval_io(
Expand Down Expand Up @@ -2611,6 +2651,7 @@ def test_read_feather_with_index_metadata(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestClipboard:
@pytest.mark.skip(reason="No clipboard in CI")
def test_read_clipboard(self):
Expand All @@ -2631,6 +2672,7 @@ def test_to_clipboard(self):
assert modin_as_clip.equals(pandas_as_clip)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestPickle:
def test_read_pickle(self, make_pickle_file):
eval_io(
Expand All @@ -2650,6 +2692,7 @@ def test_to_pickle(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestXml:
def test_read_xml(self):
# example from pandas
Expand All @@ -2669,6 +2712,7 @@ def test_read_xml(self):
eval_io("read_xml", path_or_buffer=data)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestOrc:
# It's not easy to add infrastructure for `orc` format.
# In case of defaulting to pandas, it's enough
Expand All @@ -2687,6 +2731,7 @@ def test_read_orc(self):
read_orc.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSpss:
# It's not easy to add infrastructure for `spss` format.
# In case of defaulting to pandas, it's enough
Expand All @@ -2703,6 +2748,7 @@ def test_read_spss(self):
read_spss.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_json_normalize():
# example from pandas
data = [
Expand All @@ -2713,12 +2759,14 @@ def test_json_normalize():
eval_io("json_normalize", data=data)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_arrow():
    """Round-trip a pandas frame through a pyarrow Table into Modin."""
    pandas_frame = create_test_dfs(TEST_DATA)[1]
    arrow_table = pa.Table.from_pandas(pandas_frame)
    converted = from_arrow(arrow_table)
    df_equals(converted, pandas_frame)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_spmatrix():
data = sparse.eye(3)
with pytest.warns(UserWarning, match="defaulting to pandas.*"):
Expand All @@ -2727,12 +2775,14 @@ def test_from_spmatrix():
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dense():
    """``DataFrame.sparse.to_dense`` should match pandas on a sparse column."""
    sparse_column = pandas.arrays.SparseArray([0, 1, 0])
    modin_frame, pandas_frame = create_test_dfs({"col1": sparse_column})
    modin_dense = modin_frame.sparse.to_dense()
    pandas_dense = pandas_frame.sparse.to_dense()
    df_equals(modin_dense, pandas_dense)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_dataframe():
modin_df, _ = create_test_dfs(TEST_DATA)
assert modin_df.to_dict() == to_pandas(modin_df).to_dict()
Expand All @@ -2747,6 +2797,7 @@ def test_to_dict_dataframe():
pytest.param({"into": defaultdict(list)}, id="into_defaultdict"),
],
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_series(kwargs):
eval_general(
*[df.iloc[:, 0] for df in create_test_dfs(utils_test_data["int_data"])],
Expand All @@ -2757,11 +2808,13 @@ def test_to_dict_series(kwargs):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_latex():
    """``to_latex`` output should be identical between Modin and pandas."""
    modin_frame = create_test_dfs(TEST_DATA)[0]
    expected = to_pandas(modin_frame).to_latex()
    assert modin_frame.to_latex() == expected


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_period():
index = pandas.DatetimeIndex(
pandas.date_range("2000", freq="h", periods=len(TEST_DATA["col1"]))
Expand Down

0 comments on commit 1d0b19a

Please sign in to comment.