Skip to content

Commit

Permalink
Adapt tests from modin-project#4724
Browse files Browse the repository at this point in the history
Co-authored-by: mvashishtha <mahesh@ponder.io>
Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
  • Loading branch information
vnlitvinov and mvashishtha committed Aug 4, 2023
1 parent 665d6d0 commit 1d0b19a
Showing 1 changed file with 60 additions and 7 deletions.
67 changes: 60 additions & 7 deletions modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,6 @@

from modin.config import NPartitions

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

NPartitions.put(4)

DATASET_SIZE_DICT = {
Expand Down Expand Up @@ -269,6 +262,7 @@ def _make_parquet_dir(
IsExperimental.get() and StorageFormat.get() == "Pyarrow",
reason="Segmentation fault; see PR #2347 for details",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestCsv:
# delimiter tests
@pytest.mark.parametrize("sep", [None, "_", ",", ".", "\n"])
Expand Down Expand Up @@ -1307,6 +1301,24 @@ def test_read_csv_1930(self, usecols):
)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_csv_relative_to_user_home(make_csv_file):
    """Check that ``read_csv`` resolves a ``~/``-relative path without defaulting to pandas.

    The CSV is written to a temp directory, which is then presented as the user
    home so that ``~/<basename>`` points at the file.
    """
    with ensure_clean(".csv") as unique_filename:
        make_csv_file(filename=unique_filename)
        fake_home = os.path.dirname(unique_filename)
        # Patch both HOME (POSIX) and USERPROFILE (Windows): os.path.expanduser
        # consults USERPROFILE on Windows, so patching HOME alone would leave
        # "~" pointing at the real home directory there.
        env_patch = {"HOME": fake_home, "USERPROFILE": fake_home}
        with mock.patch.dict(os.environ, env_patch):
            # Only the pure-Python engine is expected to default to pandas here.
            expectation = (
                warns_that_defaulting_to_pandas()
                if Engine.get() == "Python"
                else _nullcontext()
            )
            with expectation:
                eval_io(
                    fn_name="read_csv",
                    filepath_or_buffer=f"~/{os.path.basename(unique_filename)}",
                )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestTable:
def test_read_table(self, make_csv_file):
with ensure_clean() as unique_filename:
Expand Down Expand Up @@ -1358,6 +1370,7 @@ def test_read_table_empty_frame(self, make_csv_file):


@pytest.mark.parametrize("engine", ["pyarrow", "fastparquet"])
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestParquet:
@pytest.mark.parametrize("columns", [None, ["col1"]])
@pytest.mark.parametrize("row_group_size", [None, 100, 1000, 10_000])
Expand Down Expand Up @@ -1729,6 +1742,24 @@ def test_read_parquet_s3_with_column_partitioning(self, engine):
)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_parquet_relative_to_user_home(make_parquet_file):
    """Check that ``read_parquet`` resolves a ``~/``-relative path without defaulting to pandas.

    The parquet file is written to a temp directory, which is then presented as
    the user home so that ``~/<basename>`` points at the file.
    """
    with ensure_clean(".parquet") as unique_filename:
        make_parquet_file(filename=unique_filename)
        fake_home = os.path.dirname(unique_filename)
        # Patch both HOME (POSIX) and USERPROFILE (Windows): os.path.expanduser
        # consults USERPROFILE on Windows, so patching HOME alone would leave
        # "~" pointing at the real home directory there.
        env_patch = {"HOME": fake_home, "USERPROFILE": fake_home}
        with mock.patch.dict(os.environ, env_patch):
            # Only the pure-Python engine is expected to default to pandas here.
            expectation = (
                warns_that_defaulting_to_pandas()
                if Engine.get() == "Python"
                else _nullcontext()
            )
            with expectation:
                eval_io(
                    fn_name="read_parquet",
                    path=f"~/{os.path.basename(unique_filename)}",
                )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestJson:
@pytest.mark.parametrize("lines", [False, True])
def test_read_json(self, make_json_file, lines):
Expand Down Expand Up @@ -1838,6 +1869,7 @@ def test_read_json_metadata(self, make_json_file):
assert parts_width_cached == parts_width_actual


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestExcel:
@check_file_leaks
def test_read_excel(self, make_excel_file):
Expand Down Expand Up @@ -2018,6 +2050,7 @@ def test_read_excel_empty_frame(self, make_excel_file):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHdf:
@pytest.mark.parametrize("format", [None, "table"])
def test_read_hdf(self, make_hdf_file, format):
Expand Down Expand Up @@ -2072,6 +2105,7 @@ def test_HDFStore_in_read_hdf(self):
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSql:
@pytest.mark.parametrize("read_sql_engine", ["Pandas", "Connectorx"])
def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
Expand Down Expand Up @@ -2246,6 +2280,7 @@ def test_to_sql(self, tmp_path, make_sql_connection, index, conn_type):
assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHtml:
def test_read_html(self, make_html_file):
eval_io(fn_name="read_html", io=make_html_file())
Expand All @@ -2262,6 +2297,7 @@ def test_to_html(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFwf:
def test_fwf_file(self, make_fwf_file):
fwf_data = (
Expand Down Expand Up @@ -2489,6 +2525,7 @@ def test_read_fwf_s3(self, storage_options):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestGbq:
@pytest.mark.skip(reason="Can not pass without GBQ access")
def test_read_gbq(self):
Expand Down Expand Up @@ -2519,6 +2556,7 @@ def test_read_gbq_mock(self):
read_gbq.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestStata:
def test_read_stata(self, make_stata_file):
eval_io(
Expand All @@ -2538,6 +2576,7 @@ def test_to_stata(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSas:
def test_read_sas(self):
eval_io(
Expand All @@ -2547,6 +2586,7 @@ def test_read_sas(self):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFeather:
def test_read_feather(self, make_feather_file):
eval_io(
Expand Down Expand Up @@ -2611,6 +2651,7 @@ def test_read_feather_with_index_metadata(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestClipboard:
@pytest.mark.skip(reason="No clipboard in CI")
def test_read_clipboard(self):
Expand All @@ -2631,6 +2672,7 @@ def test_to_clipboard(self):
assert modin_as_clip.equals(pandas_as_clip)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestPickle:
def test_read_pickle(self, make_pickle_file):
eval_io(
Expand All @@ -2650,6 +2692,7 @@ def test_to_pickle(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestXml:
def test_read_xml(self):
# example from pandas
Expand All @@ -2669,6 +2712,7 @@ def test_read_xml(self):
eval_io("read_xml", path_or_buffer=data)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestOrc:
# It's not easy to add infrastructure for `orc` format.
# In case of defaulting to pandas, it's enough
Expand All @@ -2687,6 +2731,7 @@ def test_read_orc(self):
read_orc.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSpss:
# It's not easy to add infrastructure for `spss` format.
# In case of defaulting to pandas, it's enough
Expand All @@ -2703,6 +2748,7 @@ def test_read_spss(self):
read_spss.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_json_normalize():
# example from pandas
data = [
Expand All @@ -2713,12 +2759,14 @@ def test_json_normalize():
eval_io("json_normalize", data=data)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_arrow():
    """Round-trip a pandas frame through a pyarrow Table into Modin."""
    pandas_frame = create_test_dfs(TEST_DATA)[1]
    arrow_table = pa.Table.from_pandas(pandas_frame)
    converted = from_arrow(arrow_table)
    df_equals(converted, pandas_frame)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_spmatrix():
data = sparse.eye(3)
with pytest.warns(UserWarning, match="defaulting to pandas.*"):
Expand All @@ -2727,12 +2775,14 @@ def test_from_spmatrix():
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dense():
    """``DataFrame.sparse.to_dense`` should match pandas on a sparse column."""
    sparse_column = pandas.arrays.SparseArray([0, 1, 0])
    modin_frame, pandas_frame = create_test_dfs({"col1": sparse_column})
    modin_dense = modin_frame.sparse.to_dense()
    pandas_dense = pandas_frame.sparse.to_dense()
    df_equals(modin_dense, pandas_dense)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_dataframe():
modin_df, _ = create_test_dfs(TEST_DATA)
assert modin_df.to_dict() == to_pandas(modin_df).to_dict()
Expand All @@ -2747,6 +2797,7 @@ def test_to_dict_dataframe():
pytest.param({"into": defaultdict(list)}, id="into_defaultdict"),
],
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_series(kwargs):
eval_general(
*[df.iloc[:, 0] for df in create_test_dfs(utils_test_data["int_data"])],
Expand All @@ -2757,11 +2808,13 @@ def test_to_dict_series(kwargs):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_latex():
    """``to_latex`` output should be identical between Modin and pandas."""
    modin_frame = create_test_dfs(TEST_DATA)[0]
    expected = to_pandas(modin_frame).to_latex()
    assert modin_frame.to_latex() == expected


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_period():
index = pandas.DatetimeIndex(
pandas.date_range("2000", freq="h", periods=len(TEST_DATA["col1"]))
Expand Down

0 comments on commit 1d0b19a

Please sign in to comment.