From 1ee1b135efa3330e7dba4fa5fffa534f1491f7f4 Mon Sep 17 00:00:00 2001
From: Kian Eliasi
Date: Sat, 27 Sep 2025 10:38:03 +0330
Subject: [PATCH 1/3] Replace 'ensure_clean' with 'temp_file' in some tests

---
 .../tests/io/parser/common/test_chunksize.py | 34 +++++------
 .../tests/io/parser/common/test_iterator.py  | 22 ++++----
 pandas/tests/io/parser/test_index_col.py     | 20 +++----
 .../io/parser/test_python_parser_only.py     | 14 +++--
 pandas/tests/io/pytables/test_round_trip.py  | 44 +++++++--------
 pandas/tests/io/test_sql.py                  | 56 +++++++++----------
 6 files changed, 96 insertions(+), 94 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index 75ec96409bdd0..51107ff15bf1d 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -295,29 +295,29 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
     tm.assert_frame_equal(result, expected)
 
 
-def test_read_csv_memory_growth_chunksize(all_parsers):
+def test_read_csv_memory_growth_chunksize(temp_file, all_parsers):
     # see gh-24805
     #
     # Let's just make sure that we don't crash
     # as we iteratively process all chunks.
     parser = all_parsers
 
-    with tm.ensure_clean() as path:
-        with open(path, "w", encoding="utf-8") as f:
-            for i in range(1000):
-                f.write(str(i) + "\n")
-
-        if parser.engine == "pyarrow":
-            msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
-            with pytest.raises(ValueError, match=msg):
-                with parser.read_csv(path, chunksize=20) as result:
-                    for _ in result:
-                        pass
-            return
-
-        with parser.read_csv(path, chunksize=20) as result:
-            for _ in result:
-                pass
+    path = str(temp_file)
+    with open(path, "w", encoding="utf-8") as f:
+        for i in range(1000):
+            f.write(str(i) + "\n")
+
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            with parser.read_csv(path, chunksize=20) as result:
+                for _ in result:
+                    pass
+        return
+
+    with parser.read_csv(path, chunksize=20) as result:
+        for _ in result:
+            pass
 
 
 def test_chunksize_with_usecols_second_block_shorter(all_parsers):
diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
index 668aab05b9fa4..625d867832b2e 100644
--- a/pandas/tests/io/parser/common/test_iterator.py
+++ b/pandas/tests/io/parser/common/test_iterator.py
@@ -142,19 +142,19 @@ def test_iterator_skipfooter_errors(all_parsers, kwargs):
         pass
 
 
-def test_iteration_open_handle(all_parsers):
+def test_iteration_open_handle(temp_file, all_parsers):
     parser = all_parsers
     kwargs = {"header": None}
 
-    with tm.ensure_clean() as path:
-        with open(path, "w", encoding="utf-8") as f:
-            f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
+    path = str(temp_file)
+    with open(path, "w", encoding="utf-8") as f:
+        f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
 
-        with open(path, encoding="utf-8") as f:
-            for line in f:
-                if "CCC" in line:
-                    break
+    with open(path, encoding="utf-8") as f:
+        for line in f:
+            if "CCC" in line:
+                break
 
-            result = parser.read_csv(f, **kwargs)
-            expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})
-            tm.assert_frame_equal(result, expected)
+        result = parser.read_csv(f, **kwargs)
+        expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
index 9977e2b8e1a1d..d760d4d34a78c 100644
--- a/pandas/tests/io/parser/test_index_col.py
+++ b/pandas/tests/io/parser/test_index_col.py
@@ -200,7 +200,7 @@ def test_multi_index_naming_not_all_at_beginning(all_parsers):
 
 
 @xfail_pyarrow  # ValueError: Found non-unique column index
-def test_no_multi_index_level_names_empty(all_parsers):
+def test_no_multi_index_level_names_empty(temp_file, all_parsers):
     # GH 10984
     parser = all_parsers
     midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)])
@@ -209,9 +209,9 @@ def test_no_multi_index_level_names_empty(all_parsers):
         index=midx,
         columns=["x", "y", "z"],
     )
-    with tm.ensure_clean() as path:
-        expected.to_csv(path)
-        result = parser.read_csv(path, index_col=[0, 1, 2])
+    path = str(temp_file)
+    expected.to_csv(path)
+    result = parser.read_csv(path, index_col=[0, 1, 2])
     tm.assert_frame_equal(result, expected)
 
 
@@ -240,7 +240,7 @@ def test_header_with_index_col(all_parsers):
 
 
 @pytest.mark.slow
-def test_index_col_large_csv(all_parsers, monkeypatch):
+def test_index_col_large_csv(temp_file, all_parsers, monkeypatch):
     # https://github.com/pandas-dev/pandas/issues/37094
     parser = all_parsers
 
@@ -252,11 +252,11 @@ def test_index_col_large_csv(all_parsers, monkeypatch):
         }
     )
 
-    with tm.ensure_clean() as path:
-        df.to_csv(path, index=False)
-        with monkeypatch.context() as m:
-            m.setattr("pandas.core.algorithms._MINIMUM_COMP_ARR_LEN", ARR_LEN)
-            result = parser.read_csv(path, index_col=[0])
+    path = str(temp_file)
+    df.to_csv(path, index=False)
+    with monkeypatch.context() as m:
+        m.setattr("pandas.core.algorithms._MINIMUM_COMP_ARR_LEN", ARR_LEN)
+        result = parser.read_csv(path, index_col=[0])
 
     tm.assert_frame_equal(result, df.set_index("a"))
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index a5bb151e84f47..89f7521035c27 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -158,7 +158,9 @@ def test_skipfooter(python_parser_only, kwargs):
 @pytest.mark.parametrize(
     "compression,klass", [("gzip", "GzipFile"), ("bz2", "BZ2File")]
 )
-def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
+def test_decompression_regex_sep(
+    temp_file, python_parser_only, csv1, compression, klass
+):
     # see gh-6607
     parser = python_parser_only
 
@@ -171,12 +173,12 @@ def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
     module = pytest.importorskip(compression)
     klass = getattr(module, klass)
 
-    with tm.ensure_clean() as path:
-        with klass(path, mode="wb") as tmp:
-            tmp.write(data)
+    path = str(temp_file)
+    with klass(path, mode="wb") as tmp:
+        tmp.write(data)
 
-        result = parser.read_csv(path, sep="::", compression=compression)
-        tm.assert_frame_equal(result, expected)
+    result = parser.read_csv(path, sep="::", compression=compression)
+    tm.assert_frame_equal(result, expected)
 
 
 def test_read_csv_buglet_4x_multi_index(python_parser_only):
diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index 409b92d2ddde1..f3fc063b07514 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -27,33 +27,33 @@
 pytestmark = [pytest.mark.single_cpu]
 
 
-def test_conv_read_write():
-    with tm.ensure_clean() as path:
+def test_conv_read_write(temp_file):
+    path = str(temp_file)
 
-        def roundtrip(key, obj, **kwargs):
-            obj.to_hdf(path, key=key, **kwargs)
-            return read_hdf(path, key)
+    def roundtrip(key, obj, **kwargs):
+        obj.to_hdf(path, key=key, **kwargs)
+        return read_hdf(path, key)
 
-        o = Series(
-            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
-        )
-        tm.assert_series_equal(o, roundtrip("series", o))
+    o = Series(
+        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
+    )
+    tm.assert_series_equal(o, roundtrip("series", o))
 
-        o = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)])
-        tm.assert_series_equal(o, roundtrip("string_series", o))
+    o = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)])
+    tm.assert_series_equal(o, roundtrip("string_series", o))
 
-        o = DataFrame(
-            1.1 * np.arange(120).reshape((30, 4)),
-            columns=Index(list("ABCD")),
-            index=Index([f"i-{i}" for i in range(30)]),
-        )
-        tm.assert_frame_equal(o, roundtrip("frame", o))
+    o = DataFrame(
+        1.1 * np.arange(120).reshape((30, 4)),
+        columns=Index(list("ABCD")),
+        index=Index([f"i-{i}" for i in range(30)]),
+    )
+    tm.assert_frame_equal(o, roundtrip("frame", o))
 
-        # table
-        df = DataFrame({"A": range(5), "B": range(5)})
-        df.to_hdf(path, key="table", append=True)
-        result = read_hdf(path, "table", where=["index>2"])
-        tm.assert_frame_equal(df[df.index > 2], result)
+    # table
+    df = DataFrame({"A": range(5), "B": range(5)})
+    df.to_hdf(path, key="table", append=True)
+    result = read_hdf(path, "table", where=["index>2"])
+    tm.assert_frame_equal(df[df.index > 2], result)
 
 
 def test_long_strings(setup_path):
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 1b9ae5d8e7209..e1f11c4a4a09d 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -748,10 +748,10 @@ def postgresql_psycopg2_conn_types(postgresql_psycopg2_engine_types):
 
 
 @pytest.fixture
-def sqlite_str():
+def sqlite_str(temp_file):
     pytest.importorskip("sqlalchemy")
-    with tm.ensure_clean() as name:
-        yield f"sqlite:///{name}"
+    name = str(temp_file)
+    yield f"sqlite:///{name}"
 
 
 @pytest.fixture
@@ -817,20 +817,20 @@ def sqlite_conn_types(sqlite_engine_types):
 
 
 @pytest.fixture
-def sqlite_adbc_conn():
+def sqlite_adbc_conn(temp_file):
     pytest.importorskip("pyarrow")
     pytest.importorskip("adbc_driver_sqlite")
     from adbc_driver_sqlite import dbapi
 
-    with tm.ensure_clean() as name:
-        uri = f"file:{name}"
-        with dbapi.connect(uri) as conn:
-            yield conn
-            for view in get_all_views(conn):
-                drop_view(view, conn)
-            for tbl in get_all_tables(conn):
-                drop_table(tbl, conn)
-            conn.commit()
+    name = str(temp_file)
+    uri = f"file:{name}"
+    with dbapi.connect(uri) as conn:
+        yield conn
+        for view in get_all_views(conn):
+            drop_view(view, conn)
+        for tbl in get_all_tables(conn):
+            drop_table(tbl, conn)
+        conn.commit()
 
 
 @pytest.fixture
@@ -2504,20 +2504,20 @@ def test_sqlalchemy_integer_overload_mapping(conn, request, integer):
         sql.SQLTable("test_type", db, frame=df)
 
 
-def test_database_uri_string(request, test_frame1):
+def test_database_uri_string(temp_file, request, test_frame1):
     pytest.importorskip("sqlalchemy")
     # Test read_sql and .to_sql method with a database URI (GH10654)
     # db_uri = 'sqlite:///:memory:' # raises
     # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near
     # "iris": syntax error [SQL: 'iris']
-    with tm.ensure_clean() as name:
-        db_uri = "sqlite:///" + name
-        table = "iris"
-        test_frame1.to_sql(name=table, con=db_uri, if_exists="replace", index=False)
-        test_frame2 = sql.read_sql(table, db_uri)
-        test_frame3 = sql.read_sql_table(table, db_uri)
-        query = "SELECT * FROM iris"
-        test_frame4 = sql.read_sql_query(query, db_uri)
+    name = str(temp_file)
+    db_uri = "sqlite:///" + name
+    table = "iris"
+    test_frame1.to_sql(name=table, con=db_uri, if_exists="replace", index=False)
+    test_frame2 = sql.read_sql(table, db_uri)
+    test_frame3 = sql.read_sql_table(table, db_uri)
+    query = "SELECT * FROM iris"
+    test_frame4 = sql.read_sql_query(query, db_uri)
     tm.assert_frame_equal(test_frame1, test_frame2)
     tm.assert_frame_equal(test_frame1, test_frame3)
     tm.assert_frame_equal(test_frame1, test_frame4)
@@ -2581,16 +2581,16 @@ def test_column_with_percentage(conn, request):
     tm.assert_frame_equal(res, df)
 
 
-def test_sql_open_close(test_frame3):
+def test_sql_open_close(temp_file, test_frame3):
     # Test if the IO in the database still work if the connection closed
     # between the writing and reading (as in many real situations).
 
-    with tm.ensure_clean() as name:
-        with contextlib.closing(sqlite3.connect(name)) as conn:
-            assert sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) == 4
+    name = str(temp_file)
+    with contextlib.closing(sqlite3.connect(name)) as conn:
+        assert sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) == 4
 
-        with contextlib.closing(sqlite3.connect(name)) as conn:
-            result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;", conn)
+    with contextlib.closing(sqlite3.connect(name)) as conn:
+        result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;", conn)
 
     tm.assert_frame_equal(test_frame3, result)

From 17d3c68f7e274cf78a6d7fc65330dd0ebf811b20 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 7 Oct 2025 06:47:20 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pandas/tests/io/test_sql.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index e1f11c4a4a09d..2e5fc2194ec39 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -751,7 +751,7 @@ def postgresql_psycopg2_conn_types(postgresql_psycopg2_engine_types):
 def sqlite_str(temp_file):
     pytest.importorskip("sqlalchemy")
     name = str(temp_file)
-    yield f"sqlite:///{name}"
+    return f"sqlite:///{name}"
 
 
 @pytest.fixture

From a848d2ba218ce4315610177ee0c8a96f52e03d80 Mon Sep 17 00:00:00 2001
From: Kian Eliasi
Date: Tue, 7 Oct 2025 21:49:43 +0330
Subject: [PATCH 3/3] Use temp_file directly instead of converting to str

---
 pandas/tests/io/parser/common/test_chunksize.py   |  7 +++----
 pandas/tests/io/parser/common/test_iterator.py    |  5 ++---
 pandas/tests/io/parser/test_index_col.py          | 10 ++++------
 pandas/tests/io/parser/test_python_parser_only.py |  5 ++---
 pandas/tests/io/pytables/test_round_trip.py       | 10 ++++------
 pandas/tests/io/test_sql.py                       | 11 ++++-------
 6 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index 51107ff15bf1d..3c9e7c80f9db0 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -302,20 +302,19 @@ def test_read_csv_memory_growth_chunksize(temp_file, all_parsers):
     # as we iteratively process all chunks.
     parser = all_parsers
 
-    path = str(temp_file)
-    with open(path, "w", encoding="utf-8") as f:
+    with open(temp_file, "w", encoding="utf-8") as f:
         for i in range(1000):
             f.write(str(i) + "\n")
 
     if parser.engine == "pyarrow":
         msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
         with pytest.raises(ValueError, match=msg):
-            with parser.read_csv(path, chunksize=20) as result:
+            with parser.read_csv(temp_file, chunksize=20) as result:
                 for _ in result:
                     pass
         return
 
-    with parser.read_csv(path, chunksize=20) as result:
+    with parser.read_csv(temp_file, chunksize=20) as result:
         for _ in result:
             pass
 
diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
index 625d867832b2e..5841fb7ad9594 100644
--- a/pandas/tests/io/parser/common/test_iterator.py
+++ b/pandas/tests/io/parser/common/test_iterator.py
@@ -146,11 +146,10 @@ def test_iteration_open_handle(temp_file, all_parsers):
     parser = all_parsers
     kwargs = {"header": None}
 
-    path = str(temp_file)
-    with open(path, "w", encoding="utf-8") as f:
+    with open(temp_file, "w", encoding="utf-8") as f:
         f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
 
-    with open(path, encoding="utf-8") as f:
+    with open(temp_file, encoding="utf-8") as f:
         for line in f:
             if "CCC" in line:
                 break
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
index d760d4d34a78c..c5b43484f7615 100644
--- a/pandas/tests/io/parser/test_index_col.py
+++ b/pandas/tests/io/parser/test_index_col.py
@@ -209,9 +209,8 @@ def test_no_multi_index_level_names_empty(temp_file, all_parsers):
         index=midx,
         columns=["x", "y", "z"],
     )
-    path = str(temp_file)
-    expected.to_csv(path)
-    result = parser.read_csv(path, index_col=[0, 1, 2])
+    expected.to_csv(temp_file)
+    result = parser.read_csv(temp_file, index_col=[0, 1, 2])
     tm.assert_frame_equal(result, expected)
 
 
@@ -252,11 +251,10 @@ def test_index_col_large_csv(temp_file, all_parsers, monkeypatch):
         }
     )
 
-    path = str(temp_file)
-    df.to_csv(path, index=False)
+    df.to_csv(temp_file, index=False)
     with monkeypatch.context() as m:
         m.setattr("pandas.core.algorithms._MINIMUM_COMP_ARR_LEN", ARR_LEN)
-        result = parser.read_csv(path, index_col=[0])
+        result = parser.read_csv(temp_file, index_col=[0])
 
     tm.assert_frame_equal(result, df.set_index("a"))
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 89f7521035c27..b25d879144e4c 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -173,11 +173,10 @@ def test_decompression_regex_sep(
     module = pytest.importorskip(compression)
     klass = getattr(module, klass)
 
-    path = str(temp_file)
-    with klass(path, mode="wb") as tmp:
+    with klass(temp_file, mode="wb") as tmp:
         tmp.write(data)
 
-    result = parser.read_csv(path, sep="::", compression=compression)
+    result = parser.read_csv(temp_file, sep="::", compression=compression)
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index f3fc063b07514..37e3e9d4f9db2 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -28,11 +28,9 @@
 
 
 def test_conv_read_write(temp_file):
-    path = str(temp_file)
-
     def roundtrip(key, obj, **kwargs):
-        obj.to_hdf(path, key=key, **kwargs)
-        return read_hdf(path, key)
+        obj.to_hdf(temp_file, key=key, **kwargs)
+        return read_hdf(temp_file, key)
 
     o = Series(
         np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
@@ -51,8 +49,8 @@ def roundtrip(key, obj, **kwargs):
 
     # table
     df = DataFrame({"A": range(5), "B": range(5)})
-    df.to_hdf(path, key="table", append=True)
-    result = read_hdf(path, "table", where=["index>2"])
+    df.to_hdf(temp_file, key="table", append=True)
+    result = read_hdf(temp_file, "table", where=["index>2"])
     tm.assert_frame_equal(df[df.index > 2], result)
 
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 2e5fc2194ec39..5865c46b4031e 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -750,8 +750,7 @@ def postgresql_psycopg2_conn_types(postgresql_psycopg2_engine_types):
 @pytest.fixture
 def sqlite_str(temp_file):
     pytest.importorskip("sqlalchemy")
-    name = str(temp_file)
-    return f"sqlite:///{name}"
+    return f"sqlite:///{temp_file}"
 
 
 @pytest.fixture
@@ -822,8 +821,7 @@ def sqlite_adbc_conn(temp_file):
     pytest.importorskip("adbc_driver_sqlite")
     from adbc_driver_sqlite import dbapi
 
-    name = str(temp_file)
-    uri = f"file:{name}"
+    uri = f"file:{temp_file}"
     with dbapi.connect(uri) as conn:
         yield conn
         for view in get_all_views(conn):
@@ -2585,11 +2583,10 @@ def test_sql_open_close(temp_file, test_frame3):
     # Test if the IO in the database still work if the connection closed
     # between the writing and reading (as in many real situations).
 
-    name = str(temp_file)
-    with contextlib.closing(sqlite3.connect(name)) as conn:
+    with contextlib.closing(sqlite3.connect(temp_file)) as conn:
         assert sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) == 4
 
-    with contextlib.closing(sqlite3.connect(name)) as conn:
+    with contextlib.closing(sqlite3.connect(temp_file)) as conn:
         result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;", conn)
 
     tm.assert_frame_equal(test_frame3, result)
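
Note (not part of the patches): the series relies on the shared temp_file pytest fixture in place of tm.ensure_clean(). That fixture hands the test a pathlib.Path inside pytest's tmp_path, so pytest handles cleanup; that is why PATCH 2 can switch sqlite_str from yield to return (no teardown context is needed) and PATCH 3 can drop the str(temp_file) conversions, since open(), to_csv, to_hdf, sqlite3.connect, and f-strings all accept a Path. A rough sketch of that kind of fixture, for orientation only (assumed conftest conventions, not the exact pandas definition):

    import uuid

    import pytest


    @pytest.fixture
    def temp_file(tmp_path):
        # tmp_path is pytest's built-in per-test temporary directory;
        # returning a unique, already-created Path inside it means the
        # tests need no manual cleanup.
        path = tmp_path / str(uuid.uuid4())
        path.touch()
        return path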