From 99f5ab54539c906924fda312985331c0e84777d5 Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Mon, 17 Apr 2023 11:01:31 +0530 Subject: [PATCH 1/7] GH - 624: Added dtype arg to read_sql --- pandas-stubs/io/sql.pyi | 2 ++ tests/test_io.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi index 589dc03ac..c8d1eb2b1 100644 --- a/pandas-stubs/io/sql.pyi +++ b/pandas-stubs/io/sql.pyi @@ -84,6 +84,7 @@ def read_sql( columns: list[str] = ..., *, chunksize: int, + dtype: DtypeArg | None = ..., ) -> Generator[DataFrame, None, None]: ... @overload def read_sql( @@ -95,6 +96,7 @@ def read_sql( parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ..., columns: list[str] = ..., chunksize: None = ..., + dtype: DtypeArg | None = ..., ) -> DataFrame: ... class PandasSQL(PandasObject): diff --git a/tests/test_io.py b/tests/test_io.py index e259b82ec..00ddbdbb1 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1204,3 +1204,25 @@ def test_sqlalchemy_text() -> None: assert_type(read_sql(sql_select, con=conn), DataFrame), DataFrame, ) + + +def test_read_sql_dtype() -> None: + with ensure_clean() as path: + conn = sqlite3.connect(path) + df = pd.DataFrame( + data=[[0, "10/11/12"], [1, "12/11/10"]], + columns=["int_column", "date_column"], + ) + check(assert_type(df.to_sql("test_data", con=conn), Union[int, None]), int) + check( + assert_type( + pd.read_sql( + "SELECT int_column, date_column FROM test_data", + con=conn, + dtype=None, + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + conn.close() From e30e6780a2f0dca92a09d7bdbe8d142e35ee0922 Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Fri, 21 Apr 2023 09:19:41 +0530 Subject: [PATCH 2/7] added dtype_backend and test --- pandas-stubs/io/sql.pyi | 2 ++ tests/test_io.py | 67 ++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi index 
c8d1eb2b1..71026c7f6 100644 --- a/pandas-stubs/io/sql.pyi +++ b/pandas-stubs/io/sql.pyi @@ -85,6 +85,7 @@ def read_sql( *, chunksize: int, dtype: DtypeArg | None = ..., + dtype_backend: None = ..., ) -> Generator[DataFrame, None, None]: ... @overload def read_sql( @@ -97,6 +98,7 @@ def read_sql( columns: list[str] = ..., chunksize: None = ..., dtype: DtypeArg | None = ..., + dtype_backend: None = ..., ) -> DataFrame: ... class PandasSQL(PandasObject): diff --git a/tests/test_io.py b/tests/test_io.py index 00ddbdbb1..365ea2b76 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1206,23 +1206,56 @@ def test_sqlalchemy_text() -> None: ) -def test_read_sql_dtype() -> None: +# def test_read_sql_dtype() -> None: +# with ensure_clean() as path: +# conn = sqlite3.connect(path) +# df = pd.DataFrame( +# data=[[0, "10/11/12"], [1, "12/11/10"]], +# columns=["int_column", "date_column"], +# ) +# check(assert_type(df.to_sql("test_data", con=conn), Union[int, None]), int) +# check( +# assert_type( +# pd.read_sql( +# "SELECT int_column, date_column FROM test_data", +# con=conn, +# dtype={"int_columb":float}, +# # dtype=None, +# ), +# pd.DataFrame, +# ), +# pd.DataFrame, +# ) +# conn.close() + + +# def test_read_sql_dtype1() -> None: +# with ensure_clean() as path: +# conn = sqlite3.connect(path) +# df = pd.DataFrame( +# data=[[0, "10/11/12"], [1, "12/11/10"]], +# columns=["int_column", "date_column"], +# ) +# check(assert_type(df.to_sql("test_data", con=conn), Union[int, None]), int) +# check( +# assert_type( +# pd.read_sql( +# "SELECT int_column, date_column FROM test_data", +# con=conn, +# dtype={"int_columb":float}, +# ), +# pd.DataFrame, +# ), +# pd.DataFrame, +# ) +# conn.close() + + +def test_read_sql_dtypes2() -> None: with ensure_clean() as path: - conn = sqlite3.connect(path) - df = pd.DataFrame( - data=[[0, "10/11/12"], [1, "12/11/10"]], - columns=["int_column", "date_column"], - ) - check(assert_type(df.to_sql("test_data", con=conn), Union[int, None]), int) + con = 
sqlite3.connect(path) + check(assert_type(DF.to_sql("test", con=con), Union[int, None]), int) check( - assert_type( - pd.read_sql( - "SELECT int_column, date_column FROM test_data", - con=conn, - dtype=None, - ), - pd.DataFrame, - ), - pd.DataFrame, + assert_type(read_sql("select * from test", con=con, dtype={"int_column": float}), DataFrame), DataFrame ) - conn.close() + con.close() \ No newline at end of file From ffd6cf281348622ad3a902ba0536530bf0ce4cda Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Fri, 21 Apr 2023 09:20:24 +0530 Subject: [PATCH 3/7] Update test_io.py --- tests/test_io.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_io.py b/tests/test_io.py index 365ea2b76..8b035780f 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1256,6 +1256,10 @@ def test_read_sql_dtypes2() -> None: con = sqlite3.connect(path) check(assert_type(DF.to_sql("test", con=con), Union[int, None]), int) check( - assert_type(read_sql("select * from test", con=con, dtype={"int_column": float}), DataFrame), DataFrame + assert_type( + read_sql("select * from test", con=con, dtype={"int_column": float}), + DataFrame, + ), + DataFrame, ) - con.close() \ No newline at end of file + con.close() From 1261519f6cbb8a1c2469ddaf59a8bf57f2cba5aa Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Fri, 21 Apr 2023 09:52:49 +0530 Subject: [PATCH 4/7] updated the tests --- tests/test_io.py | 76 +++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 49 deletions(-) diff --git a/tests/test_io.py b/tests/test_io.py index 8b035780f..e50b732bd 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1206,60 +1206,38 @@ def test_sqlalchemy_text() -> None: ) -# def test_read_sql_dtype() -> None: -# with ensure_clean() as path: -# conn = sqlite3.connect(path) -# df = pd.DataFrame( -# data=[[0, "10/11/12"], [1, "12/11/10"]], -# columns=["int_column", "date_column"], -# ) -# check(assert_type(df.to_sql("test_data", con=conn), Union[int, 
None]), int) -# check( -# assert_type( -# pd.read_sql( -# "SELECT int_column, date_column FROM test_data", -# con=conn, -# dtype={"int_columb":float}, -# # dtype=None, -# ), -# pd.DataFrame, -# ), -# pd.DataFrame, -# ) -# conn.close() - - -# def test_read_sql_dtype1() -> None: -# with ensure_clean() as path: -# conn = sqlite3.connect(path) -# df = pd.DataFrame( -# data=[[0, "10/11/12"], [1, "12/11/10"]], -# columns=["int_column", "date_column"], -# ) -# check(assert_type(df.to_sql("test_data", con=conn), Union[int, None]), int) -# check( -# assert_type( -# pd.read_sql( -# "SELECT int_column, date_column FROM test_data", -# con=conn, -# dtype={"int_columb":float}, -# ), -# pd.DataFrame, -# ), -# pd.DataFrame, -# ) -# conn.close() +def test_read_sql_dtype() -> None: + with ensure_clean() as path: + conn = sqlite3.connect(path) + df = pd.DataFrame( + data=[[0, "10/11/12"], [1, "12/11/10"]], + columns=["int_column", "date_column"], + ) + check(assert_type(df.to_sql("test_data", con=conn), Union[int, None]), int) + check( + assert_type( + pd.read_sql( + "SELECT int_column, date_column FROM test_data", + con=conn, + dtype=None, + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + conn.close() def test_read_sql_dtypes2() -> None: with ensure_clean() as path: - con = sqlite3.connect(path) - check(assert_type(DF.to_sql("test", con=con), Union[int, None]), int) + conn1 = sqlite3.connect(path) + check(assert_type(DF.to_sql("test", con=conn1), Union[int, None]), int) + check( assert_type( - read_sql("select * from test", con=con, dtype={"int_column": float}), - DataFrame, + read_sql("select * from test", con=conn1, dtype=int), + pd.DataFrame, ), - DataFrame, + pd.DataFrame, ) - con.close() + conn1.close() From 517df4bf5c46fc0418b0ef335d5e499d05e3f65c Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Fri, 21 Apr 2023 10:08:47 +0530 Subject: [PATCH 5/7] Update test_io.py --- tests/test_io.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_io.py 
b/tests/test_io.py index e50b732bd..5fe9a6bc6 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1228,6 +1228,28 @@ def test_read_sql_dtype() -> None: conn.close() +def test_read_sql_dtype1() -> None: + with ensure_clean() as path: + conn2 = sqlite3.connect(path) + df = pd.DataFrame( + data=[[0, "10/11/12"], [1, "12/11/10"]], + columns=["int_column", "date_column"], + ) + check(assert_type(df.to_sql("test_data", con=conn2), Union[int, None]), int) + check( + assert_type( + pd.read_sql( + "SELECT int_column, date_column FROM test_data", + con=conn2, + dtype={"int_column": int}, + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + conn2.close() + + def test_read_sql_dtypes2() -> None: with ensure_clean() as path: conn1 = sqlite3.connect(path) From feae3a2902dab400fded3b159a5a393811c5c1f6 Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Fri, 21 Apr 2023 19:46:42 +0530 Subject: [PATCH 6/7] corrected the 'dtype_backend' and added tests for it --- pandas-stubs/_typing.pyi | 1 + pandas-stubs/io/sql.pyi | 6 ++++-- tests/test_io.py | 38 +++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 0fb8eed2a..6bdb6f91e 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -77,6 +77,7 @@ class FulldatetimeDict(YearMonthDayDict, total=False): NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | object] Dtype: TypeAlias = ExtensionDtype | NpDtype DtypeArg: TypeAlias = Dtype | dict[Any, Dtype] +DtypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"] BooleanDtypeArg: TypeAlias = ( # Builtin bool type and its string alias type[bool] # noqa: Y030 diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi index 71026c7f6..f26f9bece 100644 --- a/pandas-stubs/io/sql.pyi +++ b/pandas-stubs/io/sql.pyi @@ -16,8 +16,10 @@ import sqlalchemy.engine import sqlalchemy.sql.expression from typing_extensions import TypeAlias +from 
pandas._libs.lib import NoDefault from pandas._typing import ( DtypeArg, + DtypeBackend, npt, ) @@ -85,7 +87,7 @@ def read_sql( *, chunksize: int, dtype: DtypeArg | None = ..., - dtype_backend: None = ..., + dtype_backend: DtypeBackend | NoDefault = ..., ) -> Generator[DataFrame, None, None]: ... @overload def read_sql( @@ -98,7 +100,7 @@ def read_sql( columns: list[str] = ..., chunksize: None = ..., dtype: DtypeArg | None = ..., - dtype_backend: None = ..., + dtype_backend: DtypeBackend | NoDefault = ..., ) -> DataFrame: ... class PandasSQL(PandasObject): diff --git a/tests/test_io.py b/tests/test_io.py index 5fe9a6bc6..5ce613dc6 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1225,29 +1225,18 @@ def test_read_sql_dtype() -> None: ), pd.DataFrame, ) - conn.close() - - -def test_read_sql_dtype1() -> None: - with ensure_clean() as path: - conn2 = sqlite3.connect(path) - df = pd.DataFrame( - data=[[0, "10/11/12"], [1, "12/11/10"]], - columns=["int_column", "date_column"], - ) - check(assert_type(df.to_sql("test_data", con=conn2), Union[int, None]), int) check( assert_type( pd.read_sql( "SELECT int_column, date_column FROM test_data", - con=conn2, + con=conn, dtype={"int_column": int}, ), pd.DataFrame, ), pd.DataFrame, ) - conn2.close() + conn.close() def test_read_sql_dtypes2() -> None: @@ -1263,3 +1252,26 @@ def test_read_sql_dtypes2() -> None: pd.DataFrame, ) conn1.close() + + +def test_read_sql_dtype_backend() -> None: + with ensure_clean() as path: + conn2 = sqlite3.connect(path) + check(assert_type(DF.to_sql("test", con=conn2), Union[int, None]), int) + check( + assert_type( + read_sql("select * from test", con=conn2, dtype_backend="pyarrow"), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + read_sql( + "select * from test", con=conn2, dtype_backend="numpy_nullable" + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + conn2.close() From ada2c4f8290dd3ce973a6e57da5680e114dc29b7 Mon Sep 17 00:00:00 2001 From: ramvikrams Date: Fri, 21 Apr 2023 
19:48:21 +0530 Subject: [PATCH 7/7] Update test_io.py --- tests/test_io.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/test_io.py b/tests/test_io.py index 5ce613dc6..24ff74262 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1236,22 +1236,16 @@ def test_read_sql_dtype() -> None: ), pd.DataFrame, ) - conn.close() - - -def test_read_sql_dtypes2() -> None: - with ensure_clean() as path: - conn1 = sqlite3.connect(path) - check(assert_type(DF.to_sql("test", con=conn1), Union[int, None]), int) + check(assert_type(DF.to_sql("test", con=conn), Union[int, None]), int) check( assert_type( - read_sql("select * from test", con=conn1, dtype=int), + read_sql("select * from test", con=conn, dtype=int), pd.DataFrame, ), pd.DataFrame, ) - conn1.close() + conn.close() def test_read_sql_dtype_backend() -> None: