From ad48fae9eb5c8c93dafcce6edbd02634c3922b6c Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Wed, 6 Nov 2024 09:25:15 -0600 Subject: [PATCH 1/8] adds test --- src/tests/integration/test_duckDB.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/tests/integration/test_duckDB.py b/src/tests/integration/test_duckDB.py index d0c460e25..a3cf152d6 100644 --- a/src/tests/integration/test_duckDB.py +++ b/src/tests/integration/test_duckDB.py @@ -235,3 +235,10 @@ def test_commits_all_statements(ip, sql, request): out = ip.run_cell(sql) assert out.error_in_exec is None assert out.result.dict() == {"x": (1, 2)} + + +def test_can_query_existing_df(ip_with_duckdb_sqlalchemy_empty): + df = pd.DataFrame({"city": ["NYC"]}) + ip_with_duckdb_sqlalchemy_empty.run_cell("%sql SET python_scan_all_frames=true") + out = ip_with_duckdb_sqlalchemy_empty.run_cell("%sql SELECT * FROM df;") + assert out.result.dict() == {"city": ("NYC",)} From e8e79835a310762277bdd29dbfeb52323d09183b Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Wed, 6 Nov 2024 09:26:30 -0600 Subject: [PATCH 2/8] lint --- src/tests/integration/test_duckDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/integration/test_duckDB.py b/src/tests/integration/test_duckDB.py index a3cf152d6..c17683aa6 100644 --- a/src/tests/integration/test_duckDB.py +++ b/src/tests/integration/test_duckDB.py @@ -238,7 +238,7 @@ def test_commits_all_statements(ip, sql, request): def test_can_query_existing_df(ip_with_duckdb_sqlalchemy_empty): - df = pd.DataFrame({"city": ["NYC"]}) + df = pd.DataFrame({"city": ["NYC"]}) # noqa ip_with_duckdb_sqlalchemy_empty.run_cell("%sql SET python_scan_all_frames=true") out = ip_with_duckdb_sqlalchemy_empty.run_cell("%sql SELECT * FROM df;") assert out.result.dict() == {"city": ("NYC",)} From 478b75c52999ec029a3e1585b9279323b274b02d Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 7 Nov 2024 09:31:05 -0600 Subject: [PATCH 3/8] fix --- src/sql/connection/connection.py | 8 ++++++-- src/tests/integration/test_duckDB.py | 14 +++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/sql/connection/connection.py b/src/sql/connection/connection.py index c684427a4..0c3ff604f 100644 --- a/src/sql/connection/connection.py +++ b/src/sql/connection/connection.py @@ -991,9 +991,9 @@ class DBAPIConnection(AbstractConnection): def __init__(self, connection, alias=None, config=None): # detect if the engine is a native duckdb connection - _is_duckdb_native = _check_if_duckdb_dbapi_connection(connection) + self._is_duckdb_native = _check_if_duckdb_dbapi_connection(connection) - self._dialect = "duckdb" if _is_duckdb_native else None + self._dialect = "duckdb" if self._is_duckdb_native else None self._driver = None # TODO: implement the dialect blacklist and add unit tests @@ -1038,6 +1038,10 @@ def raw_execute(self, query, parameters=None, with_=None): query = self._resolve_cte(query, with_) cur = self._connection.cursor() + + if self._is_duckdb_native: + cur.execute("SET python_scan_all_frames=true") + cur.execute(query) if self._requires_manual_commit: diff --git a/src/tests/integration/test_duckDB.py b/src/tests/integration/test_duckDB.py index c17683aa6..4a0027212 100644 --- a/src/tests/integration/test_duckDB.py +++ b/src/tests/integration/test_duckDB.py @@ -237,8 +237,16 @@ def test_commits_all_statements(ip, sql, request): assert out.result.dict() == {"x": (1, 2)} -def test_can_query_existing_df(ip_with_duckdb_sqlalchemy_empty): +@pytest.mark.parametrize( + "ip", + [ + "ip_with_duckdb_native_empty", + "ip_with_duckdb_sqlalchemy_empty", + ], +) +def test_can_query_existing_df(ip, request): + ip = request.getfixturevalue(ip) df = pd.DataFrame({"city": ["NYC"]}) # noqa - ip_with_duckdb_sqlalchemy_empty.run_cell("%sql SET python_scan_all_frames=true") - out = ip_with_duckdb_sqlalchemy_empty.run_cell("%sql SELECT * FROM df;") + ip.run_cell("%sql SET python_scan_all_frames=true") + out = ip.run_cell("%sql SELECT * FROM df;") assert out.result.dict() == {"city": ("NYC",)} From f86884f9244f3813fe6b134b96345e95d009ac38 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 7 Nov 2024 09:34:06 -0600 Subject: [PATCH 4/8] adds note --- doc/integrations/duckdb.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/integrations/duckdb.md b/doc/integrations/duckdb.md index fe45d0b05..6e09d9c95 100644 --- a/doc/integrations/duckdb.md +++ b/doc/integrations/duckdb.md @@ -265,6 +265,14 @@ df = pd.DataFrame({"x": range(100)}) %sql engine ``` +```{important} +If you're using DuckDB 1.1.0 or higher, you must run this before querying a data frame + +~~~sql +%sql SET python_scan_all_frames=true +~~~ +``` + ```{code-cell} ipython3 %%sql SELECT * From 9dff63415ca2b5802128b7e0db04e7e6e978da9c Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 7 Nov 2024 09:37:28 -0600 Subject: [PATCH 5/8] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bd6e0146..dc26e75cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## 0.10.16dev +* [Fix] Updates docs for querying data frames when using DuckDB SQLAlchemy connections +* [Fix] Support for scanning data frames when using native DuckDB connections due to changes in DuckDB's API + ## 0.10.15 (2024-11-05) *Drops compatibility with Python 3.8* From f8fcd6b49a1d4043959856c8ad1abc8abc73003f Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 7 Nov 2024 09:47:06 -0600 Subject: [PATCH 6/8] fix --- src/sql/connection/connection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sql/connection/connection.py b/src/sql/connection/connection.py index 0c3ff604f..bd723cb76 100644 --- a/src/sql/connection/connection.py +++ b/src/sql/connection/connection.py @@ -1040,7 +1040,10 @@ def raw_execute(self, query, parameters=None, with_=None): cur = self._connection.cursor() if self._is_duckdb_native: - cur.execute("SET python_scan_all_frames=true") + try: + cur.execute("SET python_scan_all_frames=true") + except Exception: + pass cur.execute(query) From 4d3f3d8203c2b6b6dc8f1b2731e942a6d7ec5eb0 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 7 Nov 2024 09:56:12 -0600 Subject: [PATCH 7/8] fix --- src/tests/test_connection.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/tests/test_connection.py b/src/tests/test_connection.py index 8369ff690..ae1e0e4a8 100644 --- a/src/tests/test_connection.py +++ b/src/tests/test_connection.py @@ -888,6 +888,10 @@ def mock_dbapi_raw_execute(monkeypatch, conn_dbapi_duckdb): def test_raw_execute_doesnt_transpile_sql_query(fixture_name, request): mock_execute, conn = request.getfixturevalue(fixture_name) + # to prevent the "SET python_scan_all_frames=true" call, since we don't want to + # test that here + conn._is_duckdb_native = False + conn.raw_execute("CREATE TABLE foo (bar INT)") conn.raw_execute("INSERT INTO foo VALUES (42), (43)") conn.raw_execute("SELECT * FROM foo LIMIT 1") @@ -949,6 +953,10 @@ def mock_dbapi_execute(monkeypatch): def test_execute_transpiles_sql_query(fixture_name, request): mock_execute, conn = request.getfixturevalue(fixture_name) + # to prevent the "SET python_scan_all_frames=true" call, since we don't want to + # test that here + conn._is_duckdb_native = False + conn.execute("CREATE TABLE foo (bar INT)") conn.execute("INSERT INTO foo VALUES (42), (43)") conn.execute("SELECT * FROM foo LIMIT 1") From 3bdb1f49acd01dfb70bf04d6ccb2a7900a08958f Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 7 Nov 2024 10:10:16 -0600 Subject: [PATCH 8/8] adds note --- src/sql/connection/connection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sql/connection/connection.py b/src/sql/connection/connection.py index bd723cb76..6c1c1d525 100644 --- a/src/sql/connection/connection.py +++ b/src/sql/connection/connection.py @@ -1039,6 +1039,8 @@ def raw_execute(self, query, parameters=None, with_=None): cur = self._connection.cursor() + # NOTE: this is a workaround for duckdb 1.1.0 and higher so we keep the + # existing behavior of being able to query data frames if self._is_duckdb_native: try: cur.execute("SET python_scan_all_frames=true")