forked from apache/arrow-adbc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
docs: add recipes for using ADBC with Pandas and Polars
Fixes apache#812.
- Loading branch information
Showing
4 changed files
with
133 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# RECIPE STARTS HERE | ||
|
||
#: ADBC is integrated into Pandas_, a popular dataframe library. Pandas can | ||
#: use ADBC to read tables in PostgreSQL databases. Compared to using | ||
#: SQLAlchemy or other options, using ADBC with Pandas can have better | ||
#: performance, such as by avoiding excess conversions to and from Python | ||
#: objects. | ||
#: | ||
#: .. _Pandas: https://pandas.pydata.org/ | ||
|
||
import os | ||
|
||
import pandas | ||
import pyarrow | ||
|
||
import adbc_driver_postgresql.dbapi | ||
|
||
uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]
conn = adbc_driver_postgresql.dbapi.connect(uri)

#: So that this recipe can be re-run for testing, we start by dropping the
#: table we are about to create, in case it already exists.
with conn.cursor() as cursor:
    cursor.execute("DROP TABLE IF EXISTS example")

#: Next, we create a sample table by ingesting Arrow data through an ADBC
#: cursor (rather than going through
#: :external:py:meth:`pandas.DataFrame.to_sql`).

columns = {
    "ints": [1, 2, None, 4],
    "strs": ["a", "b", "c", "d"],
}
data = pyarrow.Table.from_pydict(columns)

with conn.cursor() as cursor:
    cursor.adbc_ingest("example", data, mode="create")

conn.commit()

#: With the table in place, the ADBC connection can be handed to
#: :external:py:func:`pandas.read_sql` to fetch the result.

query = "SELECT * FROM example"
df = pandas.read_sql(query, conn)

assert len(df) == 4

conn.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# RECIPE STARTS HERE | ||
|
||
#: ADBC can be used with Polars_, a dataframe library written in Rust. As per | ||
#: its documentation: | ||
#: | ||
#: If the backend supports returning Arrow data directly then this facility | ||
#: will be used to efficiently instantiate the DataFrame; otherwise, the | ||
#: DataFrame is initialised from row-wise data. | ||
#: | ||
#: Obviously, ADBC returns Arrow data directly, making ADBC and Polars a | ||
#: natural fit for each other. | ||
#: | ||
#: .. _Polars: https://pola.rs/ | ||
|
||
import os | ||
|
||
import polars as pl | ||
|
||
uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]

#: We'll use Polars to create a sample table with
#: :external:py:meth:`polars.DataFrame.write_database`.  We don't need
#: to open an ADBC connection ourselves with Polars.

data = pl.DataFrame(
    {
        "ints": [1, 2, None, 4],
        "strs": ["a", "b", "c", "d"],
    }
)
data.write_database("example", uri, engine="adbc", if_table_exists="replace")

#: After creating the table, we can use
#: :external:py:func:`polars.read_database_uri` to fetch the result.  Again,
#: we can just pass the URI and tell Polars to manage ADBC for us.

df = pl.read_database_uri("SELECT * FROM example", uri, engine="adbc")

assert len(df) == 4