Skip to content

Commit

Permalink
docs: add recipes for using ADBC with Pandas and Polars
Browse files Browse the repository at this point in the history
Fixes apache#812.
  • Loading branch information
lidavidm committed Jun 25, 2024
1 parent c1ad8df commit 16b2700
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@

# External Sphinx projects that cross-references (e.g. :external:py:func:)
# may resolve against.  Each value is (base URL, inventory location); None
# tells intersphinx to fetch objects.inv from the base URL itself.
intersphinx_mapping = {
    project: (base_url, None)
    for project, base_url in (
        ("arrow", "https://arrow.apache.org/docs/"),
        ("pandas", "https://pandas.pydata.org/docs/"),
        ("polars", "https://docs.pola.rs/api/python/stable/"),
    )
}

# Add env vars like ADBC_INTERSPHINX_MAPPING_adbc_java = url;path
Expand Down
10 changes: 10 additions & 0 deletions docs/source/python/recipe/postgresql.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,13 @@ Connection pooling with SQLAlchemy
==================================

.. recipe:: postgresql_pool.py

Using Pandas and ADBC
=====================

.. recipe:: postgresql_pandas.py

Using Polars and ADBC
=====================

.. recipe:: postgresql_polars.py
65 changes: 65 additions & 0 deletions docs/source/python/recipe/postgresql_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# RECIPE STARTS HERE

#: ADBC is integrated into Pandas_, a popular dataframe library. Pandas can
#: use ADBC to read tables in PostgreSQL databases. Compared to using
#: SQLAlchemy or other options, using ADBC with Pandas can have better
#: performance, such as by avoiding excess conversions to and from Python
#: objects.
#:
#: .. _Pandas: https://pandas.pydata.org/

import os

import pandas
import pyarrow

import adbc_driver_postgresql.dbapi

uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]
conn = adbc_driver_postgresql.dbapi.connect(uri)

#: So that this recipe can be run repeatedly, we start by dropping the table
#: we're about to create if it already exists.
with conn.cursor() as cursor:
    cursor.execute("DROP TABLE IF EXISTS example")

#: Next we build a small Arrow table and load it into PostgreSQL through
#: ADBC.  (ADBC is not currently integrated into
#: :external:py:meth:`pandas.DataFrame.to_sql`.)

columns = {
    "ints": [1, 2, None, 4],
    "strs": ["a", "b", "c", "d"],
}
table = pyarrow.Table.from_pydict(columns)

with conn.cursor() as cursor:
    cursor.adbc_ingest("example", table, mode="create")

conn.commit()

#: With the table in place, we can hand the ADBC connection directly to
#: :external:py:func:`pandas.read_sql` to fetch the result.

query = "SELECT * FROM example"
frame = pandas.read_sql(query, conn)

assert len(frame) == 4

conn.close()
56 changes: 56 additions & 0 deletions docs/source/python/recipe/postgresql_polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# RECIPE STARTS HERE

#: ADBC can be used with Polars_, a dataframe library written in Rust. As per
#: its documentation:
#:
#:     If the backend supports returning Arrow data directly then this facility
#:     will be used to efficiently instantiate the DataFrame; otherwise, the
#:     DataFrame is initialised from row-wise data.
#:
#: Obviously, ADBC returns Arrow data directly, making ADBC and Polars a
#: natural fit for each other.
#:
#: .. _Polars: https://pola.rs/

import os

import polars as pl

uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]

#: We'll use Polars to create a sample table with
#: :external:py:meth:`polars.DataFrame.write_database`.  We don't need
#: to open an ADBC connection ourselves with Polars.

data = pl.DataFrame(
    {
        "ints": [1, 2, None, 4],
        "strs": ["a", "b", "c", "d"],
    }
)
data.write_database("example", uri, engine="adbc", if_table_exists="replace")

#: After creating the table, we can use
#: :external:py:func:`polars.read_database_uri` to fetch the result.  Again,
#: we can just pass the URI and tell Polars to manage ADBC for us.

# NOTE: read_database() expects an already-open connection object and has no
# ``engine`` argument; the URI-based entry point is read_database_uri().
df = pl.read_database_uri("SELECT * FROM example", uri, engine="adbc")

assert len(df) == 4

0 comments on commit 16b2700

Please sign in to comment.