Make sqlalchemy use connection pooling for catalog db (#710)
* Make catalog db use connection pool for sqlite

* Add asv benchmark for catalog db write

* Re-add poolclass import for async pool

* Fix linter issues

* Do not apply pool to in-memory database.

---------

Co-authored-by: Dan Allan <dallan@bnl.gov>
nmaytan and danielballan authored Apr 25, 2024
1 parent bd1a815 commit f44177e
Showing 2 changed files with 53 additions and 1 deletion.
38 changes: 38 additions & 0 deletions benchmarks/benchmarks.py
@@ -1,12 +1,19 @@
 # Write the benchmarking functions here.
 # See "Writing benchmarks" in the asv docs for more information.
 
+import tempfile
+
 import numpy
 import pandas
 
 from tiled.adapters.array import ArrayAdapter
 from tiled.adapters.mapping import MapAdapter
+from tiled.catalog import from_uri
 from tiled.client import Context, from_context
+from tiled.server.app import build_app
+from tiled.structures.core import StructureFamily
+from tiled.structures.data_source import DataSource
+from tiled.structures.table import TableStructure
 
 
 class TimeSuite:
@@ -27,3 +34,34 @@ def time_lookup(self):
 
     def time_lookup_and_read(self):
         self.client["x"].read()
+
+
+class CatalogSuite:
+    def setup(self):
+        self.directory = tempfile.TemporaryDirectory()
+        self.df = pandas.DataFrame([])
+
+        catalog = from_uri(
+            f"sqlite+aiosqlite:///{self.directory.name}/catalog.db",
+            init_if_not_exists=True,
+            writable_storage=self.directory.name,
+        )
+        self.context = Context.from_app(build_app(catalog))
+        self.client = from_context(self.context)
+
+    def teardown(self):
+        self.context.close()
+        self.directory.cleanup()
+
+    def time_repeated_write(self):
+        for _ in range(100):
+            self.client.new(
+                structure_family=StructureFamily.table,
+                data_sources=[
+                    DataSource(
+                        structure_family=StructureFamily.table,
+                        structure=TableStructure.from_pandas(self.df),
+                        mimetype="text/csv",
+                    ),  # or PARQUET_MIMETYPE
+                ],
+            )
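
asv collects time_* methods on suite classes automatically, so CatalogSuite.time_repeated_write will be picked up on the next asv run. For a quick sanity check outside of asv, the suite can also be driven by hand. A minimal sketch, assuming tiled is installed and this is run from the repository root; the benchmarks.benchmarks import path is inferred from the file location above, not stated in this commit:

import time

from benchmarks.benchmarks import CatalogSuite  # import path assumed from this diff

suite = CatalogSuite()
suite.setup()
try:
    start = time.perf_counter()
    suite.time_repeated_write()  # 100 table creations against the file-backed catalog
    print(f"100 writes in {time.perf_counter() - start:.2f} s")
finally:
    suite.teardown()

Because setup() puts the catalog in a temporary directory on disk rather than in memory, this loop exercises exactly the pooled-connection path added in tiled/catalog/adapter.py below.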
16 changes: 15 additions & 1 deletion tiled/catalog/adapter.py
@@ -16,9 +16,11 @@
 from fastapi import HTTPException
 from sqlalchemy import delete, event, func, not_, or_, select, text, type_coerce, update
 from sqlalchemy.dialects.postgresql import JSONB, REGCONFIG
+from sqlalchemy.engine import make_url
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import create_async_engine
 from sqlalchemy.orm import selectinload
+from sqlalchemy.pool import AsyncAdaptedQueuePool
 from sqlalchemy.sql.expression import cast
 from starlette.status import HTTP_404_NOT_FOUND, HTTP_415_UNSUPPORTED_MEDIA_TYPE

@@ -1316,7 +1318,19 @@ def from_uri(
         # Interpret URI as filepath.
         uri = f"sqlite+aiosqlite:///{uri}"
 
-    engine = create_async_engine(uri, echo=echo, json_serializer=json_serializer)
+    parsed_url = make_url(uri)
+    if (parsed_url.get_dialect().name == "sqlite") and (
+        parsed_url.database != ":memory:"
+    ):
+        # For file-backed SQLite databases, connection pooling offers a
+        # significant performance boost. For SQLite databases that exist
+        # only in process memory, pooling is not applicable.
+        poolclass = AsyncAdaptedQueuePool
+    else:
+        poolclass = None  # defer to sqlalchemy default
+    engine = create_async_engine(
+        uri, echo=echo, json_serializer=json_serializer, poolclass=poolclass
+    )
     if engine.dialect.name == "sqlite":
         event.listens_for(engine.sync_engine, "connect")(_set_sqlite_pragma)
     return CatalogContainerAdapter(
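The pool selection above can be observed in isolation with plain SQLAlchemy. A minimal sketch, assuming only sqlalchemy and aiosqlite are installed; the /tmp path is a placeholder:

from sqlalchemy.engine import make_url
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.pool import AsyncAdaptedQueuePool

for uri in (
    "sqlite+aiosqlite:////tmp/catalog.db",  # file-backed: use the queue pool
    "sqlite+aiosqlite:///:memory:",  # in-memory: defer to sqlalchemy default
):
    parsed_url = make_url(uri)
    file_backed = (
        parsed_url.get_dialect().name == "sqlite"
        and parsed_url.database != ":memory:"
    )
    poolclass = AsyncAdaptedQueuePool if file_backed else None
    engine = create_async_engine(uri, poolclass=poolclass)
    print(uri, "->", type(engine.sync_engine.pool).__name__)

Engine creation is lazy, so the file-backed URI need not exist for this check; no connection is opened until the engine is first used.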
