Make sqlalchemy use connection pooling for catalog db (#710)
* Make catalog db use connection pool for sqlite

* Add asv benchmark for catalog db write

* Re-add poolclass import for async pool

* Fix linter issues

* Do not apply pool to in-memory database.

---------

Co-authored-by: Dan Allan <dallan@bnl.gov>
nmaytan and danielballan authored Apr 25, 2024
1 parent bd1a815 commit f44177e
Showing 2 changed files with 53 additions and 1 deletion.
38 changes: 38 additions & 0 deletions benchmarks/benchmarks.py
@@ -1,12 +1,19 @@
 # Write the benchmarking functions here.
 # See "Writing benchmarks" in the asv docs for more information.
 
+import tempfile
+
 import numpy
 import pandas
 
 from tiled.adapters.array import ArrayAdapter
 from tiled.adapters.mapping import MapAdapter
+from tiled.catalog import from_uri
 from tiled.client import Context, from_context
+from tiled.server.app import build_app
+from tiled.structures.core import StructureFamily
+from tiled.structures.data_source import DataSource
+from tiled.structures.table import TableStructure
 
 
 class TimeSuite:
@@ -27,3 +34,34 @@ def time_lookup(self):
 
     def time_lookup_and_read(self):
         self.client["x"].read()
+
+
+class CatalogSuite:
+    def setup(self):
+        self.directory = tempfile.TemporaryDirectory()
+        self.df = pandas.DataFrame([])
+
+        catalog = from_uri(
+            f"sqlite+aiosqlite:///{self.directory.name}/catalog.db",
+            init_if_not_exists=True,
+            writable_storage=self.directory.name,
+        )
+        self.context = Context.from_app(build_app(catalog))
+        self.client = from_context(self.context)
+
+    def teardown(self):
+        self.context.close()
+        self.directory.cleanup()
+
+    def time_repeated_write(self):
+        for _ in range(100):
+            self.client.new(
+                structure_family=StructureFamily.table,
+                data_sources=[
+                    DataSource(
+                        structure_family=StructureFamily.table,
+                        structure=TableStructure.from_pandas(self.df),
+                        mimetype="text/csv",
+                    ),  # or PARQUET_MIMETYPE
+                ],
+            )
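
asv collects time_* methods on suite classes automatically, so CatalogSuite.time_repeated_write will be picked up on the next asv run. For a quick sanity check outside of asv, the suite can also be driven by hand. A minimal sketch, assuming tiled is installed and this is run from the repository root; the benchmarks.benchmarks import path is inferred from the file location above, not stated in this commit:

import time

from benchmarks.benchmarks import CatalogSuite  # import path assumed from this diff

suite = CatalogSuite()
suite.setup()
try:
    start = time.perf_counter()
    suite.time_repeated_write()  # 100 table creations against the file-backed catalog
    print(f"100 writes in {time.perf_counter() - start:.2f} s")
finally:
    suite.teardown()

Because setup() puts the catalog in a temporary directory on disk rather than in memory, this loop exercises exactly the pooled-connection path added in tiled/catalog/adapter.py below.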
16 changes: 15 additions & 1 deletion tiled/catalog/adapter.py
@@ -16,9 +16,11 @@
 from fastapi import HTTPException
 from sqlalchemy import delete, event, func, not_, or_, select, text, type_coerce, update
 from sqlalchemy.dialects.postgresql import JSONB, REGCONFIG
+from sqlalchemy.engine import make_url
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import create_async_engine
 from sqlalchemy.orm import selectinload
+from sqlalchemy.pool import AsyncAdaptedQueuePool
 from sqlalchemy.sql.expression import cast
 from starlette.status import HTTP_404_NOT_FOUND, HTTP_415_UNSUPPORTED_MEDIA_TYPE

@@ -1316,7 +1318,19 @@ def from_uri(
         # Interpret URI as filepath.
         uri = f"sqlite+aiosqlite:///{uri}"
 
-    engine = create_async_engine(uri, echo=echo, json_serializer=json_serializer)
+    parsed_url = make_url(uri)
+    if (parsed_url.get_dialect().name == "sqlite") and (
+        parsed_url.database != ":memory:"
+    ):
+        # For file-backed SQLite databases, connection pooling offers a
+        # significant performance boost. For SQLite databases that exist
+        # only in process memory, pooling is not applicable.
+        poolclass = AsyncAdaptedQueuePool
+    else:
+        poolclass = None  # defer to sqlalchemy default
+    engine = create_async_engine(
+        uri, echo=echo, json_serializer=json_serializer, poolclass=poolclass
+    )
     if engine.dialect.name == "sqlite":
         event.listens_for(engine.sync_engine, "connect")(_set_sqlite_pragma)
     return CatalogContainerAdapter(
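The pool selection above can be observed in isolation with plain SQLAlchemy. A minimal sketch, assuming only sqlalchemy and aiosqlite are installed; the /tmp path is a placeholder:

from sqlalchemy.engine import make_url
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.pool import AsyncAdaptedQueuePool

for uri in (
    "sqlite+aiosqlite:////tmp/catalog.db",  # file-backed: use the queue pool
    "sqlite+aiosqlite:///:memory:",  # in-memory: defer to sqlalchemy default
):
    parsed_url = make_url(uri)
    file_backed = (
        parsed_url.get_dialect().name == "sqlite"
        and parsed_url.database != ":memory:"
    )
    poolclass = AsyncAdaptedQueuePool if file_backed else None
    engine = create_async_engine(uri, poolclass=poolclass)
    print(uri, "->", type(engine.sync_engine.pool).__name__)

Engine creation is lazy, so the file-backed URI need not exist for this check; no connection is opened until the engine is first used.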
