diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index 09db1db43..b88abaf60 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -1,12 +1,19 @@
 # Write the benchmarking functions here.
 # See "Writing benchmarks" in the asv docs for more information.
+import tempfile
+
 import numpy
+import pandas
 
 from tiled.adapters.array import ArrayAdapter
 from tiled.adapters.mapping import MapAdapter
+from tiled.catalog import from_uri
 from tiled.client import Context, from_context
 from tiled.server.app import build_app
+from tiled.structures.core import StructureFamily
+from tiled.structures.data_source import DataSource
+from tiled.structures.table import TableStructure
 
 
 class TimeSuite:
     def setup(self):
@@ -27,3 +34,34 @@ def time_lookup(self):
 
     def time_lookup_and_read(self):
         self.client["x"].read()
+
+
+class CatalogSuite:
+    def setup(self):
+        self.directory = tempfile.TemporaryDirectory()
+        self.df = pandas.DataFrame([])
+
+        catalog = from_uri(
+            f"sqlite+aiosqlite:///{self.directory.name}/catalog.db",
+            init_if_not_exists=True,
+            writable_storage=self.directory.name,
+        )
+        self.context = Context.from_app(build_app(catalog))
+        self.client = from_context(self.context)
+
+    def teardown(self):
+        self.context.close()
+        self.directory.cleanup()
+
+    def time_repeated_write(self):
+        for _ in range(100):
+            self.client.new(
+                structure_family=StructureFamily.table,
+                data_sources=[
+                    DataSource(
+                        structure_family=StructureFamily.table,
+                        structure=TableStructure.from_pandas(self.df),
+                        mimetype="text/csv",
+                    ),  # or PARQUET_MIMETYPE
+                ],
+            )
diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py
index 3d6c61967..4ee465cb8 100644
--- a/tiled/catalog/adapter.py
+++ b/tiled/catalog/adapter.py
@@ -16,9 +16,11 @@
 from fastapi import HTTPException
 from sqlalchemy import delete, event, func, not_, or_, select, text, type_coerce, update
 from sqlalchemy.dialects.postgresql import JSONB, REGCONFIG
+from sqlalchemy.engine import make_url
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import create_async_engine
 from sqlalchemy.orm import selectinload
+from sqlalchemy.pool import AsyncAdaptedQueuePool
 from sqlalchemy.sql.expression import cast
 from starlette.status import HTTP_404_NOT_FOUND, HTTP_415_UNSUPPORTED_MEDIA_TYPE
 
@@ -1316,7 +1318,19 @@ def from_uri(
     if "://" not in uri:
         # Interpret URI as filepath.
         uri = f"sqlite+aiosqlite:///{uri}"
-    engine = create_async_engine(uri, echo=echo, json_serializer=json_serializer)
+    parsed_url = make_url(uri)
+    if (parsed_url.get_dialect().name == "sqlite") and (
+        parsed_url.database != ":memory:"
+    ):
+        # For file-backed SQLite databases, connection pooling offers a
+        # significant performance boost. For SQLite databases that exist
+        # only in process memory, pooling is not applicable.
+        poolclass = AsyncAdaptedQueuePool
+    else:
+        poolclass = None  # defer to sqlalchemy default
+    engine = create_async_engine(
+        uri, echo=echo, json_serializer=json_serializer, poolclass=poolclass
+    )
     if engine.dialect.name == "sqlite":
         event.listens_for(engine.sync_engine, "connect")(_set_sqlite_pragma)
     return CatalogContainerAdapter(