Do not query all the collections again unless there are wildcards
It can be slow to run the query on a list of hundreds of collections
just on the off chance they have wildcards.
timj committed Sep 7, 2024
1 parent 5ed6d6c commit 9101379
Showing 1 changed file with 10 additions and 8 deletions.
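
The change relies on a cheap string-level test for wildcard characters, so plain collection names skip the registry round trip entirely. The sketch below is only an illustration of such a test; `has_wildcards` and its glob character set are assumptions for this example, not the actual `has_globs` helper imported in the diff.

```python
from __future__ import annotations

import re
from collections.abc import Iterable

# Shell-style glob characters that would trigger collection expansion
# (an assumed character set, for illustration only).
_GLOB_CHARS = re.compile(r"[*?\[\]]")


def has_wildcards(collections: str | Iterable[str]) -> bool:
    """Return True if any collection expression looks like a glob pattern."""
    if isinstance(collections, str):
        collections = [collections]
    return any(
        not isinstance(expr, str) or _GLOB_CHARS.search(expr) is not None
        for expr in collections
    )
```

With a check like this, `["HSC/runs/RC2"]` is left alone while `["HSC/runs/*"]` is flagged for expansion.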
python/lsst/daf/butler/_butler.py

@@ -50,6 +50,7 @@
 from .dimensions import DataCoordinate, DimensionConfig
 from .registry import RegistryConfig, _RegistryFactory
 from .repo_relocation import BUTLER_ROOT_TAG
+from .utils import has_globs
 
 if TYPE_CHECKING:
     from ._dataset_existence import DatasetExistence
@@ -1691,14 +1692,15 @@ def query_datasets(
         if collections:
             # Wild cards need to be expanded but can only be allowed if
             # find_first=False because expanding wildcards does not return
-            # a guaranteed ordering.
-            expanded_collections = self.collections.query(collections)
-            if find_first and set(expanded_collections) != set(ensure_iterable(collections)):
-                raise RuntimeError(
-                    "Can not use wildcards in collections when find_first=True "
-                    f" (given {collections} which expanded to {expanded_collections})"
-                )
-            collections = expanded_collections
+            # a guaranteed ordering. Querying collection registry to expand
+            # collections when we do not have wildcards is expensive so only
+            # do it if we need it.
+            if has_globs(collections):
+                if find_first:
+                    raise RuntimeError(
+                        "Can not use wildcards in collections when find_first=True " f" (given {collections})"
+                    )
+                collections = self.collections.query(collections)
         query_limit = limit
         warn_limit = False
         if limit is not None and limit < 0:
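From the caller's side, `Butler.query_datasets` behaves the same for explicit collection names and is stricter about combining wildcards with `find_first=True`. A hedged usage sketch follows; the repository path, dataset type, and collection names are placeholders, not values taken from this repository.

```python
from lsst.daf.butler import Butler

butler = Butler.from_config("some/repo")  # placeholder repository location

# Explicit collection names contain no glob characters, so the collection
# registry is no longer queried just to expand them before the search.
refs = butler.query_datasets("calexp", collections=["HSC/runs/RC2"], find_first=True)

# A glob pattern is still expanded via the registry, but only with
# find_first=False; combining it with find_first=True raises RuntimeError
# (now without querying the registry first) because the expansion does not
# give a guaranteed collection ordering.
refs = butler.query_datasets("calexp", collections=["HSC/runs/*"], find_first=False)
```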
