Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-46339: fix regression in glob detection in query-datasets #1080

Merged
merged 1 commit into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-46339.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug in `butler query-datasets` that incorrectly rejected a find-first query against a chain collection as having a glob.
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/script/queryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from .._butler import Butler
from ..cli.utils import sortAstropyTable
from ..utils import has_globs

if TYPE_CHECKING:
from lsst.daf.butler import DatasetRef
Expand Down Expand Up @@ -261,7 +262,7 @@ def getDatasets(self) -> Iterator[list[DatasetRef]]:
summary_datasets=dataset_types,
)
expanded_query_collections = [c.name for c in query_collections_info]
if self._find_first and set(query_collections) != set(expanded_query_collections):
if self._find_first and has_globs(query_collections):
raise RuntimeError("Can not use wildcards in collections when find_first=True")
query_collections = expanded_query_collections

Expand Down
15 changes: 12 additions & 3 deletions tests/test_cliCmdQueryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import unittest

from astropy.table import Table as AstropyTable
from lsst.daf.butler import StorageClassFactory, script
from lsst.daf.butler import CollectionType, StorageClassFactory, script
from lsst.daf.butler.tests import addDatasetType
from lsst.daf.butler.tests.utils import ButlerTestHelper, MetricTestRepo, makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath
Expand Down Expand Up @@ -370,8 +370,13 @@ def testFindFirstAndCollections(self):
# Add a new run, and add a dataset to shadow an existing dataset.
testRepo.addDataset(run="foo", dataId={"instrument": "DummyCamComp", "visit": 424})

# Add a CHAINED collection to include the two runs, to check that
# flattening works as well.
testRepo.butler.collections.register("chain", CollectionType.CHAINED)
testRepo.butler.collections.redefine_chain("chain", ["foo", "ingest/run"])

# Verify that without find-first, duplicate datasets are returned
tables = self._queryDatasets(repo=testRepo.butler, collections=["foo", "ingest/run"], show_uri=True)
tables = self._queryDatasets(repo=testRepo.butler, collections=["chain"], show_uri=True)

# The test should be running with a single FileDatastore.
roots = testRepo.butler.get_datastore_roots()
Expand Down Expand Up @@ -514,7 +519,7 @@ def testFindFirstAndCollections(self):
# Verify that with find first the duplicate dataset is eliminated and
# the more recent dataset is returned.
tables = self._queryDatasets(
repo=testRepo.butler, collections=["foo", "ingest/run"], show_uri=True, find_first=True
repo=testRepo.butler, collections=["chain"], show_uri=True, find_first=True
)

expectedTables = (
Expand Down Expand Up @@ -614,6 +619,10 @@ def testFindFirstAndCollections(self):

self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

# Verify that globs are not supported with find_first=True.
with self.assertRaises(RuntimeError):
self._queryDatasets(repo=testRepo.butler, collections=["*"], show_uri=True, find_first=True)


if __name__ == "__main__":
unittest.main()
Loading