From e92b0e2da27d686f6966f82df80f3139d64721da Mon Sep 17 00:00:00 2001
From: Jim Bosch <jbosch@astro.princeton.edu>
Date: Thu, 19 Sep 2024 14:48:42 -0400
Subject: [PATCH 1/3] Don't flag 'instrument=X' expressions as needing an
 instrument.

We want to flag a constraint like 'visit=X' with no instrument as
needing an instrument constraint, or (if one is available) use the
instrument from the butler's default data ID.  But we were also
flagging constraints like 'instrument=X', and while that on its own
provides the needed constraint, 'instrument=X OR instrument=Y' does
not, and hence always failed.
---
 .../butler/queries/tree/_column_reference.py  |  2 +-
 .../lsst/daf/butler/tests/butler_queries.py   | 25 +++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/python/lsst/daf/butler/queries/tree/_column_reference.py b/python/lsst/daf/butler/queries/tree/_column_reference.py
index 6f67ef66eb..f5103d0af4 100644
--- a/python/lsst/daf/butler/queries/tree/_column_reference.py
+++ b/python/lsst/daf/butler/queries/tree/_column_reference.py
@@ -62,7 +62,7 @@ def gather_required_columns(self, columns: ColumnSet) -> None:
         columns.update_dimensions(self.dimension.minimal_group)
 
     def gather_governors(self, governors: set[str]) -> None:
-        if self.dimension.governor is not None:
+        if self.dimension.governor is not None and self.dimension.governor is not self.dimension:
             governors.add(self.dimension.governor.name)
 
     @property
diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py
index 2d3d83f0eb..ee188e5104 100644
--- a/python/lsst/daf/butler/tests/butler_queries.py
+++ b/python/lsst/daf/butler/tests/butler_queries.py
@@ -1859,6 +1859,31 @@ def test_dataset_queries(self) -> None:
             self.assertEqual(rows[0]["visit"], 1)
             self.assertEqual(rows[0]["dt.collection"], "run1")
 
+    def test_multiple_instrument_queries(self) -> None:
+        """Test that multiple-instrument queries are not rejected as having
+        governor dimension ambiguities.
+        """
+        butler = self.make_butler("base.yaml")
+        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
+        self.assertCountEqual(
+            butler.query_data_ids(["detector"], where="instrument='Cam1' OR instrument='Cam2'"),
+            [
+                DataCoordinate.standardize(instrument="Cam1", detector=n, universe=butler.dimensions)
+                for n in range(1, 5)
+            ],
+        )
+        self.assertCountEqual(
+            butler.query_data_ids(
+                ["detector"],
+                where="(instrument='Cam1' OR instrument='Cam2') AND visit.region OVERLAPS region",
+                bind={"region": Region.from_ivoa_pos("CIRCLE 320. -0.25 10.")},
+                explain=False,
+            ),
+            # No visits in this test dataset means no result, but the point of
+            # the test is just that the query can be constructed at all.
+            [],
+        )
+
 
 def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
     output = []

From 42be712856273f875fd93e3e93f215242b1ad9af Mon Sep 17 00:00:00 2001
From: Jim Bosch <jbosch@astro.princeton.edu>
Date: Thu, 19 Sep 2024 15:28:57 -0400
Subject: [PATCH 2/3] Allow queries with OR or IN constraints on governor
 dimensions.

Instead of requiring a new-query-system WHERE constraint to lock down
the (e.g.) instrument to a single value in order to allow a dependent
value (e.g. detector) to appear, we now just check that instrument
appears somewhere in the expression, since that'd catch the most likely
mistake of leaving the instrument out entirely (or assuming incorrectly
that a default would be available).
---
 .../daf/butler/direct_query_driver/_driver.py |  2 +-
 .../butler/direct_query_driver/_query_plan.py | 31 ++++++++++++++++---
 python/lsst/daf/butler/queries/visitors.py    |  2 +-
 .../lsst/daf/butler/tests/butler_queries.py   |  8 +++++
 4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/python/lsst/daf/butler/direct_query_driver/_driver.py b/python/lsst/daf/butler/direct_query_driver/_driver.py
index 4ccd769fad..bfc85ac143 100644
--- a/python/lsst/daf/butler/direct_query_driver/_driver.py
+++ b/python/lsst/daf/butler/direct_query_driver/_driver.py
@@ -937,7 +937,7 @@ def _analyze_query_tree(self, tree: qt.QueryTree) -> tuple[QueryJoinsPlan, Query
         where_governors: set[str] = set()
         result.predicate.gather_governors(where_governors)
         for governor in where_governors:
-            if governor not in result.constraint_data_id:
+            if governor not in result.constraint_data_id and governor not in result.governors_referenced:
                 if governor in self._default_data_id.dimensions:
                     result.constraint_data_id[governor] = self._default_data_id[governor]
                 else:
diff --git a/python/lsst/daf/butler/direct_query_driver/_query_plan.py b/python/lsst/daf/butler/direct_query_driver/_query_plan.py
index 60f8a8c05c..ced6c1131c 100644
--- a/python/lsst/daf/butler/direct_query_driver/_query_plan.py
+++ b/python/lsst/daf/butler/direct_query_driver/_query_plan.py
@@ -115,12 +115,19 @@ class QueryJoinsPlan:
     rows.
     """
 
+    governors_referenced: set[str] = dataclasses.field(default_factory=set)
+    """Governor dimensions referenced directly in the predicate, but not
+    necessarily constrained to the same value in all logic branches.
+    """
+
     def __post_init__(self) -> None:
         self.predicate.gather_required_columns(self.columns)
         # Extract the data ID implied by the predicate; we can use the governor
         # dimensions in that to constrain the collections we search for
         # datasets later.
-        self.predicate.visit(_DataIdExtractionVisitor(self.constraint_data_id, self.messages))
+        self.predicate.visit(
+            _DataIdExtractionVisitor(self.constraint_data_id, self.messages, self.governors_referenced)
+        )
 
     def iter_mandatory(self) -> Iterator[DimensionElement]:
         """Return an iterator over the dimension elements that must be joined
@@ -304,11 +311,17 @@ class _DataIdExtractionVisitor(
         Dictionary to populate in place.
     messages : `list` [ `str` ]
         List of diagnostic messages to populate in place.
+    governor_references : `set` [ `str` ]
+        Set of the names of governor dimension names that were referenced
+        directly.  This includes dimensions that were constrained to different
+        values in different logic branches, and hence not included in
+        ``data_id``.
     """
 
-    def __init__(self, data_id: dict[str, DataIdValue], messages: list[str]):
+    def __init__(self, data_id: dict[str, DataIdValue], messages: list[str], governor_references: set[str]):
         self.data_id = data_id
         self.messages = messages
+        self.governor_references = governor_references
 
     def visit_comparison(
         self,
@@ -317,6 +330,8 @@ def visit_comparison(
         b: qt.ColumnExpression,
         flags: PredicateVisitFlags,
     ) -> None:
+        k_a, v_a = a.visit(self)
+        k_b, v_b = b.visit(self)
         if flags & PredicateVisitFlags.HAS_OR_SIBLINGS:
             return None
         if flags & PredicateVisitFlags.INVERTED:
@@ -326,8 +341,6 @@ def visit_comparison(
                 return None
         if operator != "==":
             return None
-        k_a, v_a = a.visit(self)
-        k_b, v_b = b.visit(self)
         if k_a is not None and v_b is not None:
             key = k_a
             value = v_b
@@ -341,18 +354,28 @@ def visit_comparison(
         return None
 
     def visit_binary_expression(self, expression: qt.BinaryExpression) -> tuple[None, None]:
+        expression.a.visit(self)
+        expression.b.visit(self)
         return None, None
 
     def visit_unary_expression(self, expression: qt.UnaryExpression) -> tuple[None, None]:
+        expression.operand.visit(self)
         return None, None
 
     def visit_literal(self, expression: qt.ColumnLiteral) -> tuple[None, Any]:
         return None, expression.get_literal_value()
 
     def visit_dimension_key_reference(self, expression: qt.DimensionKeyReference) -> tuple[str, None]:
+        if expression.dimension.governor is expression.dimension:
+            self.governor_references.add(expression.dimension.name)
         return expression.dimension.name, None
 
     def visit_dimension_field_reference(self, expression: qt.DimensionFieldReference) -> tuple[None, None]:
+        if (
+            expression.element.governor is expression.element
+            and expression.field in expression.element.alternate_keys.names
+        ):
+            self.governor_references.add(expression.element.name)
         return None, None
 
     def visit_dataset_field_reference(self, expression: qt.DatasetFieldReference) -> tuple[None, None]:
diff --git a/python/lsst/daf/butler/queries/visitors.py b/python/lsst/daf/butler/queries/visitors.py
index 5e77161d15..5ee2979b4d 100644
--- a/python/lsst/daf/butler/queries/visitors.py
+++ b/python/lsst/daf/butler/queries/visitors.py
@@ -192,7 +192,7 @@ class PredicateVisitor(Generic[_A, _O, _L]):
     -----
     The concrete `PredicateLeaf` types are only semi-public (they appear in
     the serialized form of a `Predicate`, but their types should not generally
-    be referenced directly outside of the module in which they are defined.
+    be referenced directly outside of the module in which they are defined).
     As a result, visiting these objects unpacks their attributes into the
     visit method arguments.
     """
diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py
index ee188e5104..970b42a770 100644
--- a/python/lsst/daf/butler/tests/butler_queries.py
+++ b/python/lsst/daf/butler/tests/butler_queries.py
@@ -1883,6 +1883,14 @@ def test_multiple_instrument_queries(self) -> None:
             # the test is just that the query can be constructed at all.
             [],
         )
+        self.assertCountEqual(
+            butler.query_data_ids(
+                ["instrument"],
+                where="(instrument='Cam1' AND detector=2) OR (instrument='Cam2' AND detector=500)",
+                explain=False,
+            ),
+            [DataCoordinate.standardize(instrument="Cam1", universe=butler.dimensions)],
+        )
 
 
 def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:

From 622876da61c8f7481b17fa497ba9981b6a488d51 Mon Sep 17 00:00:00 2001
From: Jim Bosch <jbosch@astro.princeton.edu>
Date: Thu, 19 Sep 2024 15:34:30 -0400
Subject: [PATCH 3/3] Add changelog entry.

---
 doc/changes/DM-46401.bugfix.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 doc/changes/DM-46401.bugfix.md

diff --git a/doc/changes/DM-46401.bugfix.md b/doc/changes/DM-46401.bugfix.md
new file mode 100644
index 0000000000..0c92351d4b
--- /dev/null
+++ b/doc/changes/DM-46401.bugfix.md
@@ -0,0 +1 @@
+Fix support for multiple-instrument (and multiple-skymap) `where` expressions in the new query system.