Support differentiable coefficients for observables #6598

Merged: 26 commits merged into master from derivatives-observables on Jan 15, 2025
Changes from all commits (26 commits)
4f38472
trying to differentiate observables
albi3ro Nov 8, 2024
b04e8b6
trying to get derivatives of observables working
albi3ro Nov 13, 2024
058e60a
fixing bugs
albi3ro Nov 19, 2024
260498e
remove print statements
albi3ro Nov 19, 2024
c0b7726
Merge branch 'master' into derivatives-observables
albi3ro Nov 19, 2024
96eafaa
Update pennylane/workflow/execution.py
albi3ro Nov 19, 2024
ab95bf2
Update tests/interfaces/test_jax.py
albi3ro Nov 19, 2024
6af33a7
Merge branch 'master' into derivatives-observables
albi3ro Nov 19, 2024
8c0165b
Update pennylane/gradients/vjp.py
albi3ro Nov 19, 2024
50a63d1
fixing tests
albi3ro Nov 19, 2024
86018ae
merging
albi3ro Dec 31, 2024
ae21358
fixing up tests
albi3ro Jan 2, 2025
cdf4d0c
Apply suggestions from code review
albi3ro Jan 2, 2025
4ff73d5
Merge branch 'master' into derivatives-observables
albi3ro Jan 2, 2025
b6a7ced
adding testing
albi3ro Jan 3, 2025
b29c49b
adding more tests
albi3ro Jan 3, 2025
4d2b674
remove copy in sprod
albi3ro Jan 6, 2025
a76ac4f
xfail autograd test
albi3ro Jan 6, 2025
f84fd80
Merge branch 'master' into derivatives-observables
albi3ro Jan 6, 2025
d466736
changelog
albi3ro Jan 6, 2025
7952889
Update pennylane/ops/op_math/sprod.py
albi3ro Jan 6, 2025
17881eb
Apply suggestions from code review
albi3ro Jan 8, 2025
7904a83
Apply suggestions from code review
albi3ro Jan 14, 2025
4936cc1
Merge branch 'master' into derivatives-observables
albi3ro Jan 15, 2025
f70947b
black
albi3ro Jan 15, 2025
9f0a1a1
Merge branch 'master' into derivatives-observables
albi3ro Jan 15, 2025
4 changes: 4 additions & 0 deletions doc/releases/changelog-dev.md
@@ -6,6 +6,9 @@

<h3>Improvements 🛠</h3>

* The coefficients of observables now have improved differentiability.
[(#6598)](https://github.com/PennyLaneAI/pennylane/pull/6598)

<h3>Breaking changes 💔</h3>

* Removed method `qsvt_legacy` along with its private helper `_qsp_to_qsvt`
@@ -26,3 +29,4 @@ This release contains contributions from (in alphabetical order):

Yushao Chen,
Diksha Dhawan,
Christina Lee,
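A minimal sketch of what this changelog entry enables, assuming default.qubit and the JAX interface (the circuit and values below are illustrative, not taken from the PR): differentiating a cost function with respect to the coefficients of the measured observable.

import jax
import jax.numpy as jnp
import pennylane as qml

dev = qml.device("default.qubit", wires=1)

@qml.qnode(dev, diff_method="parameter-shift")
def circuit(coeffs):
    qml.RX(0.5, wires=0)
    # the trainable parameters sit in the observable, not in the gates
    return qml.expval(qml.dot(coeffs, [qml.Z(0), qml.X(0)]))

# d<H>/dc_i reduces to <O_i>; analytically this is (cos(0.5), 0.0)
print(jax.grad(circuit)(jnp.array([0.7, 0.2])))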
26 changes: 21 additions & 5 deletions pennylane/gradients/parameter_shift.py
@@ -318,7 +318,7 @@
    return tuple(tuple(map(zero_entry, shot_comp_g)) for shot_comp_g in g)


def expval_param_shift(
    tape, argnum=None, shifts=None, gradient_recipes=None, f0=None, broadcast=False
):
    r"""Generate the parameter-shift tapes and postprocessing methods required
@@ -563,7 +563,7 @@
    return tuple(var_grad)


def _create_variance_proc_fn(
    tape, var_mask, var_indices, pdA_fn, pdA2_fn, tape_boundary, non_involutory_indices
):
    """Auxiliary function to define the processing function for computing the
@@ -635,7 +635,7 @@
    return non_involutory_indices


def var_param_shift(tape, argnum, shifts=None, gradient_recipes=None, f0=None, broadcast=False):
    r"""Generate the parameter-shift tapes and postprocessing methods required
    to compute the gradient of a gate parameter with respect to a
    variance value.
@@ -752,7 +752,7 @@
    return True


def _inplace_set_trainable_params(tape):
    """Update all the trainable params in place."""
    params = tape.get_parameters(trainable_only=False)
    tape.trainable_params = qml.math.get_trainable_indices(params)


def _expand_transform_param_shift(
    tape: QuantumScript,
    argnum=None,
    shifts=None,
@@ -769,11 +775,21 @@
name="param_shift",
error=qml.operation.DecompositionUndefinedError,
)
if new_tape is tape:
return [tape], postprocessing
params = new_tape.get_parameters(trainable_only=False)
new_tape.trainable_params = qml.math.get_trainable_indices(params)
return [new_tape], postprocessing
if any(
qml.math.requires_grad(d) for mp in tape.measurements for d in getattr(mp.obs, "data", [])
):
try:
batch, postprocessing = qml.transforms.split_to_single_terms(new_tape)
except RuntimeError as e:
raise ValueError(
"Can only differentiate Hamiltonian "
f"coefficients for expectations, not {tape.measurements}."
) from e
else:
batch = [new_tape]
if len(batch) > 1 or batch[0] is not tape:
_ = [_inplace_set_trainable_params(t) for t in batch]
return batch, postprocessing


@partial(
@@ -782,7 +798,7 @@
classical_cotransform=_contract_qjac_with_cjac,
final_transform=True,
)
def param_shift(
    tape: QuantumScript,
    argnum=None,
    shifts=None,
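A hedged sketch of the guard added in _expand_transform_param_shift above: when a measurement's observable carries trainable coefficients, the tape is routed through qml.transforms.split_to_single_terms, and anything other than an expectation value surfaces the new ValueError. The snippet is illustrative, not test code from this PR.

import jax
import pennylane as qml

dev = qml.device("default.qubit", wires=1)

@qml.qnode(dev, diff_method="parameter-shift")
def variance(c):
    # a variance of an observable with a trainable coefficient
    return qml.var(qml.s_prod(c, qml.Z(0)))

# per the transform above, differentiating this is expected to raise:
# ValueError: Can only differentiate Hamiltonian coefficients for expectations, ...
# jax.grad(variance)(jax.numpy.array(0.5))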
10 changes: 6 additions & 4 deletions pennylane/ops/functions/dot.py
@@ -142,10 +142,12 @@ def dot(
f"ops must be an Iterable of {t.__name__}'s, not a {t.__name__} itself."
)

if len(coeffs) != len(ops):
raise ValueError("Number of coefficients and operators does not match.")
if len(coeffs) == 0 and len(ops) == 0:
raise ValueError("Cannot compute the dot product of an empty sequence.")
# tensorflow variables have no len
if qml.math.get_interface(coeffs) != "tensorflow":
if len(coeffs) != len(ops):
raise ValueError("Number of coefficients and operators does not match.")
if len(coeffs) == 0 and len(ops) == 0:
raise ValueError("Cannot compute the dot product of an empty sequence.")

for t in (Operator, PauliWord, PauliSentence):
if isinstance(ops, t):
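The guard added above exists because TensorFlow tensors do not always define len(); a sketch of the call it protects (illustrative, assuming the tensorflow interface is detected from the coefficients):

import tensorflow as tf
import pennylane as qml

coeffs = tf.Variable([0.5, 0.3])
# with the tensorflow interface, qml.dot skips the len()-based validation
H = qml.dot(coeffs, [qml.Z(0), qml.X(0)])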
2 changes: 0 additions & 2 deletions pennylane/ops/op_math/sprod.py
@@ -15,7 +15,6 @@
This file contains the implementation of the SProd class which contains logic for
computing the scalar product of operations.
"""
from copy import copy
from typing import Union

import pennylane as qml
@@ -148,7 +147,6 @@ def __init__(
elif (base_pauli_rep := getattr(self.base, "pauli_rep", None)) and (
self.batch_size is None
):
scalar = copy(self.scalar)

pr = {pw: qnp.dot(coeff, scalar) for pw, coeff in base_pauli_rep.items()}
self._pauli_rep = qml.pauli.PauliSentence(pr)
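Removing the copy keeps the original (possibly traced) scalar in the pauli_rep, so gradients can flow through it. A sketch under that assumption, using JAX (illustrative, not from this PR):

import jax
import pennylane as qml

def coeff_through_pauli_rep(c):
    op = qml.s_prod(c, qml.X(0))
    # the PauliSentence coefficient references the traced scalar directly
    return qml.math.real(op.pauli_rep[qml.pauli.PauliWord({0: "X"})])

print(jax.grad(coeff_through_pauli_rep)(0.5))  # expected: 1.0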
111 changes: 56 additions & 55 deletions pennylane/transforms/split_non_commuting.py
@@ -18,11 +18,11 @@

# pylint: disable=too-many-arguments,too-many-boolean-expressions

from functools import partial
from functools import partial, wraps
from typing import Optional

import pennylane as qml
from pennylane.measurements import ExpectationMP, MeasurementProcess, Shots, StateMP
from pennylane.measurements import ExpectationMP, MeasurementProcess, StateMP
from pennylane.ops import Prod, SProd, Sum
from pennylane.tape import QuantumScript, QuantumScriptBatch
from pennylane.transforms import transform
@@ -36,6 +36,16 @@ def null_postprocessing(results):
return results[0]


def shot_vector_support(initial_postprocessing: PostprocessingFn) -> PostprocessingFn:
"""Convert a postprocessing function to one with shot vector support."""

@wraps(initial_postprocessing)
def shot_vector_postprocessing(results):
return tuple(initial_postprocessing(r) for r in zip(*results))

return shot_vector_postprocessing


@transform
def split_non_commuting(
tape: QuantumScript, grouping_strategy: Optional[str] = "default"
@@ -280,13 +290,15 @@ def circuit(x):
if grouping_strategy is None:
measurements = list(single_term_obs_mps.keys())
tapes = [tape.copy(measurements=[m]) for m in measurements]
return tapes, partial(
fn = partial(
_processing_fn_no_grouping,
single_term_obs_mps=single_term_obs_mps,
offsets=offsets,
shots=tape.shots,
batch_size=tape.batch_size,
)
if tape.shots.has_partitioned_shots:
fn = shot_vector_support(fn)
return tapes, fn

if grouping_strategy == "wires" or any(
m.obs is not None and not qml.pauli.is_pauli_word(m.obs) for m in single_term_obs_mps
@@ -360,14 +372,16 @@ def _split_ham_with_grouping(tape: qml.tape.QuantumScript):
group_sizes.append(group_size)

tapes = [tape.copy(measurements=mps) for mps in mp_groups]
return tapes, partial(
fn = partial(
_processing_fn_with_grouping,
single_term_obs_mps=single_term_obs_mps,
offsets=[offset],
group_sizes=group_sizes,
shots=tape.shots,
batch_size=tape.batch_size,
)
if tape.shots.has_partitioned_shots:
fn = shot_vector_support(fn)
return tapes, fn


def _split_using_qwc_grouping(
@@ -424,16 +438,17 @@ def _split_using_qwc_grouping(
0,
)
group_sizes.append(1)

tapes = [tape.copy(measurements=mps) for mps in mp_groups]
return tapes, partial(
fn = partial(
_processing_fn_with_grouping,
single_term_obs_mps=single_term_obs_mps_grouped,
offsets=offsets,
group_sizes=group_sizes,
shots=tape.shots,
batch_size=tape.batch_size,
)
if tape.shots.has_partitioned_shots:
fn = shot_vector_support(fn)
return tapes, fn


def _split_using_wires_grouping(
@@ -497,14 +512,16 @@ def _split_using_wires_grouping(
num_groups += 1

tapes = [tape.copy(measurements=mps) for mps in mp_groups]
return tapes, partial(
fn = partial(
_processing_fn_with_grouping,
single_term_obs_mps=single_term_obs_mps_grouped,
offsets=offsets,
group_sizes=group_sizes,
shots=tape.shots,
batch_size=tape.batch_size,
)
if tape.shots.has_partitioned_shots:
fn = shot_vector_support(fn)
return tapes, fn


def _split_all_multi_term_obs_mps(tape: qml.tape.QuantumScript):
@@ -572,8 +589,7 @@ def _processing_fn_no_grouping(
res: ResultBatch,
single_term_obs_mps: dict[MeasurementProcess, tuple[list[int], list[Union[float, TensorLike]]]],
offsets: list[Union[float, TensorLike]],
shots: Shots,
batch_size: int,
batch_size: Union[None, int],
):
"""Postprocessing function for the split_non_commuting transform without grouping.

@@ -592,22 +608,31 @@
coeffs_for_each_mp = [[] for _ in offsets]

for smp_idx, (_, (mp_indices, coeffs)) in enumerate(single_term_obs_mps.items()):

for mp_idx, coeff in zip(mp_indices, coeffs):
res_batch_for_each_mp[mp_idx].append(res[smp_idx])
coeffs_for_each_mp[mp_idx].append(coeff)

return _res_for_each_mp(res_batch_for_each_mp, coeffs_for_each_mp, offsets, shots, batch_size)
result_shape = (batch_size,) if batch_size and batch_size > 1 else ()
# Sum up the results for each original measurement

res_for_each_mp = [
_sum_terms(_sub_res, coeffs, offset, result_shape)
for _sub_res, coeffs, offset in zip(res_batch_for_each_mp, coeffs_for_each_mp, offsets)
]
# res_for_each_mp should have shape (n_mps, [,n_shots] [,batch_size])
if len(res_for_each_mp) == 1:
return res_for_each_mp[0]

return tuple(res_for_each_mp)


def _processing_fn_with_grouping(
res: ResultBatch,
single_term_obs_mps: dict[
MeasurementProcess, tuple[list[int], list[Union[float, TensorLike]], int, int]
],
offsets: list[Union[float, TensorLike]],
offsets: list[TensorLike],
group_sizes: list[int],
shots: Shots,
batch_size: int,
):
"""Postprocessing function for the split_non_commuting transform with grouping.
@@ -636,26 +661,16 @@
res_group = res[group_idx] # ([n_shots] [,n_mps] [,batch_size])
group_size = group_sizes[group_idx]

if group_size > 1 and shots.has_partitioned_shots:
# Each result should have shape ([n_shots] [,batch_size])
sub_res = [_res[mp_idx_in_group] for _res in res_group]
else:
# If there is only one term in the group, the n_mps dimension would have
# been squeezed out, use the entire result directly.
sub_res = res_group if group_size == 1 else res_group[mp_idx_in_group]
# If there is only one term in the group, the n_mps dimension would have
# been squeezed out, use the entire result directly.
sub_res = res_group if group_size == 1 else res_group[mp_idx_in_group]

# Add this result to the result batch for the corresponding original measurement
for mp_idx, coeff in zip(mp_indices, coeffs):
res_batch_for_each_mp[mp_idx].append(sub_res)
coeffs_for_each_mp[mp_idx].append(coeff)

return _res_for_each_mp(res_batch_for_each_mp, coeffs_for_each_mp, offsets, shots, batch_size)


def _res_for_each_mp(res_batch_for_each_mp, coeffs_for_each_mp, offsets, shots, batch_size):
"""Helper function that combines a result batch into results for each mp"""

result_shape = _infer_result_shape(shots, batch_size)
result_shape = (batch_size,) if batch_size and batch_size > 1 else ()

# Sum up the results for each original measurement
res_for_each_mp = [
@@ -667,14 +682,6 @@ def _res_for_each_mp(res_batch_for_each_mp, coeffs_for_each_mp, offsets, shots,
if len(res_for_each_mp) == 1:
return res_for_each_mp[0]

if shots.has_partitioned_shots:
# If the shot vector dimension exists, it should be moved to the first axis
# Basically, the shape becomes (n_shots, n_mps, [,batch_size])
res_for_each_mp = [
tuple(res_for_each_mp[j][i] for j in range(len(res_for_each_mp)))
for i in range(shots.num_copies)
]

return tuple(res_for_each_mp)


@@ -685,20 +692,25 @@ def _sum_terms(
shape: tuple,
) -> Result:
"""Sum results from measurements of multiple terms in a multi-term observable."""

# Trivially return the original result
if coeffs == [1] and offset == 0:
if (
coeffs
and not qml.math.is_abstract(coeffs[0])
and not qml.math.is_abstract(offset)
and coeffs == [1]
and offset == 0
):
return res[0]

# The shape of res at this point is (n_terms, [,n_shots] [,batch_size])
dot_products = []
for c, r in zip(coeffs, res):
if qml.math.get_interface(r) == "autograd":
r = qml.math.array(r)
dot_products.append(qml.math.dot(qml.math.squeeze(r), c))
if isinstance(r, (list, tuple)):
r = qml.math.stack(r)
dot_products.append(qml.math.dot(c, qml.math.squeeze(r)))
if len(dot_products) == 0:
return qml.math.ones(shape) * offset

summed_dot_products = qml.math.sum(qml.math.stack(dot_products), axis=0)
if qml.math.get_interface(offset) == "autograd" and qml.math.requires_grad(summed_dot_products):
offset = qml.math.array(offset)
@@ -718,14 +730,3 @@ def _mp_to_obs(mp: MeasurementProcess, tape: qml.tape.QuantumScript) -> qml.oper

obs_wires = mp.wires if mp.wires else tape.wires
return qml.prod(*(qml.Z(wire) for wire in obs_wires))


def _infer_result_shape(shots: Shots, batch_size: int) -> tuple:
"""Based on the result, infer the ([,n_shots] [,batch_size]) shape of the result."""

shape = ()
if shots.has_partitioned_shots:
shape += (shots.num_copies,)
if batch_size and batch_size > 1:
shape += (batch_size,)
return shape
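A small self-contained sketch of what the new shot_vector_support wrapper does: results arrive as one entry per tape, each holding one value per shot copy, and the wrapper transposes them so the wrapped postprocessing runs once per copy. The toy postprocessing below is illustrative.

from pennylane.transforms.split_non_commuting import shot_vector_support

def summed(results):
    # stand-in for a processing function combining single-term results
    return sum(results)

wrapped = shot_vector_support(summed)

# two tapes, each measured with a shot vector of two copies
results = ((1.0, 2.0), (10.0, 20.0))
print(wrapped(results))  # (11.0, 22.0): one combined value per shot copy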
17 changes: 4 additions & 13 deletions pennylane/transforms/split_to_single_terms.py
@@ -24,6 +24,7 @@
from pennylane.transforms.split_non_commuting import (
_processing_fn_no_grouping,
_split_all_multi_term_obs_mps,
shot_vector_support,
)


@@ -162,23 +163,13 @@ def post_processing_split_sums(res):
_processing_fn_no_grouping,
single_term_obs_mps=single_term_obs_mps,
offsets=offsets,
shots=tape.shots,
batch_size=tape.batch_size,
)

if len(new_tape.measurements) == 1:
return process(res)

# we go from ((mp1_res, mp2_res, mp3_res),) as result output
# to (mp1_res, mp2_res, mp3_res) as expected by _processing_fn_no_grouping
res = res[0]
if tape.shots.has_partitioned_shots:
# swap dimension order of mps vs shot copies for _processing_fn_no_grouping
res = [
tuple(res[j][i] for j in range(tape.shots.num_copies))
for i in range(len(new_tape.measurements))
]

return process(res)
return process(res if len(new_tape.measurements) == 1 else res[0])

if tape.shots.has_partitioned_shots:
return (new_tape,), shot_vector_support(post_processing_split_sums)
return (new_tape,), post_processing_split_sums
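For context, a hedged usage sketch of the transform this file implements (the observable and circuit are illustrative):

import pennylane as qml

tape = qml.tape.QuantumScript(
    [qml.RX(0.5, wires=0)],
    [qml.expval(qml.Z(0) + 2 * qml.X(0))],
)
(new_tape,), fn = qml.transforms.split_to_single_terms(tape)
# new_tape measures Z(0) and X(0) as separate expectation values; fn
# recombines them, wrapped by shot_vector_support when shots are partitioned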
1 change: 1 addition & 0 deletions pennylane/workflow/execution.py
@@ -44,6 +44,7 @@ def execute(
device: Union["qml.devices.LegacyDevice", "qml.devices.Device"],
diff_method: Optional[Union[Callable, str, qml.transforms.core.TransformDispatcher]] = None,
interface: Optional[Union[str, Interface]] = Interface.AUTO,
*,
transform_program=None,
inner_transform=None,
config=None,
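The added * makes every argument after interface keyword-only. A sketch of a call that remains valid under this signature (tape and device are illustrative):

import pennylane as qml

dev = qml.device("default.qubit", wires=1)
tape = qml.tape.QuantumScript([qml.RX(0.5, wires=0)], [qml.expval(qml.Z(0))])

# transform_program, inner_transform, config, ... must now be passed by keyword
results = qml.execute([tape], dev, diff_method="parameter-shift")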