Parameter-specific learning rates for Gradient Descent (#569)
* feat: adds multi-dimensional learning rate gradient descent implementation
* fix: update prior bounds for mcmc integration tests
* tests: adds coverage, refactors optimisation unit tests
* tests: adds missing coverage for negative learning rate ValueError
* docs: add Pints' reference to docstring
* add changelog entry
* docs: adds mathjax rendering to docstring
1 parent 2f1a4b4 · commit 5deb93f
Showing 10 changed files with 251 additions and 90 deletions.
@@ -0,0 +1,137 @@
import numpy as np
import pints

class GradientDescentImpl(pints.Optimiser):
    """
    Gradient descent method with a fixed, per-dimension learning rate.

    Gradient descent updates the current position in the direction of
    steepest descent, as determined by the negative of the gradient of
    the function.

    The update rule for each iteration is given by:

    .. math::
        x_{t+1} = x_t - \\eta \\nabla f(x_t)

    where:

    - :math:`x_t` are the current parameter values at iteration :math:`t`,
    - :math:`\\nabla f(x_t)` is the gradient of the function at :math:`x_t`,
    - :math:`\\eta` is the learning rate, which controls the step size.
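
    For example, with per-dimension learning rates
    :math:`\\eta = (0.1, 0.01)` and gradient :math:`\\nabla f(x_t) = (2, 4)`,
    the step is the elementwise product :math:`(0.2, 0.04)`, so each
    dimension moves at its own rate.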

    This class reimplements Pints' gradient descent, but with
    multidimensional, fixed learning rates. Original creation and credit
    is attributed to Pints.

    Parameters
    ----------
    x0 : array-like
        Initial starting point for the optimisation. This should be a 1D
        array representing the starting parameter values for the function
        being optimised.
    sigma0 : float or array-like, optional
        Initial learning rate or rates for each dimension. If a scalar is
        provided, the same learning rate is applied across all dimensions.
        If an array is provided, each dimension will have its own learning
        rate. Defaults to 0.02.
    boundaries : pybop.Boundaries, optional
        Boundaries for the parameters. This optimiser ignores boundaries
        and operates as an unbounded method. Defaults to None.

    Attributes
    ----------
    _x_best : array-like
        The best parameter values (solution) found so far.
    _f_best : float
        The best function value (objective value) found so far.
    _current : array-like
        The current parameter values at the latest iteration.
    _eta : array-like
        The current learning rate(s). Can be a scalar or per-dimension array.
    _running : bool
        Indicates whether the optimisation process is running.
    _ready_for_tell : bool
        Indicates whether the optimiser is ready to receive feedback from
        the objective function.
    """

    def __init__(self, x0, sigma0=0.02, boundaries=None):
        super().__init__(x0, sigma0, boundaries)

        # Initialise state
        self._x_best = self._current = self._x0
        self._f_best = np.inf
        self._eta = np.asarray(sigma0, dtype=float)

        # State tracking
        self._running = False
        self._ready_for_tell = False

    def ask(self):
        """Proposes the next point for evaluation."""
        self._ready_for_tell = True
        self._running = True
        return [self._current]

    def tell(self, reply):
        """Updates optimiser with function evaluation results."""
        if not self._ready_for_tell:
            raise RuntimeError("ask() must be called before tell().")
        self._ready_for_tell = False

        fx, dfx = reply[0]

        # Track best solution before stepping: fx was evaluated at the
        # current position, so that is the point to record as the best
        if fx < self._f_best:
            self._f_best, self._x_best = fx, self._current

        # Update state: take a per-dimension step along the negative gradient
        self._current_f, self._current_df = fx, dfx
        self._current = self._current - self._eta * dfx

    def f_best(self):
        """Returns the best objective value found."""
        return self._f_best

    def x_best(self):
        """Returns the best solution found."""
        return self._x_best

    def learning_rate(self):
        """Returns the learning rate(s)."""
        return self._eta

    def set_learning_rate(self, eta):
        """
        Sets the learning rate. Supports per-dimension rates.

        Parameters
        ----------
        eta : float or array-like
            New learning rate(s).
        """
        eta = np.asarray(eta, dtype=float)
        if np.any(eta <= 0):
            raise ValueError("Learning rate(s) must be positive.")
        self._eta = eta

    def needs_sensitivities(self):
        """Indicates this optimiser requires gradient information."""
        return True

    def running(self):
        """Returns whether the optimiser is running."""
        return self._running

    def name(self):
        """Returns the name of the optimiser."""
        return "Gradient descent"

    def n_hyper_parameters(self):
        """Returns the number of hyper-parameters (learning rate)."""
        return self._eta.size if self._eta.ndim > 0 else 1

    def set_hyper_parameters(self, x):
        """Sets hyper-parameters (learning rate)."""
        self.set_learning_rate(x)
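
For context, here is a minimal sketch of how the ask/tell interface above could be driven by hand, using a simple quadratic objective rather than a PyBOP problem. The objective, gradient, starting point, and loop are illustrative assumptions, not part of this commit.

import numpy as np

# Quadratic objective f(x) = sum(x^2), whose gradient is 2x (illustrative only)
opt = GradientDescentImpl(x0=np.array([3.0, -2.0]), sigma0=0.02)
opt.set_learning_rate(np.array([0.1, 0.02]))  # per-dimension rates

for _ in range(200):
    xs = opt.ask()  # list containing the single current position
    replies = [(float(np.sum(x**2)), 2.0 * x) for x in xs]  # (f(x), grad f(x))
    opt.tell(replies)

print(opt.x_best(), opt.f_best())

Because needs_sensitivities() returns True, each reply must pair the objective value with its gradient; tell() then scales the gradient elementwise by the per-dimension rates.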