From 0963ae0151b76576b85df721a4bb7a0406958202 Mon Sep 17 00:00:00 2001
From: Hans Dembinski <HDembinski@users.noreply.github.com>
Date: Wed, 30 Aug 2023 14:49:31 +0200
Subject: [PATCH] Warn on errordef override (#937)

Closes #930

- Documentation is added to the `iminuit.cost` module which explains
that errordef should not be set manually when the builtin cost functions
are used.
- Minuit now warns the user if Minuit.errordef is set if also the cost
function has an errordef attribute, but the value set via
Minuit.errordef still overrides the Cost.errordef attribute.
---
 .github/workflows/test.yml | 20 ++++++----------
 doc/notebooks/basic.ipynb  | 48 ++++++++++++++++++++++++++++++++++++--
 src/iminuit/cost.py        |  4 ++++
 src/iminuit/minuit.py      | 36 ++++++++++++++++++++--------
 src/iminuit/warnings.py    |  4 ++++
 tests/test_minuit.py       | 20 +++++++++++++++-
 6 files changed, 106 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c15282e9..03542c0e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -40,33 +40,27 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
     - run: python -m pip install --upgrade pip wheel
-    # python -m pip install .[test] is not used here to test minimum (faster)
+    # python -m pip install .[test] is not used here to test minimum (faster),
+    # the cov workflow runs all tests
     - run: python -m pip install -v . pytest
     - run: python -m pytest
 
-  # The aarch64 test is very slow, that's why we do not run it
-  #
   # aarch64:
-  #   strategy:
-  #     matrix:
-  #       py: cp39
-  #       arch: [aarch64]
-  #     fail-fast: false
   #   runs-on: ubuntu-latest
   #   env:
-  #     py: /opt/python/${{ matrix.py }}-${{ matrix.py }}/bin/python
-  #     img: quay.io/pypa/manylinux2014_${{ matrix.arch }}
+  #     py: /opt/python/cp309-cp309/bin/python
+  #     img: quay.io/pypa/manylinux2014_aarch64
   #   steps:
-  #   - uses: actions/checkout@v2
+  #   - uses: actions/checkout@v3
   #     with:
   #       submodules: true
-  #   - uses: docker/setup-qemu-action@v1
+  #   - uses: docker/setup-qemu-action@v2
   #   - run: >
   #           docker run --rm -v ${{ github.workspace }}:/ws:rw --workdir=/ws
   #           ${{ env.img }}
   #           bash -exc '${{ env.py }} -m venv venv &&
   #           source venv/bin/activate &&
-  #           python -m pip install --upgrade pip wheel &&
+  #           python -m pip install --upgrade pip &&
   #           python -m pip install . pytest'
   #   - run: >
   #           docker run --rm -v ${{ github.workspace }}:/ws:rw --workdir=/ws
diff --git a/doc/notebooks/basic.ipynb b/doc/notebooks/basic.ipynb
index 22c924df..72850b26 100644
--- a/doc/notebooks/basic.ipynb
+++ b/doc/notebooks/basic.ipynb
@@ -648,7 +648,7 @@
                 "\n",
                 "If you like to understand the origin of these numbers, have a look into the study **Hesse and Minos**, which explains in depth how uncertainties are computed.\n",
                 "\n",
-                "For our custom cost function, we could set `m.errordef=1` or `m.errordef=Minuit.LEAST_SQUARES`, which is more readable. An even better way is to add an attribute called `errordef` to the cost function. If such an attribute is present, Minuit uses it. Since this cost function has the default scaling, we do not need to set anything, but keep it in mind for negative log-likelihoods."
+                "For our custom cost function, we could set `m.errordef=1` or `m.errordef=Minuit.LEAST_SQUARES`, which is more readable."
             ]
         },
         {
@@ -678,12 +678,56 @@
             ]
         },
         {
-            "attachments": {},
             "cell_type": "markdown",
             "metadata": {},
             "source": [
                 "The reported errors are now by a factor `sqrt(2)` smaller than they really are.\n",
                 "\n",
+                "An even better way is to add an attribute called `errordef` to the cost function. If such an attribute is present, Minuit uses it. Since this cost function has the default scaling, we do not need to set anything, but keep it in mind for negative log-likelihoods."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# artificial cost function that scales like a negative log-likelihood\n",
+                "def custom_least_squares_2(a, b):\n",
+                "    return 0.5 * custom_least_squares(a, b)\n",
+                "\n",
+                "# Instead of calling Minuit.errordef, we assign an errordef attribute to the cost\n",
+                "# function. Minuit will automatically use this value.\n",
+                "custom_least_squares_2.errordef = Minuit.LIKELIHOOD\n",
+                "\n",
+                "m = Minuit(custom_least_squares_2, 1, 2)\n",
+                "m.migrad()  # uses the correct errordef automatically"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "We get the correct errors. The built-in cost functions from the module `iminuit.cost` all define the `errordef` attribute, so you don't need to worry about that.\n",
+                "\n",
+                "If the cost function defines the `errordef`, it should not be necessary to set it to another value, so `Minuit` warns you if you try to set it."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# raises a warning\n",
+                "m.errordef = 1"
+            ]
+        },
+        {
+            "attachments": {},
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
                 "### Advanced: Initial step sizes\n",
                 "\n",
                 "Minuit uses a gradient-descent method to find the minimum, and the gradient is computed numerically using finite differences. The initial step size is used to compute the first gradient. A good step size is small compared to the curvature of the function, but large compared to numerical resolution. Using a good step size can slightly accelerate the convergence, but Minuit is not very sensitive to the choice. If you don't provide a value, iminuit will guess a step size based on a heuristic.\n",
diff --git a/src/iminuit/cost.py b/src/iminuit/cost.py
index d7f0a9ee..a9fb67fe 100644
--- a/src/iminuit/cost.py
+++ b/src/iminuit/cost.py
@@ -5,6 +5,10 @@
 fits. The cost functions optionally use Numba to accelerate some calculations, if Numba
 is installed.
 
+**There is no need** to set :attr:`iminuit.Minuit.errordef` manually for any of these
+cost functions. :class:`iminuit.Minuit` automatically uses the correct value, which is
+provided by each cost function with the attribute ``Cost.errordef``.
+
 What to use when
 ----------------
 - Fit a normalised probability density to data
diff --git a/src/iminuit/minuit.py b/src/iminuit/minuit.py
index 3a50c165..f4f1a3eb 100644
--- a/src/iminuit/minuit.py
+++ b/src/iminuit/minuit.py
@@ -16,6 +16,7 @@
     MnUserParameterState,
     FunctionMinimum,
 )
+from iminuit.warnings import ErrordefAlreadySetWarning
 import numpy as np
 from typing import (
     Union,
@@ -108,6 +109,13 @@ def errordef(self) -> float:
         the :download:`MINUIT2 User's Guide <mnusersguide.pdf>`. This parameter is also
         called *UP* in MINUIT documents.
 
+        If FCN has an attribute ``errordef``, its value is used automatically and you
+        should not set errordef by hand. Doing so will raise a
+        ErrordefAlreadySetWarning.
+
+        For the builtin cost functions in :mod:`iminuit.cost`, you don't need to set
+        this value, because they all have the ``errordef`` attribute set.
+
         To make user code more readable, we provided two named constants::
 
             m_lsq = Minuit(a_least_squares_function)
@@ -120,6 +128,13 @@ def errordef(self) -> float:
 
     @errordef.setter
     def errordef(self, value: float) -> None:
+        fcn_errordef = getattr(self._fcn._fcn, "errordef", None)
+        if fcn_errordef is not None:
+            msg = (
+                f"cost function has an errordef attribute equal to {fcn_errordef}, "
+                "you should not override this with Minuit.errordef"
+            )
+            warnings.warn(msg, ErrordefAlreadySetWarning)
         if value <= 0:
             raise ValueError(f"errordef={value} must be a positive number")
         self._fcn._errordef = value
@@ -926,7 +941,7 @@ def scan(self, ncall: int = None) -> "Minuit":
 
         def run(ipar: int) -> None:
             if ipar == self.npar:
-                r = self.fcn(x[: self.npar])
+                r = self._fcn(x[: self.npar])
                 if r < x[self.npar]:
                     x[self.npar] = r
                     self.values[:] = x[: self.npar]
@@ -1126,9 +1141,9 @@ def __call__(self, par, v):
                         self.vec[self.free] = v
                         return self.fcn(*self.par, self.vec)[self.free]
 
-        fcn = Wrapped(self.fcn._fcn)
+        fcn = Wrapped(self._fcn._fcn)
 
-        grad = self.fcn._grad
+        grad = self._fcn._grad
         grad = WrappedGrad(grad) if grad else None
 
         if hess:
@@ -1239,9 +1254,9 @@ def __call__(self, par, v):
         if self.print_level > 0:
             print(r)
 
-        self.fcn._nfcn += r["nfev"]
+        self._fcn._nfcn += r["nfev"]
         if grad:
-            self.fcn._ngrad += r.get("njev", 0)
+            self._fcn._ngrad += r.get("njev", 0)
 
         # Get inverse Hesse matrix, working around many inconsistencies in scipy.
         # Try in order:
@@ -1709,7 +1724,7 @@ def profile(
         values = np.array(self.values)
         for i, vi in enumerate(x):
             values[ipar] = vi
-            y[i] = self.fcn(values)
+            y[i] = self._fcn(values)
 
         if subtract_min:
             y -= np.min(y)
@@ -2298,7 +2313,7 @@ def plot_with_frame(args, from_fit, report_success):
                 trans = plt.gca().transAxes
                 try:
                     with warnings.catch_warnings():
-                        if self.fcn._array_call:
+                        if self._fcn._array_call:
                             plot([args], **kwargs)  # prevent unpacking of array
                         else:
                             plot(args, **kwargs)
@@ -2321,7 +2336,7 @@ def plot_with_frame(args, from_fit, report_success):
                 if from_fit:
                     fval = self.fmin.fval
                 else:
-                    fval = self.fcn(args)
+                    fval = self._fcn(args)
                 plt.text(
                     0.05,
                     1.05,
@@ -2618,15 +2633,16 @@ def _repr_pretty_(self, p, cycle):
             p.text(str(self))
 
     def _visualize(self, plot):
-        pyfcn = self.fcn._fcn
+        pyfcn = self._fcn._fcn
         if plot is None:
             if hasattr(pyfcn, "visualize"):
                 plot = pyfcn.visualize
             else:
-                raise AttributeError(
+                msg = (
                     f"class {pyfcn.__class__.__name__} has no visualize method, "
                     "please use the 'plot' keyword to pass a visualization function"
                 )
+                raise AttributeError(msg)
         return plot
 
     def _experimental_mncontour(
diff --git a/src/iminuit/warnings.py b/src/iminuit/warnings.py
index fef598aa..8c5c2ea8 100644
--- a/src/iminuit/warnings.py
+++ b/src/iminuit/warnings.py
@@ -13,5 +13,9 @@ class HesseFailedWarning(IMinuitWarning):
     """HESSE failed warning."""
 
 
+class ErrordefAlreadySetWarning(IMinuitWarning):
+    """The errordef attribute is already defined by the cost function."""
+
+
 class PerformanceWarning(UserWarning):
     """Warning about performance issues."""
diff --git a/tests/test_minuit.py b/tests/test_minuit.py
index fd7e7909..5fa9cb66 100644
--- a/tests/test_minuit.py
+++ b/tests/test_minuit.py
@@ -4,7 +4,7 @@
 from numpy.testing import assert_allclose, assert_equal
 from iminuit import Minuit
 from iminuit.util import Param, make_func_code
-from iminuit.warnings import IMinuitWarning
+from iminuit.warnings import IMinuitWarning, ErrordefAlreadySetWarning
 from iminuit.typing import Annotated
 from pytest import approx
 from argparse import Namespace
@@ -1709,3 +1709,21 @@ def cost(a, b):
 
     with pytest.raises(ValueError, match="provided gradient is not a CostGradient"):
         Minuit(cost, 0, 0, grad="foo")
+
+
+def test_errordef_already_set_warning():
+    def cost(a, b):
+        return a**2 + b**2
+
+    cost.errordef = 1
+
+    m = Minuit(cost, 0, 0)
+    m.hesse()
+    assert_allclose(m.errors, [1, 1])
+
+    with pytest.warns(ErrordefAlreadySetWarning):
+        m.errordef = 4
+
+    # check that cost.errordef value is still overridden
+    m.hesse()
+    assert_allclose(m.errors, [2, 2])