Support adding custom metrics to existing evaluations #5436

Merged 2 commits on Jan 28, 2025
docs/source/user_guide/evaluation.rst (18 additions, 0 deletions)
@@ -2085,6 +2085,7 @@ When using metric operators without custom parameters, you can also pass a list
of operator URI's to the `custom_metrics` parameter:

.. code-block:: python
+    :linenos:

    # Apply two custom metrics to a regression evaluation
    results = dataset.evaluate_regressions(
@@ -2097,6 +2098,23 @@ of operator URI's to the `custom_metrics` parameter:
        ],
    )
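
The middle of the example above falls outside the diff context and is not
shown; a complete call would look roughly as follows (the ``predictions`` and
``ground_truth`` field names and the ``eval`` key are illustrative assumptions,
not taken from this diff):

.. code-block:: python

    # Field names and eval key below are hypothetical placeholders
    results = dataset.evaluate_regressions(
        "predictions",
        gt_field="ground_truth",
        eval_key="eval",
        custom_metrics=[
            "@voxel51/metric-examples/absolute_error",
            "@voxel51/metric-examples/squared_error",
        ],
    )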

+You can also add custom metrics to an existing evaluation at any time via
+:meth:`add_custom_metrics() <fiftyone.utils.eval.base.BaseEvaluationResults.add_custom_metrics>`:
+
+.. code-block:: python
+    :linenos:
+
+    # Load an existing evaluation run
+    results = dataset.load_evaluation_results("eval")
+
+    # Add some custom metrics
+    results.add_custom_metrics(
+        [
+            "@voxel51/metric-examples/absolute_error",
+            "@voxel51/metric-examples/squared_error",
+        ]
+    )
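
Per the ``custom_metrics`` docstrings added in this PR, a dict mapping metric
URIs to kwargs dicts is also accepted, which lets you pass per-metric
parameters. A minimal sketch (the metric URI and ``threshold`` kwarg below are
hypothetical placeholders, not operators referenced in this diff):

.. code-block:: python

    results = dataset.load_evaluation_results("eval")

    # Dict form: map each metric URI to a kwargs dict (or None)
    results.add_custom_metrics(
        {
            "@your-namespace/your-plugin/your_metric": {"threshold": 0.5},
            "@voxel51/metric-examples/absolute_error": None,
        }
    )

When no metric has kwargs, the run config keeps the simpler list form (see the
``all(v is None ...)`` check in ``add_custom_metrics()`` below).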

Developing custom metrics
-------------------------

fiftyone/utils/eval/base.py (71 additions, 10 deletions)
@@ -28,6 +28,8 @@ class BaseEvaluationMethodConfig(foe.EvaluationMethodConfig):
    """Base class for configuring evaluation methods.

    Args:
+        custom_metrics (None): an optional list of custom metrics to compute
+            or dict mapping metric names to kwargs dicts
        **kwargs: any leftover keyword arguments after subclasses have done
            their parsing
    """
@@ -42,17 +44,27 @@ class BaseEvaluationMethod(foe.EvaluationMethod):
        config: an :class:`BaseEvaluationMethodConfig`
    """

-    def _get_custom_metrics(self):
+    def _get_custom_metrics(self, metric_uris=None):
        if not self.config.custom_metrics:
            return {}

        if isinstance(self.config.custom_metrics, list):
            return {m: None for m in self.config.custom_metrics}

-        return self.config.custom_metrics
+        custom_metrics = self.config.custom_metrics
+
+        if metric_uris is not None:
+            custom_metrics = {
+                k: v for k, v in custom_metrics.items() if k in metric_uris
+            }
+
+        return custom_metrics

-    def compute_custom_metrics(self, samples, eval_key, results):
-        for metric, kwargs in self._get_custom_metrics().items():
+    def compute_custom_metrics(
+        self, samples, eval_key, results, metric_uris=None
+    ):
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric, kwargs in custom_metrics.items():
            try:
                operator = foo.get_operator(metric)
                value = operator.compute(samples, results, **kwargs or {})
@@ -79,10 +91,11 @@ def compute_custom_metrics(self, samples, eval_key, results):
                    e,
                )

-    def get_custom_metric_fields(self, samples, eval_key):
+    def get_custom_metric_fields(self, samples, eval_key, metric_uris=None):
        fields = []

-        for metric in self._get_custom_metrics().keys():
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric in custom_metrics.keys():
            try:
                operator = foo.get_operator(metric)
                fields.extend(
@@ -97,8 +110,11 @@ def get_custom_metric_fields(self, samples, eval_key):

        return fields

-    def rename_custom_metrics(self, samples, eval_key, new_eval_key):
-        for metric in self._get_custom_metrics().keys():
+    def rename_custom_metrics(
+        self, samples, eval_key, new_eval_key, metric_uris=None
+    ):
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric in custom_metrics.keys():
            try:
                operator = foo.get_operator(metric)
                operator.rename(samples, self.config, eval_key, new_eval_key)
@@ -109,8 +125,9 @@ def rename_custom_metrics(self, samples, eval_key, new_eval_key):
                    e,
                )

-    def cleanup_custom_metrics(self, samples, eval_key):
-        for metric in self._get_custom_metrics().keys():
+    def cleanup_custom_metrics(self, samples, eval_key, metric_uris=None):
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric in custom_metrics.keys():
            try:
                operator = foo.get_operator(metric)
                operator.cleanup(samples, self.config, eval_key)
@@ -144,6 +161,50 @@ def __init__(
        super().__init__(samples, config, eval_key, backend=backend)
        self.custom_metrics = custom_metrics

+    def add_custom_metrics(self, custom_metrics, overwrite=True):
+        """Computes the given custom metrics and adds them to these results.
+
+        Args:
+            custom_metrics: a list of custom metrics to compute or a dict
+                mapping metric names to kwargs dicts
+            overwrite (True): whether to recompute any custom metrics that
+                have already been applied
+        """
+        _custom_metrics = self.config.custom_metrics
+
+        if _custom_metrics is None:
+            _custom_metrics = {}
+
+        if isinstance(_custom_metrics, list):
+            _custom_metrics = {k: None for k in _custom_metrics}
+
+        if isinstance(custom_metrics, list):
+            custom_metrics = {k: None for k in custom_metrics}
+
+        if not overwrite:
+            custom_metrics = {
+                k: v
+                for k, v in custom_metrics.items()
+                if k not in _custom_metrics
+            }
+
+        if not custom_metrics:
+            return
+
+        metric_uris = list(custom_metrics.keys())
+
+        _custom_metrics.update(custom_metrics)
+        if all(v is None for v in _custom_metrics.values()):
+            _custom_metrics = list(_custom_metrics.keys())
+
+        self.config.custom_metrics = _custom_metrics
+        self.save_config()
+
+        self.backend.compute_custom_metrics(
+            self.samples, self.key, self, metric_uris=metric_uris
+        )
+        self.save()

    def metrics(self, *args, **kwargs):
        """Returns the metrics associated with this evaluation run.

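Taken together, the ``add_custom_metrics()`` method added above supports a
workflow along these lines (a minimal sketch; the dataset name, the presence of
an existing ``eval`` run, and the availability of the
``@voxel51/metric-examples`` plugin are assumptions):

.. code-block:: python

    import fiftyone as fo

    # Assumes a dataset that already has an evaluation run with key "eval"
    dataset = fo.load_dataset("my-dataset")

    # Load the existing evaluation and add a custom metric to it
    results = dataset.load_evaluation_results("eval")
    results.add_custom_metrics(["@voxel51/metric-examples/absolute_error"])

    # With overwrite=False, metrics already recorded in the run config are
    # filtered out before computation, so this second call is a no-op
    results.add_custom_metrics(
        ["@voxel51/metric-examples/absolute_error"],
        overwrite=False,
    )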