Support adding custom metrics to existing evaluations #5436

Merged 2 commits on Jan 28, 2025
docs/source/user_guide/evaluation.rst (18 additions, 0 deletions)
@@ -2085,6 +2085,7 @@ When using metric operators without custom parameters, you can also pass a list
of operator URI's to the `custom_metrics` parameter:

.. code-block:: python
+    :linenos:

    # Apply two custom metrics to a regression evaluation
    results = dataset.evaluate_regressions(
@@ -2097,6 +2098,23 @@ of operator URI's to the `custom_metrics` parameter:
        ],
    )
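
The middle of the example above falls outside the diff context and is not
shown; a complete call would look roughly as follows (the ``predictions`` and
``ground_truth`` field names and the ``eval`` key are illustrative assumptions,
not taken from this diff):

.. code-block:: python

    # Field names and eval key below are hypothetical placeholders
    results = dataset.evaluate_regressions(
        "predictions",
        gt_field="ground_truth",
        eval_key="eval",
        custom_metrics=[
            "@voxel51/metric-examples/absolute_error",
            "@voxel51/metric-examples/squared_error",
        ],
    )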

+You can also add custom metrics to an existing evaluation at any time via
+:meth:`add_custom_metrics() <fiftyone.utils.eval.base.BaseEvaluationResults.add_custom_metrics>`:
+
+.. code-block:: python
+    :linenos:
+
+    # Load an existing evaluation run
+    results = dataset.load_evaluation_results("eval")
+
+    # Add some custom metrics
+    results.add_custom_metrics(
+        [
+            "@voxel51/metric-examples/absolute_error",
+            "@voxel51/metric-examples/squared_error",
+        ]
+    )
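
Per the ``custom_metrics`` docstrings added in this PR, a dict mapping metric
URIs to kwargs dicts is also accepted, which lets you pass per-metric
parameters. A minimal sketch (the metric URI and ``threshold`` kwarg below are
hypothetical placeholders, not operators referenced in this diff):

.. code-block:: python

    results = dataset.load_evaluation_results("eval")

    # Dict form: map each metric URI to a kwargs dict (or None)
    results.add_custom_metrics(
        {
            "@your-namespace/your-plugin/your_metric": {"threshold": 0.5},
            "@voxel51/metric-examples/absolute_error": None,
        }
    )

When no metric has kwargs, the run config keeps the simpler list form (see the
``all(v is None ...)`` check in ``add_custom_metrics()`` below).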

Developing custom metrics
-------------------------

fiftyone/utils/eval/base.py (71 additions, 10 deletions)
@@ -28,6 +28,8 @@ class BaseEvaluationMethodConfig(foe.EvaluationMethodConfig):
    """Base class for configuring evaluation methods.

    Args:
+        custom_metrics (None): an optional list of custom metrics to compute
+            or dict mapping metric names to kwargs dicts
        **kwargs: any leftover keyword arguments after subclasses have done
            their parsing
    """
@@ -42,17 +44,27 @@ class BaseEvaluationMethod(foe.EvaluationMethod):
        config: an :class:`BaseEvaluationMethodConfig`
    """

-    def _get_custom_metrics(self):
+    def _get_custom_metrics(self, metric_uris=None):
        if not self.config.custom_metrics:
            return {}

        if isinstance(self.config.custom_metrics, list):
            return {m: None for m in self.config.custom_metrics}

-        return self.config.custom_metrics
+        custom_metrics = self.config.custom_metrics
+
+        if metric_uris is not None:
+            custom_metrics = {
+                k: v for k, v in custom_metrics.items() if k in metric_uris
+            }
+
+        return custom_metrics

-    def compute_custom_metrics(self, samples, eval_key, results):
-        for metric, kwargs in self._get_custom_metrics().items():
+    def compute_custom_metrics(
+        self, samples, eval_key, results, metric_uris=None
+    ):
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric, kwargs in custom_metrics.items():
            try:
                operator = foo.get_operator(metric)
                value = operator.compute(samples, results, **kwargs or {})
@@ -79,10 +91,11 @@ def compute_custom_metrics(self, samples, eval_key, results):
                    e,
                )

-    def get_custom_metric_fields(self, samples, eval_key):
+    def get_custom_metric_fields(self, samples, eval_key, metric_uris=None):
        fields = []

-        for metric in self._get_custom_metrics().keys():
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric in custom_metrics.keys():
            try:
                operator = foo.get_operator(metric)
                fields.extend(
@@ -97,8 +110,11 @@ def get_custom_metric_fields(self, samples, eval_key):

        return fields

-    def rename_custom_metrics(self, samples, eval_key, new_eval_key):
-        for metric in self._get_custom_metrics().keys():
+    def rename_custom_metrics(
+        self, samples, eval_key, new_eval_key, metric_uris=None
+    ):
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric in custom_metrics.keys():
            try:
                operator = foo.get_operator(metric)
                operator.rename(samples, self.config, eval_key, new_eval_key)
@@ -109,8 +125,9 @@ def rename_custom_metrics(self, samples, eval_key, new_eval_key):
                    e,
                )

-    def cleanup_custom_metrics(self, samples, eval_key):
-        for metric in self._get_custom_metrics().keys():
+    def cleanup_custom_metrics(self, samples, eval_key, metric_uris=None):
+        custom_metrics = self._get_custom_metrics(metric_uris=metric_uris)
+        for metric in custom_metrics.keys():
            try:
                operator = foo.get_operator(metric)
                operator.cleanup(samples, self.config, eval_key)
@@ -144,6 +161,50 @@ def __init__(
        super().__init__(samples, config, eval_key, backend=backend)
        self.custom_metrics = custom_metrics

+    def add_custom_metrics(self, custom_metrics, overwrite=True):
+        """Computes the given custom metrics and adds them to these results.
+
+        Args:
+            custom_metrics: a list of custom metrics to compute or a dict
+                mapping metric names to kwargs dicts
+            overwrite (True): whether to recompute any custom metrics that
+                have already been applied
+        """
+        _custom_metrics = self.config.custom_metrics
+
+        if _custom_metrics is None:
+            _custom_metrics = {}
+
+        if isinstance(_custom_metrics, list):
+            _custom_metrics = {k: None for k in _custom_metrics}
+
+        if isinstance(custom_metrics, list):
+            custom_metrics = {k: None for k in custom_metrics}
+
+        if not overwrite:
+            custom_metrics = {
+                k: v
+                for k, v in custom_metrics.items()
+                if k not in _custom_metrics
+            }
+
+        if not custom_metrics:
+            return
+
+        metric_uris = list(custom_metrics.keys())
+
+        _custom_metrics.update(custom_metrics)
+        if all(v is None for v in _custom_metrics.values()):
+            _custom_metrics = list(_custom_metrics.keys())
+
+        self.config.custom_metrics = _custom_metrics
+        self.save_config()
+
+        self.backend.compute_custom_metrics(
+            self.samples, self.key, self, metric_uris=metric_uris
+        )
+        self.save()

    def metrics(self, *args, **kwargs):
        """Returns the metrics associated with this evaluation run.

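Taken together, the ``add_custom_metrics()`` method added above supports a
workflow along these lines (a minimal sketch; the dataset name, the presence of
an existing ``eval`` run, and the availability of the
``@voxel51/metric-examples`` plugin are assumptions):

.. code-block:: python

    import fiftyone as fo

    # Assumes a dataset that already has an evaluation run with key "eval"
    dataset = fo.load_dataset("my-dataset")

    # Load the existing evaluation and add a custom metric to it
    results = dataset.load_evaluation_results("eval")
    results.add_custom_metrics(["@voxel51/metric-examples/absolute_error"])

    # With overwrite=False, metrics already recorded in the run config are
    # filtered out before computation, so this second call is a no-op
    results.add_custom_metrics(
        ["@voxel51/metric-examples/absolute_error"],
        overwrite=False,
    )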