Skip to content

Commit e780cca

Browse files
author
Songki Choi
authored
Fix F1 auto-threshold to choose best largest confidence (#2371)
* Fix F1 auto-threshold to choose best largest confidence * Update license notice * Update change log --------- Signed-off-by: Songki Choi <songki.choi@intel.com>
1 parent 48989b2 commit e780cca

File tree

3 files changed

+28
-20
lines changed

3 files changed

+28
-20
lines changed

CHANGELOG.md

+18
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,24 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## \[v1.5.0\]
6+
7+
### New features
8+
9+
-
10+
11+
### Enhancements
12+
13+
-
14+
15+
### Bug fixes
16+
17+
- Fix F1 auto-threshold to choose best largest confidence (<https://github.com/openvinotoolkit/training_extensions/pull/2371>)
18+
19+
### Known issues
20+
21+
- OpenVINO(==2023.0) IR inference is not working well on 2-stage models (e.g. Mask-RCNN) exported from torch==1.13.1
22+
523
## \[v1.4.0\]
624

725
### New features

src/otx/api/usecases/evaluation/f_measure.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
"""This module contains the f-measure performance provider class."""
2-
3-
# Copyright (C) 2021-2022 Intel Corporation
2+
# Copyright (C) 2021-2023 Intel Corporation
43
# SPDX-License-Identifier: Apache-2.0
54
#
65

7-
86
import logging
97
from typing import Dict, List, Optional, Tuple
108

@@ -363,7 +361,7 @@ def get_results_per_confidence(
363361
result.f_measure_curve[class_name].append(result_point[class_name].f_measure)
364362
result.precision_curve[class_name].append(result_point[class_name].precision)
365363
result.recall_curve[class_name].append(result_point[class_name].recall)
366-
if all_classes_f_measure > result.best_f_measure:
364+
if all_classes_f_measure > 0.0 and all_classes_f_measure >= result.best_f_measure:
367365
result.best_f_measure = all_classes_f_measure
368366
result.best_threshold = confidence_threshold
369367
return result
@@ -417,7 +415,7 @@ def get_results_per_nms(
417415
result.precision_curve[class_name].append(result_point[class_name].precision)
418416
result.recall_curve[class_name].append(result_point[class_name].recall)
419417

420-
if all_classes_f_measure >= result.best_f_measure:
418+
if all_classes_f_measure > 0.0 and all_classes_f_measure >= result.best_f_measure:
421419
result.best_f_measure = all_classes_f_measure
422420
result.best_threshold = nms_threshold
423421
return result

tests/unit/api/usecases/evaluation/test_f_measure.py

+7-15
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,6 @@
1-
# Copyright (C) 2020-2021 Intel Corporation
1+
# Copyright (C) 2020-2023 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
23
#
3-
# Licensed under the Apache License, Version 2.0 (the "License");
4-
# you may not use this file except in compliance with the License.
5-
# You may obtain a copy of the License at
6-
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
8-
#
9-
# Unless required by applicable law or agreed to in writing,
10-
# software distributed under the License is distributed on an "AS IS" BASIS,
11-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-
# See the License for the specific language governing permissions
13-
# and limitations under the License.
144

155
import datetime
166
from typing import cast
@@ -962,7 +952,7 @@ def test_f_measure_calculator_get_results_per_confidence(self):
962952
# Check "_AggregatedResults" object returned by "get_results_per_confidence" when All Classes f-measure is more
963953
# than best f-measure in results_per_confidence
964954
expected_results_per_confidence = _AggregatedResults(["class_1", "class_2"])
965-
for confidence_threshold in np.arange(*[0.6, 0.9]):
955+
for confidence_threshold in np.arange(*[0.6, 0.9, 0.1]):
966956
result_point = f_measure_calculator.evaluate_classes(
967957
classes=["class_1", "class_2"],
968958
iou_threshold=0.7,
@@ -978,7 +968,7 @@ def test_f_measure_calculator_get_results_per_confidence(self):
978968

979969
actual_results_per_confidence = f_measure_calculator.get_results_per_confidence(
980970
classes=["class_1", "class_2"],
981-
confidence_range=[0.6, 0.9],
971+
confidence_range=[0.6, 0.9, 0.1],  # arange(0.6, 0.9, 0.1)
982972
iou_threshold=0.7,
983973
)
984974
assert actual_results_per_confidence.all_classes_f_measure_curve == (
@@ -987,7 +977,9 @@ def test_f_measure_calculator_get_results_per_confidence(self):
987977
assert actual_results_per_confidence.f_measure_curve == expected_results_per_confidence.f_measure_curve
988978
assert actual_results_per_confidence.recall_curve == expected_results_per_confidence.recall_curve
989979
assert actual_results_per_confidence.best_f_measure == 0.5454545454545453
990-
assert actual_results_per_confidence.best_threshold == 0.6
980+
# 0.6 -> 0.54, 0.7 -> 0.54, 0.8 -> 0.54, 0.9 -> 0.44
981+
# Best "LARGEST" threshold should be 0.8 (considering numerical error)
982+
assert abs(actual_results_per_confidence.best_threshold - 0.8) < 0.001
991983
# Check "_AggregatedResults" object returned by "get_results_per_confidence" when All Classes f-measure is less
992984
# than best f-measure in results_per_confidence
993985
actual_results_per_confidence = f_measure_calculator.get_results_per_confidence(

0 commit comments

Comments
 (0)