add fixture for expected values for faster tests
MorrisNein committed Dec 4, 2023
1 parent a329eb7 commit 0aa8acf
Showing 3 changed files with 105 additions and 59 deletions.
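The gist of the change: expected metric values now live in test/data/expected_metric_values.json and are loaded once per test session through a pytest fixture, instead of the file being reopened inside every parametrized test case. A minimal, self-contained sketch of the caching pattern (illustration only; the real fixture and tests appear in the diff below):

    import json

    import pytest


    @pytest.fixture(scope='session')
    def expected_values():
        # scope='session' makes pytest build this value once and reuse it
        # for every test that requests the fixture.
        with open('test/data/expected_metric_values.json', 'r') as f:
            return json.load(f)


    def test_binary_accuracy_is_stored(expected_values):
        # Illustrative check; the real tests compare freshly computed
        # metrics against the stored values with np.isclose.
        assert expected_values['binary']['accuracy'] == -0.9625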
8 changes: 5 additions & 3 deletions fedot/core/composer/metrics.py
@@ -71,7 +71,7 @@ def get_value(cls, pipeline: Pipeline, reference_data: InputData,
         try:
             if validation_blocks is None:
                 # Time series or regression classical hold-out validation
-                results = cls._simple_prediction(pipeline, reference_data)
+                reference_data, results = cls._simple_prediction(pipeline, reference_data)
             else:
                 # Perform time series in-sample validation
                 reference_data, results = cls._in_sample_prediction(pipeline, reference_data, validation_blocks)
@@ -95,9 +95,9 @@ def get_value(cls, pipeline: Pipeline, reference_data: InputData,
         return metric

     @classmethod
-    def _simple_prediction(cls, pipeline: Pipeline, reference_data: InputData) -> OutputData:
+    def _simple_prediction(cls, pipeline: Pipeline, reference_data: InputData) -> Tuple[InputData, OutputData]:
         """ Method calls pipeline.predict() and returns the result. """
-        return pipeline.predict(reference_data, output_mode=cls.output_mode)
+        return reference_data, pipeline.predict(reference_data, output_mode=cls.output_mode)

     @classmethod
     def get_value_with_penalty(cls, pipeline: Pipeline, reference_data: InputData,
@@ -330,6 +330,8 @@ def metric(cls, pipeline: Pipeline, **kwargs) -> float:


 class ComputationTime(ComplexityMetric):
+    default_value = sys.maxsize
+
     @classmethod
     def metric(cls, pipeline: Pipeline, **kwargs) -> float:
         return pipeline.computation_time
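Two notes on this file. First, _simple_prediction now returns the reference data alongside the prediction, mirroring _in_sample_prediction, so get_value unpacks both validation branches identically. A consolidated view of the code above, without diff markers (inside QualityMetric.get_value, as exercised by the tests below):

    # Both branches now yield the same (reference_data, results) pair,
    # so downstream metric computation needs no branch-specific handling.
    if validation_blocks is None:
        # Time series or regression classical hold-out validation
        reference_data, results = cls._simple_prediction(pipeline, reference_data)
    else:
        # Perform time series in-sample validation
        reference_data, results = cls._in_sample_prediction(pipeline, reference_data, validation_blocks)

Second, ComputationTime gains an explicit default_value of sys.maxsize; the new tests rely on a computed metric staying away from its metric class's default_value.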
76 changes: 38 additions & 38 deletions test/data/expected_metric_values.json
@@ -5,30 +5,30 @@
     "computation_time_in_seconds": 0.001
   },
   "binary": {
-    "roc_auc": -0.9910714285714286,
-    "precision": -0.9615384615384616,
-    "f1": -0.9259259259259259,
-    "neg_log_loss": 0.20353997172165245,
-    "roc_auc_pen": -0.9867767857142857,
-    "accuracy": -0.95
+    "roc_auc": -0.9967141292442498,
+    "precision": -0.961038961038961,
+    "f1": -0.961038961038961,
+    "neg_log_loss": 0.12480364983668737,
+    "roc_auc_pen": -0.9923950346841914,
+    "accuracy": -0.9625
   },
   "multiclass": {
-    "roc_auc": -0.996776273372018,
-    "precision": -0.9705882352941176,
-    "f1": -0.9850746268656716,
-    "neg_log_loss": 0.1574405647249265,
-    "roc_auc_pen": -0.9924569095207393,
-    "accuracy": -0.9875
+    "roc_auc": -0.9973393347059444,
+    "precision": -0.9667403140534997,
+    "f1": -0.9648282401280049,
+    "neg_log_loss": 0.12313713353516573,
+    "roc_auc_pen": -0.9930175309222187,
+    "accuracy": -0.9647887323943662
   },
   "regression": {
-    "rmse": 43.47543530353005,
-    "mse": 1890.1134748314266,
-    "neg_mean_squared_log_error": 0.13477129922476577,
-    "mape": 0.32297281709194836,
-    "smape": 51.335744050400855,
-    "mae": 35.530404577480475,
-    "r2": 0.6153035428072782,
-    "rmse_pen": 43.56238617413711
+    "rmse": 52.66625041489969,
+    "mse": 2773.7339327649224,
+    "neg_mean_squared_log_error": 0.1816505113919213,
+    "mape": 0.4054093661550918,
+    "smape": 51.74454568467751,
+    "mae": 42.70067250366354,
+    "r2": 0.5213672450946683,
+    "rmse_pen": 52.77158291572949
   },
   "multitarget": {
     "rmse": 4.242740603698809,
@@ -41,25 +41,25 @@
     "rmse_pen": 4.2512260849062065
   },
   "ts": {
-    "mase": 0.058980194115549354,
-    "rmse": 0.4255038192349266,
-    "mse": 0.18105350018350908,
-    "neg_mean_squared_log_error": 2.667666308520171e-05,
-    "mape": 0.004435590365776401,
-    "smape": 0.44406826178197356,
-    "mae": 0.3694022684079144,
-    "r2": 0.9989871132856867,
-    "rmse_pen": 0.42635482687339643
+    "mase": 0.15114880404154094,
+    "rmse": 1.5387331777721858,
+    "mse": 2.367699792376889,
+    "neg_mean_squared_log_error": 0.00038988885997187243,
+    "mape": 0.016605915265088925,
+    "smape": 1.6566004975287463,
+    "mae": 1.3285184355230175,
+    "r2": 0.9923997085574247,
+    "rmse_pen": 1.5418106441277302
   },
   "multits": {
-    "mase": 1.8544798760275754,
-    "rmse": 13.160020319921124,
-    "mse": 173.18613482073687,
-    "neg_mean_squared_log_error": 0.02414263771029647,
-    "mape": 0.13774146040649768,
-    "smape": 13.880884066527864,
-    "mae": 11.61490027617271,
-    "r2": 0.031126518485388055,
-    "rmse_pen": 13.186340360560965
+    "mase": 0.1509174893096649,
+    "rmse": 1.5354969862068721,
+    "mse": 2.357750994650387,
+    "neg_mean_squared_log_error": 0.00038897208639900315,
+    "mape": 0.01659010099781985,
+    "smape": 1.655093261482032,
+    "mae": 1.3264853007744228,
+    "r2": 0.9924316440935379,
+    "rmse_pen": 1.538567980179286
   }
 }
80 changes: 62 additions & 18 deletions test/unit/composer/test_metrics.py
@@ -1,7 +1,6 @@
 import json
-import sys
 from itertools import product
-from typing import Callable, Tuple, Union
+from typing import Callable, Dict, Tuple, Union

 import numpy as np
 import pandas as pd
@@ -49,14 +48,14 @@ def data_setup(request):
     elif task_type == 'multits':
         file_path = fedot_project_root() / 'test/data/short_time_series.csv'
         df = pd.read_csv(file_path)
-        x = df[['wind_speed', 'sea_height']].to_numpy()
+        x = df[['sea_height', 'sea_height']].to_numpy()
         y = df['sea_height'].to_numpy()
         task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=10))
         data_type = DataTypesEnum.multi_ts
     else:
         raise ValueError(f'Unsupported task type: {task_type}')

-    x, y = x[:100], y[:100]
+    x, y = x[:200], y[:200]

     # Wrap data into InputData
     input_data = InputData(features=x,
@@ -99,37 +98,82 @@ def get_ts_pipeline(window_size=30):
     return pipeline


+@pytest.fixture(scope='session')
+def expected_values() -> Dict[str, Dict[str, float]]:
+    with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'r') as f:
+        return json.load(f)
+
+
 @pytest.mark.parametrize(
-    'metric, validation_blocks, pipeline_func, data_setup',
-    [*product(ComplexityMetricsEnum, [None], [get_classification_pipeline], ['complexity']),
-     *product(ClassificationMetricsEnum, [None], [get_classification_pipeline], ['binary', 'multiclass']),
-     *product(RegressionMetricsEnum, [None], [get_regression_pipeline], ['regression', 'multitarget']),
-     *product(TimeSeriesForecastingMetricsEnum, [2], [get_ts_pipeline], ['ts', 'multits'])],
+    'metric, pipeline_func, data_setup, validation_blocks',
+    [
+        *product(ComplexityMetricsEnum, [get_classification_pipeline], ['complexity'], [None]),
+        *product(ClassificationMetricsEnum, [get_classification_pipeline], ['binary', 'multiclass'], [None]),
+        *product(RegressionMetricsEnum, [get_regression_pipeline], ['regression', 'multitarget'], [None]),
+        *product(TimeSeriesForecastingMetricsEnum, [get_ts_pipeline], ['ts', 'multits'], [2])
+    ],
     indirect=['data_setup']
 )
-def test_quality_metrics(metric: ClassificationMetricsEnum, validation_blocks: Union[int, None],
-                         pipeline_func: Callable[[], Pipeline], data_setup: Tuple[InputData, InputData, str]):
+def test_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], Pipeline],
+                 validation_blocks: Union[int, None], data_setup: Tuple[InputData, InputData, str],
+                 expected_values: Dict[str, Dict[str, float]], update_expected_values: bool = False):
     train, _, task_type = data_setup

     pipeline = pipeline_func()
     pipeline.fit(input_data=train)
     metric_function = MetricsRepository.get_metric(metric)
+    metric_class = MetricsRepository.get_metric_class(metric)
     metric_value = metric_function(pipeline=pipeline, reference_data=train, validation_blocks=validation_blocks)
-    with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'r') as f:
-        expected_value = json.load(f)[task_type][str(metric)]
-    assert 0 <= abs(metric_value) < sys.maxsize
+
+    if update_expected_values:
+        with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'w') as f:
+            expected_values = dict(binary={}, multiclass={}, regression={}, multitarget={}, ts={}, multits={})
+            expected_values[task_type][str(metric)] = metric_value
+            json.dump(expected_values, f)
+
+    expected_value = expected_values[task_type][str(metric)]
+
     assert np.isclose(metric_value, expected_value, rtol=0.001, atol=0.001)
-    assert metric_value != MetricsRepository.get_metric_class(metric).default_value
+    assert not np.isclose(metric_value, metric_class.default_value, rtol=0.01, atol=0.01)


+@pytest.mark.parametrize(
+    'metric, pipeline_func, data_setup, validation_blocks',
+    [
+        *product(ClassificationMetricsEnum, [get_classification_pipeline], ['binary', 'multiclass'], [None]),
+        *product(RegressionMetricsEnum, [get_regression_pipeline], ['regression', 'multitarget'], [None]),
+        *product(TimeSeriesForecastingMetricsEnum, [get_ts_pipeline], ['ts', 'multits'], [2]),
+    ],
+    indirect=['data_setup']
+)
+def test_ideal_case_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], Pipeline],
+                            validation_blocks: Union[int, None], data_setup: Tuple[InputData, InputData, str],
+                            expected_values):
+    reference, _, task_type = data_setup
+    metric_class = MetricsRepository.get_metric_class(metric)
+    predicted = OutputData(idx=reference.idx, task=reference.task, data_type=reference.data_type)
+    if task_type == 'multiclass' and metric_class.output_mode != 'labels':
+        label_vals = np.unique(reference.target)
+        predicted.predict = np.identity(len(label_vals))[reference.target]
+    else:
+        predicted.predict = reference.target
+    if task_type == 'multits':
+        reference.features = reference.features[:, 0]
+
+    ideal_value = metric_class.metric(reference, predicted)
+
+    assert ideal_value != metric_class.default_value

 @pytest.mark.parametrize('data_setup', ['multitarget'], indirect=True)
-def test_predict_shape_multi_target(data_setup: Tuple[InputData, InputData]):
-    train, test = data_setup
+def test_predict_shape_multi_target(data_setup: Tuple[InputData, InputData, str]):
+    train, test, _ = data_setup
     simple_pipeline = Pipeline(PipelineNode('linear'))
     simple_pipeline.fit(input_data=train)

     target_shape = test.target.shape
     # Get converted data
-    results = QualityMetric()._simple_prediction(simple_pipeline, test)
+    _, results = QualityMetric()._simple_prediction(simple_pipeline, test)
     predict_shape = results.predict.shape
     assert target_shape == predict_shape
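A usage note, assuming a FEDOT development checkout: the suite runs with a plain pytest call such as

    pytest test/unit/composer/test_metrics.py

update_expected_values is an ordinary default argument of test_metrics, not a pytest option, so regenerating test/data/expected_metric_values.json means setting that default to True and re-running. As committed, the update branch rebuilds the dict from scratch on every call, so each parametrized case overwrites the file with only its own value; refreshing the whole file in one run would need a read-modify-write instead.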

