Skip to content

Commit

Permalink
feat: metrics as methods of models (#77)
Browse files Browse the repository at this point in the history
Closes #64.

### Summary of Changes

Metrics are now methods of classifiers and regressors. They also take a
validation or test set as input now instead of two columns representing
predicted and expected values.

---------

Co-authored-by: lars-reimann <lars-reimann@users.noreply.github.com>
  • Loading branch information
lars-reimann and lars-reimann authored Mar 24, 2023
1 parent ec539eb commit bc63693
Show file tree
Hide file tree
Showing 17 changed files with 204 additions and 138 deletions.
23 changes: 23 additions & 0 deletions src/safeds/ml/classification/_classifier.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod

from safeds.data.tabular.containers import Table, TaggedTable
from sklearn.metrics import accuracy_score as sk_accuracy_score


class Classifier(ABC):
Expand Down Expand Up @@ -44,3 +45,25 @@ def predict(self, dataset: Table) -> TaggedTable:
PredictionError
If prediction with the given dataset failed.
"""

def accuracy(self, validation_or_test_set: TaggedTable) -> float:
    """
    Compute the accuracy of this classifier on a validation or test set.

    The features of the given set are passed to `predict`, and the predicted target column is compared with the
    target column of the given set.

    Parameters
    ----------
    validation_or_test_set : TaggedTable
        The validation or test set.

    Returns
    -------
    accuracy : float
        The accuracy score returned by scikit-learn's `accuracy_score`, i.e. the fraction of rows whose predicted
        target equals the expected target.
    """
    truth_column = validation_or_test_set.target
    prediction = self.predict(validation_or_test_set.features)
    predicted_column = prediction.target

    # scikit-learn works on the underlying data of the columns, hence the protected access.
    # noinspection PyProtectedMember
    return sk_accuracy_score(truth_column._data, predicted_column._data)
1 change: 0 additions & 1 deletion src/safeds/ml/classification/metrics/__init__.py

This file was deleted.

21 changes: 0 additions & 21 deletions src/safeds/ml/classification/metrics/_module_level_functions.py

This file was deleted.

61 changes: 60 additions & 1 deletion src/safeds/ml/regression/_regressor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from abc import ABC, abstractmethod

from safeds.data.tabular.containers import Table, TaggedTable
from safeds.data.tabular.containers import Column, Table, TaggedTable
from safeds.exceptions import ColumnLengthMismatchError
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
from sklearn.metrics import mean_squared_error as sk_mean_squared_error


class Regressor(ABC):
Expand Down Expand Up @@ -44,3 +47,59 @@ def predict(self, dataset: Table) -> TaggedTable:
PredictionError
If prediction with the given dataset failed.
"""

def mean_squared_error(self, validation_or_test_set: TaggedTable) -> float:
    """
    Compute the mean squared error of this regressor on a validation or test set.

    The features of the given set are passed to `predict`, and the predicted target column is compared with the
    target column of the given set.

    Parameters
    ----------
    validation_or_test_set : TaggedTable
        The validation or test set.

    Returns
    -------
    mean_squared_error : float
        The mean of the squared differences between the predicted and the expected target values.

    Raises
    ------
    TypeError
        If the predicted or the expected target column is not numeric.
    ColumnLengthMismatchError
        If the predicted and the expected target columns differ in size.
    """
    truth_column = validation_or_test_set.target
    prediction = self.predict(validation_or_test_set.features)
    predicted_column = prediction.target

    # Validate both columns before handing their underlying data to scikit-learn.
    _check_metrics_preconditions(predicted_column, truth_column)
    return sk_mean_squared_error(truth_column._data, predicted_column._data)

def mean_absolute_error(self, validation_or_test_set: TaggedTable) -> float:
    """
    Compute the mean absolute error of this regressor on a validation or test set.

    The features of the given set are passed to `predict`, and the predicted target column is compared with the
    target column of the given set.

    Parameters
    ----------
    validation_or_test_set : TaggedTable
        The validation or test set.

    Returns
    -------
    mean_absolute_error : float
        The mean of the absolute differences between the predicted and the expected target values.

    Raises
    ------
    TypeError
        If the predicted or the expected target column is not numeric.
    ColumnLengthMismatchError
        If the predicted and the expected target columns differ in size.
    """
    truth_column = validation_or_test_set.target
    prediction = self.predict(validation_or_test_set.features)
    predicted_column = prediction.target

    # Validate both columns before handing their underlying data to scikit-learn.
    _check_metrics_preconditions(predicted_column, truth_column)
    return sk_mean_absolute_error(truth_column._data, predicted_column._data)


def _check_metrics_preconditions(actual: Column, expected: Column) -> None:
    """
    Verify that two columns can be compared by a regression metric.

    Parameters
    ----------
    actual : Column
        The column with the predicted values.
    expected : Column
        The column with the expected values.

    Raises
    ------
    TypeError
        If one of the columns is not numeric. `actual` is checked before `expected`.
    ColumnLengthMismatchError
        If the columns differ in size. The message lists the name and size of each column.
    """
    # The label is the parameter name, not the column name, so the message tells the caller which argument is wrong.
    for label, column in (("actual", actual), ("expected", expected)):
        if not column.type.is_numeric():
            raise TypeError(f"Column '{label}' is not numerical but {column.type}.")

    if actual._data.size == expected._data.size:
        return

    size_report = [f"{column.name}: {column._data.size}" for column in (actual, expected)]
    raise ColumnLengthMismatchError("\n".join(size_report))
1 change: 0 additions & 1 deletion src/safeds/ml/regression/metrics/__init__.py

This file was deleted.

58 changes: 0 additions & 58 deletions src/safeds/ml/regression/metrics/_module_level_functions.py

This file was deleted.

26 changes: 26 additions & 0 deletions tests/safeds/ml/classification/_classifier/_dummy_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.classification import Classifier


class DummyClassifier(Classifier):
    """
    Classifier stub used to test the metric methods.

    It does not learn anything: `predict` returns the `predicted` column of its input as the prediction. Metric
    methods therefore expect a `TaggedTable` with two columns:

    - `predicted`: The predicted targets.
    - `expected`: The correct targets.

    `target_name` must be set to `"expected"`.
    """

    def fit(self, training_set: TaggedTable) -> None:
        # Nothing to train; predictions are read from the input of `predict`.
        pass

    def predict(self, dataset: Table) -> TaggedTable:
        predicted_column = dataset.get_column("predicted")

        # Needed until https://github.com/Safe-DS/Stdlib/issues/75 is fixed
        placeholder_feature = predicted_column.rename("feature")
        result = Table.from_columns([placeholder_feature, predicted_column])

        return TaggedTable(result, target_name="predicted")
20 changes: 20 additions & 0 deletions tests/safeds/ml/classification/_classifier/test_accuracy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pandas as pd
from safeds.data.tabular.containers import Column, Table, TaggedTable

from ._dummy_classifier import DummyClassifier


def test_accuracy() -> None:
    # Three of the four predictions match the expected targets.
    predicted = Column(pd.Series(data=[1, 2, 3, 4]), "predicted")
    expected = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
    tagged_table = TaggedTable(
        Table.from_columns([predicted, expected]),
        target_name="expected",
    )

    classifier = DummyClassifier()

    assert classifier.accuracy(tagged_table) == 0.75


def test_accuracy_different_types() -> None:
    # Strings never compare equal to integers, so no prediction counts as correct.
    predicted = Column(pd.Series(data=["1", "2", "3", "4"]), "predicted")
    expected = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
    tagged_table = TaggedTable(
        Table.from_columns([predicted, expected]),
        target_name="expected",
    )

    classifier = DummyClassifier()

    assert classifier.accuracy(tagged_table) == 0.0
15 changes: 0 additions & 15 deletions tests/safeds/ml/classification/metrics/_accuracy/test_accuracy.py

This file was deleted.

26 changes: 26 additions & 0 deletions tests/safeds/ml/regression/_regressor/_dummy_regressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.regression import Regressor


class DummyRegressor(Regressor):
    """
    Regressor stub used to test the metric methods.

    It does not learn anything: `predict` returns the `predicted` column of its input as the prediction. Metric
    methods therefore expect a `TaggedTable` with two columns:

    - `predicted`: The predicted targets.
    - `expected`: The correct targets.

    `target_name` must be set to `"expected"`.
    """

    def fit(self, training_set: TaggedTable) -> None:
        # Nothing to train; predictions are read from the input of `predict`.
        pass

    def predict(self, dataset: Table) -> TaggedTable:
        predicted_column = dataset.get_column("predicted")

        # Needed until https://github.com/Safe-DS/Stdlib/issues/75 is fixed
        placeholder_feature = predicted_column.rename("feature")
        result = Table.from_columns([placeholder_feature, predicted_column])

        return TaggedTable(result, target_name="predicted")
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import pytest
from safeds.data.tabular.containers import Column
from safeds.exceptions import ColumnLengthMismatchError
from safeds.ml.regression.metrics._module_level_functions import (
_check_metrics_preconditions,
)

# noinspection PyProtectedMember
from safeds.ml.regression._regressor import _check_metrics_preconditions


@pytest.mark.parametrize(
Expand Down
26 changes: 26 additions & 0 deletions tests/safeds/ml/regression/_regressor/test_mean_absolute_error.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable

from ._dummy_regressor import DummyRegressor


@pytest.mark.parametrize(
    ("predicted", "expected", "result"),
    [
        ([1, 2], [1, 2], 0),
        ([0, 0], [1, 1], 1),
        ([1, 1, 1], [2, 2, 11], 4),
        ([0, 0, 0], [10, 2, 18], 10),
        ([0.5, 0.5], [1.5, 1.5], 1),
    ],
)
def test_mean_absolute_error_valid(
    predicted: list[float], expected: list[float], result: float
) -> None:
    # The dummy regressor echoes the `predicted` column, so the metric compares the two given lists.
    columns = [Column(predicted, "predicted"), Column(expected, "expected")]
    tagged_table = TaggedTable(Table.from_columns(columns), target_name="expected")

    regressor = DummyRegressor()

    assert regressor.mean_absolute_error(tagged_table) == result
20 changes: 20 additions & 0 deletions tests/safeds/ml/regression/_regressor/test_mean_squared_error.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable

from ._dummy_regressor import DummyRegressor


@pytest.mark.parametrize(
    ("predicted", "expected", "result"),
    [
        ([1, 2], [1, 2], 0),
        ([0, 0], [1, 1], 1),
        ([1, 1, 1], [2, 2, 11], 34),
    ],
)
def test_mean_squared_error_valid(
    predicted: list[float], expected: list[float], result: float
) -> None:
    # The dummy regressor echoes the `predicted` column, so the metric compares the two given lists.
    columns = [Column(predicted, "predicted"), Column(expected, "expected")]
    tagged_table = TaggedTable(Table.from_columns(columns), target_name="expected")

    regressor = DummyRegressor()

    assert regressor.mean_squared_error(tagged_table) == result
22 changes: 0 additions & 22 deletions tests/safeds/ml/regression/metrics/test_mean_absolute_error.py

This file was deleted.

16 changes: 0 additions & 16 deletions tests/safeds/ml/regression/metrics/test_mean_squared_error.py

This file was deleted.

0 comments on commit bc63693

Please sign in to comment.