Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improve Error Handling of classifiers and regressors #355

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/safeds/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ColumnSizeError,
DuplicateColumnNameError,
IndexOutOfBoundsError,
MissingValuesColumnError,
NonNumericColumnError,
SchemaMismatchError,
TransformerNotFittedError,
Expand All @@ -14,6 +15,7 @@
)
from safeds.exceptions._ml import (
DatasetContainsTargetError,
DatasetMissesDataError,
DatasetMissesFeaturesError,
LearningError,
ModelNotFittedError,
Expand All @@ -33,11 +35,13 @@
"UnknownColumnNameError",
"ValueNotPresentWhenFittedError",
"WrongFileExtensionError",
"MissingValuesColumnError",
# ML exceptions
"DatasetContainsTargetError",
"DatasetMissesFeaturesError",
"LearningError",
"ModelNotFittedError",
"PredictionError",
"UntaggedTableError",
"DatasetMissesDataError",
]
23 changes: 21 additions & 2 deletions src/safeds/exceptions/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,27 @@ def __init__(self, column_names: list[str]):
class NonNumericColumnError(Exception):
"""Exception raised for trying to do numerical operations on a non-numerical column."""

def __init__(self, column_info: str) -> None:
super().__init__(f"Tried to do a numerical operation on one or multiple non numerical Columns: \n{column_info}")
def __init__(self, column_info: str, help_msg: str | None = None) -> None:
line_break = "\n"
jxnior01 marked this conversation as resolved.
Show resolved Hide resolved
super().__init__(
(
"Tried to do a numerical operation on one or multiple non-numerical columns:"
f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}"
),
jxnior01 marked this conversation as resolved.
Show resolved Hide resolved
)


class MissingValuesColumnError(Exception):
"""Exception raised for trying to do operations on columns containing missing values."""

def __init__(self, column_info: str, help_msg: str | None = None) -> None:
line_break = "\n"
super().__init__(
(
"Tried to do an operation on one or multiple columns containing missing values:"
f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}"
jxnior01 marked this conversation as resolved.
Show resolved Hide resolved
),
)


class DuplicateColumnNameError(Exception):
Expand Down
7 changes: 7 additions & 0 deletions src/safeds/exceptions/_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ def __init__(self, missing_feature_names: list[str]):
super().__init__(f"Dataset misses the feature columns '{missing_feature_names}'.")


class DatasetMissesDataError(ValueError):
    """
    Raised when a dataset contains no rows.

    The exception takes no arguments and always carries the same fixed message. It derives from
    `ValueError`, so handlers written for `ValueError` catch it as well.
    """

    def __init__(self) -> None:
        super().__init__("Dataset contains no rows")


class LearningError(Exception):
"""
Raised when an error occurred while training a model.
Expand Down
72 changes: 72 additions & 0 deletions src/safeds/ml/classical/_util_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.exceptions import (
DatasetContainsTargetError,
DatasetMissesDataError,
DatasetMissesFeaturesError,
LearningError,
MissingValuesColumnError,
ModelNotFittedError,
NonNumericColumnError,
PredictionError,
UntaggedTableError,
)
Expand All @@ -30,9 +33,44 @@ def fit(model: Any, tagged_table: TaggedTable) -> None:
If the tagged table contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
if not isinstance(tagged_table, TaggedTable) and isinstance(tagged_table, Table):
raise UntaggedTableError

if tagged_table.number_of_rows == 0:
raise DatasetMissesDataError

non_numerical_column_names = set(tagged_table.features.column_names) - set(
tagged_table.features.remove_columns_with_non_numerical_values().column_names,
)
if len(non_numerical_column_names) != 0:
raise NonNumericColumnError(
str(non_numerical_column_names),
(
"You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical"
" data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many"
" different values\nor is ordinal, you should use the LabelEncoder."
),
)

null_containing_column_names = set(tagged_table.features.column_names) - set(
tagged_table.features.remove_columns_with_missing_values().column_names,
)
if len(null_containing_column_names) != 0:
raise MissingValuesColumnError(
str(null_containing_column_names),
(
"You can use the Imputer to replace the missing values based on different strategies.\nIf you want to"
" remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`."
),
)

try:
model.fit(
tagged_table.features._data,
Expand Down Expand Up @@ -73,6 +111,12 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
# Validation
if model is None or target_name is None or feature_names is None:
Expand All @@ -83,6 +127,34 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_
if missing_feature_names:
raise DatasetMissesFeaturesError(missing_feature_names)

if dataset.number_of_rows == 0:
raise DatasetMissesDataError

non_numerical_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set(
dataset.keep_only_columns(feature_names).remove_columns_with_non_numerical_values().column_names,
)
if len(non_numerical_column_names) != 0:
raise NonNumericColumnError(
str(non_numerical_column_names),
(
"You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical"
" data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many"
" different values\nor is ordinal, you should use the LabelEncoder.\n"
),
)

null_containing_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set(
dataset.keep_only_columns(feature_names).remove_columns_with_missing_values().column_names,
)
if len(null_containing_column_names) != 0:
raise MissingValuesColumnError(
str(null_containing_column_names),
(
"You can use the Imputer to replace the missing values based on different strategies.\nIf you want to"
" remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`."
),
)

dataset_df = dataset.keep_only_columns(feature_names)._data
dataset_df.columns = feature_names

Expand Down
14 changes: 14 additions & 0 deletions src/safeds/ml/classical/classification/_ada_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ def fit(self, training_set: TaggedTable) -> AdaBoost:
------
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)
Expand Down Expand Up @@ -129,6 +137,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
14 changes: 14 additions & 0 deletions src/safeds/ml/classical/classification/_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def fit(self, training_set: TaggedTable) -> DecisionTree:
------
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)
Expand Down Expand Up @@ -78,6 +86,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
14 changes: 14 additions & 0 deletions src/safeds/ml/classical/classification/_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
------
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)
Expand Down Expand Up @@ -112,6 +120,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
17 changes: 17 additions & 0 deletions src/safeds/ml/classical/classification/_k_nearest_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from sklearn.neighbors import KNeighborsClassifier as sk_KNeighborsClassifier

from safeds.exceptions import DatasetMissesDataError
from safeds.ml.classical._util_sklearn import fit, predict

from ._classifier import Classifier
Expand Down Expand Up @@ -69,7 +70,17 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors:
If `number_of_neighbors` is greater than the sample size.
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
if training_set.number_of_rows == 0:
raise DatasetMissesDataError
if self._number_of_neighbors > training_set.number_of_rows:
raise ValueError(
(
Expand Down Expand Up @@ -111,6 +122,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
14 changes: 14 additions & 0 deletions src/safeds/ml/classical/classification/_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def fit(self, training_set: TaggedTable) -> LogisticRegression:
------
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)
Expand Down Expand Up @@ -78,6 +86,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
14 changes: 14 additions & 0 deletions src/safeds/ml/classical/classification/_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ def fit(self, training_set: TaggedTable) -> RandomForest:
------
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)
Expand Down Expand Up @@ -100,6 +108,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ def fit(self, training_set: TaggedTable) -> SupportVectorMachine:
------
LearningError
If the training data contains invalid values or if the training failed.
UntaggedTableError
If the table is untagged.
NonNumericColumnError
If the training data contains non-numerical values.
MissingValuesColumnError
If the training data contains missing values.
DatasetMissesDataError
If the training data contains no rows.
"""
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)
Expand Down Expand Up @@ -154,6 +162,12 @@ def predict(self, dataset: Table) -> TaggedTable:
If the dataset misses feature columns.
PredictionError
If predicting with the given dataset failed.
NonNumericColumnError
If the dataset contains non-numerical values.
MissingValuesColumnError
If the dataset contains missing values.
DatasetMissesDataError
If the dataset contains no rows.
"""
return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

Expand Down
Loading