diff --git a/docs/tutorials/machine_learning.ipynb b/docs/tutorials/machine_learning.ipynb index 809b85df5..bd8867b93 100644 --- a/docs/tutorials/machine_learning.ipynb +++ b/docs/tutorials/machine_learning.ipynb @@ -10,8 +10,7 @@ "## Create a `TaggedTable`\n", "\n", "First, we need to create a `TaggedTable` from the training data. `TaggedTable`s are used to train supervised machine learning models, because they keep track of the target\n", - "column. A `TaggedTable` can be created from a `Table` by\n", - "specifying the target column in the `Table`." + "column. A `TaggedTable` can be created from a `Table` by calling the `tag_columns` method:" ], "metadata": { "collapsed": false @@ -32,8 +31,7 @@ " \"result\": [6, 7, 10, 13, 9]\n", "})\n", "\n", - "tagged_table = TaggedTable(\n", - " training_set,\n", + "tagged_table = training_set.tag_columns(\n", " target_name=\"result\"\n", ")" ], diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 3cec5c269..0c021e3a7 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -2,9 +2,8 @@ import functools import os.path -import typing from pathlib import Path -from typing import Callable, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, Union import matplotlib.pyplot as plt import numpy as np @@ -12,8 +11,6 @@ import seaborn as sns from IPython.core.display_functions import DisplayHandle, display from pandas import DataFrame, Series -from safeds.data.tabular.containers._column import Column -from safeds.data.tabular.containers._row import Row from safeds.data.tabular.typing import ColumnType, TableSchema from safeds.exceptions import ( ColumnLengthMismatchError, @@ -28,6 +25,12 @@ ) from scipy import stats +from ._column import Column +from ._row import Row + +if TYPE_CHECKING: + from ._tagged_table import TaggedTable + # noinspection PyProtectedMember class Table: @@ -188,7 +191,7 @@ def from_rows(rows: list[Row]) -> Table: # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, data: typing.Iterable, schema: Optional[TableSchema] = None): + def __init__(self, data: Iterable, schema: Optional[TableSchema] = None): self._data: pd.Dataframe = data if isinstance(data, pd.DataFrame) else pd.DataFrame(data) if schema is None: if self.count_columns() == 0: @@ -202,7 +205,7 @@ def __init__(self, data: typing.Iterable, schema: Optional[TableSchema] = None): self._data = self._data.reset_index(drop=True) self._data.columns = list(range(self.count_columns())) - def __eq__(self, other: typing.Any) -> bool: + def __eq__(self, other: Any) -> bool: if not isinstance(other, Table): return NotImplemented if self is other: @@ -782,8 +785,8 @@ def shuffle(self) -> Table: def slice( self, - start: typing.Optional[int] = None, - end: typing.Optional[int] = None, + start: Optional[int] = None, + end: Optional[int] = None, step: int = 1, ) -> Table: """ @@ -878,7 +881,7 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> Table: rows.sort(key=functools.cmp_to_key(comparator)) return Table.from_rows(rows) - def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]: + def split(self, percentage_in_first: float) -> tuple[Table, Table]: """ Split the table into two new tables. @@ -902,7 +905,28 @@ def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]: self.slice(round(percentage_in_first * self.count_rows())), ) - def transform_column(self, name: str, transformer: Callable[[Row], typing.Any]) -> Table: + def tag_columns(self, target_name: str, feature_names: Optional[list[str]] = None) -> TaggedTable: + """ + Mark the columns of the table as target column or feature columns. The original table is not modified. + + Parameters + ---------- + target_name : str + Name of the target column. + feature_names : Optional[list[str]] + Names of the feature columns. If None, all columns except the target column are used. + + Returns + ------- + tagged_table : TaggedTable + A new tagged table with the given target and feature names. + """ + # pylint: disable=import-outside-toplevel + from ._tagged_table import TaggedTable + + return TaggedTable(self._data, target_name, feature_names, self._schema) + + def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Table: """ Transform provided column by calling provided transformer. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 8eb4521d7..065b8c9be 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -1,7 +1,8 @@ -from IPython.core.display_functions import DisplayHandle +from typing import Iterable, Optional -from ._column import Column -from ._table import Table +from IPython.core.display_functions import DisplayHandle +from safeds.data.tabular.containers import Column, Table +from safeds.data.tabular.typing import TableSchema class TaggedTable(Table): @@ -10,34 +11,56 @@ class TaggedTable(Table): Parameters ---------- - table : Table - The table used to derive the features and target. + data : Iterable + The data. target_name : str Name of the target column. + feature_names : Optional[list[str]] + Names of the feature columns. If None, all columns except the target column are used. + schema : Optional[TableSchema] + The schema of the table. If not specified, the schema will be inferred from the data. """ - def __init__(self, table: Table, target_name: str): - super().__init__(table._data) + def __init__( + self, + data: Iterable, + target_name: str, + feature_names: Optional[list[str]] = None, + schema: Optional[TableSchema] = None, + ): + super().__init__(data, schema) + + # If no feature names are specified, use all columns except the target column + if feature_names is None: + feature_names = self.get_column_names() + if target_name in feature_names: + feature_names.remove(target_name) + + # Validate inputs + if target_name in feature_names: + raise ValueError(f"Column '{target_name}' cannot be both feature and target.") + if len(feature_names) == 0: + raise ValueError("At least one feature column must be specified.") - self._y: Column = table.get_column(target_name) - self._X: Table = table.drop_columns([target_name]) + self._features: Table = self.keep_only_columns(feature_names) + self._target: Column = self.get_column(target_name) @property def features(self) -> Table: - return self._X + return self._features @property def target(self) -> Column: - return self._y + return self._target def __repr__(self) -> str: - tmp = self._X.add_column(self._y) - header_info = "Target Column is '" + self._y.name + "'\n" + tmp = self._features.add_column(self._target) + header_info = "Target Column is '" + self._target.name + "'\n" return header_info + tmp.__repr__() def __str__(self) -> str: - tmp = self._X.add_column(self._y) - header_info = "Target Column is '" + self._y.name + "'\n" + tmp = self._features.add_column(self._target) + header_info = "Target Column is '" + self._target.name + "'\n" return header_info + tmp.__str__() def _ipython_display_(self) -> DisplayHandle: @@ -49,7 +72,7 @@ def _ipython_display_(self) -> DisplayHandle: output : DisplayHandle Output object. """ - tmp = self._X.add_column(self._y) - header_info = "Target Column is '" + self._y.name + "'\n" + tmp = self._features.add_column(self._target) + header_info = "Target Column is '" + self._target.name + "'\n" print(header_info) return tmp._ipython_display_() diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index b2844de22..b939fe1e9 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -1,9 +1,10 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Optional +from typing import TYPE_CHECKING, Optional -from safeds.data.tabular.containers import Table +if TYPE_CHECKING: + from safeds.data.tabular.containers import Table class TableTransformer(ABC): diff --git a/src/safeds/ml/_util_sklearn.py b/src/safeds/ml/_util_sklearn.py index a403682f7..28435b726 100644 --- a/src/safeds/ml/_util_sklearn.py +++ b/src/safeds/ml/_util_sklearn.py @@ -29,12 +29,10 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: ) except ValueError as exception: raise LearningError(str(exception)) from exception - except Exception as exception: - raise LearningError(None) from exception # noinspection PyProtectedMember -def predict(model: Any, dataset: Table, target_name: Optional[str]) -> TaggedTable: +def predict(model: Any, dataset: Table, feature_names: Optional[list[str]], target_name: Optional[str]) -> TaggedTable: """ Predict a target vector using a dataset containing feature vectors. The model has to be trained first. @@ -44,8 +42,10 @@ def predict(model: Any, dataset: Table, target_name: Optional[str]) -> TaggedTab Classifier or regressor from scikit-learn. dataset : Table The dataset containing the features. - target_name : str + target_name : Optional[str] The name of the target column. + feature_names : Optional[list[str]] + The names of the feature columns. Returns ------- @@ -58,23 +58,20 @@ def predict(model: Any, dataset: Table, target_name: Optional[str]) -> TaggedTab If predicting with the given dataset failed. """ - if model is None or target_name is None: + if model is None or target_name is None or feature_names is None: raise PredictionError("The model was not trained") - dataset_df = dataset._data - dataset_df.columns = dataset.schema.get_column_names() + dataset_df = dataset.keep_only_columns(feature_names)._data + dataset_df.columns = feature_names try: predicted_target_vector = model.predict(dataset_df.values) - result_set = dataset_df.copy(deep=True) + result_set = dataset._data.copy(deep=True) + result_set.columns = dataset.get_column_names() if target_name in result_set.columns: - raise ValueError( - f"Dataset already contains '{target_name}' column. Please rename this column" - ) + raise ValueError(f"Dataset already contains '{target_name}' column. Please rename this column") result_set[target_name] = predicted_target_vector - return TaggedTable(Table(result_set), target_name=target_name) + return Table(result_set).tag_columns(target_name=target_name, feature_names=feature_names) except NotFittedError as exception: raise PredictionError("The model was not trained") from exception except ValueError as exception: raise PredictionError(str(exception)) from exception - except Exception as exception: - raise PredictionError(None) from exception diff --git a/src/safeds/ml/classification/_ada_boost.py b/src/safeds/ml/classification/_ada_boost.py index 4c6080d65..229015b5a 100644 --- a/src/safeds/ml/classification/_ada_boost.py +++ b/src/safeds/ml/classification/_ada_boost.py @@ -17,6 +17,7 @@ class AdaBoost(Classifier): def __init__(self) -> None: self._wrapped_classifier: Optional[sk_AdaBoostClassifier] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> AdaBoost: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> AdaBoost: result = AdaBoost() result._wrapped_classifier = wrapped_classifier + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_classifier, dataset, self._target_name) + return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classification/_decision_tree.py b/src/safeds/ml/classification/_decision_tree.py index aa83ee77b..47d093507 100644 --- a/src/safeds/ml/classification/_decision_tree.py +++ b/src/safeds/ml/classification/_decision_tree.py @@ -17,6 +17,7 @@ class DecisionTree(Classifier): def __init__(self) -> None: self._wrapped_classifier: Optional[sk_DecisionTreeClassifier] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> DecisionTree: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> DecisionTree: result = DecisionTree() result._wrapped_classifier = wrapped_classifier + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_classifier, dataset, self._target_name) + return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classification/_gradient_boosting_classification.py b/src/safeds/ml/classification/_gradient_boosting_classification.py index 4252783a9..74dc7cb7d 100644 --- a/src/safeds/ml/classification/_gradient_boosting_classification.py +++ b/src/safeds/ml/classification/_gradient_boosting_classification.py @@ -17,6 +17,7 @@ class GradientBoosting(Classifier): def __init__(self) -> None: self._wrapped_classifier: Optional[sk_GradientBoostingClassifier] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> GradientBoosting: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting: result = GradientBoosting() result._wrapped_classifier = wrapped_classifier + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -69,4 +71,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_classifier, dataset, self._target_name) + return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classification/_k_nearest_neighbors.py b/src/safeds/ml/classification/_k_nearest_neighbors.py index 7a0c7a12f..e2d7cd915 100644 --- a/src/safeds/ml/classification/_k_nearest_neighbors.py +++ b/src/safeds/ml/classification/_k_nearest_neighbors.py @@ -23,6 +23,7 @@ def __init__(self, n_neighbors: int) -> None: self._n_neighbors = n_neighbors self._wrapped_classifier: Optional[sk_KNeighborsClassifier] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> KNearestNeighbors: @@ -50,6 +51,7 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors: result = KNearestNeighbors(self._n_neighbors) result._wrapped_classifier = wrapped_classifier + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -73,8 +75,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict( - self._wrapped_classifier, - dataset, - self._target_name, - ) + return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classification/_logistic_regression.py b/src/safeds/ml/classification/_logistic_regression.py index 46fcc102f..5eae1d873 100644 --- a/src/safeds/ml/classification/_logistic_regression.py +++ b/src/safeds/ml/classification/_logistic_regression.py @@ -17,6 +17,7 @@ class LogisticRegression(Classifier): def __init__(self) -> None: self._wrapped_classifier: Optional[sk_LogisticRegression] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> LogisticRegression: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> LogisticRegression: result = LogisticRegression() result._wrapped_classifier = wrapped_classifier + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_classifier, dataset, self._target_name) + return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classification/_random_forest.py b/src/safeds/ml/classification/_random_forest.py index 9f8928f94..927cc9397 100644 --- a/src/safeds/ml/classification/_random_forest.py +++ b/src/safeds/ml/classification/_random_forest.py @@ -16,6 +16,7 @@ class RandomForest(Classifier): def __init__(self) -> None: self._wrapped_classifier: Optional[sk_RandomForestClassifier] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> RandomForest: @@ -44,6 +45,7 @@ def fit(self, training_set: TaggedTable) -> RandomForest: result = RandomForest() result._wrapped_classifier = wrapped_classifier + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -67,4 +69,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_classifier, dataset, self._target_name) + return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_ada_boost.py b/src/safeds/ml/regression/_ada_boost.py index 52d1d42f3..fcd9dbce2 100644 --- a/src/safeds/ml/regression/_ada_boost.py +++ b/src/safeds/ml/regression/_ada_boost.py @@ -17,6 +17,7 @@ class AdaBoost(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_AdaBoostRegressor] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> AdaBoost: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> AdaBoost: result = AdaBoost() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_decision_tree.py b/src/safeds/ml/regression/_decision_tree.py index 5b71d8374..0fcaaef08 100644 --- a/src/safeds/ml/regression/_decision_tree.py +++ b/src/safeds/ml/regression/_decision_tree.py @@ -17,6 +17,7 @@ class DecisionTree(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_DecisionTreeRegressor] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> DecisionTree: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> DecisionTree: result = DecisionTree() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_elastic_net_regression.py b/src/safeds/ml/regression/_elastic_net_regression.py index 37f548d6a..7d12674dd 100644 --- a/src/safeds/ml/regression/_elastic_net_regression.py +++ b/src/safeds/ml/regression/_elastic_net_regression.py @@ -17,6 +17,7 @@ class ElasticNetRegression(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_ElasticNet] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> ElasticNetRegression: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> ElasticNetRegression: result = ElasticNetRegression() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_gradient_boosting_regression.py b/src/safeds/ml/regression/_gradient_boosting_regression.py index c87085d86..abb774567 100644 --- a/src/safeds/ml/regression/_gradient_boosting_regression.py +++ b/src/safeds/ml/regression/_gradient_boosting_regression.py @@ -17,6 +17,7 @@ class GradientBoosting(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_GradientBoostingRegressor] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> GradientBoosting: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting: result = GradientBoosting() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_k_nearest_neighbors.py b/src/safeds/ml/regression/_k_nearest_neighbors.py index c9b5fd3b5..d2371953c 100644 --- a/src/safeds/ml/regression/_k_nearest_neighbors.py +++ b/src/safeds/ml/regression/_k_nearest_neighbors.py @@ -23,6 +23,7 @@ def __init__(self, n_neighbors: int) -> None: self._n_neighbors = n_neighbors self._wrapped_regressor: Optional[sk_KNeighborsRegressor] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> KNearestNeighbors: @@ -51,6 +52,7 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors: result = KNearestNeighbors(self._n_neighbors) result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -74,4 +76,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_lasso_regression.py b/src/safeds/ml/regression/_lasso_regression.py index 824107481..f4f2f48b4 100644 --- a/src/safeds/ml/regression/_lasso_regression.py +++ b/src/safeds/ml/regression/_lasso_regression.py @@ -17,6 +17,7 @@ class LassoRegression(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_Lasso] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> LassoRegression: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> LassoRegression: result = LassoRegression() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_linear_regression.py b/src/safeds/ml/regression/_linear_regression.py index d05286188..1400eb9bc 100644 --- a/src/safeds/ml/regression/_linear_regression.py +++ b/src/safeds/ml/regression/_linear_regression.py @@ -17,6 +17,7 @@ class LinearRegression(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_LinearRegression] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> LinearRegression: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> LinearRegression: result = LinearRegression() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_random_forest.py b/src/safeds/ml/regression/_random_forest.py index 9a49b1298..ad6bdc28c 100644 --- a/src/safeds/ml/regression/_random_forest.py +++ b/src/safeds/ml/regression/_random_forest.py @@ -16,6 +16,7 @@ class RandomForest(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_RandomForestRegressor] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> RandomForest: @@ -44,6 +45,7 @@ def fit(self, training_set: TaggedTable) -> RandomForest: result = RandomForest() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -67,4 +69,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/regression/_ridge_regression.py b/src/safeds/ml/regression/_ridge_regression.py index bfcbe5404..cd3f27fd0 100644 --- a/src/safeds/ml/regression/_ridge_regression.py +++ b/src/safeds/ml/regression/_ridge_regression.py @@ -17,6 +17,7 @@ class RidgeRegression(Regressor): def __init__(self) -> None: self._wrapped_regressor: Optional[sk_Ridge] = None + self._feature_names: Optional[list[str]] = None self._target_name: Optional[str] = None def fit(self, training_set: TaggedTable) -> RidgeRegression: @@ -45,6 +46,7 @@ def fit(self, training_set: TaggedTable) -> RidgeRegression: result = RidgeRegression() result._wrapped_regressor = wrapped_regressor + result._feature_names = training_set.features.get_column_names() result._target_name = training_set.target.name return result @@ -68,4 +70,4 @@ def predict(self, dataset: Table) -> TaggedTable: PredictionError If prediction with the given dataset failed. """ - return predict(self._wrapped_regressor, dataset, self._target_name) + return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/tests/fixtures/__init__.py b/tests/helpers/__init__.py similarity index 100% rename from tests/fixtures/__init__.py rename to tests/helpers/__init__.py diff --git a/tests/fixtures/_resources.py b/tests/helpers/_resources.py similarity index 100% rename from tests/fixtures/_resources.py rename to tests/helpers/_resources.py diff --git a/tests/resources/test_ada_boost.csv b/tests/resources/test_ada_boost.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_ada_boost.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_ada_boost_invalid.csv b/tests/resources/test_ada_boost_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_ada_boost_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_decision_tree.csv b/tests/resources/test_decision_tree.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_decision_tree.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_decision_tree_invalid.csv b/tests/resources/test_decision_tree_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_decision_tree_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_elastic_net_regression.csv b/tests/resources/test_elastic_net_regression.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_elastic_net_regression.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_elastic_net_regression_invalid.csv b/tests/resources/test_elastic_net_regression_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_elastic_net_regression_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_gradient_boosting_classification.csv b/tests/resources/test_gradient_boosting_classification.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_gradient_boosting_classification.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_gradient_boosting_classification_invalid.csv b/tests/resources/test_gradient_boosting_classification_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_gradient_boosting_classification_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_gradient_boosting_regression.csv b/tests/resources/test_gradient_boosting_regression.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_gradient_boosting_regression.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_gradient_boosting_regression_invalid.csv b/tests/resources/test_gradient_boosting_regression_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_gradient_boosting_regression_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_k_nearest_neighbors.csv b/tests/resources/test_k_nearest_neighbors.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_k_nearest_neighbors.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_k_nearest_neighbors_invalid.csv b/tests/resources/test_k_nearest_neighbors_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_k_nearest_neighbors_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_lasso_regression.csv b/tests/resources/test_lasso_regression.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_lasso_regression.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_lasso_regression_invalid.csv b/tests/resources/test_lasso_regression_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_lasso_regression_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_linear_regression.csv b/tests/resources/test_linear_regression.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_linear_regression.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_linear_regression_invalid.csv b/tests/resources/test_linear_regression_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_linear_regression_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_logistic_regression.csv b/tests/resources/test_logistic_regression.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_logistic_regression.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_logistic_regression_invalid.csv b/tests/resources/test_logistic_regression_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_logistic_regression_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_random_forest.csv b/tests/resources/test_random_forest.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_random_forest.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_random_forest_invalid.csv b/tests/resources/test_random_forest_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_random_forest_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_ridge_regression.csv b/tests/resources/test_ridge_regression.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_ridge_regression.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_ridge_regression_invalid.csv b/tests/resources/test_ridge_regression_invalid.csv deleted file mode 100644 index cfcf7131d..000000000 --- a/tests/resources/test_ridge_regression_invalid.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -A,2,3,0 -4,5,6,1 diff --git a/tests/resources/test_tagged_table.csv b/tests/resources/test_tagged_table.csv deleted file mode 100644 index 5ba8f4fad..000000000 --- a/tests/resources/test_tagged_table.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,T -1,2,3,0 -4,5,6,1 diff --git a/tests/safeds/data/tabular/containers/_row/test_has_column.py b/tests/safeds/data/tabular/containers/_row/test_has_column.py index a18246328..a776399a5 100644 --- a/tests/safeds/data/tabular/containers/_row/test_has_column.py +++ b/tests/safeds/data/tabular/containers/_row/test_has_column.py @@ -1,5 +1,5 @@ from safeds.data.tabular.containers import Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_has_column_positive() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_column_drop.py b/tests/safeds/data/tabular/containers/_table/test_column_drop.py index 290a5f055..a583ac3b1 100644 --- a/tests/safeds/data/tabular/containers/_table/test_column_drop.py +++ b/tests/safeds/data/tabular/containers/_table/test_column_drop.py @@ -1,15 +1,13 @@ import pytest from safeds.data.tabular.containers import Table from safeds.exceptions import UnknownColumnNameError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_table_column_drop() -> None: table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv")) transformed_table = table.drop_columns(["A"]) - assert transformed_table.schema.has_column( - "B" - ) and not transformed_table.schema.has_column("A") + assert transformed_table.schema.has_column("B") and not transformed_table.schema.has_column("A") def test_table_column_drop_warning() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/test_drop_duplicate_rows.py index fdfdea451..9bb135662 100644 --- a/tests/safeds/data/tabular/containers/_table/test_drop_duplicate_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_drop_duplicate_rows.py @@ -1,7 +1,7 @@ import pandas as pd import pytest from safeds.data.tabular.containers import Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_table/test_from_columns.py b/tests/safeds/data/tabular/containers/_table/test_from_columns.py index 1c9962f95..b44ab9b1a 100644 --- a/tests/safeds/data/tabular/containers/_table/test_from_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_from_columns.py @@ -2,7 +2,7 @@ import pytest from safeds.data.tabular.containers import Column, Table from safeds.exceptions import MissingDataError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_from_columns() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_from_csv_file.py b/tests/safeds/data/tabular/containers/_table/test_from_csv_file.py index 0150e334e..e0c1d0e54 100644 --- a/tests/safeds/data/tabular/containers/_table/test_from_csv_file.py +++ b/tests/safeds/data/tabular/containers/_table/test_from_csv_file.py @@ -1,18 +1,13 @@ import pytest from safeds.data.tabular.containers import Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_from_csv_file_valid() -> None: table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv")) - assert ( - table.get_column("A").get_value(0) == 1 - and table.get_column("B").get_value(0) == 2 - ) + assert table.get_column("A").get_value(0) == 1 and table.get_column("B").get_value(0) == 2 def test_from_csv_file_invalid() -> None: with pytest.raises(FileNotFoundError): - Table.from_csv_file( - resolve_resource_path("test_table_from_csv_file_invalid.csv") - ) + Table.from_csv_file(resolve_resource_path("test_table_from_csv_file_invalid.csv")) diff --git a/tests/safeds/data/tabular/containers/_table/test_from_json_file.py b/tests/safeds/data/tabular/containers/_table/test_from_json_file.py index adf3cdf41..112fd78dd 100644 --- a/tests/safeds/data/tabular/containers/_table/test_from_json_file.py +++ b/tests/safeds/data/tabular/containers/_table/test_from_json_file.py @@ -1,20 +1,13 @@ import pytest from safeds.data.tabular.containers import Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_from_json_file_valid() -> None: - table = Table.from_json_file( - resolve_resource_path("test_table_from_json_file.json") - ) - assert ( - table.get_column("A").get_value(0) == 1 - and table.get_column("B").get_value(0) == 2 - ) + table = Table.from_json_file(resolve_resource_path("test_table_from_json_file.json")) + assert table.get_column("A").get_value(0) == 1 and table.get_column("B").get_value(0) == 2 def test_from_json_file_invalid() -> None: with pytest.raises(FileNotFoundError): - Table.from_json_file( - resolve_resource_path("test_table_from_json_file_invalid.json") - ) + Table.from_json_file(resolve_resource_path("test_table_from_json_file_invalid.json")) diff --git a/tests/safeds/data/tabular/containers/_table/test_from_rows.py b/tests/safeds/data/tabular/containers/_table/test_from_rows.py index 3e5ebac15..a1c8e934d 100644 --- a/tests/safeds/data/tabular/containers/_table/test_from_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_from_rows.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Row, Table from safeds.exceptions import MissingDataError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_from_rows() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_get_row.py b/tests/safeds/data/tabular/containers/_table/test_get_row.py index ab457753c..b42976e4b 100644 --- a/tests/safeds/data/tabular/containers/_table/test_get_row.py +++ b/tests/safeds/data/tabular/containers/_table/test_get_row.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.exceptions import IndexOutOfBoundsError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_get_row() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_has_column.py b/tests/safeds/data/tabular/containers/_table/test_has_column.py index 99b224b15..a450c733e 100644 --- a/tests/safeds/data/tabular/containers/_table/test_has_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_has_column.py @@ -1,5 +1,5 @@ from safeds.data.tabular.containers import Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_has_column_positive() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/test_keep_only_columns.py index edc3034d4..76b568934 100644 --- a/tests/safeds/data/tabular/containers/_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_keep_only_columns.py @@ -1,15 +1,13 @@ import pytest from safeds.data.tabular.containers import Table from safeds.exceptions import UnknownColumnNameError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_keep_columns() -> None: table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv")) transformed_table = table.keep_only_columns(["A"]) - assert transformed_table.schema.has_column( - "A" - ) and not transformed_table.schema.has_column("B") + assert transformed_table.schema.has_column("A") and not transformed_table.schema.has_column("B") def test_keep_columns_warning() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_rename.py b/tests/safeds/data/tabular/containers/_table/test_rename.py index d3e8ac4ce..c4799aca1 100644 --- a/tests/safeds/data/tabular/containers/_table/test_rename.py +++ b/tests/safeds/data/tabular/containers/_table/test_rename.py @@ -1,19 +1,15 @@ import pytest from safeds.data.tabular.containers import Table from safeds.exceptions import DuplicateColumnNameError, UnknownColumnNameError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path @pytest.mark.parametrize( "name_from, name_to, column_one, column_two", [("A", "D", "D", "B"), ("A", "A", "A", "B")], ) -def test_rename_valid( - name_from: str, name_to: str, column_one: str, column_two: str -) -> None: - table: Table = Table.from_csv_file( - resolve_resource_path("test_table_from_csv_file.csv") - ) +def test_rename_valid(name_from: str, name_to: str, column_one: str, column_two: str) -> None: + table: Table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv")) renamed_table = table.rename_column(name_from, name_to) assert renamed_table.schema.has_column(column_one) assert renamed_table.schema.has_column(column_two) @@ -29,8 +25,6 @@ def test_rename_valid( ], ) def test_rename_invalid(name_from: str, name_to: str, error: Exception) -> None: - table: Table = Table.from_csv_file( - resolve_resource_path("test_table_from_csv_file.csv") - ) + table: Table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv")) with pytest.raises(error): table.rename_column(name_from, name_to) diff --git a/tests/safeds/data/tabular/containers/_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/test_replace_column.py index 23b4875dc..f383d38b3 100644 --- a/tests/safeds/data/tabular/containers/_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_replace_column.py @@ -6,7 +6,7 @@ DuplicateColumnNameError, UnknownColumnNameError, ) -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path @pytest.mark.parametrize( @@ -17,9 +17,7 @@ ], ) def test_replace_valid(column_name: str, path: str) -> None: - input_table: Table = Table.from_csv_file( - resolve_resource_path("test_table_replace_column_input.csv") - ) + input_table: Table = Table.from_csv_file(resolve_resource_path("test_table_replace_column_input.csv")) expected: Table = Table.from_csv_file(resolve_resource_path(path)) column = Column(column_name, pd.Series(["d", "e", "f"])) @@ -43,9 +41,7 @@ def test_replace_invalid( column_name: str, error: type[Exception], ) -> None: - input_table: Table = Table.from_csv_file( - resolve_resource_path("test_table_replace_column_input.csv") - ) + input_table: Table = Table.from_csv_file(resolve_resource_path("test_table_replace_column_input.csv")) column = Column(column_name, pd.Series(column_values)) with pytest.raises(error): diff --git a/tests/safeds/data/tabular/containers/_table/test_table_add_column.py b/tests/safeds/data/tabular/containers/_table/test_table_add_column.py index 1cc8c548f..b46bb2b67 100644 --- a/tests/safeds/data/tabular/containers/_table/test_table_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_table_add_column.py @@ -2,16 +2,12 @@ import pytest from safeds.data.tabular.containers import Column, Table from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_table_add_column_valid() -> None: - input_table = Table.from_csv_file( - resolve_resource_path("test_table_add_column_valid_input.csv") - ) - expected = Table.from_csv_file( - resolve_resource_path("test_table_add_column_valid_output.csv") - ) + input_table = Table.from_csv_file(resolve_resource_path("test_table_add_column_valid_input.csv")) + expected = Table.from_csv_file(resolve_resource_path("test_table_add_column_valid_output.csv")) column = Column("C", pd.Series(["a", "b", "c"])) result = input_table.add_column(column) @@ -25,12 +21,8 @@ def test_table_add_column_valid() -> None: (["a", "b"], "C", ColumnSizeError), ], ) -def test_table_add_column_( - column_values: list[str], column_name: str, error: type[Exception] -) -> None: - input_table = Table.from_csv_file( - resolve_resource_path("test_table_add_column_valid_input.csv") - ) +def test_table_add_column_(column_values: list[str], column_name: str, error: type[Exception]) -> None: + input_table = Table.from_csv_file(resolve_resource_path("test_table_add_column_valid_input.csv")) column = Column(column_name, pd.Series(column_values)) with pytest.raises(error): diff --git a/tests/safeds/data/tabular/containers/_table/test_to_columns.py b/tests/safeds/data/tabular/containers/_table/test_to_columns.py index 98cdca33a..4644149e3 100644 --- a/tests/safeds/data/tabular/containers/_table/test_to_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_to_columns.py @@ -1,7 +1,7 @@ import pandas as pd import pytest from safeds.data.tabular.containers import Column, Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_table/test_to_rows.py b/tests/safeds/data/tabular/containers/_table/test_to_rows.py index e623dc969..ffdcbc785 100644 --- a/tests/safeds/data/tabular/containers/_table/test_to_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_to_rows.py @@ -1,7 +1,7 @@ import pandas as pd from safeds.data.tabular.containers import Row, Table from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_to_rows() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_transform_column.py b/tests/safeds/data/tabular/containers/_table/test_transform_column.py index 227fc8acd..9eeecc361 100644 --- a/tests/safeds/data/tabular/containers/_table/test_transform_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_transform_column.py @@ -1,27 +1,19 @@ import pytest from safeds.data.tabular.containers import Table from safeds.exceptions import UnknownColumnNameError -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_transform_column_valid() -> None: - input_table: Table = Table.from_csv_file( - resolve_resource_path("test_table_transform_column.csv") - ) + input_table: Table = Table.from_csv_file(resolve_resource_path("test_table_transform_column.csv")) - result: Table = input_table.transform_column( - "A", lambda row: row.get_value("A") * 2 - ) + result: Table = input_table.transform_column("A", lambda row: row.get_value("A") * 2) - assert result == Table.from_csv_file( - resolve_resource_path("test_table_transform_column_output.csv") - ) + assert result == Table.from_csv_file(resolve_resource_path("test_table_transform_column_output.csv")) def test_transform_column_invalid() -> None: - input_table: Table = Table.from_csv_file( - resolve_resource_path("test_table_transform_column.csv") - ) + input_table: Table = Table.from_csv_file(resolve_resource_path("test_table_transform_column.csv")) with pytest.raises(UnknownColumnNameError): input_table.transform_column("D", lambda row: row.get_value("A") * 2) diff --git a/tests/safeds/data/tabular/containers/_tagged_table/__init__.py b/tests/safeds/data/tabular/containers/_tagged_table/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/safeds/data/tabular/containers/_tagged_table/test_features.py b/tests/safeds/data/tabular/containers/_tagged_table/test_features.py deleted file mode 100644 index 6110227c0..000000000 --- a/tests/safeds/data/tabular/containers/_tagged_table/test_features.py +++ /dev/null @@ -1,11 +0,0 @@ -from safeds.data.tabular.containers import Table, TaggedTable -from tests.fixtures import resolve_resource_path - - -def test_tagged_table_features() -> None: - table = Table.from_csv_file(resolve_resource_path("test_tagged_table.csv")) - tagged_table = TaggedTable(table, "T") - assert "T" not in tagged_table.features._data - assert tagged_table.features.schema.has_column("A") - assert tagged_table.features.schema.has_column("B") - assert tagged_table.features.schema.has_column("C") diff --git a/tests/safeds/data/tabular/containers/_tagged_table/test_target.py b/tests/safeds/data/tabular/containers/_tagged_table/test_target.py deleted file mode 100644 index 9fc6bec26..000000000 --- a/tests/safeds/data/tabular/containers/_tagged_table/test_target.py +++ /dev/null @@ -1,9 +0,0 @@ -from safeds.data.tabular.containers import Table, TaggedTable -from tests.fixtures import resolve_resource_path - - -def test_tagged_table_target() -> None: - table = Table.from_csv_file(resolve_resource_path("test_tagged_table.csv")) - tagged_table = TaggedTable(table, "T") - assert tagged_table.target._data[0] == 0 - assert tagged_table.target._data[1] == 1 diff --git a/tests/safeds/data/tabular/containers/test_tagged_table.py b/tests/safeds/data/tabular/containers/test_tagged_table.py new file mode 100644 index 000000000..73794bdf7 --- /dev/null +++ b/tests/safeds/data/tabular/containers/test_tagged_table.py @@ -0,0 +1,60 @@ +import pytest +from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.exceptions import UnknownColumnNameError + + +@pytest.fixture +def table() -> Table: + return Table.from_columns( + [ + Column("A", [1, 4]), + Column("B", [2, 5]), + Column("C", [3, 6]), + Column("T", [0, 1]), + ] + ) + + +@pytest.fixture +def tagged_table(table: Table) -> TaggedTable: + return table.tag_columns(target_name="T") + + +class TestInit: + def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None: + with pytest.raises(UnknownColumnNameError): + table.tag_columns(target_name="T", feature_names=["A", "B", "C", "D"]) + + def test_should_raise_if_target_does_not_exist(self, table: Table) -> None: + with pytest.raises(UnknownColumnNameError): + table.tag_columns(target_name="D") + + def test_should_raise_if_features_and_target_overlap(self, table: Table) -> None: + with pytest.raises(ValueError): + table.tag_columns(target_name="A", feature_names=["A", "B", "C"]) + + def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> None: + with pytest.raises(ValueError): + table.tag_columns(target_name="A", feature_names=[]) + + def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> None: + table = Table.from_columns([Column("A", [1, 4])]) + + with pytest.raises(ValueError): + table.tag_columns(target_name="A") + + +class TestFeatures: + def test_should_return_features(self, tagged_table: TaggedTable) -> None: + assert tagged_table.features == Table.from_columns( + [ + Column("A", [1, 4]), + Column("B", [2, 5]), + Column("C", [3, 6]), + ] + ) + + +class TestTarget: + def test_should_return_target(self, tagged_table: TaggedTable) -> None: + assert tagged_table.target == Column("T", [0, 1]) diff --git a/tests/safeds/data/tabular/typing/_table_schema/test_get_column_type.py b/tests/safeds/data/tabular/typing/_table_schema/test_get_column_type.py index 110f0432d..e15837deb 100644 --- a/tests/safeds/data/tabular/typing/_table_schema/test_get_column_type.py +++ b/tests/safeds/data/tabular/typing/_table_schema/test_get_column_type.py @@ -1,7 +1,7 @@ import numpy as np from safeds.data.tabular.containers import Table from safeds.data.tabular.typing import ColumnType -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_get_type_of_column() -> None: diff --git a/tests/safeds/data/tabular/typing/_table_schema/test_has_column.py b/tests/safeds/data/tabular/typing/_table_schema/test_has_column.py index 5a3329d94..6ad02eb47 100644 --- a/tests/safeds/data/tabular/typing/_table_schema/test_has_column.py +++ b/tests/safeds/data/tabular/typing/_table_schema/test_has_column.py @@ -1,5 +1,5 @@ from safeds.data.tabular.containers import Table -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_has_column_true() -> None: diff --git a/tests/safeds/data/tabular/typing/_table_schema/test_table_equals.py b/tests/safeds/data/tabular/typing/_table_schema/test_table_equals.py index 715e03567..f1dc9c080 100644 --- a/tests/safeds/data/tabular/typing/_table_schema/test_table_equals.py +++ b/tests/safeds/data/tabular/typing/_table_schema/test_table_equals.py @@ -1,6 +1,6 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.typing import FloatColumnType, IntColumnType, TableSchema -from tests.fixtures import resolve_resource_path +from tests.helpers import resolve_resource_path def test_table_equals_valid() -> None: diff --git a/tests/safeds/ml/classification/test_ada_boost.py b/tests/safeds/ml/classification/test_ada_boost.py index 256ed7e10..9e1e36f80 100644 --- a/tests/safeds/ml/classification/test_ada_boost.py +++ b/tests/safeds/ml/classification/test_ada_boost.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.classification import AdaBoost, Classifier -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def classifier() -> Classifier: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_ada_boost.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_ada_boost_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie prediction = fitted_classifier.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None: + fitted_regressor = classifier.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) prediction = fitted_classifier.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, classifier: Classifier, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/classification/test_classifier.py b/tests/safeds/ml/classification/test_classifier.py index c07906861..be48ab662 100644 --- a/tests/safeds/ml/classification/test_classifier.py +++ b/tests/safeds/ml/classification/test_classifier.py @@ -27,20 +27,20 @@ def predict(self, dataset: Table) -> TaggedTable: feature = predicted.rename("feature") dataset = Table.from_columns([feature, predicted]) - return TaggedTable(dataset, target_name="predicted") + return dataset.tag_columns(target_name="predicted") class TestAccuracy: def test_with_same_type(self) -> None: - c1 = Column("predicted", pd.Series(data=[1, 2, 3, 4])) - c2 = Column("expected", pd.Series(data=[1, 2, 3, 3])) - table = TaggedTable(Table.from_columns([c1, c2]), target_name="expected") + c1 = Column("predicted", [1, 2, 3, 4]) + c2 = Column("expected", [1, 2, 3, 3]) + table = Table.from_columns([c1, c2]).tag_columns(target_name="expected") assert DummyClassifier().accuracy(table) == 0.75 def test_with_different_types(self) -> None: c1 = Column("predicted", pd.Series(data=["1", "2", "3", "4"])) c2 = Column("expected", pd.Series(data=[1, 2, 3, 3])) - table = TaggedTable(Table.from_columns([c1, c2]), target_name="expected") + table = Table.from_columns([c1, c2]).tag_columns(target_name="expected") assert DummyClassifier().accuracy(table) == 0.0 diff --git a/tests/safeds/ml/classification/test_decision_tree.py b/tests/safeds/ml/classification/test_decision_tree.py index 1e11bd3ae..ce605ad04 100644 --- a/tests/safeds/ml/classification/test_decision_tree.py +++ b/tests/safeds/ml/classification/test_decision_tree.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.classification import Classifier, DecisionTree -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def classifier() -> Classifier: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_decision_tree.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_decision_tree_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie prediction = fitted_classifier.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None: + fitted_regressor = classifier.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) prediction = fitted_classifier.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, classifier: Classifier, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/classification/test_gradient_boosting.py b/tests/safeds/ml/classification/test_gradient_boosting.py index 020f98011..f050e204b 100644 --- a/tests/safeds/ml/classification/test_gradient_boosting.py +++ b/tests/safeds/ml/classification/test_gradient_boosting.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.classification import Classifier, GradientBoosting -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def classifier() -> Classifier: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_gradient_boosting_classification.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_gradient_boosting_classification_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie prediction = fitted_classifier.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None: + fitted_regressor = classifier.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) prediction = fitted_classifier.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, classifier: Classifier, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/classification/test_k_nearest_neighbors.py b/tests/safeds/ml/classification/test_k_nearest_neighbors.py index a7394e6c4..066be1514 100644 --- a/tests/safeds/ml/classification/test_k_nearest_neighbors.py +++ b/tests/safeds/ml/classification/test_k_nearest_neighbors.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.classification import Classifier, KNearestNeighbors -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def classifier() -> Classifier: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_k_nearest_neighbors.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_k_nearest_neighbors_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie prediction = fitted_classifier.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None: + fitted_regressor = classifier.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) prediction = fitted_classifier.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, classifier: Classifier, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/classification/test_logistic_regression.py b/tests/safeds/ml/classification/test_logistic_regression.py index 9da5c1507..99cfb6f7e 100644 --- a/tests/safeds/ml/classification/test_logistic_regression.py +++ b/tests/safeds/ml/classification/test_logistic_regression.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.classification import Classifier, LogisticRegression -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def classifier() -> Classifier: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_logistic_regression.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_logistic_regression_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie prediction = fitted_classifier.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None: + fitted_regressor = classifier.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) prediction = fitted_classifier.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, classifier: Classifier, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/classification/test_random_forest.py b/tests/safeds/ml/classification/test_random_forest.py index 37d65948a..7a147e3dd 100644 --- a/tests/safeds/ml/classification/test_random_forest.py +++ b/tests/safeds/ml/classification/test_random_forest.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.classification import Classifier, RandomForest -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def classifier() -> Classifier: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_random_forest.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_random_forest_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie prediction = fitted_classifier.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None: + fitted_regressor = classifier.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) prediction = fitted_classifier.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, classifier: Classifier, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_ada_boost.py b/tests/safeds/ml/regression/test_ada_boost.py index d23a01904..43985fbad 100644 --- a/tests/safeds/ml/regression/test_ada_boost.py +++ b/tests/safeds/ml/regression/test_ada_boost.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import AdaBoost, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_ada_boost.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_ada_boost_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_decision_tree.py b/tests/safeds/ml/regression/test_decision_tree.py index daa036540..1e8e93589 100644 --- a/tests/safeds/ml/regression/test_decision_tree.py +++ b/tests/safeds/ml/regression/test_decision_tree.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import DecisionTree, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_decision_tree.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_decision_tree_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_elastic_net.py b/tests/safeds/ml/regression/test_elastic_net.py index a935a1037..aed84ae6d 100644 --- a/tests/safeds/ml/regression/test_elastic_net.py +++ b/tests/safeds/ml/regression/test_elastic_net.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import ElasticNetRegression, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_elastic_net_regression.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_elastic_net_regression_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_gradient_boosting.py b/tests/safeds/ml/regression/test_gradient_boosting.py index 8f4e06041..5d88166e6 100644 --- a/tests/safeds/ml/regression/test_gradient_boosting.py +++ b/tests/safeds/ml/regression/test_gradient_boosting.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import GradientBoosting, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_gradient_boosting_regression.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_gradient_boosting_regression_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_k_nearest_neighbors.py b/tests/safeds/ml/regression/test_k_nearest_neighbors.py index 6e879e54e..191eb69f3 100644 --- a/tests/safeds/ml/regression/test_k_nearest_neighbors.py +++ b/tests/safeds/ml/regression/test_k_nearest_neighbors.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import KNearestNeighbors, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_k_nearest_neighbors.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_k_nearest_neighbors_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_lasso_regression.py b/tests/safeds/ml/regression/test_lasso_regression.py index 6eb94ac4a..94f91c32d 100644 --- a/tests/safeds/ml/regression/test_lasso_regression.py +++ b/tests/safeds/ml/regression/test_lasso_regression.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import LassoRegression, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_lasso_regression.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_lasso_regression_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_linear_regression.py b/tests/safeds/ml/regression/test_linear_regression.py index 56d3916c8..190fde4c4 100644 --- a/tests/safeds/ml/regression/test_linear_regression.py +++ b/tests/safeds/ml/regression/test_linear_regression.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import LinearRegression, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_linear_regression.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_linear_regression_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_random_forest.py b/tests/safeds/ml/regression/test_random_forest.py index f22846d7d..5208eb325 100644 --- a/tests/safeds/ml/regression/test_random_forest.py +++ b/tests/safeds/ml/regression/test_random_forest.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import RandomForest, Regressor -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_random_forest.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_random_forest_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/regression/test_regressor.py b/tests/safeds/ml/regression/test_regressor.py index 6a8926cda..ca802490c 100644 --- a/tests/safeds/ml/regression/test_regressor.py +++ b/tests/safeds/ml/regression/test_regressor.py @@ -32,7 +32,7 @@ def predict(self, dataset: Table) -> TaggedTable: feature = predicted.rename("feature") dataset = Table.from_columns([feature, predicted]) - return TaggedTable(dataset, target_name="predicted") + return dataset.tag_columns(target_name="predicted") class TestMeanAbsoluteError: @@ -49,8 +49,7 @@ class TestMeanAbsoluteError: def test_valid_data(self, predicted: list[float], expected: list[float], result: float) -> None: predicted_column = Column("predicted", predicted) expected_column = Column("expected", expected) - table = TaggedTable( - Table.from_columns([predicted_column, expected_column]), + table = Table.from_columns([predicted_column, expected_column]).tag_columns( target_name="expected", ) @@ -65,8 +64,7 @@ class TestMeanSquaredError: def test_valid_data(self, predicted: list[float], expected: list[float], result: float) -> None: predicted_column = Column("predicted", predicted) expected_column = Column("expected", expected) - table = TaggedTable( - Table.from_columns([predicted_column, expected_column]), + table = Table.from_columns([predicted_column, expected_column]).tag_columns( target_name="expected", ) diff --git a/tests/safeds/ml/regression/test_ridge_regression.py b/tests/safeds/ml/regression/test_ridge_regression.py index f0b9bfe6e..2c2a0beca 100644 --- a/tests/safeds/ml/regression/test_ridge_regression.py +++ b/tests/safeds/ml/regression/test_ridge_regression.py @@ -1,8 +1,7 @@ import pytest -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import LearningError, PredictionError from safeds.ml.regression import Regressor, RidgeRegression -from tests.fixtures import resolve_resource_path @pytest.fixture() @@ -12,14 +11,26 @@ def regressor() -> Regressor: @pytest.fixture() def valid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_ridge_regression.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", [2, 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) @pytest.fixture() def invalid_data() -> TaggedTable: - table = Table.from_csv_file(resolve_resource_path("test_ridge_regression_invalid.csv")) - return TaggedTable(table, "T") + return Table.from_columns( + [ + Column("id", [1, 4]), + Column("feat1", ["a", 5]), + Column("feat2", [3, 6]), + Column("target", [0, 1]), + ] + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) class TestFit: @@ -38,10 +49,15 @@ def test_should_include_features_of_prediction_input(self, regressor: Regressor, prediction = fitted_regressor.predict(valid_data.features) assert prediction.features == valid_data.features + def test_should_include_complete_prediction_input(self, regressor: Regressor, valid_data: TaggedTable) -> None: + fitted_regressor = regressor.fit(valid_data) + prediction = fitted_regressor.predict(valid_data.drop_columns(["target"])) + assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"]) + def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) prediction = fitted_regressor.predict(valid_data.features) - assert prediction.target.name == "T" + assert prediction.target.name == "target" def test_should_raise_when_not_fitted(self, regressor: Regressor, valid_data: TaggedTable) -> None: with pytest.raises(PredictionError): diff --git a/tests/safeds/ml/test_util_sklearn.py b/tests/safeds/ml/test_util_sklearn.py index 88bb03b3c..85913417a 100644 --- a/tests/safeds/ml/test_util_sklearn.py +++ b/tests/safeds/ml/test_util_sklearn.py @@ -1,6 +1,6 @@ import warnings -from safeds.data.tabular.containers import Table, TaggedTable +from safeds.data.tabular.containers import Table from safeds.ml.regression import LinearRegression @@ -9,7 +9,7 @@ def test_predict_should_not_warn_about_feature_names() -> None: See https://github.com/Safe-DS/Stdlib/issues/51. """ - training_set = TaggedTable(Table({"a": [1, 2, 3], "b": [2, 4, 6]}), target_name="b") + training_set = Table({"a": [1, 2, 3], "b": [2, 4, 6]}).tag_columns(target_name="b") model = LinearRegression() fitted_model = model.fit(training_set) @@ -19,4 +19,4 @@ def test_predict_should_not_warn_about_feature_names() -> None: # No warning should be emitted with warnings.catch_warnings(): warnings.filterwarnings("error", message="X has feature names") - fitted_model.predict(dataset=test_set) + fitted_model.predict(test_set)