Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add SequentialTableTransformer #893

Merged
merged 21 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
92d8909
first implementation of SequentialTableTransformer
xXstupidnameXx Jun 21, 2024
73e765d
implemented TransformerNotInvertableError
xXstupidnameXx Jun 28, 2024
3c5619a
all current tests passed.
xXstupidnameXx Jun 28, 2024
d847ab0
improved tests
xXstupidnameXx Jun 28, 2024
4029628
finished tests
xXstupidnameXx Jul 5, 2024
8254b2e
finished SequentialTableTransformer
xXstupidnameXx Jul 5, 2024
a47cdbe
last minute typo fix (changed invertable to invertible)
xXstupidnameXx Jul 5, 2024
ab706f4
fixed documentation
xXstupidnameXx Jul 5, 2024
5b0f9e7
Merge branch 'main' into 802-sequential-table-transformer
xXstupidnameXx Jul 5, 2024
b5d1393
fixed tests
xXstupidnameXx Jul 5, 2024
4dbcab4
Merge remote-tracking branch 'origin/802-sequential-table-transformer…
xXstupidnameXx Jul 5, 2024
d24a14a
fixing linter errors
xXstupidnameXx Jul 5, 2024
445d9ac
fixed mypy errors
xXstupidnameXx Jul 12, 2024
b2ea3c1
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
7097f7d
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
0960374
Merge branch 'main' into 802-sequential-table-transformer
xXstupidnameXx Jul 12, 2024
946ccc6
minor code improvements
xXstupidnameXx Jul 12, 2024
a3e0fe9
Merge remote-tracking branch 'origin/802-sequential-table-transformer…
xXstupidnameXx Jul 12, 2024
5251c57
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
e88083e
removed ValueError from __init__ in SequentialTableTransformer
xXstupidnameXx Jul 12, 2024
4273811
docs: minor changes
lars-reimann Jul 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/safeds/data/tabular/transformation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ._one_hot_encoder import OneHotEncoder
from ._range_scaler import RangeScaler
from ._robust_scaler import RobustScaler
from ._sequential_table_transformer import SequentialTableTransformer
from ._simple_imputer import SimpleImputer
from ._standard_scaler import StandardScaler
from ._table_transformer import TableTransformer
Expand All @@ -24,6 +25,7 @@
"LabelEncoder": "._label_encoder:LabelEncoder",
"OneHotEncoder": "._one_hot_encoder:OneHotEncoder",
"RangeScaler": "._range_scaler:RangeScaler",
"SequentialTableTransformer": "._sequential_table_transformer:SequentialTableTransformer",
"RobustScaler": "._robust_scaler:RobustScaler",
"SimpleImputer": "._simple_imputer:SimpleImputer",
"StandardScaler": "._standard_scaler:StandardScaler",
Expand All @@ -38,6 +40,7 @@
"LabelEncoder",
"OneHotEncoder",
"RangeScaler",
"SequentialTableTransformer",
"RobustScaler",
"SimpleImputer",
"StandardScaler",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from safeds._utils import _structural_hash
from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertibleError

from ._invertible_table_transformer import InvertibleTableTransformer

if TYPE_CHECKING:
from safeds.data.tabular.containers import Table

from ._table_transformer import TableTransformer


class SequentialTableTransformer(InvertibleTableTransformer):
    """
    The SequentialTableTransformer transforms a table using multiple transformers in sequence.

    Parameters
    ----------
    transformers:
        The list of transformers used to transform the table. Used in the order as they are supplied in the list.
    column_names:
        Accepted by the constructor but not used; the base class is always initialized with `None`.

    Raises
    ------
    ValueError:
        Raises a ValueError if the list of Transformers is None or contains no transformers.
    """

    def __init__(
        self,
        transformers: list[TableTransformer],
        *,
        column_names: str | list[str] | None = None,  # noqa: ARG002
    ) -> None:
        super().__init__(None)

        # Check if transformers actually contains any transformers (covers both None and an empty list).
        if not transformers:
            raise ValueError("transformers must contain at least 1 transformer")

        # Parameters
        # Store a copy so that later mutation of the caller's list cannot alter this transformer (or its hash).
        self._transformers: list[TableTransformer] = list(transformers)

        # Internal State
        self._is_fitted: bool = False

    def __hash__(self) -> int:
        # Combine the base-class hash with the wrapped transformers and the fitted flag.
        return _structural_hash(
            super().__hash__(),
            self._transformers,
            self._is_fitted,
        )

    @property
    def is_fitted(self) -> bool:
        """
        Whether the transformer is fitted.

        Returns
        -------
        True, if the transformer is fitted, False otherwise.
        """
        return self._is_fitted

    def fit(self, table: Table) -> SequentialTableTransformer:
        """
        Fits all of the transformers in order.

        Each transformer is fitted on the output of the previously fitted transformers, i.e. the table is
        transformed step by step while fitting. The fitted transformers are collected in a new
        SequentialTableTransformer, which is returned.

        Parameters
        ----------
        table:
            The table used to fit the transformers.

        Returns
        -------
        The fitted transformer.

        Raises
        ------
        ValueError:
            Raises a ValueError if the table has no rows.
        """
        if table.row_count == 0:
            raise ValueError("The SequentialTableTransformer cannot be fitted because the table contains 0 rows.")

        current_table: Table = table
        fitted_transformers: list[TableTransformer] = []

        for transformer in self._transformers:
            fitted_transformer = transformer.fit(current_table)
            fitted_transformers.append(fitted_transformer)
            # The next transformer must be fitted on the data as it looks after this step.
            current_table = fitted_transformer.transform(current_table)

        result: SequentialTableTransformer = SequentialTableTransformer(
            transformers=fitted_transformers,
            column_names=self._column_names,
        )

        result._is_fitted = True
        return result

    def transform(self, table: Table) -> Table:
        """
        Transform the table using all the transformers sequentially.

        Might change the order and type of columns based on the transformers used.

        Parameters
        ----------
        table:
            The table to be transformed.

        Returns
        -------
        The transformed table.

        Raises
        ------
        TransformerNotFittedError:
            Raises a TransformerNotFittedError if the transformer isn't fitted.
        """
        if not self._is_fitted:
            raise TransformerNotFittedError

        current_table: Table = table
        for transformer in self._transformers:
            current_table = transformer.transform(current_table)

        return current_table

    def inverse_transform(self, transformed_table: Table) -> Table:
        """
        Inversely transforms the table using all the transformers sequentially in inverse order.

        Might change the order and type of columns based on the transformers used.

        Parameters
        ----------
        transformed_table:
            The table to be transformed back.

        Returns
        -------
        The untransformed table.

        Raises
        ------
        TransformerNotFittedError:
            Raises a TransformerNotFittedError if the transformer isn't fitted.
        TransformerNotInvertibleError:
            Raises a TransformerNotInvertibleError if one of the transformers isn't invertible.
        """
        if not self._is_fitted:
            raise TransformerNotFittedError

        # Fail fast: verify that every transformer is invertible before doing any work, so no inverse
        # transformation is computed only to be thrown away. Walking from the back keeps the reported
        # transformer the same as when checking lazily during the inverse pass.
        invertible_transformers: list[InvertibleTableTransformer] = []
        for transformer in reversed(self._transformers):
            if not isinstance(transformer, InvertibleTableTransformer):
                raise TransformerNotInvertibleError(str(type(transformer)))
            invertible_transformers.append(transformer)

        # Sequentially inverse transform the table, working from the back of the list forwards.
        current_table: Table = transformed_table
        for invertible_transformer in invertible_transformers:
            current_table = invertible_transformer.inverse_transform(current_table)

        return current_table
2 changes: 2 additions & 0 deletions src/safeds/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
NonNumericColumnError,
OutputLengthMismatchError,
TransformerNotFittedError,
TransformerNotInvertibleError,
ValueNotPresentWhenFittedError,
)
from ._ml import (
Expand Down Expand Up @@ -66,6 +67,7 @@ class OutOfBoundsError(SafeDsError):
"NonNumericColumnError",
"OutputLengthMismatchError",
"TransformerNotFittedError",
"TransformerNotInvertibleError",
"ValueNotPresentWhenFittedError",
# ML exceptions
"DatasetMissesDataError",
Expand Down
7 changes: 7 additions & 0 deletions src/safeds/exceptions/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ def __init__(self) -> None:
super().__init__("The transformer has not been fitted yet.")


class TransformerNotInvertibleError(Exception):
    """
    Raised when a function tries to invert a non-invertible transformer.

    Parameters
    ----------
    transformer_type:
        Textual description of the offending transformer's type; it is placed at the start of the error message.
    """

    def __init__(self, transformer_type: str) -> None:
        message = f"{transformer_type} is not invertible."
        super().__init__(message)


class ValueNotPresentWhenFittedError(Exception):
"""Exception raised when attempting to one-hot-encode a table containing values not present in the fitting phase."""

Expand Down
27 changes: 25 additions & 2 deletions tests/helpers/_assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@
from safeds.data.tabular.containers import Cell, Column, Table


def assert_tables_equal(table1: Table, table2: Table) -> None:
def assert_tables_equal(
table1: Table,
table2: Table,
*,
ignore_column_order: bool = False,
ignore_row_order: bool = False,
ignore_types: bool = False,
check_exact: bool = False,
xXstupidnameXx marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
"""
Assert that two tables are almost equal.

Expand All @@ -16,8 +24,23 @@ def assert_tables_equal(table1: Table, table2: Table) -> None:
The first table.
table2:
The table to compare the first table to.
ignore_column_order:
If True, the column order is ignored. The assertion passes even when the column order is different.
ignore_row_order:
If True, the row order is ignored. The assertion passes even when the row order is different.
ignore_types:
If True, differing data types are ignored. The assertion passes even when columns have differing data types.
check_exact:
If True, check that floating point values match exactly.
"""
assert_frame_equal(table1._data_frame, table2._data_frame)
assert_frame_equal(
table1._data_frame,
table2._data_frame,
check_row_order=not ignore_row_order,
check_column_order=not ignore_column_order,
check_dtypes=not ignore_types,
check_exact=check_exact,
)


def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: TabularDataset) -> None:
Expand Down
Loading