From 06eab77ac5d0512dff38cf4d8aa181dd0032fe63 Mon Sep 17 00:00:00 2001 From: Gerhardsa0 <113539440+Gerhardsa0@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:12:52 +0200 Subject: [PATCH 1/2] feat: add temporal operations (#832) Closes #XYZ ### Summary of Changes Added temporal operations to the temporal interface. --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Lars Reimann Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- .../tabular/containers/_lazy_temporal_cell.py | 18 +++ .../data/tabular/containers/_temporal_cell.py | 151 +++++++++++++++++- .../containers/_temporal_cell/test_century.py | 20 +++ .../containers/_temporal_cell/test_day.py | 20 +++ .../containers/_temporal_cell/test_month.py | 20 +++ .../containers/_temporal_cell/test_week.py | 20 +++ .../containers/_temporal_cell/test_weekday.py | 20 +++ .../containers/_temporal_cell/test_year.py | 20 +++ 8 files changed, 284 insertions(+), 5 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_temporal_cell/test_century.py create mode 100644 tests/safeds/data/tabular/containers/_temporal_cell/test_day.py create mode 100644 tests/safeds/data/tabular/containers/_temporal_cell/test_month.py create mode 100644 tests/safeds/data/tabular/containers/_temporal_cell/test_week.py create mode 100644 tests/safeds/data/tabular/containers/_temporal_cell/test_weekday.py create mode 100644 tests/safeds/data/tabular/containers/_temporal_cell/test_year.py diff --git a/src/safeds/data/tabular/containers/_lazy_temporal_cell.py b/src/safeds/data/tabular/containers/_lazy_temporal_cell.py index 12619605c..180ecb58c 100644 --- a/src/safeds/data/tabular/containers/_lazy_temporal_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_temporal_cell.py @@ -31,6 +31,24 @@ def __sizeof__(self) -> int: # Temporal operations # ------------------------------------------------------------------------------------------------------------------ + def century(self) -> Cell[int]: + return _LazyCell(self._expression.dt.century()) + + def weekday(self) -> Cell[int]: + return _LazyCell(self._expression.dt.weekday()) + + def week(self) -> Cell[int]: + return _LazyCell(self._expression.dt.week()) + + def year(self) -> Cell[int]: + return _LazyCell(self._expression.dt.year()) + + def month(self) -> Cell[int]: + return _LazyCell(self._expression.dt.month()) + + def day(self) -> Cell[int]: + return _LazyCell(self._expression.dt.day()) + def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str]: if not _check_format_string(format_string): raise ValueError("Invalid format string") diff --git a/src/safeds/data/tabular/containers/_temporal_cell.py b/src/safeds/data/tabular/containers/_temporal_cell.py index 85368bec4..e4a3dca59 100644 --- a/src/safeds/data/tabular/containers/_temporal_cell.py +++ b/src/safeds/data/tabular/containers/_temporal_cell.py @@ -9,12 +9,9 @@ class TemporalCell(ABC): """ - A class that contains temporal methods for a column. + Namespace for operations on temporal data. - Parameters - ---------- - column: - The column to be operated on. + This class cannot be instantiated directly. It can only be accessed using the `dt` attribute of a cell. Examples -------- @@ -31,6 +28,150 @@ class TemporalCell(ABC): +------------+ """ + @abstractmethod + def century(self) -> Cell[int]: + """ + Get the century of the underlying date(time) data. + + Returns + ------- + A cell containing the century as integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> import datetime + >>> column = Column("example", [datetime.date(2022, 1, 1)]) + >>> column.transform(lambda cell: cell.dt.century()) + +---------+ + | example | + | --- | + | i32 | + +=========+ + | 21 | + +---------+ + """ + + @abstractmethod + def weekday(self) -> Cell[int]: + """ + Get the weekday of the underlying date(time) data. + + Returns + ------- + A cell containing the weekday as integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> import datetime + >>> column = Column("example", [datetime.date(2022, 1, 1)]) + >>> column.transform(lambda cell: cell.dt.weekday()) + +---------+ + | example | + | --- | + | i8 | + +=========+ + | 6 | + +---------+ + """ + + @abstractmethod + def week(self) -> Cell[int]: + """ + Get the week of the underlying date(time) data. + + Returns + ------- + A cell containing the week as integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> import datetime + >>> column = Column("example", [datetime.date(2022, 1, 1)]) + >>> column.transform(lambda cell: cell.dt.week()) + +---------+ + | example | + | --- | + | i8 | + +=========+ + | 52 | + +---------+ + """ + + @abstractmethod + def year(self) -> Cell[int]: + """ + Get the year of the underlying date(time) data. + + Returns + ------- + A cell containing the year as integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> import datetime + >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column.transform(lambda cell: cell.dt.year()) + +---------+ + | example | + | --- | + | i32 | + +=========+ + | 2022 | + +---------+ + """ + + @abstractmethod + def month(self) -> Cell[int]: + """ + Get the month of the underlying date(time) data. + + Returns + ------- + A cell containing the month as integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> import datetime + >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column.transform(lambda cell: cell.dt.month()) + +---------+ + | example | + | --- | + | i8 | + +=========+ + | 1 | + +---------+ + """ + + @abstractmethod + def day(self) -> Cell[int]: + """ + Get the day of the underlying date(time) data. + + Returns + ------- + A cell containing the day as integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> import datetime + >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column.transform(lambda cell: cell.dt.day()) + +---------+ + | example | + | --- | + | i8 | + +=========+ + | 9 | + +---------+ + """ + @abstractmethod def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str]: """ diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_century.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_century.py new file mode 100644 index 000000000..2d36808b6 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_century.py @@ -0,0 +1,20 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("expected", "input_date"), + [ + (18, datetime.datetime(1800, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), + (21, datetime.date(2022, 1, 1)), + ], + ids=[ + "ISO datetime", + "ISO date", + ], +) +def test_get_day(input_date: datetime.date, expected: bool) -> None: + assert_cell_operation_works(input_date, lambda cell: cell.dt.century(), expected) diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_day.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_day.py new file mode 100644 index 000000000..afa9c588b --- /dev/null +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_day.py @@ -0,0 +1,20 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("expected", "input_date"), + [ + (9, datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), + (1, datetime.date(2022, 1, 1)), + ], + ids=[ + "ISO datetime", + "ISO date", + ], +) +def test_get_day(input_date: datetime.date, expected: bool) -> None: + assert_cell_operation_works(input_date, lambda cell: cell.dt.day(), expected) diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_month.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_month.py new file mode 100644 index 000000000..626dff546 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_month.py @@ -0,0 +1,20 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("expected", "input_date"), + [ + (3, datetime.datetime(2022, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), + (1, datetime.date(2022, 1, 1)), + ], + ids=[ + "ISO datetime", + "ISO date", + ], +) +def test_get_month(input_date: datetime.date, expected: bool) -> None: + assert_cell_operation_works(input_date, lambda cell: cell.dt.month(), expected) diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_week.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_week.py new file mode 100644 index 000000000..3a6c7fd60 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_week.py @@ -0,0 +1,20 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("expected", "input_date"), + [ + (10, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), + (52, datetime.date(2022, 1, 1)), + ], + ids=[ + "ISO datetime", + "ISO date", + ], +) +def test_get_week(input_date: datetime.date, expected: bool) -> None: + assert_cell_operation_works(input_date, lambda cell: cell.dt.week(), expected) diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_weekday.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_weekday.py new file mode 100644 index 000000000..9db08b4fc --- /dev/null +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_weekday.py @@ -0,0 +1,20 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("expected", "input_date"), + [ + (4, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), + (6, datetime.date(2022, 1, 1)), + ], + ids=[ + "ISO datetime", + "ISO date", + ], +) +def test_get_weekday(input_date: datetime.date, expected: bool) -> None: + assert_cell_operation_works(input_date, lambda cell: cell.dt.weekday(), expected) diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_year.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_year.py new file mode 100644 index 000000000..e35810e52 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_year.py @@ -0,0 +1,20 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("expected", "input_date"), + [ + (2023, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), + (2022, datetime.date(2022, 1, 1)), + ], + ids=[ + "ISO datetime", + "ISO date", + ], +) +def test_get_year(input_date: datetime.date, expected: bool) -> None: + assert_cell_operation_works(input_date, lambda cell: cell.dt.year(), expected) From 487854cdd5d26f0995f89c0470425b618cb381ff Mon Sep 17 00:00:00 2001 From: Simon Breuer <86068340+sibre28@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:05:49 +0200 Subject: [PATCH 2/2] feat: add InvalidFitDataError (#824) Closes #655 ### Summary of Changes Add InvalidFitDataError and tests --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- src/safeds/exceptions/__init__.py | 2 + src/safeds/exceptions/_ml.py | 7 ++ .../nn/converters/_input_converter_table.py | 19 ++++ tests/safeds/ml/nn/test_model.py | 98 +++++++++++++++++++ 4 files changed, 126 insertions(+) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 8f1e9de6d..2f84387c9 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -18,6 +18,7 @@ DatasetMissesFeaturesError, FeatureDataMismatchError, InputSizeError, + InvalidFitDataError, InvalidModelStructureError, LearningError, ModelNotFittedError, @@ -69,6 +70,7 @@ class OutOfBoundsError(SafeDsError): "DatasetMissesDataError", "DatasetMissesFeaturesError", "FeatureDataMismatchError", + "InvalidFitDataError", "InputSizeError", "InvalidModelStructureError", "LearningError", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index d84395485..649ea0455 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -22,6 +22,13 @@ def __init__(self) -> None: super().__init__("Dataset contains no rows") +class InvalidFitDataError(Exception): + """Raised when a Neural Network is fitted on invalid data.""" + + def __init__(self, reason: str) -> None: + super().__init__(f"The given Fit Data is invalid:\n{reason}") + + class LearningError(Exception): """ Raised when an error occurred while training a model. diff --git a/src/safeds/ml/nn/converters/_input_converter_table.py b/src/safeds/ml/nn/converters/_input_converter_table.py index 52d64ac01..7f26b39af 100644 --- a/src/safeds/ml/nn/converters/_input_converter_table.py +++ b/src/safeds/ml/nn/converters/_input_converter_table.py @@ -4,6 +4,7 @@ from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Column, Table +from safeds.exceptions import InvalidFitDataError from ._input_converter import InputConversion @@ -43,6 +44,24 @@ def _is_fit_data_valid(self, input_data: TabularDataset) -> bool: self._feature_names = input_data.features.column_names self._target_name = input_data.target.name self._first = False + + columns_with_missing_values = [] + columns_with_non_numerical_data = [] + + for col in input_data.features.add_columns([input_data.target]).to_columns(): + if col.missing_value_count() > 0: + columns_with_missing_values.append(col.name) + if not col.type.is_numeric: + columns_with_non_numerical_data.append(col.name) + + reason = "" + if len(columns_with_missing_values) > 0: + reason += f"The following Columns contain missing values: {columns_with_missing_values}\n" + if len(columns_with_non_numerical_data) > 0: + reason += f"The following Columns contain non-numerical data: {columns_with_non_numerical_data}" + if reason != "": + raise InvalidFitDataError(reason) + return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names)) def _is_predict_data_valid(self, input_data: Table) -> bool: diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 5b8022a2c..0902d630d 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,4 +1,5 @@ import pickle +import re import pytest from safeds.data.image.typing import ImageSize @@ -6,6 +7,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import ( FeatureDataMismatchError, + InvalidFitDataError, InvalidModelStructureError, ModelNotFittedError, OutOfBoundsError, @@ -231,6 +233,54 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None: ): learned_model.fit(Table.from_dict({"k": [0.1, 0, 0.2], "l": [0, 0.15, 0.5]}).to_tabular_dataset("k")) + @pytest.mark.parametrize( + ("table", "reason"), + [ + ( + Table.from_dict({"a": [1, 2, 3], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"), + re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"), + ), + ( + Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"), + re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"), + ), + ( + Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"), + re.escape( + "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']", + ), + ), + ( + Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"), + re.escape( + "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n", + ), + ), + ( + Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": ["a", "b", "a"]}).to_tabular_dataset("c"), + re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"), + ), + ], + ids=[ + "missing value feature", + "non-numerical feature", + "missing value and non-numerical features", + "missing value target", + "non-numerical target", + ], + ) + def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None: + configure_test_with_device(device) + model = NeuralNetworkClassifier( + InputConversionTable(), + [ForwardLayer(neuron_count=4), ForwardLayer(1)], + ) + with pytest.raises( + InvalidFitDataError, + match=reason, + ): + model.fit(table) + # def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None: # configure_test_with_device(device) # model = NeuralNetworkClassifier( @@ -609,6 +659,54 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None: Table.from_dict({"k": [1, 0, 2], "l": [0, 15, 5]}).to_tabular_dataset("l"), ) + @pytest.mark.parametrize( + ("table", "reason"), + [ + ( + Table.from_dict({"a": [1, 2, 3], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"), + re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"), + ), + ( + Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"), + re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"), + ), + ( + Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"), + re.escape( + "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']", + ), + ), + ( + Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"), + re.escape( + "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n", + ), + ), + ( + Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": ["a", "b", "a"]}).to_tabular_dataset("c"), + re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"), + ), + ], + ids=[ + "missing value feature", + "non-numerical feature", + "missing value and non-numerical features", + "missing value target", + "non-numerical target", + ], + ) + def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None: + configure_test_with_device(device) + model = NeuralNetworkRegressor( + InputConversionTable(), + [ForwardLayer(neuron_count=4), ForwardLayer(1)], + ) + with pytest.raises( + InvalidFitDataError, + match=reason, + ): + model.fit(table) + # def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None: # configure_test_with_device(device) # model = NeuralNetworkRegressor(