From 9e4c8ebf6f29c8e219b5d22038816c744b350959 Mon Sep 17 00:00:00 2001
From: Simon <s6snbreu@uni-bonn.de>
Date: Mon, 3 Jun 2024 00:08:15 +0200
Subject: [PATCH 1/4] add InvalidFitDataError and test cases

---
 src/safeds/exceptions/__init__.py             |   2 +
 src/safeds/exceptions/_ml.py                  |   8 +
 .../nn/converters/_input_converter_table.py   |  19 +++
 tests/safeds/ml/nn/test_model.py              | 140 +++++++++++++++++-
 4 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py
index 8f1e9de6d..2f84387c9 100644
--- a/src/safeds/exceptions/__init__.py
+++ b/src/safeds/exceptions/__init__.py
@@ -18,6 +18,7 @@
     DatasetMissesFeaturesError,
     FeatureDataMismatchError,
     InputSizeError,
+    InvalidFitDataError,
     InvalidModelStructureError,
     LearningError,
     ModelNotFittedError,
@@ -69,6 +70,7 @@ class OutOfBoundsError(SafeDsError):
     "DatasetMissesDataError",
     "DatasetMissesFeaturesError",
     "FeatureDataMismatchError",
+    "InvalidFitDataError",
     "InputSizeError",
     "InvalidModelStructureError",
     "LearningError",
diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py
index d84395485..a35e9f73c 100644
--- a/src/safeds/exceptions/_ml.py
+++ b/src/safeds/exceptions/_ml.py
@@ -22,6 +22,14 @@ def __init__(self) -> None:
         super().__init__("Dataset contains no rows")
 
 
+class InvalidFitDataError(Exception):
+    """Raised when a Neural Network is fitted on invalid data."""
+
+    def __init__(self, reason: str) -> None:
+        super().__init__(f"The given Fit Data is invalid:\n{reason}")
+
+
+
 class LearningError(Exception):
     """
     Raised when an error occurred while training a model.
diff --git a/src/safeds/ml/nn/converters/_input_converter_table.py b/src/safeds/ml/nn/converters/_input_converter_table.py
index 52d64ac01..e11dba8d1 100644
--- a/src/safeds/ml/nn/converters/_input_converter_table.py
+++ b/src/safeds/ml/nn/converters/_input_converter_table.py
@@ -4,6 +4,7 @@
 
 from safeds.data.labeled.containers import TabularDataset
 from safeds.data.tabular.containers import Column, Table
+from safeds.exceptions import InvalidFitDataError
 
 from ._input_converter import InputConversion
 
@@ -43,6 +44,24 @@ def _is_fit_data_valid(self, input_data: TabularDataset) -> bool:
             self._feature_names = input_data.features.column_names
             self._target_name = input_data.target.name
             self._first = False
+
+            columns_with_missing_values = []
+            columns_with_non_numerical_data = []
+
+            for col in input_data.features.add_columns([input_data.target]).to_columns():
+                if col.missing_value_count() > 0:
+                    columns_with_missing_values.append(col.name)
+                if not col.type.is_numeric:
+                    columns_with_non_numerical_data.append(col.name)
+
+            reason = ""
+            if len(columns_with_missing_values) > 0:
+                reason += f"The following Columns contain missing values: {columns_with_missing_values}\n"
+            if len(columns_with_non_numerical_data) > 0:
+                reason += f"The following Columns contain non-numerical data: {columns_with_non_numerical_data}"
+            if reason is not "":
+                raise InvalidFitDataError(reason)
+
         return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names))
 
     def _is_predict_data_valid(self, input_data: Table) -> bool:
diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
index 5b8022a2c..8a54db0ca 100644
--- a/tests/safeds/ml/nn/test_model.py
+++ b/tests/safeds/ml/nn/test_model.py
@@ -1,4 +1,5 @@
 import pickle
+import re
 
 import pytest
 from safeds.data.image.typing import ImageSize
@@ -8,7 +9,7 @@
     FeatureDataMismatchError,
     InvalidModelStructureError,
     ModelNotFittedError,
-    OutOfBoundsError,
+    OutOfBoundsError, InvalidFitDataError,
 )
 from safeds.ml.nn import (
     NeuralNetworkClassifier,
@@ -231,6 +232,75 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
         ):
             learned_model.fit(Table.from_dict({"k": [0.1, 0, 0.2], "l": [0, 0.15, 0.5]}).to_tabular_dataset("k"))
 
+    @pytest.mark.parametrize(
+        ("table", "reason"),
+        [
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']",
+                ),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a', 'b']",
+                ),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", None]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\nThe following Columns contain non-numerical data: ['c']",
+                ),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, "c", None]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b', 'c']\nThe following Columns contain non-numerical data: ['a', 'b', 'c']",
+                ),
+            ),
+        ],
+        ids=[
+            "missing value feature",
+            "non-numerical feature",
+            "missing value and non-numerical features",
+            "mixed missing and non-numerical features",
+            "missing value target",
+            "non-numerical target",
+            "missing value and non-numerical target",
+            "mixed missing and non-numerical features and target",
+        ],
+    )
+    def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None:
+        configure_test_with_device(device)
+        model = NeuralNetworkClassifier(
+            InputConversionTable(),
+            [ForwardLayer(neuron_count=4), ForwardLayer(1)],
+        )
+        with pytest.raises(
+            InvalidFitDataError,
+            match=reason,
+        ):
+            model.fit(table)
+
+
+
     # def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None:
     #     configure_test_with_device(device)
     #     model = NeuralNetworkClassifier(
@@ -609,6 +679,73 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
                 Table.from_dict({"k": [1, 0, 2], "l": [0, 15, 5]}).to_tabular_dataset("l"),
             )
 
+
+    @pytest.mark.parametrize(
+        ("table", "reason"),
+        [
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']",
+                ),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a', 'b']",
+                ),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", 5]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", None]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\nThe following Columns contain non-numerical data: ['c']",
+                ),
+            ),
+            (
+                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, "c", None]}).to_tabular_dataset("c"),
+                re.escape(
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b', 'c']\nThe following Columns contain non-numerical data: ['a', 'b', 'c']",
+                ),
+            ),
+        ],
+        ids=[
+            "missing value feature",
+            "non-numerical feature",
+            "missing value and non-numerical features",
+            "mixed missing and non-numerical features",
+            "missing value target",
+            "non-numerical target",
+            "missing value and non-numerical target",
+            "mixed missing and non-numerical features and target",
+        ],
+    )
+    def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None:
+        model = NeuralNetworkRegressor(
+            InputConversionTable(),
+            [ForwardLayer(neuron_count=4), ForwardLayer(1)],
+        )
+        with pytest.raises(
+            InvalidFitDataError,
+            match=reason,
+        ):
+            model.fit(table)
+
     # def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None:
     #     configure_test_with_device(device)
     #     model = NeuralNetworkRegressor(
@@ -781,3 +918,4 @@ def test_should_be_pickleable(self, device: Device) -> None:
 
         # Should not raise
         pickle.dumps(fitted_model)
+

From 75a1d944b2ca1a922d9e90ad31c02a1165a070e5 Mon Sep 17 00:00:00 2001
From: Simon <s6snbreu@uni-bonn.de>
Date: Mon, 3 Jun 2024 00:12:37 +0200
Subject: [PATCH 2/4] linter fix: use device argument in regressor test

---
 tests/safeds/ml/nn/test_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
index 8a54db0ca..1fcffcf6e 100644
--- a/tests/safeds/ml/nn/test_model.py
+++ b/tests/safeds/ml/nn/test_model.py
@@ -736,6 +736,7 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
         ],
     )
     def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None:
+        configure_test_with_device(device)
         model = NeuralNetworkRegressor(
             InputConversionTable(),
             [ForwardLayer(neuron_count=4), ForwardLayer(1)],

From 6b8c633d93ab7bd17143a745d031c67893842fd0 Mon Sep 17 00:00:00 2001
From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Date: Sun, 2 Jun 2024 22:14:16 +0000
Subject: [PATCH 3/4] style: apply automated linter fixes

---
 src/safeds/exceptions/_ml.py                          | 1 -
 src/safeds/ml/nn/converters/_input_converter_table.py | 2 +-
 tests/safeds/ml/nn/test_model.py                      | 7 ++-----
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py
index a35e9f73c..649ea0455 100644
--- a/src/safeds/exceptions/_ml.py
+++ b/src/safeds/exceptions/_ml.py
@@ -29,7 +29,6 @@ def __init__(self, reason: str) -> None:
         super().__init__(f"The given Fit Data is invalid:\n{reason}")
 
 
-
 class LearningError(Exception):
     """
     Raised when an error occurred while training a model.
diff --git a/src/safeds/ml/nn/converters/_input_converter_table.py b/src/safeds/ml/nn/converters/_input_converter_table.py
index e11dba8d1..7f26b39af 100644
--- a/src/safeds/ml/nn/converters/_input_converter_table.py
+++ b/src/safeds/ml/nn/converters/_input_converter_table.py
@@ -59,7 +59,7 @@ def _is_fit_data_valid(self, input_data: TabularDataset) -> bool:
                 reason += f"The following Columns contain missing values: {columns_with_missing_values}\n"
             if len(columns_with_non_numerical_data) > 0:
                 reason += f"The following Columns contain non-numerical data: {columns_with_non_numerical_data}"
-            if reason is not "":
+            if reason != "":
                 raise InvalidFitDataError(reason)
 
         return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names))
diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
index 1fcffcf6e..db1b94840 100644
--- a/tests/safeds/ml/nn/test_model.py
+++ b/tests/safeds/ml/nn/test_model.py
@@ -7,9 +7,10 @@
 from safeds.data.tabular.containers import Table
 from safeds.exceptions import (
     FeatureDataMismatchError,
+    InvalidFitDataError,
     InvalidModelStructureError,
     ModelNotFittedError,
-    OutOfBoundsError, InvalidFitDataError,
+    OutOfBoundsError,
 )
 from safeds.ml.nn import (
     NeuralNetworkClassifier,
@@ -299,8 +300,6 @@ def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDatas
         ):
             model.fit(table)
 
-
-
     # def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None:
     #     configure_test_with_device(device)
     #     model = NeuralNetworkClassifier(
@@ -679,7 +678,6 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
                 Table.from_dict({"k": [1, 0, 2], "l": [0, 15, 5]}).to_tabular_dataset("l"),
             )
 
-
     @pytest.mark.parametrize(
         ("table", "reason"),
         [
@@ -919,4 +917,3 @@ def test_should_be_pickleable(self, device: Device) -> None:
 
         # Should not raise
         pickle.dumps(fitted_model)
-

From 620cbf1b6aaa9513d4bc39c9315ab99d007e3c00 Mon Sep 17 00:00:00 2001
From: Simon <s6snbreu@uni-bonn.de>
Date: Tue, 4 Jun 2024 15:33:00 +0200
Subject: [PATCH 4/4] adjust tests: use single-typed columns, drop mixed-type cases

---
 tests/safeds/ml/nn/test_model.py | 58 ++++++--------------------------
 1 file changed, 10 insertions(+), 48 deletions(-)

diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
index db1b94840..0902d630d 100644
--- a/tests/safeds/ml/nn/test_model.py
+++ b/tests/safeds/ml/nn/test_model.py
@@ -241,51 +241,32 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
                 re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"),
             ),
             (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
                 re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"),
             ),
             (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
                 re.escape(
                     "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']",
                 ),
             ),
-            (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
-                re.escape(
-                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a', 'b']",
-                ),
-            ),
             (
                 Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"),
-                re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n"),
-            ),
-            (
-                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", 5]}).to_tabular_dataset("c"),
-                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
-            ),
-            (
-                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", None]}).to_tabular_dataset("c"),
                 re.escape(
-                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\nThe following Columns contain non-numerical data: ['c']",
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n",
                 ),
             ),
             (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, "c", None]}).to_tabular_dataset("c"),
-                re.escape(
-                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b', 'c']\nThe following Columns contain non-numerical data: ['a', 'b', 'c']",
-                ),
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": ["a", "b", "a"]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
             ),
         ],
         ids=[
             "missing value feature",
             "non-numerical feature",
             "missing value and non-numerical features",
-            "mixed missing and non-numerical features",
             "missing value target",
             "non-numerical target",
-            "missing value and non-numerical target",
-            "mixed missing and non-numerical features and target",
         ],
     )
     def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None:
@@ -686,51 +667,32 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
                 re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"),
             ),
             (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
                 re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"),
             ),
             (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
+                Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
                 re.escape(
                     "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']",
                 ),
             ),
-            (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
-                re.escape(
-                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a', 'b']",
-                ),
-            ),
             (
                 Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"),
-                re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n"),
-            ),
-            (
-                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", 5]}).to_tabular_dataset("c"),
-                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
-            ),
-            (
-                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, "c", None]}).to_tabular_dataset("c"),
                 re.escape(
-                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\nThe following Columns contain non-numerical data: ['c']",
+                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n",
                 ),
             ),
             (
-                Table.from_dict({"a": [1, 2, "a"], "b": [1, "b", None], "c": [0, "c", None]}).to_tabular_dataset("c"),
-                re.escape(
-                    "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b', 'c']\nThe following Columns contain non-numerical data: ['a', 'b', 'c']",
-                ),
+                Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": ["a", "b", "a"]}).to_tabular_dataset("c"),
+                re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
             ),
         ],
         ids=[
             "missing value feature",
             "non-numerical feature",
             "missing value and non-numerical features",
-            "mixed missing and non-numerical features",
             "missing value target",
             "non-numerical target",
-            "missing value and non-numerical target",
-            "mixed missing and non-numerical features and target",
         ],
     )
     def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None: