Skip to content

Commit

Permalink
feat: Improve error handling of TaggedTable (#450)
Browse files Browse the repository at this point in the history
Closes #150 

### Summary of Changes

* feat: Validated inputs of functions
* feat: Raised appropriate exceptions with appropriate messages
* docs: Modified docs
* tests: Tested all exceptions

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Alexander Gréus <alexgreus51@gmail.com>
Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com>
  • Loading branch information
4 people authored Jul 13, 2023
1 parent 6a097a4 commit c5da544
Show file tree
Hide file tree
Showing 11 changed files with 229 additions and 27 deletions.
23 changes: 16 additions & 7 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ def add_column(self, column: Column) -> Table:
DuplicateColumnNameError
If the new column already exists.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of the column does not match the number of rows.
Examples
--------
Expand Down Expand Up @@ -902,10 +902,10 @@ def add_columns(self, columns: list[Column] | Table) -> Table:
Raises
------
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
DuplicateColumnNameError
If at least one column name from the provided column list already exists in the table.
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
Examples
--------
Expand Down Expand Up @@ -973,7 +973,12 @@ def add_row(self, row: Row) -> Table:
if self.number_of_columns == 0:
return Table.from_rows([row])
if len(set(self.column_names) - set(row.column_names)) > 0:
raise UnknownColumnNameError(list(set(self.column_names) - set(row.column_names)))
raise UnknownColumnNameError(
sorted(
set(self.column_names) - set(row.column_names),
key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,
),
)

if result.number_of_rows == 0:
int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64 | np.int32), row.column_names))
Expand Down Expand Up @@ -1026,16 +1031,20 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
"""
if isinstance(rows, Table):
rows = rows.to_rows()
result = self._copy()

if len(rows) == 0:
return self._copy()

different_column_names = set()
for row in rows:
different_column_names.update(set(rows[0].column_names) - set(row.column_names))
different_column_names.update(set(self.column_names) - set(row.column_names))
if len(different_column_names) > 0:
raise UnknownColumnNameError(list(different_column_names))
raise UnknownColumnNameError(
sorted(
different_column_names,
key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,
),
)

result = self._copy()

Expand Down
61 changes: 48 additions & 13 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from typing import TYPE_CHECKING

from safeds.data.tabular.containers import Column, Row, Table
from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError
from safeds.exceptions import (
ColumnIsTargetError,
IllegalSchemaModificationError,
UnknownColumnNameError,
)

if TYPE_CHECKING:
from collections.abc import Callable, Mapping, Sequence
Expand Down Expand Up @@ -167,10 +171,26 @@ def __init__(

@property
def features(self) -> Table:
"""
Get the feature columns of the tagged table.
Returns
-------
Table
The table containing the feature columns.
"""
return self._features

@property
def target(self) -> Column:
"""
Get the target column of the tagged table.
Returns
-------
Column
The target column.
"""
return self._target

# ------------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -198,6 +218,11 @@ def add_column_as_feature(self, column: Column) -> TaggedTable:
the original table is not modified.
Parameters
----------
column : Column
The column to be added.
Returns
-------
result : TaggedTable
Expand All @@ -208,7 +233,7 @@ def add_column_as_feature(self, column: Column) -> TaggedTable:
DuplicateColumnNameError
If the new column already exists.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of the column does not match the number of rows.
"""
return TaggedTable._from_table(
super().add_column(column),
Expand All @@ -222,6 +247,11 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable:
The original table is not modified.
Parameters
----------
columns : list[Column] | Table
The columns to be added as features.
Returns
-------
result : TaggedTable
Expand All @@ -230,9 +260,9 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable:
Raises
------
DuplicateColumnNameError
If the new column already exists.
If any of the new feature columns already exist.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of any feature column does not match the number of rows.
"""
return TaggedTable._from_table(
super().add_columns(columns),
Expand Down Expand Up @@ -270,6 +300,11 @@ def add_column(self, column: Column) -> TaggedTable:
The original table is not modified.
Parameters
----------
column : Column
The column to be added.
Returns
-------
result : TaggedTable
Expand All @@ -280,7 +315,7 @@ def add_column(self, column: Column) -> TaggedTable:
DuplicateColumnNameError
If the new column already exists.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of the column does not match the number of rows.
"""
return TaggedTable._from_table(
super().add_column(column),
Expand All @@ -306,10 +341,10 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable:
Raises
------
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
DuplicateColumnNameError
If at least one column name from the provided column list already exists in the table.
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
"""
return TaggedTable._from_table(
super().add_columns(columns),
Expand All @@ -335,8 +370,8 @@ def add_row(self, row: Row) -> TaggedTable:
Raises
------
SchemaMismatchError
If the schema of the row does not match the table schema.
UnknownColumnNameError
If the row has different column names than the table.
"""
return TaggedTable._from_table(super().add_row(row), target_name=self.target.name)

Expand All @@ -358,8 +393,8 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable:
Raises
------
SchemaMismatchError
If the schema of on of the row does not match the table schema.
UnknownColumnNameError
If at least one of the rows has different column names than the table.
"""
return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name)

Expand Down Expand Up @@ -587,9 +622,9 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable:
Parameters
----------
old_name : str
The old name of the target column
The old name of the target column.
new_name : str
The new name of the target column
The new name of the target column.
Returns
-------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable
from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError

from tests.helpers import assert_that_tagged_tables_are_equal

Expand Down Expand Up @@ -29,9 +30,49 @@
],
ids=["new column as feature", "table contains a non feature/target column"],
)
def test_add_column_as_feature(
def test_should_add_column_as_feature(
tagged_table: TaggedTable,
column: Column,
tagged_table_with_new_column: TaggedTable,
) -> None:
assert_that_tagged_tables_are_equal(tagged_table.add_column_as_feature(column), tagged_table_with_new_column)


@pytest.mark.parametrize(
    ("tagged_table", "column", "error_msg"),
    [
        (
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            Column("A", [7, 8, 9]),
            # `match` is a regular expression; the trailing period is escaped so it is
            # matched literally instead of acting as an "any character" wildcard.
            r"Column 'A' already exists\.",
        ),
    ],
    ids=["column_already_exists"],
)
def test_should_raise_duplicate_column_name_if_column_already_exists(
    tagged_table: TaggedTable,
    column: Column,
    error_msg: str,
) -> None:
    """
    Check that adding a feature column with an already used name is rejected.

    Parameters
    ----------
    tagged_table : TaggedTable
        The table the column is added to.
    column : Column
        A column whose name already exists in `tagged_table`.
    error_msg : str
        Regular expression the raised error message has to match.
    """
    with pytest.raises(DuplicateColumnNameError, match=error_msg):
        tagged_table.add_column_as_feature(column)


@pytest.mark.parametrize(
    ("tagged_table", "column", "error_msg"),
    [
        (
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            Column("C", [5, 7, 8, 9]),
            # The period is escaped: as a bare regex wildcard, "size 4." would also
            # accept a different reported size such as "size 42".
            r"Expected a column of size 3 but got column of size 4\.",
        ),
    ],
    ids=["column_is_oversize"],
)
def test_should_raise_column_size_error_if_column_is_oversize(
    tagged_table: TaggedTable,
    column: Column,
    error_msg: str,
) -> None:
    """
    Check that adding a feature column with more values than the table has rows is rejected.

    Parameters
    ----------
    tagged_table : TaggedTable
        The table the column is added to (three rows in the parametrized case).
    column : Column
        A column with a different number of values than `tagged_table` has rows.
    error_msg : str
        Regular expression the raised error message has to match.
    """
    with pytest.raises(ColumnSizeError, match=error_msg):
        tagged_table.add_column_as_feature(column)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable
from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError

from tests.helpers import assert_that_tagged_tables_are_equal

Expand Down Expand Up @@ -43,3 +44,43 @@ def test_add_columns_as_features(
tagged_table_with_new_columns: TaggedTable,
) -> None:
assert_that_tagged_tables_are_equal(tagged_table.add_columns_as_features(columns), tagged_table_with_new_columns)


@pytest.mark.parametrize(
    ("tagged_table", "columns", "error_msg"),
    [
        (
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            [Column("A", [7, 8, 9]), Column("D", [10, 11, 12])],
            # `match` is a regular expression; the trailing period is escaped so it is
            # matched literally instead of acting as an "any character" wildcard.
            r"Column 'A' already exists\.",
        ),
    ],
    ids=["column_already_exist"],
)
def test_add_columns_raise_duplicate_column_name_if_column_already_exist(
    tagged_table: TaggedTable,
    columns: list[Column] | Table,
    error_msg: str,
) -> None:
    """
    Check that adding several feature columns is rejected if one of their names is already used.

    Parameters
    ----------
    tagged_table : TaggedTable
        The table the columns are added to.
    columns : list[Column] | Table
        The columns to add; at least one name already exists in `tagged_table`.
    error_msg : str
        Regular expression the raised error message has to match.
    """
    with pytest.raises(DuplicateColumnNameError, match=error_msg):
        tagged_table.add_columns_as_features(columns)


@pytest.mark.parametrize(
    ("tagged_table", "columns", "error_msg"),
    [
        (
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            [Column("C", [5, 7, 8, 9]), Column("D", [4, 10, 11, 12])],
            # The period is escaped: as a bare regex wildcard, "size 4." would also
            # accept a different reported size such as "size 42".
            r"Expected a column of size 3 but got column of size 4\.",
        ),
    ],
    ids=["columns_are_oversize"],
)
def test_should_raise_column_size_error_if_columns_are_oversize(
    tagged_table: TaggedTable,
    columns: list[Column] | Table,
    error_msg: str,
) -> None:
    """
    Check that adding feature columns with more values than the table has rows is rejected.

    Parameters
    ----------
    tagged_table : TaggedTable
        The table the columns are added to (three rows in the parametrized case).
    columns : list[Column] | Table
        The columns to add; their size differs from the number of rows of `tagged_table`.
    error_msg : str
        Regular expression the raised error message has to match.
    """
    with pytest.raises(ColumnSizeError, match=error_msg):
        tagged_table.add_columns_as_features(columns)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from safeds.data.tabular.containers import Row, TaggedTable
from safeds.exceptions import UnknownColumnNameError

from tests.helpers import assert_that_tagged_tables_are_equal

Expand Down Expand Up @@ -34,3 +35,42 @@
)
def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> None:
assert_that_tagged_tables_are_equal(table.add_row(row), expected)


@pytest.mark.parametrize(
    ("tagged_table", "row", "error_msg"),
    [
        pytest.param(
            TaggedTable({"feature": [], "target": []}, "target", ["feature"]),
            Row({"feat": None, "targ": None}),
            r"Could not find column\(s\) 'feature, target'",
            id="columns_missing",
        ),
    ],
)
def test_should_raise_an_error_if_row_schema_invalid(
    tagged_table: TaggedTable,
    row: Row,
    error_msg: str,
) -> None:
    """
    Check that adding a row whose column names differ from the table's is rejected.

    Parameters
    ----------
    tagged_table : TaggedTable
        The table the row is added to.
    row : Row
        A row that lacks the column names of `tagged_table`.
    error_msg : str
        Regular expression the raised error message has to match.
    """
    expected_failure = pytest.raises(UnknownColumnNameError, match=error_msg)
    with expected_failure:
        tagged_table.add_row(row)


@pytest.mark.parametrize(
    ("tagged_table", "row", "expected_table"),
    [
        pytest.param(
            TaggedTable({"feature": [], "target": []}, "target"),
            Row({"feature": 2, "target": 5}),
            TaggedTable({"feature": [2], "target": [5]}, "target"),
            id="empty_feature_column",
        ),
    ],
)
def test_should_add_row_to_empty_table(
    tagged_table: TaggedTable,
    row: Row,
    expected_table: TaggedTable,
) -> None:
    """
    Check that a row can be added to a tagged table whose columns hold no values yet.

    Parameters
    ----------
    tagged_table : TaggedTable
        A table with column names but without rows.
    row : Row
        The row to add.
    expected_table : TaggedTable
        The table expected after adding `row`.
    """
    actual_table = tagged_table.add_row(row)
    assert_that_tagged_tables_are_equal(actual_table, expected_table)
Loading

0 comments on commit c5da544

Please sign in to comment.