From b89d79e659d02701137de6f2f514370089719404 Mon Sep 17 00:00:00 2001 From: alex-senger Date: Fri, 23 Jun 2023 15:47:25 +0200 Subject: [PATCH 1/3] feat: improve `table.summary`. Catch `ValueError` thrown by `column.stability` Co-authored-by: patrikguempel <128832338+patrikguempel@users.noreply.github.com> --- src/safeds/data/tabular/containers/_table.py | 2 +- .../tabular/containers/_table/test_summary.py | 33 ++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 12fbb757c..92832b269 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -598,7 +598,7 @@ def summary(self) -> Table: for function in statistics.values(): try: values.append(str(function())) - except NonNumericColumnError: + except (NonNumericColumnError, ValueError): values.append("-") result = pd.concat([result, pd.DataFrame(values)], axis=1) diff --git a/tests/safeds/data/tabular/containers/_table/test_summary.py b/tests/safeds/data/tabular/containers/_table/test_summary.py index 460bc287c..5af69a510 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summary.py +++ b/tests/safeds/data/tabular/containers/_table/test_summary.py @@ -112,8 +112,39 @@ }, ), ), + ( + Table({"col": [None, None]}), + Table( + { + "metrics": [ + "maximum", + "minimum", + "mean", + "mode", + "median", + "sum", + "variance", + "standard deviation", + "idness", + "stability", + ], + "col": [ + "-", + "-", + "-", + "[]", + "-", + "-", + "-", + "-", + "0.0", + "-" + ], + }, + ), + ), ], - ids=["Column of integers and Column of characters", "empty", "empty with columns"], + ids=["Column of integers and Column of characters", "empty", "empty with columns", "Column of None"], ) def test_should_make_summary(table: Table, expected: Table) -> None: assert expected.schema == table.summary().schema From b2f608faeb777ffdcb8353f85a586f856d2decb9 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:58:26 +0000 Subject: [PATCH 2/3] style: apply automated linter fixes --- .../data/tabular/containers/_table/test_summary.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_summary.py b/tests/safeds/data/tabular/containers/_table/test_summary.py index 5af69a510..f4abaa988 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summary.py +++ b/tests/safeds/data/tabular/containers/_table/test_summary.py @@ -128,18 +128,7 @@ "idness", "stability", ], - "col": [ - "-", - "-", - "-", - "[]", - "-", - "-", - "-", - "-", - "0.0", - "-" - ], + "col": ["-", "-", "-", "[]", "-", "-", "-", "-", "0.0", "-"], }, ), ), From a18fe6a94595b9238841f81a3ea90cb1cae7125e Mon Sep 17 00:00:00 2001 From: alex-senger Date: Fri, 23 Jun 2023 15:24:00 +0200 Subject: [PATCH 3/3] feat: improve error handling of `column.stability` when given a column that contains only None Co-authored-by: patrikguempel <128832338+patrikguempel@users.noreply.github.com> --- src/safeds/data/tabular/containers/_column.py | 6 ++++++ .../data/tabular/containers/_column/test_stability.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index ae42ace06..a4f198f06 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -504,6 +504,8 @@ def stability(self) -> float: \frac{\text{number of occurrences of most common non-null value}}{\text{number of non-null values}} $$ + The stability cannot be calculated for a column with only null values. + Returns ------- stability : float @@ -516,6 +518,10 @@ def stability(self) -> float: """ if self._data.size == 0: raise ColumnSizeError("> 0", "0") + + if self.all(lambda x: x is None): + raise ValueError("Stability cannot be calculated for a column with only null values.") + return self._data.value_counts()[self.mode()[0]] / self._data.count() def standard_deviation(self) -> float: diff --git a/tests/safeds/data/tabular/containers/_column/test_stability.py b/tests/safeds/data/tabular/containers/_column/test_stability.py index d1bb893d2..7de0050d9 100644 --- a/tests/safeds/data/tabular/containers/_column/test_stability.py +++ b/tests/safeds/data/tabular/containers/_column/test_stability.py @@ -25,7 +25,13 @@ def test_should_return_stability_of_column(values: list[Any], expected: float) - assert column.stability() == expected -def test_should_raise_if_column_is_empty() -> None: +def test_should_raise_column_size_error_if_column_is_empty() -> None: column: Column[Any] = Column("A", []) - with pytest.raises(ColumnSizeError): + with pytest.raises(ColumnSizeError, match="Expected a column of size > 0 but got column of size 0."): + column.stability() + + +def test_should_raise_value_error_if_column_contains_only_none() -> None: + column: Column[Any] = Column("A", [None, None]) + with pytest.raises(ValueError, match="Stability cannot be calculated for a column with only null values."): column.stability()