From af58d9399a42d2e78e4c2902410dabcaaf5fcc71 Mon Sep 17 00:00:00 2001 From: Thomas S Date: Mon, 23 Sep 2024 15:44:31 +0200 Subject: [PATCH 1/2] Support of pandas.Series --- src/skore/item/__init__.py | 3 + src/skore/item/item_repository.py | 2 + src/skore/item/pandas_dataframe_item.py | 13 +--- src/skore/item/pandas_series_item.py | 80 ++++++++++++++++++++++ src/skore/project.py | 3 + src/skore/ui/report.py | 4 ++ tests/unit/item/test_pandas_series_item.py | 36 ++++++++++ tests/unit/test_project.py | 26 ++++--- 8 files changed, 147 insertions(+), 20 deletions(-) create mode 100644 src/skore/item/pandas_series_item.py create mode 100644 tests/unit/item/test_pandas_series_item.py diff --git a/src/skore/item/__init__.py b/src/skore/item/__init__.py index f70b66f87..a2fd83c83 100644 --- a/src/skore/item/__init__.py +++ b/src/skore/item/__init__.py @@ -10,6 +10,7 @@ from skore.item.media_item import MediaItem from skore.item.numpy_array_item import NumpyArrayItem from skore.item.pandas_dataframe_item import PandasDataFrameItem +from skore.item.pandas_series_item import PandasSeriesItem from skore.item.primitive_item import PrimitiveItem from skore.item.sklearn_base_estimator_item import SklearnBaseEstimatorItem @@ -19,6 +20,7 @@ def object_to_item(object: Any) -> Item: for cls in ( PrimitiveItem, PandasDataFrameItem, + PandasSeriesItem, NumpyArrayItem, SklearnBaseEstimatorItem, MediaItem, @@ -42,6 +44,7 @@ def object_to_item(object: Any) -> Item: "MediaItem", "NumpyArrayItem", "PandasDataFrameItem", + "PandasSeriesItem", "PrimitiveItem", "SklearnBaseEstimatorItem", "object_to_item", diff --git a/src/skore/item/item_repository.py b/src/skore/item/item_repository.py index e01c0f6a4..80554dd97 100644 --- a/src/skore/item/item_repository.py +++ b/src/skore/item/item_repository.py @@ -16,6 +16,7 @@ from skore.item.media_item import MediaItem from skore.item.numpy_array_item import NumpyArrayItem from skore.item.pandas_dataframe_item import PandasDataFrameItem +from skore.item.pandas_series_item import PandasSeriesItem from skore.item.primitive_item import PrimitiveItem from skore.item.sklearn_base_estimator_item import SklearnBaseEstimatorItem @@ -31,6 +32,7 @@ class ItemRepository: "MediaItem": MediaItem, "NumpyArrayItem": NumpyArrayItem, "PandasDataFrameItem": PandasDataFrameItem, + "PandasSeriesItem": PandasSeriesItem, "PrimitiveItem": PrimitiveItem, "SklearnBaseEstimatorItem": SklearnBaseEstimatorItem, } diff --git a/src/skore/item/pandas_dataframe_item.py b/src/skore/item/pandas_dataframe_item.py index 642a5127f..194997c3d 100644 --- a/src/skore/item/pandas_dataframe_item.py +++ b/src/skore/item/pandas_dataframe_item.py @@ -47,14 +47,7 @@ def __init__( @cached_property def dataframe(self) -> pandas.DataFrame: - """ - Convert the stored dictionary to a pandas DataFrame. - - Returns - ------- - pd.DataFrame - The pandas DataFrame representation of the stored dictionary. - """ + """The pandas DataFrame.""" import pandas return pandas.DataFrame.from_dict(self.dataframe_dict, orient="tight") @@ -74,9 +67,9 @@ def factory(cls, dataframe: pandas.DataFrame) -> PandasDataFrameItem: PandasDataFrameItem A new PandasDataFrameItem instance. """ - import pandas.core.frame + import pandas - if not isinstance(dataframe, pandas.core.frame.DataFrame): + if not isinstance(dataframe, pandas.DataFrame): raise TypeError(f"Type '{dataframe.__class__}' is not supported.") instance = cls(dataframe_dict=dataframe.to_dict(orient="tight")) diff --git a/src/skore/item/pandas_series_item.py b/src/skore/item/pandas_series_item.py new file mode 100644 index 000000000..1aa2e8101 --- /dev/null +++ b/src/skore/item/pandas_series_item.py @@ -0,0 +1,80 @@ +"""PandasSeriesItem. + +This module defines the PandasSeriesItem class, +which represents a pandas Series item. +""" + +from __future__ import annotations + +from functools import cached_property +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import pandas + +from skore.item.item import Item + + +class PandasSeriesItem(Item): + """ + A class to represent a pandas Series item. + + This class encapsulates a pandas Series along with its + creation and update timestamps. + """ + + def __init__( + self, + series_list: list, + created_at: str | None = None, + updated_at: str | None = None, + ): + """ + Initialize a PandasSeriesItem. + + Parameters + ---------- + series_dict : dict + The dict representation of the series. + created_at : str + The creation timestamp in ISO format. + updated_at : str + The last update timestamp in ISO format. + """ + super().__init__(created_at, updated_at) + + self.series_list = series_list + + @cached_property + def series(self) -> pandas.Series: + """The pandas Series.""" + import pandas + + return pandas.Series(self.series_list) + + @classmethod + def factory(cls, series: pandas.Series) -> PandasSeriesItem: + """ + Create a new PandasSeriesItem instance from a pandas Series. + + Parameters + ---------- + series : pd.Series + The pandas Series to store. + + Returns + ------- + PandasSeriesItem + A new PandasSeriesItem instance. + """ + import pandas + + if not isinstance(series, pandas.Series): + raise TypeError(f"Type '{series.__class__}' is not supported.") + + instance = cls(series_list=series.to_list()) + + # add series as cached property + instance.series = series + + return instance diff --git a/src/skore/project.py b/src/skore/project.py index ae8fdde1e..13a4c8179 100644 --- a/src/skore/project.py +++ b/src/skore/project.py @@ -9,6 +9,7 @@ MediaItem, NumpyArrayItem, PandasDataFrameItem, + PandasSeriesItem, PrimitiveItem, SklearnBaseEstimatorItem, object_to_item, @@ -54,6 +55,8 @@ def get(self, key: str) -> Any: return item.array elif isinstance(item, PandasDataFrameItem): return item.dataframe + elif isinstance(item, PandasSeriesItem): + return item.series elif isinstance(item, SklearnBaseEstimatorItem): return item.estimator elif isinstance(item, MediaItem): diff --git a/src/skore/ui/report.py b/src/skore/ui/report.py index 518daf66c..4dffdac67 100644 --- a/src/skore/ui/report.py +++ b/src/skore/ui/report.py @@ -12,6 +12,7 @@ from skore.item.media_item import MediaItem from skore.item.numpy_array_item import NumpyArrayItem from skore.item.pandas_dataframe_item import PandasDataFrameItem +from skore.item.pandas_series_item import PandasSeriesItem from skore.item.primitive_item import PrimitiveItem from skore.item.sklearn_base_estimator_item import SklearnBaseEstimatorItem from skore.layout import Layout @@ -50,6 +51,9 @@ def __serialize_project(project: Project) -> SerializedProject: elif isinstance(item, PandasDataFrameItem): value = item.dataframe_dict media_type = "application/vnd.dataframe+json" + elif isinstance(item, PandasSeriesItem): + value = item.series_list + media_type = "text/markdown" elif isinstance(item, SklearnBaseEstimatorItem): value = item.estimator_html_repr media_type = "application/vnd.sklearn.estimator+html" diff --git a/tests/unit/item/test_pandas_series_item.py b/tests/unit/item/test_pandas_series_item.py new file mode 100644 index 000000000..b6aa8498f --- /dev/null +++ b/tests/unit/item/test_pandas_series_item.py @@ -0,0 +1,36 @@ +import pytest +from pandas import Series +from pandas.testing import assert_series_equal +from skore.item import PandasSeriesItem + + +class TestPandasSeriesItem: + @pytest.fixture(autouse=True) + def monkeypatch_datetime(self, monkeypatch, MockDatetime): + monkeypatch.setattr("skore.item.item.datetime", MockDatetime) + + @pytest.mark.order(0) + def test_factory(self, mock_nowstr): + series = Series([0, 1, 2]) + series_list = series.to_list() + + item = PandasSeriesItem.factory(series) + + assert item.series_list == series_list + assert item.created_at == mock_nowstr + assert item.updated_at == mock_nowstr + + @pytest.mark.order(1) + def test_series(self, mock_nowstr): + series = Series([0, 1, 2]) + series_list = series.to_list() + + item1 = PandasSeriesItem.factory(series) + item2 = PandasSeriesItem( + series_list=series_list, + created_at=mock_nowstr, + updated_at=mock_nowstr, + ) + + assert_series_equal(item1.series, series) + assert_series_equal(item2.series, series) diff --git a/tests/unit/test_project.py b/tests/unit/test_project.py index 553e08130..95e12fceb 100644 --- a/tests/unit/test_project.py +++ b/tests/unit/test_project.py @@ -26,39 +26,45 @@ def project(): def test_put_string_item(project): - project.put("string_item", "Hello, World!") # JSONItem + project.put("string_item", "Hello, World!") assert project.get("string_item") == "Hello, World!" def test_put_int_item(project): - project.put("int_item", 42) # JSONItem + project.put("int_item", 42) assert project.get("int_item") == 42 def test_put_float_item(project): - project.put("float_item", 3.14) # JSONItem + project.put("float_item", 3.14) assert project.get("float_item") == 3.14 def test_put_bool_item(project): - project.put("bool_item", True) # JSONItem + project.put("bool_item", True) assert project.get("bool_item") is True def test_put_list_item(project): - project.put("list_item", [1, 2, 3]) # JSONItem + project.put("list_item", [1, 2, 3]) assert project.get("list_item") == [1, 2, 3] def test_put_dict_item(project): - project.put("dict_item", {"key": "value"}) # JSONItem + project.put("dict_item", {"key": "value"}) assert project.get("dict_item") == {"key": "value"} -def test_put_pandas_df(project): - df = pandas.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - project.put("pandas_df", df) # DataFrameItem - pandas.testing.assert_frame_equal(project.get("pandas_df"), df) +def test_put_pandas_dataframe(project): + dataframe = pandas.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + project.put("pandas_dataframe", dataframe) + pandas.testing.assert_frame_equal(project.get("pandas_dataframe"), dataframe) + + +def test_put_pandas_series(project): + series = pandas.Series([0, 1, 2]) + project.put("pandas_series", series) + pandas.testing.assert_series_equal(project.get("pandas_series"), series) def test_put_numpy_array(project): From c15504828d7cd9c45121410db50dbfb0d0f904ed Mon Sep 17 00:00:00 2001 From: "Thomas S." Date: Tue, 24 Sep 2024 09:06:21 +0200 Subject: [PATCH 2/2] Update src/skore/item/pandas_series_item.py Fix docstring Co-authored-by: Auguste Baum <52001167+augustebaum@users.noreply.github.com> --- src/skore/item/pandas_series_item.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/skore/item/pandas_series_item.py b/src/skore/item/pandas_series_item.py index 1aa2e8101..b8f638fc7 100644 --- a/src/skore/item/pandas_series_item.py +++ b/src/skore/item/pandas_series_item.py @@ -34,8 +34,8 @@ def __init__( Parameters ---------- - series_dict : dict - The dict representation of the series. + series_list : list + The list representation of the series. created_at : str The creation timestamp in ISO format. updated_at : str