Skip to content

Commit

Permalink
Support of pandas.Series (#378)
Browse files: browse the repository at this point in the history.
Co-authored-by: Auguste Baum <52001167+augustebaum@users.noreply.github.com>
  • Loading branch information
thomass-dev and augustebaum authored Sep 24, 2024
1 parent 1b9b1ad commit 3749ca7
Show file tree
Hide file tree
Showing 8 changed files with 147 additions and 20 deletions.
3 changes: 3 additions & 0 deletions src/skore/item/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from skore.item.media_item import MediaItem
from skore.item.numpy_array_item import NumpyArrayItem
from skore.item.pandas_dataframe_item import PandasDataFrameItem
from skore.item.pandas_series_item import PandasSeriesItem
from skore.item.primitive_item import PrimitiveItem
from skore.item.sklearn_base_estimator_item import SklearnBaseEstimatorItem

Expand All @@ -19,6 +20,7 @@ def object_to_item(object: Any) -> Item:
for cls in (
PrimitiveItem,
PandasDataFrameItem,
PandasSeriesItem,
NumpyArrayItem,
SklearnBaseEstimatorItem,
MediaItem,
Expand All @@ -42,6 +44,7 @@ def object_to_item(object: Any) -> Item:
"MediaItem",
"NumpyArrayItem",
"PandasDataFrameItem",
"PandasSeriesItem",
"PrimitiveItem",
"SklearnBaseEstimatorItem",
"object_to_item",
Expand Down
2 changes: 2 additions & 0 deletions src/skore/item/item_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from skore.item.media_item import MediaItem
from skore.item.numpy_array_item import NumpyArrayItem
from skore.item.pandas_dataframe_item import PandasDataFrameItem
from skore.item.pandas_series_item import PandasSeriesItem
from skore.item.primitive_item import PrimitiveItem
from skore.item.sklearn_base_estimator_item import SklearnBaseEstimatorItem

Expand All @@ -31,6 +32,7 @@ class ItemRepository:
"MediaItem": MediaItem,
"NumpyArrayItem": NumpyArrayItem,
"PandasDataFrameItem": PandasDataFrameItem,
"PandasSeriesItem": PandasSeriesItem,
"PrimitiveItem": PrimitiveItem,
"SklearnBaseEstimatorItem": SklearnBaseEstimatorItem,
}
Expand Down
13 changes: 3 additions & 10 deletions src/skore/item/pandas_dataframe_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,7 @@ def __init__(

@cached_property
def dataframe(self) -> pandas.DataFrame:
    """
    The pandas DataFrame rebuilt from the stored dictionary.

    Returns
    -------
    pandas.DataFrame
        The DataFrame built from ``self.dataframe_dict`` using the
        "tight" orientation.
    """
    # Imported locally, as in the original block.
    import pandas

    return pandas.DataFrame.from_dict(self.dataframe_dict, orient="tight")
Expand All @@ -74,9 +67,9 @@ def factory(cls, dataframe: pandas.DataFrame) -> PandasDataFrameItem:
PandasDataFrameItem
A new PandasDataFrameItem instance.
"""
import pandas.core.frame
import pandas

if not isinstance(dataframe, pandas.core.frame.DataFrame):
if not isinstance(dataframe, pandas.DataFrame):
raise TypeError(f"Type '{dataframe.__class__}' is not supported.")

instance = cls(dataframe_dict=dataframe.to_dict(orient="tight"))
Expand Down
80 changes: 80 additions & 0 deletions src/skore/item/pandas_series_item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""PandasSeriesItem.
This module defines the PandasSeriesItem class,
which represents a pandas Series item.
"""

from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING

if TYPE_CHECKING:
import pandas

from skore.item.item import Item


class PandasSeriesItem(Item):
    """
    Item wrapping a pandas Series.

    The values of the series are kept as a plain list (``series_list``),
    next to the creation and update timestamps forwarded to ``Item``.
    """

    def __init__(
        self,
        series_list: list,
        created_at: str | None = None,
        updated_at: str | None = None,
    ):
        """
        Initialize a PandasSeriesItem.

        Parameters
        ----------
        series_list : list
            The values of the series, as a list.
        created_at : str, optional
            The creation timestamp in ISO format.
        updated_at : str, optional
            The last update timestamp in ISO format.
        """
        super().__init__(created_at, updated_at)

        self.series_list = series_list

    @cached_property
    def series(self) -> pandas.Series:
        """
        The pandas Series built from ``series_list``.

        Only the list of values is handed to ``pandas.Series``; no index
        and no name are passed along.
        """
        import pandas

        values = self.series_list
        return pandas.Series(values)

    @classmethod
    def factory(cls, series: pandas.Series) -> PandasSeriesItem:
        """
        Build a PandasSeriesItem from a pandas Series.

        Parameters
        ----------
        series : pandas.Series
            The pandas Series to store.

        Returns
        -------
        PandasSeriesItem
            A new item whose ``series_list`` is ``series.to_list()``.

        Raises
        ------
        TypeError
            If ``series`` is not a ``pandas.Series``.
        """
        import pandas

        if isinstance(series, pandas.Series):
            item = cls(series_list=series.to_list())

            # Seed the cached property with the object we were given, so the
            # first access to ``item.series`` does not rebuild it from the list.
            item.series = series

            return item

        raise TypeError(f"Type '{series.__class__}' is not supported.")
3 changes: 3 additions & 0 deletions src/skore/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MediaItem,
NumpyArrayItem,
PandasDataFrameItem,
PandasSeriesItem,
PrimitiveItem,
SklearnBaseEstimatorItem,
object_to_item,
Expand Down Expand Up @@ -120,6 +121,8 @@ def get(self, key: str) -> Any:
return item.array
elif isinstance(item, PandasDataFrameItem):
return item.dataframe
elif isinstance(item, PandasSeriesItem):
return item.series
elif isinstance(item, SklearnBaseEstimatorItem):
return item.estimator
elif isinstance(item, MediaItem):
Expand Down
4 changes: 4 additions & 0 deletions src/skore/ui/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from skore.item.media_item import MediaItem
from skore.item.numpy_array_item import NumpyArrayItem
from skore.item.pandas_dataframe_item import PandasDataFrameItem
from skore.item.pandas_series_item import PandasSeriesItem
from skore.item.primitive_item import PrimitiveItem
from skore.item.sklearn_base_estimator_item import SklearnBaseEstimatorItem
from skore.project import Project
Expand Down Expand Up @@ -55,6 +56,9 @@ def __serialize_project(project: Project) -> SerializedProject:
elif isinstance(item, PandasDataFrameItem):
value = item.dataframe_dict
media_type = "application/vnd.dataframe+json"
elif isinstance(item, PandasSeriesItem):
value = item.series_list
media_type = "text/markdown"
elif isinstance(item, SklearnBaseEstimatorItem):
value = item.estimator_html_repr
media_type = "application/vnd.sklearn.estimator+html"
Expand Down
36 changes: 36 additions & 0 deletions tests/unit/item/test_pandas_series_item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pytest
from pandas import Series
from pandas.testing import assert_series_equal
from skore.item import PandasSeriesItem


class TestPandasSeriesItem:
    """Unit tests for ``PandasSeriesItem``."""

    @pytest.fixture(autouse=True)
    def monkeypatch_datetime(self, monkeypatch, MockDatetime):
        """Replace ``skore.item.item.datetime`` with ``MockDatetime`` for every test."""
        # NOTE(review): MockDatetime / mock_nowstr come from fixtures defined
        # outside this file; presumably they pin "now" to a fixed value.
        monkeypatch.setattr("skore.item.item.datetime", MockDatetime)

    @pytest.mark.order(0)
    def test_factory(self, mock_nowstr):
        """``factory`` stores the values as a list and sets both timestamps."""
        series = Series([0, 1, 2])
        series_list = series.to_list()

        item = PandasSeriesItem.factory(series)

        assert item.series_list == series_list
        assert item.created_at == mock_nowstr
        assert item.updated_at == mock_nowstr

    @pytest.mark.order(1)
    def test_series(self, mock_nowstr):
        """``series`` is equal whether the item comes from ``factory`` or ``__init__``."""
        series = Series([0, 1, 2])
        series_list = series.to_list()

        item1 = PandasSeriesItem.factory(series)
        item2 = PandasSeriesItem(
            series_list=series_list,
            created_at=mock_nowstr,
            updated_at=mock_nowstr,
        )

        assert_series_equal(item1.series, series)
        assert_series_equal(item2.series, series)

    @pytest.mark.order(2)
    def test_factory_exception(self):
        """``factory`` rejects anything that is not a ``pandas.Series``."""
        with pytest.raises(TypeError):
            PandasSeriesItem.factory(None)

        # A plain list of values is not accepted either; only a Series is.
        with pytest.raises(TypeError):
            PandasSeriesItem.factory([0, 1, 2])
26 changes: 16 additions & 10 deletions tests/unit/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,39 +26,45 @@ def project():


def test_put_string_item(project):
    """A string stored with ``put`` is returned unchanged by ``get``."""
    project.put("string_item", "Hello, World!")
    assert project.get("string_item") == "Hello, World!"


def test_put_int_item(project):
    """An int stored with ``put`` is returned unchanged by ``get``."""
    project.put("int_item", 42)
    assert project.get("int_item") == 42


def test_put_float_item(project):
    """A float stored with ``put`` is returned unchanged by ``get``."""
    project.put("float_item", 3.14)
    assert project.get("float_item") == 3.14


def test_put_bool_item(project):
    """A bool stored with ``put`` comes back as the ``True`` singleton."""
    project.put("bool_item", True)
    assert project.get("bool_item") is True


def test_put_list_item(project):
    """A list stored with ``put`` is returned equal by ``get``."""
    project.put("list_item", [1, 2, 3])
    assert project.get("list_item") == [1, 2, 3]


def test_put_dict_item(project):
    """A dict stored with ``put`` is returned equal by ``get``."""
    project.put("dict_item", {"key": "value"})
    assert project.get("dict_item") == {"key": "value"}


def test_put_pandas_dataframe(project):
    """A pandas DataFrame stored with ``put`` is returned equal by ``get``."""
    dataframe = pandas.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    project.put("pandas_dataframe", dataframe)
    pandas.testing.assert_frame_equal(project.get("pandas_dataframe"), dataframe)


def test_put_pandas_series(project):
    """A pandas Series stored with ``put`` is returned equal by ``get``."""
    expected = pandas.Series([0, 1, 2])
    project.put("pandas_series", expected)

    actual = project.get("pandas_series")
    pandas.testing.assert_series_equal(actual, expected)


def test_put_numpy_array(project):
Expand Down

0 comments on commit 3749ca7

Please sign in to comment.