Skip to content

Commit 6a9bbea

Browse files
authored
Merge pull request #4244 from anmyachev/interchange-protocol
Support dataframe interchange protocol
2 parents 8445f91 + c6deacd commit 6a9bbea

File tree

4 files changed

+81
-2
lines changed

4 files changed

+81
-2
lines changed

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
77

88
### Updated
99
- Updated Plotly.js from version 2.24.1 to version 2.24.2. See the [plotly.js CHANGELOG](https://github.com/plotly/plotly.js/blob/master/CHANGELOG.md#2242----2023-06-09) for more information. These changes are reflected in the auto-generated `plotly.graph_objects` module.
10+
- `px` methods now accept data-frame-like objects that support the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), such as polars, vaex, modin, etc. The protocol takes priority over a `to_pandas` call, but it is only used if pandas>=2.0.2 is installed in the environment.
1011

1112
## [5.15.0] - 2023-06-08
1213

Diff for: packages/python/plotly/plotly/express/_core.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from _plotly_utils.basevalidators import ColorscaleValidator
88
from plotly.colors import qualitative, sequential
99
import math
10+
from packaging import version
1011
import pandas as pd
1112
import numpy as np
1213

@@ -1307,7 +1308,25 @@ def build_dataframe(args, constructor):
13071308
# Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
13081309
df_provided = args["data_frame"] is not None
13091310
if df_provided and not isinstance(args["data_frame"], pd.DataFrame):
1310-
if hasattr(args["data_frame"], "to_pandas"):
1311+
if hasattr(args["data_frame"], "__dataframe__") and version.parse(
1312+
pd.__version__
1313+
) >= version.parse("2.0.2"):
1314+
import pandas.api.interchange
1315+
1316+
df_not_pandas = args["data_frame"]
1317+
try:
1318+
df_pandas = pandas.api.interchange.from_dataframe(df_not_pandas)
1319+
except (ImportError, NotImplementedError) as exc:
1320+
# temporary workaround; developers of third-party libraries themselves
1321+
# should try a different implementation, if available. For example:
1322+
# def __dataframe__(self, ...):
1323+
# if not some_condition:
1324+
# self.to_pandas(...)
1325+
if not hasattr(df_not_pandas, "to_pandas"):
1326+
raise exc
1327+
df_pandas = df_not_pandas.to_pandas()
1328+
args["data_frame"] = df_pandas
1329+
elif hasattr(args["data_frame"], "to_pandas"):
13111330
args["data_frame"] = args["data_frame"].to_pandas()
13121331
else:
13131332
args["data_frame"] = pd.DataFrame(args["data_frame"])

Diff for: packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py

+57
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,25 @@
33
import numpy as np
44
import pandas as pd
55
import pytest
6+
from packaging import version
7+
import unittest.mock as mock
68
from plotly.express._core import build_dataframe
79
from pandas.testing import assert_frame_equal
810

11+
# Fixtures
12+
# --------
13+
@pytest.fixture
def add_interchange_module_for_old_pandas():
    """Make ``import pandas.api.interchange`` succeed on pandas builds lacking it.

    When ``pd.api`` has no ``interchange`` attribute, a ``MagicMock`` is
    installed both as ``pd.api.interchange`` and as the
    ``"pandas.api.interchange"`` entry of ``sys.modules`` for the duration of
    the test. When the attribute already exists, the fixture does nothing.

    The stand-in attribute is removed again on teardown so that it does not
    leak into tests that run afterwards.
    """
    if hasattr(pd.api, "interchange"):
        # The real module is present; nothing to install or clean up.
        yield
        return

    stub = mock.MagicMock()
    pd.api.interchange = stub
    try:
        # to make the following import work: `import pandas.api.interchange`
        with mock.patch.dict("sys.modules", {"pandas.api.interchange": stub}):
            yield
    finally:
        # patch.dict restores sys.modules on exit; the attribute set on
        # pd.api has to be removed explicitly.
        del pd.api.interchange
24+
925

1026
def test_numpy():
1127
fig = px.scatter(x=[1, 2, 3], y=[2, 3, 4], color=[1, 3, 9])
@@ -233,6 +249,47 @@ def test_build_df_with_index():
233249
assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"])
234250

235251

252+
def test_build_df_using_interchange_protocol_mock(
    add_interchange_module_for_old_pandas,
):
    """Check that build_dataframe hands ``__dataframe__`` objects to ``from_dataframe``.

    ``pandas.__version__`` is patched to "2.0.2" and
    ``pandas.api.interchange.from_dataframe`` is replaced by a mock returning
    the iris dataset, so the test only verifies that the converter is called
    exactly once with the original input object.
    """

    class CustomDataFrame:
        # Minimal object exposing ``__dataframe__``; the converter itself is mocked.
        def __dataframe__(self):
            pass

    input_dataframe = CustomDataFrame()
    args = dict(data_frame=input_dataframe, x="petal_width", y="sepal_length")

    converted_frame = px.data.iris()

    version_patch = mock.patch("pandas.__version__", "2.0.2")
    converter_patch = mock.patch(
        "pandas.api.interchange.from_dataframe", return_value=converted_frame
    )
    with version_patch, converter_patch as mock_from_dataframe:
        build_dataframe(args, go.Scatter)
        mock_from_dataframe.assert_called_once_with(input_dataframe)
270+
271+
272+
@pytest.mark.skipif(
    version.parse(pd.__version__) < version.parse("2.0.2"),
    reason="plotly doesn't use a dataframe interchange protocol for pandas < 2.0.2",
)
@pytest.mark.parametrize("test_lib", ["vaex", "polars"])
def test_build_df_from_vaex_and_polars(test_lib):
    """Round-trip two iris columns through vaex or polars and build_dataframe.

    The frame produced by ``build_dataframe`` must equal the matching columns
    of the original pandas frame after ``reset_index()``.
    """
    # Import the library named by the parameter under a common alias.
    if test_lib != "vaex":
        import polars as lib
    else:
        import vaex as lib

    # Leave out the 'species' column since the vaex implementation does not
    # cover strings yet.
    expected = px.data.iris()[["petal_width", "sepal_length"]]
    foreign_frame = lib.from_pandas(expected)

    args = dict(data_frame=foreign_frame, x="petal_width", y="sepal_length")
    out = build_dataframe(args, go.Scatter)

    result = out["data_frame"]
    assert_frame_equal(expected.reset_index()[result.columns], result)
291+
292+
236293
def test_timezones():
237294
df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
238295
df["date"] = pd.to_datetime(df["date"])

Diff for: packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
requests==2.25.1
22
tenacity==6.2.0
3-
pandas==2.0.1
3+
pandas==2.0.2
44
numpy==1.20.3
55
xarray==0.17.0
66
statsmodels
@@ -19,3 +19,5 @@ matplotlib==2.2.3
1919
scikit-image==0.18.1
2020
psutil==5.7.0
2121
kaleido
22+
vaex
23+
polars

0 commit comments

Comments
 (0)