Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: support of pandas.DataFrame.hist for datetime data #36287

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ Other enhancements
- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)
- ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation in their calculations to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
Expand Down
9 changes: 7 additions & 2 deletions pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,16 @@ def hist_frame(
if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
column = [column]
data = data[column]
data = data._get_numeric_data()
# GH32590: keep numeric and datetime (naive and tz-aware) columns; timedelta columns are excluded
data = data.select_dtypes(
include=(np.number, "datetime64", "datetimetz"), exclude="timedelta"
)
naxes = len(data.columns)

if naxes == 0:
raise ValueError("hist method requires numerical columns, nothing to plot.")
raise ValueError(
"hist method requires numerical or datetime columns, nothing to plot."
)

fig, axes = create_subplots(
naxes=naxes,
Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/plotting/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.core.dtypes.api import is_list_like

import pandas as pd
from pandas import DataFrame, Series
from pandas import DataFrame, Series, to_datetime
import pandas._testing as tm


Expand All @@ -28,6 +28,9 @@ def setup_method(self, method):

mpl.rcdefaults()

self.start_date_to_int64 = 812419200000000000
self.end_date_to_int64 = 819331200000000000

self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
Expand All @@ -50,6 +53,14 @@ def setup_method(self, method):
"height": random.normal(66, 4, size=n),
"weight": random.normal(161, 32, size=n),
"category": random.randint(4, size=n),
"datetime": to_datetime(
random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=n,
dtype=np.int64,
)
),
}
)

Expand Down
77 changes: 67 additions & 10 deletions pandas/tests/plotting/test_hist_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pandas.util._test_decorators as td

from pandas import DataFrame, Index, Series
from pandas import DataFrame, Index, Series, to_datetime
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works

Expand Down Expand Up @@ -163,17 +163,34 @@ def test_hist_df_legacy(self):
_check_plot_works(self.hist_df.hist)

# make sure layout is handled
df = DataFrame(randn(100, 3))
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)
with tm.assert_produces_warning(UserWarning):
axes = _check_plot_works(df.hist, grid=False)
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert not axes[1, 1].get_visible()

_check_plot_works(df[[2]].hist)
df = DataFrame(randn(100, 1))
_check_plot_works(df.hist)

# make sure layout is handled
df = DataFrame(randn(100, 6))
df = DataFrame(randn(100, 5))
df[5] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)
with tm.assert_produces_warning(UserWarning):
axes = _check_plot_works(df.hist, layout=(4, 2))
self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
Expand Down Expand Up @@ -225,18 +242,42 @@ def test_hist_df_legacy(self):
ser.hist(foo="bar")

@pytest.mark.slow
def test_hist_non_numerical_raises(self):
# gh-10444
df = DataFrame(np.random.rand(10, 2))
def test_hist_non_numerical_or_datetime_raises(self):
# gh-10444, GH32590
df = DataFrame(
{
"a": np.random.rand(10),
"b": np.random.randint(0, 10, 10),
"c": to_datetime(
np.random.randint(
1582800000000000000, 1583500000000000000, 10, dtype=np.int64
)
),
"d": to_datetime(
np.random.randint(
1582800000000000000, 1583500000000000000, 10, dtype=np.int64
),
utc=True,
),
}
)
df_o = df.astype(object)

msg = "hist method requires numerical columns, nothing to plot."
msg = "hist method requires numerical or datetime columns, nothing to plot."
with pytest.raises(ValueError, match=msg):
df_o.hist()

@pytest.mark.slow
def test_hist_layout(self):
df = DataFrame(randn(100, 3))
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)

layout_to_expected_size = (
{"layout": None, "expected_size": (2, 2)}, # default is 2x2
Expand Down Expand Up @@ -268,7 +309,15 @@ def test_hist_layout(self):
@pytest.mark.slow
# GH 9351
def test_tight_layout(self):
df = DataFrame(randn(100, 3))
df = DataFrame(np.random.randn(100, 2))
df[2] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)
_check_plot_works(df.hist)
self.plt.tight_layout()

Expand Down Expand Up @@ -355,7 +404,15 @@ def test_grouped_hist_legacy(self):

from pandas.plotting._matplotlib.hist import _grouped_hist

df = DataFrame(randn(500, 2), columns=["A", "B"])
df = DataFrame(randn(500, 1), columns=["A"])
df["B"] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=500,
dtype=np.int64,
)
)
df["C"] = np.random.randint(0, 4, 500)
df["D"] = ["X"] * 500

Expand Down