Skip to content

Commit

Permalink
ENH: support of pandas.DataFrame.hist for datetime data (#36287)
Browse files Browse the repository at this point in the history
  • Loading branch information
onshek authored Oct 10, 2020
1 parent 1b9641c commit 03709d4
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ Other enhancements
- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)
- ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation in their calculations to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
Expand Down
9 changes: 7 additions & 2 deletions pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,16 @@ def hist_frame(
if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
column = [column]
data = data[column]
data = data._get_numeric_data()
# GH32590
data = data.select_dtypes(
include=(np.number, "datetime64", "datetimetz"), exclude="timedelta"
)
naxes = len(data.columns)

if naxes == 0:
raise ValueError("hist method requires numerical columns, nothing to plot.")
raise ValueError(
"hist method requires numerical or datetime columns, nothing to plot."
)

fig, axes = create_subplots(
naxes=naxes,
Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/plotting/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.core.dtypes.api import is_list_like

import pandas as pd
from pandas import DataFrame, Series
from pandas import DataFrame, Series, to_datetime
import pandas._testing as tm


Expand All @@ -28,6 +28,9 @@ def setup_method(self, method):

mpl.rcdefaults()

self.start_date_to_int64 = 812419200000000000
self.end_date_to_int64 = 819331200000000000

self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
Expand All @@ -50,6 +53,14 @@ def setup_method(self, method):
"height": random.normal(66, 4, size=n),
"weight": random.normal(161, 32, size=n),
"category": random.randint(4, size=n),
"datetime": to_datetime(
random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=n,
dtype=np.int64,
)
),
}
)

Expand Down
77 changes: 67 additions & 10 deletions pandas/tests/plotting/test_hist_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pandas.util._test_decorators as td

from pandas import DataFrame, Index, Series
from pandas import DataFrame, Index, Series, to_datetime
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works

Expand Down Expand Up @@ -163,17 +163,34 @@ def test_hist_df_legacy(self):
_check_plot_works(self.hist_df.hist)

# make sure layout is handled
df = DataFrame(randn(100, 3))
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)
with tm.assert_produces_warning(UserWarning):
axes = _check_plot_works(df.hist, grid=False)
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert not axes[1, 1].get_visible()

_check_plot_works(df[[2]].hist)
df = DataFrame(randn(100, 1))
_check_plot_works(df.hist)

# make sure layout is handled
df = DataFrame(randn(100, 6))
df = DataFrame(randn(100, 5))
df[5] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)
with tm.assert_produces_warning(UserWarning):
axes = _check_plot_works(df.hist, layout=(4, 2))
self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
Expand Down Expand Up @@ -225,18 +242,42 @@ def test_hist_df_legacy(self):
ser.hist(foo="bar")

@pytest.mark.slow
def test_hist_non_numerical_raises(self):
# gh-10444
df = DataFrame(np.random.rand(10, 2))
def test_hist_non_numerical_or_datetime_raises(self):
# gh-10444, GH32590
df = DataFrame(
{
"a": np.random.rand(10),
"b": np.random.randint(0, 10, 10),
"c": to_datetime(
np.random.randint(
1582800000000000000, 1583500000000000000, 10, dtype=np.int64
)
),
"d": to_datetime(
np.random.randint(
1582800000000000000, 1583500000000000000, 10, dtype=np.int64
),
utc=True,
),
}
)
df_o = df.astype(object)

msg = "hist method requires numerical columns, nothing to plot."
msg = "hist method requires numerical or datetime columns, nothing to plot."
with pytest.raises(ValueError, match=msg):
df_o.hist()

@pytest.mark.slow
def test_hist_layout(self):
df = DataFrame(randn(100, 3))
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)

layout_to_expected_size = (
{"layout": None, "expected_size": (2, 2)}, # default is 2x2
Expand Down Expand Up @@ -268,7 +309,15 @@ def test_hist_layout(self):
@pytest.mark.slow
# GH 9351
def test_tight_layout(self):
df = DataFrame(randn(100, 3))
df = DataFrame(np.random.randn(100, 2))
df[2] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=100,
dtype=np.int64,
)
)
_check_plot_works(df.hist)
self.plt.tight_layout()

Expand Down Expand Up @@ -355,7 +404,15 @@ def test_grouped_hist_legacy(self):

from pandas.plotting._matplotlib.hist import _grouped_hist

df = DataFrame(randn(500, 2), columns=["A", "B"])
df = DataFrame(randn(500, 1), columns=["A"])
df["B"] = to_datetime(
np.random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=500,
dtype=np.int64,
)
)
df["C"] = np.random.randint(0, 4, 500)
df["D"] = ["X"] * 500

Expand Down

0 comments on commit 03709d4

Please sign in to comment.