Skip to content

ENH/BUG: boxplot now supports layout #7035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ Improvements to existing features
- Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`)
- ``GroupBy.count()`` is now implemented in Cython and is much faster for large
numbers of groups (:issue:`7016`).
- ``boxplot`` now supports ``layout`` keyword (:issue:`6769`)

.. _release.bug_fixes-0.14.0:

Expand Down Expand Up @@ -485,6 +486,7 @@ Bug Fixes
- Bug in cache coherence with chained indexing and slicing; add ``_is_view`` property to ``NDFrame`` to correctly predict
views; mark ``is_copy`` on ``xs`` only if it's an actual copy (and not a view) (:issue:`7084`)
- Bug in DatetimeIndex creation from string ndarray with ``dayfirst=True`` (:issue:`5917`)
- Bug in ``boxplot`` and ``hist`` drawing unnecessary axes (:issue:`6769`)

pandas 0.13.1
-------------
Expand Down
2 changes: 2 additions & 0 deletions doc/source/v0.14.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ Plotting
positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is
raised if the old ``data`` argument is used by name. (:issue:`6956`)

- ``boxplot`` now supports ``layout`` keyword (:issue:`6769`)

.. _whatsnew_0140.prior_deprecations:

Prior Version Deprecations/Changes
Expand Down
148 changes: 117 additions & 31 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas.compat import range, lrange, StringIO, lmap, lzip, u, zip
import pandas.util.testing as tm
from pandas.util.testing import ensure_clean
import pandas.core.common as com
from pandas.core.config import set_option


Expand Down Expand Up @@ -1837,6 +1838,19 @@ def test_errorbar_scatter(self):

@tm.mplskip
class TestDataFrameGroupByPlots(tm.TestCase):

def setUp(self):
    """
    Build the ``hist_df`` fixture used by the tests in this class.

    The frame has 100 rows and five columns: two categorical label
    columns (``gender``, ``classroom``) and three numeric columns
    (``height``, ``weight``, ``category``).
    """
    size = 100
    # All draws happen inside the RNGContext block, in the same order as
    # the column definitions below (presumably this seeds the RNG so the
    # fixture is reproducible -- confirm against pandas.util.testing).
    with tm.RNGContext(42):
        columns = {
            'gender': tm.choice(['Male', 'Female'], size=size),
            'classroom': tm.choice(['A', 'B', 'C'], size=size),
            'height': random.normal(66, 4, size=size),
            'weight': random.normal(161, 32, size=size),
            'category': random.randint(4, size=size),
        }
        self.hist_df = DataFrame(columns)

def tearDown(self):
    """
    Per-test cleanup hook; delegates entirely to ``tm.close()``.

    NOTE(review): ``tm.close`` presumably closes open matplotlib figures
    so they do not leak between tests -- confirm in pandas.util.testing.
    """
    tm.close()

Expand Down Expand Up @@ -1924,39 +1938,117 @@ def test_grouped_hist(self):
with tm.assertRaises(AttributeError):
plotting.grouped_hist(df.A, by=df.C, foo='bar')

def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=(8.0, 6.0)):
    """
    Check expected number of axes is drawn in expected layout

    Parameters
    ----------
    axes : matplotlib Axes object, or its list-like
    axes_num : number, optional
        expected number of visible axes. Unnecessary axes should be set
        to invisible. Skipped when None.
    layout : tuple, optional
        expected layout. Compared against ``(len(axes), )`` when ``axes``
        is a list, against ``axes.shape`` when it is an ndarray, and
        against ``(1, )`` otherwise (single Axes). Skipped when None.
    figsize : tuple
        expected figsize. default is matplotlib default

    Raises
    ------
    AssertionError
        if any of the checks above fails, or if no visible axes exist.
    """
    visible_axes = self._flatten_visible(axes)

    if axes_num is not None:
        self.assertEqual(len(visible_axes), axes_num)
        for ax in visible_axes:
            # check something drawn on visible axes
            # (assertTrue replaces the ``assert_`` alias, which was
            # deprecated and later removed from unittest)
            self.assertTrue(len(ax.get_children()) > 0)

    if layout is not None:
        if isinstance(axes, list):
            self.assertEqual((len(axes), ), layout)
        elif isinstance(axes, np.ndarray):
            self.assertEqual(axes.shape, layout)
        else:
            # in case of AxesSubplot
            self.assertEqual((1, ), layout)

    # The figure size is read from the first visible axes; fail with a
    # readable message instead of an IndexError when there is none.
    self.assertTrue(len(visible_axes) > 0,
                    'no visible axes to check figsize against')
    # Size is rounded to whole inches before comparison.
    actual_figsize = np.round(visible_axes[0].figure.get_size_inches())
    self.assert_numpy_array_equal(actual_figsize, np.array(figsize))

def _flatten_visible(self, axes):
    """
    Return the visible axes among ``axes`` as a flat list.

    ``axes`` is first passed through ``plotting._flatten``; only the
    entries whose ``get_visible()`` is truthy are kept.
    """
    return [candidate for candidate in plotting._flatten(axes)
            if candidate.get_visible()]

@slow
def test_grouped_hist_layout(self):
def test_grouped_box_layout(self):
import matplotlib.pyplot as plt
n = 100
gender = tm.choice(['Male', 'Female'], size=n)
df = DataFrame({'gender': gender,
'height': random.normal(66, 4, size=n),
'weight': random.normal(161, 32, size=n),
'category': random.randint(4, size=n)})
self.assertRaises(ValueError, df.hist, column='weight', by=df.gender,
df = self.hist_df

self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], by=df.gender,
layout=(1, 1))
self.assertRaises(ValueError, df.boxplot, column=['height', 'weight', 'category'],
layout=(2, 1))

box = _check_plot_works(df.groupby('gender').boxplot, column='height')
self._check_axes_shape(plt.gcf().axes, axes_num=2)

box = _check_plot_works(df.groupby('category').boxplot, column='height')
self._check_axes_shape(plt.gcf().axes, axes_num=4)

# GH 6769
box = _check_plot_works(df.groupby('classroom').boxplot, column='height')
self._check_axes_shape(plt.gcf().axes, axes_num=3)

box = df.boxplot(column=['height', 'weight', 'category'], by='gender')
self._check_axes_shape(plt.gcf().axes, axes_num=3)

box = df.groupby('classroom').boxplot(column=['height', 'weight', 'category'])
self._check_axes_shape(plt.gcf().axes, axes_num=3)

box = _check_plot_works(df.groupby('category').boxplot, column='height', layout=(3, 2))
self._check_axes_shape(plt.gcf().axes, axes_num=4)

box = df.boxplot(column=['height', 'weight', 'category'], by='gender', layout=(4, 1))
self._check_axes_shape(plt.gcf().axes, axes_num=3)

box = df.groupby('classroom').boxplot(column=['height', 'weight', 'category'], layout=(1, 4))
self._check_axes_shape(plt.gcf().axes, axes_num=3)

@slow
def test_grouped_hist_layout(self):

df = self.hist_df
self.assertRaises(ValueError, df.hist, column='weight', by=df.gender,
layout=(1,))
layout=(1, 1))
self.assertRaises(ValueError, df.hist, column='height', by=df.category,
layout=(1, 3))
self.assertRaises(ValueError, df.hist, column='height', by=df.category,
layout=(2, 1))
self.assertEqual(df.hist(column='height', by=df.gender,
layout=(2, 1)).shape, (2,))
tm.close()
self.assertEqual(df.hist(column='height', by=df.category,
layout=(4, 1)).shape, (4,))
tm.close()
self.assertEqual(df.hist(column='height', by=df.category,
layout=(4, 2)).shape, (4, 2))

axes = _check_plot_works(df.hist, column='height', by=df.gender, layout=(2, 1))
self._check_axes_shape(axes, axes_num=2, layout=(2, ), figsize=(10, 5))

axes = _check_plot_works(df.hist, column='height', by=df.category, layout=(4, 1))
self._check_axes_shape(axes, axes_num=4, layout=(4, ), figsize=(10, 5))

axes = _check_plot_works(df.hist, column='height', by=df.category,
layout=(4, 2), figsize=(12, 8))
self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8))

# GH 6769
axes = _check_plot_works(df.hist, column='height', by='classroom', layout=(2, 2))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2), figsize=(10, 5))

# without column
axes = _check_plot_works(df.hist, by='classroom')
self._check_axes_shape(axes, axes_num=3, layout=(2, 2), figsize=(10, 5))

axes = _check_plot_works(df.hist, by='gender', layout=(3, 5))
self._check_axes_shape(axes, axes_num=2, layout=(3, 5), figsize=(10, 5))

axes = _check_plot_works(df.hist, column=['height', 'weight', 'category'])
self._check_axes_shape(axes, axes_num=3, layout=(2, 2), figsize=(10, 5))

@slow
def test_axis_share_x(self):
df = self.hist_df
# GH4089
n = 100
df = DataFrame({'gender': tm.choice(['Male', 'Female'], size=n),
'height': random.normal(66, 4, size=n),
'weight': random.normal(161, 32, size=n)})
ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True)

# share x
Expand All @@ -1969,10 +2061,7 @@ def test_axis_share_x(self):

@slow
def test_axis_share_y(self):
n = 100
df = DataFrame({'gender': tm.choice(['Male', 'Female'], size=n),
'height': random.normal(66, 4, size=n),
'weight': random.normal(161, 32, size=n)})
df = self.hist_df
ax1, ax2 = df.hist(column='height', by=df.gender, sharey=True)

# share y
Expand All @@ -1985,10 +2074,7 @@ def test_axis_share_y(self):

@slow
def test_axis_share_xy(self):
n = 100
df = DataFrame({'gender': tm.choice(['Male', 'Female'], size=n),
'height': random.normal(66, 4, size=n),
'weight': random.normal(161, 32, size=n)})
df = self.hist_df
ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True,
sharey=True)

Expand Down
Loading