Skip to content

Categorical dtypes cause error when attempting stacked bar plot #13019

Closed
@msure

Description

@msure

Code Setup w/ Fake Dataset

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# running this code in terminal/ipython
%matplotlib qt
import matplotlib.pylab
import matplotlib.pyplot as plt

geographies = ['north america', 'central/south america', 'europe', 'africa', 'asia', 'oceania']
geo_proportions = [0.35, 0.10, 0.30, 0.05, 0.15, 0.05]
task_proportions = [[0.3,0.7],[0.4,0.6],[0.3,0.7],[0.5,0.5],[0.7,0.3],[0.4,0.6]]
cats = ['unsuccessful','completed']

pieces = []
for geo, geo_prop, task_prop in zip(geographies,geo_proportions,task_proportions):
    shape = int(geo_prop * 100000)
    data = {'intl':np.repeat(geo,shape), 'task_completion':np.random.choice(cats, shape, p=task_prop)}
    frame = DataFrame(data)
    pieces.append(frame)

df = pd.concat(pieces, ignore_index=True)

Here's the data frame at this point:

In [3]: df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 2 columns):
intl               100000 non-null object
task_completion    100000 non-null object
dtypes: object(2)
memory usage: 2.3+ MB

Expected Output

# when running this code I expect the following chart
df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar',stacked=True)

[Image: screen shot 2016-04-28 at 10 41 58 am — the expected stacked bar chart]

Error

However, my real dataset uses Categorical types:

df['intl'] = df['intl'].astype('category')
df['task_completion'] = df['task_completion'].astype('category')
df['task_completion'] = df['task_completion'].cat.set_categories(['unsuccessful','completed'], ordered=True)

Now the frame is:

In [5]: df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 2 columns):
intl               100000 non-null category
task_completion    100000 non-null category
dtypes: category(2)
memory usage: 976.6 KB

And I get the following error when trying to plot:

In [6]: df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar',stacked=True)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-8d6e104892c4> in <module>()
----> 1 df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar',stacked=True)

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   3735                           fontsize=fontsize, colormap=colormap, table=table,
   3736                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3737                           sort_columns=sort_columns, **kwds)
   3738     __call__.__doc__ = plot_frame.__doc__
   3739

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2609                  yerr=yerr, xerr=xerr,
   2610                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 2611                  **kwds)
   2612
   2613

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2436         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2437
-> 2438     plot_obj.generate()
   2439     plot_obj.draw()
   2440     return plot_obj.result

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in generate(self)
   1023         self._compute_plot_data()
   1024         self._setup_subplots()
-> 1025         self._make_plot()
   1026         self._add_table()
   1027         self._make_legend()

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in _make_plot(self)
   1959                 rect = self._plot(ax, self.ax_pos + w, y, self.bar_width,
   1960                                   start=start, label=label,
-> 1961                                   log=self.log, **kwds)
   1962                 pos_prior = pos_prior + np.where(mask, y, 0)
   1963                 neg_prior = neg_prior + np.where(mask, 0, y)

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(cls, ax, x, y, w, start, log, **kwds)
   1913     @classmethod
   1914     def _plot(cls, ax, x, y, w, start=0, log=False, **kwds):
-> 1915         return ax.bar(x, y, w, bottom=start, log=log, **kwds)
   1916
   1917     @property

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
   1810                     warnings.warn(msg % (label_namer, func.__name__),
   1811                                   RuntimeWarning, stacklevel=2)
-> 1812             return func(ax, *args, **kwargs)
   1813         pre_doc = inner.__doc__
   1814         if pre_doc is None:

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/matplotlib/axes/_axes.py in bar(self, left, height, width, bottom, **kwargs)
   2127                 edgecolor=e,
   2128                 linewidth=lw,
-> 2129                 label='_nolegend_'
   2130                 )
   2131             r.update(kwargs)

/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/matplotlib/patches.py in __init__(self, xy, width, height, angle, **kwargs)
    640
    641         self._x = float(xy[0])
--> 642         self._y = float(xy[1])
    643         self._width = float(width)
    644         self._height = float(height)

TypeError: only length-1 arrays can be converted to Python scalars

Interestingly, removing the stacked=True kwarg makes the plot work (as an unstacked bar chart):

df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar')

Output of pd.show_versions()

INSTALLED VERSIONS

commit: None
python: 3.4.4.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8

pandas: 0.18.0
nose: 1.3.7
pip: 8.1.0
setuptools: 20.2.2
Cython: 0.22.1
numpy: 1.10.4
scipy: 0.17.0
statsmodels: 0.6.1
xarray: None
IPython: 4.1.2
sphinx: 1.3.1
patsy: 0.3.0
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: 1.0.0
tables: 3.2.0
numexpr: 2.5
matplotlib: 1.5.1
openpyxl: 1.8.5
xlrd: 0.9.3
xlwt: 1.0.0
xlsxwriter: 0.7.3
lxml: 3.4.4
bs4: 4.3.2
html5lib: None
httplib2: 0.9.1
apiclient: None
sqlalchemy: 1.0.5
pymysql: None
psycopg2: None
jinja2: 2.8
boto: 2.38.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Categorical (Categorical Data Type); Testing (pandas testing functions or related to the test suite); Visualization (plotting)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions