Description
Code Setup w/ Fake Dataset
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# running this code in terminal/ipython
%matplotlib qt
import matplotlib.pylab
import matplotlib.pyplot as plt
# Build a fake 100,000-row dataset with two string columns:
#   'intl'            - the geography each row belongs to
#   'task_completion' - 'unsuccessful' or 'completed', drawn at random
# geo_proportions gives each geography's share of the rows, and
# task_proportions gives the [unsuccessful, completed] probabilities per geography.
geographies = ['north america', 'central/south america', 'europe', 'africa', 'asia', 'oceania']
geo_proportions = [0.35, 0.10, 0.30, 0.05, 0.15, 0.05]
task_proportions = [[0.3,0.7],[0.4,0.6],[0.3,0.7],[0.5,0.5],[0.7,0.3],[0.4,0.6]]
cats = ['unsuccessful','completed']
pieces = []
for geo, geo_prop, task_prop in zip(geographies, geo_proportions, task_proportions):
    # Row count for this geography; round first so floating-point error in
    # the product cannot truncate the count down by one.
    shape = int(round(geo_prop * 100000))
    data = {'intl':np.repeat(geo,shape), 'task_completion':np.random.choice(cats, shape, p=task_prop)}
    frame = DataFrame(data)
    pieces.append(frame)
# Stack the per-geography frames into one frame with a fresh 0..N-1 index.
df = pd.concat(pieces, ignore_index=True)
Here's the data frame at this point:
In [3]: df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 2 columns):
intl 100000 non-null object
task_completion 100000 non-null object
dtypes: object(2)
memory usage: 2.3+ MB
Expected Output
# Expected behavior: count the rows for every (task_completion, intl) pair,
# pivot task_completion (index level 0) into columns, and draw one stacked bar
# per geography. When running this code I expect the following chart.
df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar',stacked=True)
Error
However, my real dataset uses Categorical types:
# Convert both columns from plain strings to the pandas 'category' dtype.
df['intl'] = df['intl'].astype('category')
# For task_completion, also fix the category order explicitly
# ('unsuccessful' before 'completed') and mark the categorical as ordered.
df['task_completion'] = (
    df['task_completion']
    .astype('category')
    .cat.set_categories(['unsuccessful', 'completed'], ordered=True)
)
Now the frame is:
In [5]: df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 2 columns):
intl 100000 non-null category
task_completion 100000 non-null category
dtypes: category(2)
memory usage: 976.6 KB
And I get the following error when trying to plot:
In [6]: df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar',stacked=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-8d6e104892c4> in <module>()
----> 1 df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar',stacked=True)
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
3735 fontsize=fontsize, colormap=colormap, table=table,
3736 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3737 sort_columns=sort_columns, **kwds)
3738 __call__.__doc__ = plot_frame.__doc__
3739
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2609 yerr=yerr, xerr=xerr,
2610 secondary_y=secondary_y, sort_columns=sort_columns,
-> 2611 **kwds)
2612
2613
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
2436 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
2437
-> 2438 plot_obj.generate()
2439 plot_obj.draw()
2440 return plot_obj.result
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in generate(self)
1023 self._compute_plot_data()
1024 self._setup_subplots()
-> 1025 self._make_plot()
1026 self._add_table()
1027 self._make_legend()
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in _make_plot(self)
1959 rect = self._plot(ax, self.ax_pos + w, y, self.bar_width,
1960 start=start, label=label,
-> 1961 log=self.log, **kwds)
1962 pos_prior = pos_prior + np.where(mask, y, 0)
1963 neg_prior = neg_prior + np.where(mask, 0, y)
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(cls, ax, x, y, w, start, log, **kwds)
1913 @classmethod
1914 def _plot(cls, ax, x, y, w, start=0, log=False, **kwds):
-> 1915 return ax.bar(x, y, w, bottom=start, log=log, **kwds)
1916
1917 @property
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1810 warnings.warn(msg % (label_namer, func.__name__),
1811 RuntimeWarning, stacklevel=2)
-> 1812 return func(ax, *args, **kwargs)
1813 pre_doc = inner.__doc__
1814 if pre_doc is None:
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/matplotlib/axes/_axes.py in bar(self, left, height, width, bottom, **kwargs)
2127 edgecolor=e,
2128 linewidth=lw,
-> 2129 label='_nolegend_'
2130 )
2131 r.update(kwargs)
/Users/adrianpalacios/anaconda/lib/python3.4/site-packages/matplotlib/patches.py in __init__(self, xy, width, height, angle, **kwargs)
640
641 self._x = float(xy[0])
--> 642 self._y = float(xy[1])
643 self._width = float(width)
644 self._height = float(height)
TypeError: only length-1 arrays can be converted to Python scalars
Interestingly, taking out the stacked kwarg works:
# Same grouping and pivot, but without stacked=True (grouped bars instead of
# stacked ones); this variant is reported to plot without raising.
df.groupby(['task_completion','intl']).size().unstack(0).plot(kind='bar')
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.4.4.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.18.0
nose: 1.3.7
pip: 8.1.0
setuptools: 20.2.2
Cython: 0.22.1
numpy: 1.10.4
scipy: 0.17.0
statsmodels: 0.6.1
xarray: None
IPython: 4.1.2
sphinx: 1.3.1
patsy: 0.3.0
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: 1.0.0
tables: 3.2.0
numexpr: 2.5
matplotlib: 1.5.1
openpyxl: 1.8.5
xlrd: 0.9.3
xlwt: 1.0.0
xlsxwriter: 0.7.3
lxml: 3.4.4
bs4: 4.3.2
html5lib: None
httplib2: 0.9.1
apiclient: None
sqlalchemy: 1.0.5
pymysql: None
psycopg2: None
jinja2: 2.8
boto: 2.38.0