Closed
Description
This code raises a `KeyError` in pandas 0.17.0, but works fine in pandas 0.16.2:
import datetime
import pandas as pd
import numpy as np
# pd.show_versions() writes the version report to stdout itself and returns
# None, so the surrounding print is what emits the trailing "None" line seen
# in the output of this report. The parenthesised form is valid on both
# Python 2.7 and Python 3 and prints exactly the same thing.
print(pd.show_versions())
# Toggle between the failing input (True: 'b' is a datetime column) and,
# presumably, the non-failing control input (False: 'b' is an integer column).
SHOW_BUG = True

if SHOW_BUG:
    # 'b' holds datetime values. The traceback in this report fails inside
    # groupby's _wrap_applied_output, where the date-like columns of the
    # original frame are looked up in the applied result.
    df = pd.DataFrame({
        'a': 1 * np.ones(10),
        'b': [datetime.datetime.now() for _ in range(10)],
    })
else:
    df = pd.DataFrame({
        'a': 1 * np.ones(10),
        'b': range(10),
    })


def _compute_length(batch):
    """Return a one-element Series ``{'c': 2}`` for every group.

    ``batch`` is the per-group object passed in by ``groupby(...).apply``;
    it is deliberately ignored, so the result never contains the original
    columns (in particular not ``'b'``).
    """
    return pd.Series({'c': 2})


# Every row has a == 1.0, so there is exactly one group. The applied result
# only has column 'c'; on pandas 0.17.0 this call raises
# KeyError: "['b'] not in index" (see the stack trace in this report).
dfg = df.groupby(by=['a']).apply(_compute_length)
Here is the version information:
print pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.10.final.0
python-bits: 64
OS: Darwin
OS-release: 14.5.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: en_US.UTF-8
LANG: en_US.UTF-8
pandas: 0.17.0
nose: 1.3.7
pip: 7.1.2
setuptools: 18.4
Cython: 0.22.1
numpy: 1.10.1
scipy: 0.16.0
statsmodels: 0.6.1
IPython: 3.2.0
sphinx: 1.3.1
patsy: 0.4.0
dateutil: 2.4.1
pytz: 2015.6
blosc: None
bottleneck: 1.0.0
tables: 3.2.0
numexpr: 2.4.3
matplotlib: 1.4.3
openpyxl: 1.8.5
xlrd: 0.9.3
xlwt: 1.0.0
xlsxwriter: 0.7.3
lxml: 3.4.4
bs4: 4.3.2
html5lib: None
httplib2: 0.9.1
apiclient: 1.4.1
sqlalchemy: 1.0.5
pymysql: None
psycopg2: None
None
And here is the stack trace:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-3-072dccab0584> in <module>()
21 return pd.Series({'c': 2})
22
---> 23 dfg = df.groupby(by=['a']).apply(_compute_length)
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/groupby.py in apply(self, func, *args, **kwargs)
711 # ignore SettingWithCopy here in case the user mutates
712 with option_context('mode.chained_assignment',None):
--> 713 return self._python_apply_general(f)
714
715 def _python_apply_general(self, f):
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/groupby.py in _python_apply_general(self, f)
718
719 return self._wrap_applied_output(keys, values,
--> 720 not_indexed_same=mutated)
721
722 def aggregate(self, func, *args, **kwargs):
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/groupby.py in _wrap_applied_output(self, keys, values, not_indexed_same)
3125 date_cols = self._selected_obj.select_dtypes(
3126 include=list(_DATELIKE_DTYPES)).columns
-> 3127 result[date_cols] = (result[date_cols]
3128 ._convert(datetime=True,
3129 coerce=True))
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
1906 if isinstance(key, (Series, np.ndarray, Index, list)):
1907 # either boolean or fancy integer index
-> 1908 return self._getitem_array(key)
1909 elif isinstance(key, DataFrame):
1910 return self._getitem_frame(key)
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/frame.py in _getitem_array(self, key)
1950 return self.take(indexer, axis=0, convert=False)
1951 else:
-> 1952 indexer = self.ix._convert_to_indexer(key, axis=1)
1953 return self.take(indexer, axis=1, convert=True)
1954
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1119 mask = check == -1
1120 if mask.any():
-> 1121 raise KeyError('%s not in index' % objarr[mask])
1122
1123 return _values_from_object(indexer)
KeyError: "['b'] not in index"