-
-
Notifications
You must be signed in to change notification settings - Fork 19.2k
Closed
Labels
Description
This code raises a `KeyError` in pandas 0.17.0, but works fine in pandas 0.16.2:
# Minimal reproduction script for the report: builds a small DataFrame and
# defines the function that is later passed to ``groupby(...).apply(...)``.
import datetime
import pandas as pd
import numpy as np

# Parenthesised so the line parses on both Python 2 and Python 3.
print(pd.show_versions())

# True  -> column 'b' holds datetime objects (the case reported as failing).
# False -> column 'b' holds plain integers.
SHOW_BUG = True

if SHOW_BUG:
    df = pd.DataFrame({
        'a': 1 * np.ones(10),
        'b': [datetime.datetime.now() for nn in range(10)],
    })
else:
    df = pd.DataFrame({
        'a': 1 * np.ones(10),
        'b': range(10),
    })


def _compute_length(batch):
    """Return the one-element Series ``{'c': 2}``; ``batch`` is not used."""
    return pd.Series({'c': 2})
dfg = df.groupby(by=['a']).apply(_compute_length)

Here is the version information:
print pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.10.final.0
python-bits: 64
OS: Darwin
OS-release: 14.5.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: en_US.UTF-8
LANG: en_US.UTF-8
pandas: 0.17.0
nose: 1.3.7
pip: 7.1.2
setuptools: 18.4
Cython: 0.22.1
numpy: 1.10.1
scipy: 0.16.0
statsmodels: 0.6.1
IPython: 3.2.0
sphinx: 1.3.1
patsy: 0.4.0
dateutil: 2.4.1
pytz: 2015.6
blosc: None
bottleneck: 1.0.0
tables: 3.2.0
numexpr: 2.4.3
matplotlib: 1.4.3
openpyxl: 1.8.5
xlrd: 0.9.3
xlwt: 1.0.0
xlsxwriter: 0.7.3
lxml: 3.4.4
bs4: 4.3.2
html5lib: None
httplib2: 0.9.1
apiclient: 1.4.1
sqlalchemy: 1.0.5
pymysql: None
psycopg2: None
None
And here is the stack trace:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-3-072dccab0584> in <module>()
21 return pd.Series({'c': 2})
22
---> 23 dfg = df.groupby(by=['a']).apply(_compute_length)
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/groupby.py in apply(self, func, *args, **kwargs)
711 # ignore SettingWithCopy here in case the user mutates
712 with option_context('mode.chained_assignment',None):
--> 713 return self._python_apply_general(f)
714
715 def _python_apply_general(self, f):
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/groupby.py in _python_apply_general(self, f)
718
719 return self._wrap_applied_output(keys, values,
--> 720 not_indexed_same=mutated)
721
722 def aggregate(self, func, *args, **kwargs):
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/groupby.py in _wrap_applied_output(self, keys, values, not_indexed_same)
3125 date_cols = self._selected_obj.select_dtypes(
3126 include=list(_DATELIKE_DTYPES)).columns
-> 3127 result[date_cols] = (result[date_cols]
3128 ._convert(datetime=True,
3129 coerce=True))
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
1906 if isinstance(key, (Series, np.ndarray, Index, list)):
1907 # either boolean or fancy integer index
-> 1908 return self._getitem_array(key)
1909 elif isinstance(key, DataFrame):
1910 return self._getitem_frame(key)
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/frame.py in _getitem_array(self, key)
1950 return self.take(indexer, axis=0, convert=False)
1951 else:
-> 1952 indexer = self.ix._convert_to_indexer(key, axis=1)
1953 return self.take(indexer, axis=1, convert=True)
1954
/Users/rob/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1119 mask = check == -1
1120 if mask.any():
-> 1121 raise KeyError('%s not in index' % objarr[mask])
1122
1123 return _values_from_object(indexer)
KeyError: "['b'] not in index"