diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c2debb9bfe1c0..c7611d9829308 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1414,10 +1414,11 @@ def aggregate(self, values, how, axis=0): else: is_numeric = issubclass(values.dtype.type, (np.datetime64, np.timedelta64)) - out_dtype = 'float64' if is_numeric: + out_dtype = 'float64' values = values.view('int64') else: + out_dtype = 'object' values = values.astype(object) # will be filled in Cython function diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index b432ddd03d17f..4098ac06c2da2 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -2234,7 +2234,7 @@ def generate_put_template(template, use_ints=True, use_floats=True, date_like_list = [ ('int64', 'int64_t', 'float64_t', 'np.float64'), ] - object_list = [('object', 'object', 'float64_t', 'np.float64')] + object_list = [('object', 'object', 'object', 'np.object_')] function_list = [] if use_floats: function_list.extend(floats_list) diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index 42ae043847ba1..97a34582d2ef2 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -6697,7 +6697,7 @@ def group_count_float32(ndarray[float32_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_count_object(ndarray[float64_t, ndim=2] out, +def group_count_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, ndarray[int64_t] labels): @@ -6838,7 +6838,7 @@ def group_count_bin_float32(ndarray[float32_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_count_bin_object(ndarray[float64_t, ndim=2] out, +def group_count_bin_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, ndarray[int64_t] bins): diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index 2b6c6f55e5776..6a444d0a09af7 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -232,24 +232,46 @@ def f(): labels = labels.take(np.random.permutation(len(labels))) """ -groupby_first = Benchmark('data.groupby(labels).first()', setup, +groupby_first_float64 = Benchmark('data.groupby(labels).first()', setup, start_date=datetime(2012, 5, 1)) groupby_first_float32 = Benchmark('data2.groupby(labels).first()', setup, start_date=datetime(2013, 1, 1)) -groupby_last = Benchmark('data.groupby(labels).last()', setup, +groupby_last_float64 = Benchmark('data.groupby(labels).last()', setup, start_date=datetime(2012, 5, 1)) groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup, start_date=datetime(2013, 1, 1)) +groupby_nth_float64 = Benchmark('data.groupby(labels).nth(0)', setup, + start_date=datetime(2012, 5, 1)) + +groupby_nth_float32 = Benchmark('data2.groupby(labels).nth(0)', setup, + start_date=datetime(2013, 1, 1)) + # with datetimes (GH7555) setup = common_setup + """ df = DataFrame({'a' : date_range('1/1/2011',periods=100000,freq='s'),'b' : range(100000)}) """ -groupby_mixed_first = Benchmark('df.groupby("b").first()', setup, +groupby_first_datetimes = Benchmark('df.groupby("b").first()', setup, + start_date=datetime(2013, 5, 1)) +groupby_last_datetimes = Benchmark('df.groupby("b").last()', setup, + start_date=datetime(2013, 5, 1)) +groupby_nth_datetimes = Benchmark('df.groupby("b").nth(0)', setup, + start_date=datetime(2013, 5, 1)) + +# with object +setup = common_setup + """ +df = DataFrame({'a' : ['foo']*100000,'b' : range(100000)}) +""" + +groupby_first_object = Benchmark('df.groupby("b").first()', setup, + start_date=datetime(2013, 5, 1)) +groupby_last_object = Benchmark('df.groupby("b").last()', setup, + start_date=datetime(2013, 5, 1)) +groupby_nth_object = Benchmark('df.groupby("b").nth(0)', setup, start_date=datetime(2013, 5, 1)) #----------------------------------------------------------------------