Skip to content

PERF: perf improvements in timedelta conversions from integer dtypes #6754

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 1, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ Improvements to existing features
- Performance improvement when converting ``DatetimeIndex`` to floating ordinals
using ``DatetimeConverter`` (:issue:`6636`)
- Performance improvement for ``DataFrame.shift`` (:issue:`5609`)
- Performance improvements in timedelta conversions for integer dtypes (:issue:`6754`)

.. _release.bug_fixes-0.14.0:

Expand Down
10 changes: 10 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2130,6 +2130,16 @@ def is_timedelta64_dtype(arr_or_dtype):
return issubclass(tipo, np.timedelta64)


def is_timedelta64_ns_dtype(arr_or_dtype):
    """
    Check whether the given dtype (or the dtype of the given object) equals
    the module-level ``_TD_DTYPE``.

    Parameters
    ----------
    arr_or_dtype : np.dtype, type, or any object exposing a ``dtype`` attribute
        A ``np.dtype`` is used as-is, a type is converted with ``np.dtype``,
        and anything else is resolved through its ``.dtype`` attribute.

    Returns
    -------
    result of comparing the resolved dtype with ``_TD_DTYPE``
    """
    if isinstance(arr_or_dtype, np.dtype):
        dtype = arr_or_dtype
    elif isinstance(arr_or_dtype, type):
        dtype = np.dtype(arr_or_dtype)
    else:
        dtype = arr_or_dtype.dtype
    # Compare the full dtype rather than ``dtype.type``: the scalar type
    # (``np.timedelta64``) carries no unit information, so comparing it cannot
    # tell timedelta64[ns] apart from any other resolution.
    # NOTE(review): ``_TD_DTYPE`` is defined elsewhere in this module and is
    # assumed to be the timedelta64[ns] dtype -- confirm.
    return dtype == _TD_DTYPE


def needs_i8_conversion(arr_or_dtype):
    """
    Report whether the argument is recognised by either
    ``is_datetime64_dtype`` or ``is_timedelta64_dtype``.

    The datetime64 check runs first; the timedelta64 check is only
    evaluated when the first one is falsy.
    """
    datetime_check = is_datetime64_dtype(arr_or_dtype)
    if datetime_check:
        return datetime_check
    return is_timedelta64_dtype(arr_or_dtype)
Expand Down
30 changes: 30 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,36 @@ def conv(v):
expected = np.timedelta64(timedelta(seconds=1))
self.assertEqual(result, expected)

# arrays of various dtypes
arr = np.array([1]*5,dtype='int64')
result = to_timedelta(arr,unit='s')
expected = Series([ np.timedelta64(1,'s') ]*5)
tm.assert_series_equal(result, expected)

arr = np.array([1]*5,dtype='int64')
result = to_timedelta(arr,unit='m')
expected = Series([ np.timedelta64(1,'m') ]*5)
tm.assert_series_equal(result, expected)

arr = np.array([1]*5,dtype='int64')
result = to_timedelta(arr,unit='h')
expected = Series([ np.timedelta64(1,'h') ]*5)
tm.assert_series_equal(result, expected)

arr = np.array([1]*5,dtype='timedelta64[s]')
result = to_timedelta(arr)
expected = Series([ np.timedelta64(1,'s') ]*5)
tm.assert_series_equal(result, expected)

arr = np.array([1]*5,dtype='timedelta64[D]')
result = to_timedelta(arr)
expected = Series([ np.timedelta64(1,'D') ]*5)
tm.assert_series_equal(result, expected)

# these will error
self.assertRaises(ValueError, lambda : to_timedelta(['1h']))
self.assertRaises(ValueError, lambda : to_timedelta(['1m']))

def test_to_timedelta_via_apply(self):
_skip_if_numpy_not_friendly()

Expand Down
26 changes: 15 additions & 11 deletions pandas/tseries/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pandas.tslib as tslib
from pandas import compat, _np_version_under1p7
from pandas.core.common import (ABCSeries, is_integer, is_timedelta64_dtype,
from pandas.core.common import (ABCSeries, is_integer, is_integer_dtype, is_timedelta64_dtype,
_values_from_object, is_list_like, isnull)

repr_timedelta = tslib.repr_timedelta64
Expand All @@ -23,7 +23,7 @@ def to_timedelta(arg, box=True, unit='ns'):
arg : string, timedelta, array of strings (with possible NAs)
box : boolean, default True
If True returns a Series of the results, if False returns ndarray of values
unit : unit of the arg (D,s,ms,us,ns) denote the unit, which is an integer/float number
unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an integer/float number

Returns
-------
Expand All @@ -32,18 +32,22 @@ def to_timedelta(arg, box=True, unit='ns'):
if _np_version_under1p7:
raise ValueError("to_timedelta is not support for numpy < 1.7")

def _convert_listlike(arg, box):
def _convert_listlike(arg, box, unit):

if isinstance(arg, (list,tuple)):
arg = np.array(arg, dtype='O')

if is_timedelta64_dtype(arg):
if box:
from pandas import Series
return Series(arg,dtype='m8[ns]')
return arg
value = arg.astype('timedelta64[ns]')
elif is_integer_dtype(arg):
# these are shortcutable
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
else:
try:
value = tslib.array_to_timedelta64(_ensure_object(arg),unit=unit)
except:
value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])

value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])
if box:
from pandas import Series
value = Series(value,dtype='m8[ns]')
Expand All @@ -53,10 +57,10 @@ def _convert_listlike(arg, box):
return arg
elif isinstance(arg, ABCSeries):
from pandas import Series
values = _convert_listlike(arg.values, box=False)
values = _convert_listlike(arg.values, box=False, unit=unit)
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
elif is_list_like(arg):
return _convert_listlike(arg, box=box)
return _convert_listlike(arg, box=box, unit=unit)

# ...so it must be a scalar value. Return scalar.
return _coerce_scalar_to_timedelta_type(arg, unit=unit)
Expand Down Expand Up @@ -139,7 +143,7 @@ def convert(r=None, unit=None, m=m):
return convert

# no converter
raise ValueError("cannot create timedelta string converter")
raise ValueError("cannot create timedelta string converter for [{0}]".format(r))

def _possibly_cast_to_timedelta(value, coerce=True):
""" try to cast to timedelta64, if already a timedeltalike, then make
Expand Down
1 change: 1 addition & 0 deletions vb_suite/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
'reshape',
'stat_ops',
'timeseries',
'timedelta',
'eval']

by_module = {}
Expand Down
32 changes: 32 additions & 0 deletions vb_suite/timedelta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from vbench.api import Benchmark
from datetime import datetime

# Setup source shared by every benchmark in this module; it is prepended to
# each benchmark-specific setup string below.
common_setup = """from pandas_vb_common import *
from pandas import to_timedelta
"""

#----------------------------------------------------------------------
# conversion

# NOTE: ``setup`` and ``stmt`` are rebound before each Benchmark is created,
# so the order of the statements below matters.

# Integer input: 10000 values produced by ``np.random.randint(0, 1000)``.
# ``np`` is not imported in the setup text itself; presumably it comes from
# the ``pandas_vb_common`` star import -- TODO confirm.
setup = common_setup + """
arr = np.random.randint(0,1000,size=10000)
"""

# Convert the integer array with the unit given as seconds.
stmt = "to_timedelta(arr,unit='s')"
timedelta_convert_int = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1))

# String input of the form "<n> days", built from the same kind of random
# integers as above.
setup = common_setup + """
arr = np.random.randint(0,1000,size=10000)
arr = [ '{0} days'.format(i) for i in arr ]
"""

# No unit is passed here; the strings carry their own unit text.
stmt = "to_timedelta(arr)"
timedelta_convert_string = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1))

# String input of the form "00:00:SS", with SS a zero-padded two-digit value
# produced by ``np.random.randint(0, 60)``.
setup = common_setup + """
arr = np.random.randint(0,60,size=10000)
arr = [ '00:00:{0:02d}'.format(i) for i in arr ]
"""

stmt = "to_timedelta(arr)"
timedelta_convert_string_seconds = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1))
2 changes: 1 addition & 1 deletion vb_suite/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def date_range(start=None, end=None, periods=None, freq=None):
"""

datetimeindex_converter = \
Benchmark('DatetimeConverter.convert(rng, None, None)',
Benchmark('DatetimeConverter.convert(rng, None, None)',
setup, start_date=datetime(2013, 1, 1))

# Adding custom business day
Expand Down