From 611bbc594ce3de0f3b1232da9176c6d266f84b76 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 18 Jun 2015 13:44:38 -0400 Subject: [PATCH 1/2] PERF: parse timedelta strings in cython #6755 --- doc/source/whatsnew/v0.17.0.txt | 2 + pandas/tseries/tests/test_timedeltas.py | 5 +- pandas/tseries/timedeltas.py | 153 +------------ pandas/tslib.pyx | 279 +++++++++++++++++++++++- 4 files changed, 278 insertions(+), 161 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 6f7e9bce0a3a6..8079c78562141 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -54,6 +54,8 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- 4x improvement in ``timedelta`` string parsing (:issue:`6755`) + .. _whatsnew_0170.bug_fixes: Bug Fixes diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 948a0be91b276..565760b545961 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -109,6 +109,9 @@ def test_construction(self): # currently invalid as it has a - on the hhmmdd part (only allowed on the days) self.assertRaises(ValueError, lambda : Timedelta('-10 days -1 h 1.5m 1s 3us')) + # only leading neg signs are allowed + self.assertRaises(ValueError, lambda : Timedelta('10 days -1 h 1.5m 1s 3us')) + # roundtripping both for string and value for v in ['1s', '-1s', @@ -151,7 +154,7 @@ def test_construction(self): "cannot construct a TimeDelta", lambda : Timedelta()) tm.assertRaisesRegexp(ValueError, - "cannot create timedelta string convert", + "unit abbreviation w/o a number", lambda : Timedelta('foo')) tm.assertRaisesRegexp(ValueError, "cannot construct a TimeDelta from the passed arguments, allowed keywords are ", diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 624981c5536f5..6eeb8e805e4bc 100644 --- a/pandas/tseries/timedeltas.py +++ 
b/pandas/tseries/timedeltas.py @@ -46,10 +46,7 @@ def _convert_listlike(arg, box, unit): except: # try to process strings fast; may need to fallback - try: - value = np.array([ _get_string_converter(r, unit=unit)() for r in arg ],dtype='m8[ns]') - except: - value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit, coerce=coerce) for r in arg ]) + value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit, coerce=coerce) for r in arg ]) value = value.astype('timedelta64[ns]', copy=False) if box: @@ -95,15 +92,6 @@ def _convert_listlike(arg, box, unit): 'NS' : 'ns', 'ns' : 'ns', } -_unit_scale = { - 'd' : 86400*1e9, - 'h' : 3600*1e9, - 'm' : 60*1e9, - 's' : 1e9, - 'ms' : 1e6, - 'us' : 1e3, - 'ns' : 1, - } def _validate_timedelta_unit(arg): """ provide validation / translation for timedelta short units """ @@ -114,150 +102,11 @@ def _validate_timedelta_unit(arg): return 'ns' raise ValueError("invalid timedelta unit {0} provided".format(arg)) -_short_search = re.compile( - "^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE) -_full_search = re.compile( - "^\s*(?P<neg>-?)\s*(?P<days>\d*?\.?\d*?)?\s*(days|d|day)?,?\s*\+?(?P