diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index a5734dc1db200..55e2d426a2303 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -24,10 +24,15 @@ New features +.. _whatsnew_0182.enhancements.other: +Other enhancements +^^^^^^^^^^^^^^^^^^ - +- The ``tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, + so you can silently ignore nonexistent timestamps and replace them with ``NaT`` (``errors='coerce'``). + The default behaviour is still to raise a ``NonExistentTimeError`` (``errors='raise'``) (:issue:`13057`) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 746163069d3e8..25d3490873542 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1812,7 +1812,7 @@ def tz_convert(self, tz): @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', mapping={True: 'infer', False: 'raise'}) - def tz_localize(self, tz, ambiguous='raise'): + def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ Localize tz-naive DatetimeIndex to given time zone (using pytz/dateutil), or remove timezone from tz-aware DatetimeIndex @@ -1832,6 +1832,15 @@ def tz_localize(self, tz, ambiguous='raise'): - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + errors : 'raise', 'coerce', default 'raise' + - 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time) + - 'coerce' will return NaT if the timestamp cannot be converted + into the specified timezone + + .. 
versionadded:: 0.18.2 + infer_dst : boolean, default False (DEPRECATED) Attempt to infer fall dst-transition hours based on order @@ -1854,7 +1863,8 @@ def tz_localize(self, tz, ambiguous='raise'): # Convert to UTC new_dates = tslib.tz_localize_to_utc(self.asi8, tz, - ambiguous=ambiguous) + ambiguous=ambiguous, + errors=errors) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 3961a8b99b4dd..e6d16636e9dcc 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -533,6 +533,23 @@ def test_ambiguous_nat(self): di_test = DatetimeIndex(times, tz='US/Eastern') self.assert_numpy_array_equal(di_test, localized) + def test_nonexistent_raise_coerce(self): + # See issue 13057 + from pytz.exceptions import NonExistentTimeError + times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00'] + index = DatetimeIndex(times) + tz = 'US/Eastern' + self.assertRaises(NonExistentTimeError, + index.tz_localize, tz=tz) + self.assertRaises(NonExistentTimeError, + index.tz_localize, tz=tz, errors='raise') + result = index.tz_localize(tz=tz, errors='coerce') + test_times = ['2015-03-08 01:00-05:00', 'NaT', + '2015-03-08 03:00-04:00'] + expected = DatetimeIndex(test_times)\ + .tz_localize('UTC').tz_convert('US/Eastern') + tm.assert_index_equal(result, expected) + # test utility methods def test_infer_tz(self): eastern = self.tz('US/Eastern') diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index b2311bf4d6661..b2550d28e7097 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -319,6 +319,29 @@ def test_tz_localize_ambiguous(self): 'tz_localize to localize'): Timestamp('2011-01-01').tz_convert('Asia/Tokyo') + def test_tz_localize_nonexistent(self): + # See issue 13057 + from pytz.exceptions import NonExistentTimeError + times = ['2015-03-08 
02:00', '2015-03-08 02:30', + '2015-03-29 02:00', '2015-03-29 02:30'] + timezones = ['US/Eastern', 'US/Pacific', + 'Europe/Paris', 'Europe/Belgrade'] + for t, tz in zip(times, timezones): + ts = Timestamp(t) + self.assertRaises(NonExistentTimeError, ts.tz_localize, + tz) + self.assertRaises(NonExistentTimeError, ts.tz_localize, + tz, errors='raise') + self.assertIs(ts.tz_localize(tz, errors='coerce'), + pd.NaT) + + def test_tz_localize_errors_ambiguous(self): + # See issue 13057 + from pytz.exceptions import AmbiguousTimeError + ts = pd.Timestamp('2015-11-1 01:00') + self.assertRaises(AmbiguousTimeError, + ts.tz_localize, 'US/Pacific', errors='coerce') + def test_tz_localize_roundtrip(self): for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: for t in ['2014-02-01 09:00', '2014-07-08 09:00', diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 261997122988b..a240558025090 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -460,7 +460,7 @@ class Timestamp(_Timestamp): def is_year_end(self): return self._get_start_end_field('is_year_end') - def tz_localize(self, tz, ambiguous='raise'): + def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. @@ -475,6 +475,14 @@ class Timestamp(_Timestamp): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + errors : 'raise', 'coerce', default 'raise' + - 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time) + - 'coerce' will return NaT if the timestamp cannot be converted + into the specified timezone + + .. 
versionadded:: 0.18.2 Returns ------- @@ -494,7 +502,7 @@ class Timestamp(_Timestamp): if not isinstance(ambiguous, basestring): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value],dtype='i8'), tz, - ambiguous=ambiguous)[0] + ambiguous=ambiguous, errors=errors)[0] return Timestamp(value, tz=tz) else: if tz is None: @@ -3943,7 +3951,8 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): @cython.boundscheck(False) @cython.wraparound(False) -def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None): +def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, + object errors='raise'): """ Localize tzinfo-naive DateRange to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. @@ -3960,9 +3969,12 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None): ndarray[int64_t] result, result_a, result_b, dst_hours pandas_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False + bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' # Vectorized version of DstTzInfo.localize + assert is_coerce or is_raise + if not have_pytz: raise Exception("Could not find pytz module") @@ -4092,8 +4104,11 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None): elif right != NPY_NAT: result[i] = right else: - stamp = Timestamp(vals[i]) - raise pytz.NonExistentTimeError(stamp) + if is_coerce: + result[i] = NPY_NAT + else: + stamp = Timestamp(vals[i]) + raise pytz.NonExistentTimeError(stamp) return result