From d27e03eab3b0d299f2a692af77094b221ada0005 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Thu, 31 May 2018 21:48:00 -0400 Subject: [PATCH 1/6] BUG: Using DatetimeIndex.date with timezone returns incorrect date #21230 --- doc/source/whatsnew/v0.23.1.txt | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_datetime.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index f2bc81eea186b..b263cf6993b03 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -85,7 +85,7 @@ Indexing - Bug in :meth:`Series.reset_index` where appropriate error was not raised with an invalid level name (:issue:`20925`) - Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with float ``start`` or ``end`` (:issue:`21161`) - Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`) -- +- Bug in :attr:`DatetimeIndex.date` where an incorrect date is returned when the input date has a timezone (:issue:`21230`) I/O ^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 83950f1d71633..1b18b264749e5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2040,7 +2040,7 @@ def date(self): Returns numpy array of python datetime.date objects (namely, the date part of Timestamps without timezone information). 
""" - return libts.ints_to_pydatetime(self.normalize().asi8, box="date") + return libts.ints_to_pydatetime(self.asi8, box="date") def normalize(self): """ diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 1a5f12103595c..aa4c81b0648f6 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -27,6 +27,21 @@ def test_roundtrip_pickle_with_tz(self): unpickled = tm.round_trip_pickle(index) tm.assert_index_equal(index, unpickled) + def test_date_accessor_with_tz(self): + # GH 21230 + from datetime import date + index = DatetimeIndex(['2013-01-24 15:01:00+01:00'], + dtype='datetime64[ns, CET]', freq=None) + + assert index.date == np.array(date(2013, 1, 24)) + + def test_date_accessor_without_tz(self): + # GH 21230 + from datetime import date + index = DatetimeIndex(['2013-01-24 15:01:00+01:00'], freq=None) + + assert index.date == np.array(date(2013, 1, 24)) + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): # GH7774 index = date_range('20130101', periods=3, tz='US/Eastern') From d962273aeee8ecb366a1e2a3266bad765d3af6c0 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Fri, 1 Jun 2018 21:28:31 -0400 Subject: [PATCH 2/6] Use _local_timestamps() for tz-aware DTI and parametrize test --- pandas/core/indexes/datetimes.py | 6 +++++- .../tests/indexes/datetimes/test_datetime.py | 19 ++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1b18b264749e5..6dedc35c99109 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2040,7 +2040,11 @@ def date(self): Returns numpy array of python datetime.date objects (namely, the date part of Timestamps without timezone information). 
""" - return libts.ints_to_pydatetime(self.asi8, box="date") + if (self.tz is None): + return libts.ints_to_pydatetime(self.normalize().asi8, box="date") + else: + return libts.ints_to_pydatetime( + self.normalize()._local_timestamps(), box="date") def normalize(self): """ diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index aa4c81b0648f6..8c13636b94be5 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -27,20 +27,17 @@ def test_roundtrip_pickle_with_tz(self): unpickled = tm.round_trip_pickle(index) tm.assert_index_equal(index, unpickled) - def test_date_accessor_with_tz(self): - # GH 21230 - from datetime import date - index = DatetimeIndex(['2013-01-24 15:01:00+01:00'], - dtype='datetime64[ns, CET]', freq=None) - - assert index.date == np.array(date(2013, 1, 24)) - - def test_date_accessor_without_tz(self): + @pytest.mark.parametrize("test_input", [ + DatetimeIndex(['2013-01-24 15:01:00']), + DatetimeIndex(['2013-01-24 15:01:00'], dtype='datetime64[ns, CET]'), + DatetimeIndex(['2013-01-24 15:01:00'], dtype='datetime64[ns, EST]'), + DatetimeIndex(['2013-01-24 15:01:00'], dtype='datetime64[ns, UTC]') + ]) + def test_date_accessor(self, test_input): # GH 21230 from datetime import date - index = DatetimeIndex(['2013-01-24 15:01:00+01:00'], freq=None) - assert index.date == np.array(date(2013, 1, 24)) + assert test_input.date == np.array(date(2013, 1, 24)) def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): # GH7774 index = date_range('20130101', periods=3, tz='US/Eastern') From f45a81928aab089472774118c20fb04a1030ba84 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Sun, 3 Jun 2018 00:24:17 -0400 Subject: [PATCH 3/6] Remove redundant code --- pandas/core/indexes/datetimes.py | 4 ++-- pandas/tests/indexes/datetimes/test_datetime.py | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py 
index 6dedc35c99109..f7459b4414805 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2041,10 +2041,10 @@ def date(self): part of Timestamps without timezone information). """ if (self.tz is None): - return libts.ints_to_pydatetime(self.normalize().asi8, box="date") + return libts.ints_to_pydatetime(self.asi8, box="date") else: return libts.ints_to_pydatetime( - self.normalize()._local_timestamps(), box="date") + self._local_timestamps(), box="date") def normalize(self): """ diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 8c13636b94be5..a988035a5a312 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -27,17 +27,16 @@ def test_roundtrip_pickle_with_tz(self): unpickled = tm.round_trip_pickle(index) tm.assert_index_equal(index, unpickled) - @pytest.mark.parametrize("test_input", [ - DatetimeIndex(['2013-01-24 15:01:00']), - DatetimeIndex(['2013-01-24 15:01:00'], dtype='datetime64[ns, CET]'), - DatetimeIndex(['2013-01-24 15:01:00'], dtype='datetime64[ns, EST]'), - DatetimeIndex(['2013-01-24 15:01:00'], dtype='datetime64[ns, UTC]') + @pytest.mark.parametrize("dtype", [ + None, 'datetime64[ns, CET]', + 'datetime64[ns, EST]', 'datetime64[ns, UTC]' ]) - def test_date_accessor(self, test_input): + def test_date_accessor(self, dtype): # GH 21230 - from datetime import date - assert test_input.date == np.array(date(2013, 1, 24)) + index = DatetimeIndex(['2013-01-24 15:01:00'], dtype=dtype) + tm.assert_numpy_array_equal(index.date, + np.array([date(2013, 1, 24)], ndmin=1)) def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): # GH7774 From f09e8a4b9ab409756d7f370945bce03f9b9ca9ab Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Mon, 4 Jun 2018 22:27:43 -0400 Subject: [PATCH 4/6] Use _local_timestamps() only for non-UTC DTI, fix documentation, and change test location --- 
pandas/core/indexes/datetimes.py | 13 +++++++++---- pandas/tests/indexes/datetimes/test_datetime.py | 11 ----------- pandas/tests/indexes/datetimes/test_timezones.py | 15 ++++++++++++++- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f7459b4414805..596cc4c15e147 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2040,11 +2040,16 @@ def date(self): Returns numpy array of python datetime.date objects (namely, the date part of Timestamps without timezone information). """ - if (self.tz is None): - return libts.ints_to_pydatetime(self.asi8, box="date") + + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + if (self.tz is not None and self.tz is not utc): + timestamps = self._local_timestamps() else: - return libts.ints_to_pydatetime( - self._local_timestamps(), box="date") + timestamps = self.asi8 + + return libts.ints_to_pydatetime(timestamps, box="date") def normalize(self): """ diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a988035a5a312..1a5f12103595c 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -27,17 +27,6 @@ def test_roundtrip_pickle_with_tz(self): unpickled = tm.round_trip_pickle(index) tm.assert_index_equal(index, unpickled) - @pytest.mark.parametrize("dtype", [ - None, 'datetime64[ns, CET]', - 'datetime64[ns, EST]', 'datetime64[ns, UTC]' - ]) - def test_date_accessor(self, dtype): - # GH 21230 - - index = DatetimeIndex(['2013-01-24 15:01:00'], dtype=dtype) - tm.assert_numpy_array_equal(index.date, - np.array([date(2013, 1, 24)], ndmin=1)) - def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): # GH7774 index = date_range('20130101', periods=3, tz='US/Eastern') diff --git 
a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 09210d8b64d1b..f27ab968712c6 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -2,7 +2,7 @@ """ Tests for DatetimeIndex timezone-related methods """ -from datetime import datetime, timedelta, tzinfo +from datetime import datetime, timedelta, tzinfo, date from distutils.version import LooseVersion import pytest @@ -706,6 +706,19 @@ def test_join_utc_convert(self, join_type): assert isinstance(result, DatetimeIndex) assert result.tz.zone == 'UTC' + @pytest.mark.parametrize("dtype", [ + None, 'datetime64[ns, CET]', + 'datetime64[ns, EST]', 'datetime64[ns, UTC]' + ]) + def test_date_accessor(self, dtype): + # Regression test for GH#21230 + expected = np.array([date(2018, 6, 4), pd.NaT], ndmin=1) + + index = DatetimeIndex(['2018-06-04 10:00:00', pd.NaT], dtype=dtype) + result = index.date + + tm.assert_numpy_array_equal(result, expected) + def test_dti_drop_dont_lose_tz(self): # GH#2621 ind = date_range("2012-12-01", periods=10, tz="utc") From ab81279bb1cd22854e17880d1abef65e7215a737 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Tue, 5 Jun 2018 18:15:14 -0400 Subject: [PATCH 5/6] Fix bug where DTI.time returns a tz-aware Time instead of tz-naive --- doc/source/whatsnew/v0.23.1.txt | 1 + pandas/_libs/tslib.pyx | 2 +- pandas/core/indexes/datetimes.py | 11 ++++++++++- pandas/tests/indexes/datetimes/test_timezones.py | 15 ++++++++++++++- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 1528f57c213d2..3a785c7adbdd7 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -89,6 +89,7 @@ Indexing - Bug in :attr:`DatetimeIndex.date` where an incorrect date is returned when the input date has a non-UTC timezone (:issue:`21230`) - Bug in :class:`IntervalIndex` constructors where creating an 
``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, issue:`21253`) - Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`) +- Bug in :attr:`DatetimeIndex.time` where given a tz-aware Timestamp, a tz-aware Time is returned instead of tz-naive (:issue:`21267`) - I/O diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 17453d8af1297..0f58cfa761f21 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -77,7 +77,7 @@ cdef inline object create_time_from_ts( int64_t value, pandas_datetimestruct dts, object tz, object freq): """ convenience routine to construct a datetime.time from its parts """ - return time(dts.hour, dts.min, dts.sec, dts.us, tz) + return time(dts.hour, dts.min, dts.sec, dts.us) def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 596cc4c15e147..0ddf33cdcae73 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2032,7 +2032,16 @@ def time(self): """ Returns numpy array of datetime.time. The time part of the Timestamps. 
""" - return libts.ints_to_pydatetime(self.asi8, self.tz, box="time") + + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + if (self.tz is not None and self.tz is not utc): + timestamps = self._local_timestamps() + else: + timestamps = self.asi8 + + return libts.ints_to_pydatetime(timestamps, box="time") @property def date(self): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index f27ab968712c6..e73a56e43a018 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -2,7 +2,7 @@ """ Tests for DatetimeIndex timezone-related methods """ -from datetime import datetime, timedelta, tzinfo, date +from datetime import datetime, timedelta, tzinfo, date, time from distutils.version import LooseVersion import pytest @@ -719,6 +719,19 @@ def test_date_accessor(self, dtype): tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("dtype", [ + None, 'datetime64[ns, CET]', + 'datetime64[ns, EST]', 'datetime64[ns, UTC]' + ]) + def test_time_accessor(self, dtype): + # Regression test for GH#21267 + expected = np.array([time(10, 20, 30), pd.NaT], ndmin=1) + + index = DatetimeIndex(['2018-06-04 10:20:30', pd.NaT], dtype=dtype) + result = index.time + + tm.assert_numpy_array_equal(result, expected) + def test_dti_drop_dont_lose_tz(self): # GH#2621 ind = date_range("2012-12-01", periods=10, tz="utc") From 3b15060ab9d6211dbe11a33be3fd435735ebd025 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Wed, 6 Jun 2018 21:16:36 -0400 Subject: [PATCH 6/6] Remove redundant code --- pandas/tests/indexes/datetimes/test_timezones.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index e73a56e43a018..573940edaa08f 100644 --- 
a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -712,7 +712,7 @@ def test_join_utc_convert(self, join_type): ]) def test_date_accessor(self, dtype): # Regression test for GH#21230 - expected = np.array([date(2018, 6, 4), pd.NaT], ndmin=1) + expected = np.array([date(2018, 6, 4), pd.NaT]) index = DatetimeIndex(['2018-06-04 10:00:00', pd.NaT], dtype=dtype) result = index.date @@ -725,7 +725,7 @@ def test_date_accessor(self, dtype): ]) def test_time_accessor(self, dtype): # Regression test for GH#21267 - expected = np.array([time(10, 20, 30), pd.NaT], ndmin=1) + expected = np.array([time(10, 20, 30), pd.NaT]) index = DatetimeIndex(['2018-06-04 10:20:30', pd.NaT], dtype=dtype) result = index.time