diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 6a85bfd852e19..bd20144268436 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -240,7 +240,7 @@ Timedelta Timezones ^^^^^^^^^ - +- Bug in different ``tzinfo`` objects representing UTC not being treated as equivalent (:issue:`39216`) - - diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 7723140e3eab1..f08b86aa63574 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -24,6 +24,7 @@ "to_offset", "Tick", "BaseOffset", + "tz_compare", ] from . import dtypes @@ -35,6 +36,7 @@ from .period import IncompatibleFrequency, Period from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta from .timestamps import Timestamp +from .timezones import tz_compare from .tzconversion import tz_convert_from_utc_single from .vectorized import ( dt64arr_to_periodarr, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index df4677a242758..5a1db4eee91be 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -647,14 +647,12 @@ cdef class _Timestamp(ABCTimestamp): try: stamp += self.strftime('%z') - if self.tzinfo: - zone = get_timezone(self.tzinfo) except ValueError: year2000 = self.replace(year=2000) stamp += year2000.strftime('%z') - if self.tzinfo: - zone = get_timezone(self.tzinfo) + if self.tzinfo: + zone = get_timezone(self.tzinfo) try: stamp += zone.strftime(' %%Z') except AttributeError: diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 3deabc57ec522..0449a8dac51fa 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -341,6 +341,12 @@ cpdef bint tz_compare(tzinfo start, tzinfo end): bool """ # GH 18523 + if is_utc(start): + # GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent + return is_utc(end) + elif is_utc(end): + # Ensure we 
don't treat tzlocal as equal to UTC when running in UTC + return False return get_timezone(start) == get_timezone(end) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0941967ef6bee..6d073a34220f4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -35,8 +35,8 @@ conversion, iNaT, ints_to_pydatetime, + tz_compare, ) -from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar from pandas.util._validators import validate_bool_kwarg diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index cefab33976ba8..6efcf9e6c8416 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -21,8 +21,16 @@ import pytz from pandas._libs.interval import Interval -from pandas._libs.tslibs import NaT, Period, Timestamp, dtypes, timezones, to_offset -from pandas._libs.tslibs.offsets import BaseOffset +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + Period, + Timestamp, + dtypes, + timezones, + to_offset, + tz_compare, +) from pandas._typing import Dtype, DtypeObj, Ordered from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype @@ -764,7 +772,7 @@ def __eq__(self, other: Any) -> bool: return ( isinstance(other, DatetimeTZDtype) and self.unit == other.unit - and str(self.tz) == str(other.tz) + and tz_compare(self.tz, other.tz) ) def __setstate__(self, state) -> None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0b46b43514d92..f44789456eb08 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -28,8 +28,12 @@ from pandas._libs import algos as libalgos, index as libindex, lib import pandas._libs.join as libjoin from pandas._libs.lib import is_datetime_array, no_default -from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime, Timestamp -from pandas._libs.tslibs.timezones import tz_compare +from pandas._libs.tslibs import ( + 
IncompatibleFrequency, + OutOfBoundsDatetime, + Timestamp, + tz_compare, +) from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Shape, final from pandas.compat.numpy import function as nv from pandas.errors import DuplicateLabelError, InvalidIndexError diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 294abafa86812..89b45890458c5 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas._libs.tslibs import NaT +from pandas._libs.tslibs import NaT, tz_compare from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( @@ -431,7 +431,7 @@ def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, tz_aware_fix # filling datetimetz with datetimetz casts to object, unless tz matches exp_val_for_scalar = fill_value - if dtype.tz == fill_dtype.tz: + if tz_compare(dtype.tz, fill_dtype.tz): expected_dtype = dtype else: expected_dtype = np.dtype(object) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 69a9a81e66ac4..0bcb37d4880a6 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -727,10 +727,18 @@ def test_fillna_method_and_limit_invalid(self): def test_fillna_datetime64_with_timezone_tzinfo(self): # https://github.com/pandas-dev/pandas/issues/38851 - s = Series(date_range("2020", periods=3, tz="UTC")) - expected = s.astype(object) - s[1] = NaT - result = s.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc)) + # different tzinfos representing UTC treated as equal + ser = Series(date_range("2020", periods=3, tz="UTC")) + expected = ser.copy() + ser[1] = NaT + result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc)) + tm.assert_series_equal(result, expected) + + # but we don't (yet) consider distinct tzinfos for non-UTC tz equivalent + ts =
Timestamp("2000-01-01", tz="US/Pacific") + ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific")) + result = ser2.fillna(ts) + expected = Series([ser[0], ts, ser[2]], dtype=object) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index eca444c9ceb34..4ded555ed8f73 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -49,6 +49,7 @@ def test_namespace(): "localize_pydatetime", "tz_convert_from_utc_single", "to_offset", + "tz_compare", ] expected = set(submodules + api) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index e49f511fe3cc4..3e825460d6ddd 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -48,6 +48,14 @@ def test_tzlocal_offset(): assert ts.value + offset == Timestamp("2011-01-01").value +def test_tzlocal_is_not_utc(): + # even if the machine running the test is localized to UTC + tz = dateutil.tz.tzlocal() + assert not timezones.is_utc(tz) + + assert not timezones.tz_compare(tz, dateutil.tz.tzutc()) + + @pytest.fixture( params=[ (pytz.timezone("US/Eastern"), lambda tz, x: tz.localize(x)),