Skip to content

Commit

Permalink
API: default to stdlib timezone objects for fixed-offsets (#49677)
Browse files Browse the repository at this point in the history
* API: default to stdlib timezone objects for fixed-offsets

* update docstrings

* flesh out whatsnew

* handle strings

* skip on windows
  • Loading branch information
jbrockmendel authored Dec 13, 2022
1 parent 5ee4dac commit 5a372d8
Show file tree
Hide file tree
Showing 26 changed files with 175 additions and 94 deletions.
34 changes: 34 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,40 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or
ser.astype("timedelta64[s]")
ser.astype("timedelta64[D]")
.. _whatsnew_200.api_breaking.default_to_stdlib_tzinfos:

UTC and fixed-offset timezones default to standard-library tzinfo objects
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In previous versions, the default ``tzinfo`` object used to represent UTC
was ``pytz.UTC``. In pandas 2.0, we default to ``datetime.timezone.utc`` instead.
Similarly, for timezones that represent fixed UTC offsets, we use ``datetime.timezone``
objects instead of ``pytz.FixedOffset`` objects. See (:issue:`34916`)

*Previous behavior*:

.. code-block:: ipython
In [2]: ts = pd.Timestamp("2016-01-01", tz="UTC")
In [3]: type(ts.tzinfo)
Out[3]: pytz.UTC
In [4]: ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
In [5]: type(ts2.tzinfo)
Out[5]: pytz._FixedOffset
*New behavior*:

.. ipython:: python
ts = pd.Timestamp("2016-01-01", tz="UTC")
type(ts.tzinfo)
ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
type(ts2.tzinfo)
For timezones that are neither UTC nor fixed offsets, e.g. "US/Pacific", we
continue to default to ``pytz`` objects.

.. _whatsnew_200.api_breaking.zero_len_indexes:

Empty DataFrames/Series will now default to have a ``RangeIndex``
Expand Down
14 changes: 8 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
cimport cython

from datetime import timezone

from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
datetime,
import_datetime,
timedelta,
tzinfo,
)
from cpython.object cimport PyObject
Expand All @@ -23,8 +27,6 @@ import numpy as np

cnp.import_array()

import pytz

from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
Expand Down Expand Up @@ -95,7 +97,7 @@ def _test_parse_iso8601(ts: str):
obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo)
return Timestamp(obj.value, tz=obj.tzinfo)
else:
Expand Down Expand Up @@ -460,7 +462,7 @@ cpdef array_to_datetime(
2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
is encountered
Also returns a pytz.FixedOffset if an array of strings with the same
Also returns a fixed-offset tzinfo object if an array of strings with the same
timezone offset is passed and utc=True is not passed. Otherwise, None
is returned
Expand Down Expand Up @@ -650,7 +652,7 @@ cpdef array_to_datetime(
# since we store the total_seconds of
# dateutil.tz.tzoffset objects
out_tzoffset_vals.add(out_tzoffset * 60.)
tz = pytz.FixedOffset(out_tzoffset)
tz = timezone(timedelta(minutes=out_tzoffset))
value = tz_localize_to_utc_single(value, tz)
out_local = 0
out_tzoffset = 0
Expand Down Expand Up @@ -718,7 +720,7 @@ cpdef array_to_datetime(
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
else:
tz_offset = out_tzoffset_vals.pop()
tz_out = pytz.FixedOffset(tz_offset / 60.)
tz_out = timezone(timedelta(seconds=tz_offset))
return result, tz_out


Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ from numpy cimport (

cnp.import_array()

import pytz

# stdlib datetime imports

from datetime import timezone

from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
datetime,
import_datetime,
time,
timedelta,
tzinfo,
)

Expand Down Expand Up @@ -428,7 +429,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,

value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
obj.dts = dts
obj.tzinfo = pytz.FixedOffset(tzoffset)
obj.tzinfo = timezone(timedelta(minutes=tzoffset))
obj.value = tz_localize_to_utc_single(value, obj.tzinfo)
if tz is None:
check_overflows(obj, NPY_FR_ns)
Expand Down
21 changes: 12 additions & 9 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Strptime-related classes and functions.
"""
from datetime import timezone

from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
date,
import_datetime,
timedelta,
tzinfo,
)

Expand Down Expand Up @@ -96,7 +99,7 @@ def array_strptime(
int week_of_year, week_of_year_start, parse_code, ordinal
int iso_week, iso_year
int64_t us, ns
object val, group_key, ampm, found, timezone
object val, group_key, ampm, found, tz
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
Expand Down Expand Up @@ -214,7 +217,7 @@ def array_strptime(
year = 1900
month = day = 1
hour = minute = second = ns = us = 0
timezone = None
tz = None
# Default to -1 to signify that values not known; not critical to have,
# though
iso_week = week_of_year = -1
Expand Down Expand Up @@ -304,9 +307,9 @@ def array_strptime(
# W starts week on Monday.
week_of_year_start = 0
elif parse_code == 17:
timezone = pytz.timezone(found_dict["Z"])
tz = pytz.timezone(found_dict["Z"])
elif parse_code == 19:
timezone = parse_timezone_directive(found_dict["z"])
tz = parse_timezone_directive(found_dict["z"])
elif parse_code == 20:
iso_year = int(found_dict["G"])
elif parse_code == 21:
Expand Down Expand Up @@ -388,7 +391,7 @@ def array_strptime(
continue
raise

result_timezone[i] = timezone
result_timezone[i] = tz

return result, result_timezone.base

Expand Down Expand Up @@ -538,15 +541,15 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday)

cdef tzinfo parse_timezone_directive(str z):
"""
Parse the '%z' directive and return a pytz.FixedOffset
Parse the '%z' directive and return a datetime.timezone object.
Parameters
----------
z : string of the UTC offset
Returns
-------
pytz.FixedOffset
datetime.timezone
Notes
-----
Expand All @@ -560,7 +563,7 @@ cdef tzinfo parse_timezone_directive(str z):
object gmtoff_remainder, gmtoff_remainder_padding

if z == "Z":
return pytz.FixedOffset(0)
return timezone(timedelta(0))
if z[3] == ":":
z = z[:3] + z[4:]
if len(z) > 5:
Expand All @@ -580,4 +583,4 @@ cdef tzinfo parse_timezone_directive(str z):
total_minutes = ((hours * 60) + minutes + (seconds // 60) +
(microseconds // 60_000_000))
total_minutes = -total_minutes if z.startswith("-") else total_minutes
return pytz.FixedOffset(total_minutes)
return timezone(timedelta(minutes=total_minutes))
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ from pandas._libs.tslibs.timezones cimport (
is_utc,
maybe_get_tz,
treat_tz_as_pytz,
utc_pytz as UTC,
utc_stdlib as UTC,
)
from pandas._libs.tslibs.tzconversion cimport (
tz_convert_from_utc_single,
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ from cpython.datetime cimport (
)


cdef tzinfo utc_pytz
cdef tzinfo utc_stdlib

cpdef bint is_utc(tzinfo tz)
Expand Down
12 changes: 5 additions & 7 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,10 @@ from dateutil.tz import (
tzlocal as _dateutil_tzlocal,
tzutc as _dateutil_tzutc,
)
import numpy as np
import pytz
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo

UTC = pytz.utc


import numpy as np

cimport numpy as cnp
from numpy cimport int64_t

Expand All @@ -49,7 +45,7 @@ from pandas._libs.tslibs.util cimport (

cdef int64_t NPY_NAT = get_nat()
cdef tzinfo utc_stdlib = timezone.utc
cdef tzinfo utc_pytz = UTC
cdef tzinfo utc_pytz = pytz.utc
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc()

cdef tzinfo utc_zoneinfo = None
Expand Down Expand Up @@ -168,10 +164,12 @@ cpdef inline tzinfo maybe_get_tz(object tz):
hours = int(tz[3:6])
minutes = int(tz[3] + tz[7:9])
tz = timezone(timedelta(hours=hours, minutes=minutes))
elif tz == "UTC" or tz == "utc":
tz = utc_stdlib
else:
tz = pytz.timezone(tz)
elif is_integer_object(tz):
tz = pytz.FixedOffset(tz / 60)
tz = timezone(timedelta(seconds=tz))
elif isinstance(tz, tzinfo):
pass
elif tz is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz):
Optimized equivalent to:
dt = datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, utc_pytz)
dts.min, dts.sec, dts.us, utc_stdlib)
dt = dt.astimezone(tz)
Derived from the datetime.astimezone implementation at
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ def to_datetime(
>>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500'])
DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None)
dtype='datetime64[ns, UTC-05:00]', freq=None)
- However, timezone-aware inputs *with mixed time offsets* (for example
issued from a timezone with daylight savings, such as Europe/Paris)
Expand All @@ -1010,7 +1010,7 @@ def to_datetime(
>>> from datetime import datetime
>>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
dtype='datetime64[ns, UTC-01:00]', freq=None)
|
Expand Down
7 changes: 6 additions & 1 deletion pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import warnings

from pandas._libs.json import loads
from pandas._libs.tslibs import timezones
from pandas._typing import (
DtypeObj,
JSONSerializable,
Expand Down Expand Up @@ -140,7 +141,11 @@ def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]:
elif is_period_dtype(dtype):
field["freq"] = dtype.freq.freqstr
elif is_datetime64tz_dtype(dtype):
field["tz"] = dtype.tz.zone
if timezones.is_utc(dtype.tz):
# timezone.utc has no "zone" attr
field["tz"] = "UTC"
else:
field["tz"] = dtype.tz.zone
elif is_extension_array_dtype(dtype):
field["extDtype"] = dtype.name
return field
Expand Down
15 changes: 8 additions & 7 deletions pandas/tests/frame/methods/test_align.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import timezone

import numpy as np
import pytest
import pytz

import pandas as pd
from pandas import (
Expand All @@ -27,17 +28,17 @@ def test_frame_align_aware(self):
# frame with frame
df1_central = df1.tz_convert("US/Central")
new1, new2 = df1.align(df1_central)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

# frame with Series
new1, new2 = df1.align(df1_central[0], axis=0)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

df1[0].align(df1_central, axis=0)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

def test_align_float(self, float_frame):
af, bf = float_frame.align(float_frame)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/frame/methods/test_tz_localize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import timezone

import numpy as np
import pytest

Expand All @@ -23,7 +25,7 @@ def test_tz_localize(self, frame_or_series):
expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
expected = tm.get_obj(expected, frame_or_series)

assert result.index.tz.zone == "UTC"
assert result.index.tz is timezone.utc
tm.assert_equal(result, expected)

def test_tz_localize_axis1(self):
Expand All @@ -33,7 +35,7 @@ def test_tz_localize_axis1(self):

df = df.T
result = df.tz_localize("utc", axis=1)
assert result.columns.tz.zone == "UTC"
assert result.columns.tz is timezone.utc

expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))

Expand Down
Loading

0 comments on commit 5a372d8

Please sign in to comment.