Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: default to stdlib timezone objects for fixed-offsets #49677

Merged
merged 9 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,40 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or
ser.astype("timedelta64[s]")
ser.astype("timedelta64[D]")

.. _whatsnew_200.api_breaking.default_to_stdlib_tzinfos:

UTC and fixed-offset timezones default to standard-library tzinfo objects
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In previous versions, the default ``tzinfo`` object used to represent UTC
was ``pytz.UTC``. In pandas 2.0, we default to ``datetime.timezone.utc`` instead.
Similarly, for timezones that represent fixed UTC offsets, we use ``datetime.timezone``
objects instead of ``pytz.FixedOffset`` objects. See (:issue:`34916`)

*Previous behavior*:

.. code-block:: ipython

In [2]: ts = pd.Timestamp("2016-01-01", tz="UTC")
In [3]: type(ts.tzinfo)
Out[3]: pytz.UTC

In [4]: ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
In [5]: type(ts2.tzinfo)
Out[5]: pytz._FixedOffset

*New behavior*:

.. ipython:: python

ts = pd.Timestamp("2016-01-01", tz="UTC")
type(ts.tzinfo)

ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
type(ts2.tzinfo)

For timezones that are neither UTC nor fixed offsets, e.g. "US/Pacific", we
continue to default to ``pytz`` objects.

.. _whatsnew_200.api_breaking.zero_len_indexes:

Empty DataFrames/Series will now default to have a ``RangeIndex``
Expand Down
14 changes: 8 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
cimport cython

from datetime import timezone

from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
datetime,
import_datetime,
timedelta,
tzinfo,
)
from cpython.object cimport PyObject
Expand All @@ -23,8 +27,6 @@ import numpy as np

cnp.import_array()

import pytz

from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
Expand Down Expand Up @@ -95,7 +97,7 @@ def _test_parse_iso8601(ts: str):
obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo)
return Timestamp(obj.value, tz=obj.tzinfo)
else:
Expand Down Expand Up @@ -460,7 +462,7 @@ cpdef array_to_datetime(
2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
is encountered

Also returns a pytz.FixedOffset if an array of strings with the same
Also returns a fixed-offset tzinfo object if an array of strings with the same
timezone offset is passed and utc=True is not passed. Otherwise, None
is returned

Expand Down Expand Up @@ -650,7 +652,7 @@ cpdef array_to_datetime(
# since we store the total_seconds of
# dateutil.tz.tzoffset objects
out_tzoffset_vals.add(out_tzoffset * 60.)
tz = pytz.FixedOffset(out_tzoffset)
tz = timezone(timedelta(minutes=out_tzoffset))
value = tz_localize_to_utc_single(value, tz)
out_local = 0
out_tzoffset = 0
Expand Down Expand Up @@ -718,7 +720,7 @@ cpdef array_to_datetime(
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
else:
tz_offset = out_tzoffset_vals.pop()
tz_out = pytz.FixedOffset(tz_offset / 60.)
tz_out = timezone(timedelta(seconds=tz_offset))
return result, tz_out


Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ from numpy cimport (

cnp.import_array()

import pytz

# stdlib datetime imports

from datetime import timezone

from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
datetime,
import_datetime,
time,
timedelta,
tzinfo,
)

Expand Down Expand Up @@ -428,7 +429,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,

value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
obj.dts = dts
obj.tzinfo = pytz.FixedOffset(tzoffset)
obj.tzinfo = timezone(timedelta(minutes=tzoffset))
obj.value = tz_localize_to_utc_single(value, obj.tzinfo)
if tz is None:
check_overflows(obj, NPY_FR_ns)
Expand Down
21 changes: 12 additions & 9 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Strptime-related classes and functions.
"""
from datetime import timezone

from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
date,
import_datetime,
timedelta,
tzinfo,
)

Expand Down Expand Up @@ -96,7 +99,7 @@ def array_strptime(
int week_of_year, week_of_year_start, parse_code, ordinal
int iso_week, iso_year
int64_t us, ns
object val, group_key, ampm, found, timezone
object val, group_key, ampm, found, tz
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
Expand Down Expand Up @@ -214,7 +217,7 @@ def array_strptime(
year = 1900
month = day = 1
hour = minute = second = ns = us = 0
timezone = None
tz = None
# Default to -1 to signify that values not known; not critical to have,
# though
iso_week = week_of_year = -1
Expand Down Expand Up @@ -304,9 +307,9 @@ def array_strptime(
# W starts week on Monday.
week_of_year_start = 0
elif parse_code == 17:
timezone = pytz.timezone(found_dict["Z"])
tz = pytz.timezone(found_dict["Z"])
elif parse_code == 19:
timezone = parse_timezone_directive(found_dict["z"])
tz = parse_timezone_directive(found_dict["z"])
elif parse_code == 20:
iso_year = int(found_dict["G"])
elif parse_code == 21:
Expand Down Expand Up @@ -388,7 +391,7 @@ def array_strptime(
continue
raise

result_timezone[i] = timezone
result_timezone[i] = tz

return result, result_timezone.base

Expand Down Expand Up @@ -538,15 +541,15 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday)

cdef tzinfo parse_timezone_directive(str z):
"""
Parse the '%z' directive and return a pytz.FixedOffset
Parse the '%z' directive and return a datetime.timezone object.

Parameters
----------
z : string of the UTC offset

Returns
-------
pytz.FixedOffset
datetime.timezone

Notes
-----
Expand All @@ -560,7 +563,7 @@ cdef tzinfo parse_timezone_directive(str z):
object gmtoff_remainder, gmtoff_remainder_padding

if z == "Z":
return pytz.FixedOffset(0)
return timezone(timedelta(0))
if z[3] == ":":
z = z[:3] + z[4:]
if len(z) > 5:
Expand All @@ -580,4 +583,4 @@ cdef tzinfo parse_timezone_directive(str z):
total_minutes = ((hours * 60) + minutes + (seconds // 60) +
(microseconds // 60_000_000))
total_minutes = -total_minutes if z.startswith("-") else total_minutes
return pytz.FixedOffset(total_minutes)
return timezone(timedelta(minutes=total_minutes))
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ from pandas._libs.tslibs.timezones cimport (
is_utc,
maybe_get_tz,
treat_tz_as_pytz,
utc_pytz as UTC,
utc_stdlib as UTC,
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
)
from pandas._libs.tslibs.tzconversion cimport (
tz_convert_from_utc_single,
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ from cpython.datetime cimport (
)


cdef tzinfo utc_pytz
cdef tzinfo utc_stdlib

cpdef bint is_utc(tzinfo tz)
Expand Down
12 changes: 5 additions & 7 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,10 @@ from dateutil.tz import (
tzlocal as _dateutil_tzlocal,
tzutc as _dateutil_tzutc,
)
import numpy as np
import pytz
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo

UTC = pytz.utc
mroeschke marked this conversation as resolved.
Show resolved Hide resolved


import numpy as np

cimport numpy as cnp
from numpy cimport int64_t

Expand All @@ -49,7 +45,7 @@ from pandas._libs.tslibs.util cimport (

cdef int64_t NPY_NAT = get_nat()
cdef tzinfo utc_stdlib = timezone.utc
cdef tzinfo utc_pytz = UTC
cdef tzinfo utc_pytz = pytz.utc
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc()

cdef tzinfo utc_zoneinfo = None
Expand Down Expand Up @@ -168,10 +164,12 @@ cpdef inline tzinfo maybe_get_tz(object tz):
hours = int(tz[3:6])
minutes = int(tz[3] + tz[7:9])
tz = timezone(timedelta(hours=hours, minutes=minutes))
elif tz == "UTC" or tz == "utc":
tz = utc_stdlib
else:
tz = pytz.timezone(tz)
elif is_integer_object(tz):
tz = pytz.FixedOffset(tz / 60)
tz = timezone(timedelta(seconds=tz))
elif isinstance(tz, tzinfo):
pass
elif tz is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz):
Optimized equivalent to:

dt = datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, utc_pytz)
dts.min, dts.sec, dts.us, utc_stdlib)
dt = dt.astimezone(tz)

Derived from the datetime.astimezone implementation at
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ def to_datetime(

>>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500'])
DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None)
dtype='datetime64[ns, UTC-05:00]', freq=None)

- However, timezone-aware inputs *with mixed time offsets* (for example
issued from a timezone with daylight savings, such as Europe/Paris)
Expand All @@ -1010,7 +1010,7 @@ def to_datetime(
>>> from datetime import datetime
>>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
dtype='datetime64[ns, UTC-01:00]', freq=None)

|

Expand Down
7 changes: 6 additions & 1 deletion pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import warnings

from pandas._libs.json import loads
from pandas._libs.tslibs import timezones
from pandas._typing import (
DtypeObj,
JSONSerializable,
Expand Down Expand Up @@ -140,7 +141,11 @@ def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]:
elif is_period_dtype(dtype):
field["freq"] = dtype.freq.freqstr
elif is_datetime64tz_dtype(dtype):
field["tz"] = dtype.tz.zone
if timezones.is_utc(dtype.tz):
# timezone.utc has no "zone" attr
field["tz"] = "UTC"
else:
field["tz"] = dtype.tz.zone
elif is_extension_array_dtype(dtype):
field["extDtype"] = dtype.name
return field
Expand Down
15 changes: 8 additions & 7 deletions pandas/tests/frame/methods/test_align.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import timezone

import numpy as np
import pytest
import pytz

import pandas as pd
from pandas import (
Expand All @@ -27,17 +28,17 @@ def test_frame_align_aware(self):
# frame with frame
df1_central = df1.tz_convert("US/Central")
new1, new2 = df1.align(df1_central)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

# frame with Series
new1, new2 = df1.align(df1_central[0], axis=0)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

df1[0].align(df1_central, axis=0)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

def test_align_float(self, float_frame):
af, bf = float_frame.align(float_frame)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/frame/methods/test_tz_localize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import timezone

import numpy as np
import pytest

Expand All @@ -23,7 +25,7 @@ def test_tz_localize(self, frame_or_series):
expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
expected = tm.get_obj(expected, frame_or_series)

assert result.index.tz.zone == "UTC"
assert result.index.tz is timezone.utc
tm.assert_equal(result, expected)

def test_tz_localize_axis1(self):
Expand All @@ -33,7 +35,7 @@ def test_tz_localize_axis1(self):

df = df.T
result = df.tz_localize("utc", axis=1)
assert result.columns.tz.zone == "UTC"
assert result.columns.tz is timezone.utc

expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))

Expand Down
Loading