Skip to content

ENH: add ability for json.dumps to parse and add timezones #41667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ Other enhancements
- :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
- :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
- Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`)
- Add support for writing ``ISO 8601``-like timestamps with timezone information in ``pd.io.json.dumps`` (:issue:`12997`)
- Add support for unary operators in :class:`FloatingArray` (:issue:`38749`)
- :class:`RangeIndex` can now be constructed by passing a ``range`` object directly e.g. ``pd.RangeIndex(range(3))`` (:issue:`12067`)
- :meth:`round` being enabled for the nullable integer and floating dtypes (:issue:`38844`)
Expand Down
30 changes: 24 additions & 6 deletions pandas/_libs/src/ujson/python/date_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ The full license is in the LICENSE file, distributed with this software.
// Conversion routines that are useful for serialization,
// but which don't interact with JSON objects directly

#include <Python.h>
#include <datetime.h>

#include "date_conversions.h"
#include <../../../tslibs/src/datetime/np_datetime.h>
#include <../../../tslibs/src/datetime/np_datetime_strings.h>
Expand Down Expand Up @@ -55,7 +58,7 @@ char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
return NULL;
}

ret_code = make_iso_8601_datetime(&dts, result, *len, base);
ret_code = make_iso_8601_datetime(&dts, result, *len, base, -1);
if (ret_code != 0) {
PyErr_SetString(PyExc_ValueError,
"Could not convert datetime value to string");
Expand All @@ -77,8 +80,8 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
size_t *len) {
npy_datetimestruct dts;
int ret;

int ret, local;
int tzoffset = -1;
ret = convert_pydatetime_to_datetimestruct(obj, &dts);
if (ret != 0) {
if (!PyErr_Occurred()) {
Expand All @@ -87,11 +90,26 @@ char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
}
return NULL;
}
if (PyObject_HasAttrString(obj, "tzinfo")){
PyObject *tzinfo = PyObject_GetAttrString(obj, "tzinfo");
Py_DECREF(tzinfo);

if ((tzinfo != NULL) && (tzinfo != Py_None)){
tzoffset = get_tzoffset_from_pytzinfo(tzinfo, &dts);
if (tzoffset == 0){
tzoffset = -1;
}
}
}

*len = (size_t)get_datetime_iso_8601_strlen(0, base);
if (tzoffset == -1){
local = 0;
} else {
local = 1;
}
*len = (size_t)get_datetime_iso_8601_strlen(local, base);
char *result = PyObject_Malloc(*len);
ret = make_iso_8601_datetime(&dts, result, *len, base);

ret = make_iso_8601_datetime(&dts, result, *len, base, tzoffset);
if (ret != 0) {
PyErr_SetString(PyExc_ValueError,
"Could not convert datetime value to string");
Expand Down
14 changes: 13 additions & 1 deletion pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Numeric decoder derived from from TCL library
#include <ultrajson.h>
#include "date_conversions.h"
#include "datetime.h"
#include "../../../tslibs/src/datetime/np_datetime.h"

static PyTypeObject *type_decimal;
static PyTypeObject *cls_dataframe;
Expand Down Expand Up @@ -180,6 +181,8 @@ void *initObjToJSON(void) {

/* Initialise numpy API */
import_array();
/* Initialize pandas datetime API */
pandas_pydatetime_import();
// GH 31463
return NULL;
}
Expand Down Expand Up @@ -213,6 +216,14 @@ static TypeContext *createTypeContext(void) {
return pc;
}

// static PyObject *get_tzinfo(PyObject *obj){
// if (PyObject_HasAttrString(obj, "tzinfo")){
// PyObject *tzinfo = PyObject_GetAttrString(obj, "tzinfo");
// return tzinfo;
// }
// return Py_None;
// }

static PyObject *get_values(PyObject *obj) {
PyObject *values = NULL;

Expand Down Expand Up @@ -1600,7 +1611,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
}

ISITERABLE:

// tzinfo = get_tzinfo(obj);

if (PyObject_TypeCheck(obj, cls_index)) {
if (enc->outputFormat == SPLIT) {
tc->type = JT_OBJECT;
Expand Down
42 changes: 41 additions & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#endif // NPY_NO_DEPRECATED_API

#include <Python.h>
#include <datetime.h>

#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>
#include "np_datetime.h"

/*
 * Construct a datetime.datetime with an explicit tzinfo by calling the
 * DateTime_FromDateAndTime constructor stored in the PyDateTimeAPI capsule,
 * passing 'tz' as the tzinfo argument and PyDateTimeAPI->DateTimeType as
 * the type to instantiate.
 *
 * PyDateTimeAPI must have been initialised with PyDateTime_IMPORT before
 * this macro is used. Every line of the expansion except the last needs a
 * trailing backslash, otherwise the macro body stops in the middle of the
 * call.
 */
#define PyDateTime_FromDateAndTimeAndZone(year, month, day, hour, min, sec, usec, tz) \
    PyDateTimeAPI->DateTime_FromDateAndTime(year, month, day, hour, \
        min, sec, usec, tz, PyDateTimeAPI->DateTimeType)
#if PY_MAJOR_VERSION >= 3
#define PyInt_AsLong PyLong_AsLong
#endif // PyInt_AsLong
Expand All @@ -41,6 +44,13 @@ const int days_per_month_table[2][12] = {
{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
{31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}};



/*
 * Runs the PyDateTime_IMPORT macro from <datetime.h> in this translation
 * unit. Exposed as a function so that other modules can trigger the import
 * during their own initialisation, before any PyDateTimeAPI-based macro
 * (such as PyDateTime_FromDateAndTimeAndZone above) is evaluated.
 *
 * NOTE(review): the function returns void, so a failed import cannot be
 * reported to the caller from here -- confirm callers check for a pending
 * Python error if that matters.
 */
void pandas_pydatetime_import(void)
{
PyDateTime_IMPORT;
}

/*
* Returns 1 if the given year is a leap year, 0 otherwise.
*/
Expand Down Expand Up @@ -764,3 +774,33 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
"invalid base unit");
}
}

/*
 * Gets a tzoffset in minutes by calling the utcoffset() method on
 * the Python datetime.tzinfo object.
 *
 * 'timezone_obj' is the tzinfo object to query. 'dts' supplies the year,
 * month, day, hour and minute of the datetime that is handed to
 * utcoffset(); seconds and microseconds are passed as zero.
 *
 * Returns the offset in whole minutes, or -1 on failure. Failure means
 * either that a C-API call returned NULL (the Python exception raised by
 * that call is left set for the caller to inspect) or that utcoffset()
 * returned something that is not a timedelta (for example None).
 *
 * NOTE: -1 is also the value produced by a genuine offset of minus one
 * minute, so the two cases cannot be told apart from the return value.
 */
int get_tzoffset_from_pytzinfo(PyObject *timezone_obj, npy_datetimestruct *dts)
{
    PyObject *dt;
    PyObject *delta;
    long offset_minutes;

    /* Create a Python datetime to give to the timezone object */
    dt = (PyObject *)PyDateTime_FromDateAndTimeAndZone(
        (int)dts->year, dts->month, dts->day,
        dts->hour, dts->min, 0, 0, timezone_obj);
    if (dt == NULL) {
        /* Constructor failed; nothing to release. */
        return -1;
    }
    if (!PyDateTime_Check(dt)) {
        Py_DECREF(dt);
        return -1;
    }

    delta = PyObject_CallMethod(timezone_obj, "utcoffset", "O", dt);
    Py_DECREF(dt);
    if (delta == NULL) {
        /* utcoffset() raised (or the attribute lookup failed). */
        return -1;
    }
    if (!PyDelta_Check(delta)) {
        Py_DECREF(delta);
        return -1;
    }

    /*
     * timedelta is normalised so that only 'days' can be negative; e.g.
     * -05:00 is stored as days=-1, seconds=68400, which sums to -300.
     */
    offset_minutes = ((long)PyDateTime_DELTA_GET_DAYS(delta) * 24 * 60) +
                     ((long)PyDateTime_DELTA_GET_SECONDS(delta) / 60);
    Py_DECREF(delta);
    return (int)offset_minutes;
}
6 changes: 6 additions & 0 deletions pandas/_libs/tslibs/src/datetime/np_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ extern const npy_datetimestruct _NS_MAX_DTS;

// stuff pandas needs
// ----------------------------------------------------------------------------
void pandas_pydatetime_import(void);

int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
npy_datetimestruct *out);
Expand Down Expand Up @@ -75,5 +76,10 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
void
add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes);

/*
* Gets a tzoffset in minutes by calling the utcoffset() method on
* the Python datetime.tzinfo object.
*/
int get_tzoffset_from_pytzinfo(PyObject *timezone_obj, npy_datetimestruct *dts);

#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_
67 changes: 59 additions & 8 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {

if (base >= NPY_FR_h) {
if (local) {
len += 5; /* "+####" or "-####" */
len += 6; /* "+##:##" or "-##:##" */
} else {
len += 1; /* "Z" */
}
Expand All @@ -601,11 +601,15 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
* 'base' restricts the output to that unit. Set 'base' to
* -1 to auto-detect a base after which all the values are zero.
*
* 'tzoffset' is the offset from UTC in minutes introduced by a timezone,
* or -1 for no offset (the output then ends in "Z"). For example, a
* 'tzoffset' of -750 with a dts of (2021, 4, 2, 0, 0, 0, 0) would
* produce '2021-04-01T11:30:00-12:30'.
*
* Returns 0 on success, -1 on failure (for example if the output
* string was too short).
*/
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
NPY_DATETIMEUNIT base) {
NPY_DATETIMEUNIT base, int tzoffset) {
char *substr = outstr;
int sublen = outlen;
int tmplen;
Expand Down Expand Up @@ -638,6 +642,12 @@ int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
substr += tmplen;
sublen -= tmplen;

if (tzoffset != -1){
npy_datetimestruct dts_local;
dts_local = *dts;
dts = &dts_local;
add_minutes_to_datetimestruct(dts, tzoffset);
}
/* Stop if the unit is years */
if (base == NPY_FR_Y) {
if (sublen > 0) {
Expand Down Expand Up @@ -883,13 +893,54 @@ int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
sublen -= 3;

add_time_zone:
/* UTC "Zulu" time */
if (sublen < 1) {
goto string_too_short;
if (tzoffset != -1) {
/* Add the +/- sign */
if (sublen < 1) {
goto string_too_short;
}
if (tzoffset < 0) {
substr[0] = '-';
tzoffset = -tzoffset;
}
else {
substr[0] = '+';
}
substr += 1;
sublen -= 1;

/* Add the timezone offset */
if (sublen < 1 ) {
goto string_too_short;
}
substr[0] = (char)((tzoffset / (10*60)) % 10 + '0');
if (sublen < 2 ) {
goto string_too_short;
}
substr[1] = (char)((tzoffset / 60) % 10 + '0');
if (sublen < 3 ) {
goto string_too_short;
}
substr[2] = ':';
if (sublen < 4) {
goto string_too_short;
}
substr[3] = (char)(((tzoffset % 60) / 10) % 10 + '0');
if (sublen < 5 ) {
goto string_too_short;
}
substr[4] = (char)((tzoffset % 60) % 10 + '0');
substr += 5;
sublen -= 5;
}
/* UTC "Zulu" time */
else {
if (sublen < 1) {
goto string_too_short;
}
substr[0] = 'Z';
substr += 1;
sublen -= 1;
}
substr[0] = 'Z';
substr += 1;
sublen -= 1;

/* Add a NULL terminator, and return */
if (sublen > 0) {
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
*/
int
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
NPY_DATETIMEUNIT base);
NPY_DATETIMEUNIT base, int tzoffset);

/*
* Converts an pandas_timedeltastruct to an ISO 8601 string.
Expand Down
46 changes: 41 additions & 5 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,8 +1185,6 @@ def test_sparse(self):
"ts",
[
Timestamp("2013-01-10 05:00:00Z"),
Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
Timestamp("2013-01-10 00:00:00-0500"),
],
)
def test_tz_is_utc(self, ts):
Expand All @@ -1198,12 +1196,26 @@ def test_tz_is_utc(self, ts):
dt = ts.to_pydatetime()
assert dumps(dt, iso_dates=True) == exp

@pytest.mark.parametrize(
    "ts",
    [
        Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
        Timestamp("2013-01-10 00:00:00-0500"),
    ],
)
def test_tz_is_localized(self, ts):
    # A tz-aware scalar with a non-UTC offset must keep its local wall
    # time and carry the "-05:00" suffix, both as a Timestamp and as the
    # equivalent stdlib datetime.
    from pandas.io.json import dumps as json_dumps

    expected = '"2013-01-10T00:00:00.000-05:00"'

    encoded_timestamp = json_dumps(ts, iso_dates=True)
    assert encoded_timestamp == expected

    encoded_pydatetime = json_dumps(ts.to_pydatetime(), iso_dates=True)
    assert encoded_pydatetime == expected

@pytest.mark.parametrize(
"tz_range",
[
pd.date_range("2013-01-01 05:00:00Z", periods=2),
pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
pd.date_range("2013-01-01 00:00:00-0500", periods=2),
pd.date_range("2013-01-01 05:00:00Z", periods=2)
],
)
def test_tz_range_is_utc(self, tz_range):
Expand All @@ -1223,6 +1235,30 @@ def test_tz_range_is_utc(self, tz_range):
result = dumps(df, iso_dates=True)
assert result == dfexp

@pytest.mark.parametrize(
    "tz_range",
    [
        pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
        pd.date_range("2013-01-01 00:00:00-0500", periods=2),
    ],
)
def test_tz_range_is_local(self, tz_range):
    # The expectations below pin two different renderings: a bare
    # DatetimeIndex is written in UTC with a "Z" suffix, whereas the same
    # values inside a DataFrame column keep local time with "-05:00".
    from pandas.io.json import dumps as json_dumps

    index_expected = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
    frame_expected = (
        '{"DT":{'
        '"0":"2013-01-01T00:00:00.000-05:00",'
        '"1":"2013-01-02T00:00:00.000-05:00"}}'
    )

    assert json_dumps(tz_range, iso_dates=True) == index_expected

    index = DatetimeIndex(tz_range)
    assert json_dumps(index, iso_dates=True) == index_expected

    frame = DataFrame({"DT": index})
    encoded_frame = json_dumps(frame, iso_dates=True)
    assert encoded_frame == frame_expected

def test_read_inline_jsonl(self):
# GH9180
result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,9 @@ def test_encode_time_conversion_basic(self, test):

def test_encode_time_conversion_pytz(self):
# see gh-11473: to_json segfaults with timezone-aware datetimes
test = datetime.time(10, 12, 15, 343243, pytz.utc)
output = ujson.encode(test)
test = datetime.datetime(2021, 5, 25, 10, 12, 15, 343243,
pytz.timezone('US/Eastern'))
output = ujson.encode(test, iso_dates=True, date_unit='us')
expected = f'"{test.isoformat()}"'
assert expected == output

Expand Down