Skip to content

Commit

Permalink
Support decoding seconds as float/int to timedelta
Browse files Browse the repository at this point in the history
If `strict=False`, we also support decoding int/float values (or str
representations of these values) to `timedelta` types. Note that a
`double` cannot represent a timedelta with microsecond precision if
the value is greater than ~270 years. For true round-trip support, we
recommend serializing timedeltas as ISO 8601 strings.
  • Loading branch information
jcrist committed Jul 11, 2023
1 parent 516a7ec commit 8d70fc0
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 77 deletions.
15 changes: 13 additions & 2 deletions docs/source/supported-types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,6 @@ The duration format used here is as follows:
[+/-]P[#D][T[#H][#M][#S]]
- The format starts with an optional sign (``-`` or ``+``). If negative, the
whole duration is negated.

Expand Down Expand Up @@ -450,7 +449,6 @@ The implementation in ``msgspec`` is compatible with the ones in:
Duration strings produced by msgspec should be interchangeable with these
libraries, as well as similar ones in other language ecosystems.


.. code-block:: python
>>> from datetime import timedelta
Expand All @@ -472,6 +470,19 @@ libraries, as well as similar ones in other language ecosystems.
File "<stdin>", line 1, in <module>
msgspec.ValidationError: Invalid ISO8601 duration
Additionally, if ``strict=False`` is specified, all protocols will decode ints,
floats, or strings containing ints/floats as timedeltas, interpreting the value
as total seconds. See :ref:`strict-vs-lax` for more information.

.. code-block:: python
>>> msgspec.json.decode(b"123.4", type=datetime.timedelta)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
msgspec.ValidationError: Expected `duration`, got `float`
>>> msgspec.json.decode(b"123.4", type=datetime.timedelta, strict=False)
datetime.timedelta(seconds=123, microseconds=400000)
``uuid``
--------
Expand Down
219 changes: 144 additions & 75 deletions msgspec/_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -8707,15 +8707,6 @@ ms_maybe_decode_bool_from_int64(int64_t x) {
return NULL;
}

static PyObject *
ms_maybe_decode_bool_from_pyint(PyObject *obj) {
uint64_t scale;
bool neg, overflow;
overflow = fast_long_extract_parts(obj, &neg, &scale);
if (overflow || neg) return NULL;
return ms_maybe_decode_bool_from_uint64(scale);
}

static MS_NOINLINE PyObject *
ms_decode_str_enum_or_literal(const char *name, Py_ssize_t size, TypeNode *type, PathNode *path) {
StrLookup *lookup = TypeNode_get_str_enum_or_literal(type);
Expand Down Expand Up @@ -9911,28 +9902,14 @@ ms_decode_datetime_from_float(
}

static PyObject *
ms_decode_datetime_from_pyint(PyObject *obj, TypeNode *type, PathNode *path)
{
bool overflow, neg;
uint64_t ux;
int64_t seconds;
overflow = fast_long_extract_parts(obj, &neg, &ux);
if (overflow || ux > LLONG_MAX) {
seconds = LLONG_MAX;
}
else {
seconds = ux;
if (neg) {
seconds *= -1;
}
}
return datetime_from_epoch(seconds, 0, type, path);
}

static PyObject *
ms_decode_datetime_from_pyfloat(PyObject *obj, TypeNode *type, PathNode *path)
{
return ms_decode_datetime_from_float(PyFloat_AS_DOUBLE(obj), type, path);
ms_parse_pyfloat(const char *buf, Py_ssize_t size) {
/* TODO: with some refactoring, we should be able to use our own
* str -> float routine rather than relying on CPython's */
PyObject *temp = PyBytes_FromStringAndSize(buf, size);
if (temp == NULL) return NULL;
PyObject *out = PyFloat_FromString(temp);
Py_DECREF(temp);
return out;
}

static PyObject *
Expand Down Expand Up @@ -10079,14 +10056,11 @@ ms_decode_datetime_from_str(

maybe_timestamp:
if (!strict) {
/* TODO: with some refactoring, we should be able to use our own str ->
* float routine rather than relying on CPython's */
PyObject *temp = PyBytes_FromStringAndSize(buf, size);
if (temp == NULL) goto error;
PyObject *timestamp = PyFloat_FromString(temp);
Py_DECREF(temp);
PyObject *timestamp = ms_parse_pyfloat(buf, size);
if (timestamp == NULL) goto invalid;
PyObject *out = ms_decode_datetime_from_pyfloat(timestamp, type, path);
PyObject *out = ms_decode_datetime_from_float(
PyFloat_AS_DOUBLE(timestamp), type, path
);
Py_DECREF(timestamp);
return out;
}
Expand Down Expand Up @@ -10146,8 +10120,46 @@ ms_encode_timedelta(PyObject *obj, char *out) {
return out - start;
}

#define MS_TIMEDELTA_MAX_SECONDS 86399999999999LL
#define MS_TIMEDELTA_MIN_SECONDS -86399999913600LL
#define MS_TIMEDELTA_MAX_SECONDS (86399999999999LL)
#define MS_TIMEDELTA_MIN_SECONDS (-86399999913600LL)

/* Build a timedelta from a total number of seconds plus microseconds.
 *
 * The total is split into whole days and the leftover seconds (C integer
 * division truncates toward zero, so the leftover carries the sign of
 * `secs`). PyDelta_FromDSU normalizes the resulting components. */
static PyObject *
ms_timedelta_from_parts(int64_t secs, int micros) {
    const int64_t secs_per_day = 24 * 60 * 60;
    int64_t whole_days = secs / secs_per_day;
    int64_t leftover_secs = secs % secs_per_day;
    return PyDelta_FromDSU(whole_days, leftover_secs, micros);
}

/* Decode an unsigned integer number of seconds into a timedelta.
 *
 * Values above MS_TIMEDELTA_MAX_SECONDS are reported through
 * ms_error_with_path as "Duration is out of range"; `path` is forwarded to
 * that helper for the error message. */
static PyObject *
ms_decode_timedelta_from_uint64(uint64_t x, PathNode *path) {
    if (x <= (uint64_t)MS_TIMEDELTA_MAX_SECONDS) {
        /* Safe to narrow: x fits well within the int64_t range here. */
        return ms_timedelta_from_parts((int64_t)x, 0);
    }
    return ms_error_with_path("Duration is out of range%U", path);
}

/* Decode a signed integer number of seconds into a timedelta.
 *
 * Values outside [MS_TIMEDELTA_MIN_SECONDS, MS_TIMEDELTA_MAX_SECONDS] are
 * reported through ms_error_with_path as "Duration is out of range"; `path`
 * is forwarded to that helper for the error message. */
static PyObject *
ms_decode_timedelta_from_int64(int64_t x, PathNode *path) {
    bool in_range = (
        (x >= MS_TIMEDELTA_MIN_SECONDS) && (x <= MS_TIMEDELTA_MAX_SECONDS)
    );
    if (in_range) {
        return ms_timedelta_from_parts(x, 0);
    }
    return ms_error_with_path("Duration is out of range%U", path);
}

/* Decode a floating point number of seconds into a timedelta.
 *
 * Non-finite values (inf/nan) and values outside
 * [MS_TIMEDELTA_MIN_SECONDS, MS_TIMEDELTA_MAX_SECONDS] are reported through
 * ms_error_with_path as "Duration is out of range". The upper bound is
 * compared against whole seconds, so a value with a fractional part above
 * MS_TIMEDELTA_MAX_SECONDS is rejected as well. */
static PyObject *
ms_decode_timedelta_from_float(double x, PathNode *path) {
    bool in_range = (
        isfinite(x) &&
        (x <= (double)MS_TIMEDELTA_MAX_SECONDS) &&
        (x >= (double)MS_TIMEDELTA_MIN_SECONDS)
    );
    if (!in_range) {
        return ms_error_with_path("Duration is out of range%U", path);
    }
    /* Split into whole seconds (truncated toward zero) and the remaining
     * fraction rounded to the nearest microsecond; both carry x's sign. */
    int64_t whole_secs = trunc(x);
    long frac_micros = lround(1000000 * (x - whole_secs));
    return ms_timedelta_from_parts(whole_secs, frac_micros);
}

enum timedelta_parse_state {
TIMEDELTA_START = 0,
TIMEDELTA_D = 1,
Expand Down Expand Up @@ -10206,8 +10218,13 @@ enum timedelta_parse_state {
* range.
*/
static PyObject *
ms_decode_timedelta(const char *p, Py_ssize_t size, TypeNode *type, PathNode *path) {
ms_decode_timedelta(
const char *p, Py_ssize_t size,
TypeNode *type, PathNode *path,
bool strict
) {
bool neg = false;
const char *start = p;
const char *end = p + size;

if (p == end) goto invalid;
Expand All @@ -10222,7 +10239,7 @@ ms_decode_timedelta(const char *p, Py_ssize_t size, TypeNode *type, PathNode *pa
if (p == end) goto invalid;
}

if (*p != 'P' && *p != 'p') goto invalid;
if (*p != 'P' && *p != 'p') goto maybe_strict_false;
p++;
if (p == end) goto invalid;

Expand Down Expand Up @@ -10367,6 +10384,17 @@ ms_decode_timedelta(const char *p, Py_ssize_t size, TypeNode *type, PathNode *pa
seconds -= days * (24 * 60 * 60);
return PyDelta_FromDSU(days, seconds, micros);

maybe_strict_false:
if (!strict) {
PyObject *timestamp = ms_parse_pyfloat(start, size);
if (timestamp == NULL) goto invalid;
PyObject *out = ms_decode_timedelta_from_float(
PyFloat_AS_DOUBLE(timestamp), path
);
Py_DECREF(timestamp);
return out;
}

invalid:
return ms_error_with_path("Invalid ISO8601 duration%U", path);
out_of_range:
Expand Down Expand Up @@ -10595,11 +10623,6 @@ ms_decode_decimal_from_float(double val, PathNode *path, MsgspecState *mod) {
}
}

static PyObject *
ms_decode_decimal_from_pyfloat(PyObject *obj, PathNode *path, MsgspecState *mod) {
return ms_decode_decimal_from_float(PyFloat_AS_DOUBLE(obj), path, mod);
}


/*************************************************************************
* strict=False Utilities *
Expand Down Expand Up @@ -10724,12 +10747,7 @@ ms_decode_str_lax(
}

if (type->types & MS_TYPE_FLOAT) {
/* TODO: with some refactoring, we should be able to use our own str ->
* float routine rather than relying on CPython's */
PyObject *temp = PyBytes_FromStringAndSize(view, size);
if (temp == NULL) return NULL;
PyObject *out = PyFloat_FromString(temp);
Py_DECREF(temp);
PyObject *out = ms_parse_pyfloat(view, size);
if (out == NULL) {
PyErr_Clear();
}
Expand Down Expand Up @@ -10810,6 +10828,9 @@ ms_post_decode_int64(int64_t x, TypeNode *type, PathNode *path, bool strict) {
if (type->types & MS_TYPE_DATETIME) {
return ms_decode_datetime_from_int64(x, type, path);
}
if (type->types & MS_TYPE_TIMEDELTA) {
return ms_decode_timedelta_from_int64(x, path);
}
}
return ms_validation_error("int", type, path);
}
Expand All @@ -10836,6 +10857,9 @@ ms_post_decode_uint64(uint64_t x, TypeNode *type, PathNode *path, bool strict) {
if (type->types & MS_TYPE_DATETIME) {
return ms_decode_datetime_from_uint64(x, type, path);
}
if (type->types & MS_TYPE_TIMEDELTA) {
return ms_decode_timedelta_from_uint64(x, path);
}
}
return ms_validation_error("int", type, path);
}
Expand Down Expand Up @@ -13322,8 +13346,13 @@ mpack_decode_float(DecoderState *self, double x, TypeNode *type, PathNode *path)
else if (type->types & MS_TYPE_DECIMAL) {
return ms_decode_decimal_from_float(x, path, NULL);
}
else if (!self->strict && (type->types & MS_TYPE_DATETIME)) {
return ms_decode_datetime_from_float(x, type, path);
else if (!self->strict) {
if (type->types & MS_TYPE_DATETIME) {
return ms_decode_datetime_from_float(x, type, path);
}
if (type->types & MS_TYPE_TIMEDELTA) {
return ms_decode_timedelta_from_float(x, path);
}
}
return ms_validation_error("float", type, path);
}
Expand Down Expand Up @@ -13357,7 +13386,7 @@ mpack_decode_str(DecoderState *self, Py_ssize_t size, TypeNode *type, PathNode *
return ms_decode_time(s, size, type, path);
}
else if (MS_UNLIKELY(type->types & MS_TYPE_TIMEDELTA)) {
return ms_decode_timedelta(s, size, type, path);
return ms_decode_timedelta(s, size, type, path, self->strict);
}
else if (MS_UNLIKELY(type->types & MS_TYPE_UUID)) {
return ms_decode_uuid(s, size, path);
Expand Down Expand Up @@ -15465,7 +15494,7 @@ json_decode_string(JSONDecoderState *self, TypeNode *type, PathNode *path) {
return ms_decode_time(view, size, type, path);
}
else if (MS_UNLIKELY(type->types & MS_TYPE_TIMEDELTA)) {
return ms_decode_timedelta(view, size, type, path);
return ms_decode_timedelta(view, size, type, path, self->strict);
}
else if (MS_UNLIKELY(type->types & MS_TYPE_UUID)) {
return ms_decode_uuid(view, size, path);
Expand Down Expand Up @@ -15518,7 +15547,7 @@ json_decode_dict_key_fallback(
return ms_decode_time(view, size, type, path);
}
else if (type->types & MS_TYPE_TIMEDELTA) {
return ms_decode_timedelta(view, size, type, path);
return ms_decode_timedelta(view, size, type, path, self->strict);
}
else if (type->types & MS_TYPE_DECIMAL) {
return ms_decode_decimal(view, size, is_ascii, path, NULL);
Expand Down Expand Up @@ -16790,8 +16819,13 @@ json_decode_float(JSONDecoderState *self, double x, TypeNode *type, PathNode *pa
if (type->types & (MS_TYPE_ANY | MS_TYPE_FLOAT)) {
return ms_decode_float(x, type, path);
}
else if (!self->strict && (type->types & MS_TYPE_DATETIME)) {
return ms_decode_datetime_from_float(x, type, path);
else if (!self->strict) {
if (type->types & MS_TYPE_DATETIME) {
return ms_decode_datetime_from_float(x, type, path);
}
if (type->types & MS_TYPE_TIMEDELTA) {
return ms_decode_timedelta_from_float(x, path);
}
}
return ms_validation_error("float", type, path);
}
Expand Down Expand Up @@ -18465,6 +18499,43 @@ typedef struct ConvertState {

static PyObject * convert(ConvertState *, PyObject *, TypeNode *, PathNode *);

/* Slow path of integer conversion, handling the lax (strict=False) coercions
 * of a Python int to bool, datetime, or timedelta.
 *
 * - In strict mode nothing is coerced and an "int" validation error is
 *   produced.
 * - bool: only the values 0 and 1 are accepted; any other value falls
 *   through to the remaining checks.
 * - datetime / timedelta: the int is interpreted as a number of seconds.
 *   Values that do not fit in an int64 are clamped to LLONG_MAX so that the
 *   downstream decoders see an out-of-range value.
 *
 * If no coercion applies, an "int" validation error is produced. */
static PyObject *
convert_int_uncommon(
    ConvertState *self, PyObject *obj, TypeNode *type, PathNode *path
) {
    if (self->strict) {
        return ms_validation_error("int", type, path);
    }

    uint64_t magnitude;
    bool is_negative;
    bool too_big = fast_long_extract_parts(obj, &is_negative, &magnitude);

    if ((type->types & MS_TYPE_BOOL) && !too_big && !is_negative) {
        if (magnitude == 0) {
            Py_RETURN_FALSE;
        }
        if (magnitude == 1) {
            Py_RETURN_TRUE;
        }
    }

    if (!(type->types & (MS_TYPE_DATETIME | MS_TYPE_TIMEDELTA))) {
        return ms_validation_error("int", type, path);
    }

    /* Default to the clamped value; only overwrite it when the magnitude is
     * known to fit in a signed 64 bit integer. */
    int64_t seconds = LLONG_MAX;
    if (!too_big && magnitude <= LLONG_MAX) {
        seconds = magnitude;
        if (is_negative) {
            seconds = -seconds;
        }
    }

    /* datetime takes precedence when both types are present */
    if (type->types & MS_TYPE_DATETIME) {
        return datetime_from_epoch(seconds, 0, type, path);
    }
    return ms_decode_timedelta_from_int64(seconds, path);
}

static PyObject *
convert_int(
ConvertState *self, PyObject *obj, TypeNode *type, PathNode *path
Expand All @@ -18484,17 +18555,7 @@ convert_int(
) {
return ms_decode_decimal_from_pyobj(obj, path, self->mod);
}

if (!self->strict) {
if (type->types & MS_TYPE_BOOL) {
PyObject *out = ms_maybe_decode_bool_from_pyint(obj);
if (out != NULL) return out;
}
if (type->types & MS_TYPE_DATETIME) {
return ms_decode_datetime_from_pyint(obj, type, path);
}
}
return ms_validation_error("int", type, path);
return convert_int_uncommon(self, obj, type, path);
}

static PyObject *
Expand All @@ -18509,10 +18570,18 @@ convert_float(
type->types & MS_TYPE_DECIMAL
&& !(self->builtin_types & MS_BUILTIN_DECIMAL)
) {
return ms_decode_decimal_from_pyfloat(obj, path, self->mod);
return ms_decode_decimal_from_float(
PyFloat_AS_DOUBLE(obj), path, self->mod
);
}
else if (!self->strict && (type->types & MS_TYPE_DATETIME)) {
return ms_decode_datetime_from_pyfloat(obj, type, path);
else if (!self->strict) {
double seconds = PyFloat_AS_DOUBLE(obj);
if (type->types & MS_TYPE_DATETIME) {
return ms_decode_datetime_from_float(seconds, type, path);
}
else if (type->types & MS_TYPE_TIMEDELTA) {
return ms_decode_timedelta_from_float(seconds, path);
}
}
return ms_validation_error("float", type, path);
}
Expand Down Expand Up @@ -18569,7 +18638,7 @@ convert_str_uncommon(
(type->types & MS_TYPE_TIMEDELTA)
&& !(self->builtin_types & MS_BUILTIN_TIMEDELTA)
) {
return ms_decode_timedelta(view, size, type, path);
return ms_decode_timedelta(view, size, type, path, self->strict);
}
else if (
(type->types & MS_TYPE_UUID)
Expand Down
Loading

0 comments on commit 8d70fc0

Please sign in to comment.