Skip to content

Commit

Permalink
Issue #421/#68 drop "round up" feature for end_date for better consis…
Browse files Browse the repository at this point in the history
…tency

The "round up" end_date feature as originally proposed
turned out to conflict too much with existing behavior and openEO spec.

Still, the idea can be provided through a single string `extent`: "2022" -> ("2022-01-01", "2023-01-01")
  • Loading branch information
soxofaan committed Sep 18, 2023
1 parent e1e3835 commit 93a0985
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 340 deletions.
209 changes: 49 additions & 160 deletions openeo/dates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import datetime
import datetime as dt
import re
from enum import Enum
Expand All @@ -11,9 +10,9 @@

def get_temporal_extent(
*args,
start_date: Union[str, dt.datetime, dt.date] = None,
end_date: Union[str, dt.datetime, dt.date] = None,
extent: Union[list, tuple] = None,
start_date: Union[str, dt.datetime, dt.date, Any] = None,
end_date: Union[str, dt.datetime, dt.date, Any] = None,
extent: Union[list, tuple, str] = None,
convertor=rfc3339.normalize,
) -> Tuple[Union[str, None], Union[str, None]]:
"""
Expand All @@ -29,26 +28,47 @@ def get_temporal_extent(
("2019-01-01", "2019-05-15")
>>> get_temporal_extent(extent=["2019-01-01", "2019-05-15"])
("2019-01-01", "2019-05-15")
It also supports resolving year/month shorthand notation (rounding down to first day of year or month):
>>> get_temporal_extent("2019")
("2019-01-01", None)
>>> get_temporal_extent(start_date="2019-02", end_date="2019-03")
("2019-02-01", "2019-03-01")
And even interprets extents given as a single string:
>>> get_temporal_extent(extent="2021")
("2021-01-01", "2022-01-01")
"""
if (bool(len(args) > 0) + bool(start_date or end_date) + bool(extent)) > 1:
raise ValueError("At most one of `*args`, `start_date/end_date`, or `extent` should be provided")
if args:
assert start_date is None and end_date is None and extent is None
# Convert positional `*args` to `start_date`/`end_date` argument
if len(args) == 2:
start_date, end_date = args
elif len(args) == 1:
arg = args[0]
if isinstance(arg, (list, tuple)):
start_date, end_date = arg
if len(args) > 2:
raise ValueError(f"Unable to handle {args} as a temporal extent")
start_date, end_date = tuple(arg) + (None,) * (2 - len(arg))
else:
start_date, end_date = arg, None
else:
raise ValueError("Unable to handle {a!r} as a date range".format(a=args))
raise ValueError(f"Unable to handle {args} as a temporal extent")
elif extent:
assert start_date is None and end_date is None
if isinstance(extent, str):
start_date, end_date = extent, None
else:
if isinstance(extent, (list, tuple)) and len(extent) == 2:
start_date, end_date = extent
start_date, end_date = _convert_abbreviated_temporal_extent(start_date, end_date)
elif isinstance(extent, str):
# Special case: extent is given as a single string (e.g. "2021" for full year extent
# or "2021-04" for full month extent): convert that to the appropriate extent tuple.
start_date, end_date = _convert_abbreviated_date(extent), _get_end_of_time_slot(extent)
else:
raise ValueError(f"Unable to handle {extent} as a temporal extent")
start_date = _convert_abbreviated_date(start_date)
end_date = _convert_abbreviated_date(end_date)
return convertor(start_date) if start_date else None, convertor(end_date) if end_date else None


Expand All @@ -70,115 +90,6 @@ class _TypeOfDateString(Enum):
_REGEX_YEAR = re.compile(r"^\d{4}$")


def _convert_abbreviated_temporal_extent(start_date: Any, end_date: Any = None) -> Tuple[Any, Any]:
"""Convert strings representing entire years or months into a normalized date range.
The result is a 2-tuple ``(start, end)`` that represents the period as a
half-open, left-closed interval, i.e. the end date is not included in the period.
This function should ONLY convert string values into a datetime.date when
they are clearly abbreviations; that is what it is intended for.
In all other cases leave the original start_date or end_date as it was.
Keep in mind that this function is called by ``get_temporal_extent``,
and that in general the start date and end date can be None, though usually
not at the same time.
Because we cannot know what the missing start or end means in every context,
it is the calling function's job to handle that.
We only handle the case where we know the end date is implicitly determined
by the start date because the start date is a shorthand for a year or a month.
Also ``get_temporal_extent`` uses a callable to convert the dates, in order
to deal with things like ProcessGraph parameters.
That means we need to accept those other types but we must return those
values unchanged.
:param start_date:
- Typically a string that represents either a year, a year + month, a day,
or a datetime, and it always indicates the *beginning* of that period.
- Other data types allowed are a ``datetime.date`` and ``datetime.datetime``,
and in that case we return those values unchanged.
Similarly, strings that represent a date or datetime are not processed
any further and are returned unchanged as the start or end of the tuple.
- Since callers may try to deal with even more types in their own way
we do accept any type, but return them unchanged and so the caller
can convert them.
- Allowed string formats are:
- For year: "yyyy"
- For year + month: "yyyy-mm"
Some other separators than "-" technically work but they are discouraged.
- For date and datetime you must follow the RFC 3339 format. See also: class ``Rfc3339``
:return:
The result is a 2-tuple of the form ``(start, end)`` that represents
the period as a half-open, left-closed interval, i.e. the end date is the
first day that is no longer part of the time slot.
:raises ValueError:
when start_date was a string but not recognized as either a year,
a month, a date, or datetime. The format was invalid.
Examples
--------
>>> import datetime
>>>
>>> # 1. Year: use all data from the start of 2021 to the end of 2021.
>>> string_to_temporal_extent("2021")
(datetime.date(2021, 1, 1), datetime.date(2022, 1, 1))
>>>
>>> # 2. Year + month: all data from the start of August 2022 to the end of August 2022.
>>> string_to_temporal_extent("2022-08")
(datetime.date(2022, 8, 1), datetime.date(2022, 9, 1))
>>>
>>> # 3. We received a full date 2022-08-15:
>>> # In this case we should not process start_date. The calling function/method must
>>> # handle end date, depending on what an interval with an open end means for the caller.
>>> # See for example how ``get_temporal_extent`` handles this.
>>> string_to_temporal_extent("2022-08-15")
('2022-08-15', None)
>>>
>>> # 4. Similar to 3), but with a datetime.date instead of a string containing a date.
>>> string_to_temporal_extent(datetime.date(2022, 8, 15))
(datetime.date(2022, 8, 15), None)
>>>
>>> # 5. Similar to 3) & 4), but with a datetime.datetime instance.
>>> string_to_temporal_extent(datetime.datetime(2022, 8, 15, 0, 0))
(datetime.datetime(2022, 8, 15, 0, 0), None)
"""

# Exclude case where nothing was specified to keep code below simpler.
# We want to check for None explicitly here, don't accept any other values
# that evaluate to False.
if start_date is None and end_date is None:
return None, None

# Only strings can be abbreviated dates. We don't touch it if it is any other type.
start_date_converted = None
if start_date is not None:
if isinstance(start_date, str):
start_date_converted = _convert_abbreviated_date(start_date)

# If an end date was specified we should use it, and convert it when it is an abbreviation.
# But when only a start was specified, we derive the end date from the start date.
if end_date:
result_start_date = start_date_converted or start_date
return result_start_date, _get_end_of_time_slot(end_date)

# Only the start date was specified, when we reach this point.
if isinstance(start_date_converted, dt.date):
# start_date was effectively converted => derive end date from it.
result_end_date = _get_end_of_time_slot(start_date)
return start_date_converted, result_end_date
else:
# start_date was not abbreviated, it is a day or datetime:
# Therefore we should not derive the end date from start date because
# the caller should handle this case themselves.
return start_date, None


def _get_end_of_time_slot(date: str) -> Union[dt.date, str]:
"""Calculate the end of a left-closed period: the first day after a year or month."""
if not isinstance(date, str):
Expand All @@ -199,64 +110,42 @@ def _get_end_of_time_slot(date: str) -> Union[dt.date, str]:


def _convert_abbreviated_date(
date: str,
) -> Union[dt.date, str]:
"""Helper function to convert a string into a date when it is an abbreviation for an entire year or month.
The intent of this function is to only convert values into a datetime.date
when they are clearly abbreviations, and in all other cases return the original
value of date.
:param date:
date: Union[str, dt.date, dt.datetime, Any],
) -> Union[str, dt.date, dt.datetime, Any]:
"""
Helper function to convert year- or month-abbreviated strings (e.g. "2021" or "2021-03") into a date
(first day of the corresponding period). Other values are returned as original.
- Typically a string that represents either a year, a year + month, a day,
or a datetime, and it always indicates the *beginning* of that period.
- Strings that represent a day or a datetime are not processed.
In that case we return the original value of ``date`` unchanged.
- Any other type than str raises a TypeError.
:param date: some kind of date representation:
- Allowed string formats are:
- For year: "yyyy"
- For year + month: "yyyy-mm"
Some other separators than "-" technically work but they are discouraged.
- For date and datetime you must follow the RFC 3339 format. See also: class ``Rfc3339``
- A string, formatted "yyyy", "yyyy-mm", "yyyy-mm-dd" or with even more granularity
- Any other type (e.g. ``datetime.date``, ``datetime.datetime``, a parameter, ...)
:return:
If it was a string representing a year or a month:
a datetime.date that represents the first day of that year or month.
If the string represents a day or a datetime, then the original string will be returned.
:raises TypeError:
when date is not type ``str``
If input was a string representing a year or a month:
a ``datetime.date`` that represents the first day of that year or month.
Otherwise, the original version is returned as-is.
:raises ValueError:
when ``date`` was a string but not recognized as either a year,
a month, a date, or datetime.
when ``date`` was a string but not recognized as a date representation
Examples
--------
>>> import datetime
>>>
>>> # 1. Year: use all data from the start of 2021 to the end of 2021.
>>> # For year and month: "round down" to first day:
>>> _convert_abbreviated_date("2021")
datetime.date(2021, 1, 1)
>>>
>>> # 2. Year + month: all data from the start of August 2022 to the end of August 2022.
>>> _convert_abbreviated_date("2022-08")
datetime.date(2022, 8, 1)
>>>
>>> # 3. We received a full date 2022-08-15:
>>> # In this case we should not process start_date. The calling function/method must
>>> # handle end date, depending on what an interval with an open end means for the caller.
>>> # See for example how ``get_temporal_extent`` handles this.
>>> # Preserve other values
>>> _convert_abbreviated_date("2022-08-15")
'2022-08-15'
"""
if not isinstance(date, str):
raise TypeError("date must be a string")
return date

# TODO: avoid double regex matching? Once in _type_of_date_string and once here.
type_of_date = _type_of_date_string(date)
if type_of_date == _TypeOfDateString.INVALID:
raise ValueError(
Expand Down
Loading

0 comments on commit 93a0985

Please sign in to comment.