From 15b339996286344ff543ba58c894727ffe212895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 29 Aug 2023 15:14:09 -0300 Subject: [PATCH 01/27] New op: tick_calendar structure --- temporian/core/operators/BUILD | 14 ++++ temporian/core/operators/tick_calendar.py | 80 +++++++++++++++++++ .../implementation/numpy/operators/BUILD | 17 ++++ .../implementation/numpy/operators/test/BUILD | 17 ++++ .../operators/test/tick_calendar_test.py | 64 +++++++++++++++ .../numpy/operators/tick_calendar.py | 58 ++++++++++++++ 6 files changed, 250 insertions(+) create mode 100644 temporian/core/operators/tick_calendar.py create mode 100644 temporian/implementation/numpy/operators/test/tick_calendar_test.py create mode 100644 temporian/implementation/numpy/operators/tick_calendar.py diff --git a/temporian/core/operators/BUILD b/temporian/core/operators/BUILD index c8e30cae5..69ef61906 100644 --- a/temporian/core/operators/BUILD +++ b/temporian/core/operators/BUILD @@ -360,3 +360,17 @@ py_library( "//temporian/proto:core_py_proto", ], ) + +py_library( + name = "tick_calendar", + srcs = ["tick_calendar.py"], + srcs_version = "PY3", + deps = [ + ":base", + "//temporian/core:operator_lib", + "//temporian/core/data:node", + "//temporian/core/data:schema", + "//temporian/proto:core_py_proto", + ], +) + \ No newline at end of file diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py new file mode 100644 index 000000000..6a9466ec3 --- /dev/null +++ b/temporian/core/operators/tick_calendar.py @@ -0,0 +1,80 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""TickCalendar operator class and public API function definitions.""" +from typing import Optional + +from temporian.core import operator_lib +from temporian.core.compilation import compile +from temporian.core.data.node import ( + EventSetNode, + create_node_new_features_new_sampling, +) +from temporian.core.operators.base import Operator +from temporian.core.typing import EventSetOrNode +from temporian.proto import core_pb2 as pb +from temporian.utils.typecheck import typecheck + + +class TickCalendar(Operator): + def __init__(self, input: EventSetNode, param: float): + super().__init__() + + self.add_input("input", input) + self.add_attribute("param", param) + + self.add_output( + "output", + create_node_new_features_new_sampling( + features=[], + indexes=input.schema.indexes, + is_unix_timestamp=input.schema.is_unix_timestamp, + creator=self, + ), + ) + + self.check() + + @classmethod + def build_op_definition(cls) -> pb.OperatorDef: + return pb.OperatorDef( + key="TICK_CALENDAR", + attributes=[ + pb.OperatorDef.Attribute( + key="param", + type=pb.OperatorDef.Attribute.Type.FLOAT_64, + is_optional=False, + ), + ], + inputs=[pb.OperatorDef.Input(key="input")], + outputs=[pb.OperatorDef.Output(key="output")], + ) + + +operator_lib.register_operator(TickCalendar) + + +@typecheck +@compile +def tick_calendar( + input: EventSetOrNode, + second: Optional[int], + minute: Optional[int], + hour: Optional[int], + day_of_month: Optional[int], + month: Optional[int], + day_of_week=Optional[int], +) -> EventSetOrNode: + return TickCalendar(input=input, param=param).outputs["output"] # type: ignore diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index d66b8386c..2c8abe3e1 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -364,3 +364,20 @@ py_library( "//temporian/implementation/numpy/data:event_set", ], ) + +py_library( + name = "tick_calendar", + srcs = ["tick_calendar.py"], + srcs_version = "PY3", + deps = [ + # already_there/numpy + ":base", + "//temporian/core/data:duration_utils", + "//temporian/core/operators:tick_calendar", + "//temporian/implementation/numpy:implementation_lib", + "//temporian/implementation/numpy:utils", + "//temporian/implementation/numpy/data:event_set", + ], +) + + \ No newline at end of file diff --git a/temporian/implementation/numpy/operators/test/BUILD b/temporian/implementation/numpy/operators/test/BUILD index 101cb7353..572b19ba8 100644 --- a/temporian/implementation/numpy/operators/test/BUILD +++ b/temporian/implementation/numpy/operators/test/BUILD @@ -753,3 +753,20 @@ py_test( "//temporian/implementation/numpy/operators:fast_fourier_transform", ], ) + +py_test( + name = "tick_calendar_test", + srcs = ["tick_calendar_test.py"], + srcs_version = "PY3", + deps = [ + # already_there/absl/testing:absltest + ":test_util", + "//temporian/core/data:dtype", + "//temporian/core/data:node", + "//temporian/core/data:schema", + "//temporian/implementation/numpy/data:io", + "//temporian/core/operators:tick_calendar", + "//temporian/implementation/numpy/operators:tick_calendar", + ], +) + \ No newline at end of file diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py new file mode 100644 index 000000000..9e26dd730 --- /dev/null +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -0,0 +1,64 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from absl.testing import absltest + +import numpy as np +from temporian.core.operators.tick_calendar import TickCalendar +from temporian.implementation.numpy.data.io import event_set +from temporian.implementation.numpy.operators.tick_calendar import ( + TickCalendarNumpyImplementation, +) +from temporian.implementation.numpy.operators.test.test_util import ( + assertEqualEventSet, + testOperatorAndImp, +) + +class TickCalendarOperatorTest(absltest.TestCase): + def setUp(self): + pass + + def test_base(self): + evset = event_set( + timestamps=[1,2,3,4], + features={ + "a": [1.0, 2.0, 3.0, 4.0], + "b": [5, 6, 7, 8], + "c": ["A", "A", "B", "B"], + }, + indexes=["c"], + ) + node = evset.node() + + expected_output = event_set( + timestamps=[1, 1], + features={ + "c": ["A", "B"], + }, + indexes=["c"], + ) + + # Run op + op = TickCalendar(input=node, param=1.0) + instance = TickCalendarNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + + +if __name__ == "__main__": + absltest.main() + diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py new file mode 100644 index 000000000..8b66d2d30 --- /dev/null +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -0,0 +1,58 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Implementation for the TickCalendar operator.""" + + +from typing import Dict +import numpy as np + +from temporian.implementation.numpy.data.event_set import IndexData, EventSet +from temporian.core.operators.tick_calendar import TickCalendar +from temporian.implementation.numpy import implementation_lib +from temporian.implementation.numpy.operators.base import OperatorImplementation + +class TickCalendarNumpyImplementation(OperatorImplementation): + + def __init__(self, operator: TickCalendar) -> None: + assert isinstance(operator, TickCalendar) + super().__init__(operator) + + def __call__( + self, input: EventSet) -> Dict[str, EventSet]: + assert isinstance(self.operator, TickCalendar) + + output_schema = self.output_schema("output") + + # Create output EventSet + output_evset = EventSet(data={}, schema=output_schema) + + # Fill output EventSet's data + for index_key, index_data in input.data.items(): + output_evset.set_index_value( + index_key, + IndexData( + features=[], + timestamps=np.array([1], dtype=np.float64), + schema=output_schema, + ) + ) + + return {"output": output_evset} + + +implementation_lib.register_operator_implementation( + TickCalendar, TickCalendarNumpyImplementation +) From 59f171b8b6d8fc82ec0b84d131224244da0ac38c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 5 Sep 2023 13:17:55 -0300 Subject: [PATCH 02/27] tick_calendar arguments, initial doc & examples --- temporian/core/event_set_ops.py | 79 +++++++++++++++++++ temporian/core/operators/base.py | 2 +- temporian/core/operators/tick_calendar.py | 74 ++++++++++++++--- .../numpy/operators/tick_calendar.py | 24 ++++-- 4 files changed, 162 insertions(+), 17 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 8ade843ca..5fa6590f3 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2180,6 +2180,85 @@ def tick( return tick(self, interval=interval, align=align) + def tick_calendar( + self: EventSetOrNode, + second: Union[int, str, None] = None, + minute: Union[int, str, None] = None, + hour: Union[int, str, None] = None, + day_of_month: Union[int, str, None] = None, + month: Union[int, str, None] = None, + day_of_week: Union[int, str, None] = None, + ) -> EventSetOrNode: + """Generates timestamps at specified datetimes, in the range of a guide + [`EventSet`][temporian.EventSet]. + + The usability is inspired in the crontab format, where arguments can + take a value of `'*'` to tick at all values, or a fixed integer to + tick only at that precise value. + + Non-specified values (`None`), are set to `*` if a finer + resolution argument is specified, or fixed to the first valid value if + a lower resolution is specified. For example, setting only + `tick_calendar(hour='*')` + is equivalent to: + `tick_calendar(second=0, minute=0, hour='*', day_of_month='*', month='*')` + , resulting in one tick at every exact hour of every day/month/year in + the input guide range. + + Example: + ```python + >>> a = tp.event_set(timestamps=["2020-01-01", "2021-01-01"]) + >>> # Every day in the period + >>> b = a.tick_calendar() + >>> b + + >>> # Every day at 2:30am + >>> b = a.tick_calendar(hour=2, minute=30) + >>> b + + >>> # Day 5 of every month + >>> b = a.tick_calendar(day_of_month=5) + >>> b + + >>> a = tp.event_set(timestamps=["2020-01-01", "2023-01-01"]) + >>> # 1st of February of every year + >>> b = a.tick_calendar(month=2) + >>> b + + ``` + + Args: + second: '*' (any second), None (auto) or number in range `[0-59]` + to tick at specific second of each minute. + minute: '*' (any minute), None (auto) or number in range `[0-59]` + to tick at specific minute of each hour. + hour: '*' (any hour), None (auto), or number in range `[0-23]` to + tick at specific hour of each day. + day_of_month: '*' (any day), None (auto) or number in range `[1-31]` + to tick at specific day of each month. Note that months + without some particular day may not have any tick + (e.g: day 31 on February). + month: '*' (any month), None (auto) or number in range `[1-12]` to + tick at one particular month of each year. + day_of_week: '*' (any day), None (auto) or number in range `[0-6]` + (Sun-Sat) to tick at particular day of week. Can only be + specified if `day_of_month` is `None`. + + Returns: + A feature-less EventSet with timestamps at specified interval. + """ + from temporian.core.operators.tick_calendar import tick_calendar + + return tick_calendar( + self, + second=second, + minute=minute, + hour=hour, + day_of_month=day_of_month, + month=month, + day_of_week=day_of_week, + ) + def timestamps(self: EventSetOrNode) -> EventSetOrNode: """Converts an [`EventSet`][temporian.EventSet]'s timestamps into a `float64` feature. diff --git a/temporian/core/operators/base.py b/temporian/core/operators/base.py index 3c032ce79..25cbda34e 100644 --- a/temporian/core/operators/base.py +++ b/temporian/core/operators/base.py @@ -26,7 +26,7 @@ # Valid types for operator attributes AttributeType = Union[ - str, int, float, bool, bytes, List[str], Dict[str, str], List[DType] + None, str, int, float, bool, bytes, List[str], Dict[str, str], List[DType] ] diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 6a9466ec3..94e850c4b 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -14,7 +14,7 @@ """TickCalendar operator class and public API function definitions.""" -from typing import Optional +from typing import Union from temporian.core import operator_lib from temporian.core.compilation import compile @@ -29,11 +29,33 @@ class TickCalendar(Operator): - def __init__(self, input: EventSetNode, param: float): + def __init__( + self, + input: EventSetNode, + second: Union[int, str], + minute: Union[int, str], + hour: Union[int, str], + day_of_month: Union[int, str], + month: Union[int, str], + day_of_week: Union[int, str], + ): super().__init__() + # Attributes + self._second = second + self._minute = minute + self._hour = hour + self._day_of_month = day_of_month + self._month = month + self._day_of_week = day_of_week + self.add_attribute("second", second) + self.add_attribute("minute", minute) + self.add_attribute("hour", hour) + self.add_attribute("day_of_month", day_of_month) + self.add_attribute("month", month) + self.add_attribute("day_of_week", day_of_week) + self.add_input("input", input) - self.add_attribute("param", param) self.add_output( "output", @@ -47,10 +69,35 @@ def __init__(self, input: EventSetNode, param: float): self.check() + @property + def second(self) -> Union[int, str]: + return self._second + + @property + def minute(self) -> Union[int, str]: + return self._minute + + @property + def hour(self) -> Union[int, str]: + return self._hour + + @property + def day_of_month(self) -> Union[int, str]: + return self._day_of_month + + @property + def month(self) -> Union[int, str]: + return self._month + + @property + def day_of_week(self) -> Union[int, str]: + return self._day_of_week + @classmethod def build_op_definition(cls) -> pb.OperatorDef: return pb.OperatorDef( key="TICK_CALENDAR", + # TODO: add attributes attributes=[ pb.OperatorDef.Attribute( key="param", @@ -70,11 +117,18 @@ def build_op_definition(cls) -> pb.OperatorDef: @compile def tick_calendar( input: EventSetOrNode, - second: Optional[int], - minute: Optional[int], - hour: Optional[int], - day_of_month: Optional[int], - month: Optional[int], - day_of_week=Optional[int], + second: Union[int, str, None], + minute: Union[int, str, None], + hour: Union[int, str, None], + day_of_month: Union[int, str, None], + month: Union[int, str, None], + day_of_week: Union[int, str, None], ) -> EventSetOrNode: - return TickCalendar(input=input, param=param).outputs["output"] # type: ignore + # TODO: Logic for auto arguments (None) + assert second is not None + assert minute is not None + assert hour is not None + assert day_of_month is not None + assert month is not None + assert day_of_week is not None + return TickCalendar(input=input, second=second, minute=minute, hour=hour, day_of_month=day_of_month, month=month, day_of_week=day_of_week).outputs["output"] # type: ignore diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index 8b66d2d30..508d10028 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -15,24 +15,31 @@ """Implementation for the TickCalendar operator.""" +from datetime import datetime, timedelta +from typing import Dict, List -from typing import Dict import numpy as np from temporian.implementation.numpy.data.event_set import IndexData, EventSet from temporian.core.operators.tick_calendar import TickCalendar from temporian.implementation.numpy import implementation_lib from temporian.implementation.numpy.operators.base import OperatorImplementation +from temporian.core.data import duration -class TickCalendarNumpyImplementation(OperatorImplementation): +class TickCalendarNumpyImplementation(OperatorImplementation): def __init__(self, operator: TickCalendar) -> None: assert isinstance(operator, TickCalendar) super().__init__(operator) - def __call__( - self, input: EventSet) -> Dict[str, EventSet]: + def __call__(self, input: EventSet) -> Dict[str, EventSet]: assert isinstance(self.operator, TickCalendar) + second = self.operator.second + minute = self.operator.minute + hour = self.operator.hour + day_of_month = self.operator.day_of_month + month = self.operator.month + day_of_week = self.operator.day_of_week output_schema = self.output_schema("output") @@ -41,13 +48,18 @@ def __call__( # Fill output EventSet's data for index_key, index_data in input.data.items(): + if len(index_data.timestamps < 2): + dst_timestamps = np.array([], dtype=np.float64) + else: + begin = index_data.timestamps[0] + end = index_data.timestamps[-1] output_evset.set_index_value( index_key, IndexData( features=[], - timestamps=np.array([1], dtype=np.float64), + timestamps=dst_timestamps, schema=output_schema, - ) + ), ) return {"output": output_evset} From 8fa5bc9bb8080f2d378953ed79c18117f21f0ab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Wed, 6 Sep 2023 13:17:49 -0300 Subject: [PATCH 03/27] Changed args and attributes in tick_calendar --- temporian/core/operators/tick_calendar.py | 208 ++++++++++++++---- .../numpy/operators/tick_calendar.py | 24 +- 2 files changed, 183 insertions(+), 49 deletions(-) diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 94e850c4b..74475fd1b 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -32,28 +32,46 @@ class TickCalendar(Operator): def __init__( self, input: EventSetNode, - second: Union[int, str], - minute: Union[int, str], - hour: Union[int, str], - day_of_month: Union[int, str], - month: Union[int, str], - day_of_week: Union[int, str], + min_second: int, + min_minute: int, + min_hour: int, + min_day_of_month: int, + min_month: int, + min_day_of_week: int, + max_second: int, + max_minute: int, + max_hour: int, + max_day_of_month: int, + max_month: int, + max_day_of_week: int, ): super().__init__() # Attributes - self._second = second - self._minute = minute - self._hour = hour - self._day_of_month = day_of_month - self._month = month - self._day_of_week = day_of_week - self.add_attribute("second", second) - self.add_attribute("minute", minute) - self.add_attribute("hour", hour) - self.add_attribute("day_of_month", day_of_month) - self.add_attribute("month", month) - self.add_attribute("day_of_week", day_of_week) + self._min_second = min_second + self._max_second = max_second + self._min_minute = min_minute + self._max_minute = max_minute + self._min_hour = min_hour + self._max_hour = max_hour + self._min_day_of_month = min_day_of_month + self._max_day_of_month = max_day_of_month + self._min_month = min_month + self._max_month = max_month + self._min_day_of_week = min_day_of_week + self._max_day_of_week = max_day_of_week + self.add_attribute("min_second", min_second) + self.add_attribute("max_second", max_second) + self.add_attribute("min_minute", min_minute) + self.add_attribute("max_minute", max_minute) + self.add_attribute("min_hour", min_hour) + self.add_attribute("max_hour", max_hour) + self.add_attribute("min_day_of_month", min_day_of_month) + self.add_attribute("max_day_of_month", max_day_of_month) + self.add_attribute("min_month", min_month) + self.add_attribute("max_month", max_month) + self.add_attribute("min_day_of_week", min_day_of_week) + self.add_attribute("max_day_of_week", max_day_of_week) self.add_input("input", input) @@ -70,39 +88,105 @@ def __init__( self.check() @property - def second(self) -> Union[int, str]: - return self._second + def min_second(self) -> int: + return self._min_second @property - def minute(self) -> Union[int, str]: - return self._minute + def max_second(self) -> int: + return self._max_second @property - def hour(self) -> Union[int, str]: - return self._hour + def min_minute(self) -> int: + return self._min_minute @property - def day_of_month(self) -> Union[int, str]: - return self._day_of_month + def max_minute(self) -> int: + return self._max_minute @property - def month(self) -> Union[int, str]: - return self._month + def min_hour(self) -> int: + return self._min_hour @property - def day_of_week(self) -> Union[int, str]: - return self._day_of_week + def max_hour(self) -> int: + return self._max_hour + + @property + def min_day_of_month(self) -> int: + return self._min_day_of_month + + @property + def max_day_of_month(self) -> int: + return self._max_day_of_month + + @property + def min_month(self) -> int: + return self._min_month + + @property + def max_month(self) -> int: + return self._max_month + + @property + def min_day_of_week(self) -> int: + return self._min_day_of_week + + @property + def max_day_of_week(self) -> int: + return self._max_day_of_week @classmethod def build_op_definition(cls) -> pb.OperatorDef: return pb.OperatorDef( key="TICK_CALENDAR", - # TODO: add attributes attributes=[ pb.OperatorDef.Attribute( - key="param", - type=pb.OperatorDef.Attribute.Type.FLOAT_64, - is_optional=False, + key="min_second", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="max_second", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="min_minute", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="max_minute", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="min_hour", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="max_hour", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="min_day_of_month", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="max_day_of_month", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="min_month", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="max_month", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="min_day_of_week", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, + ), + pb.OperatorDef.Attribute( + key="max_day_of_week", + type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), ], inputs=[pb.OperatorDef.Input(key="input")], @@ -124,11 +208,51 @@ def tick_calendar( month: Union[int, str, None], day_of_week: Union[int, str, None], ) -> EventSetOrNode: - # TODO: Logic for auto arguments (None) - assert second is not None - assert minute is not None - assert hour is not None - assert day_of_month is not None - assert month is not None - assert day_of_week is not None - return TickCalendar(input=input, second=second, minute=minute, hour=hour, day_of_month=day_of_month, month=month, day_of_week=day_of_week).outputs["output"] # type: ignore + args = [second, minute, hour, day_of_month, month, day_of_week] + + # Default for empty args + if all(arg is None for arg in args): + day_of_month = "*" + month = "*" + + if second == "*": + min_second = 0 + max_second = 59 + else: + min_second = max_second = 0 if second is None else int(second) + + if minute == "*": + min_minute = 0 + max_minute = 59 + elif minute is not None: + min_minute = max_minute = int(minute) + else: # None (auto set): only if adjacent values are specified + raise ValueError() # TODO + + # TODO + min_hour = 0 + max_hour = 23 + min_day_of_month = 1 + max_day_of_month = 31 + min_month = 1 + max_month = 12 + min_day_of_week = 0 + max_day_of_week = 6 + + return TickCalendar( + input=input, + min_second=min_second, + max_second=max_second, + min_minute=min_minute, + max_minute=max_minute, + min_hour=min_hour, + max_hour=max_hour, + min_day_of_month=min_day_of_month, + max_day_of_month=max_day_of_month, + min_month=min_month, + max_month=max_month, + min_day_of_week=min_day_of_week, + max_day_of_week=max_day_of_week, + ).outputs[ + "output" + ] # type: ignore diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index 508d10028..59b2fed94 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -25,6 +25,7 @@ from temporian.implementation.numpy import implementation_lib from temporian.implementation.numpy.operators.base import OperatorImplementation from temporian.core.data import duration +from temporian.implementation.numpy_cc.operators import operators_cc class TickCalendarNumpyImplementation(OperatorImplementation): @@ -34,13 +35,6 @@ def __init__(self, operator: TickCalendar) -> None: def __call__(self, input: EventSet) -> Dict[str, EventSet]: assert isinstance(self.operator, TickCalendar) - second = self.operator.second - minute = self.operator.minute - hour = self.operator.hour - day_of_month = self.operator.day_of_month - month = self.operator.month - day_of_week = self.operator.day_of_week - output_schema = self.output_schema("output") # Create output EventSet @@ -53,6 +47,22 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: else: begin = index_data.timestamps[0] end = index_data.timestamps[-1] + dst_timestamps = operators_cc.tick_calendar( + start_timestamp=begin, + end_timestamp=end, + min_second=self.operator.min_second, + max_second=self.operator.max_second, + min_minute=self.operator.min_minute, + max_minute=self.operator.max_minute, + min_hour=self.operator.min_hour, + max_hour=self.operator.max_hour, + min_mday=self.operator.min_day_of_month, + max_mday=self.operator.max_day_of_month, + min_month=self.operator.min_month, + max_month=self.operator.max_month, + min_wday=self.operator.min_day_of_week, + max_wday=self.operator.max_day_of_week, + ) output_evset.set_index_value( index_key, IndexData( From 68a949281d4c30da5991bedea0e92870fd3d9d27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Wed, 6 Sep 2023 13:19:13 -0300 Subject: [PATCH 04/27] Initial C++ code for tick_calendar --- .../implementation/numpy/operators/BUILD | 3 +- .../operators/test/tick_calendar_test.py | 28 ++++++++--- .../numpy/operators/tick_calendar.py | 3 +- .../implementation/numpy_cc/operators/BUILD | 8 +++ .../numpy_cc/operators/pyinit.cc | 2 + .../numpy_cc/operators/tick_calendar.cc | 50 +++++++++++++++++++ .../numpy_cc/operators/tick_calendar.h | 4 ++ 7 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 temporian/implementation/numpy_cc/operators/tick_calendar.cc create mode 100644 temporian/implementation/numpy_cc/operators/tick_calendar.h diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index 2c8abe3e1..616b96cd3 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -377,7 +377,6 @@ py_library( "//temporian/implementation/numpy:implementation_lib", "//temporian/implementation/numpy:utils", "//temporian/implementation/numpy/data:event_set", + "//temporian/implementation/numpy_cc/operators:operators_cc", ], ) - - \ No newline at end of file diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 9e26dd730..949a825e9 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -26,17 +26,18 @@ testOperatorAndImp, ) + class TickCalendarOperatorTest(absltest.TestCase): def setUp(self): pass def test_base(self): evset = event_set( - timestamps=[1,2,3,4], + timestamps=[1, 2, 3, 4], features={ - "a": [1.0, 2.0, 3.0, 4.0], - "b": [5, 6, 7, 8], - "c": ["A", "A", "B", "B"], + "a": [1.0, 2.0, 3.0, 4.0], + "b": [5, 6, 7, 8], + "c": ["A", "A", "B", "B"], }, indexes=["c"], ) @@ -45,13 +46,27 @@ def test_base(self): expected_output = event_set( timestamps=[1, 1], features={ - "c": ["A", "B"], + "c": ["A", "B"], }, indexes=["c"], ) # Run op - op = TickCalendar(input=node, param=1.0) + op = TickCalendar( + input=node, + min_second=1, + max_second=10, + min_minute=1, + max_minute=1, + min_hour=1, + max_hour=1, + min_day_of_month=1, + max_day_of_month=1, + min_month=1, + max_month=1, + min_day_of_week=1, + max_day_of_week=1, + ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] @@ -61,4 +76,3 @@ def test_base(self): if __name__ == "__main__": absltest.main() - diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index 59b2fed94..fef72eae1 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -42,7 +42,7 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: # Fill output EventSet's data for index_key, index_data in input.data.items(): - if len(index_data.timestamps < 2): + if len(index_data.timestamps) < 2: dst_timestamps = np.array([], dtype=np.float64) else: begin = index_data.timestamps[0] @@ -63,6 +63,7 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: min_wday=self.operator.min_day_of_week, max_wday=self.operator.max_day_of_week, ) + print(f"Result: {dst_timestamps}") output_evset.set_index_value( index_key, IndexData( diff --git a/temporian/implementation/numpy_cc/operators/BUILD b/temporian/implementation/numpy_cc/operators/BUILD index 0b1e4bb2c..8b3205957 100644 --- a/temporian/implementation/numpy_cc/operators/BUILD +++ b/temporian/implementation/numpy_cc/operators/BUILD @@ -45,6 +45,13 @@ pybind_library( deps = [":common"], ) +pybind_library( + name = "tick_calendar", + srcs = ["tick_calendar.cc"], + hdrs = ["tick_calendar.h"], + deps = [":common"], +) + pybind_extension( name = "operators_cc", srcs = ["pyinit.cc"], @@ -53,6 +60,7 @@ pybind_extension( ":join", ":resample", ":since_last", + ":tick_calendar", ":window", ], ) diff --git a/temporian/implementation/numpy_cc/operators/pyinit.cc b/temporian/implementation/numpy_cc/operators/pyinit.cc index 62af9f931..36ccb7c58 100644 --- a/temporian/implementation/numpy_cc/operators/pyinit.cc +++ b/temporian/implementation/numpy_cc/operators/pyinit.cc @@ -5,6 +5,7 @@ #include "temporian/implementation/numpy_cc/operators/join.h" #include "temporian/implementation/numpy_cc/operators/resample.h" #include "temporian/implementation/numpy_cc/operators/since_last.h" +#include "temporian/implementation/numpy_cc/operators/tick_calendar.h" #include "temporian/implementation/numpy_cc/operators/window.h" namespace { @@ -17,4 +18,5 @@ PYBIND11_MODULE(operators_cc, m) { init_window(m); init_join(m); init_add_index(m); + init_tick_calendar(m); } diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc new file mode 100644 index 000000000..57d03ff9a --- /dev/null +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -0,0 +1,50 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include "temporian/implementation/numpy_cc/operators/common.h" + +namespace { +namespace py = pybind11; + +py::array_t tick_calendar( + const double start_timestamp, const double end_timestamp, // boundaries + const int min_second, const int max_second, // second range + const int min_minute, const int max_minute, // minute range + const int min_hour, const int max_hour, // hours range + const int min_mday, const int max_mday, // month days + const int min_month, const int max_month, // month range + const int min_wday, const int max_wday // weekdays +) { + // Variable length ticks + std::vector ticks; + + int second = 0; + + while (second <= 10) { + ticks.push_back(second); + second++; + } + + // Allocate output array + // TODO: can we avoid this data copy? + py::array_t result(ticks.size()); + std::copy(ticks.begin(), ticks.end(), result.mutable_data()); + return result; +} + +} // namespace + +void init_tick_calendar(py::module &m) { + m.def("tick_calendar", &tick_calendar, "", py::arg("start_timestamp"), + py::arg("end_timestamp"), py::arg("min_second"), py::arg("max_second"), + py::arg("min_minute"), py::arg("max_minute"), py::arg("min_hour"), + py::arg("max_hour"), py::arg("min_mday"), py::arg("max_mday"), + py::arg("min_month"), py::arg("max_month"), py::arg("min_wday"), + py::arg("max_wday")); +} diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.h b/temporian/implementation/numpy_cc/operators/tick_calendar.h new file mode 100644 index 000000000..9a08c0611 --- /dev/null +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.h @@ -0,0 +1,4 @@ +#include +#include + +void init_tick_calendar(pybind11::module &m); From 2ce5d0eb8df2f669ca00517b30644daac4722c48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 7 Sep 2023 15:57:42 -0300 Subject: [PATCH 05/27] tick_calendar C++ implementation --- temporian/core/operators/tick_calendar.py | 4 + .../operators/test/tick_calendar_test.py | 28 ++++--- .../numpy/operators/tick_calendar.py | 4 +- .../numpy_cc/operators/tick_calendar.cc | 80 ++++++++++++++++--- 4 files changed, 89 insertions(+), 27 deletions(-) diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 74475fd1b..7f31868f7 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -46,6 +46,10 @@ def __init__( max_day_of_week: int, ): super().__init__() + if not input.schema.is_unix_timestamp: + raise ValueError( + "Can only use tick_calendar on unix timestamp samplings" + ) # Attributes self._min_second = min_second diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 949a825e9..3d0612413 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -13,9 +13,11 @@ # limitations under the License. +from datetime import datetime from absl.testing import absltest import numpy as np +from temporian.core.data.duration_utils import convert_timestamps_to_datetimes from temporian.core.operators.tick_calendar import TickCalendar from temporian.implementation.numpy.data.io import event_set from temporian.implementation.numpy.operators.tick_calendar import ( @@ -33,13 +35,10 @@ def setUp(self): def test_base(self): evset = event_set( - timestamps=[1, 2, 3, 4], - features={ - "a": [1.0, 2.0, 3.0, 4.0], - "b": [5, 6, 7, 8], - "c": ["A", "A", "B", "B"], - }, - indexes=["c"], + timestamps=[ + datetime(2020, 1, 1, 0, 0, 0), + datetime(2020, 4, 1, 0, 0, 0), + ], ) node = evset.node() @@ -54,8 +53,8 @@ def test_base(self): # Run op op = TickCalendar( input=node, - min_second=1, - max_second=10, + min_second=0, + max_second=5, min_minute=1, max_minute=1, min_hour=1, @@ -63,14 +62,19 @@ def test_base(self): min_day_of_month=1, max_day_of_month=1, min_month=1, - max_month=1, - min_day_of_week=1, - max_day_of_week=1, + max_month=12, + min_day_of_week=0, + max_day_of_week=6, ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] + print( + "Result:" + f" {convert_timestamps_to_datetimes(output.get_arbitrary_index_data().timestamps)}" + ) + assertEqualEventSet(self, output, expected_output) diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index fef72eae1..d9216f548 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -45,8 +45,8 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: if len(index_data.timestamps) < 2: dst_timestamps = np.array([], dtype=np.float64) else: - begin = index_data.timestamps[0] - end = index_data.timestamps[-1] + begin = int(index_data.timestamps[0]) # discard sub-second info + end = int(index_data.timestamps[-1]) dst_timestamps = operators_cc.tick_calendar( start_timestamp=begin, end_timestamp=end, diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 57d03ff9a..23e9f9da2 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -1,9 +1,10 @@ #include #include +#include #include +#include #include -#include #include #include @@ -13,23 +14,76 @@ namespace { namespace py = pybind11; py::array_t tick_calendar( - const double start_timestamp, const double end_timestamp, // boundaries - const int min_second, const int max_second, // second range - const int min_minute, const int max_minute, // minute range - const int min_hour, const int max_hour, // hours range - const int min_mday, const int max_mday, // month days - const int min_month, const int max_month, // month range - const int min_wday, const int max_wday // weekdays + const long start_timestamp, // min date + const long end_timestamp, // max date + const int min_second, const int max_second, // second range + const int min_minute, const int max_minute, // minute range + const int min_hour, const int max_hour, // hours range + const int min_mday, const int max_mday, // month days + const int min_month, const int max_month, // month range + const int min_wday, const int max_wday // weekdays ) { - // Variable length ticks + // Ticks list std::vector ticks; - int second = 0; + // Date range + std::tm start_utc = *std::gmtime(&start_timestamp); + std::tm end_utc = *std::gmtime(&end_timestamp); - while (second <= 10) { - ticks.push_back(second); - second++; + int year = start_utc.tm_year; // from 1900 + int month = std::max(start_utc.tm_mon + 1, min_month); // zero-based tm_mon + int mday = std::max(start_utc.tm_mday, min_mday); // 1-31 + int hour = std::max(start_utc.tm_hour, min_hour); + int minute = std::max(start_utc.tm_min, min_minute); + int second = std::max(start_utc.tm_sec, min_second); + + bool in_range = true; + while (in_range) { + while (month <= max_month && in_range) { + while (mday <= max_mday && in_range) { + while (hour <= max_hour && in_range) { + while (minute <= max_minute && in_range) { + while (second <= max_second && in_range) { + std::tm tm_struct = {}; + tm_struct.tm_year = year; // Since 1900 + tm_struct.tm_mon = month - 1; // zero-based + tm_struct.tm_mday = mday; + tm_struct.tm_hour = hour; + tm_struct.tm_min = minute; + tm_struct.tm_sec = second; + + // Check valid date + std::time_t time = std::mktime(&tm_struct); + if (time != -1) { + // Finish condition + if (time > end_timestamp) { + in_range = false; + } + + // Check weekday match + if (tm_struct.tm_wday >= min_wday && + tm_struct.tm_wday <= max_wday) { + ticks.push_back(time); + } + } + second++; + } + second = min_second; + minute++; + } + minute = min_minute; + hour++; + } + hour = min_hour; + mday++; + } + mday = min_mday; + month++; + } + month = min_month; + year++; } + // TODO: optimize mday += 7 on specific wdays // Allocate output array // TODO: can we avoid this data copy? From 5a004c1e97042b3059ff5ca3139d199ed67ffde2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 7 Sep 2023 20:34:07 -0300 Subject: [PATCH 06/27] Test & bugfix for tick_calendar (end of month) --- .../operators/test/tick_calendar_test.py | 30 ++++++++++++------- .../numpy_cc/operators/tick_calendar.cc | 11 +++++-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 3d0612413..8bc311b91 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -37,30 +37,36 @@ def test_base(self): evset = event_set( timestamps=[ datetime(2020, 1, 1, 0, 0, 0), - datetime(2020, 4, 1, 0, 0, 0), + datetime(2020, 6, 1, 0, 0, 0), ], ) node = evset.node() expected_output = event_set( - timestamps=[1, 1], - features={ - "c": ["A", "B"], - }, - indexes=["c"], + timestamps=[ + datetime(2020, 1, 31, 1, 1, 0), + datetime(2020, 1, 31, 1, 1, 1), + datetime(2020, 1, 31, 1, 1, 2), + datetime(2020, 3, 31, 1, 1, 0), + datetime(2020, 3, 31, 1, 1, 1), + datetime(2020, 3, 31, 1, 1, 2), + datetime(2020, 5, 31, 1, 1, 0), + datetime(2020, 5, 31, 1, 1, 1), + datetime(2020, 5, 31, 1, 1, 2), + ], ) # Run op op = TickCalendar( input=node, min_second=0, - max_second=5, + max_second=2, min_minute=1, max_minute=1, min_hour=1, max_hour=1, - min_day_of_month=1, - max_day_of_month=1, + min_day_of_month=31, + max_day_of_month=31, min_month=1, max_month=12, min_day_of_week=0, @@ -70,9 +76,11 @@ def test_base(self): testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] + print("Result:") print( - "Result:" - f" {convert_timestamps_to_datetimes(output.get_arbitrary_index_data().timestamps)}" + convert_timestamps_to_datetimes( + output.get_arbitrary_index_data().timestamps + ) ) assertEqualEventSet(self, output, expected_output) diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 23e9f9da2..998141994 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -52,12 +52,14 @@ py::array_t tick_calendar( tm_struct.tm_min = minute; tm_struct.tm_sec = second; - // Check valid date std::time_t time = std::mktime(&tm_struct); - if (time != -1) { + + // Valid date + if (time != -1 && tm_struct.tm_mday == mday) { // Finish condition if (time > end_timestamp) { in_range = false; + break; } // Check weekday match @@ -65,6 +67,11 @@ py::array_t tick_calendar( tm_struct.tm_wday <= max_wday) { ticks.push_back(time); } + } else { + // Invalid date (end of month) + second = max_second; // avoid unnecessary loops + minute = max_minute; + hour = max_hour; } second++; } From 5f88dfef1a42da493311d2c689c032080859d97c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 7 Sep 2023 21:06:33 -0300 Subject: [PATCH 07/27] Added tests for weekdays and end of year (tick calendar) --- .../operators/test/tick_calendar_test.py | 105 ++++++++++++++++-- 1 file changed, 96 insertions(+), 9 deletions(-) diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 8bc311b91..6ec3988eb 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -13,11 +13,9 @@ # limitations under the License. -from datetime import datetime +from datetime import datetime, timedelta from absl.testing import absltest -import numpy as np -from temporian.core.data.duration_utils import convert_timestamps_to_datetimes from temporian.core.operators.tick_calendar import TickCalendar from temporian.implementation.numpy.data.io import event_set from temporian.implementation.numpy.operators.tick_calendar import ( @@ -33,7 +31,9 @@ class TickCalendarOperatorTest(absltest.TestCase): def setUp(self): pass - def test_base(self): + def test_end_of_month_seconds(self): + # All seconds at mday=31, should only be valid for months 1, 3, 5 + evset = event_set( timestamps=[ datetime(2020, 1, 1, 0, 0, 0), @@ -76,12 +76,99 @@ def test_base(self): testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] - print("Result:") - print( - convert_timestamps_to_datetimes( - output.get_arbitrary_index_data().timestamps - ) + assertEqualEventSet(self, output, expected_output) + + def test_end_of_year_minutes(self): + # All hours/minutes from 30/12/2019 to 2/1/2020 + + evset = event_set( + timestamps=[ + # 4 days: 2 on 2019 + 2 on 2020 + datetime(2019, 12, 30, 0, 0, 0), + datetime(2020, 1, 2, 23, 59, 59), # 2/1 at 23:59:59 + ], + ) + node = evset.node() + + # Expected timestamps: all hours/minutes in 4 days + timestamps = [] + for day, month, year in [ + (30, 12, 2019), + (31, 12, 2019), + (1, 1, 2020), + (2, 1, 2020), + ]: + for hour in range(24): + for minute in range(60): + timestamps += [datetime(year, month, day, hour, minute, 0)] + expected_output = event_set( + timestamps=timestamps, + ) + + # Run op + op = TickCalendar( + input=node, + min_second=0, + max_second=0, + min_minute=0, + max_minute=59, + min_hour=0, + max_hour=23, + min_day_of_month=1, # any day + max_day_of_month=31, + min_month=1, # any month + max_month=12, + min_day_of_week=0, + max_day_of_week=6, ) + instance = TickCalendarNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + + def test_weekdays(self): + # All exact hours from all saturdays in 2023 + + evset = event_set( + timestamps=[ + datetime(2023, 1, 1), + datetime(2023, 12, 31, 23, 0, 0), + ], + ) + node = evset.node() + + # Expected timestamps: all hours/minutes in 4 days + timestamps = [] + day = datetime(2023, 1, 7) # First saturday + one_week = timedelta(days=7) + while day.year < 2024: + for hour in range(24): + timestamps += [day + timedelta(hours=hour)] + day += one_week + expected_output = event_set( + timestamps=timestamps, + ) + + # Run op + op = TickCalendar( + input=node, + min_second=0, + max_second=0, + min_minute=0, + max_minute=0, + min_hour=0, + max_hour=23, # all hours + min_day_of_month=1, # any month day + max_day_of_month=31, + min_month=1, # any month + max_month=12, + min_day_of_week=6, # saturday + max_day_of_week=6, # saturday + ) + instance = TickCalendarNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] assertEqualEventSet(self, output, expected_output) From 6eb1ea316128c65dd79f10bb21163f625ad0e0d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 8 Sep 2023 12:28:28 -0300 Subject: [PATCH 08/27] Auto setup None args in tick_calendar --- temporian/core/event_set_ops.py | 4 +- temporian/core/operators/tick_calendar.py | 137 +++++++++++------- .../operators/test/tick_calendar_test.py | 24 +-- .../numpy/operators/tick_calendar.py | 8 +- 4 files changed, 99 insertions(+), 74 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 5fa6590f3..7c3fa1f1f 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2254,9 +2254,9 @@ def tick_calendar( second=second, minute=minute, hour=hour, - day_of_month=day_of_month, + mday=day_of_month, month=month, - day_of_week=day_of_week, + wday=day_of_week, ) def timestamps(self: EventSetOrNode) -> EventSetOrNode: diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 7f31868f7..7da8a4305 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -35,15 +35,15 @@ def __init__( min_second: int, min_minute: int, min_hour: int, - min_day_of_month: int, + min_mday: int, min_month: int, - min_day_of_week: int, + min_wday: int, max_second: int, max_minute: int, max_hour: int, - max_day_of_month: int, + max_mday: int, max_month: int, - max_day_of_week: int, + max_wday: int, ): super().__init__() if not input.schema.is_unix_timestamp: @@ -58,24 +58,24 @@ def __init__( self._max_minute = max_minute self._min_hour = min_hour self._max_hour = max_hour - self._min_day_of_month = min_day_of_month - self._max_day_of_month = max_day_of_month + self._min_mday = min_mday + self._max_mday = max_mday self._min_month = min_month self._max_month = max_month - self._min_day_of_week = min_day_of_week - self._max_day_of_week = max_day_of_week + self._min_wday = min_wday + self._max_wday = max_wday self.add_attribute("min_second", min_second) self.add_attribute("max_second", max_second) self.add_attribute("min_minute", min_minute) self.add_attribute("max_minute", max_minute) self.add_attribute("min_hour", min_hour) self.add_attribute("max_hour", max_hour) - self.add_attribute("min_day_of_month", min_day_of_month) - self.add_attribute("max_day_of_month", max_day_of_month) + self.add_attribute("min_mday", min_mday) + self.add_attribute("max_mday", max_mday) self.add_attribute("min_month", min_month) self.add_attribute("max_month", max_month) - self.add_attribute("min_day_of_week", min_day_of_week) - self.add_attribute("max_day_of_week", max_day_of_week) + self.add_attribute("min_wday", min_wday) + self.add_attribute("max_wday", max_wday) self.add_input("input", input) @@ -116,12 +116,12 @@ def max_hour(self) -> int: return self._max_hour @property - def min_day_of_month(self) -> int: - return self._min_day_of_month + def min_mday(self) -> int: + return self._min_mday @property - def max_day_of_month(self) -> int: - return self._max_day_of_month + def max_mday(self) -> int: + return self._max_mday @property def min_month(self) -> int: @@ -132,12 +132,12 @@ def max_month(self) -> int: return self._max_month @property - def min_day_of_week(self) -> int: - return self._min_day_of_week + def min_wday(self) -> int: + return self._min_wday @property - def max_day_of_week(self) -> int: - return self._max_day_of_week + def max_wday(self) -> int: + return self._max_wday @classmethod def build_op_definition(cls) -> pb.OperatorDef: @@ -169,11 +169,11 @@ def build_op_definition(cls) -> pb.OperatorDef: type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), pb.OperatorDef.Attribute( - key="min_day_of_month", + key="min_mday", type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), pb.OperatorDef.Attribute( - key="max_day_of_month", + key="max_mday", type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), pb.OperatorDef.Attribute( @@ -185,11 +185,11 @@ def build_op_definition(cls) -> pb.OperatorDef: type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), pb.OperatorDef.Attribute( - key="min_day_of_week", + key="min_wday", type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), pb.OperatorDef.Attribute( - key="max_day_of_week", + key="max_wday", type=pb.OperatorDef.Attribute.Type.INTEGER_64, ), ], @@ -201,6 +201,20 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(TickCalendar) +def set_arg_range(arg_value, val_range, prefer_free): + if arg_value == "*": + range_ini, range_end = val_range + elif arg_value is not None: + range_ini = range_end = int(arg_value) + else: # None (auto setup) + if prefer_free: # Don't restrict the range + range_ini, range_end = val_range + else: # Fix to first value + range_ini = range_end = val_range[0] + + return range_ini, range_end + + @typecheck @compile def tick_calendar( @@ -208,40 +222,51 @@ def tick_calendar( second: Union[int, str, None], minute: Union[int, str, None], hour: Union[int, str, None], - day_of_month: Union[int, str, None], + mday: Union[int, str, None], month: Union[int, str, None], - day_of_week: Union[int, str, None], + wday: Union[int, str, None], ) -> EventSetOrNode: - args = [second, minute, hour, day_of_month, month, day_of_week] - # Default for empty args - if all(arg is None for arg in args): - day_of_month = "*" + if all(arg is None for arg in (second, minute, hour, mday, month, wday)): + mday = "*" month = "*" - if second == "*": - min_second = 0 - max_second = 59 + # All defined values must be consecutive (no gaps with None) + if wday is not None: + sorted_args = [second, minute, hour, wday] else: - min_second = max_second = 0 if second is None else int(second) - - if minute == "*": - min_minute = 0 - max_minute = 59 - elif minute is not None: - min_minute = max_minute = int(minute) - else: # None (auto set): only if adjacent values are specified - raise ValueError() # TODO - - # TODO - min_hour = 0 - max_hour = 23 - min_day_of_month = 1 - max_day_of_month = 31 - min_month = 1 - max_month = 12 - min_day_of_week = 0 - max_day_of_week = 6 + sorted_args = [second, minute, hour, mday, month] + for idx, arg in enumerate(sorted_args): + if ( + arg is None + and any(a is not None for a in sorted_args[:idx]) + and any(a is not None for a in sorted_args[idx + 1 :]) + ): + raise ValueError( + "Can't set argument to None because previous and" + " following arguments were specified. Set to '*' or an" + " integer value instead" + ) + + prefer_free = False + min_second, max_second = set_arg_range(second, (0, 59), prefer_free) + + # prefer_free becomes True when next None args should be set to '*' + # e.g: only hour=1 -> second=0,minute=0, mday='*', month='*' + prefer_free = second is not None + min_minute, max_minute = set_arg_range(minute, (0, 59), prefer_free) + + prefer_free = prefer_free or minute is not None + min_hour, max_hour = set_arg_range(hour, (0, 23), prefer_free) + + prefer_free = prefer_free or hour is not None + min_mday, max_mday = set_arg_range(mday, (1, 31), prefer_free) + + prefer_free = prefer_free or mday is not None + min_month, max_month = set_arg_range(month, (1, 12), prefer_free) + + prefer_free = True # Always free wday by default + min_wday, max_wday = set_arg_range(wday, (0, 6), True) return TickCalendar( input=input, @@ -251,12 +276,12 @@ def tick_calendar( max_minute=max_minute, min_hour=min_hour, max_hour=max_hour, - min_day_of_month=min_day_of_month, - max_day_of_month=max_day_of_month, + min_mday=min_mday, + max_mday=max_mday, min_month=min_month, max_month=max_month, - min_day_of_week=min_day_of_week, - max_day_of_week=max_day_of_week, + min_wday=min_wday, + max_wday=max_wday, ).outputs[ "output" ] # type: ignore diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 6ec3988eb..b5ed26c5d 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -65,12 +65,12 @@ def test_end_of_month_seconds(self): max_minute=1, min_hour=1, max_hour=1, - min_day_of_month=31, - max_day_of_month=31, + min_mday=31, + max_mday=31, min_month=1, max_month=12, - min_day_of_week=0, - max_day_of_week=6, + min_wday=0, + max_wday=6, ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) @@ -114,12 +114,12 @@ def test_end_of_year_minutes(self): max_minute=59, min_hour=0, max_hour=23, - min_day_of_month=1, # any day - max_day_of_month=31, + min_mday=1, # any day + max_mday=31, min_month=1, # any month max_month=12, - min_day_of_week=0, - max_day_of_week=6, + min_wday=0, + max_wday=6, ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) @@ -159,12 +159,12 @@ def test_weekdays(self): max_minute=0, min_hour=0, max_hour=23, # all hours - min_day_of_month=1, # any month day - max_day_of_month=31, + min_mday=1, # any month day + max_mday=31, min_month=1, # any month max_month=12, - min_day_of_week=6, # saturday - max_day_of_week=6, # saturday + min_wday=6, # saturday + max_wday=6, # saturday ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index d9216f548..6729595bc 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -56,12 +56,12 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: max_minute=self.operator.max_minute, min_hour=self.operator.min_hour, max_hour=self.operator.max_hour, - min_mday=self.operator.min_day_of_month, - max_mday=self.operator.max_day_of_month, + min_mday=self.operator.min_mday, + max_mday=self.operator.max_mday, min_month=self.operator.min_month, max_month=self.operator.max_month, - min_wday=self.operator.min_day_of_week, - max_wday=self.operator.max_day_of_week, + min_wday=self.operator.min_wday, + max_wday=self.operator.max_wday, ) print(f"Result: {dst_timestamps}") output_evset.set_index_value( From 83c33cc15ac75c7e84e1de5303f6827b3c5714a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 8 Sep 2023 13:25:46 -0300 Subject: [PATCH 09/27] Many docstring examples for tick_calendar --- temporian/core/event_set_ops.py | 79 ++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 7c3fa1f1f..996034f3a 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2185,9 +2185,9 @@ def tick_calendar( second: Union[int, str, None] = None, minute: Union[int, str, None] = None, hour: Union[int, str, None] = None, - day_of_month: Union[int, str, None] = None, + mday: Union[int, str, None] = None, month: Union[int, str, None] = None, - day_of_week: Union[int, str, None] = None, + wday: Union[int, str, None] = None, ) -> EventSetOrNode: """Generates timestamps at specified datetimes, in the range of a guide [`EventSet`][temporian.EventSet]. @@ -2205,25 +2205,82 @@ def tick_calendar( , resulting in one tick at every exact hour of every day/month/year in the input guide range. - Example: + Examples: ```python - >>> a = tp.event_set(timestamps=["2020-01-01", "2021-01-01"]) - >>> # Every day in the period + >>> # Every day in the period (exactly one year) + >>> a = tp.event_set(timestamps=["2021-01-01", "2021-12-31 23:59:59"]) >>> b = a.tick_calendar() >>> b + indexes: ... + events: + (365 events): + timestamps: [...] + ... + >>> # Every day at 2:30am >>> b = a.tick_calendar(hour=2, minute=30) - >>> b + >>> tp.glue(b.calendar_hour(), b.calendar_minute()) + indexes: ... + events: + (365 events): + timestamps: [...] + 'calendar_hour': [2 2 2 ... 2 2 2] + 'calendar_minute': [30 30 30 ... 30 30 30] + ... + >>> # Day 5 of every month - >>> b = a.tick_calendar(day_of_month=5) - >>> b + >>> b = a.tick_calendar(mday=5) + >>> b.calendar_day_of_month() + indexes: ... + events: + (12 events): + timestamps: [...] + 'calendar_day_of_month': [5 5 5 ... 5 5 5] + ... + - >>> a = tp.event_set(timestamps=["2020-01-01", "2023-01-01"]) >>> # 1st of February of every year + >>> a = tp.event_set(timestamps=["2020-01-01", "2021-12-31"]) >>> b = a.tick_calendar(month=2) + >>> tp.glue(b.calendar_day_of_month(), b.calendar_month()) + indexes: ... + events: + (2 events): + timestamps: [...] + 'calendar_day_of_month': [1 1] + 'calendar_month': [2 2] + ... + + >>> # Every second in the period (2 hours -> 7200 seconds) + >>> a = tp.event_set(timestamps=["2020-01-01 00:00:00", + ... "2020-01-01 01:59:59"]) + >>> b = a.tick_calendar(second='*') + >>> b + indexes: ... + events: + (7200 events): + timestamps: [...] + ... + + >>> # Every second of the minute 30 of every hour (00:30 and 01:30) + >>> a = tp.event_set(timestamps=["2020-01-01 00:00", + ... "2020-01-01 02:00"]) + >>> b = a.tick_calendar(second='*', minute=30) >>> b + indexes: ... + events: + (120 events): + timestamps: [...] + ... + + >>> # Not allowed: intermediate arguments (minute, hour) not specified + >>> b = a.tick_calendar(second='1', mday=1) # ambiguous meaning + Traceback (most recent call last): + ... + ValueError: Can't set argument to None because previous and + following arguments were specified. Set to '*' or an integer ... ``` @@ -2254,9 +2311,9 @@ def tick_calendar( second=second, minute=minute, hour=hour, - mday=day_of_month, + mday=mday, month=month, - wday=day_of_week, + wday=wday, ) def timestamps(self: EventSetOrNode) -> EventSetOrNode: From 24855eeb71be86af68662e53c4547458a81a982a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 8 Sep 2023 13:26:24 -0300 Subject: [PATCH 10/27] Fixed tests, added .md files --- docs/src/reference/index.md | 1 + .../temporian/operators/tick_calendar.md | 1 + temporian/core/operators/tick_calendar.py | 16 ++++++++-------- temporian/core/test/registered_operators_test.py | 1 + temporian/implementation/numpy/operators/BUILD | 1 + .../implementation/numpy/operators/__init__.py | 1 + .../numpy/operators/tick_calendar.py | 1 - .../numpy/test/registered_operators_test.py | 1 + 8 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 docs/src/reference/temporian/operators/tick_calendar.md diff --git a/docs/src/reference/index.md b/docs/src/reference/index.md index 188b96918..e6424dbc9 100644 --- a/docs/src/reference/index.md +++ b/docs/src/reference/index.md @@ -61,6 +61,7 @@ Check the index on the left for a more detailed description of any symbol. | [`EventSet.set_index()`][temporian.EventSet.set_index] | Replaces the indexes in an [`EventSet`][temporian.EventSet]. | | [`EventSet.since_last()`][temporian.EventSet.since_last] | Computes the amount of time since the last distinct timestamp. | | [`EventSet.tick()`][temporian.EventSet.tick] | Generates timestamps at regular intervals in the range of a guide. | +| [`EventSet.tick_calendar()`][temporian.EventSet.tick] | Generates timestamps at the specified calendar date-time events. | | [`EventSet.timestamps()`][temporian.EventSet.timestamps] | Creates a feature from the events timestamps (`float64`). | | [`EventSet.unique_timestamps()`][temporian.EventSet.unique_timestamps] | Removes events with duplicated timestamps from an [`EventSet`][temporian.EventSet]. | diff --git a/docs/src/reference/temporian/operators/tick_calendar.md b/docs/src/reference/temporian/operators/tick_calendar.md new file mode 100644 index 000000000..598fe2f33 --- /dev/null +++ b/docs/src/reference/temporian/operators/tick_calendar.md @@ -0,0 +1 @@ +::: temporian.EventSet.tick_calendar diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 7da8a4305..e45899c61 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -248,11 +248,11 @@ def tick_calendar( " integer value instead" ) - prefer_free = False - min_second, max_second = set_arg_range(second, (0, 59), prefer_free) + # Always set second=0 by default + min_second, max_second = set_arg_range(second, (0, 59), prefer_free=False) - # prefer_free becomes True when next None args should be set to '*' - # e.g: only hour=1 -> second=0,minute=0, mday='*', month='*' + # prefer_free becomes True when next args should be set to '*' by default + # e.g: user sets only hour=1 -> second=0,minute=0, mday='*', month='*' prefer_free = second is not None min_minute, max_minute = set_arg_range(minute, (0, 59), prefer_free) @@ -262,11 +262,11 @@ def tick_calendar( prefer_free = prefer_free or hour is not None min_mday, max_mday = set_arg_range(mday, (1, 31), prefer_free) - prefer_free = prefer_free or mday is not None - min_month, max_month = set_arg_range(month, (1, 12), prefer_free) + # Always free month range by default + min_month, max_month = set_arg_range(month, (1, 12), prefer_free=True) - prefer_free = True # Always free wday by default - min_wday, max_wday = set_arg_range(wday, (0, 6), True) + # Always free wday range by default + min_wday, max_wday = set_arg_range(wday, (0, 6), prefer_free=True) return TickCalendar( input=input, diff --git a/temporian/core/test/registered_operators_test.py b/temporian/core/test/registered_operators_test.py index a27876b75..f06c116f0 100644 --- a/temporian/core/test/registered_operators_test.py +++ b/temporian/core/test/registered_operators_test.py @@ -93,6 +93,7 @@ def test_base(self): "SUBTRACTION", "SUBTRACTION_SCALAR", "TICK", + "TICK_CALENDAR", "TIMESTAMPS", "UNIQUE_TIMESTAMPS", "XOR", diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index 616b96cd3..e8bff38a8 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -31,6 +31,7 @@ py_library( ":select", ":since_last", ":tick", + ":tick_calendar", ":timestamps", ":unary", ":unique_timestamps", diff --git a/temporian/implementation/numpy/operators/__init__.py b/temporian/implementation/numpy/operators/__init__.py index 79d48c5da..dfd1486c0 100644 --- a/temporian/implementation/numpy/operators/__init__.py +++ b/temporian/implementation/numpy/operators/__init__.py @@ -59,6 +59,7 @@ from temporian.implementation.numpy.operators import begin from temporian.implementation.numpy.operators import end from temporian.implementation.numpy.operators import tick +from temporian.implementation.numpy.operators import tick_calendar from temporian.implementation.numpy.operators import timestamps from temporian.implementation.numpy.operators import enumerate from temporian.implementation.numpy.operators import fast_fourier_transform diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index 6729595bc..4b0806417 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -63,7 +63,6 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: min_wday=self.operator.min_wday, max_wday=self.operator.max_wday, ) - print(f"Result: {dst_timestamps}") output_evset.set_index_value( index_key, IndexData( diff --git a/temporian/implementation/numpy/test/registered_operators_test.py b/temporian/implementation/numpy/test/registered_operators_test.py index c627b0eda..5680d509a 100644 --- a/temporian/implementation/numpy/test/registered_operators_test.py +++ b/temporian/implementation/numpy/test/registered_operators_test.py @@ -91,6 +91,7 @@ def test_base(self): "SUBTRACTION", "SUBTRACTION_SCALAR", "TICK", + "TICK_CALENDAR", "TIMESTAMPS", "UNIQUE_TIMESTAMPS", "XOR", From b390263508efe5eb167067fe7439a261aa31e444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Mon, 11 Sep 2023 15:39:42 -0300 Subject: [PATCH 11/27] Addressed some comments after PR --- temporian/core/event_set_ops.py | 30 +++++++------ temporian/core/operators/base.py | 2 +- temporian/core/operators/tick_calendar.py | 45 ++++++++++++------- .../operators/test/tick_calendar_test.py | 3 -- .../numpy/operators/tick_calendar.py | 8 ++-- .../numpy_cc/operators/tick_calendar.cc | 14 +++--- 6 files changed, 59 insertions(+), 43 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 996034f3a..c938e7184 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -16,7 +16,7 @@ from __future__ import annotations -from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING +from typing import Any, Dict, List, Literal, Optional, Union, TYPE_CHECKING from temporian.core.data.duration import Duration @@ -2182,21 +2182,23 @@ def tick( def tick_calendar( self: EventSetOrNode, - second: Union[int, str, None] = None, - minute: Union[int, str, None] = None, - hour: Union[int, str, None] = None, - mday: Union[int, str, None] = None, - month: Union[int, str, None] = None, - wday: Union[int, str, None] = None, + second: Union[int, Literal["*"], None], + minute: Union[int, Literal["*"], None], + hour: Union[int, Literal["*"], None], + mday: Union[int, Literal["*"], None], + month: Union[int, Literal["*"], None], + wday: Union[int, Literal["*"], None], ) -> EventSetOrNode: - """Generates timestamps at specified datetimes, in the range of a guide - [`EventSet`][temporian.EventSet]. + """Generates events periodically at fixed times or dates e.g. each month. + + Events are generated in the range of the input + [`EventSet`][temporian.EventSet] independently for each index. The usability is inspired in the crontab format, where arguments can take a value of `'*'` to tick at all values, or a fixed integer to tick only at that precise value. - Non-specified values (`None`), are set to `*` if a finer + Non-specified values (`None`), are set to `'*'` if a finer resolution argument is specified, or fixed to the first valid value if a lower resolution is specified. For example, setting only `tick_calendar(hour='*')` @@ -2205,11 +2207,13 @@ def tick_calendar( , resulting in one tick at every exact hour of every day/month/year in the input guide range. + The datetime timezone is always assumed to be UTC. + Examples: ```python - >>> # Every day in the period (exactly one year) + >>> # Every day (at 00:00:00) in the period (exactly one year) >>> a = tp.event_set(timestamps=["2021-01-01", "2021-12-31 23:59:59"]) - >>> b = a.tick_calendar() + >>> b = a.tick_calendar(hour=0) >>> b indexes: ... events: @@ -2230,7 +2234,7 @@ def tick_calendar( ... - >>> # Day 5 of every month + >>> # Day 5 of every month (at 00:00) >>> b = a.tick_calendar(mday=5) >>> b.calendar_day_of_month() indexes: ... diff --git a/temporian/core/operators/base.py b/temporian/core/operators/base.py index 25cbda34e..3c032ce79 100644 --- a/temporian/core/operators/base.py +++ b/temporian/core/operators/base.py @@ -26,7 +26,7 @@ # Valid types for operator attributes AttributeType = Union[ - None, str, int, float, bool, bytes, List[str], Dict[str, str], List[DType] + str, int, float, bool, bytes, List[str], Dict[str, str], List[DType] ] diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index e45899c61..0c3e888b1 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -14,7 +14,9 @@ """TickCalendar operator class and public API function definitions.""" -from typing import Union +from typing import Union, Literal, Tuple + +import numpy as np from temporian.core import operator_lib from temporian.core.compilation import compile @@ -84,7 +86,7 @@ def __init__( create_node_new_features_new_sampling( features=[], indexes=input.schema.indexes, - is_unix_timestamp=input.schema.is_unix_timestamp, + is_unix_timestamp=True, creator=self, ), ) @@ -201,11 +203,25 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(TickCalendar) -def set_arg_range(arg_value, val_range, prefer_free): +def set_arg_range( + arg_value: Union[int, Literal["*"], None], + val_range: Tuple[int, int], + prefer_free: bool, +): if arg_value == "*": range_ini, range_end = val_range elif arg_value is not None: - range_ini = range_end = int(arg_value) + if ( + not isinstance(arg_value, (int, np.integer)) + or arg_value < val_range[0] + or arg_value > val_range[1] + ): + raise ValueError( + f"Value should be '*' or integer in range {val_range}, got:" + f" {arg_value} (type {type(arg_value)})" + ) + + range_ini = range_end = arg_value else: # None (auto setup) if prefer_free: # Don't restrict the range range_ini, range_end = val_range @@ -219,17 +235,16 @@ def set_arg_range(arg_value, val_range, prefer_free): @compile def tick_calendar( input: EventSetOrNode, - second: Union[int, str, None], - minute: Union[int, str, None], - hour: Union[int, str, None], - mday: Union[int, str, None], - month: Union[int, str, None], - wday: Union[int, str, None], + second: Union[int, Literal["*"], None], + minute: Union[int, Literal["*"], None], + hour: Union[int, Literal["*"], None], + mday: Union[int, Literal["*"], None], + month: Union[int, Literal["*"], None], + wday: Union[int, Literal["*"], None], ) -> EventSetOrNode: - # Default for empty args + # Don't allow empty args if all(arg is None for arg in (second, minute, hour, mday, month, wday)): - mday = "*" - month = "*" + raise ValueError("At least one argument must be provided (not None).") # All defined values must be consecutive (no gaps with None) if wday is not None: @@ -256,10 +271,10 @@ def tick_calendar( prefer_free = second is not None min_minute, max_minute = set_arg_range(minute, (0, 59), prefer_free) - prefer_free = prefer_free or minute is not None + prefer_free |= minute is not None min_hour, max_hour = set_arg_range(hour, (0, 23), prefer_free) - prefer_free = prefer_free or hour is not None + prefer_free |= hour is not None min_mday, max_mday = set_arg_range(mday, (1, 31), prefer_free) # Always free month range by default diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index b5ed26c5d..1eed7274f 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -28,9 +28,6 @@ class TickCalendarOperatorTest(absltest.TestCase): - def setUp(self): - pass - def test_end_of_month_seconds(self): # All seconds at mday=31, should only be valid for months 1, 3, 5 diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index 4b0806417..a78934063 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -42,14 +42,12 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: # Fill output EventSet's data for index_key, index_data in input.data.items(): - if len(index_data.timestamps) < 2: + if len(index_data.timestamps) == 0: dst_timestamps = np.array([], dtype=np.float64) else: - begin = int(index_data.timestamps[0]) # discard sub-second info - end = int(index_data.timestamps[-1]) dst_timestamps = operators_cc.tick_calendar( - start_timestamp=begin, - end_timestamp=end, + start_timestamp=index_data.timestamps[0], + end_timestamp=index_data.timestamps[-1], min_second=self.operator.min_second, max_second=self.operator.max_second, min_minute=self.operator.min_minute, diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 998141994..0ed331409 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -14,8 +15,8 @@ namespace { namespace py = pybind11; py::array_t tick_calendar( - const long start_timestamp, // min date - const long end_timestamp, // max date + const double start_timestamp, // min date + const double end_timestamp, // max date const int min_second, const int max_second, // second range const int min_minute, const int max_minute, // minute range const int min_hour, const int max_hour, // hours range @@ -27,9 +28,10 @@ py::array_t tick_calendar( std::vector ticks; // Date range - std::tm start_utc = *std::gmtime(&start_timestamp); - std::tm end_utc = *std::gmtime(&end_timestamp); + const long start_t = (long)std::floor(start_timestamp); + const long end_t = (long)std::floor(end_timestamp); + std::tm start_utc = *std::gmtime(&start_t); int year = start_utc.tm_year; // from 1900 int month = std::max(start_utc.tm_mon + 1, min_month); // zero-based tm_mon int mday = std::max(start_utc.tm_mday, min_mday); // 1-31 @@ -57,7 +59,7 @@ py::array_t tick_calendar( // Valid date if (time != -1 && tm_struct.tm_mday == mday) { // Finish condition - if (time > end_timestamp) { + if (time > end_t) { in_range = false; break; } @@ -68,7 +70,7 @@ py::array_t tick_calendar( ticks.push_back(time); } } else { - // Invalid date (end of month) + // Invalid date (e.g: 31/4) second = max_second; // avoid unnecessary loops minute = max_minute; hour = max_hour; From 8b9ba7c72153ca815a087e08aadd1e1656688208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 15:04:41 -0300 Subject: [PATCH 12/27] Handle Literal types in typecheck --- temporian/utils/typecheck.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/temporian/utils/typecheck.py b/temporian/utils/typecheck.py index da5659738..116049229 100644 --- a/temporian/utils/typecheck.py +++ b/temporian/utils/typecheck.py @@ -6,7 +6,7 @@ import logging -from typing import List, Set, Dict, Optional, Union, Tuple, Any +from typing import List, Set, Dict, Optional, Union, Tuple, Any, Literal import inspect import typing @@ -119,6 +119,13 @@ def _check_annotation(trace: _Trace, is_compiled: bool, value, annotation): origin = typing.get_origin(annotation) assert origin is not None + # Literal values check (e.g: Literal['*']) + if origin is Literal: + # Check param value in the allowed literal values + if value not in typing.get_args(annotation): + trace.exception(_base_error(value, annotation)) + return + if origin is not Union: if not isinstance(value, origin): # The origin (e.g. "list" in "List[int]") is wrong. From f418eaea0f775fefe95b0a40a8e69f8363489350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 16:03:54 -0300 Subject: [PATCH 13/27] tick_calendar args serialized (type ANY), get ranges in implementation --- temporian/core/event_set_ops.py | 12 +- temporian/core/operators/tick_calendar.py | 271 +++++++----------- .../numpy/operators/tick_calendar.py | 59 +++- 3 files changed, 161 insertions(+), 181 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index c938e7184..157316523 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2182,12 +2182,12 @@ def tick( def tick_calendar( self: EventSetOrNode, - second: Union[int, Literal["*"], None], - minute: Union[int, Literal["*"], None], - hour: Union[int, Literal["*"], None], - mday: Union[int, Literal["*"], None], - month: Union[int, Literal["*"], None], - wday: Union[int, Literal["*"], None], + second: Union[int, Literal["*"], None] = None, + minute: Union[int, Literal["*"], None] = None, + hour: Union[int, Literal["*"], None] = None, + mday: Union[int, Literal["*"], None] = None, + month: Union[int, Literal["*"], None] = None, + wday: Union[int, Literal["*"], None] = None, ) -> EventSetOrNode: """Generates events periodically at fixed times or dates e.g. each month. diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 0c3e888b1..b38eb04b1 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -29,23 +29,19 @@ from temporian.proto import core_pb2 as pb from temporian.utils.typecheck import typecheck +TypeWildCard = Literal["*"] + class TickCalendar(Operator): def __init__( self, input: EventSetNode, - min_second: int, - min_minute: int, - min_hour: int, - min_mday: int, - min_month: int, - min_wday: int, - max_second: int, - max_minute: int, - max_hour: int, - max_mday: int, - max_month: int, - max_wday: int, + second: Union[int, TypeWildCard], + minute: Union[int, TypeWildCard], + hour: Union[int, TypeWildCard], + mday: Union[int, TypeWildCard], + month: Union[int, TypeWildCard], + wday: Union[int, TypeWildCard], ): super().__init__() if not input.schema.is_unix_timestamp: @@ -54,30 +50,18 @@ def __init__( ) # Attributes - self._min_second = min_second - self._max_second = max_second - self._min_minute = min_minute - self._max_minute = max_minute - self._min_hour = min_hour - self._max_hour = max_hour - self._min_mday = min_mday - self._max_mday = max_mday - self._min_month = min_month - self._max_month = max_month - self._min_wday = min_wday - self._max_wday = max_wday - self.add_attribute("min_second", min_second) - self.add_attribute("max_second", max_second) - self.add_attribute("min_minute", min_minute) - self.add_attribute("max_minute", max_minute) - self.add_attribute("min_hour", min_hour) - self.add_attribute("max_hour", max_hour) - self.add_attribute("min_mday", min_mday) - self.add_attribute("max_mday", max_mday) - self.add_attribute("min_month", min_month) - self.add_attribute("max_month", max_month) - self.add_attribute("min_wday", min_wday) - self.add_attribute("max_wday", max_wday) + self._second = self._check_arg(second, self.seconds_max_range()) + self._minute = self._check_arg(minute, self.minutes_max_range()) + self._hour = self._check_arg(hour, self.hours_max_range()) + self._mday = self._check_arg(mday, self.mday_max_range()) + self._month = self._check_arg(month, self.month_max_range()) + self._wday = self._check_arg(wday, self.wday_max_range()) + self.add_attribute("second", second) + self.add_attribute("minute", minute) + self.add_attribute("hour", hour) + self.add_attribute("mday", mday) + self.add_attribute("month", month) + self.add_attribute("wday", wday) self.add_input("input", input) @@ -93,53 +77,77 @@ def __init__( self.check() - @property - def min_second(self) -> int: - return self._min_second + def _check_arg(self, arg_value, val_range): + if arg_value == "*" or ( + isinstance(arg_value, (int, np.integer)) + and arg_value >= val_range[0] + and arg_value <= val_range[1] + ): + return arg_value + raise ValueError( + f"Value should be '*' or integer in range {val_range}, got:" + f" {arg_value} (type {type(arg_value)})" + ) @property - def max_second(self) -> int: - return self._max_second + def second(self) -> Union[int, TypeWildCard]: + # assert for typecheck + assert self._second == "*" or not isinstance(self._second, str) + return self._second @property - def min_minute(self) -> int: - return self._min_minute + def minute(self) -> Union[int, TypeWildCard]: + # assert for typecheck + assert self._minute == "*" or not isinstance(self._minute, str) + return self._minute @property - def max_minute(self) -> int: - return self._max_minute + def hour(self) -> Union[int, TypeWildCard]: + # assert for typecheck + assert self._hour == "*" or not isinstance(self._hour, str) + return self._hour @property - def min_hour(self) -> int: - return self._min_hour + def mday(self) -> Union[int, TypeWildCard]: + # assert for typecheck + assert self._mday == "*" or not isinstance(self._mday, str) + return self._mday @property - def max_hour(self) -> int: - return self._max_hour + def month(self) -> Union[int, TypeWildCard]: + # assert for typecheck + assert self._month == "*" or not isinstance(self._month, str) + return self._month @property - def min_mday(self) -> int: - return self._min_mday + def wday(self) -> Union[int, TypeWildCard]: + # assert for typecheck + assert self._wday == "*" or not isinstance(self._wday, str) + return self._wday - @property - def max_mday(self) -> int: - return self._max_mday + @classmethod + def seconds_max_range(cls) -> Tuple[int, int]: + return (0, 59) - @property - def min_month(self) -> int: - return self._min_month + @classmethod + def minutes_max_range(cls) -> Tuple[int, int]: + return (0, 59) - @property - def max_month(self) -> int: - return self._max_month + @classmethod + def hours_max_range(cls) -> Tuple[int, int]: + return (0, 23) - @property - def min_wday(self) -> int: - return self._min_wday + @classmethod + def mday_max_range(cls) -> Tuple[int, int]: + return (1, 31) - @property - def max_wday(self) -> int: - return self._max_wday + @classmethod + def month_max_range(cls) -> Tuple[int, int]: + return (1, 12) + + @classmethod + def wday_max_range(cls) -> Tuple[int, int]: + return (0, 6) @classmethod def build_op_definition(cls) -> pb.OperatorDef: @@ -147,52 +155,28 @@ def build_op_definition(cls) -> pb.OperatorDef: key="TICK_CALENDAR", attributes=[ pb.OperatorDef.Attribute( - key="min_second", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, - ), - pb.OperatorDef.Attribute( - key="max_second", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, + key="second", + type=pb.OperatorDef.Attribute.Type.ANY, ), pb.OperatorDef.Attribute( - key="min_minute", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, + key="minute", + type=pb.OperatorDef.Attribute.Type.ANY, ), pb.OperatorDef.Attribute( - key="max_minute", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, + key="hour", + type=pb.OperatorDef.Attribute.Type.ANY, ), pb.OperatorDef.Attribute( - key="min_hour", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, + key="mday", + type=pb.OperatorDef.Attribute.Type.ANY, ), pb.OperatorDef.Attribute( - key="max_hour", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, + key="month", + type=pb.OperatorDef.Attribute.Type.ANY, ), pb.OperatorDef.Attribute( - key="min_mday", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, - ), - pb.OperatorDef.Attribute( - key="max_mday", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, - ), - pb.OperatorDef.Attribute( - key="min_month", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, - ), - pb.OperatorDef.Attribute( - key="max_month", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, - ), - pb.OperatorDef.Attribute( - key="min_wday", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, - ), - pb.OperatorDef.Attribute( - key="max_wday", - type=pb.OperatorDef.Attribute.Type.INTEGER_64, + key="wday", + type=pb.OperatorDef.Attribute.Type.ANY, ), ], inputs=[pb.OperatorDef.Input(key="input")], @@ -203,44 +187,16 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(TickCalendar) -def set_arg_range( - arg_value: Union[int, Literal["*"], None], - val_range: Tuple[int, int], - prefer_free: bool, -): - if arg_value == "*": - range_ini, range_end = val_range - elif arg_value is not None: - if ( - not isinstance(arg_value, (int, np.integer)) - or arg_value < val_range[0] - or arg_value > val_range[1] - ): - raise ValueError( - f"Value should be '*' or integer in range {val_range}, got:" - f" {arg_value} (type {type(arg_value)})" - ) - - range_ini = range_end = arg_value - else: # None (auto setup) - if prefer_free: # Don't restrict the range - range_ini, range_end = val_range - else: # Fix to first value - range_ini = range_end = val_range[0] - - return range_ini, range_end - - @typecheck @compile def tick_calendar( input: EventSetOrNode, - second: Union[int, Literal["*"], None], - minute: Union[int, Literal["*"], None], - hour: Union[int, Literal["*"], None], - mday: Union[int, Literal["*"], None], - month: Union[int, Literal["*"], None], - wday: Union[int, Literal["*"], None], + second: Union[int, TypeWildCard, None] = None, + minute: Union[int, TypeWildCard, None] = None, + hour: Union[int, TypeWildCard, None] = None, + mday: Union[int, TypeWildCard, None] = None, + month: Union[int, TypeWildCard, None] = None, + wday: Union[int, TypeWildCard, None] = None, ) -> EventSetOrNode: # Don't allow empty args if all(arg is None for arg in (second, minute, hour, mday, month, wday)): @@ -264,39 +220,32 @@ def tick_calendar( ) # Always set second=0 by default - min_second, max_second = set_arg_range(second, (0, 59), prefer_free=False) + second = 0 if second is None else second # prefer_free becomes True when next args should be set to '*' by default # e.g: user sets only hour=1 -> second=0,minute=0, mday='*', month='*' prefer_free = second is not None - min_minute, max_minute = set_arg_range(minute, (0, 59), prefer_free) + if minute is None: + minute = "*" if prefer_free else 0 prefer_free |= minute is not None - min_hour, max_hour = set_arg_range(hour, (0, 23), prefer_free) + if hour is None: + hour = "*" if prefer_free else 0 prefer_free |= hour is not None - min_mday, max_mday = set_arg_range(mday, (1, 31), prefer_free) - - # Always free month range by default - min_month, max_month = set_arg_range(month, (1, 12), prefer_free=True) + if mday is None: + mday = "*" if prefer_free else 1 - # Always free wday range by default - min_wday, max_wday = set_arg_range(wday, (0, 6), prefer_free=True) + # Always free range by default + month = "*" if month is None else month + wday = "*" if wday is None else wday return TickCalendar( - input=input, - min_second=min_second, - max_second=max_second, - min_minute=min_minute, - max_minute=max_minute, - min_hour=min_hour, - max_hour=max_hour, - min_mday=min_mday, - max_mday=max_mday, - min_month=min_month, - max_month=max_month, - min_wday=min_wday, - max_wday=max_wday, - ).outputs[ - "output" - ] # type: ignore + input=input, # type: ignore + second=second, + minute=minute, + hour=hour, + mday=mday, + month=month, + wday=wday, + ).outputs["output"] diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index a78934063..295f36bbe 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -15,8 +15,7 @@ """Implementation for the TickCalendar operator.""" -from datetime import datetime, timedelta -from typing import Dict, List +from typing import Dict, Literal, Union, Tuple import numpy as np @@ -33,6 +32,18 @@ def __init__(self, operator: TickCalendar) -> None: assert isinstance(operator, TickCalendar) super().__init__(operator) + def _get_arg_range( + self, + arg_value: Union[int, Literal["*"]], + val_range: Tuple[int, int], + ): + if arg_value == "*": + range_ini, range_end = val_range + else: + range_ini = range_end = arg_value + + return range_ini, range_end + def __call__(self, input: EventSet) -> Dict[str, EventSet]: assert isinstance(self.operator, TickCalendar) output_schema = self.output_schema("output") @@ -40,6 +51,26 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: # Create output EventSet output_evset = EventSet(data={}, schema=output_schema) + # Get range for each argument + second_range = self._get_arg_range( + self.operator.second, self.operator.seconds_max_range() + ) + minute_range = self._get_arg_range( + self.operator.minute, self.operator.minutes_max_range() + ) + hour_range = self._get_arg_range( + self.operator.hour, self.operator.hours_max_range() + ) + mday_range = self._get_arg_range( + self.operator.mday, self.operator.mday_max_range() + ) + month_range = self._get_arg_range( + self.operator.month, self.operator.month_max_range() + ) + wday_range = self._get_arg_range( + self.operator.wday, self.operator.wday_max_range() + ) + # Fill output EventSet's data for index_key, index_data in input.data.items(): if len(index_data.timestamps) == 0: @@ -48,18 +79,18 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: dst_timestamps = operators_cc.tick_calendar( start_timestamp=index_data.timestamps[0], end_timestamp=index_data.timestamps[-1], - min_second=self.operator.min_second, - max_second=self.operator.max_second, - min_minute=self.operator.min_minute, - max_minute=self.operator.max_minute, - min_hour=self.operator.min_hour, - max_hour=self.operator.max_hour, - min_mday=self.operator.min_mday, - max_mday=self.operator.max_mday, - min_month=self.operator.min_month, - max_month=self.operator.max_month, - min_wday=self.operator.min_wday, - max_wday=self.operator.max_wday, + min_second=second_range[0], + max_second=second_range[1], + min_minute=minute_range[0], + max_minute=minute_range[1], + min_hour=hour_range[0], + max_hour=hour_range[1], + min_mday=mday_range[0], + max_mday=mday_range[1], + min_month=month_range[0], + max_month=month_range[1], + min_wday=wday_range[0], + max_wday=wday_range[1], ) output_evset.set_index_value( index_key, From da7a61ff3dd42a81abb2ea82cfa08bec9185d00f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 16:04:55 -0300 Subject: [PATCH 14/27] Updated tests for new tick_calendar args --- .../operators/test/tick_calendar_test.py | 72 +++++++------------ 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 1eed7274f..ddb321b46 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -39,35 +39,25 @@ def test_end_of_month_seconds(self): ) node = evset.node() + # Expected output + def seconds_at_01_01(day, month): + return [datetime(2020, day, month, 1, 1, sec) for sec in range(60)] + expected_output = event_set( - timestamps=[ - datetime(2020, 1, 31, 1, 1, 0), - datetime(2020, 1, 31, 1, 1, 1), - datetime(2020, 1, 31, 1, 1, 2), - datetime(2020, 3, 31, 1, 1, 0), - datetime(2020, 3, 31, 1, 1, 1), - datetime(2020, 3, 31, 1, 1, 2), - datetime(2020, 5, 31, 1, 1, 0), - datetime(2020, 5, 31, 1, 1, 1), - datetime(2020, 5, 31, 1, 1, 2), - ], + timestamps=seconds_at_01_01(1, 31) + + seconds_at_01_01(3, 31) + + seconds_at_01_01(5, 31), ) # Run op op = TickCalendar( input=node, - min_second=0, - max_second=2, - min_minute=1, - max_minute=1, - min_hour=1, - max_hour=1, - min_mday=31, - max_mday=31, - min_month=1, - max_month=12, - min_wday=0, - max_wday=6, + second="*", + minute=1, + hour=1, + mday=31, + month="*", + wday="*", ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) @@ -105,18 +95,12 @@ def test_end_of_year_minutes(self): # Run op op = TickCalendar( input=node, - min_second=0, - max_second=0, - min_minute=0, - max_minute=59, - min_hour=0, - max_hour=23, - min_mday=1, # any day - max_mday=31, - min_month=1, # any month - max_month=12, - min_wday=0, - max_wday=6, + second=0, + minute="*", + hour="*", + mday="*", + month="*", + wday="*", ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) @@ -150,18 +134,12 @@ def test_weekdays(self): # Run op op = TickCalendar( input=node, - min_second=0, - max_second=0, - min_minute=0, - max_minute=0, - min_hour=0, - max_hour=23, # all hours - min_mday=1, # any month day - max_mday=31, - min_month=1, # any month - max_month=12, - min_wday=6, # saturday - max_wday=6, # saturday + second=0, + minute=0, + hour="*", + wday=6, + mday="*", + month="*", ) instance = TickCalendarNumpyImplementation(op) testOperatorAndImp(self, op, instance) From ec4ab1dadb697f83946cdf6049f6a4c881dc7cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 16:21:54 -0300 Subject: [PATCH 15/27] Fixes and test for tick_calendar core function --- temporian/core/operators/BUILD | 6 +- temporian/core/operators/test/BUILD | 12 ++ .../core/operators/test/tick_calendar_test.py | 103 ++++++++++++++++++ temporian/core/operators/tick_calendar.py | 26 +++-- 4 files changed, 136 insertions(+), 11 deletions(-) create mode 100644 temporian/core/operators/test/tick_calendar_test.py diff --git a/temporian/core/operators/BUILD b/temporian/core/operators/BUILD index 69ef61906..c6ae8d742 100644 --- a/temporian/core/operators/BUILD +++ b/temporian/core/operators/BUILD @@ -367,10 +367,12 @@ py_library( srcs_version = "PY3", deps = [ ":base", + "//temporian/core:compilation", "//temporian/core:operator_lib", + "//temporian/core:typing", + "//temporian/core/data:dtype", "//temporian/core/data:node", - "//temporian/core/data:schema", "//temporian/proto:core_py_proto", + "//temporian/utils:typecheck", ], ) - \ No newline at end of file diff --git a/temporian/core/operators/test/BUILD b/temporian/core/operators/test/BUILD index 5bc3af1e3..4aaf30e63 100644 --- a/temporian/core/operators/test/BUILD +++ b/temporian/core/operators/test/BUILD @@ -40,3 +40,15 @@ py_test( "//temporian/core/operators:fast_fourier_transform", ], ) + +py_test( + name = "tick_calendar_test", + srcs = ["tick_calendar_test.py"], + srcs_version = "PY3", + deps = [ + # already_there/absl/testing:absltest + "//temporian/core/data:dtype", + "//temporian/core/data:node", + "//temporian/core/operators:tick_calendar", + ], +) diff --git a/temporian/core/operators/test/tick_calendar_test.py b/temporian/core/operators/test/tick_calendar_test.py new file mode 100644 index 000000000..9bf93f9bf --- /dev/null +++ b/temporian/core/operators/test/tick_calendar_test.py @@ -0,0 +1,103 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from absl.testing import absltest + +from temporian.core.data.node import input_node +from temporian.core.operators.tick_calendar import tick_calendar, TickCalendar + + +class TickCalendarOperatorTest(absltest.TestCase): + def setUp(self): + self._in = input_node([], is_unix_timestamp=True) + + def test_free_seconds_month(self): + output = tick_calendar(self._in, second="*", minute=1, hour=1, mday=31) + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, "*") + self.assertEqual(op.minute, 1) + self.assertEqual(op.hour, 1) + self.assertEqual(op.mday, 31) + self.assertEqual(op.month, "*") + self.assertEqual(op.wday, "*") + + def test_free_minutes(self): + output = tick_calendar(self._in, minute="*") + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, 0) + self.assertEqual(op.minute, "*") + self.assertEqual(op.hour, "*") + self.assertEqual(op.mday, "*") + self.assertEqual(op.month, "*") + self.assertEqual(op.wday, "*") + + def test_month_day(self): + output = tick_calendar(self._in, mday=5) + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, 0) + self.assertEqual(op.minute, 0) + self.assertEqual(op.hour, 0) + self.assertEqual(op.mday, 5) + self.assertEqual(op.month, "*") + self.assertEqual(op.wday, "*") + + def test_month(self): + output = tick_calendar(self._in, month=8) + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, 0) + self.assertEqual(op.minute, 0) + self.assertEqual(op.hour, 0) + self.assertEqual(op.mday, 1) + self.assertEqual(op.month, 8) + self.assertEqual(op.wday, "*") + + def test_weekdays(self): + output = tick_calendar(self._in, wday=6) + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, 0) + self.assertEqual(op.minute, 0) + self.assertEqual(op.hour, 0) + self.assertEqual(op.mday, "*") + self.assertEqual(op.month, "*") + self.assertEqual(op.wday, 6) + + def test_weekdays_month(self): + output = tick_calendar(self._in, wday=6, month=3) + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, 0) + self.assertEqual(op.minute, 0) + self.assertEqual(op.hour, 0) + self.assertEqual(op.mday, "*") + self.assertEqual(op.month, 3) + self.assertEqual(op.wday, 6) + + def test_weekdays_all_hours(self): + output = tick_calendar(self._in, wday=6, hour="*") + op = output.creator + assert isinstance(op, TickCalendar) + self.assertEqual(op.second, 0) + self.assertEqual(op.minute, 0) + self.assertEqual(op.hour, "*") + self.assertEqual(op.mday, "*") + self.assertEqual(op.month, "*") + self.assertEqual(op.wday, 6) + + +if __name__ == "__main__": + absltest.main() diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index b38eb04b1..938a537a9 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -219,22 +219,30 @@ def tick_calendar( " integer value instead" ) - # Always set second=0 by default - second = 0 if second is None else second - # prefer_free becomes True when next args should be set to '*' by default # e.g: user sets only hour=1 -> second=0,minute=0, mday='*', month='*' - prefer_free = second is not None + release_ranges = False + + # Always set second=0 by default + if second is None: + second = 0 + else: + release_ranges = True # fixed seconds, free minute, hour + if minute is None: - minute = "*" if prefer_free else 0 + minute = "*" if release_ranges else 0 + else: + release_ranges = True # fixed minutes, free hour, day, month - prefer_free |= minute is not None if hour is None: - hour = "*" if prefer_free else 0 + hour = "*" if release_ranges else 0 + else: + release_ranges = True - prefer_free |= hour is not None if mday is None: - mday = "*" if prefer_free else 1 + # If wday is specified, always leave mday free by default + free_mday = release_ranges or wday is not None + mday = "*" if free_mday else 1 # Always free range by default month = "*" if month is None else month From 1cb707865bb01faceb785fec9c1216933ef8695c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 16:33:06 -0300 Subject: [PATCH 16/27] Small bugfix in docstring example --- temporian/core/event_set_ops.py | 2 +- temporian/utils/typecheck.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 157316523..651960d21 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2280,7 +2280,7 @@ def tick_calendar( ... >>> # Not allowed: intermediate arguments (minute, hour) not specified - >>> b = a.tick_calendar(second='1', mday=1) # ambiguous meaning + >>> b = a.tick_calendar(second=1, mday=1) # ambiguous meaning Traceback (most recent call last): ... ValueError: Can't set argument to None because previous and diff --git a/temporian/utils/typecheck.py b/temporian/utils/typecheck.py index 116049229..0bb46b9da 100644 --- a/temporian/utils/typecheck.py +++ b/temporian/utils/typecheck.py @@ -215,6 +215,7 @@ def _check_annotation_union( trace.exception( f'Non matching type for "{type(value)}" in the union {annotation_args}.' + f' The value is "{value}".' ) From c8690d768a746b7a54b9ae8f9331b351fba5fab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 16:59:12 -0300 Subject: [PATCH 17/27] Added invalid args tests --- .../core/operators/test/tick_calendar_test.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/temporian/core/operators/test/tick_calendar_test.py b/temporian/core/operators/test/tick_calendar_test.py index 9bf93f9bf..b1e059779 100644 --- a/temporian/core/operators/test/tick_calendar_test.py +++ b/temporian/core/operators/test/tick_calendar_test.py @@ -98,6 +98,57 @@ def test_weekdays_all_hours(self): self.assertEqual(op.month, "*") self.assertEqual(op.wday, 6) + def test_invalid_ranges(self): + for kwargs in ( + {"second": -1}, + {"second": 60}, + {"minute": -1}, + {"minute": 60}, + {"hour": -1}, + {"hour": 24}, + {"mday": 0}, + {"mday": 32}, + {"mday": -1}, # may be supported in the future + {"month": -1}, + {"month": 13}, + {"wday": -1}, + {"wday": 7}, + ): + with self.assertRaisesRegex( + ValueError, "Value should be '\*' or integer in range" + ): + _ = tick_calendar(self._in, **kwargs) # type: ignore + + def test_invalid_types(self): + for kwargs in ( + {"second": "1"}, + {"minute": "00"}, + {"hour": "00:00"}, + {"month": "January"}, + {"wday": "Sat"}, + ): + with self.assertRaisesRegex(ValueError, "Non matching type"): + _ = tick_calendar(self._in, **kwargs) # type: ignore + + def test_undefined_args(self): + with self.assertRaisesRegex( + ValueError, + "Can't set argument to None because previous and following", + ): + _ = tick_calendar(self._in, second=1, hour=1) # undefined min + + with self.assertRaisesRegex( + ValueError, + "Can't set argument to None because previous and following", + ): + _ = tick_calendar(self._in, second=1, month=1) + + with self.assertRaisesRegex( + ValueError, + "Can't set argument to None because previous and following", + ): + _ = tick_calendar(self._in, hour=0, month=1) + if __name__ == "__main__": absltest.main() From 25f476f70f0fdc21f88643019795b64ebb809d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 12 Sep 2023 17:03:21 -0300 Subject: [PATCH 18/27] Fix in docstring --- temporian/core/event_set_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 872cf8439..91fa9cf92 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2410,7 +2410,7 @@ def tick_calendar( a lower resolution is specified. For example, setting only `tick_calendar(hour='*')` is equivalent to: - `tick_calendar(second=0, minute=0, hour='*', day_of_month='*', month='*')` + `tick_calendar(second=0, minute=0, hour='*', mday='*', month='*')` , resulting in one tick at every exact hour of every day/month/year in the input guide range. @@ -2502,13 +2502,13 @@ def tick_calendar( to tick at specific minute of each hour. hour: '*' (any hour), None (auto), or number in range `[0-23]` to tick at specific hour of each day. - day_of_month: '*' (any day), None (auto) or number in range `[1-31]` + mday: '*' (any day), None (auto) or number in range `[1-31]` to tick at specific day of each month. Note that months without some particular day may not have any tick (e.g: day 31 on February). month: '*' (any month), None (auto) or number in range `[1-12]` to tick at one particular month of each year. - day_of_week: '*' (any day), None (auto) or number in range `[0-6]` + wday: '*' (any day), None (auto) or number in range `[0-6]` (Sun-Sat) to tick at particular day of week. Can only be specified if `day_of_month` is `None`. From c9693ec15637e80866da0c400ee68a9cbc94aefa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Wed, 13 Sep 2023 12:24:10 -0300 Subject: [PATCH 19/27] Changes after PR comments --- temporian/core/event_set_ops.py | 12 ++++++------ temporian/core/operators/tick_calendar.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py index 91fa9cf92..7f64343aa 100644 --- a/temporian/core/event_set_ops.py +++ b/temporian/core/event_set_ops.py @@ -2389,12 +2389,12 @@ def tick( def tick_calendar( self: EventSetOrNode, - second: Union[int, Literal["*"], None] = None, - minute: Union[int, Literal["*"], None] = None, - hour: Union[int, Literal["*"], None] = None, - mday: Union[int, Literal["*"], None] = None, - month: Union[int, Literal["*"], None] = None, - wday: Union[int, Literal["*"], None] = None, + second: Optional[Union[int, Literal["*"]]] = None, + minute: Optional[Union[int, Literal["*"]]] = None, + hour: Optional[Union[int, Literal["*"]]] = None, + mday: Optional[Union[int, Literal["*"]]] = None, + month: Optional[Union[int, Literal["*"]]] = None, + wday: Optional[Union[int, Literal["*"]]] = None, ) -> EventSetOrNode: """Generates events periodically at fixed times or dates e.g. each month. diff --git a/temporian/core/operators/tick_calendar.py b/temporian/core/operators/tick_calendar.py index 938a537a9..07e64136b 100644 --- a/temporian/core/operators/tick_calendar.py +++ b/temporian/core/operators/tick_calendar.py @@ -14,7 +14,7 @@ """TickCalendar operator class and public API function definitions.""" -from typing import Union, Literal, Tuple +from typing import Literal, Tuple, Optional, Union import numpy as np @@ -191,12 +191,12 @@ def build_op_definition(cls) -> pb.OperatorDef: @compile def tick_calendar( input: EventSetOrNode, - second: Union[int, TypeWildCard, None] = None, - minute: Union[int, TypeWildCard, None] = None, - hour: Union[int, TypeWildCard, None] = None, - mday: Union[int, TypeWildCard, None] = None, - month: Union[int, TypeWildCard, None] = None, - wday: Union[int, TypeWildCard, None] = None, + second: Optional[Union[int, TypeWildCard]] = None, + minute: Optional[Union[int, TypeWildCard]] = None, + hour: Optional[Union[int, TypeWildCard]] = None, + mday: Optional[Union[int, TypeWildCard]] = None, + month: Optional[Union[int, TypeWildCard]] = None, + wday: Optional[Union[int, TypeWildCard]] = None, ) -> EventSetOrNode: # Don't allow empty args if all(arg is None for arg in (second, minute, hour, mday, month, wday)): From 07429877d0458fcd0c4a4cdd7468f636c10de2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 21 Sep 2023 12:05:15 -0300 Subject: [PATCH 20/27] Update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 718fb4b8d..71c754094 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Features +- Added `EventSet.tick_calendar()` operator. + ### Improvements ### Fixes @@ -29,7 +31,6 @@ ### Fixes - Fixed tutorials opening unreleased versions of the notebooks. -- ## 0.1.3 From 13952dcf50b768703099c8a866779886caaf06a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 21 Sep 2023 12:24:57 -0300 Subject: [PATCH 21/27] Fix bazel deps --- temporian/implementation/numpy/operators/BUILD | 2 -- temporian/implementation/numpy/operators/tick_calendar.py | 1 - 2 files changed, 3 deletions(-) diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index fae0f64d5..eda5eeca7 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -377,7 +377,6 @@ py_library( ":base", "//temporian/core/operators:select_index_values", "//temporian/implementation/numpy:implementation_lib", - "//temporian/implementation/numpy:utils", "//temporian/implementation/numpy/data:event_set", ], ) @@ -392,7 +391,6 @@ py_library( "//temporian/core/data:duration_utils", "//temporian/core/operators:tick_calendar", "//temporian/implementation/numpy:implementation_lib", - "//temporian/implementation/numpy:utils", "//temporian/implementation/numpy/data:event_set", "//temporian/implementation/numpy_cc/operators:operators_cc", ], diff --git a/temporian/implementation/numpy/operators/tick_calendar.py b/temporian/implementation/numpy/operators/tick_calendar.py index 295f36bbe..7e46500fd 100644 --- a/temporian/implementation/numpy/operators/tick_calendar.py +++ b/temporian/implementation/numpy/operators/tick_calendar.py @@ -23,7 +23,6 @@ from temporian.core.operators.tick_calendar import TickCalendar from temporian.implementation.numpy import implementation_lib from temporian.implementation.numpy.operators.base import OperatorImplementation -from temporian.core.data import duration from temporian.implementation.numpy_cc.operators import operators_cc From 3f9535ad88f654f2500ec2af63ca84a446d87bc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Mon, 25 Sep 2023 10:58:47 -0300 Subject: [PATCH 22/27] Minor update --- temporian/implementation/numpy_cc/operators/tick_calendar.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 0ed331409..08f24ef3e 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -54,7 +54,7 @@ py::array_t tick_calendar( tm_struct.tm_min = minute; tm_struct.tm_sec = second; - std::time_t time = std::mktime(&tm_struct); + const std::time_t time = std::mktime(&tm_struct); // Valid date if (time != -1 && tm_struct.tm_mday == mday) { From 75b795eaa24c8cddc6fae03b46a2104313bd55e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Wed, 27 Sep 2023 14:00:30 -0300 Subject: [PATCH 23/27] Bugfix & more tests in tick_calendar --- .../operators/test/tick_calendar_test.py | 67 +++++++++++++++++++ .../numpy_cc/operators/tick_calendar.cc | 10 +++ 2 files changed, 77 insertions(+) diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index ddb321b46..632a6f495 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -28,6 +28,73 @@ class TickCalendarOperatorTest(absltest.TestCase): + def test_start_end_00_00(self): + evset = event_set( + timestamps=[ + "2020-01-01 00:00", + "2020-03-01 00:00", + ], + ) + node = evset.node() + + # Expected output + expected_output = event_set( + timestamps=[ + "2020-01-01 00:00", + "2020-02-01 00:00", + "2020-03-01 00:00", + ], + ) + + # Run op + op = TickCalendar( + input=node, + second=0, + minute=0, + hour=0, + mday=1, + month="*", + wday="*", + ) + instance = TickCalendarNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + + def test_start_end_offset(self): + evset = event_set( + timestamps=[ + "2020-01-01 13:04", + "2020-03-06 19:35", + ], + ) + node = evset.node() + + # Expected output + expected_output = event_set( + timestamps=[ + "2020-02-01 00:00", + "2020-03-01 00:00", + ], + ) + + # Run op + op = TickCalendar( + input=node, + second=0, + minute=0, + hour=0, + mday=1, + month="*", + wday="*", + ) + instance = TickCalendarNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + def test_end_of_month_seconds(self): # All seconds at mday=31, should only be valid for months 1, 3, 5 diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 08f24ef3e..55c5a411d 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -80,15 +80,25 @@ py::array_t tick_calendar( second = min_second; minute++; } + second = min_second; minute = min_minute; hour++; } + second = min_second; + minute = min_minute; hour = min_hour; mday++; } + second = min_second; + minute = min_minute; + hour = min_hour; mday = min_mday; month++; } + second = min_second; + minute = min_minute; + hour = min_hour; + mday = min_mday; month = min_month; year++; } From 06f4ce6b1158293385aadb25d1f84556f2fcac8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Wed, 27 Sep 2023 16:06:03 -0300 Subject: [PATCH 24/27] Bugfix in tick_calendar cpp: set UTC explicitly --- temporian/implementation/numpy_cc/operators/tick_calendar.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 55c5a411d..1b4f91b4f 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -53,6 +53,8 @@ py::array_t tick_calendar( tm_struct.tm_hour = hour; tm_struct.tm_min = minute; tm_struct.tm_sec = second; + tm_struct.tm_gmtoff = 0; // set UTC + tm_struct.tm_isdst = 0; const std::time_t time = std::mktime(&tm_struct); From 567414100c98e1294c72276d6e391c3bfb384d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 28 Sep 2023 18:16:35 -0300 Subject: [PATCH 25/27] Add SetTimezone context and test TZ!=UTC --- .../operators/test/calendar_hour_test.py | 31 +++++++++++++------ .../numpy/operators/test/test_util.py | 18 ++++++++++- .../operators/test/tick_calendar_test.py | 11 ++++++- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/temporian/implementation/numpy/operators/test/calendar_hour_test.py b/temporian/implementation/numpy/operators/test/calendar_hour_test.py index 983795e8f..72c8972b2 100644 --- a/temporian/implementation/numpy/operators/test/calendar_hour_test.py +++ b/temporian/implementation/numpy/operators/test/calendar_hour_test.py @@ -25,6 +25,7 @@ from temporian.io.pandas import from_pandas from temporian.implementation.numpy.data.io import event_set from temporian.implementation.numpy.operators.test.test_util import ( + SetTimezone, assertEqualEventSet, ) @@ -32,9 +33,8 @@ class CalendarHourNumpyImplementationTest(absltest.TestCase): """Test numpy implementation of calendar_hour operator.""" - def test_basic(self) -> None: - "Basic test with flat node." - input_evset = from_pandas( + def setUp(self): + self.input_evset = from_pandas( pd.DataFrame( data=[ [pd.to_datetime("1970-01-01 00:00:00", utc=True)], @@ -47,23 +47,36 @@ def test_basic(self) -> None: ), ) - output_evset = event_set( - timestamps=input_evset.get_arbitrary_index_data().timestamps, + self.output_evset = event_set( + timestamps=self.input_evset.get_arbitrary_index_data().timestamps, features={ "calendar_hour": np.array([0, 1, 1, 12, 23]).astype(np.int32), }, is_unix_timestamp=True, ) - operator = CalendarHourOperator(input_evset.node()) - impl = CalendarHourNumpyImplementation(operator) - output = impl.call(sampling=input_evset)["output"] + self.operator = CalendarHourOperator(self.input_evset.node()) + self.impl = CalendarHourNumpyImplementation(self.operator) - assertEqualEventSet(self, output, output_evset) + def test_basic(self) -> None: + "Basic test with flat node." + output = self.impl.call(sampling=self.input_evset)["output"] + + assertEqualEventSet(self, output, self.output_evset) self.assertTrue( output.get_arbitrary_index_data().features[0].dtype == np.int32 ) + def test_timezone_defined(self) -> None: + "Define TZ env var and check that it works identically" + with SetTimezone(): + output = self.impl.call(sampling=self.input_evset)["output"] + + assertEqualEventSet(self, output, self.output_evset) + self.assertTrue( + output.get_arbitrary_index_data().features[0].dtype == np.int32 + ) + if __name__ == "__main__": absltest.main() diff --git a/temporian/implementation/numpy/operators/test/test_util.py b/temporian/implementation/numpy/operators/test/test_util.py index 52a36401c..8706c3696 100644 --- a/temporian/implementation/numpy/operators/test/test_util.py +++ b/temporian/implementation/numpy/operators/test/test_util.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import os +import time from absl.testing import absltest from temporian.implementation.numpy.data.event_set import EventSet @@ -78,3 +79,18 @@ def testOperatorAndImp( nodes[serialization._identifier(node)] = node _ = serialization._unserialize_operator(serialized_op, nodes) + + +class SetTimezone: + def __init__(self, timezone: str = "America/Montevideo"): + self._tz = timezone + self._restore_tz = "" + + def __enter__(self): + self._restore_tz = os.environ.get("TZ", "") + os.environ["TZ"] = self._tz + time.tzset() + + def __exit__(self, *args): + os.environ["TZ"] = self._restore_tz + time.tzset() diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 632a6f495..8a1edcdea 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import os, time from datetime import datetime, timedelta from absl.testing import absltest @@ -24,6 +24,7 @@ from temporian.implementation.numpy.operators.test.test_util import ( assertEqualEventSet, testOperatorAndImp, + SetTimezone, ) @@ -62,6 +63,14 @@ def test_start_end_00_00(self): assertEqualEventSet(self, output, expected_output) + # Check that it's exactly the same with env TZ!=UTC defined + with SetTimezone(): + instance = TickCalendarNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + def test_start_end_offset(self): evset = event_set( timestamps=[ From 3885bdd58cbcf85ef415a1b9d6d17c131f0da14b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Thu, 28 Sep 2023 18:18:03 -0300 Subject: [PATCH 26/27] Bugfix in tick_calendar.cc for timezone support --- .../numpy_cc/operators/tick_calendar.cc | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/temporian/implementation/numpy_cc/operators/tick_calendar.cc b/temporian/implementation/numpy_cc/operators/tick_calendar.cc index 1b4f91b4f..ca7c9a5a9 100644 --- a/temporian/implementation/numpy_cc/operators/tick_calendar.cc +++ b/temporian/implementation/numpy_cc/operators/tick_calendar.cc @@ -32,6 +32,7 @@ py::array_t tick_calendar( const long end_t = (long)std::floor(end_timestamp); std::tm start_utc = *std::gmtime(&start_t); + int year = start_utc.tm_year; // from 1900 int month = std::max(start_utc.tm_mon + 1, min_month); // zero-based tm_mon int mday = std::max(start_utc.tm_mday, min_mday); // 1-31 @@ -39,6 +40,10 @@ py::array_t tick_calendar( int minute = std::max(start_utc.tm_min, min_minute); int second = std::max(start_utc.tm_sec, min_second); + // Workaround to get timestamp from UTC datetimes (mktime depends on timezone) + std::tm start_local = *std::localtime(&start_t); + const int offset_tzone = std::mktime(&start_utc) - std::mktime(&start_local); + bool in_range = true; while (in_range) { while (month <= max_month && in_range) { @@ -46,30 +51,34 @@ py::array_t tick_calendar( while (hour <= max_hour && in_range) { while (minute <= max_minute && in_range) { while (second <= max_second && in_range) { - std::tm tm_struct = {}; - tm_struct.tm_year = year; // Since 1900 - tm_struct.tm_mon = month - 1; // zero-based - tm_struct.tm_mday = mday; - tm_struct.tm_hour = hour; - tm_struct.tm_min = minute; - tm_struct.tm_sec = second; - tm_struct.tm_gmtoff = 0; // set UTC - tm_struct.tm_isdst = 0; - - const std::time_t time = std::mktime(&tm_struct); + std::tm tm_date = {}; + tm_date.tm_year = year; // Since 1900 + tm_date.tm_mon = month - 1; // zero-based + tm_date.tm_mday = mday; + tm_date.tm_hour = hour; + tm_date.tm_min = minute; + tm_date.tm_sec = second; + tm_date.tm_isdst = 0; + tm_date.tm_gmtoff = start_local.tm_gmtoff; + + // This assumes that the date is in local timezone + const std::time_t time_local = std::mktime(&tm_date); // Valid date - if (time != -1 && tm_struct.tm_mday == mday) { + if (time_local != -1 && tm_date.tm_mday == mday) { + // Remove timezone offset from timestamp + const std::time_t time_utc = time_local - offset_tzone; + // Finish condition - if (time > end_t) { + if (time_utc > end_t) { in_range = false; break; } - // Check weekday match - if (tm_struct.tm_wday >= min_wday && - tm_struct.tm_wday <= max_wday) { - ticks.push_back(time); + // Check weekday match (mktime sets it properly) + if (tm_date.tm_wday >= min_wday && + tm_date.tm_wday <= max_wday) { + ticks.push_back(time_utc); } } else { // Invalid date (e.g: 31/4) From 662380cc6bca8b6db12f1947adfbfb2a9abddfbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Wed, 4 Oct 2023 13:14:10 -0300 Subject: [PATCH 27/27] Fixes after merge --- temporian/implementation/numpy/operators/test/BUILD | 2 +- .../implementation/numpy/operators/test/calendar_hour_test.py | 3 ++- .../implementation/numpy/operators/test/tick_calendar_test.py | 3 +-- temporian/implementation/numpy/operators/test/utils.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/temporian/implementation/numpy/operators/test/BUILD b/temporian/implementation/numpy/operators/test/BUILD index fdca3efcd..20455f783 100644 --- a/temporian/implementation/numpy/operators/test/BUILD +++ b/temporian/implementation/numpy/operators/test/BUILD @@ -753,7 +753,7 @@ py_test( srcs_version = "PY3", deps = [ # already_there/absl/testing:absltest - ":test_util", + ":utils", "//temporian/core/data:dtype", "//temporian/core/data:node", "//temporian/core/data:schema", diff --git a/temporian/implementation/numpy/operators/test/calendar_hour_test.py b/temporian/implementation/numpy/operators/test/calendar_hour_test.py index 88d842123..03f5328bb 100644 --- a/temporian/implementation/numpy/operators/test/calendar_hour_test.py +++ b/temporian/implementation/numpy/operators/test/calendar_hour_test.py @@ -25,7 +25,8 @@ from temporian.io.pandas import from_pandas from temporian.implementation.numpy.data.io import event_set from temporian.implementation.numpy.operators.test.utils import ( - assertEqualEventSet, SetTimezone + assertEqualEventSet, + SetTimezone, ) diff --git a/temporian/implementation/numpy/operators/test/tick_calendar_test.py b/temporian/implementation/numpy/operators/test/tick_calendar_test.py index 8a1edcdea..6d8b4bee5 100644 --- a/temporian/implementation/numpy/operators/test/tick_calendar_test.py +++ b/temporian/implementation/numpy/operators/test/tick_calendar_test.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os, time from datetime import datetime, timedelta from absl.testing import absltest @@ -21,7 +20,7 @@ from temporian.implementation.numpy.operators.tick_calendar import ( TickCalendarNumpyImplementation, ) -from temporian.implementation.numpy.operators.test.test_util import ( +from temporian.implementation.numpy.operators.test.utils import ( assertEqualEventSet, testOperatorAndImp, SetTimezone, diff --git a/temporian/implementation/numpy/operators/test/utils.py b/temporian/implementation/numpy/operators/test/utils.py index e6fd60ac3..8706c3696 100644 --- a/temporian/implementation/numpy/operators/test/utils.py +++ b/temporian/implementation/numpy/operators/test/utils.py @@ -93,4 +93,4 @@ def __enter__(self): def __exit__(self, *args): os.environ["TZ"] = self._restore_tz - time.tzset() \ No newline at end of file + time.tzset()