From 3c76c5a782ebf8b252793df1c93d50d5617edaf6 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 8 Aug 2022 20:42:32 -0700 Subject: [PATCH 01/13] fix parse --- .../stream_slicers/datetime_stream_slicer.py | 11 ++++++++--- .../stream_slicers/test_datetime_stream_slicer.py | 6 ++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index c81d11e85129..809265e08f6c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -174,10 +174,15 @@ def _get_date(self, cursor_value, default_date: datetime.datetime, comparator) - return comparator(cursor_date, default_date) def parse_date(self, date: Union[str, datetime.datetime]) -> datetime.datetime: - if isinstance(date, str): - return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone) - else: + if isinstance(date, datetime.datetime): return date + elif self.datetime_format == "%s": + # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on + # It's safer to use the timestamp() method than the %s directive + # See https://stackoverflow.com/a/4974930 + return datetime.datetime.fromtimestamp(int(date)).replace(tzinfo=self._timezone) + else: + return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone) @classmethod def _parse_timedelta(cls, time_str): diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py index e2321ad607f2..e260c5fc17d5 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py @@ -461,6 +461,12 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), + ( + "test_parse_timestamp", + "1609459200", + "%s", + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), ], ) def test_parse_date(test_name, input_date, date_format, expected_output_date): From b9277d0d2dd5344f5c87237b6b76f6c44ad5c0bf Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 07:13:29 -0700 Subject: [PATCH 02/13] Revert "fix parse" This reverts commit 3c76c5a782ebf8b252793df1c93d50d5617edaf6. --- .../stream_slicers/datetime_stream_slicer.py | 11 +++-------- .../stream_slicers/test_datetime_stream_slicer.py | 6 ------ 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index 809265e08f6c..c81d11e85129 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -174,15 +174,10 @@ def _get_date(self, cursor_value, default_date: datetime.datetime, comparator) - return comparator(cursor_date, default_date) def parse_date(self, date: Union[str, datetime.datetime]) -> datetime.datetime: - if isinstance(date, datetime.datetime): - return date - elif self.datetime_format == "%s": - # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on - # It's safer to use the timestamp() method than the %s directive - # See https://stackoverflow.com/a/4974930 - return datetime.datetime.fromtimestamp(int(date)).replace(tzinfo=self._timezone) - else: + if isinstance(date, str): return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone) + else: + return date @classmethod def _parse_timedelta(cls, time_str): diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py index e260c5fc17d5..e2321ad607f2 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py @@ -461,12 +461,6 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), - ( - "test_parse_timestamp", - "1609459200", - "%s", - datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), - ), ], ) def test_parse_date(test_name, input_date, date_format, expected_output_date): From a584b8436c5e55da4d10d9e410ec2ad1253cc98c Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 07:23:21 -0700 Subject: [PATCH 03/13] fix parse timestamp --- .../stream_slicers/datetime_stream_slicer.py | 11 ++++++++++- .../stream_slicers/test_datetime_stream_slicer.py | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index c81d11e85129..569659c2753b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -175,7 +175,16 @@ def _get_date(self, cursor_value, default_date: datetime.datetime, comparator) - def parse_date(self, date: Union[str, datetime.datetime]) -> datetime.datetime: if isinstance(date, str): - return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone) + # "%s" is a valid (but unreliable) directive for formatting, but not for parsing + # It is defined as + # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html + # + # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp + # See https://stackoverflow.com/questions/4974712/python-setting-a-datetime-in-a-specific-timezone-without-utc-conversions/4974930#4974930 + if self.datetime_format == "%s": + return datetime.datetime.fromtimestamp(int(date), tz=self._timezone) + else: + return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone) else: return date diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py index e2321ad607f2..338ce181eda9 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py @@ -454,6 +454,12 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, "%Y-%m-%dT%H:%M:%S.%f%z", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), + ( + "test_parse_timestamp", + "1609459200", + "%s", + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), ("test_parse_date_number", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)), ( "test_parse_date_datetime", @@ -483,6 +489,7 @@ def test_parse_date(test_name, input_date, date_format, expected_output_date): [ ("test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "1609459200"), ("test_format_string", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "2021-01-01"), + ("test_format_to_number", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y%m%d", "20210101"), ], ) def test_format_datetime(test_name, input_dt, datetimeformat, expected_output): From 99573030ebbf98114307d0c9665d37462046bd34 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 08:00:37 -0700 Subject: [PATCH 04/13] extract datetime parser --- .../declarative/datetime/datetime_parser.py | 29 ++++++++++++ .../declarative/datetime/min_max_datetime.py | 10 ++-- .../stream_slicers/datetime_stream_slicer.py | 38 ++++++--------- .../datetime/test_datetime_parser.py | 46 +++++++++++++++++++ .../test_datetime_stream_slicer.py | 6 --- 5 files changed, 93 insertions(+), 36 deletions(-) create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_datetime_parser.py diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py new file mode 100644 index 000000000000..96ff76553c11 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + +import datetime +from typing import Union + + +class DatetimeParser: + def parse(self, date: Union[str, int], format: str, timezone): + # "%s" is a valid (but unreliable) directive for formatting, but not for parsing + # It is defined as + # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html + # + # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp + # See https://stackoverflow.com/a/4974930 + if format == "%s": + return datetime.datetime.fromtimestamp(int(date), tz=timezone) + else: + return datetime.datetime.strptime(str(date), format).replace(tzinfo=timezone) + + def format(self, dt: datetime.datetime, format: str) -> str: + # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on + # It's safer to use the timestamp() method than the %s directive + # See https://stackoverflow.com/a/4974930 + if format == "%s": + return str(int(dt.timestamp())) + else: + return dt.strftime(format) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index 0c4b5232cf69..a30a1fe9f69e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field from typing import Any, Mapping, Union +from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from dataclasses_jsonschema import JsonSchemaMixin @@ -40,6 +41,7 @@ class MinMaxDatetime(JsonSchemaMixin): def __post_init__(self, options: Mapping[str, Any]): self.datetime = InterpolatedString.create(self.datetime, options=options or {}) self.timezone = dt.timezone.utc + self._parser = DatetimeParser() self.min_datetime = InterpolatedString.create(self.min_datetime, options=options) if self.min_datetime else None self.max_datetime = InterpolatedString.create(self.max_datetime, options=options) if self.max_datetime else None @@ -60,14 +62,10 @@ def get_datetime(self, config, **additional_options) -> dt.datetime: time = dt.datetime.strptime(str(self.datetime.eval(config, **additional_options)), datetime_format).replace(tzinfo=self._timezone) if self.min_datetime: - min_time = dt.datetime.strptime(str(self.min_datetime.eval(config, **additional_options)), datetime_format).replace( - tzinfo=self._timezone - ) + min_time = self._parser.parse(str(self.min_datetime.eval(config, **additional_options)), datetime_format, self.timezone) time = max(time, min_time) if self.max_datetime: - max_time = dt.datetime.strptime(str(self.max_datetime.eval(config, **additional_options)), datetime_format).replace( - tzinfo=self._timezone - ) + max_time = self._parser.parse(str(self.max_datetime.eval(config, **additional_options)), datetime_format, self.timezone) time = min(time, max_time) return time diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index 569659c2753b..206467f6fb6e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -5,9 +5,10 @@ import datetime import re from dataclasses import InitVar, dataclass, field -from typing import Any, Iterable, Mapping, Optional, Union +from typing import Any, Iterable, Mapping, Optional from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation @@ -77,8 +78,10 @@ def __post_init__(self, options: Mapping[str, Any]): self.cursor_field = InterpolatedString.create(self.cursor_field, options=options) self.stream_slice_field_start = InterpolatedString.create(self.stream_state_field_start or "start_time", options=options) self.stream_slice_field_end = InterpolatedString.create(self.stream_state_field_end or "end_time", options=options) + self._parser = DatetimeParser() # If datetime format is not specified then start/end datetime should inherit it from the stream slicer + print(f"initial format: {self.start_datetime.datetime_format}") if not self.start_datetime.datetime_format: self.start_datetime.datetime_format = self.datetime_format if not self.end_datetime.datetime_format: @@ -142,7 +145,12 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> start_datetime = max(cursor_datetime, start_datetime) - state_date = self.parse_date(stream_state.get(self.cursor_field.eval(self.config, stream_state=stream_state))) + stream_state_unparsed_date = stream_state.get(self.cursor_field.eval(self.config, stream_state=stream_state)) + + if stream_state_unparsed_date: + state_date = self.parse_date(stream_state_unparsed_date) + else: + state_date = None if state_date: # If the input_state's date is greater than start_datetime, the start of the time window is the state's next day next_date = state_date + datetime.timedelta(days=1) @@ -151,13 +159,7 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> return dates def _format_datetime(self, dt: datetime.datetime): - # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on - # It's safer to use the timestamp() method than the %s directive - # See https://stackoverflow.com/a/4974930 - if self.datetime_format == "%s": - return str(int(dt.timestamp())) - else: - return dt.strftime(self.datetime_format) + return self._parser.format(dt, self.datetime_format) def _partition_daterange(self, start, end, step: datetime.timedelta): start_field = self.stream_slice_field_start.eval(self.config) @@ -170,23 +172,11 @@ def _partition_daterange(self, start, end, step: datetime.timedelta): return dates def _get_date(self, cursor_value, default_date: datetime.datetime, comparator) -> datetime.datetime: - cursor_date = self.parse_date(cursor_value or default_date) + cursor_date = cursor_value or default_date return comparator(cursor_date, default_date) - def parse_date(self, date: Union[str, datetime.datetime]) -> datetime.datetime: - if isinstance(date, str): - # "%s" is a valid (but unreliable) directive for formatting, but not for parsing - # It is defined as - # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html - # - # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp - # See https://stackoverflow.com/questions/4974712/python-setting-a-datetime-in-a-specific-timezone-without-utc-conversions/4974930#4974930 - if self.datetime_format == "%s": - return datetime.datetime.fromtimestamp(int(date), tz=self._timezone) - else: - return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone) - else: - return date + def parse_date(self, date: str) -> datetime.datetime: + return self._parser.parse(date, self.datetime_format, self._timezone) @classmethod def _parse_timedelta(cls, time_str): diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_datetime_parser.py b/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_datetime_parser.py new file mode 100644 index 000000000000..e4d701fc3e7a --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_datetime_parser.py @@ -0,0 +1,46 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + +import datetime + +import pytest +from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser + + +@pytest.mark.parametrize( + "test_name, input_date, date_format, expected_output_date", + [ + ( + "test_parse_date_iso", + "2021-01-01T00:00:00.000000+0000", + "%Y-%m-%dT%H:%M:%S.%f%z", + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), + ( + "test_parse_timestamp", + "1609459200", + "%s", + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), + ("test_parse_date_number", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)), + ], +) +def test_parse_date(test_name, input_date, date_format, expected_output_date): + parser = DatetimeParser() + output_date = parser.parse(input_date, date_format, datetime.timezone.utc) + assert expected_output_date == output_date + + +@pytest.mark.parametrize( + "test_name, input_dt, datetimeformat, expected_output", + [ + ("test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "1609459200"), + ("test_format_string", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "2021-01-01"), + ("test_format_to_number", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y%m%d", "20210101"), + ], +) +def test_format_datetime(test_name, input_dt, datetimeformat, expected_output): + parser = DatetimeParser() + output_date = parser.format(input_dt, datetimeformat) + assert expected_output == output_date diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py index 338ce181eda9..ea83a06ad449 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py @@ -461,12 +461,6 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), ("test_parse_date_number", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)), - ( - "test_parse_date_datetime", - datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), - "%Y%m%d", - datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), - ), ], ) def test_parse_date(test_name, input_date, date_format, expected_output_date): From 5f8e7d5eab2f571a7f8813a8110afd1edfd599a1 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 08:01:50 -0700 Subject: [PATCH 05/13] remove print --- .../sources/declarative/stream_slicers/datetime_stream_slicer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index 206467f6fb6e..f5578bbca583 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -81,7 +81,6 @@ def __post_init__(self, options: Mapping[str, Any]): self._parser = DatetimeParser() # If datetime format is not specified then start/end datetime should inherit it from the stream slicer - print(f"initial format: {self.start_datetime.datetime_format}") if not self.start_datetime.datetime_format: self.start_datetime.datetime_format = self.datetime_format if not self.end_datetime.datetime_format: From 72880ce3ee0fdc602c884d84838a44c5c29ca5d9 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 08:04:20 -0700 Subject: [PATCH 06/13] use parser --- .../sources/declarative/datetime/min_max_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index a30a1fe9f69e..c7b3b498b28a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -59,7 +59,7 @@ def get_datetime(self, config, **additional_options) -> dt.datetime: if not datetime_format: datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z" - time = dt.datetime.strptime(str(self.datetime.eval(config, **additional_options)), datetime_format).replace(tzinfo=self._timezone) + time = self._parser.parse(str(self.datetime.eval(config, **additional_options)), datetime_format, self.timezone) if self.min_datetime: min_time = self._parser.parse(str(self.min_datetime.eval(config, **additional_options)), datetime_format, self.timezone) From 4cd75ea3d41748d0647b1a6d1810f1574310d4aa Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 08:15:35 -0700 Subject: [PATCH 07/13] top level docstring --- .../sources/declarative/datetime/datetime_parser.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py index 96ff76553c11..44b503e1a356 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -7,6 +7,15 @@ class DatetimeParser: + """ + Parses and formats datetime objects according to a specified format. + + This class mainly acts as a wrapper to properly handling timestamp formatting through the "%s" directive. + + %s is part of the list of format codes required by the 1989 C standard, but it is unreliable because it ignores the time zone information + Instead of using the directive directly, we can use datetime.fromtimestamp and dt.timestamp() + """ + def parse(self, date: Union[str, int], format: str, timezone): # "%s" is a valid (but unreliable) directive for formatting, but not for parsing # It is defined as From 28f05889214040b1321017c3f20dafaa073fb162 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 9 Aug 2022 08:17:27 -0700 Subject: [PATCH 08/13] rename variable --- .../declarative/stream_slicers/datetime_stream_slicer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index f5578bbca583..ff08da789638 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -144,10 +144,10 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> start_datetime = max(cursor_datetime, start_datetime) - stream_state_unparsed_date = stream_state.get(self.cursor_field.eval(self.config, stream_state=stream_state)) + state_cursor_value = stream_state.get(self.cursor_field.eval(self.config, stream_state=stream_state)) - if stream_state_unparsed_date: - state_date = self.parse_date(stream_state_unparsed_date) + if state_cursor_value: + state_date = self.parse_date(state_cursor_value) else: state_date = None if state_date: From 016cb69193535ef67536fcf7909b59e68d423c78 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 10 Aug 2022 06:46:09 -0700 Subject: [PATCH 09/13] do not use timestamp() --- .../sources/declarative/datetime/datetime_parser.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py index 44b503e1a356..5d4f03974e3e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -32,7 +32,4 @@ def format(self, dt: datetime.datetime, format: str) -> str: # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on # It's safer to use the timestamp() method than the %s directive # See https://stackoverflow.com/a/4974930 - if format == "%s": - return str(int(dt.timestamp())) - else: - return dt.strftime(format) + return dt.strftime(format) From b984d20b2be3d6a9f50c87b692f96cfffc3b9a77 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 10 Aug 2022 06:56:34 -0700 Subject: [PATCH 10/13] Revert "do not use timestamp()" This reverts commit 016cb69193535ef67536fcf7909b59e68d423c78. --- .../sources/declarative/datetime/datetime_parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py index 5d4f03974e3e..44b503e1a356 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -32,4 +32,7 @@ def format(self, dt: datetime.datetime, format: str) -> str: # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on # It's safer to use the timestamp() method than the %s directive # See https://stackoverflow.com/a/4974930 - return dt.strftime(format) + if format == "%s": + return str(int(dt.timestamp())) + else: + return dt.strftime(format) From d867b865bd293bbc950c05b3247e9e561d48c8f7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 10 Aug 2022 16:11:47 -0700 Subject: [PATCH 11/13] update comment --- .../airbyte_cdk/sources/declarative/datetime/datetime_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py index 44b503e1a356..f3ed27da3a46 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -12,7 +12,7 @@ class DatetimeParser: This class mainly acts as a wrapper to properly handling timestamp formatting through the "%s" directive. - %s is part of the list of format codes required by the 1989 C standard, but it is unreliable because it ignores the time zone information + %s is part of the list of format codes required by the 1989 C standard, but it is unreliable because it always return a datetime in the system's timezone. Instead of using the directive directly, we can use datetime.fromtimestamp and dt.timestamp() """ From ad81cb1be2819fda5160ba61f87c72d560d90818 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 10 Aug 2022 16:12:42 -0700 Subject: [PATCH 12/13] bump cdk version --- airbyte-cdk/python/CHANGELOG.md | 5 ++++- airbyte-cdk/python/setup.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 7f0fa5574a95..00cc78fb6d86 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,8 +1,11 @@ # Changelog -## 0.1.72 +## 0.1.73 - Bugfix: Fix bug in DatetimeStreamSlicer's parsing method +## 0.1.72 +- Bugfix: Fix bug in DatetimeStreamSlicer's format method + ## 0.1.71 - Refactor declarative package to dataclasses - Bugfix: Requester header always converted to string diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index ff3a17f01051..2c839f7eadcb 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.1.72", + version="0.1.73", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", From bc35c8d0490e85013d6fbc5655f3b7fda46f8fac Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 10 Aug 2022 16:13:12 -0700 Subject: [PATCH 13/13] Update template --- .../connector-templates/source-configuration-based/setup.py.hbs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connector-templates/source-configuration-based/setup.py.hbs b/airbyte-integrations/connector-templates/source-configuration-based/setup.py.hbs index 49b54192a86e..e60fbe235ae6 100644 --- a/airbyte-integrations/connector-templates/source-configuration-based/setup.py.hbs +++ b/airbyte-integrations/connector-templates/source-configuration-based/setup.py.hbs @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk~=0.1.72", + "airbyte-cdk~=0.1.73", ] TEST_REQUIREMENTS = [