diff --git a/docs/intro_2.md b/docs/intro_2.md index d68ca1bd2..061c58120 100644 --- a/docs/intro_2.md +++ b/docs/intro_2.md @@ -66,6 +66,37 @@ python semantics is its treament of integers. For performance and memory reasons this won't be a problem, but if you attempt to place an integer larger than 64 bits into a `typed_python` container, you'll see the integer get cast down to 64 bits. +### Timestamp + +`typed_python` provides the Timestamp type that wraps useful datetime functionality around a +unix timestamp. + +For example, you can create a Timestamp from a unixtime with the following: + +``` +ts1 = Timestamp.make(1654615145) +ts2 = Timestamp(ts=1654615145) +``` + +You can also create Timestamps from datestrings. The parser supports ISO 8601 along with a variety +of non-ISO formats. For example: +``` + ts1 = Timestamp.parse("2022-01-05T10:11:12+00:15") + ts2 = Timestamp.parse("2022-01-05T10:11:12NYC") + ts3 = Timestamp.parse("January 1, 2022") + ts4 = Timestamp.parse("January/1/2022") + ts5 = Timestamp.parse("Jan-1-2022") +``` + +You can format Timestamps as strings using standard time format directives. For example: + +``` +timestamp = Timestamp.make(1654615145) +print(timestamp.format(utc_offset=144000)) # 2022-06-09T07:19:05 +print(timestamp.format(format="%Y-%m-%d")) # 2022-06-07 +``` + + ### Object In some cases, you may have types that need to hold regular python objects. For these cases, you may diff --git a/typed_python/lib/datetime/chrono.py b/typed_python/lib/datetime/chrono.py new file mode 100644 index 000000000..160c98e3b --- /dev/null +++ b/typed_python/lib/datetime/chrono.py @@ -0,0 +1,264 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typed_python import Entrypoint + +# This file implements some useful low level algorithms for processing dates and times. +# Many of the algorithms are described here https://howardhinnant.github.io/date_algorithms.html + + +@Entrypoint +def days_from_civil(year: int = 0, month: int = 0, day: int = 0) -> int: + ''' + Converts a civil (proleptic Gregorian) date to the number of days since the unix epoch. + Parameters: + year (int): The year + month (int): The month. January: 1, February: 2, .... + day (int): The day + Returns: + days (int): The number of days since 1970-01-01 (negative for earlier dates) + + Implements the low level days_from_civil algorithm + ''' + year -= month <= 2 + era = year // 400 # Python's // already floors, so negative years need no manual adjustment + yoe = (year - era * 400) + doy = (153 * (month - 3 if month > 2 else month + 9) + 2) // 5 + day - 1 + doe = yoe * 365 + yoe // 4 - yoe // 100 + doy + days = era * 146097 + doe - 719468 + + return days + + +@Entrypoint +def date_to_seconds(year: int = 0, month: int = 0, day: int = 0) -> float: + ''' + Creates a unix timestamp from date values. + Parameters: + year (int): The year + month (int): The month. January: 1, February: 2, .... 
+ day (int): The day + Returns: + seconds(float): The number of seconds + + ''' + return days_from_civil(year, month, day) * 86400 + + +@Entrypoint +def time_to_seconds(hour: int = 0, minute: int = 0, second: float = 0) -> float: + ''' + Converts an hour, min, second combination into seconds + Parameters: + hour (int): The hour (0-23) + minute (int): The minute + second (float): The second + Returns: + (float) the number of seconds + ''' + return (hour * 3600) + (minute * 60) + second + + +@Entrypoint +def weekday_difference(x: int, y: int) -> int: + ''' + Gets the difference in days between two weekdays + Parameters: + x (int): The first day + y (int): The second day + + Returns: + (int) the difference between the two weekdays + ''' + x -= y + return x if x >= 0 and x <= 6 else x + 7 + + +@Entrypoint +def weekday_from_days(z: int) -> int: + ''' + Gets the day of week given the number of days from the unix epoch + Parameters: + z (int): The number of days from the epoch + + Returns: + (int) the weekday (0-6) where 0 => Sunday + ''' + return (z + 4) % 7 # day 0 (1970-01-01) was a Thursday; Python's % is never negative for a positive modulus + + +@Entrypoint +def get_nth_dow_of_month(n: int, wd: int, month: int, year: int): + ''' + Gets the date of the nth occurrence of a weekday in a month for a given year. E.g. get 2nd Sat in July 2022 + Parameters: + n (int): nth day of week (1-4). + wd (int): the weekday (0-6) where 0 => Sunday + month (int): the month (1-12) + year (int): the year + + Returns: + (int, int, int): a tuple of (day, month, year) + ''' + if n < 1 or n > 4: + raise ValueError('n should be 1-4') + if wd < 0 or wd > 6: + raise ValueError('wd should be 0-6') + if month < 1 or month > 12: + raise ValueError('invalid month') + + wd_1st = weekday_from_days(days_from_civil(year, month, 1)) + day = weekday_difference(wd, wd_1st) + 1 + (n - 1) * 7 + + return (day, month, year) + + +@Entrypoint +def get_nth_dow_of_month_unixtime(n: int, wd: int, month: int, year: int) -> int: + ''' + Gets the date of the nth day of the month for a given year. E.g. 
get 2nd Sat in July 2022 + Parameters: + n (int): nth day of week (1-4). + wd (int): the weekday (0-6) where 0 => Sunday + month (int): the month (1-12) + year (int): the year + + Returns: + (int): The nth day of the month in unixtime + ''' + if n < 1 or n > 4: + raise ValueError('n should be 1-4') + if wd < 0 or wd > 6: + raise ValueError('wd should be 0-6') + if month < 1 or month > 12: + raise ValueError('invalid month') + + wd_1st = weekday_from_days(days_from_civil(year, month, 1)) + + return date_to_seconds(year=year, + month=month, + day=weekday_difference(wd, wd_1st) + 1 + (n - 1) * 7) + + +@Entrypoint +def get_year_from_unixtime(ts: float) -> int: + ''' + Gets the year from a unixtime + Parameters: + ts (float): the unix timestamp + Returns: + (int): The year + ''' + z = ts // 86400 + 719468 + era = z // 146097 # Python's // already floors, so negative day counts need no manual adjustment + doe = z - era * 146097 + yoe = (doe - (doe // 1460) + (doe // 36524) - (doe // 146096)) // 365 + y = int(yoe + era * 400) + doy = int(doe - ((365 * yoe) + (yoe // 4) - (yoe // 100))) + mp = (5 * doy + 2) // 153 + m = int(mp + (3 if mp < 10 else -9)) + y += (m <= 2) + return y + + +@Entrypoint +def is_leap_year(year: int): + ''' + Tests if a year is a leap year. + Parameters: + year(int): The year + Returns: + True if the year is a leap year, False otherwise + ''' + return (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 + + +@Entrypoint +def convert_to_12h(hour: int): + if hour == 0 or hour == 12 or hour == 24: + return 12 + elif hour < 12: + return hour + else: + return hour - 12 + + +@Entrypoint +def is_date(year: int, month: int, day: int) -> bool: + ''' + Tests if a year, month, day combination is a valid date. Year is required. + Month and day are optional. If day is present, month is required. 
+ Parameters: + year (int): The year + month (int): The month (January=1) + day (int): The day of the month + Returns: + True if the date is valid, False otherwise + ''' + if year is None: + return False + if month is None and day is not None: + return False + if month is not None: + if month > 12 or month < 1: + return False + if month == 2 and day is not None: + # is leap year? + if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0: + if (day > 29): + return False + elif day > 28: + return False + if (month == 9 or month == 4 or month == 6 or month == 11) and day is not None and day > 30: + return False + + if day is not None and (day > 31 or day < 1): + return False + return True + + +@Entrypoint +def is_time(hour: int, min: int, sec: float) -> bool: + ''' + Tests if a hour, min, sec combination is a valid time. + Parameters: + hour(int): The hour + min(int): The min + sec(float): The second + Returns: + True if the time is valid, False otherwise + ''' + # '24' is valid alternative to '0' but only when min and sec are both 0 + if hour < 0 or hour > 24 or (hour == 24 and (min != 0 or sec != 0)): + return False + elif min < 0 or min > 59 or sec < 0 or sec >= 60: + return False + return True + + +@Entrypoint +def is_datetime(year: int, month: int, day: int, hour: float, min: float, sec: float) -> bool: + ''' + Tests if a year, month, day hour, min, sec combination is a valid date time. 
+ Parameters: + year (int): The year + month (int): The month (January=>1) + day (int): The day of the month + hour(int): The hour + min(int): The min + sec(float): The second + Returns: + True if the datetime is valid, False otherwise + ''' + return is_date(year, month, day) and is_time(hour, min, sec) diff --git a/typed_python/lib/datetime/chrono_test.py b/typed_python/lib/datetime/chrono_test.py new file mode 100644 index 000000000..026d2200c --- /dev/null +++ b/typed_python/lib/datetime/chrono_test.py @@ -0,0 +1,90 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from typed_python.lib.datetime.chrono import is_leap_year, is_date, is_time + + +class TestChrono(unittest.TestCase): + + def test_is_leap_year_valid(self): + leap_years = [ + 2000, 2004, 2008, 2012, 2016, 2020, 2024, 2028, 2032, 2036, 2040, 2044, 2048 + ] + + for year in leap_years: + assert is_leap_year(year), year + + def test_is_leap_year_invalid(self): + not_leap_years = [ + 1700, 1800, 1900, 1997, 1999, 2100, 2022 + ] + + for year in not_leap_years: + assert not is_leap_year(year), year + + def test_is_date_valid(self): + # y, m, d + dates = [ + (1997, 1, 1), # random date + (2020, 2, 29) # Feb 29 on leap year + ] + + for date in dates: + assert is_date(date[0], date[1], date[2]), date + + def test_is_date_invalid(self): + # y, m, d + dates = [ + (1997, 0, 1), # Month < 1 + (1997, 13, 1), # Month > 12 + (1997, 1, 0), # Day < 1 + (1997, 1, 32), # Day > 31 in Jan + (1997, 2, 29), # Day > 28 in non-leap-year Feb, + (2100, 2, 29), # Day > 28 in non-leap-year Feb, + (1997, 0, 25), # Month < 1 + (2020, 2, 30), # Day > 29 in Feb (leap year) + (2020, 4, 31), # Day > 30 in Apr (leap year) + (2020, 6, 31), # Day > 30 in June (leap year) + (2020, 9, 31), # Day > 30 in Sept (leap year) + (2020, 11, 31) # Day > 30 in Nov (leap year) + ] + + for date in dates: + assert not is_date(date[0], date[1], date[2]), date + + def test_is_time_valid(self): + # h, m, s + times = [ + (0, 0, 0), # 00:00:00 + (24, 0, 0), # 24:00:00 + (1, 1, 1), # random time + (12, 59, 59) # random time + ] + for time in times: + assert is_time(time[0], time[1], time[2]), time + + def test_is_time_invalid(self): + # h, m, s + times = [ + (24, 1, 0), # m and s must be 0 if hour is 24 + (25, 0, 0), # hour greater than 24 + (-1, 0, 0), # hour less than 0 + (1, 0, -1), # second < 1 + (1, -1, 0), # min < 1 + (1, 0, 60), # second > 59 + (1, 60, 0) # min > 59 + ] + for time in times: + assert not is_time(time[0], time[1], time[2]), time diff --git 
a/typed_python/lib/datetime/date_formatter.py b/typed_python/lib/datetime/date_formatter.py new file mode 100644 index 000000000..22111aa68 --- /dev/null +++ b/typed_python/lib/datetime/date_formatter.py @@ -0,0 +1,252 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typed_python import Class, Final, Entrypoint, ListOf, Dict +from typed_python.lib.datetime.chrono import is_leap_year, convert_to_12h + +# int to string month mapping where 1 => January +MONTH_NAMES = Dict(int, str)({ + 1: 'January', + 2: 'February', + 3: 'March', + 4: 'April', + 5: 'May', + 6: 'June', + 7: 'July', + 8: 'August', + 9: 'September', + 10: 'October', + 11: 'November', + 12: 'December', +}) + +# int to string abbreviated month mapping where 1 => Jan +MONTH_ABBR = Dict(int, str)({ + 1: 'Jan', + 2: 'Feb', + 3: 'Mar', + 4: 'Apr', + 5: 'May', + 6: 'Jun', + 7: 'Jul', + 8: 'Aug', + 9: 'Sep', + 10: 'Oct', + 11: 'Nov', + 12: 'Dec', +}) + +# int to string abbreviated day mapping where 0 => Sunday +DAY_NAMES = Dict(int, str)({ + 0: 'Sunday', + 1: 'Monday', + 2: 'Tuesday', + 3: 'Wednesday', + 4: 'Thursday', + 5: 'Friday', + 6: 'Saturday' +}) + +# int to string abbreviated day mapping where 0 => Sun +DAY_ABBR = Dict(int, str)({ + 0: 'Sun', + 1: 'Mon', + 2: 'Tue', + 3: 'Wed', + 4: 'Thu', + 5: 'Fri', + 6: 'Sat' +}) + + +class DateFormatter(Class, Final): + + @Entrypoint + @staticmethod + def isoformat(ts: float, utc_offset: int = 0): + return 
DateFormatter.format(ts=ts, format='%Y-%m-%dT%H:%M:%S', utc_offset=utc_offset) + + @Entrypoint + @staticmethod + def f2d(num: int) -> str: + ''' + Converts a int to string and left pads it to 2 digits + Parameters: + num (int): The int to format + Returns: + (str): a 2 digit string representation of the int + ''' + res = str(num) + if len(res) == 1: + return '0' + res + return res + + @Entrypoint + @staticmethod + def f3d(num: int) -> str: + ''' + Converts a int to string and left pads it to 3 digits + Parameters: + num (int): The int to format + Returns: + (str): a 3 digit string representation of the int + ''' + res = str(num) + if len(res) == 2: + return '0' + res + elif len(res) == 1: + return '00' + res + return res + + @Entrypoint + @staticmethod + def f4d(num: int) -> str: + ''' + Converts an int to string and left pads it with zeroes to 4 digits + Parameters: + num (int): The int to format + Returns: + (str): a 4 digit string representation of the int + ''' + res = str(num) + if len(res) == 3: + return '0' + res + elif len(res) == 2: + return '00' + res + elif len(res) == 1: + return '000' + res + return res + + @Entrypoint + @staticmethod + def format(ts: float = 0, utc_offset: int = 0, format: str = "%Y-%m-%d %H:%M:%S") -> str: + ''' + Converts a unix timestamp (ts, in seconds) to a string in a given format + Parameters: + utc_offset (int): The offset from UTC in seconds + format (str): A string specifying formatting directives. E.g. '%Y-%m-%d %H:%M:%S' + Returns: + date_str(str): A string representing the date in the specified format. E.g. "Mon January 2, 2021" + ''' + # This bit of logic rightly belongs in the Chrono module. 
However, we gain some efficiency by inlining + # here instead of paying the tuple creation cost - i.e to return (year, month, day, hour, etc) + # especially considering that .format may be called in large loops/batches + ts = ts + utc_offset + z = ts // 86400 + 719468 + era = z // 146097 # Python's // already floors, so negative day counts need no manual adjustment + doe = z - era * 146097 + yoe = (doe - (doe // 1460) + (doe // 36524) - (doe // 146096)) // 365 + y = int(yoe + era * 400) + doy = int(doe - ((365 * yoe) + (yoe // 4) - (yoe // 100))) + mp = (5 * doy + 2) // 153 + d = int(doy - (153 * mp + 2) // 5 + 1) + m = int(mp + (3 if mp < 10 else -9)) + y += (m <= 2) + + h = int((ts // 3600) % 24) + min = int((ts // (3600 / 60)) % 60) + s = int((ts // (3600 / 60 / 60)) % (60)) + + # http://howardhinnant.github.io/date_algorithms.html#weekday_from_days + days = int(ts // 86400) + weekday = (days + 4) % 7 # Python's % is never negative here, so pre-1970 days need no special case + + # The above algorithm is based on a year starting on March 1. + # We'll have to shift this to January 1 based year by adding 60 days and wrapping at + # year end + doy += 60 + doy = doy % 365 if doy > 365 else doy + + if is_leap_year(y) and m > 2: + doy += 1 + + if format == '%Y-%m-%d': + return "-".join(ListOf(str)([DateFormatter.f4d(y), DateFormatter.f2d(m), DateFormatter.f2d(d)])) + + result = ListOf(str)([]) + + pos = 0 + strlen = len(format) + + while pos < strlen: + if format[pos] == '%' and pos + 1 < strlen: + directive = format[pos + 1] + + if directive == 'a': + result.append(DAY_ABBR[weekday]) + pos += 1 + elif directive == 'A': + result.append(DAY_NAMES[weekday]) + pos += 1 + elif directive == 'w': + result.append(str(weekday)) + pos += 1 + elif directive == 'd': + result.append(DateFormatter.f2d(d)) + pos += 1 + elif directive == 'b': + result.append(MONTH_ABBR[m]) + pos += 1 + elif directive == 'B': + result.append(MONTH_NAMES[m]) + pos += 1 + elif directive == 'm': + result.append(DateFormatter.f2d(m)) + pos += 1 + elif directive == 'y': + 
result.append(DateFormatter.f2d(y % 100)) + pos += 1 + elif directive == 'Y': + result.append(DateFormatter.f4d(y)) + pos += 1 + elif directive == 'H': + result.append(DateFormatter.f2d(h)) + pos += 1 + elif directive == 'I': + result.append(DateFormatter.f2d(convert_to_12h(h))) + pos += 1 + elif directive == 'p': + result.append('AM' if h < 12 else 'PM') + pos += 1 + elif directive == 'M': + result.append(DateFormatter.f2d(min)) + pos += 1 + elif directive == 'S': + result.append(DateFormatter.f2d(s)) + pos += 1 + elif directive == 'Z': + result.append('UTC') # timestamps don't store tz data, are pegged to UTC + pos += 1 + elif directive == 'z': + result.append('+0000') # timestamps don't store tz data, are pegged to UTC + pos += 1 + elif directive == 'j': + result.append(DateFormatter.f3d(doy)) # day number of year + pos += 1 + elif directive == 'C': + result.append(DateFormatter.f2d(y // 100)) # century + pos += 1 + elif directive == '%': + result.append('%') + pos += 1 + elif directive == 'u': + result.append(str(7 if weekday == 0 else weekday)) # ISO weekday 1-7 + pos += 1 + else: + result.append(directive) + pos += 1 + else: + result.append(format[pos]) + pos += 1 + return ''.join(result) diff --git a/typed_python/lib/datetime/date_formatter_test.py b/typed_python/lib/datetime/date_formatter_test.py new file mode 100644 index 000000000..d8bfd1499 --- /dev/null +++ b/typed_python/lib/datetime/date_formatter_test.py @@ -0,0 +1,216 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import time +from datetime import datetime, timedelta +from typed_python.lib.datetime.date_formatter import DateFormatter +import pytz + + +def get_datetimes_in_range(start, end, step): + d = start + dates = [start] + + while d < end: + if step == 'days': + d += timedelta(days=1) + elif step == 'hours': + d += timedelta(hours=1) + elif step == 'minutes': + d += timedelta(minutes=1) + elif step == 'seconds': + d += timedelta(seconds=1) + else: + raise ValueError('Unsupported step: ' + step) + dates.append(d) + return dates + + +def get_years_in_range(start, end): + dates = [] + for i in range(start, end): + dates.append(datetime(i, 1, 1, 0, 0, 0, 0, pytz.UTC)) + + return dates + + +class TestDateFormatter(unittest.TestCase): + def test_isoformat(self): + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + for second in seconds: + assert DateFormatter.isoformat(datetime.timestamp(second), 0) == second.strftime( + '%Y-%m-%dT%H:%M:%S'), second.strftime('%Y-%m-%dT%H:%M:%S') + + def test_format_directives(self): + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + for second in seconds: + assert DateFormatter.format(datetime.timestamp( + second), 0, '%Y-%m-%dT%H:%M:%S') == second.strftime('%Y-%m-%dT%H:%M:%S'), second.strftime('%Y-%m-%dT%H:%M:%S') + + def test_format_directive_a(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%a') == day.strftime('%a'), day.strftime('%Y-%m-%d') + + def test_format_directive_A(self): + days = 
get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%A') == day.strftime('%A'), day.strftime('%Y-%m-%d') + + def test_format_directive_w(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%w') == day.strftime('%w'), day.strftime('%Y-%m-%d') + + def test_format_directive_d(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%d') == day.strftime('%d'), day.strftime('%Y-%m-%d') + + def test_format_directive_b(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%b') == day.strftime('%b'), day.strftime('%Y-%m-%d') + + def test_format_directive_B(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%B') == day.strftime('%B'), day.strftime('%Y-%m-%d') + + def test_format_directive_m(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%m') == day.strftime('%m'), day.strftime('%Y-%m-%d') + + def test_format_directive_y(self): + years = get_years_in_range(1999, 2022) + for year in years: + assert 
DateFormatter.format(datetime.timestamp(year), 0, '%y') == year.strftime('%y'), year.strftime('%Y-%m-%d') + + def test_format_directive_H(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 0, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 23, 59, 0, 0, pytz.UTC), + step='minutes') + for minute in minutes: + assert DateFormatter.format(datetime.timestamp(minute), 0, '%H') == minute.strftime( + '%H'), minute.strftime('%Y-%m-%dT%H:%M:%S') + + def test_format_directive_I(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 0, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 23, 59, 0, 0, pytz.UTC), + step='minutes') + for minute in minutes: + assert DateFormatter.format(datetime.timestamp(minute), 0, '%I') == minute.strftime('%I'), minute.strftime('%Y-%m-%dT%H:%M:%S') + + unixtime = time.time() + dt = datetime.fromtimestamp(unixtime) + assert dt.strftime('%I') == DateFormatter.format(unixtime, time.localtime().tm_gmtoff, '%I') + + def test_format_directive_p(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 0, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 23, 59, 0, 0, pytz.UTC), + step='minutes') + for minute in minutes: + assert DateFormatter.format(datetime.timestamp(minute), 0, '%p') == minute.strftime('%p'), minute.strftime('%Y-%m-%dT%H:%M:%S') + + def test_format_directive_M(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 10, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 12, 19, 0, 0, pytz.UTC), + step='minutes') + for minute in minutes: + assert DateFormatter.format(datetime.timestamp(minute), 0, '%M') == minute.strftime('%M'), minute.strftime('%Y-%m-%dT%H:%M:%S') + + def test_format_directive_S(self): + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + for second in seconds: + assert DateFormatter.format(datetime.timestamp(second), 0, '%S') == second.strftime('%S'), 
second.strftime('%Y-%m-%dT%H:%M:%S') + + def test_format_directive_Z(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%Z') == 'UTC', day.strftime('%Y-%m-%d') + + def test_format_directive_z(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%z') == '+0000', day.strftime('%Y-%m-%d') + + def test_format_directive_C(self): + years = get_years_in_range(1999, 2022) + + for year in years: + assert DateFormatter.format(datetime.timestamp(year), 0, '%C') == year.strftime('%C'), year.strftime('%Y') + + def test_format_directive_Y(self): + years = get_years_in_range(1999, 2022) + for year in years: + assert DateFormatter.format(datetime.timestamp(year), 0, '%Y') == year.strftime('%Y'), year.strftime('%Y-%m-%d') + + def test_format_directive_u(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%u') == day.strftime('%u'), day.strftime('%Y-%m-%d') + + def test_format_directive_percent(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, '%%') == day.strftime('%%'), day.strftime('%Y-%m-%d') + + def test_format_directive_doy(self): + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + + for day in days: + assert DateFormatter.format(datetime.timestamp(day), 0, 
'%j') == day.strftime('%j'), day.strftime('%Y-%m-%d') diff --git a/typed_python/lib/datetime/date_parser.py b/typed_python/lib/datetime/date_parser.py new file mode 100644 index 000000000..3c642e0e9 --- /dev/null +++ b/typed_python/lib/datetime/date_parser.py @@ -0,0 +1,410 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typed_python import Class, Final +from typed_python import Entrypoint, ListOf, Dict +from typed_python.lib.datetime.timezone import tz_abbr_to_utc_offset +from typed_python.lib.datetime.chrono import time_to_seconds, date_to_seconds, is_date, is_time + +MONTHS = Dict(str, int)({'jan': 1, 'january': 1, 'feb': 2, 'february': 2, 'mar': 3, 'march': 3, 'apr': 4, + 'april': 4, 'may': 5, 'jun': 6, 'june': 6, 'jul': 7, 'july': 7, 'aug': 8, 'august': 8, + 'sep': 9, 'sept': 9, 'september': 9, 'oct': 10, 'october': 10, 'nov': 11, 'november': 11, + 'dec': 12, 'december': 12}) + + +class DateParser(Class, Final): + + @Entrypoint + @staticmethod + def is_tz_offset(hour: int, min: int, second: float = 0.0) -> bool: + ''' + Tests if a signed hour plus unsigned min, second combination is a valid offset from UTC + Parameters: + hour(int): The hour + min(int): The minute + Returns: + True if the inputs are in the range UTC-12:00 to UTC+14 + ''' + if hour > 14 or hour < -12: + return False + + if (hour == 14 or hour == -12) and (min > 0 or second > 0): + return False + + if min < 0 or min >= 60 or second < 0 or second >= 60: + return False + + return True + + @Entrypoint + @staticmethod + def 
is_month(month_str: str) -> bool: + ''' + Tests if string represents a valid month + Parameters: + month_str: The month string (case insenstive). Examples: 'Jan', 'January' + Returns: + True if the month string is valid, False otherwise + ''' + return month_str.strip().lower() in MONTHS + + @Entrypoint + @staticmethod + def is_year(year_str: str) -> bool: + ''' + Tests if a string represents a valid 4 digit year. + Parameters: + year(string): The year + Returns: + True if the input is a 4 digit string + ''' + return year_str.isdigit() and len(year_str) == 4 + + @Entrypoint + @staticmethod + def parse(date_str: str) -> float: + ''' + Parse a date string and return a unix timestamp + Parameters: + date_str (str): A string representing a date time. examples: 2022-01-03T02:45 or January 2, 1997 2:00pm + Returns: + (float) A unix timestamp + ''' + try: + return DateParser.parse_iso(date_str) + except ValueError: + return DateParser.parse_non_iso(date_str) + + @Entrypoint + @staticmethod + def get_tokens(time_str: str, skip_chars: str = '') -> ListOf(str): + ''' + Tokenises a string into components suitable for datetime processing + Parameters: + time_str (str): A string representing a date time. examples: 2022-01-03T02:45 or January 2, 1997 2:00pm + Returns: + (ListOf(str) A list of tokens. E.g. 
["1997", "/", "01", "/", "02"] + ''' + tokens = ListOf(str)() + cursor = 0 + while cursor < len(time_str): + token = '' + while cursor < len(time_str) and time_str[cursor].isalpha(): + token += time_str[cursor] + cursor += 1 + + if token != '': + tokens.append(token) + + token = '' + while cursor < len(time_str) and (time_str[cursor].isdigit() or time_str[cursor] == '.'): + token += time_str[cursor] + cursor += 1 + continue + + if token != '': + tokens.append(token) + + token = '' + while cursor < len(time_str) and time_str[cursor].isspace(): + token += time_str[cursor] + cursor += 1 + + if token != '': + if skip_chars.find(' ') > -1: + pass + else: + tokens.append(' ') + + token = '' + while (cursor < len(time_str) and not time_str[cursor].isspace() + and not time_str[cursor].isdigit() and not time_str[cursor].isalpha()): + token += time_str[cursor] + cursor += 1 + + if token != '': + if skip_chars.find(token) > -1: + pass + else: + tokens.append(token) + return tokens + + @Entrypoint + @staticmethod + def parse_tz_offset(tokens: ListOf(str)) -> int: + ''' + Converts a set of tokens representing a timezone offset to seconds. + Parameters: + tokens (ListOf(str)): A set of string tokens representing a timezone. E.g. 
['+', '02', '23'] or ['-', '0530'] + Returns: + (int): The offset in seconds + ''' + if tokens[0] != '+' and tokens[0] != '-': + raise ValueError("tz offset must begin with '+' or '-'") + + hour = min = 0 + second = 0.0 + if len(tokens) == 2: + # [+|-]HH, [+|-]HHMM or [+|-]HHMMSS[.fff] + if len(tokens[1]) == 2: + hour, min, second = int(tokens[1]), 0, 0.0 + elif len(tokens[1]) == 4: + hour, min, second = int(tokens[1][:2]), int(tokens[1][2:4]), 0.0 + elif len(tokens[1]) >= 6: + hour, min, second = int(tokens[1][:2]), int(tokens[1][2:4]), float(tokens[1][4:]) + elif len(tokens) == 3: + hour, min, second = int(tokens[1]), int(tokens[2]), 0.0 + elif len(tokens) == 4: + hour, min, second = int(tokens[1]), int(tokens[2]), float(tokens[3]) + else: + raise ValueError('Invalid tz offset') + + sign = -1 if tokens[0] == '-' else 1 # taken from the sign token so a zero hour (e.g. +00:15) keeps its direction + + if DateParser.is_tz_offset(sign * hour, min, second): + return sign * (hour * 3600 + min * 60 + int(second)) + else: + raise ValueError('Invalid tz offset: ') + + @Entrypoint + @staticmethod + def parse_iso(date_str: str) -> float: + ''' + Converts an ISO 8601 formated date string to a unix timestamp + Parameters: + date_str (str): An ISO 8601 formatted string + Returns: + unixtime(float): A unix timestamp + ''' + tokens = DateParser.get_tokens(time_str=date_str, skip_chars='/-:') + + # Process date segment + segment = ListOf(str)([]) + cursor = 0 + while cursor < len(tokens): + if tokens[cursor] == 'T' or tokens[cursor] == 't' or tokens[cursor] == ' ': + cursor += 1 + break + elif tokens[cursor] == '+' or tokens[cursor] == '-': + cursor += 1 + break + else: + segment.append(tokens[cursor]) + cursor += 1 + + year = month = day = -1 + + if len(segment) == 1: + if len(segment[0]) == 8: + year, month, day = int(segment[0][:4]), int(segment[0][4:6]), int(segment[0][6:8]) + elif len(segment[0]) == 6: + year, month, day = int(segment[0][:4]), int(segment[0][4:6]), 1 + elif len(segment[0]) == 4: + year, month, day = 
int(segment[0][:4]), 1, 1 + elif len(segment) == 2: + year, month, day = int(segment[0]), int(segment[1]), 1 + elif len(segment) == 3: + year, month, day = int(segment[0]), int(segment[1]), int(segment[2]) + + if not is_date(year, month, day): + raise ValueError('Invalid date: ', segment) + + # Process time segement + segment.clear() + while cursor < len(tokens): + if tokens[cursor] == 'T' or tokens[cursor] == 't' or tokens[cursor] == ' ': + cursor += 1 + break + elif tokens[cursor] == '+' or tokens[cursor] == '-' or tokens[cursor].isalpha(): + break + else: + segment.append(tokens[cursor]) + cursor += 1 + + hour = minute = 0 + second = 0.0 + + if len(segment) == 1: + if len(segment[0]) == 6: + hour, minute, second = int(segment[0][:2]), int(segment[0][2:4]), float(segment[0][4:6]) + elif len(segment[0]) == 4: + hour, minute, second = int(segment[0][:2]), int(segment[0][2:4]), 0.0 + elif len(segment[0]) == 2: + hour, minute, second = int(segment[0][:2]), 0, 0.0 + elif len(segment) == 2: + hour, minute, second = int(segment[0]), int(segment[1]), 0.0 + elif len(segment) == 3: + hour, minute, second = int(segment[0]), int(segment[1]), float(segment[2]) + + if not is_time(hour, minute, second): + raise ValueError('Invalid time: ', segment) + + # Process timezone segment + segment.clear() + while cursor < len(tokens): + segment.append(tokens[cursor]) + cursor += 1 + + dt = date_to_seconds(year, month, day) + time_to_seconds(hour, minute, second) + if len(segment) == 0: + tz_offset = 0 + elif len(segment) == 1: + tz_offset = tz_abbr_to_utc_offset(segment[0], dt) + elif segment[0] == '+' or segment[0] == '-': + tz_offset = DateParser.parse_tz_offset(segment) + else: + raise ValueError('Unsupported tz format', segment) + + return dt + tz_offset + + @Entrypoint + @staticmethod + def parse_non_iso_time(tokens) -> float: + ''' + Converts a set of tokens representing a time seconds + Parameters: + tokens (str): The time tokens + Returns: + (float): The seconds + ''' + h = m = 
s = 0 + + if len(tokens) == 0: + return 0 + + # break into time and ampm parts + sep_idx = None + for idx, token in enumerate(tokens): + if token == 'am' or token == 'pm': + sep_idx = idx + break + + if sep_idx is not None: + time_part, ampm_part = tokens[:sep_idx], tokens[sep_idx:] + else: + time_part = tokens + ampm_part = None + + if len(time_part) == 5 and time_part[1] == ':' and time_part[2].isdigit() and time_part[3] == ':': + # HH:MM:SS + if time_part[0].isdigit() and time_part[4].isdigit(): + h, m, s = int(time_part[0]), int(time_part[2]), int(time_part[4]) + else: + raise ValueError() + elif len(time_part) == 3: + # HH:MM + if time_part[0].isdigit() and time_part[1] == ':' and time_part[2].isdigit(): + h, m, s = int(time_part[0]), int(time_part[2]), 0 + else: + raise ValueError() + else: + raise ValueError('Unsupported time format', tokens) + + if ampm_part is not None: + if h > 12 or h < 1: + raise ValueError('AM/PM specified. hour must be between 1 and 12') + if ampm_part[0].lower() == 'am' and h == 12: + h = 0 + elif ampm_part[0].lower() == 'pm': + h = h + 12 + + if not is_time(h, m, s): + raise ValueError('Invalid time: ', h, m, s) + + return time_to_seconds(h, m, s) + + @Entrypoint + @staticmethod + def parse_non_iso(date_str: str) -> float: + ''' + Parse a date string and return a unix timestamp + Parameters: + date_str (str): A date string + Returns: + (float) A unix timestamp + ''' + date_str = date_str.lower().replace('a.m.', 'am').replace('p.m.', 'pm').strip() + tokens = DateParser.get_tokens(date_str, skip_chars=' ,') + + # if/elsif block is long but it's simple and allows us to clearly define the formats we support + # and add new formats as needed + + y = m = d = 0 + time_tokens = None + + # 5+ tokens with 4 digit year as 5th token + if len(tokens) >= 5 and DateParser.is_year(tokens[4]): + # DD/Month/YYYY or DD-Month-YYYY + if (DateParser.is_month(tokens[2]) and ((tokens[1] == '/' and tokens[3] == '/') or + (tokens[1] == '-' and tokens[3] == 
'-'))): + y, m, d, time_tokens = int(tokens[4]), MONTHS[tokens[2].lower()], int(tokens[0]), tokens[5:] + + # Month-DD-YYYY + elif DateParser.is_month(tokens[0]) and tokens[1] == '-' and tokens[2].isdigit() and tokens[3] == '-': + y, m, d, time_tokens = int(tokens[4]), MONTHS[tokens[0].lower()], int(tokens[2]), tokens[5:] + + # Month-DD-YYYY or Month/DD/YYYY + elif (DateParser.is_month(tokens[0]) and ((tokens[1] == '/' and tokens[3] == '/') or + (tokens[1] == '-' and tokens[3] == '-'))): + y, m, d, time_tokens = int(tokens[4]), MONTHS[tokens[0].lower()], int(tokens[2]), tokens[5:] + + else: + raise ValueError('Unsupported date format: ' + date_str) + + # 5+ tokens with 4 digit year as 1st token + elif len(tokens) >= 5 and DateParser.is_year(tokens[0]) and DateParser.is_month(tokens[2]) and tokens[4].isdigit(): + # YYYY/Month/DD or YYYY-Month-DD + if (tokens[1] == '/' and tokens[3] == '/') or (tokens[1] == '-' and tokens[3] == '-'): + y, m, d, time_tokens = int(tokens[0]), MONTHS[tokens[2].lower()], int(tokens[4]), tokens[5:] + else: + raise ValueError('Unsupported date format: ' + date_str) + + # Month D YYYY + elif len(tokens) >= 3 and DateParser.is_month(tokens[0]) and tokens[1].isdigit() and DateParser.is_year(tokens[2]): + y, m, d, time_tokens = int(tokens[2]), MONTHS[tokens[0].lower()], int(tokens[1]), tokens[3:] + + # D Month YYYY + elif len(tokens) >= 3 and DateParser.is_month(tokens[1]) and tokens[0].isdigit() and DateParser.is_year(tokens[2]): + y, m, d, time_tokens = int(tokens[2]), MONTHS[tokens[1].lower()], int(tokens[0]), tokens[3:] + + # YYYY Month DD + elif len(tokens) >= 3 and DateParser.is_year(tokens[0]) and DateParser.is_month(tokens[1]) and tokens[2].isdigit(): + y, m, d, time_tokens = int(tokens[0]), MONTHS[tokens[1].lower()], int(tokens[2]), tokens[3:] + + else: + raise ValueError('Unsupported date format: ' + date_str) + + if not is_date(y, m, d): + raise ValueError('Invalid date: ' + date_str) + + return date_to_seconds(y, m, d) + 
DateParser.parse_non_iso_time(time_tokens) + + @Entrypoint + @staticmethod + def utc_offset_string_to_seconds(utc_offset: str) -> int: + ''' + Converts a tz offset in the form [+|-]HH[:]MM to seconds + Parameters: + utc_offset(string): The utc offset + Returns: + The utc offset in seconds + ''' + offset = ''.join(utc_offset.split(':')) + hrs = int(offset[0:3]) + mins = int(offset[3:5]) + + if DateParser.is_tz_offset(hrs, mins): + return hrs * 3600 + (mins * 60 if hrs > 0 else mins * -60) + else: + raise ValueError('Invalid tz offset: ' + utc_offset) diff --git a/typed_python/lib/datetime/date_parser_test.py b/typed_python/lib/datetime/date_parser_test.py new file mode 100644 index 000000000..eab9bb615 --- /dev/null +++ b/typed_python/lib/datetime/date_parser_test.py @@ -0,0 +1,554 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import unittest
from typed_python.lib.datetime.date_parser import DateParser
import pytest
from datetime import datetime, timedelta
import pytz


# Step name -> increment used by get_datetimes_in_range.
_STEP_DELTAS = {
    'days': timedelta(days=1),
    'hours': timedelta(hours=1),
    'minutes': timedelta(minutes=1),
    'seconds': timedelta(seconds=1),
}

# (date prefix incl. date/time separator, timezone suffix) pairs shared by the ISO tests.
# A time directive such as '%H:%M' is inserted between the two.
_ISO_SHAPES = [
    ('%Y-%m-%dT', ''),
    ('%Y-%m-%dT', 'Z'),
    ('%Y-%m-%d ', ''),
    ('%Y/%m/%dT', ''),
    ('%Y/%m/%d ', ''),
    ('%Y%m%dT', ''),
    ('%Y%m%d ', ''),
    ('%Y-%m-%dT', '+00'),
    ('%Y-%m-%dT', '+00:00'),
]

# Every supported non-ISO date layout (month given by name).
NON_ISO_DATE_FORMATS = [
    '%b %d %Y',   # e.g Jan 01 1997
    '%B %d %Y',   # e.g January 01 1997
    '%b %d, %Y',  # e.g Jan 01, 1997
    '%B %d, %Y',  # e.g January 01, 1997
    '%b%d, %Y',   # e.g Jan01, 1997
    '%B%d, %Y',   # e.g January01, 1997
    '%b %d,%Y',   # e.g Jan 01,1997
    '%B %d,%Y',   # e.g January 01,1997
    '%d %b %Y',   # e.g 01 Jan 1997
    '%d %B %Y',   # e.g 01 January 1997
    '%d%b%Y',     # e.g 01Jan1997
    '%d%B%Y',     # e.g 01January1997
    '%d/%b/%Y',   # e.g 01/Jan/1997
    '%d/%B/%Y',   # e.g 01/January/1997
    '%d-%b-%Y',   # e.g 01-Jan-1997
    '%d-%B-%Y',   # e.g 01-January-1997
    '%Y %b %d',   # e.g 1997 Jan 01
    '%Y %B %d',   # e.g 1997 January 01
    '%Y/%b/%d',   # e.g 1997/Jan/01
    '%Y/%B/%d',   # e.g 1997/January/01
    '%Y-%b-%d',   # e.g 1997-Jan-01
    '%Y-%B-%d',   # e.g 1997-January-01
    '%b-%d-%Y',   # e.g Jan-01-1997
    '%B-%d-%Y',   # e.g January-01-1997
    '%b/%d/%Y',   # e.g Jan/01/1997
    '%B/%d/%Y',   # e.g January/01/1997
]


def _utc(year, month, day, hour=0, minute=0):
    """Build a timezone-aware UTC datetime with zero seconds and microseconds."""
    return datetime(year, month, day, hour, minute, 0, 0, pytz.UTC)


def iso_formats(time_directive):
    """Return every ISO layout in _ISO_SHAPES with `time_directive` as its time part."""
    return [prefix + time_directive + suffix for prefix, suffix in _ISO_SHAPES]


def non_iso_formats(time_directive):
    """Return every non-ISO date layout followed by a space and `time_directive`."""
    return [fmt + ' ' + time_directive for fmt in NON_ISO_DATE_FORMATS]


def assert_parses(parser, formats, moments, offset=0):
    """Check that `parser` maps each formatted moment back to its unix timestamp.

    `offset` is added to the expected timestamp; it covers text that the format string
    appends literally (a '.123' fraction, a '+01:15' utc offset).
    """
    for fmt in formats:
        for moment in moments:
            text = moment.strftime(fmt)
            assert parser(text) == datetime.timestamp(moment) + offset, text


def get_years_in_range(start, end):
    """Return Jan 1st 00:00 UTC of every year in [start, end)."""
    return [_utc(year, 1, 1) for year in range(start, end)]


def get_months_in_year(year):
    """Return the first day (00:00 UTC) of each of the twelve months of `year`."""
    return [_utc(year, month, 1) for month in range(1, 13)]


def get_datetimes_in_range(start, end, step):
    """Return datetimes from `start`, advancing by one `step` until `end` is reached.

    `step` is 'days', 'hours', 'minutes' or 'seconds'. The first value that is not before
    `end` is included as the last element.
    """
    if step not in _STEP_DELTAS:
        raise ValueError('Unsupported step: ' + step)

    delta = _STEP_DELTAS[step]
    d = start
    dates = [start]
    while d < end:
        d += delta
        dates.append(d)
    return dates


def time_to_sec(hours=0, mins=0, secs=0, fract=0):
    """Convert a time of day to seconds."""
    return (hours * 60 * 60) + (mins * 60) + secs + fract


class TestDateParser(unittest.TestCase):

    def test_empty_string(self):
        with pytest.raises(ValueError):
            DateParser.parse('')

    def test_fails_on_random_text(self):
        with pytest.raises(ValueError):
            DateParser.parse('scuse me while i kiss the sky')

    def test_fails_with_extra_text(self):
        with pytest.raises(ValueError):
            DateParser.parse('1997-01-01 and some more text')

    def test_parse_invalid_year(self):
        days = [
            'a997',  # not 4 digit number
            '97',
        ]
        for day in days:
            with pytest.raises(ValueError):
                DateParser.parse(day)

    def test_parse_valid_year(self):
        days = [
            '1997',
            '2020',
            '9999',
            '0000'
        ]
        for day in days:
            DateParser.parse_iso(day)

    def test_parse_invalid_month(self):
        days = [
            '1997-00',
            '1997-13',
            '1997-ab'
        ]
        for day in days:
            with pytest.raises(ValueError):
                DateParser.parse_iso(day)

    def test_parse_invalid_day(self):
        days = [
            '1997-01-00',  # day < 1
            '1997-01-32',  # day > 31
            '1997-04-31',  # day > 30 in Apr
            '1997-06-31',  # day > 30 in June
            '1997-09-31',  # day > 30 in Sep
            '1997-11-31',  # day > 30 in Nov
            '1997-02-29',  # day > 28 for non-leap year Feb
            '2020-02-30',  # day > 29 for leap year Feb
            '2020-02-ab',  # day is not digit
            '1900-02-29',  # year is multiple of 4, but not leap year so no 29
        ]

        for day in days:
            with pytest.raises(ValueError):
                DateParser.parse_iso(day)

    def test_parse_iso_yyyyx(self):
        years = get_years_in_range(1942, 1970) + get_years_in_range(2001, 2022)
        assert_parses(DateParser.parse_iso, ['%Y'], years)

    def test_parse_iso_yyyymm(self):
        months = get_months_in_year(1999) + get_months_in_year(2020)
        assert_parses(DateParser.parse, ['%Y-%m', '%Y/%m', '%Y%m'], months)

    def test_parse_iso_yyyymmdd(self):
        # all days in non leap year and leap year
        days = get_datetimes_in_range(start=_utc(2019, 1, 1), end=_utc(2020, 1, 31), step='days')
        assert_parses(DateParser.parse_iso, ['%Y-%m-%d', '%Y/%m/%d', '%Y%m%d'], days)

    def test_parse_iso_yyyymmddhh(self):
        # all hours in feb 2020
        hours = get_datetimes_in_range(start=_utc(2020, 2, 1), end=_utc(2020, 3, 2), step='hours')
        assert_parses(DateParser.parse_iso, iso_formats('%H'), hours)

    def test_parse_iso_yyyymmddhhmm(self):
        minutes = get_datetimes_in_range(start=_utc(2020, 2, 29, 13), end=_utc(2020, 2, 29, 15), step='minutes')
        assert_parses(DateParser.parse, iso_formats('%H:%M'), minutes)

    def test_parse_iso_yyyymmddhhmmss(self):
        seconds = get_datetimes_in_range(start=_utc(2020, 2, 29, 13, 17), end=_utc(2020, 2, 29, 13, 19),
                                         step='seconds')
        assert_parses(DateParser.parse_iso, iso_formats('%H:%M:%S'), seconds)

    def test_parse_iso_yyyymmddhhmmsssss(self):
        seconds = get_datetimes_in_range(start=_utc(2020, 2, 29, 13, 17), end=_utc(2020, 2, 29, 13, 19),
                                         step='seconds')
        # the literal '.123' in the format adds 123ms to every expected timestamp
        assert_parses(DateParser.parse_iso, iso_formats('%H:%M:%S.123'), seconds, offset=.123)

    def test_parse_iso_with_tz_offset(self):
        hours = get_datetimes_in_range(start=_utc(2020, 2, 1), end=_utc(2020, 3, 2), step='hours')

        tz_offset = 4500

        formats = [
            '%Y-%m-%dT%H:%M+01:15',
            '%Y-%m-%d %H:%M+01:15',
        ]
        assert_parses(DateParser.parse_iso, formats, hours, offset=tz_offset)

    def test_parse_non_iso_with_whitespace(self):
        hours = get_datetimes_in_range(start=_utc(2020, 2, 1), end=_utc(2020, 3, 2), step='hours')
        formats = [
            ' %B-%d-%Y %H:%M',
            '%B-%d-%Y %H:%M ',
            ' %B-%d-%Y %H:%M ',
        ]
        assert_parses(DateParser.parse, formats, hours)

    def test_parse_non_iso_dates(self):
        # all days in non leap year and leap year
        days = get_datetimes_in_range(start=_utc(2019, 1, 1), end=_utc(2020, 1, 31), step='days')
        assert_parses(DateParser.parse_non_iso, NON_ISO_DATE_FORMATS, days)

    def test_parse_non_iso_yyyymmddhhmm(self):
        minutes = get_datetimes_in_range(start=_utc(2020, 2, 29, 13), end=_utc(2020, 2, 29, 15), step='minutes')
        assert_parses(DateParser.parse_non_iso, non_iso_formats('%H:%M'), minutes)

    def test_parse_non_iso_yyyymmddhhmmss(self):
        seconds = get_datetimes_in_range(start=_utc(2020, 2, 29, 13, 17), end=_utc(2020, 2, 29, 13, 19),
                                         step='seconds')
        assert_parses(DateParser.parse_non_iso, non_iso_formats('%H:%M:%S'), seconds)

    def test_non_iso_pm_indicator(self):
        times = get_datetimes_in_range(start=_utc(2020, 2, 29, 13), end=_utc(2020, 2, 29, 23, 59), step='minutes')
        supported_formats = [
            '%B/%d/%Y %I:%MPM',
            '%B/%d/%Y %I:%Mpm',
            '%B/%d/%Y %I:%M:%SPM',
            '%B/%d/%Y %I:%M:%Spm',
        ]
        assert_parses(DateParser.parse_non_iso, supported_formats, times)

    def test_non_iso_am_indicator(self):
        times = get_datetimes_in_range(start=_utc(2020, 2, 29), end=_utc(2020, 2, 29, 11, 59), step='minutes')
        supported_formats = [
            '%B/%d/%Y %I:%MAM',
            '%B/%d/%Y %I:%Mam',
            '%B/%d/%Y %I:%M:%SAM',
            '%B/%d/%Y %I:%M:%Sam',
        ]
        assert_parses(DateParser.parse_non_iso, supported_formats, times)

    def test_parse_non_iso_invalid_day(self):
        with pytest.raises(ValueError):
            DateParser.parse_non_iso('1997 Jan 32')
        with pytest.raises(ValueError):
            DateParser.parse_non_iso('1997 Jan 0')

    def test_parse_non_iso_invalid_month(self):
        with pytest.raises(ValueError):
            DateParser.parse_non_iso('Janeary 01 1997')

    def test_parse(self):
        # test main entry point with mix of iso and non iso format dates
        seconds = get_datetimes_in_range(start=_utc(2020, 2, 29, 13, 17), end=_utc(2020, 2, 29, 13, 19),
                                         step='seconds')
        formats = iso_formats('%H:%M:%S') + non_iso_formats('%H:%M:%S')
        assert_parses(DateParser.parse, formats, seconds)

    def test_nyc_tz(self):
        # edt: Oct 21, 2022
        assert 1666355040 == DateParser.parse('2022-10-21t08:24:00NYC')
        assert 1666355040 == DateParser.parse('2022-10-21t08:24:00EDT')

        # est: Dec 21, 2022
        assert 1671629040 == DateParser.parse('2022-12-21t08:24:00NYC')
        assert 1671629040 == DateParser.parse('2022-12-21t08:24:00EST')

    def test_is_month_valid(self):
        months = [
            'Jan', 'January',
            'Feb', 'February',
            'Mar', 'March',
            'Apr', 'April',
            'May',
            'Jun', 'June',
            'Jul', 'July',
            'Aug', 'August',
            'Sep', 'Sept', 'September',
            'Oct', 'October',
            'Nov', 'November',
            'Dec', 'December'
        ]

        for month in months:
            assert DateParser.is_month(month), month
            assert DateParser.is_month(month.lower()), month.lower()
            assert DateParser.is_month(month.upper()), month.upper()
            assert DateParser.is_month(' ' + month + ' '), month

    def test_is_month_invalid(self):
        months = [
            'not a month',
            'Jane',
            'Movember',
            '',
            '1',
        ]

        for month in months:
            assert not DateParser.is_month(month), month
            assert not DateParser.is_month(month.lower()), month.lower()
            assert not DateParser.is_month(month.upper()), month.upper()

    def test_is_year_valid(self):
        years = [
            '1000',
            '1999',
            '0001',
            '0000'
        ]

        for year in years:
            assert DateParser.is_year(year), year

    def test_is_year_invalid(self):
        years = [
            '000',
            'abcd',
            '10a0',
            '12345'
        ]

        for year in years:
            assert not DateParser.is_year(year), year

    def test_is_tz_offset_valid(self):
        tz_offsets = [
            (-12, 0, 0),  # eastmost
            (14, 0, 0),  # westmost
            (10, 4, 4),  # random
        ]

        # NOTE(review): only the hour and minute of each tuple are passed on; the third
        # element is currently unused - confirm whether seconds should be checked too.
        for tz_offset in tz_offsets:
            assert DateParser.is_tz_offset(tz_offset[0], tz_offset[1]), tz_offset

    def test_is_tz_offset_invalid(self):
        tz_offsets = [
            (-13, 0, 0),  # out of range
            (-12, 1, 0),  # out of range
            (14, 1, 0),  # out of range
            (10, 60, 4),  # min > 59
            (10, -1, 4),  # min < 0
        ]

        for tz_offset in tz_offsets:
            assert not DateParser.is_tz_offset(tz_offset[0], tz_offset[1]), tz_offset

# ---- patch boundary: original diff header for the next file, preserved as comments ----
# diff --git a/typed_python/lib/datetime/timezone.py b/typed_python/lib/datetime/timezone.py
# new file mode 100644
# index 000000000..629931fa1
# --- /dev/null
# +++ b/typed_python/lib/datetime/timezone.py
# @@ -0,0 +1,166 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typed_python import Entrypoint, Dict, Class
from typed_python.lib.datetime.chrono import get_nth_dow_of_month_unixtime, get_year_from_unixtime


@Entrypoint
def is_us_ca_dst(ts: float) -> bool:
    '''
    Checks if a timestamp falls within Daylight Saving Time in the United States or Canada
    Parameters:
        ts (float): the timestamp
    Returns:
        (boolean): True if ts is at or after 2:00am on the second Sunday of March and before
            2:00am on the first Sunday of November of its year, False otherwise

    NOTE(review): the two boundaries are built by adding two hours to whatever
    get_nth_dow_of_month_unixtime returns, with no timezone adjustment - presumably ts is
    expected to be wall-clock seconds rather than true UTC; confirm against callers.
    '''
    year = get_year_from_unixtime(ts)

    # 2:00am second Sunday march
    ds_start = get_nth_dow_of_month_unixtime(2, 0, 3, year) + (2 * 60 * 60)

    # 2:00 am first sunday in november
    ds_end = get_nth_dow_of_month_unixtime(1, 0, 11, year) + (2 * 60 * 60)

    # Half-open interval: the instant DST ends already belongs to standard time.
    return ts >= ds_start and ts < ds_end


class Timezone(Class):
    # Base class for the timezone table below; subclasses return their utc offset in seconds.
    @Entrypoint
    @staticmethod
    def get_offset(_: int) -> int:
        raise NotImplementedError


class CST(Timezone):
    # Central Standard Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 21600


class CDT(Timezone):
    # Central Daylight Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 18000


class CT(Timezone):
    # Central Time: CDT while is_us_ca_dst(ts) holds, CST otherwise.
    @Entrypoint
    @staticmethod
    def get_offset(ts):
        return CDT.get_offset() if is_us_ca_dst(ts) else CST.get_offset()


class EDT(Timezone):
    # Eastern Daylight Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 14400


class EST(Timezone):
    # Eastern Standard Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 18000


class ET(Timezone):
    # Eastern Time: EDT while is_us_ca_dst(ts) holds, EST otherwise.
    @Entrypoint
    @staticmethod
    def get_offset(ts):
        return EDT.get_offset() if is_us_ca_dst(ts) else EST.get_offset()


class MST(Timezone):
    # Mountain Standard Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 25200


class MDT(Timezone):
    # Mountain Daylight Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 21600


class MT(Timezone):
    # Mountain Time: MDT while is_us_ca_dst(ts) holds, MST otherwise.
    @Entrypoint
    @staticmethod
    def get_offset(ts):
        return MDT.get_offset() if is_us_ca_dst(ts) else MST.get_offset()


class PDT(Timezone):
    # Pacific Daylight Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 25200


class PST(Timezone):
    # Pacific Standard Time: fixed offset, the timestamp argument is ignored.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 28800


class PT(Timezone):
    # Pacific Time: PDT while is_us_ca_dst(ts) holds, PST otherwise.
    @Entrypoint
    @staticmethod
    def get_offset(ts):
        return PDT.get_offset() if is_us_ca_dst(ts) else PST.get_offset()


class UTC(Timezone):
    # Coordinated Universal Time: always zero.
    @Entrypoint
    @staticmethod
    def get_offset(_: int = 0):
        return 0


# Upper-case abbreviation -> Timezone instance. 'NYC' is an alias for ET and
# 'GMT' / 'Z' are aliases for UTC.
TZ_ABBR_TO_TIMEZONE = Dict(str, Timezone)({
    'CDT': CDT(),
    'CST': CST(),
    'CT': CT(),
    'EDT': EDT(),
    'EST': EST(),
    'ET': ET(),
    'GMT': UTC(),
    'MDT': MDT(),
    'MST': MST(),
    'MT': MT(),
    'NYC': ET(),
    'PDT': PDT(),
    'PST': PST(),
    'PT': PT(),
    'UTC': UTC(),
    'Z': UTC(),
})


@Entrypoint
def tz_abbr_to_utc_offset(tz_abbr: str, unixtime: int) -> int:
    '''
    Get utc offset by timezone abbreviation
    Parameters:
        tz_abbr(string): a timezone indicator, matched case-insensitively.
            examples: 'ET', 'EST', 'NYC'
        unixtime(int): the timestamp the offset is wanted for; only the DST-aware zones
            (CT, ET, MT, PT, NYC) look at it
    Returns:
        (int): The utc offset in seconds
    Raises:
        a lookup error from TZ_ABBR_TO_TIMEZONE if the abbreviation is not in the table
    '''
    return TZ_ABBR_TO_TIMEZONE[tz_abbr.upper()].get_offset(unixtime)

# ---- patch boundary: original diff header for the next file, preserved as comments ----
# diff --git a/typed_python/lib/datetime/timezone_test.py b/typed_python/lib/datetime/timezone_test.py
# new file mode 100644
# index 000000000..3a3a91aad
# --- /dev/null
# +++ b/typed_python/lib/datetime/timezone_test.py
# @@ -0,0 +1,90 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from typed_python.lib.datetime.timezone import tz_abbr_to_utc_offset

ST_TIME = 1645812452  # feb 25, 2022 - Standard time
DS_TIME = 1661447252  # aug 25, 2022 - DST time


class TestTimezone(unittest.TestCase):
    """Checks the utc offset reported for each supported abbreviation, once with a
    standard-time timestamp and once with a daylight-saving-time timestamp."""

    def expect_offsets(self, abbr, in_standard, in_daylight):
        # Shared check: offset of `abbr` at ST_TIME, then at DS_TIME.
        assert tz_abbr_to_utc_offset(abbr, ST_TIME) == in_standard
        assert tz_abbr_to_utc_offset(abbr, DS_TIME) == in_daylight

    def test_tz_abbr_ct(self):
        self.expect_offsets('ct', 21600, 18000)

    def test_tz_abbr_cdt(self):
        self.expect_offsets('cdt', 18000, 18000)

    def test_tz_abbr_cst(self):
        self.expect_offsets('cst', 21600, 21600)

    def test_tz_abbr_est(self):
        self.expect_offsets('est', 18000, 18000)

    def test_tz_abbr_edt(self):
        self.expect_offsets('edt', 14400, 14400)

    def test_tz_abbr_et(self):
        self.expect_offsets('et', 18000, 14400)

    def test_tz_abbr_mt(self):
        self.expect_offsets('mt', 25200, 21600)

    def test_tz_abbr_mdt(self):
        self.expect_offsets('mdt', 21600, 21600)

    def test_tz_abbr_mst(self):
        self.expect_offsets('mst', 25200, 25200)

    def test_tz_abbr_pt(self):
        self.expect_offsets('pt', 28800, 25200)

    def test_tz_abbr_pdt(self):
        self.expect_offsets('pdt', 25200, 25200)

    def test_tz_abbr_pst(self):
        self.expect_offsets('pst', 28800, 28800)

    def test_tz_abbr_nyc(self):
        self.expect_offsets('nyc', 18000, 14400)
        # 'nyc' must agree with 'et' at both timestamps
        for moment in (ST_TIME, DS_TIME):
            assert (tz_abbr_to_utc_offset('nyc', moment) ==
                    tz_abbr_to_utc_offset('et', moment))

    def test_tz_abbr_utc(self):
        self.expect_offsets('utc', 0, 0)

    def test_tz_abbr_gmt(self):
        self.expect_offsets('gmt', 0, 0)

    def test_tz_abbr_z(self):
        self.expect_offsets('z', 0, 0)

# ---- patch boundary: original diff header for the next file, preserved as comments ----
# diff --git a/typed_python/lib/timestamp.py b/typed_python/lib/timestamp.py
# new file mode 100644
# index 000000000..f4a281d07
# --- /dev/null
# +++ b/typed_python/lib/timestamp.py
# @@ -0,0 +1,127 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typed_python.compiler.runtime import Entrypoint
from typed_python import Class, Final, Member, Held
from typed_python.lib.datetime.date_parser import DateParser
from typed_python.lib.datetime.date_formatter import DateFormatter
from typed_python.lib.datetime.chrono import date_to_seconds, time_to_seconds


@Held
class Timestamp(Class, Final):
    """A wrapper around a unix timestamp that adds functionality for parsing and string formatting"""
    # seconds since the unix epoch
    ts = Member(float)

    @Entrypoint
    def __int__(self):
        return int(self.ts)

    @Entrypoint
    def __float__(self):
        return self.ts

    @Entrypoint
    def __str__(self):
        # The class defines no to_string(); render with format()'s defaults
        # (utc_offset=0, "%Y-%m-%dT%H:%M:%S") instead of calling a missing method.
        return self.format()

    # Comparisons and arithmetic operate on the wrapped value; `other` must expose `.ts`.

    @Entrypoint
    def __eq__(self, other) -> bool:
        return self.ts == other.ts

    @Entrypoint
    def __ne__(self, other) -> bool:
        return self.ts != other.ts

    @Entrypoint
    def __ge__(self, other) -> bool:
        return self.ts >= other.ts

    @Entrypoint
    def __gt__(self, other) -> bool:
        return self.ts > other.ts

    @Entrypoint
    def __lt__(self, other) -> bool:
        return self.ts < other.ts

    @Entrypoint
    def __le__(self, other) -> bool:
        return self.ts <= other.ts

    @Entrypoint
    def __add__(self, other):
        return Timestamp(ts=self.ts + other.ts)

    @Entrypoint
    def __sub__(self, other):
        return Timestamp(ts=self.ts - other.ts)

    @Entrypoint
    @staticmethod
    def make(ts: float):
        '''
        Creates a Timestamp from a float
        Parameters:
            ts: a float (unix time in seconds)
        Returns:
            timestamp (Timestamp): A Timestamp
        '''
        return Timestamp(ts=ts)

    @Entrypoint
    def __init__(self, ts: float):
        self.ts = ts

    @Entrypoint
    @staticmethod
    def parse(date_str: str):
        '''
        Creates a Timestamp from date strings.
        Parameters:
            date_str (str): A date string. E.g 2022-07-30 17:56:46
        Returns:
            timestamp (Timestamp): A Timestamp
        Raises:
            whatever DateParser.parse raises for a string it cannot parse
        '''
        return Timestamp(ts=DateParser.parse(date_str))

    @Entrypoint
    def format(self, utc_offset: int = 0, format: str = "%Y-%m-%dT%H:%M:%S") -> str:
        '''
        Converts a Timestamp to a string in a given format
        Parameters:
            utc_offset (int): The offset from UTC in seconds
            format (str): A string specifying formatting directives. E.g. '%Y-%m-%dT%H:%M:%S'
        Returns:
            date_str(str): A string representing the date in the specified format. E.g. "Mon January 2, 2021"
        '''
        return DateFormatter.format(self.ts, utc_offset, format)

    @Entrypoint
    @staticmethod
    def from_date(year=0, month=0, day=0, hour=0, minute=0, second=0):
        '''
        Creates a Timestamp from date values.
        Parameters:
            year (int): The year
            month (int): The month. January: 1, February: 2, ....
            day (int): The day
            hour (int): The hour (0-23)
            minute (int): The minute
            second (float): The second.
        Returns:
            timestamp (Timestamp): A Timestamp
        '''
        return Timestamp(ts=date_to_seconds(year, month, day) + time_to_seconds(hour, minute, second))

# ---- patch boundary: original diff header for the next file, preserved as comments ----
# diff --git a/typed_python/lib/timestamp_test.py b/typed_python/lib/timestamp_test.py
# new file mode 100644
# index 000000000..8911e0513
# --- /dev/null
# +++ b/typed_python/lib/timestamp_test.py
# @@ -0,0 +1,381 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import unittest

from typed_python.compiler.runtime import Entrypoint, PrintNewFunctionVisitor

from typed_python.lib.timestamp import Timestamp
from datetime import datetime, timezone
from typed_python import ListOf


class Timer:
    """Context manager that measures and prints how long its body took.

    The duration (in seconds) is stored on `self.duration` when the block
    exits and is also available through getDuration().
    """

    def __enter__(self):
        # perf_counter is monotonic, so the measured interval cannot be skewed
        # by system clock adjustments the way time.time() can.
        self.t0 = time.perf_counter()
        return self

    def __exit__(self, *args):
        self.duration = time.perf_counter() - self.t0
        print(self.duration)

    def getDuration(self):
        return self.duration


def _elapsed(fn, *args):
    """Return how many seconds the call fn(*args) takes; its result is discarded."""
    start = time.perf_counter()
    fn(*args)
    return time.perf_counter() - start


def _describe_speed(ts_name, ts_time, dt_name, dt_time):
    """Return a one-line report saying how much faster or slower ts_name is than dt_name."""
    if dt_time > ts_time:
        ratio = dt_time / ts_time
        verdict = 'x faster'
    else:
        ratio = ts_time / dt_time
        verdict = 'x slower'

    return f'{ts_name} ({ts_time}) is {ratio:.2f}{verdict} than {dt_name} ({dt_time})'


@Entrypoint
def make_list_of_iso_datestrings(n):
    """Return a ListOf(str) with the isoformat() of datetime.fromtimestamp(i) for i in range(n)."""
    res = ListOf(str)()
    for i in range(n):
        dt = datetime.fromtimestamp(i)
        res.append(dt.isoformat())
    return res


@Entrypoint
def make_list_of_datetimes(n):
    """Return a ListOf(datetime) with datetime.fromtimestamp(i) for i in range(n)."""
    res = ListOf(datetime)()
    for i in range(n):
        dt = datetime.fromtimestamp(i)
        res.append(dt)
    return res


@Entrypoint
def listOfTimestamps(N):
    """Return a ListOf(Timestamp) with Timestamp.make(i) for i in range(N)."""
    res = ListOf(Timestamp)()
    for unixtime in range(N):
        res.append(Timestamp.make(unixtime))

    return res


@Entrypoint
def listOfDatetimes(N):
    """Return a ListOf(datetime) with datetime.fromtimestamp(i) for i in range(N)."""
    res = ListOf(datetime)([])
    for unixtime in range(N):
        res.append(datetime.fromtimestamp(unixtime))

    return res


@Entrypoint
def parseTimestamps(strings: ListOf(str)):
    """Parse every string with Timestamp.parse and return the results as a ListOf(Timestamp)."""
    res = ListOf(Timestamp)([])
    for string in strings:
        res.append(Timestamp.parse(string))
    return res


@Entrypoint
def parseDatetimes(strings: ListOf(str)):
    """Parse every string with datetime.strptime('%Y-%m-%dT%H:%M:%S') into a ListOf(datetime)."""
    res = ListOf(datetime)([])
    for string in strings:
        res.append(datetime.strptime(string, '%Y-%m-%dT%H:%M:%S'))
    return res


@Entrypoint
def formatTimestamps(timestamps: ListOf(Timestamp)):
    """Format every Timestamp as '%Y-%m-%d' and return the strings as a ListOf(str)."""
    res = ListOf(str)()
    for timestamp in timestamps:
        res.append(timestamp.format(format='%Y-%m-%d'))

    return res


@Entrypoint
def formatDatetimes(datetimes: ListOf(datetime)):
    """Format every datetime with strftime('%Y-%m-%d') and return the strings as a ListOf(str)."""
    res = ListOf(str)()
    for dt in datetimes:
        res.append(dt.strftime('%Y-%m-%d'))
    return res


class TestTimestamp(unittest.TestCase):

    def test_demo_usage(self):
        # Smoke test: every call below only has to complete without raising.

        # create timestamp from unixtime
        Timestamp.make(time.time())
        Timestamp.make(ts=time.time())

        # create timestamp from iso 8601 date string
        Timestamp.parse('2022-10-22T06:39')

        # create timestamp from non iso date string
        Timestamp.parse('Oct 22, 2022 06:39')

        # with relative tz (offset changes with dst)
        Timestamp.parse('2022-10-22T06:39NYC')

        # with relative tz (offset changes with dst)
        Timestamp.parse('2022-10-22T06:39ET')

        # with fixed offset tz
        Timestamp.parse('2022-10-22T06:39UTC')

        # with fixed offset tz
        Timestamp.parse('2022-10-22T06:39EST')

        # with fixed offset tz
        Timestamp.parse('2022-10-22T06:39EDT')

    def test_eq(self):
        # The following commented block of code sometimes unexpectedly errors with something like
        # AssertionError: assert Held(Timestamp)(ts=2,) == Held(Timestamp)(ts=2,)
        #
        # ts1 = Timestamp.make(2)
        # ts2 = Timestamp.make(2)
        # assert ts1 == ts2
        #
        # Added an otherwise unnecessary Entrypointed inner() function as a workaround
        #
        # See: https://github.com/APrioriInvestments/typed_python/issues/404 for bug details
        @Entrypoint
        def inner():
            assert Timestamp.make(2) == Timestamp.make(2)
        inner()

    def test_ge(self):
        # Running these assertions directly in the test body unexpectedly errors with something like
        # AssertionError: assert Held(Timestamp)(ts=1.6694e+09,) >= Held(Timestamp)(ts=1.6694e+09,)
        #
        # Added an otherwise unnecessary Entrypointed inner() function as a workaround
        #
        # See: https://github.com/APrioriInvestments/typed_python/issues/404 for bug details
        @Entrypoint
        def inner():
            unixtime = time.time()
            ts1 = Timestamp.make(unixtime)
            ts2 = Timestamp.make(unixtime)
            ts3 = Timestamp.make(unixtime - 1)
            assert ts1 >= ts2
            assert ts1 >= ts3
        inner()

    def test_gt(self):
        unixtime = time.time()
        ts1 = Timestamp.make(unixtime)
        ts2 = Timestamp.make(unixtime - 1)
        assert ts1 > ts2

    def test_le(self):
        # Wrapped in an Entrypointed inner() for the same reason as in test_ge
        # (https://github.com/APrioriInvestments/typed_python/issues/404).
        @Entrypoint
        def inner():
            unixtime = time.time()
            ts1 = Timestamp.make(unixtime)
            ts2 = Timestamp.make(unixtime)
            ts3 = Timestamp.make(unixtime + 1)
            assert ts1 <= ts2
            assert ts1 <= ts3
        inner()

    def test_lt(self):
        unixtime = time.time()
        ts1 = Timestamp.make(unixtime)
        ts2 = Timestamp.make(unixtime + 1)

        assert ts1 < ts2

    def test_ne(self):
        unixtime = time.time()
        ts1 = Timestamp.make(unixtime)
        ts2 = Timestamp.make(unixtime + 1)
        ts3 = Timestamp.make(unixtime - 1)

        assert ts1 != ts2
        assert ts1 != ts3

    def test_add(self):
        unixtime = time.time()
        ts1 = Timestamp.make(unixtime)
        ts2 = Timestamp.make(5)
        ts3 = ts1 + ts2
        assert ts3.ts == unixtime + 5

    def test_sub(self):
        unixtime = time.time()
        ts1 = Timestamp.make(unixtime)
        ts2 = Timestamp.make(5)
        ts3 = ts1 - ts2
        assert ts3.ts == unixtime - 5

    def test_format_default(self):
        # Just a superficial test. format calls DateFormatter.format
        # which has a robust set of tests
        unixtime = time.time()
        # format() defaults to utc_offset=0, so shift the timestamp by the local
        # UTC offset to make it comparable with the naive local datetime below.
        timestamp = Timestamp.make(unixtime + time.localtime().tm_gmtoff)
        dt = datetime.fromtimestamp(unixtime)
        assert dt.isoformat(timespec='seconds') == timestamp.format()

    def test_format(self):
        # Just a superficial test. format calls DateFormatter.format
        # which has a robust set of tests
        unixtime = time.time()
        # Same local-offset shift as in test_format_default.
        timestamp = Timestamp.make(unixtime + time.localtime().tm_gmtoff)
        dt = datetime.fromtimestamp(unixtime)
        assert dt.isoformat(timespec='seconds') == timestamp.format(format="%Y-%m-%dT%H:%M:%S")

    def test_from_date(self):
        unixtime = time.time()
        dt_tuple = datetime.fromtimestamp(unixtime, tz=timezone.utc).timetuple()

        timestamp = Timestamp.from_date(year=dt_tuple.tm_year,
                                        month=dt_tuple.tm_mon,
                                        day=dt_tuple.tm_mday,
                                        hour=dt_tuple.tm_hour,
                                        minute=dt_tuple.tm_min,
                                        second=dt_tuple.tm_sec)
        # The time tuple only carries whole seconds, so compare truncated values.
        assert int(unixtime) == int(timestamp)

    def test_parse(self):
        unixtime = time.time()
        timestamp = Timestamp.make(unixtime)
        dt = datetime.fromtimestamp(unixtime, tz=timezone.utc).timetuple()
        date_str = f"{dt.tm_year}-{dt.tm_mon:02d}-{dt.tm_mday:02d} {dt.tm_hour:02d}:{dt.tm_min:02d}:{dt.tm_sec:02d}"
        parsed_timestamp = Timestamp.parse(date_str)

        # date_str only carries whole seconds, so compare truncated values.
        assert int(timestamp) == int(parsed_timestamp)

    def test_timestamp_is_held_class(self):
        """ This is a temporary test intended to exhibit the Held class semantics.

        Moral of the story: a Class decorated with 'Held' isn't supposed to have
        pointers to its instances. Instead, its instances get cloned when otherwise
        a new pointer would be created (e.g. insertion to a list, access from a list).
        This is how python constants behave already. However, unlike python constants,
        Held Classes may have modifiable state (Members). This can be confusing if
        the user expects state to be shared between clones (it isn't). For the most
        part, the user should probably not modify their state - in particular, use
        the default Class constructor rather than writing __init__ methods.

        Why do this?
        In the case of Timestamp - it allows us to inject some type information (the
        statement that certain floats represent timestamps, have access to certain
        methods etc.) but maintain the performance of working directly with the
        floats (values live on the stack).
        """
        ts0 = Timestamp.make(0.0)

        timestamps = ListOf(Timestamp)([ts0])

        # putting ts0 in the ListOf cloned it, so:

        # (1) modifications to ts0 are not seen by the ListOf
        ts0.ts = 1.0
        assert ts0.ts == 1.0
        assert timestamps[0].ts == 0.0

        # (2) modifications to timestamps[0] are not seen by ts0
        timestamps[0].ts = 2.0
        assert ts0.ts == 1.0
        assert timestamps[0].ts == 2.0

        # 'access-to-modify' and 'access-to-assign' diverge when dealing
        # with held classes (this is a major departure from python semantics), i.e.:

        # access-to-assign: ts1 is a clone of timestamps[0]
        ts1 = timestamps[0]
        assert ts1.ts == timestamps[0].ts == 2.0

        # (3) modifications to ts1 are not seen by timestamps[0]
        ts1.ts = 3.0
        assert ts1.ts == 3.0
        assert timestamps[0].ts == 2.0

        # (4) 'access-to-modify': modifications to timestamps[0] are not seen by ts1
        timestamps[0].ts = 4.0
        assert ts1.ts == 3.0
        assert timestamps[0].ts == 4.0

    def test_compare_timestamp_datetime_from_unixtime(self):
        runs = 10000000

        # Warm-up call before the timed section.
        Timestamp.make(1)

        ts_time = _elapsed(listOfTimestamps, runs)
        dt_time = _elapsed(listOfDatetimes, runs)

        print(_describe_speed('Timestamp.make', ts_time, 'datetime.fromtimestamp', dt_time))

        # NOTE(review): the original bound check (speedup > 30 and speedup < 40)
        # was already disabled - presumably because timings are machine-dependent.

    def test_compare_timestamp_datetime_from_string(self):
        runs = 100000
        date_strings = make_list_of_iso_datestrings(runs)

        # Warm-up call; PrintNewFunctionVisitor presumably reports what gets
        # compiled inside the block - TODO confirm.
        with PrintNewFunctionVisitor():
            Timestamp.parse('1997')

        ts_time = _elapsed(parseTimestamps, date_strings)
        dt_time = _elapsed(parseDatetimes, date_strings)

        print(_describe_speed('Timestamp.parse', ts_time, 'datetime.strptime', dt_time))

        # NOTE(review): the original bound check (speedup > 7 and speedup < 8)
        # was already disabled - presumably because timings are machine-dependent.

    def test_compare_timestamp_datetime_format(self):
        runs = 1000000
        timestamps = listOfTimestamps(runs)
        datetimes = listOfDatetimes(runs)

        # Warm-up call; PrintNewFunctionVisitor presumably reports what gets
        # compiled inside the block - TODO confirm.
        with PrintNewFunctionVisitor():
            ts = Timestamp.make(2)
            ts.format(format='%Y-%m-%d')

        ts_time = _elapsed(formatTimestamps, timestamps)
        dt_time = _elapsed(formatDatetimes, datetimes)

        print(_describe_speed('Timestamp.format', ts_time, 'datetime.strftime', dt_time))

        # NOTE(review): the original bound check (speedup > 1 and speedup < 1.5)
        # was already disabled - presumably because timings are machine-dependent.