From 8237019b0915f99e0a8932e3e475448e06497c16 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 8 Nov 2023 22:16:58 -0500 Subject: [PATCH 01/24] Add functionality to parse datetimes according C standard format codes. --- README.rst | 9 ++++++++- parse.py | 33 ++++++++++++++++++++++++++++++++- test_parse.py | 23 +++++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 123a446..0a823c6 100644 --- a/README.rst +++ b/README.rst @@ -206,6 +206,10 @@ tt Time time e.g. 10:21:36 PM -5:30 ===== =========================================== ======== +The type can also be a datetime format string, following the +`1989 C standard format codes`_, e.g. %Y-%m-%d. Any type containing %Y +or %y will be parsed and output as a ``datetime.datetime``. + Some examples of typed parsing with ``None`` returned if the typing does not match: @@ -244,7 +248,7 @@ a maximum. For example: >>> parse('{:2d}{:2d}', '0440') # parsing two contiguous numbers -Some notes for the date and time types: +Some notes for the special date and time types: - the presence of the time part is optional (including ISO 8601, starting at the "T"). A full datetime object will always be returned; the time @@ -277,6 +281,9 @@ that this limit will be removed one day. https://docs.python.org/3/library/string.html#format-string-syntax .. _`Format Specification Mini-Language`: https://docs.python.org/3/library/string.html#format-specification-mini-language +.. 
_`1989 C standard format codes`: + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes + Result and Match Objects diff --git a/parse.py b/parse.py index bbc999b..be148c1 100644 --- a/parse.py +++ b/parse.py @@ -5,6 +5,7 @@ # yes, I now have two problems import re import sys +from copy import copy from datetime import datetime, time, tzinfo, timedelta from decimal import Decimal from functools import partial @@ -272,6 +273,33 @@ def date_convert( return d +dt_format_to_regex = {symbol: "[0-9]{2}" for symbol in "ymdIMSUW"} +dt_format_to_regex.update({"-" + symbol: "[0-9]{1,2}" for symbol in "ymdIMS"}) + +dt_format_to_regex.update( + { + "a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", + "A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)", + "Y": "[0-9]{4}", + "H": "[0-9]{1,2}", + "B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)", + "b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", + "f": "[0-9]{6}", + "p": "(?:AM|PM)", + "z": "[+|-][0-9]{4}", + "j": "[0-9]{3}", + "-j": "[0-9]{1,3}", + } +) + + +def get_regex_for_datetime_format(format_): + regex = copy(format_) + for k, v in dt_format_to_regex.items(): + regex = regex.replace(f"%{k}", v) + return regex + + class TooManyFields(ValueError): pass @@ -327,7 +355,7 @@ def extract_format(format, extra_types): # the rest is the type, if present type = format - if type and type not in ALLOWED_TYPES and type not in extra_types: + if type and type not in ALLOWED_TYPES and type not in extra_types and "%Y" not in type and "%y" not in type: raise ValueError('format spec %r not recognised' % type) return locals() @@ -666,6 +694,9 @@ def _handle_field(self, field): self._type_conversions[ group ] = int_convert() # do not specify number base, determine it automatically + elif "%Y" in type or "%y" in type: + s = get_regex_for_datetime_format(type) + self._type_conversions[group] = lambda x, _: datetime.strptime(x, type) elif type == 'ti': s 
= r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT n = self._group_index diff --git a/test_parse.py b/test_parse.py index 2392686..d12de87 100644 --- a/test_parse.py +++ b/test_parse.py @@ -444,6 +444,29 @@ def test_two_datetimes(self): self.assertEqual(r[0], datetime(1997, 7, 16)) self.assertEqual(r[1], datetime(2012, 8, 1)) + def test_flexible_datetimes(self): + r = parse.parse('a {:%Y-%m-%d} b', "a 1997-07-16 b") + self.assertEqual(len(r.fixed), 1) + self.assertEqual(r[0], datetime(1997, 7, 16)) + + r = parse.parse('a {:%Y-%b-%d} b', "a 1997-Feb-16 b") + self.assertEqual(len(r.fixed), 1) + self.assertEqual(r[0], datetime(1997, 2, 16)) + + r = parse.parse('a {:%Y-%b-%d} {:d} b', "a 1997-Feb-16 8 b") + self.assertEqual(len(r.fixed), 2) + self.assertEqual(r[0], datetime(1997, 2, 16)) + + r = parse.parse('a {my_date:%Y-%b-%d} {num:d} b', "a 1997-Feb-16 8 b") + self.assertEqual((r.named["my_date"]), datetime(1997, 2, 16)) + self.assertEqual((r.named["num"]), 8) + + r = parse.parse('a {:%Y-%B-%d} b', "a 1997-February-16 b") + self.assertEqual(r[0], datetime(1997, 2, 16)) + + r = parse.parse('a {:%Y%m%d} b', "a 19970716 b") + self.assertEqual(r[0], datetime(1997, 7, 16)) + def test_datetimes(self): def y(fmt, s, e, tz=None): p = parse.compile(fmt) From f4b0cbe11cc32f12eb4bcc575b64060482fb289f Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 8 Nov 2023 22:34:13 -0500 Subject: [PATCH 02/24] use regex to dynamically create specific datetime regex paterns --- parse.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/parse.py b/parse.py index be148c1..bdec8be 100644 --- a/parse.py +++ b/parse.py @@ -292,12 +292,22 @@ def date_convert( } ) +# Compile a regular expression pattern that matches any date/time format symbol. 
+dt_format_symbols_re = re.compile('|'.join(re.escape(f"%{k}") for k in dt_format_to_regex.keys())) + def get_regex_for_datetime_format(format_): - regex = copy(format_) - for k, v in dt_format_to_regex.items(): - regex = regex.replace(f"%{k}", v) - return regex + """ + Generate a regex pattern for a given datetime format string. + + Parameters: + format_ (str): The datetime format string. + + Returns: + str: A regex pattern corresponding to the datetime format string. + """ + # Replace all format symbols with their regex patterns. + return dt_format_symbols_re.sub(lambda m: dt_format_to_regex[m.group(0)[1:]], format_) class TooManyFields(ValueError): From c1bee765bc255d3ee186d025f83aac2d5f18b437 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 8 Nov 2023 22:43:32 -0500 Subject: [PATCH 03/24] change from f-string to str.format syntax --- parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.py b/parse.py index bdec8be..fab20f6 100644 --- a/parse.py +++ b/parse.py @@ -293,7 +293,7 @@ def date_convert( ) # Compile a regular expression pattern that matches any date/time format symbol. 
-dt_format_symbols_re = re.compile('|'.join(re.escape(f"%{k}") for k in dt_format_to_regex.keys())) +dt_format_symbols_re = re.compile('|'.join(re.escape("%{}".format(k)) for k in dt_format_to_regex.keys())) def get_regex_for_datetime_format(format_): From 28b6336730cab7239ed9439d77f52bad486cd811 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 8 Nov 2023 22:45:17 -0500 Subject: [PATCH 04/24] remove copy import --- parse.py | 1 - 1 file changed, 1 deletion(-) diff --git a/parse.py b/parse.py index fab20f6..bbe5015 100644 --- a/parse.py +++ b/parse.py @@ -5,7 +5,6 @@ # yes, I now have two problems import re import sys -from copy import copy from datetime import datetime, time, tzinfo, timedelta from decimal import Decimal from functools import partial From cd31fc280c1d10389a19d1735741937798a8cd69 Mon Sep 17 00:00:00 2001 From: bendichter Date: Tue, 21 Nov 2023 14:34:57 -0500 Subject: [PATCH 05/24] allow for colons in time format --- parse.py | 2 +- tests/test_parse.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/parse.py b/parse.py index fe18c67..9458938 100644 --- a/parse.py +++ b/parse.py @@ -613,7 +613,7 @@ def _handle_field(self, field): format = "" if ":" in field: - name, format = field.split(":") + name, format = field.split(":", 1) else: name = field diff --git a/tests/test_parse.py b/tests/test_parse.py index 56a95d8..a13dfab 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -467,6 +467,10 @@ def test_flexible_datetimes(self): r = parse.parse('a {:%Y%m%d} b', "a 19970716 b") self.assertEqual(r[0], datetime(1997, 7, 16)) + def test_flexible_datetime_with_colon(self): + r = parse.parse("{dt:%Y-%m-%d %H:%M:%S}", "2023-11-21 13:23:27") + self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27)) + def test_datetimes(self): def y(fmt, s, e, tz=None): p = parse.compile(fmt) From 3d2ea9931198c35e24f7d32fd34e08e98f8ddd2a Mon Sep 17 00:00:00 2001 From: bendichter Date: Tue, 21 Nov 2023 15:01:43 -0500 Subject: [PATCH 
06/24] allow for more flexible parsing of %z --- parse.py | 2 +- tests/test_parse.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/parse.py b/parse.py index 9458938..524dc71 100644 --- a/parse.py +++ b/parse.py @@ -284,7 +284,7 @@ def date_convert( "b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", "f": "[0-9]{6}", "p": "(?:AM|PM)", - "z": "[+|-][0-9]{4}", + "z": "[+|-][0-9]{2}(:?[0-9]{2})?(:?[0-9]{2})?", "j": "[0-9]{3}", "-j": "[0-9]{1,3}", } diff --git a/tests/test_parse.py b/tests/test_parse.py index a13dfab..f4064d9 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -6,7 +6,7 @@ """ import unittest -from datetime import datetime, time +from datetime import datetime, time, timezone from decimal import Decimal import pickle import re @@ -471,6 +471,14 @@ def test_flexible_datetime_with_colon(self): r = parse.parse("{dt:%Y-%m-%d %H:%M:%S}", "2023-11-21 13:23:27") self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27)) + def test_flexible_datetime_with_timezone(self): + r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +0000") + self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) + + def test_flexible_datetime_with_timezone_that_has_colons(self): + r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +00:00:00") + self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) + def test_datetimes(self): def y(fmt, s, e, tz=None): p = parse.compile(fmt) From 32d83fb595b729026e5ad993276112da78b0cdcd Mon Sep 17 00:00:00 2001 From: bendichter Date: Tue, 21 Nov 2023 15:58:04 -0500 Subject: [PATCH 07/24] shield Python 2 from timezone features --- tests/test_parse.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index f4064d9..43c220f 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -4,9 +4,9 @@ This code is copyright 2011 eKit.com Inc 
(http://www.ekit.com/) See the end of the source file for the license of use. """ - +import sys import unittest -from datetime import datetime, time, timezone +from datetime import datetime, time from decimal import Decimal import pickle import re @@ -471,11 +471,15 @@ def test_flexible_datetime_with_colon(self): r = parse.parse("{dt:%Y-%m-%d %H:%M:%S}", "2023-11-21 13:23:27") self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27)) + @unittest.skipIf(sys.version_info[0] < 3, "Python 3+ required for timezone support") def test_flexible_datetime_with_timezone(self): + from datetime import timezone r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +0000") self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) + @unittest.skipIf(sys.version_info[0] < 3, "Python 3+ required for timezone support") def test_flexible_datetime_with_timezone_that_has_colons(self): + from datetime import timezone r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +00:00:00") self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) From acdfeb051aa4511e484ab0a5999d8328e086137a Mon Sep 17 00:00:00 2001 From: bendichter Date: Tue, 21 Nov 2023 17:55:26 -0500 Subject: [PATCH 08/24] add time parsing --- README.rst | 4 +++- parse.py | 9 ++++++--- tests/test_parse.py | 12 ++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 46fa790..7fe8cec 100644 --- a/README.rst +++ b/README.rst @@ -208,7 +208,9 @@ tt Time time The type can also be a datetime format string, following the `1989 C standard format codes`_, e.g. %Y-%m-%d. Any type containing %Y -or %y will be parsed and output as a ``datetime.datetime``. +or %y will be parsed and output as a ``datetime.datetime``. Remaining +types containing %H, %I, %M, %S, or %f will be parsed and output as a +``datetime.time``. 
Some examples of typed parsing with ``None`` returned if the typing does not match: diff --git a/parse.py b/parse.py index 524dc71..0493215 100644 --- a/parse.py +++ b/parse.py @@ -363,7 +363,7 @@ def extract_format(format, extra_types): # the rest is the type, if present type = format - if type and type not in ALLOWED_TYPES and type not in extra_types and "%Y" not in type and "%y" not in type: + if type and type not in ALLOWED_TYPES and type not in extra_types and not any("%" + x in type for x in "YyHIMSf"): raise ValueError("format spec %r not recognised" % type) return locals() @@ -706,9 +706,12 @@ def _handle_field(self, field): self._type_conversions[ group ] = int_convert() # do not specify number base, determine it automatically - elif "%Y" in type or "%y" in type: + elif any("%" + x in type for x in "YyHIMSf"): s = get_regex_for_datetime_format(type) - self._type_conversions[group] = lambda x, _: datetime.strptime(x, type) + if "%y" in type or "%Y" in type: + self._type_conversions[group] = lambda x, _: datetime.strptime(x, type) + else: + self._type_conversions[group] = lambda x, _: datetime.strptime(x, type).time() elif type == "ti": s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" 
% TIME_PAT n = self._group_index diff --git a/tests/test_parse.py b/tests/test_parse.py index 43c220f..ac3bb70 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -483,6 +483,18 @@ def test_flexible_datetime_with_timezone_that_has_colons(self): r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +00:00:00") self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) + def test_flexible_time(self): + r = parse.parse('a {time:%H:%M:%S} b', "a 13:23:27 b") + self.assertEqual(r.named["time"], time(13, 23, 27)) + + def test_flexible_time_no_hour(self): + r = parse.parse('a {time:%M:%S} b', "a 23:27 b") + self.assertEqual(r.named["time"], time(0, 23, 27)) + + def test_flexible_time_ms(self): + r = parse.parse('a {time:%M:%S:%f} b', "a 23:27:123456 b") + self.assertEqual(r.named["time"], time(0, 23, 27, 123456)) + def test_datetimes(self): def y(fmt, s, e, tz=None): p = parse.compile(fmt) From 5cc711166902424c86718ce3ceaf403d80e730c2 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Tue, 21 Nov 2023 21:59:42 -0600 Subject: [PATCH 09/24] uglify code with black --- README.rst | 1 + parse.py | 20 +++++++++++++++----- tests/test_parse.py | 28 +++++++++++++++++----------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index 7fe8cec..b142b58 100644 --- a/README.rst +++ b/README.rst @@ -432,6 +432,7 @@ To run the tests locally: Changelog --------- +- 1.20.0 Added support for strptime codes (thanks @bendichter) - 1.19.1 Added support for sign specifiers in number formats (thanks @anntzer) - 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen). Also corrected matching of *full string* vs. 
*full line* (thanks @giladreti) diff --git a/parse.py b/parse.py index 0493215..cc1cb13 100644 --- a/parse.py +++ b/parse.py @@ -273,7 +273,6 @@ def date_convert( dt_format_to_regex = {symbol: "[0-9]{2}" for symbol in "ymdIMSUW"} dt_format_to_regex.update({"-" + symbol: "[0-9]{1,2}" for symbol in "ymdIMS"}) - dt_format_to_regex.update( { "a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", @@ -291,7 +290,9 @@ def date_convert( ) # Compile a regular expression pattern that matches any date/time format symbol. -dt_format_symbols_re = re.compile('|'.join(re.escape("%{}".format(k)) for k in dt_format_to_regex.keys())) +dt_format_symbols_re = re.compile( + "|".join(re.escape("%{}".format(k)) for k in dt_format_to_regex) +) def get_regex_for_datetime_format(format_): @@ -305,7 +306,9 @@ def get_regex_for_datetime_format(format_): str: A regex pattern corresponding to the datetime format string. """ # Replace all format symbols with their regex patterns. - return dt_format_symbols_re.sub(lambda m: dt_format_to_regex[m.group(0)[1:]], format_) + return dt_format_symbols_re.sub( + lambda m: dt_format_to_regex[m.group(0)[1:]], format_ + ) class TooManyFields(ValueError): @@ -363,7 +366,12 @@ def extract_format(format, extra_types): # the rest is the type, if present type = format - if type and type not in ALLOWED_TYPES and type not in extra_types and not any("%" + x in type for x in "YyHIMSf"): + if ( + type + and type not in ALLOWED_TYPES + and type not in extra_types + and not any("%" + x in type for x in "YyHIMSf") + ): raise ValueError("format spec %r not recognised" % type) return locals() @@ -711,7 +719,9 @@ def _handle_field(self, field): if "%y" in type or "%Y" in type: self._type_conversions[group] = lambda x, _: datetime.strptime(x, type) else: - self._type_conversions[group] = lambda x, _: datetime.strptime(x, type).time() + self._type_conversions[group] = lambda x, _: datetime.strptime( + x, type + ).time() elif type == "ti": s = 
r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT n = self._group_index diff --git a/tests/test_parse.py b/tests/test_parse.py index ac3bb70..3f297e1 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -445,26 +445,26 @@ def test_two_datetimes(self): self.assertEqual(r[1], datetime(2012, 8, 1)) def test_flexible_datetimes(self): - r = parse.parse('a {:%Y-%m-%d} b', "a 1997-07-16 b") + r = parse.parse("a {:%Y-%m-%d} b", "a 1997-07-16 b") self.assertEqual(len(r.fixed), 1) self.assertEqual(r[0], datetime(1997, 7, 16)) - r = parse.parse('a {:%Y-%b-%d} b', "a 1997-Feb-16 b") + r = parse.parse("a {:%Y-%b-%d} b", "a 1997-Feb-16 b") self.assertEqual(len(r.fixed), 1) self.assertEqual(r[0], datetime(1997, 2, 16)) - r = parse.parse('a {:%Y-%b-%d} {:d} b', "a 1997-Feb-16 8 b") + r = parse.parse("a {:%Y-%b-%d} {:d} b", "a 1997-Feb-16 8 b") self.assertEqual(len(r.fixed), 2) self.assertEqual(r[0], datetime(1997, 2, 16)) - r = parse.parse('a {my_date:%Y-%b-%d} {num:d} b', "a 1997-Feb-16 8 b") + r = parse.parse("a {my_date:%Y-%b-%d} {num:d} b", "a 1997-Feb-16 8 b") self.assertEqual((r.named["my_date"]), datetime(1997, 2, 16)) self.assertEqual((r.named["num"]), 8) - r = parse.parse('a {:%Y-%B-%d} b', "a 1997-February-16 b") + r = parse.parse("a {:%Y-%B-%d} b", "a 1997-February-16 b") self.assertEqual(r[0], datetime(1997, 2, 16)) - r = parse.parse('a {:%Y%m%d} b', "a 19970716 b") + r = parse.parse("a {:%Y%m%d} b", "a 19970716 b") self.assertEqual(r[0], datetime(1997, 7, 16)) def test_flexible_datetime_with_colon(self): @@ -474,25 +474,31 @@ def test_flexible_datetime_with_colon(self): @unittest.skipIf(sys.version_info[0] < 3, "Python 3+ required for timezone support") def test_flexible_datetime_with_timezone(self): from datetime import timezone + r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +0000") - self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) + self.assertEqual( + r.named["dt"], 
datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc) + ) @unittest.skipIf(sys.version_info[0] < 3, "Python 3+ required for timezone support") def test_flexible_datetime_with_timezone_that_has_colons(self): from datetime import timezone + r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +00:00:00") - self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)) + self.assertEqual( + r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc) + ) def test_flexible_time(self): - r = parse.parse('a {time:%H:%M:%S} b', "a 13:23:27 b") + r = parse.parse("a {time:%H:%M:%S} b", "a 13:23:27 b") self.assertEqual(r.named["time"], time(13, 23, 27)) def test_flexible_time_no_hour(self): - r = parse.parse('a {time:%M:%S} b', "a 23:27 b") + r = parse.parse("a {time:%M:%S} b", "a 23:27 b") self.assertEqual(r.named["time"], time(0, 23, 27)) def test_flexible_time_ms(self): - r = parse.parse('a {time:%M:%S:%f} b', "a 23:27:123456 b") + r = parse.parse("a {time:%M:%S:%f} b", "a 23:27:123456 b") self.assertEqual(r.named["time"], time(0, 23, 27, 123456)) def test_datetimes(self): From f71aa7ac0cdb633a2fbecb8f6c3da28befeb0129 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Tue, 21 Nov 2023 22:00:50 -0600 Subject: [PATCH 10/24] bump version --- parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.py b/parse.py index cc1cb13..aede912 100644 --- a/parse.py +++ b/parse.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -__version__ = "1.19.1" +__version__ = "1.20.0" # yes, I now have two problems import re From 72591f7387e215298976b404b1e86d85b706148a Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Nov 2023 13:26:10 -0500 Subject: [PATCH 11/24] handle and test single digits for day and month --- parse.py | 5 ++--- tests/test_parse.py | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/parse.py b/parse.py index 0493215..29856a8 100644 --- a/parse.py +++ b/parse.py @@ -271,15 
+271,14 @@ def date_convert( return d -dt_format_to_regex = {symbol: "[0-9]{2}" for symbol in "ymdIMSUW"} -dt_format_to_regex.update({"-" + symbol: "[0-9]{1,2}" for symbol in "ymdIMS"}) +dt_format_to_regex = {symbol: "[0-9]{2}" for symbol in "yMS"} +dt_format_to_regex.update({symbol: "[0-9]{1,2}" for symbol in "mdIUWH"}) dt_format_to_regex.update( { "a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", "A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)", "Y": "[0-9]{4}", - "H": "[0-9]{1,2}", "B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)", "b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", "f": "[0-9]{6}", diff --git a/tests/test_parse.py b/tests/test_parse.py index ac3bb70..2af7e4d 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -495,6 +495,10 @@ def test_flexible_time_ms(self): r = parse.parse('a {time:%M:%S:%f} b', "a 23:27:123456 b") self.assertEqual(r.named["time"], time(0, 23, 27, 123456)) + def test_flexible_dates_single_digit(self): + r = parse.parse("{dt:%Y/%m/%d}", "2023/1/1") + self.assertEqual(r.named["dt"], datetime(2023, 1, 1, 0, 0)) + def test_datetimes(self): def y(fmt, s, e, tz=None): p = parse.compile(fmt) From 5210e18eb13196f9bf3af0499d3bd553f34a14e4 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Wed, 22 Nov 2023 20:45:09 -0600 Subject: [PATCH 12/24] remove %-j handling --- parse.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/parse.py b/parse.py index 77b455d..27d14f3 100644 --- a/parse.py +++ b/parse.py @@ -284,14 +284,11 @@ def date_convert( "p": "(?:AM|PM)", "z": "[+|-][0-9]{2}(:?[0-9]{2})?(:?[0-9]{2})?", "j": "[0-9]{3}", - "-j": "[0-9]{1,3}", } ) # Compile a regular expression pattern that matches any date/time format symbol. 
-dt_format_symbols_re = re.compile( - "|".join(re.escape("%{}".format(k)) for k in dt_format_to_regex) -) +dt_format_symbols_re = re.compile("|".join("%" + k for k in dt_format_to_regex)) def get_regex_for_datetime_format(format_): From fb6d2c0d594a628a831e878ddd58b725b404bf6b Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Nov 2023 21:51:43 -0500 Subject: [PATCH 13/24] make j flexible number of digits --- parse.py | 2 +- tests/test_parse.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/parse.py b/parse.py index 27d14f3..0f860e3 100644 --- a/parse.py +++ b/parse.py @@ -283,7 +283,7 @@ def date_convert( "f": "[0-9]{6}", "p": "(?:AM|PM)", "z": "[+|-][0-9]{2}(:?[0-9]{2})?(:?[0-9]{2})?", - "j": "[0-9]{3}", + "j": "[0-9]{1,3}", } ) diff --git a/tests/test_parse.py b/tests/test_parse.py index cb74f3f..b3b2d17 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -505,6 +505,13 @@ def test_flexible_dates_single_digit(self): r = parse.parse("{dt:%Y/%m/%d}", "2023/1/1") self.assertEqual(r.named["dt"], datetime(2023, 1, 1, 0, 0)) + def test_flexible_dates_j(self): + r = parse.parse("{dt:%Y/%j}", "2023/9") + self.assertEqual(r.named["dt"], datetime(2023, 1, 9, 0, 0)) + + r = parse.parse("{dt:%Y/%j}", "2023/009") + self.assertEqual(r.named["dt"], datetime(2023, 1, 9, 0, 0)) + def test_datetimes(self): def y(fmt, s, e, tz=None): p = parse.compile(fmt) From 550f5e5637bca7e8fbfc6a738f557993513927ec Mon Sep 17 00:00:00 2001 From: wim glenn Date: Wed, 22 Nov 2023 21:14:50 -0600 Subject: [PATCH 14/24] simplify and reorder, to match docs --- parse.py | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/parse.py b/parse.py index 0f860e3..10b2d7d 100644 --- a/parse.py +++ b/parse.py @@ -270,25 +270,32 @@ def date_convert( return d - -dt_format_to_regex = {symbol: "[0-9]{2}" for symbol in "yMS"} -dt_format_to_regex.update({symbol: "[0-9]{1,2}" for symbol in "mdIUWH"}) 
-dt_format_to_regex.update( - { - "a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", - "A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)", - "Y": "[0-9]{4}", - "B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)", - "b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", - "f": "[0-9]{6}", - "p": "(?:AM|PM)", - "z": "[+|-][0-9]{2}(:?[0-9]{2})?(:?[0-9]{2})?", - "j": "[0-9]{1,3}", - } -) +# ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes +dt_format_to_regex = { + "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", + "%A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)", + "%w": "[0-6]", + "%d": "[0-9]{1,2}", + "%b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", + "%B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)", + "%m": "[0-9]{1,2}", + "%y": "[0-9]{2}", + "%Y": "[0-9]{4}", + "%H": "[0-9]{1,2}", + "%I": "[0-9]{1,2}", + "%p": "(?:AM|PM)", + "%M": "[0-9]{2}", + "%S": "[0-9]{2}", + "%f": "[0-9]{6}", + "%z": "[+|-][0-9]{2}(:?[0-9]{2})?(:?[0-9]{2})?", + # "%Z": punt + "%j": "[0-9]{1,3}", + "%U": "[0-9]{1,2}", + "%W": "[0-9]{1,2}", +} # Compile a regular expression pattern that matches any date/time format symbol. -dt_format_symbols_re = re.compile("|".join("%" + k for k in dt_format_to_regex)) +dt_format_symbols_re = re.compile("|".join(dt_format_to_regex)) def get_regex_for_datetime_format(format_): @@ -303,7 +310,7 @@ def get_regex_for_datetime_format(format_): """ # Replace all format symbols with their regex patterns. 
return dt_format_symbols_re.sub( - lambda m: dt_format_to_regex[m.group(0)[1:]], format_ + lambda m: dt_format_to_regex[m.group(0)], format_ ) From 8771d3ffada6ef38ff32292c2e11a903dd37e7f9 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Wed, 22 Nov 2023 21:18:01 -0600 Subject: [PATCH 15/24] just use the map directly --- parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parse.py b/parse.py index 10b2d7d..679f86c 100644 --- a/parse.py +++ b/parse.py @@ -373,7 +373,7 @@ def extract_format(format, extra_types): type and type not in ALLOWED_TYPES and type not in extra_types - and not any("%" + x in type for x in "YyHIMSf") + and not any(k in type for k in dt_format_to_regex) ): raise ValueError("format spec %r not recognised" % type) @@ -717,7 +717,7 @@ def _handle_field(self, field): self._type_conversions[ group ] = int_convert() # do not specify number base, determine it automatically - elif any("%" + x in type for x in "YyHIMSf"): + elif any(k in type for k in dt_format_to_regex): s = get_regex_for_datetime_format(type) if "%y" in type or "%Y" in type: self._type_conversions[group] = lambda x, _: datetime.strptime(x, type) From fb0c8d9bb38a5055e3b3bd9c9311fa41917129b0 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Wed, 22 Nov 2023 21:25:17 -0600 Subject: [PATCH 16/24] readability improvements --- parse.py | 96 +++++++++++++++++++------------------------------------- 1 file changed, 33 insertions(+), 63 deletions(-) diff --git a/parse.py b/parse.py index 679f86c..4d67c14 100644 --- a/parse.py +++ b/parse.py @@ -270,6 +270,7 @@ def date_convert( return d + # ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes dt_format_to_regex = { "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", @@ -309,9 +310,7 @@ def get_regex_for_datetime_format(format_): str: A regex pattern corresponding to the datetime format string. """ # Replace all format symbols with their regex patterns. 
- return dt_format_symbols_re.sub( - lambda m: dt_format_to_regex[m.group(0)], format_ - ) + return dt_format_symbols_re.sub(lambda m: dt_format_to_regex[m.group(0)], format_) class TooManyFields(ValueError): @@ -665,6 +664,7 @@ def _handle_field(self, field): # figure type conversions, if any type = format["type"] is_numeric = type and type in "n%fegdobx" + conv = self._type_conversions if type in self._extra_types: type_converter = self._extra_types[type] s = getattr(type_converter, "pattern", r".+?") @@ -672,40 +672,40 @@ def _handle_field(self, field): if regex_group_count is None: regex_group_count = 0 self._group_index += regex_group_count - self._type_conversions[group] = convert_first(type_converter) + conv[group] = convert_first(type_converter) elif type == "n": s = r"\d{1,3}([,.]\d{3})*" self._group_index += 1 - self._type_conversions[group] = int_convert(10) + conv[group] = int_convert(10) elif type == "b": s = r"(0[bB])?[01]+" - self._type_conversions[group] = int_convert(2) + conv[group] = int_convert(2) self._group_index += 1 elif type == "o": s = r"(0[oO])?[0-7]+" - self._type_conversions[group] = int_convert(8) + conv[group] = int_convert(8) self._group_index += 1 elif type == "x": s = r"(0[xX])?[0-9a-fA-F]+" - self._type_conversions[group] = int_convert(16) + conv[group] = int_convert(16) self._group_index += 1 elif type == "%": s = r"\d+(\.\d+)?%" self._group_index += 1 - self._type_conversions[group] = percentage + conv[group] = percentage elif type == "f": s = r"\d*\.\d+" - self._type_conversions[group] = convert_first(float) + conv[group] = convert_first(float) elif type == "F": s = r"\d*\.\d+" - self._type_conversions[group] = convert_first(Decimal) + conv[group] = convert_first(Decimal) elif type == "e": s = r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF" - self._type_conversions[group] = convert_first(float) + conv[group] = convert_first(float) elif type == "g": s = r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF" self._group_index 
+= 2 - self._type_conversions[group] = convert_first(float) + conv[group] = convert_first(float) elif type == "d": if format.get("width"): width = r"{1,%s}" % int(format["width"]) @@ -714,93 +714,63 @@ def _handle_field(self, field): s = r"\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}".format( w=width ) - self._type_conversions[ - group - ] = int_convert() # do not specify number base, determine it automatically + conv[group] = int_convert() + # do not specify number base, determine it automatically elif any(k in type for k in dt_format_to_regex): s = get_regex_for_datetime_format(type) if "%y" in type or "%Y" in type: - self._type_conversions[group] = lambda x, _: datetime.strptime(x, type) + conv[group] = lambda x, _: datetime.strptime(x, type) else: - self._type_conversions[group] = lambda x, _: datetime.strptime( - x, type - ).time() + conv[group] = lambda x, _: datetime.strptime(x, type).time() elif type == "ti": s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT n = self._group_index - self._type_conversions[group] = partial( - date_convert, ymd=n + 1, hms=n + 4, tz=n + 7 - ) + conv[group] = partial(date_convert, ymd=n + 1, hms=n + 4, tz=n + 7) self._group_index += 7 elif type == "tg": - s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?" % ( - ALL_MONTHS_PAT, - TIME_PAT, - AM_PAT, - TZ_PAT, - ) + s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?" + s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial( + conv[group] = partial( date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9 ) self._group_index += 9 elif type == "ta": - s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?" % ( - ALL_MONTHS_PAT, - TIME_PAT, - AM_PAT, - TZ_PAT, - ) + s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?" 
+ s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial( + conv[group] = partial( date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9 ) self._group_index += 9 elif type == "te": # this will allow microseconds through if they're present, but meh - s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s" % ( - DAYS_PAT, - MONTHS_PAT, - TIME_PAT, - TZ_PAT, - ) + s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s" + s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial( - date_convert, dmy=n + 3, hms=n + 5, tz=n + 8 - ) + conv[group] = partial(date_convert, dmy=n + 3, hms=n + 5, tz=n + 8) self._group_index += 8 elif type == "th": # slight flexibility here from the stock Apache format s = r"(\d{1,2}[-/]%s[-/]\d{4}):%s%s" % (MONTHS_PAT, TIME_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial( - date_convert, dmy=n + 1, hms=n + 3, tz=n + 6 - ) + conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6) self._group_index += 6 elif type == "tc": - s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})" % ( - DAYS_PAT, - MONTHS_PAT, - TIME_PAT, - ) + s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})" + s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT) n = self._group_index - self._type_conversions[group] = partial( - date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5 - ) + conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5) self._group_index += 8 elif type == "tt": s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index - self._type_conversions[group] = partial( - date_convert, hms=n + 1, am=n + 4, tz=n + 5 - ) + conv[group] = partial(date_convert, hms=n + 1, am=n + 4, tz=n + 5) self._group_index += 5 elif type == "ts": s = r"%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?" 
% MONTHS_PAT n = self._group_index - self._type_conversions[group] = partial( - date_convert, mm=n + 1, dd=n + 3, hms=n + 5 - ) + conv[group] = partial(date_convert, mm=n + 1, dd=n + 3, hms=n + 5) self._group_index += 5 elif type == "l": s = r"[A-Za-z]+" From 77328ef755620058e8e335b2c94dc259d754e8ef Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Nov 2023 22:29:57 -0500 Subject: [PATCH 17/24] change "tc" to use generic datetime parsing approach --- parse.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/parse.py b/parse.py index 679f86c..860d6e7 100644 --- a/parse.py +++ b/parse.py @@ -778,16 +778,8 @@ def _handle_field(self, field): ) self._group_index += 6 elif type == "tc": - s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})" % ( - DAYS_PAT, - MONTHS_PAT, - TIME_PAT, - ) - n = self._group_index - self._type_conversions[group] = partial( - date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5 - ) - self._group_index += 8 + s = get_regex_for_datetime_format("%a %b %d %H:%M:%S %Y") + self._type_conversions[group] = lambda x, _: datetime.strptime(x, "%a %b %d %H:%M:%S %Y") elif type == "tt": s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index From 34edfc875c3ba5d04c38bfbbdd6cc533210201ea Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Nov 2023 22:31:35 -0500 Subject: [PATCH 18/24] use new conv variable --- parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.py b/parse.py index 345878a..6231016 100644 --- a/parse.py +++ b/parse.py @@ -758,7 +758,7 @@ def _handle_field(self, field): self._group_index += 6 elif type == "tc": s = get_regex_for_datetime_format("%a %b %d %H:%M:%S %Y") - self._type_conversions[group] = lambda x, _: datetime.strptime(x, "%a %b %d %H:%M:%S %Y") + conv[group] = lambda x, _: datetime.strptime(x, "%a %b %d %H:%M:%S %Y") elif type == "tt": s = r"%s?%s?%s?" 
% (TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index From 2c5f9059e116bf2bc638d52bf92174a34fc7a37b Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Nov 2023 22:43:35 -0500 Subject: [PATCH 19/24] revert to old logic for tc --- parse.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/parse.py b/parse.py index 6231016..be27a62 100644 --- a/parse.py +++ b/parse.py @@ -757,8 +757,16 @@ def _handle_field(self, field): conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6) self._group_index += 6 elif type == "tc": - s = get_regex_for_datetime_format("%a %b %d %H:%M:%S %Y") - conv[group] = lambda x, _: datetime.strptime(x, "%a %b %d %H:%M:%S %Y") + s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})" % ( + DAYS_PAT, + MONTHS_PAT, + TIME_PAT, + ) + n = self._group_index + conv[group] = partial( + date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5 + ) + self._group_index += 8 elif type == "tt": s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index From 6af64dd926d623f514eafa2890073037396d66e7 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Wed, 22 Nov 2023 21:47:16 -0600 Subject: [PATCH 20/24] blacken again --- parse.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/parse.py b/parse.py index be27a62..8c3cde9 100644 --- a/parse.py +++ b/parse.py @@ -739,9 +739,7 @@ def _handle_field(self, field): s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?" 
s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index - conv[group] = partial( - date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9 - ) + conv[group] = partial(date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9) self._group_index += 9 elif type == "te": # this will allow microseconds through if they're present, but meh @@ -757,15 +755,10 @@ def _handle_field(self, field): conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6) self._group_index += 6 elif type == "tc": - s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})" % ( - DAYS_PAT, - MONTHS_PAT, - TIME_PAT, - ) + s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})" + s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT) n = self._group_index - conv[group] = partial( - date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5 - ) + conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5) self._group_index += 8 elif type == "tt": s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT) From b08ed213750a3b8fa4936a53c1f176488326ef40 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Wed, 22 Nov 2023 21:47:51 -0600 Subject: [PATCH 21/24] blacken again sorry --- parse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parse.py b/parse.py index 8c3cde9..4d67c14 100644 --- a/parse.py +++ b/parse.py @@ -739,7 +739,9 @@ def _handle_field(self, field): s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?" s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT) n = self._group_index - conv[group] = partial(date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9) + conv[group] = partial( + date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9 + ) self._group_index += 9 elif type == "te": # this will allow microseconds through if they're present, but meh From 4b1ba6103cffc8426e04dee4b7ba1273e1eb1d32 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Nov 2023 23:15:44 -0500 Subject: [PATCH 22/24] new logic: If it contains day, month, or year, it's a date. Missing years default to the current year. 
If it contains hours, minutes, seconds, or milliseconds, it's a time. If it is a date and a time it's a datetime. --- parse.py | 24 ++++++++++++++++++++---- tests/test_parse.py | 24 ++++++++++++++---------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/parse.py b/parse.py index be27a62..1da5a9c 100644 --- a/parse.py +++ b/parse.py @@ -271,6 +271,25 @@ def date_convert( return d +def strf_date_convert(x, _, type): + + is_date = any("%" + x in type for x in "aAwdbBmyYjUW") + is_time = any("%" + x in type for x in "HIpMSfz") + + dt = datetime.strptime(x, type) + if "%y" not in type and "%Y" not in type: # year not specified + dt = dt.replace(year=datetime.today().year) + + if is_date and is_time: + return dt + elif is_date: + return dt.date() + elif is_time: + return dt.time() + else: + raise ValueError("Datetime not a date nor a time?") + + # ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes dt_format_to_regex = { "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)", @@ -718,10 +737,7 @@ def _handle_field(self, field): # do not specify number base, determine it automatically elif any(k in type for k in dt_format_to_regex): s = get_regex_for_datetime_format(type) - if "%y" in type or "%Y" in type: - conv[group] = lambda x, _: datetime.strptime(x, type) - else: - conv[group] = lambda x, _: datetime.strptime(x, type).time() + conv[group] = partial(strf_date_convert, type=type) elif type == "ti": s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" 
% TIME_PAT n = self._group_index diff --git a/tests/test_parse.py b/tests/test_parse.py index b3b2d17..f413100 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -6,7 +6,7 @@ """ import sys import unittest -from datetime import datetime, time +from datetime import datetime, time, date from decimal import Decimal import pickle import re @@ -447,25 +447,25 @@ def test_two_datetimes(self): def test_flexible_datetimes(self): r = parse.parse("a {:%Y-%m-%d} b", "a 1997-07-16 b") self.assertEqual(len(r.fixed), 1) - self.assertEqual(r[0], datetime(1997, 7, 16)) + self.assertEqual(r[0], date(1997, 7, 16)) r = parse.parse("a {:%Y-%b-%d} b", "a 1997-Feb-16 b") self.assertEqual(len(r.fixed), 1) - self.assertEqual(r[0], datetime(1997, 2, 16)) + self.assertEqual(r[0], date(1997, 2, 16)) r = parse.parse("a {:%Y-%b-%d} {:d} b", "a 1997-Feb-16 8 b") self.assertEqual(len(r.fixed), 2) - self.assertEqual(r[0], datetime(1997, 2, 16)) + self.assertEqual(r[0], date(1997, 2, 16)) r = parse.parse("a {my_date:%Y-%b-%d} {num:d} b", "a 1997-Feb-16 8 b") - self.assertEqual((r.named["my_date"]), datetime(1997, 2, 16)) + self.assertEqual((r.named["my_date"]), date(1997, 2, 16)) self.assertEqual((r.named["num"]), 8) r = parse.parse("a {:%Y-%B-%d} b", "a 1997-February-16 b") - self.assertEqual(r[0], datetime(1997, 2, 16)) + self.assertEqual(r[0], date(1997, 2, 16)) r = parse.parse("a {:%Y%m%d} b", "a 19970716 b") - self.assertEqual(r[0], datetime(1997, 7, 16)) + self.assertEqual(r[0], date(1997, 7, 16)) def test_flexible_datetime_with_colon(self): r = parse.parse("{dt:%Y-%m-%d %H:%M:%S}", "2023-11-21 13:23:27") @@ -503,14 +503,18 @@ def test_flexible_time_ms(self): def test_flexible_dates_single_digit(self): r = parse.parse("{dt:%Y/%m/%d}", "2023/1/1") - self.assertEqual(r.named["dt"], datetime(2023, 1, 1, 0, 0)) + self.assertEqual(r.named["dt"], date(2023, 1, 1)) def test_flexible_dates_j(self): r = parse.parse("{dt:%Y/%j}", "2023/9") - self.assertEqual(r.named["dt"], datetime(2023, 1, 
9, 0, 0)) + self.assertEqual(r.named["dt"], date(2023, 1, 9)) r = parse.parse("{dt:%Y/%j}", "2023/009") - self.assertEqual(r.named["dt"], datetime(2023, 1, 9, 0, 0)) + self.assertEqual(r.named["dt"], date(2023, 1, 9)) + + def test_flexible_dates_year_current_year_inferred(self): + r = parse.parse("{dt:%j}", "9") + self.assertEqual(r.named["dt"], date(datetime.today().year, 1, 9)) def test_datetimes(self): def y(fmt, s, e, tz=None): From 44c97bbb61d3d58230ed76574994b6acdd5034bc Mon Sep 17 00:00:00 2001 From: wim glenn Date: Thu, 23 Nov 2023 12:57:46 -0600 Subject: [PATCH 23/24] doc update --- README.rst | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index b142b58..9b6b9d9 100644 --- a/README.rst +++ b/README.rst @@ -207,10 +207,19 @@ tt Time time ===== =========================================== ======== The type can also be a datetime format string, following the -`1989 C standard format codes`_, e.g. %Y-%m-%d. Any type containing %Y -or %y will be parsed and output as a ``datetime.datetime``. Remaining -types containing %H, %I, %M, %S, or %f will be parsed and output as a -``datetime.time``. +`1989 C standard format codes`_, e.g. ``%Y-%m-%d``. Depending on the +directives contained in the format string, parsed output may be an instance of +``datetime.datetime``, ``datetime.time``, or ``datetime.date``. + +.. 
code-block:: pycon + + >>> parse("{:%Y-%m-%d %H:%M:%S}", "2023-11-23 12:56:47") + <Result (datetime.datetime(2023, 11, 23, 12, 56, 47),) {}> + >>> parse("{:%H:%M}", "10:26") + <Result (datetime.time(10, 26),) {}> + >>> parse("{:%Y/%m/%d}", "2023/11/25") + <Result (datetime.date(2023, 11, 25),) {}> + + Some examples of typed parsing with ``None`` returned if the typing does not match: From 58c998ba5daeb23b22de4458227ee63900e3dae9 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Fri, 24 Nov 2023 19:07:52 -0600 Subject: [PATCH 24/24] test roundtrip every directive --- tests/test_parse.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_parse.py b/tests/test_parse.py index f413100..3c5035a 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1170,6 +1170,14 @@ def test_int_convert_stateless_base(self): self.assertEqual(parser.parse("0b1011")[0], 0b1011) +def test_strftime_strptime_roundtrip(): + dt = datetime.now() + fmt = "_".join([k for k in parse.dt_format_to_regex if k != "%z"]) + s = dt.strftime(fmt) + [res] = parse.parse("{:" + fmt + "}", s) + assert res == dt + + if __name__ == "__main__": unittest.main()