Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality to parse datetimes according to C standard format codes. #165

Merged
merged 28 commits into from
Nov 25, 2023
Merged
Changes from 3 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8237019
Add functionality to parse datetimes according to C standard format codes.
bendichter Nov 9, 2023
f4b0cbe
use regex to dynamically create specific datetime regex patterns
bendichter Nov 9, 2023
c1bee76
change from f-string to str.format syntax
bendichter Nov 9, 2023
28b6336
remove copy import
bendichter Nov 9, 2023
30173ed
Merge branch 'og_master' into parse_flexible_dates
bendichter Nov 9, 2023
cd31fc2
allow for colons in time format
bendichter Nov 21, 2023
3d2ea99
allow for more flexible parsing of %z
bendichter Nov 21, 2023
32d83fb
shield Python 2 from timezone features
bendichter Nov 21, 2023
acdfeb0
add time parsing
bendichter Nov 21, 2023
5cc7111
uglify code with black
wimglenn Nov 22, 2023
f71aa7a
bump version
wimglenn Nov 22, 2023
72591f7
handle and test single digits for day and month
bendichter Nov 22, 2023
c3a4f32
Merge remote-tracking branch 'origin/parse_flexible_dates' into parse…
bendichter Nov 22, 2023
5210e18
remove %-j handling
wimglenn Nov 23, 2023
fb6d2c0
make j flexible number of digits
bendichter Nov 23, 2023
550f5e5
simplify and reorder, to match docs
wimglenn Nov 23, 2023
8771d3f
just use the map directly
wimglenn Nov 23, 2023
fb0c8d9
readability improvements
wimglenn Nov 23, 2023
77328ef
change "tc" to use generic datetime parsing approach
bendichter Nov 23, 2023
11342e1
Merge remote-tracking branch 'origin/parse_flexible_dates' into parse…
bendichter Nov 23, 2023
34edfc8
use new conv variable
bendichter Nov 23, 2023
2c5f905
revert to old logic for tc
bendichter Nov 23, 2023
6af64dd
blacken again
wimglenn Nov 23, 2023
b08ed21
blacken again sorry
wimglenn Nov 23, 2023
4b1ba61
new logic:
bendichter Nov 23, 2023
fd1a414
Merge remote-tracking branch 'origin/parse_flexible_dates' into parse…
bendichter Nov 23, 2023
44c97bb
doc update
wimglenn Nov 23, 2023
58c998b
test roundtrip every directive
wimglenn Nov 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -432,6 +432,7 @@ To run the tests locally:
Changelog
---------

- 1.20.0 Added support for strptime codes (thanks @bendichter)
- 1.19.1 Added support for sign specifiers in number formats (thanks @anntzer)
- 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen).
Also corrected matching of *full string* vs. *full line* (thanks @giladreti)
22 changes: 16 additions & 6 deletions parse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import

__version__ = "1.19.1"
__version__ = "1.20.0"

# yes, I now have two problems
import re
@@ -273,7 +273,6 @@ def date_convert(

dt_format_to_regex = {symbol: "[0-9]{2}" for symbol in "yMS"}
dt_format_to_regex.update({symbol: "[0-9]{1,2}" for symbol in "mdIUWH"})

dt_format_to_regex.update(
{
"a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)",
@@ -290,7 +289,9 @@ def date_convert(
)

# Compile a regular expression pattern that matches any date/time format symbol.
dt_format_symbols_re = re.compile('|'.join(re.escape("%{}".format(k)) for k in dt_format_to_regex.keys()))
dt_format_symbols_re = re.compile(
"|".join(re.escape("%{}".format(k)) for k in dt_format_to_regex)
)


def get_regex_for_datetime_format(format_):
@@ -304,7 +305,9 @@ def get_regex_for_datetime_format(format_):
str: A regex pattern corresponding to the datetime format string.
"""
# Replace all format symbols with their regex patterns.
return dt_format_symbols_re.sub(lambda m: dt_format_to_regex[m.group(0)[1:]], format_)
return dt_format_symbols_re.sub(
lambda m: dt_format_to_regex[m.group(0)[1:]], format_
)


class TooManyFields(ValueError):
@@ -362,7 +365,12 @@ def extract_format(format, extra_types):

# the rest is the type, if present
type = format
if type and type not in ALLOWED_TYPES and type not in extra_types and not any("%" + x in type for x in "YyHIMSf"):
if (
type
and type not in ALLOWED_TYPES
and type not in extra_types
and not any("%" + x in type for x in "YyHIMSf")
):
raise ValueError("format spec %r not recognised" % type)

return locals()
@@ -710,7 +718,9 @@ def _handle_field(self, field):
if "%y" in type or "%Y" in type:
self._type_conversions[group] = lambda x, _: datetime.strptime(x, type)
else:
self._type_conversions[group] = lambda x, _: datetime.strptime(x, type).time()
self._type_conversions[group] = lambda x, _: datetime.strptime(
x, type
).time()
elif type == "ti":
s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT
n = self._group_index
28 changes: 17 additions & 11 deletions tests/test_parse.py
Original file line number Diff line number Diff line change
@@ -445,26 +445,26 @@ def test_two_datetimes(self):
self.assertEqual(r[1], datetime(2012, 8, 1))

def test_flexible_datetimes(self):
r = parse.parse('a {:%Y-%m-%d} b', "a 1997-07-16 b")
r = parse.parse("a {:%Y-%m-%d} b", "a 1997-07-16 b")
self.assertEqual(len(r.fixed), 1)
self.assertEqual(r[0], datetime(1997, 7, 16))

r = parse.parse('a {:%Y-%b-%d} b', "a 1997-Feb-16 b")
r = parse.parse("a {:%Y-%b-%d} b", "a 1997-Feb-16 b")
self.assertEqual(len(r.fixed), 1)
self.assertEqual(r[0], datetime(1997, 2, 16))

r = parse.parse('a {:%Y-%b-%d} {:d} b', "a 1997-Feb-16 8 b")
r = parse.parse("a {:%Y-%b-%d} {:d} b", "a 1997-Feb-16 8 b")
self.assertEqual(len(r.fixed), 2)
self.assertEqual(r[0], datetime(1997, 2, 16))

r = parse.parse('a {my_date:%Y-%b-%d} {num:d} b', "a 1997-Feb-16 8 b")
r = parse.parse("a {my_date:%Y-%b-%d} {num:d} b", "a 1997-Feb-16 8 b")
self.assertEqual((r.named["my_date"]), datetime(1997, 2, 16))
self.assertEqual((r.named["num"]), 8)

r = parse.parse('a {:%Y-%B-%d} b', "a 1997-February-16 b")
r = parse.parse("a {:%Y-%B-%d} b", "a 1997-February-16 b")
self.assertEqual(r[0], datetime(1997, 2, 16))

r = parse.parse('a {:%Y%m%d} b', "a 19970716 b")
r = parse.parse("a {:%Y%m%d} b", "a 19970716 b")
self.assertEqual(r[0], datetime(1997, 7, 16))

def test_flexible_datetime_with_colon(self):
@@ -474,25 +474,31 @@ def test_flexible_datetime_with_colon(self):
@unittest.skipIf(sys.version_info[0] < 3, "Python 3+ required for timezone support")
def test_flexible_datetime_with_timezone(self):
from datetime import timezone

r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +0000")
self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc))
self.assertEqual(
r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)
)

@unittest.skipIf(sys.version_info[0] < 3, "Python 3+ required for timezone support")
def test_flexible_datetime_with_timezone_that_has_colons(self):
from datetime import timezone

r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +00:00:00")
self.assertEqual(r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc))
self.assertEqual(
r.named["dt"], datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)
)

def test_flexible_time(self):
r = parse.parse('a {time:%H:%M:%S} b', "a 13:23:27 b")
r = parse.parse("a {time:%H:%M:%S} b", "a 13:23:27 b")
self.assertEqual(r.named["time"], time(13, 23, 27))

def test_flexible_time_no_hour(self):
r = parse.parse('a {time:%M:%S} b', "a 23:27 b")
r = parse.parse("a {time:%M:%S} b", "a 23:27 b")
self.assertEqual(r.named["time"], time(0, 23, 27))

def test_flexible_time_ms(self):
r = parse.parse('a {time:%M:%S:%f} b', "a 23:27:123456 b")
r = parse.parse("a {time:%M:%S:%f} b", "a 23:27:123456 b")
self.assertEqual(r.named["time"], time(0, 23, 27, 123456))

def test_flexible_dates_single_digit(self):