Skip to content

Commit

Permalink
Merge pull request #157 from ohadmata/add-static-date-formats
Browse files Browse the repository at this point in the history
include static patterns in timestamp + added formats
  • Loading branch information
ohadmata authored Jun 27, 2024
2 parents 9828a5b + 22a9540 commit 52e3f00
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
15 changes: 12 additions & 3 deletions src/shmessy/types/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
class DateType(BaseType):
weight = 2
delimiters: list[str] = {"/", ".", "-", " "}
static_patterns: list[str] = ["%B %d, %Y"] # January 23, 2024
static_patterns: list[str] = [
"%B %d, %Y", # January 23, 2024
"%b %d, %Y", # Feb 17, 1995
"%d %b %y, %a", # 10 Apr 24, Wed
]
date_only_patterns: list[list[str]] = [
# Do not attach time combinations to these patterns
["%Y", "%m"], # 2022-07 | 2022 07 | 2022/07 | 2022.07
Expand All @@ -38,7 +42,9 @@ class DateType(BaseType):

@classmethod
def get_patterns(
cls, include_date_only_patterns: Optional[bool] = True
cls,
include_date_only_patterns: Optional[bool] = True,
include_static_date_patterns: Optional[bool] = True,
) -> list[str]:
# The value returned cannot be set since the order is important!
input_patterns: list[list[str]] = cls.dynamic_patterns.copy()
Expand All @@ -49,7 +55,10 @@ def get_patterns(
for pattern in input_patterns:
for delimiter in cls.delimiters:
results.append(delimiter.join(pattern))
return results + cls.static_patterns

if include_static_date_patterns:
return results + cls.static_patterns
return results

def validate(self, data: ndarray) -> Optional[InferredField]:
return validate(
Expand Down
5 changes: 4 additions & 1 deletion src/shmessy/types/datetime_.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class DatetimeType(BaseType):
"%Y-%m-%d %H:%M:%S.%fZ", # 2022-12-30 00:00:00.000Z
"%Y-%m-%dT%H:%M:%S.%fZ", # 2022-12-30T00:00:00.000Z
"%m/%d/%Y %H:%M:%S %p", # 11/23/2024 00:00:00 AM
"%Y-%m-%dT%H:%MZ", # 2024-01-20T11:30Z
]
date_time_delimiters: set[str] = {" ", "T"}
dynamic_patterns: list[str] = [
Expand All @@ -34,7 +35,9 @@ class DatetimeType(BaseType):
@classmethod
def get_patterns(cls) -> list[str]:
result: list[str] = []
for date in DateType.get_patterns(include_date_only_patterns=False):
for date in DateType.get_patterns(
include_date_only_patterns=False, include_static_date_patterns=False
):
for date_time_delimiter in cls.date_time_delimiters:
for dynamic_pattern in cls.dynamic_patterns:
result.append(date + date_time_delimiter + dynamic_pattern)
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/test_date_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,14 @@ def test_get_patterns_with_date_only():
for p in date_only_patterns:
assert p in result_all_patterns
assert p not in result_date_only_patterns


def test_include_date_static_patterns():
    """Static date formats are returned by default and dropped on opt-out.

    Checks the three static formats listed on ``DateType.static_patterns``
    rather than only the original ``"%B %d, %Y"`` entry, so the newly added
    formats are also covered by the ``include_static_date_patterns`` flag.
    """
    # Spelled out explicitly (not read from DateType) so the test fails if a
    # format is accidentally removed from the class.
    expected_static_patterns: list[str] = [
        "%B %d, %Y",  # January 23, 2024
        "%b %d, %Y",  # Feb 17, 1995
        "%d %b %y, %a",  # 10 Apr 24, Wed
    ]

    # get_patterns is a classmethod, so no DateType instance is needed.
    result_all_patterns = DateType.get_patterns()
    result_without_static_patterns = DateType.get_patterns(
        include_static_date_patterns=False
    )

    for p in expected_static_patterns:
        assert p in result_all_patterns
        assert p not in result_without_static_patterns

0 comments on commit 52e3f00

Please sign in to comment.