Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

include static patterns in timestamp + added formats #157

Merged
merged 1 commit into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/shmessy/types/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
class DateType(BaseType):
weight = 2
delimiters: list[str] = {"/", ".", "-", " "}
static_patterns: list[str] = ["%B %d, %Y"] # January 23, 2024
static_patterns: list[str] = [
"%B %d, %Y", # January 23, 2024
"%b %d, %Y", # Feb 17, 1995
"%d %b %y, %a", # 10 Apr 24, Wed
]
date_only_patterns: list[list[str]] = [
# Do not attach time combinations to these patterns
["%Y", "%m"], # 2022-07 | 2022 07 | 2022/07 | 2022.07
Expand All @@ -38,7 +42,9 @@ class DateType(BaseType):

@classmethod
def get_patterns(
cls, include_date_only_patterns: Optional[bool] = True
cls,
include_date_only_patterns: Optional[bool] = True,
include_static_date_patterns: Optional[bool] = True,
) -> list[str]:
# The returned value must not be a set, since the order of the patterns is important!
input_patterns: list[list[str]] = cls.dynamic_patterns.copy()
Expand All @@ -49,7 +55,10 @@ def get_patterns(
for pattern in input_patterns:
for delimiter in cls.delimiters:
results.append(delimiter.join(pattern))
return results + cls.static_patterns

if include_static_date_patterns:
return results + cls.static_patterns
return results

def validate(self, data: ndarray) -> Optional[InferredField]:
return validate(
Expand Down
5 changes: 4 additions & 1 deletion src/shmessy/types/datetime_.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class DatetimeType(BaseType):
"%Y-%m-%d %H:%M:%S.%fZ", # 2022-12-30 00:00:00.000Z
"%Y-%m-%dT%H:%M:%S.%fZ", # 2022-12-30T00:00:00.000Z
"%m/%d/%Y %H:%M:%S %p", # 11/23/2024 00:00:00 AM
"%Y-%m-%dT%H:%MZ", # 2024-01-20T11:30Z
]
date_time_delimiters: set[str] = {" ", "T"}
dynamic_patterns: list[str] = [
Expand All @@ -34,7 +35,9 @@ class DatetimeType(BaseType):
@classmethod
def get_patterns(cls) -> list[str]:
result: list[str] = []
for date in DateType.get_patterns(include_date_only_patterns=False):
for date in DateType.get_patterns(
include_date_only_patterns=False, include_static_date_patterns=False
):
for date_time_delimiter in cls.date_time_delimiters:
for dynamic_pattern in cls.dynamic_patterns:
result.append(date + date_time_delimiter + dynamic_pattern)
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/test_date_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,14 @@ def test_get_patterns_with_date_only():
for p in date_only_patterns:
assert p in result_all_patterns
assert p not in result_date_only_patterns


def test_include_date_static_patterns():
    """Static patterns are returned by default and omitted on request.

    Checks each static format listed below, not just a single sample, so
    that every one of them is verified both when static patterns are
    included (the default) and when they are explicitly excluded.
    """
    expected_static_patterns: list[str] = [
        "%B %d, %Y",  # January 23, 2024
        "%b %d, %Y",  # Feb 17, 1995
        "%d %b %y, %a",  # 10 Apr 24, Wed
    ]
    # get_patterns is a classmethod, so it is called on the class directly.
    result_all_patterns = DateType.get_patterns()
    result_without_static_patterns = DateType.get_patterns(
        include_static_date_patterns=False
    )

    for pattern in expected_static_patterns:
        assert pattern in result_all_patterns
        assert pattern not in result_without_static_patterns
Loading