-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
WIP warn if parsing with du_parse #47828
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8e37314
41eed78
bbc0b5f
25fdf66
c8f281f
62ce174
fa53a4e
e0c0822
0f0ed12
040f080
abea3e4
dc6f7e4
0c24d14
98d0b20
317954a
6ecd4fa
b639c09
903c14e
7f04dc0
4eb9a75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -263,6 +263,19 @@ cdef inline bint does_string_look_like_time(str parse_string): | |
|
||
return 0 <= hour <= 23 and 0 <= minute <= 59 | ||
|
||
from pandas.util._exceptions import find_stack_level | ||
import inspect | ||
|
||
|
||
def du_parse_with_warning(*args, **kwargs): | ||
parsed = du_parse(*args, **kwargs) | ||
warnings.warn( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we take a performance hit here? |
||
"Parsing datetime strings without a format specified, " | ||
"please specify a format to avoid unexpected results", | ||
stacklevel=find_stack_level(inspect.currentframe()), | ||
) | ||
return parsed | ||
|
||
|
||
def parse_datetime_string( | ||
# NB: This will break with np.str_ (GH#32264) even though | ||
|
@@ -290,8 +303,12 @@ def parse_datetime_string( | |
|
||
if does_string_look_like_time(date_string): | ||
# use current datetime as default, not pass _DEFAULT_DATETIME | ||
dt = du_parse(date_string, dayfirst=dayfirst, | ||
yearfirst=yearfirst, **kwargs) | ||
dt = du_parse_with_warning( | ||
date_string, | ||
dayfirst=dayfirst, | ||
yearfirst=yearfirst, | ||
**kwargs, | ||
) | ||
return dt | ||
|
||
dt, _ = _parse_delimited_date(date_string, dayfirst) | ||
|
@@ -307,8 +324,13 @@ def parse_datetime_string( | |
pass | ||
|
||
try: | ||
dt = du_parse(date_string, default=_DEFAULT_DATETIME, | ||
dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) | ||
dt = du_parse_with_warning( | ||
date_string, | ||
default=_DEFAULT_DATETIME, | ||
dayfirst=dayfirst, | ||
yearfirst=yearfirst, | ||
**kwargs, | ||
) | ||
except TypeError: | ||
# following may be raised from dateutil | ||
# TypeError: 'NoneType' object is not iterable | ||
|
@@ -706,7 +728,11 @@ def try_parse_dates( | |
date = datetime.now() | ||
default = datetime(date.year, date.month, 1) | ||
|
||
parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) | ||
parse_date = lambda x: du_parse_with_warning( | ||
x, | ||
dayfirst=dayfirst, | ||
default=default, | ||
) | ||
|
||
# EAFP here | ||
try: | ||
|
@@ -753,13 +779,17 @@ def try_parse_date_and_time( | |
date = datetime.now() | ||
default = datetime(date.year, date.month, 1) | ||
|
||
parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) | ||
parse_date = lambda x: du_parse_with_warning( | ||
x, | ||
dayfirst=dayfirst, | ||
default=default, | ||
) | ||
|
||
else: | ||
parse_date = date_parser | ||
|
||
if time_parser is None: | ||
parse_time = lambda x: du_parse(x) | ||
parse_time = lambda x: du_parse_with_warning(x) | ||
|
||
else: | ||
parse_time = time_parser | ||
|
@@ -980,7 +1010,7 @@ def guess_datetime_format(dt_str, bint dayfirst=False): | |
datetime_attrs_to_format.insert(0, day_attribute_and_format) | ||
|
||
try: | ||
parsed_datetime = du_parse(dt_str, dayfirst=dayfirst) | ||
parsed_datetime = du_parse_with_warning(dt_str, dayfirst=dayfirst) | ||
except (ValueError, OverflowError): | ||
# In case the datetime can't be parsed, its format cannot be guessed | ||
return None | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ class TestABCClasses: | |
multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) | ||
datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) | ||
timedelta_index = pd.to_timedelta(np.arange(5), unit="s") | ||
period_index = pd.period_range("2000/1/1", "2010/1/1/", freq="M") | ||
period_index = pd.period_range("1/1/2000", "1/1/2010", freq="M") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I don't know why this change is needed yet; I need to investigate. |
||
categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) | ||
categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) | ||
df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is there an option that avoids importing from outside libs here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup, I could set it explicitly each time - I'll do that once I've sorted out the tests.