From 8e37314137391c3b225034d28f69b1ae8ce87991 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 18:51:18 +0200 Subject: [PATCH 01/18] add missing return --- doc/source/user_guide/timedeltas.rst | 2 + pandas/_libs/tslibs/parsing.pyx | 45 +++- .../tests/frame/methods/test_reset_index.py | 23 +-- pandas/tests/io/parser/test_parse_dates.py | 195 +++++++++++++----- pandas/tests/io/test_date_converters.py | 2 +- pandas/tests/scalar/period/test_period.py | 124 ++++++++--- pandas/tests/tslibs/test_array_to_datetime.py | 47 +++-- pandas/tests/tslibs/test_parsing.py | 46 +++-- 8 files changed, 357 insertions(+), 127 deletions(-) diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 180de1df53f9e..b0e0d72d05f88 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -424,6 +424,7 @@ Similarly to other of the datetime-like indices, ``DatetimeIndex`` and ``PeriodI Selections work similarly, with coercion on string-likes and slices: .. ipython:: python + :okwarning: s["1 day":"2 day"] s["1 day 01:00:00"] @@ -432,6 +433,7 @@ Selections work similarly, with coercion on string-likes and slices: Furthermore you can use partial string selection and the range will be inferred: .. ipython:: python + :okwarning: s["1 day":"1 day 5 hours"] diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 97a8f81094a8f..0141454feae03 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -263,6 +263,18 @@ cdef inline bint does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 +from pandas.util._exceptions import find_stack_level + + +def du_parse_with_warning(*args, **kwargs): + parsed = du_parse(*args, **kwargs) + warnings.warn( + "Parsing datetime strings without a format specified, " + "please specify a format to avoid unexpected results", + stacklevel=find_stack_level(), + ) + return parsed + def parse_datetime_string( # NB: This will break with np.str_ (GH#32264) even though @@ -290,8 +302,12 @@ def parse_datetime_string( if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME - dt = du_parse(date_string, dayfirst=dayfirst, - yearfirst=yearfirst, **kwargs) + dt = du_parse_with_warning( + date_string, + dayfirst=dayfirst, + yearfirst=yearfirst, + **kwargs, + ) return dt dt, _ = _parse_delimited_date(date_string, dayfirst) @@ -307,8 +323,13 @@ def parse_datetime_string( pass try: - dt = du_parse(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) + dt = du_parse_with_warning( + date_string, + default=_DEFAULT_DATETIME, + dayfirst=dayfirst, + yearfirst=yearfirst, + **kwargs, + ) except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable @@ -706,7 +727,11 @@ def try_parse_dates( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + parse_date = lambda x: du_parse_with_warning( + x, + dayfirst=dayfirst, + default=default, + ) # EAFP here try: @@ -753,13 +778,17 @@ def try_parse_date_and_time( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + parse_date = lambda x: du_parse_with_warning( + x, + dayfirst=dayfirst, + default=default, + ) else: parse_date = date_parser if time_parser is None: - parse_time = lambda x: du_parse(x) + parse_time = lambda x: du_parse_with_warning(x) else: parse_time = time_parser @@ -980,7 +1009,7 @@ def guess_datetime_format(dt_str, bint dayfirst=False): datetime_attrs_to_format.insert(0, day_attribute_and_format) try: - parsed_datetime = du_parse(dt_str, dayfirst=dayfirst) + parsed_datetime = du_parse_with_warning(dt_str, dayfirst=dayfirst) except (ValueError, OverflowError): # In case the datetime can't be parsed, its format cannot be guessed return None diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index bd168e4f14558..b4198b589deae 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -337,24 +337,19 @@ def test_reset_index_multiindex_nan(self): tm.assert_frame_equal(rs, df) @pytest.mark.parametrize( - "name", + "name, warn", [ - None, - "foo", - 2, - 3.0, - pd.Timedelta(6), - Timestamp("2012-12-30", tz="UTC"), - "2012-12-31", + (None, UserWarning), + ("foo", UserWarning), + (2, None), + (3.0, None), + (pd.Timedelta(6), None), + (Timestamp("2012-12-30", tz="UTC"), FutureWarning), + ("2012-12-31", None), ], ) - def test_reset_index_with_datetimeindex_cols(self, name): + def test_reset_index_with_datetimeindex_cols(self, name, warn): # GH#5818 - warn = None - if isinstance(name, Timestamp) and name.tz is not None: - # _deprecate_mismatched_indexing - warn = FutureWarning - df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 5d2e5bccd9762..366720a2d98be 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -129,7 +129,9 @@ def test_separator_date_conflict(all_parsers): [[datetime(2013, 6, 2, 13, 0, 0), 1000.215]], columns=["Date", 2] ) - df = parser.read_csv( + df = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", StringIO(data), sep=";", thousands="-", @@ -175,9 +177,10 @@ def date_parser(*date_cols): "keep_date_col": keep_date_col, } result = parser.read_csv_check_warnings( - FutureWarning, + (FutureWarning, UserWarning), "The prefix argument has been deprecated " - "and will be removed in a future version. .*\n\n", + "and will be removed in a future version. .*\n\n" + "|Parsing datetime strings without a format specified", StringIO(data), **kwds, ) @@ -319,9 +322,10 @@ def test_multiple_date_col(all_parsers, keep_date_col): "keep_date_col": keep_date_col, } result = parser.read_csv_check_warnings( - FutureWarning, + (FutureWarning, UserWarning), "The prefix argument has been deprecated " - "and will be removed in a future version. .*\n\n", + "and will be removed in a future version. .*\n\n" + "|Parsing datetime strings without a format specified", StringIO(data), **kwds, ) @@ -499,9 +503,10 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning): "date_parser": date_parser, } result = parser.read_csv_check_warnings( - FutureWarning, + (FutureWarning, UserWarning), "The prefix argument has been deprecated " - "and will be removed in a future version. .*\n\n", + "and will be removed in a future version. .*\n\n" + "|Parsing datetime strings without a format specified", StringIO(data), **kwds, ) @@ -551,13 +556,16 @@ def test_multiple_date_col_timestamp_parse(all_parsers): data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25""" - result = parser.read_csv( - StringIO(data), parse_dates=[[0, 1]], header=None, date_parser=Timestamp - ) + with tm.assert_produces_warning( + UserWarning, match=r"Parsing datetime strings without a format specified" + ): + result = parser.read_csv( + StringIO(data), parse_dates=[[0, 1]], header=None, date_parser=Timestamp + ) expected = DataFrame( [ [ - Timestamp("05/31/2012, 15:30:00.029"), + Timestamp(2012, 5, 31, 15, 30, 0, 29000), 1306.25, 1, "E", @@ -566,7 +574,7 @@ def test_multiple_date_col_timestamp_parse(all_parsers): 1306.25, ], [ - Timestamp("05/31/2012, 15:30:00.029"), + Timestamp(2012, 5, 31, 15, 30, 0, 29000), 1306.25, 8, "E", @@ -592,7 +600,12 @@ def test_multiple_date_cols_with_header(all_parsers): KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" - result = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]}) + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", + StringIO(data), + parse_dates={"nominal": [1, 2]}, + ) expected = DataFrame( [ [ @@ -695,7 +708,12 @@ def test_multiple_date_col_name_collision(all_parsers, data, parse_dates, msg): parser = all_parsers with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), parse_dates=parse_dates) + parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", + StringIO(data), + parse_dates=parse_dates, + ) def test_date_parser_int_bug(all_parsers): @@ -899,7 +917,10 @@ def test_multi_index_parse_dates(all_parsers, index_col): columns=["A", "B", "C"], index=index, ) - result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True) + with tm.assert_produces_warning( + UserWarning, match="Parsing datetime strings without a format specified" + ): + result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True) tm.assert_frame_equal(result, expected) @@ -1058,8 +1079,12 @@ def test_multiple_date_cols_index(all_parsers, parse_dates, index_col): if not isinstance(parse_dates, dict): expected.index.name = "date_NominalTime" - result = parser.read_csv( - StringIO(data), parse_dates=parse_dates, index_col=index_col + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", + StringIO(data), + parse_dates=parse_dates, + index_col=index_col, ) tm.assert_frame_equal(result, expected) @@ -1143,12 +1168,12 @@ def test_multiple_date_cols_chunked(all_parsers): columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"], ) expected = expected.set_index("nominal") - with parser.read_csv( StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal", chunksize=2, + date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) as reader: chunks = list(reader) @@ -1171,12 +1196,16 @@ def test_multiple_date_col_named_index_compat(all_parsers): """ with_indices = parser.read_csv( - StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal" + StringIO(data), + parse_dates={"nominal": [1, 2]}, + index_col="nominal", + date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) with_names = parser.read_csv( StringIO(data), index_col="nominal", parse_dates={"nominal": ["date", "nominalTime"]}, + date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) tm.assert_frame_equal(with_indices, with_names) @@ -1194,9 +1223,16 @@ def test_multiple_date_col_multiple_index_compat(all_parsers): KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 """ result = parser.read_csv( - StringIO(data), index_col=["nominal", "ID"], parse_dates={"nominal": [1, 2]} + StringIO(data), + index_col=["nominal", "ID"], + parse_dates={"nominal": [1, 2]}, + date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), + ) + expected = parser.read_csv( + StringIO(data), + parse_dates={"nominal": [1, 2]}, + date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) - expected = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]}) expected = expected.set_index(["nominal", "ID"]) tm.assert_frame_equal(result, expected) @@ -1232,21 +1268,38 @@ def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates): @pytest.mark.parametrize("cache_dates", [True, False]) -@pytest.mark.parametrize("value", ["nan", "0", ""]) -def test_bad_date_parse(all_parsers, cache_dates, value): +@pytest.mark.parametrize( + "value, warning", + [ + ( + "nan", + (None, ""), + ), + ( + "0", + (UserWarning, "Parsing datetime strings without a format specified"), + ), + ( + "", + (None, ""), + ), + ], +) +def test_bad_date_parse(all_parsers, cache_dates, value, warning): # if we have an invalid date make sure that we handle this with # and w/o the cache properly parser = all_parsers s = StringIO((f"{value},\n") * 50000) - parser.read_csv( - s, - header=None, - names=["foo", "bar"], - parse_dates=["foo"], - infer_datetime_format=False, - cache_dates=cache_dates, - ) + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + parser.read_csv( + s, + header=None, + names=["foo", "bar"], + parse_dates=["foo"], + infer_datetime_format=False, + cache_dates=cache_dates, + ) @xfail_pyarrow @@ -1305,7 +1358,10 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected): @xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", - ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), + ( + [conv.parse_date_time, (FutureWarning, UserWarning)], + [pd.to_datetime, UserWarning], + ), ) def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warning): data = """\ @@ -1334,7 +1390,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni @xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", - ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), + ( + [conv.parse_date_time, (FutureWarning, UserWarning)], + [pd.to_datetime, UserWarning], + ), ) @pytest.mark.parametrize( "data,kwargs,expected", @@ -1567,7 +1626,12 @@ def test_parse_date_column_with_empty_string(all_parsers): # see gh-6428 parser = all_parsers data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, " - result = parser.read_csv(StringIO(data), parse_dates=["opdate"]) + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", + StringIO(data), + parse_dates=["opdate"], + ) expected_data = [[7, "10/18/2006"], [7, "10/18/2008"], [621, " "]] expected = DataFrame(expected_data, columns=["case", "opdate"]) @@ -1629,13 +1693,34 @@ def test_parse_timezone(all_parsers): @skip_pyarrow @pytest.mark.parametrize( - "date_string", - ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], + "date_string, warning, msg", + [ + ("32/32/2019", None, ""), + ("02/30/2019", None, ""), + ("13/13/2019", None, ""), + ("13/2019", None, ""), + ( + "a3/11/2018", + UserWarning, + "Parsing datetime strings without a format specified", + ), + ( + "10/11/2o17", + UserWarning, + "Parsing datetime strings without a format specified", + ), + ], ) -def test_invalid_parse_delimited_date(all_parsers, date_string): +def test_invalid_parse_delimited_date(all_parsers, date_string, warning, msg): parser = all_parsers expected = DataFrame({0: [date_string]}, dtype="object") - result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0]) + result = parser.read_csv_check_warnings( + warning, + msg, + StringIO(date_string), + header=None, + parse_dates=[0], + ) tm.assert_frame_equal(result, expected) @@ -1781,7 +1866,13 @@ def test_date_parser_and_names(all_parsers): # GH#33699 parser = all_parsers data = StringIO("""x,y\n1,2""") - result = parser.read_csv(data, parse_dates=["B"], names=["B"]) + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", + data, + parse_dates=["B"], + names=["B"], + ) expected = DataFrame({"B": ["y", "2"]}, index=["x", "1"]) tm.assert_frame_equal(result, expected) @@ -1810,7 +1901,9 @@ def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, co data = """a,b,c 1,2,3 2019-12,-31,6""" - result = parser.read_csv( + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", StringIO(data), parse_dates=parse_spec, header=[0, 1], @@ -1828,13 +1921,15 @@ def test_date_parser_usecols_thousands(all_parsers): """ parser = all_parsers - result = parser.read_csv( + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", StringIO(data), parse_dates=[1], usecols=[1, 2], thousands="-", ) - expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2}) + expected = DataFrame({"B": [3, 4], "C": [Timestamp(2001, 9, 20, 1)] * 2}) tm.assert_frame_equal(result, expected) @@ -1898,12 +1993,16 @@ def test_dayfirst_warnings(): # D. infer_datetime_format=True overrides dayfirst default # no warning + correct result - res4 = read_csv( - StringIO(input), - parse_dates=["date"], - infer_datetime_format=True, - index_col="date", - ).index + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + res4 = read_csv( + StringIO(input), + parse_dates=["date"], + infer_datetime_format=True, + index_col="date", + ).index tm.assert_index_equal(expected_consistent, res4) # CASE 2: invalid input diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py index a9fa27e091714..8a59a8fb86358 100644 --- a/pandas/tests/io/test_date_converters.py +++ b/pandas/tests/io/test_date_converters.py @@ -12,7 +12,7 @@ def test_parse_date_time(): dates = np.array(["2007/1/3", "2008/2/4"], dtype=object) times = np.array(["05:07:09", "06:08:00"], dtype=object) expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): result = conv.parse_date_time(dates, times) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 20d6b9e77a034..0251d688fc91c 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -51,8 +51,12 @@ def test_from_td64nat_raises(self): Period(td, freq="D") def test_construction(self): - i1 = Period("1/1/2005", freq="M") - i2 = Period("Jan 2005") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i1 = Period("1/1/2005", freq="M") + i2 = Period("Jan 2005") assert i1 == i2 @@ -81,7 +85,11 @@ def test_construction(self): assert i1 == i2 i1 = Period(year=2005, month=3, day=1, freq="D") - i2 = Period("3/1/2005", freq="D") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("3/1/2005", freq="D") assert i1 == i2 i3 = Period(year=2005, month=3, day=1, freq="d") @@ -131,29 +139,57 @@ def test_construction_from_timestamp_nanos(self): def test_construction_bday(self): # Biz day construction, roll forward if non-weekday - i1 = Period("3/10/12", freq="B") - i2 = Period("3/10/12", freq="D") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i1 = Period("3/10/12", freq="B") + i2 = Period("3/10/12", freq="D") assert i1 == i2.asfreq("B") - i2 = Period("3/11/12", freq="D") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("3/11/12", freq="D") assert i1 == i2.asfreq("B") - i2 = Period("3/12/12", freq="D") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("3/12/12", freq="D") assert i1 == i2.asfreq("B") - i3 = Period("3/10/12", freq="b") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i3 = Period("3/10/12", freq="b") assert i1 == i3 i1 = Period(year=2012, month=3, day=10, freq="B") - i2 = Period("3/12/12", freq="B") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("3/12/12", freq="B") assert i1 == i2 def test_construction_quarter(self): i1 = Period(year=2005, quarter=1, freq="Q") - i2 = Period("1/1/2005", freq="Q") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("1/1/2005", freq="Q") assert i1 == i2 i1 = Period(year=2005, quarter=3, freq="Q") - i2 = Period("9/1/2005", freq="Q") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("9/1/2005", freq="Q") assert i1 == i2 i1 = Period("2005Q1") @@ -185,20 +221,36 @@ def test_construction_quarter(self): def test_construction_month(self): expected = Period("2007-01", freq="M") - i1 = Period("200701", freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i1 = Period("200701", freq="M") assert i1 == expected - i1 = Period("200701", freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i1 = Period("200701", freq="M") assert i1 == expected - i1 = Period(200701, freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i1 = Period(200701, freq="M") assert i1 == expected i1 = Period(ordinal=200701, freq="M") assert i1.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") - i2 = Period("200701", freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("200701", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") @@ -212,15 +264,27 @@ def test_construction_month(self): assert i1 == i5 def test_period_constructor_offsets(self): - assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( - "1/1/2005", freq="M" - ) + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( + "1/1/2005", freq="M" + ) assert Period("2005", freq=offsets.YearEnd()) == Period("2005", freq="A") assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") - assert Period("3/10/12", freq=offsets.BusinessDay()) == Period( - "3/10/12", freq="B" - ) - assert Period("3/10/12", freq=offsets.Day()) == Period("3/10/12", freq="D") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + assert Period("3/10/12", freq=offsets.BusinessDay()) == Period( + "3/10/12", freq="B" + ) + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + assert Period("3/10/12", freq=offsets.Day()) == Period("3/10/12", freq="D") assert Period( year=2005, quarter=1, freq=offsets.QuarterEnd(startingMonth=12) @@ -244,7 +308,11 @@ def test_period_constructor_offsets(self): year=2012, month=3, day=10, freq="3B" ) - assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) i2 = Period(ordinal=200701, freq="M") @@ -253,7 +321,11 @@ def test_period_constructor_offsets(self): assert i2.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") - i2 = Period("200701", freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("200701", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") @@ -580,8 +652,8 @@ def test_hash(self): def test_to_timestamp_tz_arg(self, tzstr): # GH#34522 tz kwarg deprecated with tm.assert_produces_warning(FutureWarning): - p = Period("1/1/2005", freq="M").to_timestamp(tz=tzstr) - exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + p = Period("01/01/2005", freq="M").to_timestamp(tz=tzstr) + exp = Timestamp("01/01/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) assert p == exp diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 64a45f6507810..abde1175f358b 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize( - "data,expected", + "data,expected,warning,message", [ ( ["01-01-2013", "01-02-2013"], @@ -26,6 +26,8 @@ "2013-01-01T00:00:00.000000000", "2013-01-02T00:00:00.000000000", ], + None, + "", ), ( ["Mon Sep 16 2013", "Tue Sep 17 2013"], @@ -33,34 +35,39 @@ "2013-09-16T00:00:00.000000000", "2013-09-17T00:00:00.000000000", ], + UserWarning, + "without a format specified", ), ], ) -def test_parsing_valid_dates(data, expected): +def test_parsing_valid_dates(data, expected, warning, message): arr = np.array(data, dtype=object) - result, _ = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(warning, match=message): + result, _ = tslib.array_to_datetime(arr) expected = np.array(expected, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( - "dt_string, expected_tz", + "dt_string, expected_tz, warning, message", [ - ["01-01-2013 08:00:00+08:00", 480], - ["2013-01-01T08:00:00.000000000+0800", 480], - ["2012-12-31T16:00:00.000000000-0800", -480], - ["12-31-2012 23:00:00-01:00", -60], + ["01-01-2013 08:00:00+08:00", 480, UserWarning, "without a format specified"], + ["2013-01-01T08:00:00.000000000+0800", 480, None, ""], + ["2012-12-31T16:00:00.000000000-0800", -480, None, ""], + ["12-31-2012 23:00:00-01:00", -60, UserWarning, "without a format specified"], ], ) -def test_parsing_timezone_offsets(dt_string, expected_tz): +def test_parsing_timezone_offsets(dt_string, expected_tz, warning, message): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added. arr = np.array(["01-01-2013 00:00:00"], dtype=object) - expected, _ = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(UserWarning, match="without a format specified"): + expected, _ = tslib.array_to_datetime(arr) - arr = np.array([dt_string], dtype=object) - result, result_tz = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(warning, match=message): + arr = np.array([dt_string], dtype=object) + result, result_tz = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected) assert result_tz is pytz.FixedOffset(expected_tz) @@ -70,7 +77,8 @@ def test_parsing_non_iso_timezone_offset(): dt_string = "01-01-2013T00:00:00.000000000+0000" arr = np.array([dt_string], dtype=object) - result, result_tz = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(UserWarning, match="without a format specified"): + result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64("2013-01-01 00:00:00.000000000")]) tm.assert_numpy_array_equal(result, expected) @@ -82,7 +90,8 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - result, result_tz = tslib.array_to_datetime(data) + with tm.assert_produces_warning(UserWarning, match="without a format specified"): + result, result_tz = tslib.array_to_datetime(data) expected = np.array( [ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), @@ -154,11 +163,17 @@ def test_coerce_of_invalid_datetimes(errors): if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. - result, _ = tslib.array_to_datetime(**kwargs) + with tm.assert_produces_warning( + UserWarning, match="without a format specified" + ): + result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. # With coercing, the invalid dates becomes iNaT - result, _ = tslib.array_to_datetime(arr, errors="coerce") + with tm.assert_produces_warning( + UserWarning, match="without a format specified" + ): + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 4dae6c586e306..f1bbccebb6632 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -185,7 +185,8 @@ def test_guess_datetime_format_with_parseable_formats(string, fmt): @pytest.mark.parametrize("dayfirst,expected", [(True, "%d/%m/%Y"), (False, "%m/%d/%Y")]) def test_guess_datetime_format_with_dayfirst(dayfirst, expected): ambiguous_string = "01/01/2011" - result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) + with tm.assert_produces_warning(UserWarning, match="without a format specified"): + result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) assert result == expected @@ -204,22 +205,31 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): @pytest.mark.parametrize( - "invalid_dt", + "invalid_dt, warning, message", [ - "2013", - "01/2013", - "12:00:00", - "1/1/1/1", - "this_is_not_a_datetime", - "51a", - 9, - datetime(2011, 1, 1), + ("2013", UserWarning, "Parsing datetime strings without a format specified"), + ("01/2013", UserWarning, "Parsing datetime strings without a format specified"), + ( + "12:00:00", + UserWarning, + "Parsing datetime strings without a format specified", + ), + ("1/1/1/1", UserWarning, "Parsing datetime strings without a format specified"), + ( + "this_is_not_a_datetime", + UserWarning, + "Parsing datetime strings without a format specified", + ), + ("51a", UserWarning, "Parsing datetime strings without a format specified"), + (9, None, ""), + (datetime(2011, 1, 1), None, ""), ], ) -def test_guess_datetime_format_invalid_inputs(invalid_dt): +def test_guess_datetime_format_invalid_inputs(invalid_dt, warning, message): # A datetime string must include a year, month and a day for it to be # guessable, in addition to being a string that looks like a datetime. - assert parsing.guess_datetime_format(invalid_dt) is None + with tm.assert_produces_warning(warning, match=message): + assert parsing.guess_datetime_format(invalid_dt) is None @pytest.mark.parametrize( @@ -235,13 +245,21 @@ def test_guess_datetime_format_invalid_inputs(invalid_dt): ) def test_guess_datetime_format_no_padding(string, fmt): # see gh-11142 - result = parsing.guess_datetime_format(string) + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + result = parsing.guess_datetime_format(string) assert result == fmt def test_try_parse_dates(): arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) - result = parsing.try_parse_dates(arr, dayfirst=True) + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + result = parsing.try_parse_dates(arr, dayfirst=True) expected = np.array([parse(d, dayfirst=True) for d in arr]) tm.assert_numpy_array_equal(result, expected) From 41eed78046e534f47e626f108dd416dd0c8cd67a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 19:26:03 +0200 Subject: [PATCH 02/18] fixup --- pandas/tests/dtypes/test_generic.py | 4 ++-- pandas/tests/groupby/test_quantile.py | 4 ++-- pandas/tests/io/parser/test_parse_dates.py | 13 +++++-------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 4f73754d2708f..c6aeff809d6f1 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,9 +18,9 @@ class TestABCClasses: tuples = [[1, 2, 2], ["red", "blue", "red"]] multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) - datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) + datetime_index = pd.to_datetime(["1/1/2000", "1/1/2010"]) timedelta_index = pd.to_timedelta(np.arange(5), unit="s") - period_index = pd.period_range("2000/1/1", "2010/1/1/", freq="M") + period_index = pd.period_range("1/1/2000", "1/1/2010", freq="M") categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 2b7e71d9619a4..d583c9f28d3ac 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -28,8 +28,8 @@ ([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]), # Timestamps ( - list(pd.date_range("1/1/18", freq="D", periods=5)), - list(pd.date_range("1/1/18", freq="D", periods=5))[::-1], + list(pd.date_range("1/1/2018", freq="D", periods=5)), + list(pd.date_range("1/1/2018", freq="D", periods=5))[::-1], ), # All NA ([np.nan] * 5, [np.nan] * 5), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 366720a2d98be..3d8a0961ec923 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -773,7 +773,7 @@ def test_nat_parse(all_parsers): # see gh-3062 parser = all_parsers df = DataFrame( - dict({"A": np.arange(10, dtype="float64"), "B": Timestamp("20010101")}) + dict({"A": np.arange(10, dtype="float64"), "B": Timestamp("2001-01-01")}) ) df.iloc[3:6, :] = np.nan @@ -1604,7 +1604,9 @@ def date_parser(dt, time): arr = [datetime.combine(d, t) for d, t in zip(dt, time)] return np.array(arr, dtype="datetime64[s]") - result = parser.read_csv( + result = parser.read_csv_with_warnings( + UserWarning, + "without a format specified", StringIO(data), date_parser=date_parser, parse_dates={"datetime": ["date", "time"]}, @@ -1626,12 +1628,7 @@ def test_parse_date_column_with_empty_string(all_parsers): # see gh-6428 parser = all_parsers data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, " - result = parser.read_csv_check_warnings( - UserWarning, - "Parsing datetime strings without a format specified", - StringIO(data), - parse_dates=["opdate"], - ) + result = parser.read_csv(StringIO(data), parse_dates=["opdate"]) expected_data = [[7, "10/18/2006"], [7, "10/18/2008"], [621, " "]] expected = DataFrame(expected_data, columns=["case", "opdate"]) From bbc0b5fb23403997a8e897d2acfb9dd72eec044d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 19:37:23 +0200 Subject: [PATCH 03/18] remove some now unnecessary warnings --- pandas/tests/scalar/period/test_period.py | 61 ++++++----------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 0251d688fc91c..da9de0c8d8c33 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -51,12 +51,8 @@ def test_from_td64nat_raises(self): Period(td, freq="D") def test_construction(self): - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i1 = Period("1/1/2005", freq="M") - i2 = Period("Jan 2005") + i1 = Period("1/1/2005", freq="M") + i2 = Period("Jan 2005") assert i1 == i2 @@ -85,11 +81,7 @@ def test_construction(self): assert i1 == i2 i1 = Period(year=2005, month=3, day=1, freq="D") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("3/1/2005", freq="D") + i2 = Period("3/1/2005", freq="D") assert i1 == i2 i3 = Period(year=2005, month=3, day=1, freq="d") @@ -177,19 +169,11 @@ def test_construction_bday(self): def test_construction_quarter(self): i1 = Period(year=2005, quarter=1, freq="Q") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("1/1/2005", freq="Q") + i2 = Period("1/1/2005", freq="Q") assert i1 == i2 i1 = Period(year=2005, quarter=3, freq="Q") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("9/1/2005", freq="Q") + i2 = Period("9/1/2005", freq="Q") assert i1 == i2 i1 = Period("2005Q1") @@ -264,27 +248,15 @@ def test_construction_month(self): assert i1 == i5 def test_period_constructor_offsets(self): - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( - "1/1/2005", freq="M" - ) + assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( + "1/1/2005", freq="M" + ) assert Period("2005", freq=offsets.YearEnd()) == Period("2005", freq="A") assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - assert Period("3/10/12", freq=offsets.BusinessDay()) == Period( - "3/10/12", freq="B" - ) - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - assert Period("3/10/12", freq=offsets.Day()) == Period("3/10/12", freq="D") + assert Period("3/10/2012", freq=offsets.BusinessDay()) == Period( + "3/10/2012", freq="B" + ) + assert Period("3/10/2012", freq=offsets.Day()) == Period("3/10/2012", freq="D") assert Period( year=2005, quarter=1, freq=offsets.QuarterEnd(startingMonth=12) @@ -309,8 +281,7 @@ def test_period_constructor_offsets(self): ) with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", + UserWarning, match="without a format specified" ): assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") @@ -321,11 +292,7 @@ def test_period_constructor_offsets(self): assert i2.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("200701", freq="M") + i2 = Period("2007-01", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") From 25fdf663a387403e7afab9edcdb82a00943a0c70 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 19:54:35 +0200 Subject: [PATCH 04/18] more fixups --- pandas/tests/dtypes/test_generic.py | 2 +- .../tests/frame/methods/test_reset_index.py | 23 ++++++++------ pandas/tests/tslibs/test_array_to_datetime.py | 31 +++++++++---------- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index c6aeff809d6f1..231c605ecbe04 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,7 +18,7 @@ class TestABCClasses: tuples = [[1, 2, 2], ["red", "blue", "red"]] multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) - datetime_index = pd.to_datetime(["1/1/2000", "1/1/2010"]) + datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) timedelta_index = pd.to_timedelta(np.arange(5), unit="s") period_index = pd.period_range("1/1/2000", "1/1/2010", freq="M") categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index b4198b589deae..bd168e4f14558 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -337,19 +337,24 @@ def test_reset_index_multiindex_nan(self): tm.assert_frame_equal(rs, df) @pytest.mark.parametrize( - "name, warn", + "name", [ - (None, UserWarning), - ("foo", UserWarning), - (2, None), - (3.0, None), - (pd.Timedelta(6), None), - (Timestamp("2012-12-30", tz="UTC"), FutureWarning), - ("2012-12-31", None), + None, + "foo", + 2, + 3.0, + pd.Timedelta(6), + Timestamp("2012-12-30", tz="UTC"), + "2012-12-31", ], ) - def test_reset_index_with_datetimeindex_cols(self, name, warn): + def test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning + df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index abde1175f358b..2d283d453e1fb 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -119,27 +119,30 @@ def test_number_looking_strings_not_into_datetime(data): @pytest.mark.parametrize( - "invalid_date", + "invalid_date, warn", [ - date(1000, 1, 1), - datetime(1000, 1, 1), - "1000-01-01", - "Jan 1, 1000", - np.datetime64("1000-01-01"), + (date(1000, 1, 1), None), + (datetime(1000, 1, 1), None), + ("1000-01-01", None), + ("Jan 1, 1000", UserWarning), + (np.datetime64("1000-01-01"), None), ], ) @pytest.mark.parametrize("errors", ["coerce", "raise"]) -def test_coerce_outside_ns_bounds(invalid_date, errors): +def test_coerce_outside_ns_bounds(invalid_date, warn, errors): arr = np.array([invalid_date], dtype="object") kwargs = {"values": arr, "errors": errors} if errors == "raise": msg = "Out of bounds .* present at position 0" - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( + warn, match="without a format specified" + ): tslib.array_to_datetime(**kwargs) else: # coerce. - result, _ = tslib.array_to_datetime(**kwargs) + with tm.assert_produces_warning(warn, match="without a format specified"): + result, _ = tslib.array_to_datetime(**kwargs) expected = np.array([iNaT], dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected) @@ -163,17 +166,11 @@ def test_coerce_of_invalid_datetimes(errors): if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. - with tm.assert_produces_warning( - UserWarning, match="without a format specified" - ): - result, _ = tslib.array_to_datetime(**kwargs) + result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. # With coercing, the invalid dates becomes iNaT - with tm.assert_produces_warning( - UserWarning, match="without a format specified" - ): - result, _ = tslib.array_to_datetime(arr, errors="coerce") + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) From c8f281f060beb9fba3f14d06e80ef4d14e2ac8af Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 23:25:15 +0200 Subject: [PATCH 05/18] wip --- pandas/tests/io/parser/test_parse_dates.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 3d8a0961ec923..0be2e5dadcd57 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -780,7 +780,9 @@ def test_nat_parse(all_parsers): with tm.ensure_clean("__nat_parse_.csv") as path: df.to_csv(path) - result = parser.read_csv(path, index_col=0, parse_dates=["B"]) + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + # pyarrow raises, but not the others, need to figure out why + result = parser.read_csv(path, index_col=0, parse_dates=["B"]) tm.assert_frame_equal(result, df) @@ -1863,9 +1865,7 @@ def test_date_parser_and_names(all_parsers): # GH#33699 parser = all_parsers data = StringIO("""x,y\n1,2""") - result = parser.read_csv_check_warnings( - UserWarning, - "Parsing datetime strings without a format specified", + result = parser.read_csv( data, parse_dates=["B"], names=["B"], From 62ce1748eade98976ef3095da0f0864d07db07a7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 18:51:18 +0200 Subject: [PATCH 06/18] add missing return --- .../tests/frame/methods/test_reset_index.py | 23 ++++------ pandas/tests/io/parser/test_parse_dates.py | 7 ++- pandas/tests/scalar/period/test_period.py | 45 ++++++++++++++----- pandas/tests/tslibs/test_array_to_datetime.py | 10 ++++- 4 files changed, 58 insertions(+), 27 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index bd168e4f14558..b4198b589deae 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -337,24 +337,19 @@ def test_reset_index_multiindex_nan(self): tm.assert_frame_equal(rs, df) @pytest.mark.parametrize( - "name", + "name, warn", [ - None, - "foo", - 2, - 3.0, - pd.Timedelta(6), - Timestamp("2012-12-30", tz="UTC"), - "2012-12-31", + (None, UserWarning), + ("foo", UserWarning), + (2, None), + (3.0, None), + (pd.Timedelta(6), None), + (Timestamp("2012-12-30", tz="UTC"), FutureWarning), + ("2012-12-31", None), ], ) - def test_reset_index_with_datetimeindex_cols(self, name): + def test_reset_index_with_datetimeindex_cols(self, name, warn): # GH#5818 - warn = None - if isinstance(name, Timestamp) and name.tz is not None: - # _deprecate_mismatched_indexing - warn = FutureWarning - df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 0be2e5dadcd57..b6aa65be067ed 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1630,7 +1630,12 @@ def test_parse_date_column_with_empty_string(all_parsers): # see gh-6428 parser = all_parsers data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, " - result = parser.read_csv(StringIO(data), parse_dates=["opdate"]) + result = parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", + StringIO(data), + parse_dates=["opdate"], + ) expected_data = [[7, "10/18/2006"], [7, "10/18/2008"], [621, " "]] expected = DataFrame(expected_data, columns=["case", "opdate"]) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index da9de0c8d8c33..69bb5fb02751e 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -51,8 +51,12 @@ def test_from_td64nat_raises(self): Period(td, freq="D") def test_construction(self): - i1 = Period("1/1/2005", freq="M") - i2 = Period("Jan 2005") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i1 = Period("1/1/2005", freq="M") + i2 = Period("Jan 2005") assert i1 == i2 @@ -81,7 +85,11 @@ def test_construction(self): assert i1 == i2 i1 = Period(year=2005, month=3, day=1, freq="D") - i2 = Period("3/1/2005", freq="D") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("3/1/2005", freq="D") assert i1 == i2 i3 = Period(year=2005, month=3, day=1, freq="d") @@ -169,11 +177,19 @@ def test_construction_bday(self): def test_construction_quarter(self): i1 = Period(year=2005, quarter=1, freq="Q") - i2 = Period("1/1/2005", freq="Q") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("1/1/2005", freq="Q") assert i1 == i2 i1 = Period(year=2005, quarter=3, freq="Q") - i2 = Period("9/1/2005", freq="Q") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("9/1/2005", freq="Q") assert i1 == i2 i1 = Period("2005Q1") @@ -248,9 +264,13 @@ def test_construction_month(self): assert i1 == i5 def test_period_constructor_offsets(self): - assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( - "1/1/2005", freq="M" - ) + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( + "1/1/2005", freq="M" + ) assert Period("2005", freq=offsets.YearEnd()) == Period("2005", freq="A") assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") assert Period("3/10/2012", freq=offsets.BusinessDay()) == Period( @@ -281,7 +301,8 @@ def test_period_constructor_offsets(self): ) with tm.assert_produces_warning( - UserWarning, match="without a format specified" + UserWarning, + match="Parsing datetime strings without a format specified", ): assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") @@ -292,7 +313,11 @@ def test_period_constructor_offsets(self): assert i2.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") - i2 = Period("2007-01", freq="M") + with tm.assert_produces_warning( + UserWarning, + match="Parsing datetime strings without a format specified", + ): + i2 = Period("200701", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 2d283d453e1fb..03b11dbb57c58 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -166,11 +166,17 @@ def test_coerce_of_invalid_datetimes(errors): if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. - result, _ = tslib.array_to_datetime(**kwargs) + with tm.assert_produces_warning( + UserWarning, match="without a format specified" + ): + result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. # With coercing, the invalid dates becomes iNaT - result, _ = tslib.array_to_datetime(arr, errors="coerce") + with tm.assert_produces_warning( + UserWarning, match="without a format specified" + ): + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) From fa53a4e3874916c5302d2f95aaa78c53e5f48948 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 19:26:03 +0200 Subject: [PATCH 07/18] fixup --- pandas/tests/dtypes/test_generic.py | 2 +- pandas/tests/io/parser/test_parse_dates.py | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 231c605ecbe04..c6aeff809d6f1 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,7 +18,7 @@ class TestABCClasses: tuples = [[1, 2, 2], ["red", "blue", "red"]] multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) - datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) + datetime_index = pd.to_datetime(["1/1/2000", "1/1/2010"]) timedelta_index = pd.to_timedelta(np.arange(5), unit="s") period_index = pd.period_range("1/1/2000", "1/1/2010", freq="M") categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index b6aa65be067ed..0be2e5dadcd57 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1630,12 +1630,7 @@ def test_parse_date_column_with_empty_string(all_parsers): # see gh-6428 parser = all_parsers data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, " - result = parser.read_csv_check_warnings( - UserWarning, - "Parsing datetime strings without a format specified", - StringIO(data), - parse_dates=["opdate"], - ) + result = parser.read_csv(StringIO(data), parse_dates=["opdate"]) expected_data = [[7, "10/18/2006"], [7, "10/18/2008"], [621, " "]] expected = DataFrame(expected_data, columns=["case", "opdate"]) From e0c08227ac3b7387427438ae7adc90121d49720e Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 19:37:23 +0200 Subject: [PATCH 08/18] remove some now unnecessary warnings --- pandas/tests/scalar/period/test_period.py | 45 +++++------------------ 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 69bb5fb02751e..da9de0c8d8c33 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -51,12 +51,8 @@ def test_from_td64nat_raises(self): Period(td, freq="D") def test_construction(self): - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i1 = Period("1/1/2005", freq="M") - i2 = Period("Jan 2005") + i1 = Period("1/1/2005", freq="M") + i2 = Period("Jan 2005") assert i1 == i2 @@ -85,11 +81,7 @@ def test_construction(self): assert i1 == i2 i1 = Period(year=2005, month=3, day=1, freq="D") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("3/1/2005", freq="D") + i2 = Period("3/1/2005", freq="D") assert i1 == i2 i3 = Period(year=2005, month=3, day=1, freq="d") @@ -177,19 +169,11 @@ def test_construction_bday(self): def test_construction_quarter(self): i1 = Period(year=2005, quarter=1, freq="Q") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("1/1/2005", freq="Q") + i2 = Period("1/1/2005", freq="Q") assert i1 == i2 i1 = Period(year=2005, quarter=3, freq="Q") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("9/1/2005", freq="Q") + i2 = Period("9/1/2005", freq="Q") assert i1 == i2 i1 = Period("2005Q1") @@ -264,13 +248,9 @@ def test_construction_month(self): assert i1 == i5 def test_period_constructor_offsets(self): - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( - "1/1/2005", freq="M" - ) + assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( + "1/1/2005", freq="M" + ) assert Period("2005", freq=offsets.YearEnd()) == Period("2005", freq="A") assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") assert Period("3/10/2012", freq=offsets.BusinessDay()) == Period( @@ -301,8 +281,7 @@ def test_period_constructor_offsets(self): ) with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", + UserWarning, match="without a format specified" ): assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") @@ -313,11 +292,7 @@ def test_period_constructor_offsets(self): assert i2.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") - with tm.assert_produces_warning( - UserWarning, - match="Parsing datetime strings without a format specified", - ): - i2 = Period("200701", freq="M") + i2 = Period("2007-01", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") From 0f0ed12d849294faab09e367cf784c1bd558846a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 31 Jul 2022 19:54:35 +0200 Subject: [PATCH 09/18] more fixups --- pandas/tests/dtypes/test_generic.py | 2 +- .../tests/frame/methods/test_reset_index.py | 23 +++++++++++-------- pandas/tests/tslibs/test_array_to_datetime.py | 10 ++------ 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index c6aeff809d6f1..231c605ecbe04 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,7 +18,7 @@ class TestABCClasses: tuples = [[1, 2, 2], ["red", "blue", "red"]] multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) - datetime_index = pd.to_datetime(["1/1/2000", "1/1/2010"]) + datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) timedelta_index = pd.to_timedelta(np.arange(5), unit="s") period_index = pd.period_range("1/1/2000", "1/1/2010", freq="M") categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index b4198b589deae..bd168e4f14558 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -337,19 +337,24 @@ def test_reset_index_multiindex_nan(self): tm.assert_frame_equal(rs, df) @pytest.mark.parametrize( - "name, warn", + "name", [ - (None, UserWarning), - ("foo", UserWarning), - (2, None), - (3.0, None), - (pd.Timedelta(6), None), - (Timestamp("2012-12-30", tz="UTC"), FutureWarning), - ("2012-12-31", None), + None, + "foo", + 2, + 3.0, + pd.Timedelta(6), + Timestamp("2012-12-30", tz="UTC"), + "2012-12-31", ], ) - def test_reset_index_with_datetimeindex_cols(self, name, warn): + def test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning + df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 03b11dbb57c58..2d283d453e1fb 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -166,17 +166,11 @@ def test_coerce_of_invalid_datetimes(errors): if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. - with tm.assert_produces_warning( - UserWarning, match="without a format specified" - ): - result, _ = tslib.array_to_datetime(**kwargs) + result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. # With coercing, the invalid dates becomes iNaT - with tm.assert_produces_warning( - UserWarning, match="without a format specified" - ): - result, _ = tslib.array_to_datetime(arr, errors="coerce") + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) From 040f0800242aab622ae6cdf8f952b78491d4f60e Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 5 Aug 2022 09:53:58 +0200 Subject: [PATCH 10/18] remove note about pyarrow --- pandas/tests/io/parser/test_parse_dates.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 0be2e5dadcd57..b212308aaca22 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -780,9 +780,7 @@ def test_nat_parse(all_parsers): with tm.ensure_clean("__nat_parse_.csv") as path: df.to_csv(path) - with tm.assert_produces_warning(None, raise_on_extra_warnings=False): - # pyarrow raises, but not the others, need to figure out why - result = parser.read_csv(path, index_col=0, parse_dates=["B"]) + result = parser.read_csv(path, index_col=0, parse_dates=["B"]) tm.assert_frame_equal(result, df) From abea3e4564a5d8455ba14a15837735bd157d92f0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 5 Aug 2022 10:30:06 +0200 Subject: [PATCH 11/18] fixup after rebase --- pandas/tests/io/parser/test_parse_dates.py | 62 +++++++++------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index b212308aaca22..ff2a79550cfe3 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1168,12 +1168,13 @@ def test_multiple_date_cols_chunked(all_parsers): columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"], ) expected = expected.set_index("nominal") - with parser.read_csv( + with parser.read_csv_check_warnings( + UserWarning, + "Parsing datetime strings without a format specified", StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal", chunksize=2, - date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) as reader: chunks = list(reader) @@ -1195,17 +1196,19 @@ def test_multiple_date_col_named_index_compat(all_parsers): KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 """ - with_indices = parser.read_csv( + with_indices = parser.read_csv_check_warnings( + UserWarning, + "without a format specified", StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal", - date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) - with_names = parser.read_csv( + with_names = parser.read_csv_check_warnings( + UserWarning, + "without a format specified", StringIO(data), index_col="nominal", parse_dates={"nominal": ["date", "nominalTime"]}, - date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) tm.assert_frame_equal(with_indices, with_names) @@ -1222,16 +1225,18 @@ def test_multiple_date_col_multiple_index_compat(all_parsers): KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 """ - result = parser.read_csv( + result = parser.read_csv_check_warnings( + UserWarning, + "without a format specified", StringIO(data), index_col=["nominal", "ID"], parse_dates={"nominal": [1, 2]}, - date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) - expected = parser.read_csv( + expected = parser.read_csv_check_warnings( + UserWarning, + "without a format specified", StringIO(data), parse_dates={"nominal": [1, 2]}, - date_parser=lambda x: pd.to_datetime(x, format="%Y%m%d %H:%M:%S"), ) expected = expected.set_index(["nominal", "ID"]) @@ -1268,38 +1273,21 @@ def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates): @pytest.mark.parametrize("cache_dates", [True, False]) -@pytest.mark.parametrize( - "value, warning", - [ - ( - "nan", - (None, ""), - ), - ( - "0", - (UserWarning, "Parsing datetime strings without a format specified"), - ), - ( - "", - (None, ""), - ), - ], -) -def test_bad_date_parse(all_parsers, cache_dates, value, warning): +@pytest.mark.parametrize("value", ["nan", "0", ""]) +def test_bad_date_parse(all_parsers, cache_dates, value): # if we have an invalid date make sure that we handle this with # and w/o the cache properly parser = all_parsers s = StringIO((f"{value},\n") * 50000) - with tm.assert_produces_warning(None, raise_on_extra_warnings=False): - parser.read_csv( - s, - header=None, - names=["foo", "bar"], - parse_dates=["foo"], - infer_datetime_format=False, - cache_dates=cache_dates, - ) + parser.read_csv( + s, + header=None, + names=["foo", "bar"], + parse_dates=["foo"], + infer_datetime_format=False, + cache_dates=cache_dates, + ) @xfail_pyarrow From dc6f7e4139c4fabf28b6f837620629c6174a3b69 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 5 Aug 2022 12:09:22 +0200 Subject: [PATCH 12/18] fixup some tests after rebase --- pandas/tests/io/parser/test_parse_dates.py | 47 +++++----------------- 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ff2a79550cfe3..3ccb3b2d924d8 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -917,10 +917,7 @@ def test_multi_index_parse_dates(all_parsers, index_col): columns=["A", "B", "C"], index=index, ) - with tm.assert_produces_warning( - UserWarning, match="Parsing datetime strings without a format specified" - ): - result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True) + result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True) tm.assert_frame_equal(result, expected) @@ -1168,15 +1165,16 @@ def test_multiple_date_cols_chunked(all_parsers): columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"], ) expected = expected.set_index("nominal") - with parser.read_csv_check_warnings( - UserWarning, - "Parsing datetime strings without a format specified", + with parser.read_csv( StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal", chunksize=2, ) as reader: - chunks = list(reader) + with tm.assert_produces_warning( + UserWarning, match="without a format specified" + ): + chunks = list(reader) tm.assert_frame_equal(chunks[0], expected[:2]) tm.assert_frame_equal(chunks[1], expected[2:4]) @@ -1592,9 +1590,7 @@ def date_parser(dt, time): arr = [datetime.combine(d, t) for d, t in zip(dt, time)] return np.array(arr, dtype="datetime64[s]") - result = parser.read_csv_with_warnings( - UserWarning, - "without a format specified", + result = parser.read_csv( StringIO(data), date_parser=date_parser, parse_dates={"datetime": ["date", "time"]}, @@ -1678,34 +1674,13 @@ def test_parse_timezone(all_parsers): @skip_pyarrow @pytest.mark.parametrize( - "date_string, warning, msg", - [ - ("32/32/2019", None, ""), - ("02/30/2019", None, ""), - ("13/13/2019", None, ""), - ("13/2019", None, ""), - ( - "a3/11/2018", - UserWarning, - "Parsing datetime strings without a format specified", - ), - ( - "10/11/2o17", - UserWarning, - "Parsing datetime strings without a format specified", - ), - ], + "date_string", + ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], ) -def test_invalid_parse_delimited_date(all_parsers, date_string, warning, msg): +def test_invalid_parse_delimited_date(all_parsers, date_string): parser = all_parsers expected = DataFrame({0: [date_string]}, dtype="object") - result = parser.read_csv_check_warnings( - warning, - msg, - StringIO(date_string), - header=None, - parse_dates=[0], - ) + result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0]) tm.assert_frame_equal(result, expected) From 0c24d14b681fb63045e93799be573c6b850f3cd4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 5 Aug 2022 12:42:48 +0200 Subject: [PATCH 13/18] wip --- pandas/tests/io/parser/test_parse_dates.py | 8 +++++++- pandas/tests/tseries/holiday/test_calendar.py | 6 +++--- pandas/tests/tseries/offsets/test_dst.py | 2 +- pandas/tests/tslibs/test_parsing.py | 10 +++------- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 3ccb3b2d924d8..a133b71d39eed 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -844,7 +844,13 @@ def test_yy_format_with_year_first(all_parsers, parse_dates): 090331,0830,5,6 """ parser = all_parsers - result = parser.read_csv(StringIO(data), index_col=0, parse_dates=parse_dates) + result = parser.read_csv_check_warnings( + UserWarning, + "without a format specified", + StringIO(data), + index_col=0, + parse_dates=parse_dates, + ) index = DatetimeIndex( [ datetime(2009, 1, 31, 0, 10, 0), diff --git a/pandas/tests/tseries/holiday/test_calendar.py b/pandas/tests/tseries/holiday/test_calendar.py index 57acf15443ca8..3580b6fbbbf7c 100644 --- a/pandas/tests/tseries/holiday/test_calendar.py +++ b/pandas/tests/tseries/holiday/test_calendar.py @@ -57,8 +57,8 @@ def __init__(self, name=None, rules=None) -> None: jan2 = TestCalendar(rules=[Holiday("jan2", year=2015, month=1, day=2)]) # Getting holidays for Jan 1 should not alter results for Jan 2. - tm.assert_index_equal(jan1.holidays(), DatetimeIndex(["01-Jan-2015"])) - tm.assert_index_equal(jan2.holidays(), DatetimeIndex(["02-Jan-2015"])) + tm.assert_index_equal(jan1.holidays(), DatetimeIndex(["2015-01-01"])) + tm.assert_index_equal(jan2.holidays(), DatetimeIndex(["2015-01-02"])) def test_calendar_observance_dates(): @@ -111,6 +111,6 @@ class NoHolidaysCalendar(AbstractHolidayCalendar): pass cal = NoHolidaysCalendar() - holidays = cal.holidays(Timestamp("01-Jan-2020"), Timestamp("01-Jan-2021")) + holidays = cal.holidays(Timestamp("01/01/2020"), Timestamp("01/01/2021")) empty_index = DatetimeIndex([]) # Type is DatetimeIndex since return_name=False tm.assert_index_equal(holidays, empty_index) diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index 50c5a91fc2390..e5eb736895eeb 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -165,7 +165,7 @@ def test_springforward_singular(self): QuarterEnd: ["11/2/2012", "12/31/2012"], BQuarterBegin: ["11/2/2012", "12/3/2012"], BQuarterEnd: ["11/2/2012", "12/31/2012"], - Day: ["11/4/2012", "11/4/2012 23:00"], + Day: ["11/4/2012", "2012-11-04 23:00"], }.items() @pytest.mark.parametrize("tup", offset_classes) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index f1bbccebb6632..2055a7dced79a 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -214,13 +214,9 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): UserWarning, "Parsing datetime strings without a format specified", ), - ("1/1/1/1", UserWarning, "Parsing datetime strings without a format specified"), - ( - "this_is_not_a_datetime", - UserWarning, - "Parsing datetime strings without a format specified", - ), - ("51a", UserWarning, "Parsing datetime strings without a format specified"), + ("1/1/1/1", None, ""), + ("this_is_not_a_datetime", None, ""), + ("51a", None, ""), (9, None, ""), (datetime(2011, 1, 1), None, ""), ], From 98d0b2027b7bda52efcf77ffbc78c4da6a387fb9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 5 Aug 2022 12:52:41 +0200 Subject: [PATCH 14/18] catch more warnings --- pandas/tests/tslibs/test_parsing.py | 70 +++++++++++++++-------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 2055a7dced79a..725580fe3627e 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -143,42 +143,43 @@ def test_parsers_month_freq(date_str, expected): @td.skip_if_not_us_locale @pytest.mark.parametrize( - "string,fmt", + "string,fmt,warn", [ - ("20111230", "%Y%m%d"), - ("2011-12-30", "%Y-%m-%d"), - ("30-12-2011", "%d-%m-%Y"), - ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"), - ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), - ("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z"), - ("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+9", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+09", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+090", None), - ("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+09:00", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+09:000", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+9:0", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+09:", None), - ("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z"), - ("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+9", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+09", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+090", None), - ("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+09:00", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+09:000", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+09:", None), - ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), - ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), - ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), + ("20111230", "%Y%m%d", UserWarning), + ("2011-12-30", "%Y-%m-%d", UserWarning), + ("30-12-2011", "%d-%m-%Y", UserWarning), + ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S", UserWarning), + ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S", UserWarning), + ("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z", UserWarning), + ("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+9", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+09", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+090", None, None), + ("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+09:00", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+09:000", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+9:0", "%Y-%m-%dT%H:%M:%S%z", UserWarning), + ("2011-12-30T00:00:00+09:", None, None), + ("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z", UserWarning), + ("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+9", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+09", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+090", None, None), + ("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+09:00", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+09:000", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z", UserWarning), + ("2011-12-30T00:00:00.000000+09:", None, None), + ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f", UserWarning), + ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p", UserWarning), + ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p", UserWarning), ], ) -def test_guess_datetime_format_with_parseable_formats(string, fmt): - result = parsing.guess_datetime_format(string) +def test_guess_datetime_format_with_parseable_formats(string, fmt, warn): + with tm.assert_produces_warning(warn, match="without a format specified"): + result = parsing.guess_datetime_format(string) assert result == fmt @@ -200,7 +201,8 @@ def test_guess_datetime_format_with_dayfirst(dayfirst, expected): ], ) def test_guess_datetime_format_with_locale_specific_formats(string, fmt): - result = parsing.guess_datetime_format(string) + with tm.assert_produces_warning(UserWarning, match="without a format specified"): + result = parsing.guess_datetime_format(string) assert result == fmt From 317954adb941fb4565fa69d9694807909f082930 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 5 Aug 2022 17:52:51 +0200 Subject: [PATCH 15/18] more fixups --- pandas/tests/indexing/test_loc.py | 8 +++++--- pandas/tests/resample/test_resample_api.py | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4c38a2219372d..e1f8912cb1e5f 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -632,12 +632,14 @@ def test_loc_setitem_consistency_slice_column_len(self): ] df = DataFrame(values, index=mi, columns=cols) - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = ( + "will attempt to set the values inplace instead|without a format specified" + ) + with tm.assert_produces_warning((FutureWarning, UserWarning), match=msg): df.loc[:, ("Respondent", "StartDate")] = to_datetime( df.loc[:, ("Respondent", "StartDate")] ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning((FutureWarning, UserWarning), match=msg): df.loc[:, ("Respondent", "EndDate")] = to_datetime( df.loc[:, ("Respondent", "EndDate")] ) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index c5cd777962df3..0e6933087b579 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -688,9 +688,19 @@ def test_selection_api_validation(): @pytest.mark.parametrize( - "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"] + "col_name, warn", + [ + ("t2", None), + ("t2x", None), + ("t2q", None), + ("T_2M", None), + ("t2p", None), + ("t2m", UserWarning), + ("t2m1", UserWarning), + ("T2M", UserWarning), + ], ) -def test_agg_with_datetime_index_list_agg_func(col_name): +def test_agg_with_datetime_index_list_agg_func(col_name, warn): # GH 22660 # The parametrized column names would get converted to dates by our # date parser. Some would result in OutOfBoundsError (ValueError) while @@ -703,7 +713,8 @@ def test_agg_with_datetime_index_list_agg_func(col_name): ), columns=[col_name], ) - result = df.resample("1d").aggregate(["mean"]) + with tm.assert_produces_warning(warn, match="without a format specified"): + result = df.resample("1d").aggregate(["mean"]) expected = DataFrame( [47.5, 143.5, 195.5], index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"), From 6ecd4faf842061085707660d21558bfc0949d0c3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 7 Aug 2022 10:19:02 +0200 Subject: [PATCH 16/18] wip --- pandas/tests/resample/test_datetime_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 970d4f155ecfc..2091f6a2f2e6e 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1731,7 +1731,7 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k): # GH 24127 n1_ = n1 * k n2_ = n2 * k - s = Series(0, index=date_range("19910905 13:00", "19911005 07:00", freq=freq1)) + s = Series(0, index=date_range("1991-09-05 13:00", "1991-10-05 07:00", freq=freq1)) s = s + range(len(s)) result1 = s.resample(str(n1_) + freq1).mean() From 903c14e66f0407db98153bd4b0a029ec116397f7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 12 Aug 2022 13:50:03 +0200 Subject: [PATCH 17/18] update after merge --- pandas/_libs/tslibs/parsing.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 0141454feae03..f90d4cff469ef 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -264,6 +264,7 @@ cdef inline bint does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 from pandas.util._exceptions import find_stack_level +import inspect def du_parse_with_warning(*args, **kwargs): @@ -271,7 +272,7 @@ def du_parse_with_warning(*args, **kwargs): warnings.warn( "Parsing datetime strings without a format specified, " "please specify a format to avoid unexpected results", - stacklevel=find_stack_level(), + stacklevel=find_stack_level(inspect.currentframe()), ) return parsed From 4eb9a75b0e7dd5082f629ff6bba7e07c111da4a4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 12 Aug 2022 17:06:18 +0200 Subject: [PATCH 18/18] wip --- pandas/tests/apply/test_frame_apply.py | 3 ++- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 23 ++++++++++++++----- pandas/tests/frame/methods/test_drop.py | 12 +++++----- pandas/tests/frame/methods/test_quantile.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 2 +- .../tests/frame/methods/test_to_timestamp.py | 4 +++- pandas/tests/groupby/test_apply.py | 6 ++++- pandas/tests/groupby/test_function.py | 3 ++- 9 files changed, 38 insertions(+), 19 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index ff3abaf819206..542f0d2cca1bc 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -836,7 +836,8 @@ def test_with_dictlike_columns_with_datetime(): df["author"] = ["X", "Y", "Z"] df["publisher"] = ["BBC", "NBC", "N24"] df["date"] = pd.to_datetime( - ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"] + ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"], + format="%d-%m-%Y %H:%M:%S", ) result = df.apply(lambda x: {}, axis=1) expected = Series([{}, {}, {}]) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 0b1d56a956c07..588a6b2b87acd 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1620,7 +1620,7 @@ def test_dti_add_sub_nonzero_mth_offset( ): # GH 26258 tz = tz_aware_fixture - date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="AS", tz=tz) + date = date_range(start="2014-01-01", end="2017-01-01", freq="AS", tz=tz) date = tm.box_expected(date, box_with_array, False) mth = getattr(date, op) result = mth(offset) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index ea895e5656ccb..36a1a1ce0d3b7 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -316,13 +316,19 @@ def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage arr = pd.Series(arr) # scalar - result = arr.searchsorted(str(arr[1])) + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + # wip + result = arr.searchsorted(str(arr[1])) assert result == 1 - result = arr.searchsorted(str(arr[2]), side="right") + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + # wip + result = arr.searchsorted(str(arr[2]), side="right") assert result == 3 - result = arr.searchsorted([str(x) for x in arr[1:3]]) + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + # wip + result = arr.searchsorted([str(x) for x in arr[1:3]]) expected = np.array([1, 2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) @@ -345,7 +351,10 @@ def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage f"or array of those. Got '{arr_type}' instead." ), ): - arr.searchsorted([str(arr[1]), "baz"]) + with tm.assert_produces_warning( + UserWarning, match="without a format specified" + ): + arr.searchsorted([str(arr[1]), "baz"]) def test_getitem_near_implementation_bounds(self): # We only check tz-naive for DTA bc the bounds are slightly different @@ -480,7 +489,8 @@ def test_setitem_strs(self, arr1d, request): expected[[0, 1]] = arr1d[-2:] result = arr1d.copy() - result[:2] = [str(x) for x in arr1d[-2:]] + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + result[:2] = [str(x) for x in arr1d[-2:]] tm.assert_equal(result, expected) # Same thing but now for just a scalar str @@ -488,7 +498,8 @@ def test_setitem_strs(self, arr1d, request): expected[0] = arr1d[-1] result = arr1d.copy() - result[0] = str(arr1d[-1]) + with tm.assert_produces_warning(None, raise_on_extra_warnings=False): + result[0] = str(arr1d[-1]) tm.assert_equal(result, expected) @pytest.mark.parametrize("as_index", [True, False]) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 50b60f9e06ef1..6bd10e4276d9a 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -405,17 +405,17 @@ def test_drop_level_nonunique_datetime(self): idx = Index([2, 3, 4, 4, 5], name="id") idxdt = pd.to_datetime( [ - "201603231400", - "201603231500", - "201603231600", - "201603231600", - "201603231700", + "2016-03-23 14:00", + "2016-03-23 15:00", + "2016-03-23 16:00", + "2016-03-23 16:00", + "2016-03-23 17:00", ] ) df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx) df["tstamp"] = idxdt df = df.set_index("tstamp", append=True) - ts = Timestamp("201603231600") + ts = Timestamp("2016-03-23 16:00") assert df.index.is_unique is False result = df.drop(ts, level="tstamp") diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 16b82727fd069..c29a5e49c1e8a 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -590,7 +590,7 @@ def test_quantile_empty_no_rows_dt64(self): def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns - df = DataFrame(pd.date_range("1/1/18", periods=5)) + df = DataFrame(pd.date_range("1/1/2018", periods=5)) df.columns.name = "captain tightpants" result = df.quantile(0.5, numeric_only=True) expected = Series([], index=[], name=0.5, dtype=np.float64) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 8575e7895ae5a..8988a5979f118 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -304,7 +304,7 @@ def test_reindex_nearest_tz_empty_frame(self): tm.assert_frame_equal(result, expected) def test_reindex_frame_add_nat(self): - rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") + rng = date_range("2000-01-01 00:00:00", periods=10, freq="10s") df = DataFrame({"A": np.random.randn(len(rng)), "B": rng}) result = df.reindex(range(15)) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index acbb51fe79643..3198fb594d835 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -136,7 +136,9 @@ def test_to_timestamp_hourly(self, frame_or_series): if frame_or_series is not Series: obj = obj.to_frame() - exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H") + exp_index = date_range( + "2001-01-01 00:59:59", end="2001-01-02 00:59:59", freq="H" + ) result = obj.to_timestamp(how="end") exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") tm.assert_index_equal(result.index, exp_index) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b064c12f89c21..856568e95343d 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -603,7 +603,11 @@ def test_apply_numeric_coercion_when_datetime(): # GH 15421 df = DataFrame( - {"A": [10, 20, 30], "B": ["foo", "3", "4"], "T": [pd.Timestamp("12:31:22")] * 3} + { + "A": [10, 20, 30], + "B": ["foo", "3", "4"], + "T": [pd.Timestamp("2000-01-01 12:31:22")] * 3, + } ) def get_B(g): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 93e9b5bb776ab..dfd83bd894f50 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -688,7 +688,8 @@ def test_max_nan_bug(): -05-06,2013-05-06 00:00:00,,log.log -05-07,2013-05-07 00:00:00,OE,xlsx""" - df = pd.read_csv(StringIO(raw), parse_dates=[0]) + with tm.assert_produces_warning(UserWarning, match="without a format specified"): + df = pd.read_csv(StringIO(raw), parse_dates=[0]) gb = df.groupby("Date") r = gb[["File"]].max() e = gb["File"].max().to_frame()