From 00f1f0ae924463211b366c21fcc064b39a4786db Mon Sep 17 00:00:00 2001 From: aresnow1 <109642806+aresnow1@users.noreply.github.com> Date: Wed, 4 Jan 2023 16:13:38 +0800 Subject: [PATCH] BUG: Fix `date_range` and pin sphinx<6.0.0 (#118) --- docs/requirements-doc.txt | 2 +- mars/dataframe/datasource/date_range.py | 69 ++++++++++++++++++- .../tests/test_datasource_execution.py | 5 ++ .../stats/tests/test_stats_execution.py | 18 ++++- 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/docs/requirements-doc.txt b/docs/requirements-doc.txt index f2bf58f323..c4f0648a8b 100644 --- a/docs/requirements-doc.txt +++ b/docs/requirements-doc.txt @@ -10,7 +10,7 @@ pytest-cov>=2.5.0 pytest-timeout>=1.2.0 cloudpickle>=1.0.0 sqlalchemy>=1.2.0 -sphinx>=3.0.0 +sphinx<6.0.0 pydata-sphinx-theme>=0.3.0 sphinx-intl>=0.9.9 ipython>=4.0 diff --git a/mars/dataframe/datasource/date_range.py b/mars/dataframe/datasource/date_range.py index 655f3a0e37..fca3b45ca6 100644 --- a/mars/dataframe/datasource/date_range.py +++ b/mars/dataframe/datasource/date_range.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd +from pandas import Timestamp, NaT from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick from pandas._libs.tslibs import timezones @@ -51,6 +52,72 @@ def normalize_date(dt): # from pandas/_libs/tslibs/conversion.pyx _date_range_use_inclusive = pd_release_version[:2] >= (1, 4) +# adapted from pandas.core.arrays.datetimes.generate_range +def generate_range_count( + start=None, end=None, periods=None, offset=None +): # pragma: no cover + offset = to_offset(offset) + + start = Timestamp(start) + start = start if start is not NaT else None + end = Timestamp(end) + end = end if end is not NaT else None + + if start and not offset.is_on_offset(start): + start = offset.rollforward(start) + + elif end and not offset.is_on_offset(end): + end = offset.rollback(end) + + if periods is None and end < start and offset.n >= 0: + end = None + periods = 0 + + if end is None: + end = start + (periods - 1) * offset + + if start is None: + start = end - (periods - 1) * offset + + cur = start + count = 0 + if offset.n >= 0: + while cur <= end: + count += 1 + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + try: + next_date = offset._apply(cur) + except AttributeError: + next_date = cur + offset + if next_date <= cur: + raise ValueError(f"Offset {offset} did not increment date") + cur = next_date + else: + while cur >= end: + count += 1 + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + try: + next_date = offset._apply(cur) + except AttributeError: + next_date = cur + offset + if next_date >= cur: + raise ValueError(f"Offset {offset} did not decrement date") + cur = next_date + return count + + class DataFrameDateRange(DataFrameOperand, DataFrameOperandMixin): _op_type_ = OperandDef.DATE_RANGE @@ -511,7 +578,7 @@ def date_range( inclusive = "both" else: if periods is None: - periods = size = int((end - start) / freq + 1) + periods = size = generate_range_count(start, end, periods, freq) else: size = periods if inclusive in ("left", "right"): diff --git a/mars/dataframe/datasource/tests/test_datasource_execution.py b/mars/dataframe/datasource/tests/test_datasource_execution.py index 224b556970..9480cf570f 100644 --- a/mars/dataframe/datasource/tests/test_datasource_execution.py +++ b/mars/dataframe/datasource/tests/test_datasource_execution.py @@ -1007,6 +1007,11 @@ def test_date_range_execution(setup): expected = pd.date_range(start="1/1/2018", periods=5, freq="M") pd.testing.assert_index_equal(result, expected) + dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="M") + result = dr.execute().fetch() + expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="M") + pd.testing.assert_index_equal(result, expected) + parquet_engines = ["auto"] if pa is not None: diff --git a/mars/tensor/stats/tests/test_stats_execution.py b/mars/tensor/stats/tests/test_stats_execution.py index 69404b43ee..b820383d93 100644 --- a/mars/tensor/stats/tests/test_stats_execution.py +++ b/mars/tensor/stats/tests/test_stats_execution.py @@ -205,7 +205,6 @@ def test_t_test_execution(setup): functools.partial(mt_from_stats, equal_var=False), functools.partial(sp_from_stats, equal_var=False), ), - (ttest_1samp, sp_ttest_1samp), ] fa_raw = np.array([16, 18, 16, 14, 12, 12]) @@ -233,6 +232,23 @@ def test_t_test_execution(setup): np.testing.assert_almost_equal(expected[0], result[0]) np.testing.assert_almost_equal(expected[1], result[1]) + # second param size must be 1 for ttest_1samp + fb_raw = np.array([16]) + fb = tensor(fb_raw) + for alt in alternatives: + if parse_version(scipy.__version__) >= parse_version("1.6.0"): + r = ttest_1samp(fa, fb, alternative=alt) + else: + r = ttest_1samp(fa, fb) + result = r.execute().fetch() + + if parse_version(scipy.__version__) >= parse_version("1.6.0"): + expected = sp_ttest_1samp(fa_raw, fb_raw, alternative=alt) + else: + expected = sp_ttest_1samp(fa_raw, fb_raw) + np.testing.assert_almost_equal(expected[0], result[0]) + np.testing.assert_almost_equal(expected[1], result[1]) + @pytest.mark.parametrize("chunk_size", [5, 15]) @pytest.mark.parametrize(