diff --git a/pyproject.toml b/pyproject.toml index f4de900cde5..fe48d090c0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,10 @@ license = { text = "MIT" } dependencies = [ "pyyaml", "numpy", - "pandas>=0.24", + # Since version 1.1.0, pandas supports the ffill and bfill methods. + # Since version 2.1.0, pandas has deprecated the method parameter of the fillna method. + # qlib has updated the fillna method in PR 1987 and limited the minimum version of pandas. + "pandas>=1.1", # I encoutered an Error that the set_uri does not work when downloading artifacts in mlflow 3.1.1; # But earlier versions of mlflow does not have this problem. # But when I switch to 2.*.* version, another error occurs, which is even more strange... diff --git a/qlib/backtest/profit_attribution.py b/qlib/backtest/profit_attribution.py index 63a1d692db7..05ca8670659 100644 --- a/qlib/backtest/profit_attribution.py +++ b/qlib/backtest/profit_attribution.py @@ -281,13 +281,13 @@ def brinson_pa( stock_group_field = stock_df[group_field].unstack().T # FIXME: some attributes of some suspend stock is NAN. - stock_group_field = stock_group_field.fillna(method="ffill") + stock_group_field = stock_group_field.ffill() stock_group_field = stock_group_field.loc[start_date:end_date] stock_group = get_stock_group(stock_group_field, bench_stock_weight, group_method, group_n) deal_price_df = stock_df["deal_price"].unstack().T - deal_price_df = deal_price_df.fillna(method="ffill") + deal_price_df = deal_price_df.ffill() # NOTE: # The return will be slightly different from the of the return in the report. diff --git a/qlib/contrib/ops/high_freq.py b/qlib/contrib/ops/high_freq.py index 25e66570f09..51852b66cca 100644 --- a/qlib/contrib/ops/high_freq.py +++ b/qlib/contrib/ops/high_freq.py @@ -135,7 +135,7 @@ class FFillNan(ElemOperator): def _load_internal(self, instrument, start_index, end_index, freq): series = self.feature.load(instrument, start_index, end_index, freq) - return series.fillna(method="ffill") + return series.ffill() class BFillNan(ElemOperator): @@ -154,7 +154,7 @@ class BFillNan(ElemOperator): def _load_internal(self, instrument, start_index, end_index, freq): series = self.feature.load(instrument, start_index, end_index, freq) - return series.fillna(method="bfill") + return series.bfill() class Date(ElemOperator): diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 2b64a5bf439..0f6510e8182 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -33,7 +33,7 @@ def parse_position(position: dict = None) -> pd.DataFrame: position_weight_df = get_stock_weight_df(position) # If the day does not exist, use the last weight - position_weight_df.fillna(method="ffill", inplace=True) + position_weight_df.ffill(inplace=True) previous_data = {"date": None, "code_list": []} diff --git a/qlib/data/dataset/storage.py b/qlib/data/dataset/storage.py index dd51f1d5fed..ca3325a28cf 100644 --- a/qlib/data/dataset/storage.py +++ b/qlib/data/dataset/storage.py @@ -67,7 +67,6 @@ def fetch( col_set: Union[str, List[str]] = DataHandler.CS_ALL, fetch_orig: bool = True, ) -> pd.DataFrame: - # Following conflicts may occur # - Does [20200101", "20210101"] mean selecting this slice or these two days? # To solve this issue diff --git a/qlib/utils/mod.py b/qlib/utils/mod.py index 4e0cb707f33..12fbc587034 100644 --- a/qlib/utils/mod.py +++ b/qlib/utils/mod.py @@ -161,7 +161,6 @@ def init_instance_by_config( # path like 'file:////obj.pkl' pr = urlparse(config) if pr.scheme == "file": - # To enable relative path like file://data/a/b/c.pkl. pr.netloc will be data path = pr.path if pr.netloc != "": diff --git a/qlib/utils/resam.py b/qlib/utils/resam.py index 9fe38ad6629..21c6a17ca42 100644 --- a/qlib/utils/resam.py +++ b/qlib/utils/resam.py @@ -222,7 +222,7 @@ def get_valid_value(series, last=True): Nan | float the first/last valid value """ - return series.fillna(method="ffill").iloc[-1] if last else series.fillna(method="bfill").iloc[0] + return series.ffill().iloc[-1] if last else series.bfill().iloc[0] def _ts_data_valid(ts_feature, last=False): diff --git a/scripts/data_collector/baostock_5min/collector.py b/scripts/data_collector/baostock_5min/collector.py index 337a414aad7..0a69beefb82 100644 --- a/scripts/data_collector/baostock_5min/collector.py +++ b/scripts/data_collector/baostock_5min/collector.py @@ -172,7 +172,7 @@ def __init__( @staticmethod def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series: df = df.copy() - _tmp_series = df["close"].fillna(method="ffill") + _tmp_series = df["close"].ffill() _tmp_shift_series = _tmp_series.shift(1) if last_close is not None: _tmp_shift_series.iloc[0] = float(last_close) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 804b92c9cb2..a1b4d64f65e 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -371,7 +371,7 @@ class YahooNormalize(BaseNormalize): @staticmethod def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series: df = df.copy() - _tmp_series = df["close"].fillna(method="ffill") + _tmp_series = df["close"].ffill() _tmp_shift_series = _tmp_series.shift(1) if last_close is not None: _tmp_shift_series.iloc[0] = float(last_close) @@ -459,7 +459,7 @@ def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame: df.set_index(self._date_field_name, inplace=True) if "adjclose" in df: df["factor"] = df["adjclose"] / df["close"] - df["factor"] = df["factor"].fillna(method="ffill") + df["factor"] = df["factor"].ffill() else: df["factor"] = 1 for _col in self.COLUMNS: