From 600c784cf0bc36585b676aeffcc23c1695ed5434 Mon Sep 17 00:00:00 2001 From: zengbin93 Date: Fri, 6 Dec 2024 21:00:45 +0800 Subject: [PATCH] =?UTF-8?q?V0.9.61=20=E6=9B=B4=E6=96=B0=E4=B8=80=E6=89=B9?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=20(#217)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 0.9.61 start coding * 0.9.61 start coding * 0.9.61 update * 0.9.61 update * 0.9.61 update * 0.9.61 新增 pyd 打包支持 * 0.9.61 新增 pyd 打包支持 * 0.9.61 新增 pyd 打包支持 * 0.9.61 新增 yearly_days 参数支持 * 0.9.61 新增 limit_leverage 函数 * 0.9.61 更新 setup 配置 * 0.9.61 新增 show_corr_graph 相关性可视化 * 0.9.61 新增 price_type 参数控制回测交易价格 * 0.9.61 优化 cross_sectional_strategy * 0.9.61 fix networkx * 0.9.61 更新2025年的交易日历 * 0.9.61 WeightBacktest 支持多空分离的收益统计 * 0.9.61 WeightBacktest 支持多空分离的收益统计 * 0.9.61 新增时间效应分析组件 * 0.9.61 update --- .github/workflows/python-publish.yml | 7 +- .github/workflows/pythonpackage.yml | 2 +- czsc/__init__.py | 8 +- czsc/connectors/jq_connector.py | 282 +-- czsc/eda.py | 104 +- czsc/traders/weight_backtest.py | 148 +- czsc/utils/calendar.py | 21 +- czsc/utils/china_calendar.feather | Bin 49314 -> 52490 bytes czsc/utils/plotly_plot.py | 309 ++- czsc/utils/st_components.py | 158 +- czsc/utils/stats.py | 8 +- .../corr_graph.py" | 35 + ...66\351\227\264\346\225\210\345\272\224.py" | 99 + examples/develop/weight_backtest.py | 1902 ++++++++++++++++- examples/test_offline/test_weight_backtest.py | 59 +- ...40\345\255\220\346\240\267\344\276\213.py" | 3 + requirements.txt | 4 +- setup.py | 4 +- test/test_cross_sectional_strategy.py | 68 + test/test_eda.py | 34 + 20 files changed, 2992 insertions(+), 263 deletions(-) create mode 100644 "examples/Streamlit\347\273\204\344\273\266\345\272\223\344\275\277\347\224\250\346\241\210\344\276\213/corr_graph.py" create mode 100644 "examples/develop/ST\346\227\245\346\224\266\347\233\212\347\232\204\346\227\266\351\227\264\346\225\210\345\272\224.py" create mode 100644 test/test_cross_sectional_strategy.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index beb265ecf..6b2b1e6dc 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -9,15 +9,18 @@ on: jobs: deploy: - runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] + steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.8' + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index b9efe4993..30a280f95 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -5,7 +5,7 @@ name: Python package on: push: - branches: [ master, 'V0.9.60' ] + branches: [ master, 'V0.9.61' ] pull_request: branches: [ master ] diff --git a/czsc/__init__.py b/czsc/__init__.py index 2c63d06fb..6035a6d82 100644 --- a/czsc/__init__.py +++ b/czsc/__init__.py @@ -136,6 +136,7 @@ show_splited_daily, show_monthly_return, show_correlation, + show_corr_graph, show_sectional_ic, show_factor_returns, show_factor_layering, @@ -161,6 +162,8 @@ show_factor_value, show_code_editor, show_classify, + show_df_describe, + show_date_effect, ) from czsc.utils.bi_info import ( @@ -218,13 +221,14 @@ sma_long_bear, dif_long_bear, tsf_type, + limit_leverage, ) -__version__ = "0.9.60" +__version__ = "0.9.61" __author__ = "zengbin93" __email__ = "zeng_bin8888@163.com" -__date__ = "20240918" +__date__ = "20241101" def welcome(): diff --git a/czsc/connectors/jq_connector.py b/czsc/connectors/jq_connector.py index a2b1ef2bf..000d900b3 100644 --- a/czsc/connectors/jq_connector.py +++ b/czsc/connectors/jq_connector.py @@ -22,11 +22,27 @@ date_fmt = "%Y-%m-%d" # 1m, 5m, 15m, 30m, 60m, 120m, 1d, 1w, 1M -freq_convert = {"1min": "1m", "5min": '5m', '15min': '15m', - "30min": "30m", "60min": '60m', "D": "1d", "W": '1w', "M": "1M"} - -freq_map = {'1min': Freq.F1, '5min': Freq.F5, '15min': Freq.F15, '30min': Freq.F30, - '60min': Freq.F60, 'D': Freq.D, 'W': Freq.W, 'M': Freq.M} +freq_convert = { + "1min": "1m", + "5min": "5m", + "15min": "15m", + "30min": "30m", + "60min": "60m", + "D": "1d", + "W": "1w", + "M": "1M", +} + +freq_map = { + "1min": Freq.F1, + "5min": Freq.F5, + "15min": Freq.F15, + "30min": Freq.F30, + "60min": Freq.F60, + "D": Freq.D, + "W": Freq.W, + "M": Freq.M, +} def set_token(jq_mob, jq_pwd): @@ -38,7 +54,7 @@ def set_token(jq_mob, jq_pwd): Password为聚宽官网登录密码,新申请用户默认为手机号后6位 :return: None """ - with open(file_token, 'wb') as f: + with open(file_token, "wb") as f: pickle.dump([jq_mob, jq_pwd], f) @@ -47,7 +63,7 @@ def get_token(): if not os.path.exists(file_token): raise ValueError(f"{file_token} 文件不存在,请先调用 set_token 进行设置") - with open(file_token, 'rb') as f: + with open(file_token, "rb") as f: jq_mob, jq_pwd = pickle.load(f) body = { @@ -61,7 +77,7 @@ def get_token(): def text2df(text): - rows = [x.split(",") for x in text.strip().split('\n')] + rows = [x.split(",") for x in text.strip().split("\n")] df = pd.DataFrame(rows[1:], columns=rows[0]) return df @@ -120,14 +136,9 @@ def get_concept_stocks(symbol, date=None): if isinstance(date, datetime): date = str(date.date()) - data = { - "method": "get_concept_stocks", - "token": get_token(), - "code": symbol, - "date": date - } + data = {"method": "get_concept_stocks", "token": get_token(), "code": symbol, "date": date} r = requests.post(url, data=json.dumps(data)) - return r.text.split('\n') + return r.text.split("\n") def get_index_stocks(symbol, date=None): @@ -152,14 +163,9 @@ def get_index_stocks(symbol, date=None): if isinstance(date, datetime): date = str(date.date()) - data = { - "method": "get_index_stocks", - "token": get_token(), - "code": symbol, - "date": date - } + data = {"method": "get_index_stocks", "token": get_token(), "code": symbol, "date": date} r = requests.post(url, data=json.dumps(data)) - return r.text.split('\n') + return r.text.split("\n") def get_industry(symbol): @@ -168,28 +174,23 @@ def get_industry(symbol): :param symbol: :return: """ - data = { - "method": "get_industry", - "token": get_token(), - "code": symbol, - "date": str(datetime.now().date()) - } + data = {"method": "get_industry", "token": get_token(), "code": symbol, "date": str(datetime.now().date())} r = requests.post(url, data=json.dumps(data)) df = text2df(r.text) res = { "股票代码": symbol, - "证监会行业代码": df[df['industry'] == 'zjw']['industry_code'].iloc[0], - "证监会行业名称": df[df['industry'] == 'zjw']['industry_name'].iloc[0], - "聚宽一级行业代码": df[df['industry'] == 'jq_l1']['industry_code'].iloc[0], - "聚宽一级行业名称": df[df['industry'] == 'jq_l1']['industry_name'].iloc[0], - "聚宽二级行业代码": df[df['industry'] == 'jq_l2']['industry_code'].iloc[0], - "聚宽二级行业名称": df[df['industry'] == 'jq_l2']['industry_name'].iloc[0], - "申万一级行业代码": df[df['industry'] == 'sw_l1']['industry_code'].iloc[0], - "申万一级行业名称": df[df['industry'] == 'sw_l1']['industry_name'].iloc[0], - "申万二级行业代码": df[df['industry'] == 'sw_l2']['industry_code'].iloc[0], - "申万二级行业名称": df[df['industry'] == 'sw_l2']['industry_name'].iloc[0], - "申万三级行业代码": df[df['industry'] == 'sw_l3']['industry_code'].iloc[0], - "申万三级行业名称": df[df['industry'] == 'sw_l3']['industry_name'].iloc[0], + "证监会行业代码": df[df["industry"] == "zjw"]["industry_code"].iloc[0], + "证监会行业名称": df[df["industry"] == "zjw"]["industry_name"].iloc[0], + "聚宽一级行业代码": df[df["industry"] == "jq_l1"]["industry_code"].iloc[0], + "聚宽一级行业名称": df[df["industry"] == "jq_l1"]["industry_name"].iloc[0], + "聚宽二级行业代码": df[df["industry"] == "jq_l2"]["industry_code"].iloc[0], + "聚宽二级行业名称": df[df["industry"] == "jq_l2"]["industry_name"].iloc[0], + "申万一级行业代码": df[df["industry"] == "sw_l1"]["industry_code"].iloc[0], + "申万一级行业名称": df[df["industry"] == "sw_l1"]["industry_name"].iloc[0], + "申万二级行业代码": df[df["industry"] == "sw_l2"]["industry_code"].iloc[0], + "申万二级行业名称": df[df["industry"] == "sw_l2"]["industry_name"].iloc[0], + "申万三级行业代码": df[df["industry"] == "sw_l3"]["industry_code"].iloc[0], + "申万三级行业名称": df[df["industry"] == "sw_l3"]["industry_name"].iloc[0], } return res @@ -213,18 +214,14 @@ def get_all_securities(code, date=None) -> pd.DataFrame: if isinstance(date, datetime): date = str(date.date()) - data = { - "method": "get_all_securities", - "token": get_token(), - "code": code, - "date": date - } + data = {"method": "get_all_securities", "token": get_token(), "code": code, "date": date} r = requests.post(url, data=json.dumps(data)) return text2df(r.text) -def get_kline(symbol: str, end_date: [datetime, str], freq: str, - start_date: [datetime, str] = None, count=None, fq: bool = True) -> List[RawBar]: +def get_kline( + symbol: str, end_date: [datetime, str], freq: str, start_date: [datetime, str] = None, count=None, fq: bool = True +) -> List[RawBar]: """获取K线数据 https://www.joinquant.com/help/api/help#JQDataHttp:get_priceget_bars-%E8%8E%B7%E5%8F%96%E6%8C%87%E5%AE%9A%E6%97%B6%E9%97%B4%E5%91%A8%E6%9C%9F%E7%9A%84%E8%A1%8C%E6%83%85%E6%95%B0%E6%8D%AE @@ -274,7 +271,7 @@ def get_kline(symbol: str, end_date: [datetime, str], freq: str, data.update({"fq_ref_date": end_date.strftime("%Y-%m-%d")}) r = requests.post(url, data=json.dumps(data)) - rows = [x.split(",") for x in r.text.strip().split('\n')][1:] + rows = [x.split(",") for x in r.text.strip().split("\n")][1:] bars = [] i = -1 @@ -286,12 +283,20 @@ def get_kline(symbol: str, end_date: [datetime, str], freq: str, if int(row[5]) > 0: i += 1 - bars.append(RawBar(symbol=symbol, dt=dt, id=i, freq=freq_map[freq], - open=round(float(row[1]), 2), - close=round(float(row[2]), 2), - high=round(float(row[3]), 2), - low=round(float(row[4]), 2), - vol=int(row[5]), amount=int(float(row[6])))) + bars.append( + RawBar( + symbol=symbol, + dt=dt, + id=i, + freq=freq_map[freq], + open=round(float(row[1]), 4), + close=round(float(row[2]), 4), + high=round(float(row[3]), 4), + low=round(float(row[4]), 4), + vol=int(row[5]), + amount=int(float(row[6])), + ) + ) # amount 单位:元 if start_date: bars = [x for x in bars if x.dt >= start_date] @@ -301,8 +306,9 @@ def get_kline(symbol: str, end_date: [datetime, str], freq: str, return bars -def get_kline_period(symbol: str, start_date: [datetime, str], - end_date: [datetime, str], freq: str, fq=True) -> List[RawBar]: +def get_kline_period( + symbol: str, start_date: [datetime, str], end_date: [datetime, str], freq: str, fq=True +) -> List[RawBar]: """获取指定时间段的行情数据 https://www.joinquant.com/help/api/help#JQDataHttp:get_price_periodget_bars_period-%E8%8E%B7%E5%8F%96%E6%8C%87%E5%AE%9A%E6%97%B6%E9%97%B4%E6%AE%B5%E7%9A%84%E8%A1%8C%E6%83%85%E6%95%B0%E6%8D%AE @@ -331,7 +337,7 @@ def get_kline_period(symbol: str, start_date: [datetime, str], data.update({"fq_ref_date": end_date.strftime("%Y-%m-%d")}) r = requests.post(url, data=json.dumps(data)) - rows = [x.split(",") for x in r.text.strip().split('\n')][1:] + rows = [x.split(",") for x in r.text.strip().split("\n")][1:] bars = [] i = -1 for row in rows: @@ -342,12 +348,20 @@ def get_kline_period(symbol: str, start_date: [datetime, str], if int(row[5]) > 0: i += 1 - bars.append(RawBar(symbol=symbol, dt=dt, id=i, freq=freq_map[freq], - open=round(float(row[1]), 2), - close=round(float(row[2]), 2), - high=round(float(row[3]), 2), - low=round(float(row[4]), 2), - vol=int(row[5]), amount=int(float(row[6])))) + bars.append( + RawBar( + symbol=symbol, + dt=dt, + id=i, + freq=freq_map[freq], + open=round(float(row[1]), 4), + close=round(float(row[2]), 4), + high=round(float(row[3]), 4), + low=round(float(row[4]), 4), + vol=int(row[5]), + amount=int(float(row[6])), + ) + ) # amount 单位:元 if start_date: bars = [x for x in bars if x.dt >= start_date] @@ -357,12 +371,7 @@ def get_kline_period(symbol: str, start_date: [datetime, str], return bars -def get_init_bg(symbol: str, - end_dt: [str, datetime], - base_freq: str, - freqs: List[str], - max_count=1000, - fq=True): +def get_init_bg(symbol: str, end_dt: [str, datetime], base_freq: str, freqs: List[str], max_count=1000, fq=True): """获取 symbol 的初始化 bar generator""" if isinstance(end_dt, str): end_dt = pd.to_datetime(end_dt, utc=False) @@ -380,7 +389,8 @@ def get_init_bg(symbol: str, data = [x for x in bars2 if x.dt > last_day] assert len(data) > 0 print( - f"{symbol}: bar generator 最新时间 {bg.bars[base_freq][-1].dt.strftime(dt_fmt)},还有{len(data)}行数据需要update") + f"{symbol}: bar generator 最新时间 {bg.bars[base_freq][-1].dt.strftime(dt_fmt)},还有{len(data)}行数据需要update" + ) return bg, data @@ -411,7 +421,7 @@ def get_fundamental(table: str, symbol: str, date: str, columns: str = "") -> di "columns": columns, "code": symbol, "date": date, - "count": 1 + "count": 1, } r = requests.post(url, data=json.dumps(data)) df = text2df(r.text) @@ -435,15 +445,9 @@ def run_query(table: str, conditions: str, columns=None, count=1): :param count: 数量 :return: """ - data = { - "method": "run_query", - "token": get_token(), - "table": table, - "conditions": conditions, - "count": count - } + data = {"method": "run_query", "token": get_token(), "table": table, "conditions": conditions, "count": count} if columns: - data['columns'] = columns + data["columns"] = columns r = requests.post(url, data=json.dumps(data)) df = text2df(r.text) return df @@ -459,64 +463,80 @@ def get_share_basic(symbol): basic_info = basic_info.iloc[0].to_dict() f10 = OrderedDict() - f10['股票代码'] = basic_info['code'] - f10['股票名称'] = basic_info['short_name'] - f10['行业'] = "{}-{}".format(basic_info['industry_1'], basic_info['industry_2']) - f10['地域'] = "{}{}".format(basic_info['province'], basic_info['city']) - f10['主营'] = basic_info['main_business'] - f10['同花顺F10'] = "http://basic.10jqka.com.cn/{}".format(basic_info['code'][:6]) + f10["股票代码"] = basic_info["code"] + f10["股票名称"] = basic_info["short_name"] + f10["行业"] = "{}-{}".format(basic_info["industry_1"], basic_info["industry_2"]) + f10["地域"] = "{}{}".format(basic_info["province"], basic_info["city"]) + f10["主营"] = basic_info["main_business"] + f10["同花顺F10"] = "http://basic.10jqka.com.cn/{}".format(basic_info["code"][:6]) # 市盈率,总市值,流通市值,流通比 # ------------------------------------------------------------------------------------------------------------------ last_date = datetime.now() - timedelta(days=1) res = get_fundamental(table="valuation", symbol=symbol, date=last_date.strftime("%Y-%m-%d")) - f10['总市值(亿)'] = float(res['market_cap']) - f10['流通市值(亿)'] = float(res['circulating_market_cap']) - f10['流通比(%)'] = round(float(res['circulating_market_cap']) / float(res['market_cap']) * 100, 2) - f10['PE_TTM'] = float(res['pe_ratio']) - f10['PE'] = float(res['pe_ratio_lyr']) - f10['PB'] = float(res['pb_ratio']) + f10["总市值(亿)"] = float(res["market_cap"]) + f10["流通市值(亿)"] = float(res["circulating_market_cap"]) + f10["流通比(%)"] = round(float(res["circulating_market_cap"]) / float(res["market_cap"]) * 100, 2) + f10["PE_TTM"] = float(res["pe_ratio"]) + f10["PE"] = float(res["pe_ratio_lyr"]) + f10["PB"] = float(res["pb_ratio"]) # 净资产收益率 # ------------------------------------------------------------------------------------------------------------------ - for year in ['2017', '2018', '2019', '2020']: + for year in ["2017", "2018", "2019", "2020"]: indicator = get_fundamental(table="indicator", symbol=symbol, date=year) - f10['{}EPS'.format(year)] = float(indicator.get('eps', 0)) if indicator.get('eps', 0) else 0 - f10['{}ROA'.format(year)] = float(indicator.get('roa', 0)) if indicator.get('roa', 0) else 0 - f10['{}ROE'.format(year)] = float(indicator.get('roe', 0)) if indicator.get('roe', 0) else 0 - f10['{}销售净利率(%)'.format(year)] = float(indicator.get('net_profit_margin', 0)) if indicator.get( - 'net_profit_margin', 0) else 0 - f10['{}销售毛利率(%)'.format(year)] = float(indicator.get('gross_profit_margin', 0)) if indicator.get( - 'gross_profit_margin', 0) else 0 - f10['{}营业收入同比增长率(%)'.format(year)] = float( - indicator.get('inc_revenue_year_on_year', 0)) if indicator.get('inc_revenue_year_on_year', 0) else 0 - f10['{}营业收入环比增长率(%)'.format(year)] = float(indicator.get('inc_revenue_annual', 0)) if indicator.get( - 'inc_revenue_annual', 0) else 0 - f10['{}营业利润同比增长率(%)'.format(year)] = float( - indicator.get('inc_operation_profit_year_on_year', 0)) if indicator.get('inc_operation_profit_year_on_year', - 0) else 0 - f10['{}经营活动产生的现金流量净额/营业收入(%)'.format(year)] = float( - indicator.get('ocf_to_revenue', 0)) if indicator.get('ocf_to_revenue', 0) else 0 + f10["{}EPS".format(year)] = float(indicator.get("eps", 0)) if indicator.get("eps", 0) else 0 + f10["{}ROA".format(year)] = float(indicator.get("roa", 0)) if indicator.get("roa", 0) else 0 + f10["{}ROE".format(year)] = float(indicator.get("roe", 0)) if indicator.get("roe", 0) else 0 + f10["{}销售净利率(%)".format(year)] = ( + float(indicator.get("net_profit_margin", 0)) if indicator.get("net_profit_margin", 0) else 0 + ) + f10["{}销售毛利率(%)".format(year)] = ( + float(indicator.get("gross_profit_margin", 0)) if indicator.get("gross_profit_margin", 0) else 0 + ) + f10["{}营业收入同比增长率(%)".format(year)] = ( + float(indicator.get("inc_revenue_year_on_year", 0)) if indicator.get("inc_revenue_year_on_year", 0) else 0 + ) + f10["{}营业收入环比增长率(%)".format(year)] = ( + float(indicator.get("inc_revenue_annual", 0)) if indicator.get("inc_revenue_annual", 0) else 0 + ) + f10["{}营业利润同比增长率(%)".format(year)] = ( + float(indicator.get("inc_operation_profit_year_on_year", 0)) + if indicator.get("inc_operation_profit_year_on_year", 0) + else 0 + ) + f10["{}经营活动产生的现金流量净额/营业收入(%)".format(year)] = ( + float(indicator.get("ocf_to_revenue", 0)) if indicator.get("ocf_to_revenue", 0) else 0 + ) # 组合成可以用来推送的文本 - msg = "{}({})@{}\n".format(f10['股票代码'], f10['股票名称'], f10['地域']) + msg = "{}({})@{}\n".format(f10["股票代码"], f10["股票名称"], f10["地域"]) msg += "\n{}\n".format("*" * 30) - for k in ['行业', '主营', 'PE_TTM', 'PE', 'PB', '总市值(亿)', '流通市值(亿)', '流通比(%)', '同花顺F10']: + for k in ["行业", "主营", "PE_TTM", "PE", "PB", "总市值(亿)", "流通市值(亿)", "流通比(%)", "同花顺F10"]: msg += "{}:{}\n".format(k, f10[k]) msg += "\n{}\n".format("*" * 30) - cols = ['EPS', 'ROA', 'ROE', '销售净利率(%)', '销售毛利率(%)', '营业收入同比增长率(%)', '营业利润同比增长率(%)', - '经营活动产生的现金流量净额/营业收入(%)'] + cols = [ + "EPS", + "ROA", + "ROE", + "销售净利率(%)", + "销售毛利率(%)", + "营业收入同比增长率(%)", + "营业利润同比增长率(%)", + "经营活动产生的现金流量净额/营业收入(%)", + ] msg += "2017~2020 财务变化\n\n" for k in cols: msg += k + ":{} | {} | {} | {}\n".format( - *[f10['{}{}'.format(year, k)] for year in ['2017', '2018', '2019', '2020']]) + *[f10["{}{}".format(year, k)] for year in ["2017", "2018", "2019", "2020"]] + ) - f10['msg'] = msg + f10["msg"] = msg return f10 -def get_symbols(name='ALL', **kwargs): +def get_symbols(name="ALL", **kwargs): """获取指定分组下的所有标的代码 :param name: 分组名称,可选值: @@ -526,17 +546,19 @@ def get_symbols(name='ALL', **kwargs): :param kwargs: 其他参数 :return: """ - if name.upper() == 'ALL': - codes = get_all_securities('stock', date=None)['code'].unique().tolist() + \ - get_all_securities('index', date=None)['code'].unique().tolist() + \ - get_all_securities('futures', date=None)['code'].unique().tolist() + \ - get_all_securities('etf', date=None)['code'].unique().tolist() + if name.upper() == "ALL": + codes = ( + get_all_securities("stock", date=None)["code"].unique().tolist() + + get_all_securities("index", date=None)["code"].unique().tolist() + + get_all_securities("futures", date=None)["code"].unique().tolist() + + get_all_securities("etf", date=None)["code"].unique().tolist() + ) else: - codes = get_all_securities(name, date=None)['code'].unique().tolist() + codes = get_all_securities(name, date=None)["code"].unique().tolist() return codes -def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs): +def get_raw_bars(symbol, freq, sdt, edt, fq="前复权", **kwargs): """获取 CZSC 库定义的标准 RawBar 对象列表 :param symbol: 标的代码 @@ -548,9 +570,17 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs): :param kwargs: :return: """ - kwargs['fq'] = fq + kwargs["fq"] = fq freq = str(freq) fq = True if fq == "前复权" else False - _map = {"1分钟": "1min", "5分钟": "5min", "15分钟": "15min", "30分钟": "30min", - "60分钟": "60min", "日线": "D", "周线": "W", "月线": "M"} + _map = { + "1分钟": "1min", + "5分钟": "5min", + "15分钟": "15min", + "30分钟": "30min", + "60分钟": "60min", + "日线": "D", + "周线": "W", + "月线": "M", + } return get_kline(symbol, freq=_map[freq], start_date=sdt, end_date=edt, fq=fq) diff --git a/czsc/eda.py b/czsc/eda.py index 016117fc3..86060c06c 100644 --- a/czsc/eda.py +++ b/czsc/eda.py @@ -83,25 +83,27 @@ def remove_beta_effects(df, **kwargs): return dfr -def cross_sectional_strategy(df, factor, **kwargs): +def cross_sectional_strategy(df, factor, weight="weight", long=0.3, short=0.3, **kwargs): """根据截面因子值构建多空组合 :param df: pd.DataFrame, 包含因子列的数据, 必须包含 dt, symbol, factor 列 :param factor: str, 因子列名称 + :param weight: str, 权重列名称,默认为 weight + :param long: float, 多头持仓比例/数量,默认为 0.3, 取值范围为 [0, n_symbols], 0~1 表示比例,大于等于1表示数量 + :param short: float, 空头持仓比例/数量,默认为 0.3, 取值范围为 [0, n_symbols], 0~1 表示比例,大于等于1表示数量 :param kwargs: - factor_direction: str, 因子方向,positive 或 negative - - long_num: int, 多头持仓数量 - - short_num: int, 空头持仓数量 - logger: loguru.logger, 日志记录器 + - norm: bool, 是否对 weight 进行截面持仓标准化,默认为 True :return: pd.DataFrame, 包含 weight 列的数据 """ factor_direction = kwargs.get("factor_direction", "positive") - long_num = kwargs.get("long_num", 5) - short_num = kwargs.get("short_num", 5) logger = kwargs.get("logger", loguru.logger) + norm = kwargs.get("norm", True) + assert long >= 0 and short >= 0, "long 和 short 参数必须大于等于0" assert factor in df.columns, f"{factor} 不在 df 中" assert factor_direction in ["positive", "negative"], f"factor_direction 参数错误" @@ -109,20 +111,33 @@ def cross_sectional_strategy(df, factor, **kwargs): if factor_direction == "negative": df[factor] = -df[factor] - df['weight'] = 0 + df[weight] = 0.0 + rows = [] + for dt, dfg in df.groupby("dt"): - if len(dfg) < long_num + short_num: - logger.warning(f"{dt} 截面数据量过小,跳过;仅有 {len(dfg)} 条数据,需要 {long_num + short_num} 条数据") + long_num = int(long) if long >= 1 else int(len(dfg) * long) + short_num = int(short) if short >= 1 else int(len(dfg) * short) + + if long_num == 0 and short_num == 0: + logger.warning(f"{dt} 多空目前持仓数量都为0; long: {long}, short: {short}") + rows.append(dfg) continue - dfa = dfg.sort_values(factor, ascending=False).head(long_num) - dfb = dfg.sort_values(factor, ascending=True).head(short_num) - if long_num > 0: - df.loc[dfa.index, "weight"] = 1 / long_num - if short_num > 0: - df.loc[dfb.index, "weight"] = -1 / short_num + long_symbols = dfg.sort_values(factor, ascending=False).head(long_num)['symbol'].tolist() + short_symbols = dfg.sort_values(factor, ascending=True).head(short_num)['symbol'].tolist() - return df + union_symbols = set(long_symbols) & set(short_symbols) + if union_symbols: + logger.warning(f"{dt} 存在同时在多头和空头的品种:{union_symbols}") + long_symbols = list(set(long_symbols) - union_symbols) + short_symbols = list(set(short_symbols) - union_symbols) + + dfg.loc[dfg['symbol'].isin(long_symbols), weight] = 1 / long_num if norm else 1.0 + dfg.loc[dfg['symbol'].isin(short_symbols), weight] = -1 / short_num if norm else -1.0 + rows.append(dfg) + + dfx = pd.concat(rows, ignore_index=True) + return dfx def judge_factor_direction(df: pd.DataFrame, factor, target='n1b', by='symbol', **kwargs): @@ -266,12 +281,15 @@ def cal_symbols_factor(dfk: pd.DataFrame, factor_function: Callable, **kwargs): - logger: loguru.logger, 默认为 loguru.logger - factor_params: dict, 因子计算参数 - min_klines: int, 最小K线数据量,默认为 300 + - price_type: str, 交易价格类型,默认为 close,可选值为 close 或 next_open :return: dff, pd.DataFrame, 计算后的因子数据 """ logger = kwargs.get("logger", loguru.logger) min_klines = kwargs.get("min_klines", 300) factor_params = kwargs.get("factor_params", {}) + price_type = kwargs.get("price_type", "close") + symbols = dfk["symbol"].unique().tolist() factor_name = factor_function.__name__ @@ -285,7 +303,13 @@ def cal_symbols_factor(dfk: pd.DataFrame, factor_function: Callable, **kwargs): continue df = factor_function(df, **factor_params) - df["price"] = df["close"] + if price_type == 'next_open': + df["price"] = df["open"].shift(-1).fillna(df["close"]) + elif price_type == 'close': + df["price"] = df["close"] + else: + raise ValueError("price_type 参数错误, 可选值为 close 或 next_open") + df["n1b"] = (df["price"].shift(-1) / df["price"] - 1).fillna(0) factor = [x for x in df.columns if x.startswith("F#")][0] @@ -478,6 +502,7 @@ def tsf_type(df: pd.DataFrame, factor, n=5, **kwargs): :return: str, 返回分层收益排序(从大到小)结果,例如:第01层->第02层->第03层->第04层->第05层 """ + logger = kwargs.get("logger", loguru.logger) window = kwargs.get("window", 600) min_periods = kwargs.get("min_periods", 300) target = kwargs.get("target", "n1b") @@ -491,9 +516,12 @@ def tsf_type(df: pd.DataFrame, factor, n=5, **kwargs): rows = [] for symbol, dfg in df.groupby("symbol"): - dfg = dfg.copy().reset_index(drop=True) - dfg = rolling_layers(dfg, factor, n=n, window=window, min_periods=min_periods) - rows.append(dfg) + try: + dfg = dfg.copy().reset_index(drop=True) + dfg = rolling_layers(dfg, factor, n=n, window=window, min_periods=min_periods) + rows.append(dfg) + except Exception as e: + logger.warning(f"{symbol} 计算分层失败: {e}") df = pd.concat(rows, ignore_index=True) layers = [x for x in df[f"{factor}分层"].unique() if x != "第00层" and str(x).endswith("层")] @@ -507,3 +535,41 @@ def tsf_type(df: pd.DataFrame, factor, n=5, **kwargs): sorted_layers = sorted(layer_returns.items(), key=lambda x: x[1], reverse=True) return "->".join([f"{x[0]}" for x in sorted_layers]) + + +def limit_leverage(df: pd.DataFrame, leverage: float = 1.0, **kwargs): + """限制杠杆比例 + + 原理描述: + + 1. 计算滚动窗口内权重的绝对均值 abs_mean,初始窗口内权重的绝对均值设为 leverage + 2. 用 leverage 除以 abs_mean,得到调整比例 adjust_ratio + 3. 将原始权重乘以 adjust_ratio,再限制在 -leverage 和 leverage 之间 + + :param df: DataFrame, columns=['dt', 'symbol', 'weight'] + :param leverage: float, 杠杆倍数 + :param kwargs: + + - copy: bool, 是否复制 DataFrame + - window: int, 滚动窗口,默认为 300 + - min_periods: int, 最小样本数,小于该值的窗口不计算均值,默认为 50 + - weight: str, 权重列名,默认为 'weight' + + :return: DataFrame + """ + window = kwargs.get("window", 300) + min_periods = kwargs.get("min_periods", 50) + weight = kwargs.get("weight", "weight") + + assert weight in df.columns, f"数据中不包含权重列 {weight}" + assert df['symbol'].nunique() == 1, "数据中包含多个品种,必须单品种" + assert df['dt'].is_monotonic_increasing, "数据未按日期排序,必须升序排列" + assert df['dt'].is_unique, "数据中存在重复dt,必须唯一" + + if kwargs.get("copy", False): + df = df.copy() + + abs_mean = df[weight].abs().rolling(window=window, min_periods=min_periods).mean().fillna(leverage) + adjust_ratio = leverage / abs_mean + df[weight] = (df[weight] * adjust_ratio).clip(-leverage, leverage) + return df diff --git a/czsc/traders/weight_backtest.py b/czsc/traders/weight_backtest.py index 2052507ee..94598476e 100644 --- a/czsc/traders/weight_backtest.py +++ b/czsc/traders/weight_backtest.py @@ -231,12 +231,18 @@ class WeightBacktest: 更新日志: - - V240627: 增加dailys属性,品种每日的交易信息 + #### 20241125 + + 1. 新增 yearly_days 参数,用于指定每年的交易日天数,默认为 252。 + + #### 20241205 + + 1. 新增 weight_type 参数,用于指定输入的持仓权重类别,ts 表示 time series,时序策略;。 """ - version = "V240627" + version = "20241205" - def __init__(self, dfw, digits=2, **kwargs) -> None: + def __init__(self, dfw, digits=2, weight_type="ts", **kwargs) -> None: """持仓权重回测 初始化函数逻辑: @@ -269,6 +275,11 @@ def __init__(self, dfw, digits=2, **kwargs) -> None: =================== ======== ======== ======= :param digits: int, 权重列保留小数位数 + :param weight_type: str, default 'ts',持仓权重类别,可选值包括:'ts'、'cs',分别表示时序策略、截面策略 + + ts 表示 time series,时序策略; + cs 表示 cross section,截面策略。 + :param kwargs: - fee_rate: float,单边交易成本,包括手续费与冲击成本, 默认为 0.0002 @@ -280,12 +291,15 @@ def __init__(self, dfw, digits=2, **kwargs) -> None: self.dfw["dt"] = pd.to_datetime(self.dfw["dt"]) if self.dfw.isnull().sum().sum() > 0: raise ValueError("dfw 中存在空值, 请先处理") + self.digits = digits + self.weight_type = weight_type.lower() self.fee_rate = kwargs.get("fee_rate", 0.0002) self.dfw["weight"] = self.dfw["weight"].astype("float").round(digits) self.symbols = list(self.dfw["symbol"].unique().tolist()) self._dailys = None - self.results = self.backtest(n_jobs=kwargs.get("n_jobs", 1)) + self.yearly_days = kwargs.pop("yearly_days", 252) + self.results = self.backtest(n_jobs=kwargs.pop("n_jobs", 1)) @property def stats(self): @@ -332,7 +346,7 @@ def alpha(self) -> pd.DataFrame: def alpha_stats(self): """策略超额收益统计""" df = self.alpha.copy() - stats = czsc.daily_performance(df["超额"].to_list()) + stats = czsc.daily_performance(df["超额"].to_list(), yearly_days=self.yearly_days) stats["开始日期"] = df["date"].min().strftime("%Y-%m-%d") stats["结束日期"] = df["date"].max().strftime("%Y-%m-%d") return stats @@ -341,7 +355,57 @@ def alpha_stats(self): def bench_stats(self): """基准收益统计""" df = self.alpha.copy() - stats = czsc.daily_performance(df["基准"].to_list()) + stats = czsc.daily_performance(df["基准"].to_list(), yearly_days=self.yearly_days) + stats["开始日期"] = df["date"].min().strftime("%Y-%m-%d") + stats["结束日期"] = df["date"].max().strftime("%Y-%m-%d") + return stats + + @property + def long_daily_return(self): + """多头每日收益率""" + df = self.dailys.copy() + dfv = pd.pivot_table(df, index="date", columns="symbol", values="long_return").fillna(0) + + if self.weight_type == "ts": + dfv["total"] = dfv.mean(axis=1) + elif self.weight_type == "cs": + dfv["total"] = dfv.sum(axis=1) + else: + raise ValueError(f"weight_type {self.weight_type} not supported") + + dfv = dfv.reset_index(drop=False) + return dfv + + @property + def short_daily_return(self): + """空头每日收益率""" + df = self.dailys.copy() + dfv = pd.pivot_table(df, index="date", columns="symbol", values="short_return").fillna(0) + + if self.weight_type == "ts": + dfv["total"] = dfv.mean(axis=1) + elif self.weight_type == "cs": + dfv["total"] = dfv.sum(axis=1) + else: + raise ValueError(f"weight_type {self.weight_type} not supported") + + dfv = dfv.reset_index(drop=False) + return dfv + + @property + def long_stats(self): + """多头收益统计""" + df = self.long_daily_return.copy() + stats = czsc.daily_performance(df["total"].to_list(), yearly_days=self.yearly_days) + stats["开始日期"] = df["date"].min().strftime("%Y-%m-%d") + stats["结束日期"] = df["date"].max().strftime("%Y-%m-%d") + return stats + + @property + def short_stats(self): + """空头收益统计""" + df = self.short_daily_return.copy() + stats = czsc.daily_performance(df["total"].to_list(), yearly_days=self.yearly_days) stats["开始日期"] = df["date"].min().strftime("%Y-%m-%d") stats["结束日期"] = df["date"].max().strftime("%Y-%m-%d") return stats @@ -369,6 +433,8 @@ def get_symbol_daily(self, symbol): symbol 合约代码, n1b 品种每日收益率, edge 策略每日收益率, + long_edge 多头每日收益率, + short_edge 空头每日收益率, return 策略每日收益率减去交易成本后的真实收益, cost 交易成本 turnover 当日的单边换手率 @@ -390,15 +456,64 @@ def get_symbol_daily(self, symbol): dfs["edge"] = dfs["weight"] * dfs["n1b"] dfs["turnover"] = abs(dfs["weight"].shift(1) - dfs["weight"]) dfs["cost"] = dfs["turnover"] * self.fee_rate - dfs["edge_post_fee"] = dfs["edge"] - dfs["cost"] + dfs["return"] = dfs["edge"] - dfs["cost"] + + # 分别计算多头和空头的收益 + dfs["long_weight"] = np.where(dfs["weight"] > 0, dfs["weight"], 0) + dfs["short_weight"] = np.where(dfs["weight"] < 0, dfs["weight"], 0) + dfs["long_edge"] = dfs["long_weight"] * dfs["n1b"] + dfs["short_edge"] = dfs["short_weight"] * dfs["n1b"] + + dfs["long_turnover"] = abs(dfs["long_weight"].shift(1) - dfs["long_weight"]) + dfs["short_turnover"] = abs(dfs["short_weight"].shift(1) - dfs["short_weight"]) + dfs["long_cost"] = dfs["long_turnover"] * self.fee_rate + dfs["short_cost"] = dfs["short_turnover"] * self.fee_rate + + dfs["long_return"] = dfs["long_edge"] - dfs["long_cost"] + dfs["short_return"] = dfs["short_edge"] - dfs["short_cost"] + daily = ( dfs.groupby(dfs["dt"].dt.date) - .agg({"edge": "sum", "edge_post_fee": "sum", "cost": "sum", "n1b": "sum", "turnover": "sum"}) + .agg( + { + "edge": "sum", + "return": "sum", + "cost": "sum", + "n1b": "sum", + "turnover": "sum", + "long_edge": "sum", + "short_edge": "sum", + "long_cost": "sum", + "short_cost": "sum", + "long_turnover": "sum", + "short_turnover": "sum", + "long_return": "sum", + "short_return": "sum", + } + ) .reset_index() ) daily["symbol"] = symbol - daily.rename(columns={"edge_post_fee": "return", "dt": "date"}, inplace=True) - daily = daily[["date", "symbol", "n1b", "edge", "return", "cost", "turnover"]].copy() + daily.rename(columns={"dt": "date"}, inplace=True) + cols = [ + "date", + "symbol", + "edge", + "return", + "cost", + "n1b", + "turnover", + "long_edge", + "long_cost", + "long_return", + "long_turnover", + "short_edge", + "short_cost", + "short_return", + "short_turnover", + ] + + daily = daily[cols].copy() return daily def get_symbol_pairs(self, symbol): @@ -553,13 +668,22 @@ def backtest(self, n_jobs=1): dret = pd.concat([v["daily"] for k, v in res.items() if k in symbols], ignore_index=True) dret = pd.pivot_table(dret, index="date", columns="symbol", values="return").fillna(0) - dret["total"] = dret[list(res.keys())].mean(axis=1) + + if self.weight_type == "ts": + # 时序策略每日收益为各品种收益的等权 + dret["total"] = dret[list(res.keys())].mean(axis=1) + elif self.weight_type == "cs": + # 截面策略每日收益为各品种收益的和 + dret["total"] = dret[list(res.keys())].sum(axis=1) + else: + raise ValueError(f"weight_type {self.weight_type} not supported, should be 'ts' or 'cs'") + # dret 中的 date 对应的是上一日;date 后移一位,对应的才是当日收益 dret = dret.round(4).reset_index() res["品种等权日收益"] = dret stats = {"开始日期": dret["date"].min().strftime("%Y%m%d"), "结束日期": dret["date"].max().strftime("%Y%m%d")} - stats.update(daily_performance(dret["total"])) + stats.update(daily_performance(dret["total"], yearly_days=self.yearly_days)) dfp = pd.concat([v["pairs"] for k, v in res.items() if k in symbols], ignore_index=True) pairs_stats = evaluate_pairs(dfp) pairs_stats = {k: v for k, v in pairs_stats.items() if k in ["单笔收益", "持仓K线数", "交易胜率", "持仓天数"]} diff --git a/czsc/utils/calendar.py b/czsc/utils/calendar.py index 04a16b068..05a064704 100644 --- a/czsc/utils/calendar.py +++ b/czsc/utils/calendar.py @@ -15,32 +15,33 @@ def prepare_chain_calendar(): import tushare as ts + pro = ts.pro_api() - df = pro.trade_cal(exchange='', start_date='20100101', end_date='20301231') - df['cal_date'] = pd.to_datetime(df['cal_date']) - df = df.sort_values('cal_date').reset_index(drop=True)[['cal_date', 'is_open']] + df = pro.trade_cal(exchange="", start_date="20100101", end_date="20301231") + df["cal_date"] = pd.to_datetime(df["cal_date"]) + df = df.sort_values("cal_date").reset_index(drop=True)[["cal_date", "is_open"]] df.to_feather(Path(__file__).parent / "china_calendar.feather") def is_trading_date(date=datetime.now()): """判断是否是交易日""" date = pd.to_datetime(pd.to_datetime(date).date()) - is_open = calendar[calendar['cal_date'] == date].iloc[0]['is_open'] + is_open = calendar[calendar["cal_date"] == date].iloc[0]["is_open"] return is_open == 1 def next_trading_date(date=datetime.now(), n=1): """获取未来第N个交易日""" date = pd.to_datetime(pd.to_datetime(date).date()) - df = calendar[calendar['cal_date'] > date] - return df[df['is_open'] == 1].iloc[n - 1]['cal_date'] + df = calendar[calendar["cal_date"] > date] + return df[df["is_open"] == 1].iloc[n - 1]["cal_date"] def prev_trading_date(date=datetime.now(), n=1): """获取过去第N个交易日""" date = pd.to_datetime(pd.to_datetime(date).date()) - df = calendar[calendar['cal_date'] < date] - return df[df['is_open'] == 1].iloc[-n]['cal_date'] + df = calendar[calendar["cal_date"] < date] + return df[df["is_open"] == 1].iloc[-n]["cal_date"] def get_trading_dates(sdt, edt=datetime.now()): @@ -48,5 +49,5 @@ def get_trading_dates(sdt, edt=datetime.now()): sdt = pd.to_datetime(sdt).date() edt = pd.to_datetime(edt).date() sdt, edt = pd.to_datetime(sdt), pd.to_datetime(edt) - df = calendar[(calendar['cal_date'] >= sdt) & (calendar['cal_date'] <= edt)] - return df[df['is_open'] == 1]['cal_date'].tolist() + df = calendar[(calendar["cal_date"] >= sdt) & (calendar["cal_date"] <= edt)] + return df[df["is_open"] == 1]["cal_date"].tolist() diff --git a/czsc/utils/china_calendar.feather b/czsc/utils/china_calendar.feather index 29c7c57cfc5e1f881376d1c939ecf837ccf3542b..d2ca260b3c0fbeb363c427d2fec7227af9919e71 100644 GIT binary patch delta 3019 zcmaLZc{G*#9>?*=yi2AsWeyoLCPLUE%9Kztm7$Vks0_)lW$KVhb|uP?j3JeX5RoD! zoJf-=%~_8ONsXB~V2D`$zOY_pc|4`uq8$^xr~JtoZm) z_2W^_s|48N&M@e8*&91)R5MNE!(w4kigIp_*-4}5MH?1f zgMJ$><`99JhpcjfGC6wkafVNA|AJ9V>4%9Z#@M=}N1+d_pLKK+6}YT!GFO+@PePR=SSsp_zBy zX5+lu9Q6^S-Uog|83xv*$iifB-vWaHuzXIJ!V>5{4WCeGOY>AKRKOv1p)Az%m5dj( zGNif^=QoGdZ;ZAXgEn#)&noRZ10Aj7`T#YsKD$yLn*2=j5Og4*tq-a==OVup8kRP; z7s_Z`5`PmcSS7-*0QNC2-vYkUN3XzP@$H+%Xg}>|rHC;%`S)q)m>Bad)TmrhR|%SG z)Q|vu+GYPIRK;^-i83_eOGGfVJ&v~tDk6BEM+Lg0P|qD&t$XhR4tu*NV+O0E%Qs6^ ztoa|3Pl3NgN{vA2JXOn=L67BF#z6-)o^?ajZU#%ML8BZv0->F+G8>>`q3Yb~P(RLA z7ievUa|sTY^2hHOH(ifiqk*x|TId9H@>%IH)GW|gOH-6VQR#EN4Dib|-!8DG>`w_T zyv(Rd^M`hsFTDrVYI>dB9bzd(O)rTgA7L(2&a;~a|%AM|J zGl1?@-tGXsd3}at&Tn({Gyd}CtBo|HO4zp z{AZ!UyoIFm?Ya8;X3(peU8Hli7VmzjgX5IEIrQA?!~3A)p)xH{6Rv9hHPEA{*KLJ9 zS7r>7&X?8i{fRZwhRfOl>)q#PGNFruG~YpYQf<1H&}+wC5}}-OW2EzqSL0;YLJQW2 z?16smFC(3=-bv?O2R%OXkaRwj;#Y>FA@Q4J9#fIN)(Yc&W5o=pM0foNjX^d0d)ez) zgRf7#CY{eqi0Hw~n}v8~Y@o$O`K0sjdU}n}H65MYw$KxQ1BbZgrHJ02aJDG#_3Sm^j^~W9ywnPM`-iaS<*S5McO^+mPhj9 zPSA>-b)@s3Gq$x*>%+sG8=+Ye;TxgvE4Ud2xGPQaNau&Tbxb#5?f2>+oiF|BF$fJw zm{{fvZ528c30+t$ei!Ps@{;Ig=%o&GU+A3cW74_Z#{hO0Xm&KKBlIJ0#yK3rJk@cG zDO%0Ot{5M;I30m1IlV{bjOOr>Xcaf`?XZOrU?EyD>3mzJp|Cr&N~N20&f2i83cA5| zhQ$M#H+aMz`Z-887sr_88tMEps3Aozr3>FXA}!@&CeDlviNA9pg7G{VWF9 zy!AfmoayAPw*x#p@||?96@K_3G?peK=L>z1d6jf7rMj*e8qn~ZcPF&LcCQDN`vqq? zj%)Cl9~etnG}rlIoKC+*I=7T}c>_IF{YhsRbi^Vq9;!1S+zVyI`<2P~gL{9N1c8^N zJ|vw7OZqL?4Q;yki*(LQPcOys=uw!)SngeK5rA>_>-v*WtHjr&^Gx9g?Lg?eVxEIg zJ^g&rdD2~7sUT>dTPMj}{?pca=)RcAdBM;YK1T9pu)sOVVjNHHE2Q(vRtxhzSbsY` zC7s*833>@V8_uq|7dlGIjDZ?tsojSrtG0;mgAO!o+67g#8zr5Gyz zl_YbzyLmhG7~|7`a3pvzCSW^Qji2=@IO^Of)+lJFw(16`*sUhgx!*>oF^siu-mQwp z$hANEFm%(rg-@V`Sw%`Q&`DLpaHv`11JZfA-8Mn;73bxPX_C3-o+Fo`3|3hx7TU$E zVc3Bsms)4z?5=r1IAYlCOCb(Am3o_WZXxB?3O!l*Ss)(z znx5bZUEV8Ni4*5jK|1f5G_^^y;cYLH{{U65wRmh5E?6^Jl-OF22z`0Obj=FNO(=6Jps8)Z*1jT)`V7w?qDtpD@f RJaztBHi|NJa9HoG^)H5i-~s>u delta 282 zcmeC0#k{DIdBP566LZVW{ilvFDjMhkp;E1mf>J?ZUP@wdd|7Hyab|v=l9hszp`MAJ z$>ar0JD5CnPL5($V@emD+{CPxxNJ597_5NO5FP^qj6Y8nCc?nL1Z1X*BKRN{I^SY7 zM4m;-S0cfosekrnL6)Z3Y7chK4LvO?!N9 df['open'], self.color_red, self.color_green) - self.add_bar_indicator(df['dt'], df['vol'], color=df['vol_color'], name="成交量", row=row, show_legend=False) + df["vol_color"] = np.where(df["close"] > df["open"], self.color_red, self.color_green) + self.add_bar_indicator(df["dt"], df["vol"], color=df["vol_color"], name="成交量", row=row, show_legend=False) def add_sma(self, kline: pd.DataFrame, row=1, ma_seq=(5, 10, 20), visible=False, **kwargs): """绘制均线图 @@ -147,10 +177,17 @@ def add_sma(self, kline: pd.DataFrame, row=1, ma_seq=(5, 10, 20), visible=False, - show_legend: 是否显示图例,默认值为 True """ df = kline.copy() - line_width = kwargs.get('line_width', 0.6) + line_width = kwargs.get("line_width", 0.6) for ma in ma_seq: - self.add_scatter_indicator(df['dt'], df['close'].rolling(ma).mean(), name=f"MA{ma}", - row=row, line_width=line_width, visible=visible, show_legend=True) + self.add_scatter_indicator( + df["dt"], + df["close"].rolling(ma).mean(), + name=f"MA{ma}", + row=row, + line_width=line_width, + visible=visible, + show_legend=True, + ) def add_macd(self, kline: pd.DataFrame, row=3, **kwargs): """绘制MACD图 @@ -178,24 +215,28 @@ def add_macd(self, kline: pd.DataFrame, row=3, **kwargs): - show_legend: 是否显示图例,默认值为 False """ df = kline.copy() - fastperiod = kwargs.get('fastperiod', 12) - slowperiod = kwargs.get('slowperiod', 26) - signalperiod = kwargs.get('signalperiod', 9) - line_width = kwargs.get('line_width', 0.6) + fastperiod = kwargs.get("fastperiod", 12) + slowperiod = kwargs.get("slowperiod", 26) + signalperiod = kwargs.get("signalperiod", 9) + line_width = kwargs.get("line_width", 0.6) - if 'DIFF' in df.columns and 'DEA' in df.columns and 'MACD' in df.columns: - diff, dea, macd = df['DIFF'], df['DEA'], df['MACD'] + if "DIFF" in df.columns and "DEA" in df.columns and "MACD" in df.columns: + diff, dea, macd = df["DIFF"], df["DEA"], df["MACD"] else: diff, dea, macd = MACD(df["close"], fastperiod=fastperiod, slowperiod=slowperiod, signalperiod=signalperiod) macd_colors = np.where(macd > 0, self.color_red, self.color_green) - self.add_scatter_indicator(df['dt'], diff, name="DIFF", row=row, - line_color='white', show_legend=False, line_width=line_width) - self.add_scatter_indicator(df['dt'], dea, name="DEA", row=row, - line_color='yellow', show_legend=False, line_width=line_width) - self.add_bar_indicator(df['dt'], macd, name="MACD", row=row, color=macd_colors, show_legend=False) + self.add_scatter_indicator( + df["dt"], diff, name="DIFF", row=row, line_color="white", show_legend=False, line_width=line_width + ) + self.add_scatter_indicator( + df["dt"], dea, name="DEA", row=row, line_color="yellow", show_legend=False, line_width=line_width + ) + self.add_bar_indicator(df["dt"], macd, name="MACD", row=row, color=macd_colors, show_legend=False) - def add_indicator(self, dt, scatters: list = None, scatter_names: list = None, bar=None, bar_name='', row=4, **kwargs): + def add_indicator( + self, dt, scatters: list = None, scatter_names: list = None, bar=None, bar_name="", row=4, **kwargs + ): """绘制曲线叠加bar型指标 1. 获取自定义参数 line_width,默认值为 0.6。 @@ -216,9 +257,11 @@ def add_indicator(self, dt, scatters: list = None, scatter_names: list = None, b - color: 根据上一步计算的颜色设置 - show_legend: 是否显示图例,默认值为 False """ - line_width = kwargs.get('line_width', 0.6) + line_width = kwargs.get("line_width", 0.6) for i, scatter in enumerate(scatters): - self.add_scatter_indicator(dt, scatter, name=scatter_names[i], row=row, show_legend=False, line_width=line_width) + self.add_scatter_indicator( + dt, scatter, name=scatter_names[i], row=row, show_legend=False, line_width=line_width + ) if bar: bar_colors = np.where(np.array(bar, dtype=np.double) > 0, self.color_red, self.color_green) @@ -254,16 +297,27 @@ def add_marker_indicator(self, x, y, name: str, row: int, text=None, **kwargs): :param kwargs: :return: """ - line_color = kwargs.get('line_color', None) - line_width = kwargs.get('line_width', None) - hover_template = kwargs.get('hover_template', '%{y:.3f}-%{text}') - show_legend = kwargs.get('show_legend', True) - visible = True if kwargs.get('visible', True) else 'legendonly' - color = kwargs.get('color', None) - tag = kwargs.get('tag', None) - scatter = go.Scatter(x=x, y=y, name=name, text=text, line_width=line_width, line_color=line_color, - hovertemplate=hover_template, showlegend=show_legend, visible=visible, opacity=1.0, - mode='markers', marker=dict(size=10, color=color, symbol=tag)) + line_color = kwargs.get("line_color", None) + line_width = kwargs.get("line_width", None) + hover_template = kwargs.get("hover_template", "%{y:.3f}-%{text}") + show_legend = kwargs.get("show_legend", True) + visible = True if kwargs.get("visible", True) else "legendonly" + color = kwargs.get("color", None) + tag = kwargs.get("tag", None) + scatter = go.Scatter( + x=x, + y=y, + name=name, + text=text, + line_width=line_width, + line_color=line_color, + hovertemplate=hover_template, + showlegend=show_legend, + visible=visible, + opacity=1.0, + mode="markers", + marker=dict(size=10, color=color, symbol=tag), + ) self.fig.add_trace(scatter, row=row, col=1) self.fig.update_traces(xaxis="x1") @@ -296,14 +350,24 @@ def add_scatter_indicator(self, x, y, name: str, row: int, text=None, **kwargs): :param kwargs: :return: """ - mode = kwargs.pop('mode', 'text+lines') - hover_template = kwargs.pop('hover_template', '%{y:.3f}') - show_legend = kwargs.pop('show_legend', True) - opacity = kwargs.pop('opacity', 1.0) - visible = True if kwargs.pop('visible', True) else 'legendonly' - - scatter = go.Scatter(x=x, y=y, name=name, text=text, mode=mode, hovertemplate=hover_template, - showlegend=show_legend, visible=visible, opacity=opacity, **kwargs) + mode = kwargs.pop("mode", "text+lines") + hover_template = kwargs.pop("hover_template", "%{y:.3f}") + show_legend = kwargs.pop("show_legend", True) + opacity = kwargs.pop("opacity", 1.0) + visible = True if kwargs.pop("visible", True) else "legendonly" + + scatter = go.Scatter( + x=x, + y=y, + name=name, + text=text, + mode=mode, + hovertemplate=hover_template, + showlegend=show_legend, + visible=visible, + opacity=opacity, + **kwargs, + ) self.fig.add_trace(scatter, row=row, col=1) self.fig.update_traces(xaxis="x1") @@ -337,15 +401,25 @@ def add_bar_indicator(self, x, y, name: str, row: int, color=None, **kwargs): :param kwargs: :return: """ - hover_template = kwargs.pop('hover_template', '%{y:.3f}') - show_legend = kwargs.pop('show_legend', True) - visible = kwargs.pop('visible', True) - base = kwargs.pop('base', True) + hover_template = kwargs.pop("hover_template", "%{y:.3f}") + show_legend = kwargs.pop("show_legend", True) + visible = kwargs.pop("visible", True) + base = kwargs.pop("base", True) if color is None: color = self.color_red - bar = go.Bar(x=x, y=y, marker_line_color=color, marker_color=color, name=name, - showlegend=show_legend, hovertemplate=hover_template, visible=visible, base=base, **kwargs) + bar = go.Bar( + x=x, + y=y, + marker_line_color=color, + marker_color=color, + name=name, + showlegend=show_legend, + hovertemplate=hover_template, + visible=visible, + base=base, + **kwargs, + ) self.fig.add_trace(bar, row=row, col=1) self.fig.update_traces(xaxis="x1") @@ -356,3 +430,96 @@ def open_in_browser(self, file_name: str = None, **kwargs): self.fig.update_layout(**kwargs) self.fig.write_html(file_name) webbrowser.open(file_name) + + +def plot_nx_graph(G: nx.Graph, **kwargs) -> go.Figure: + """使用 Plotly 绘制 nx.Graph 的图形 + + :param G: nx.Graph 对象 + :param kwargs: + :return: go.Figure 对象 + """ + title = kwargs.get("title", "Network graph made with Python") + edge_width = kwargs.get("edge_width", 1.5) + node_marker_size = kwargs.get("node_marker_size", 10) + + # 使用 spring_layout 为图分配位置 + pos = nx.spring_layout(G) + + # 准备绘图数据 + edge_x = [] + edge_y = [] + edge_weights = [] + for edge in G.edges(): + x0, y0 = pos[edge[0]] + x1, y1 = pos[edge[1]] + edge_x.extend([x0, x1, None]) + edge_y.extend([y0, y1, None]) + edge_weights.append(f'{G[edge[0]][edge[1]]["weight"]:.2f}') + + node_x = [] + node_y = [] + node_labels = [] + for node in G.nodes(): + node_x.append(pos[node][0]) + node_y.append(pos[node][1]) + node_labels.append(node) + + # 创建边的散点图 + edge_trace = go.Scatter( + x=edge_x, + y=edge_y, + line=dict(width=edge_width, color="#888"), + hoverinfo="none", + mode="lines", + ) + + # 创建节点的散点图 + node_trace = go.Scatter( + x=node_x, + y=node_y, + mode="markers", + hoverinfo="text", + text=node_labels, # 添加节点标签 + marker=dict( + showscale=False, + color="skyblue", + size=node_marker_size, + line_width=0, + ), + ) + + # 计算边的中点位置并添加注释 + edge_annotations = [] + for edge in G.edges(): + x0, y0 = pos[edge[0]] + x1, y1 = pos[edge[1]] + mid_x = (x0 + x1) / 2 + mid_y = (y0 + y1) / 2 + weight = f'{G[edge[0]][edge[1]]["weight"]:.2f}' + edge_annotations.append( + dict( + x=mid_x, + y=mid_y, + text=weight, + showarrow=False, + font=dict(size=12, color="red" if float(weight) > 0 else "green"), + ) + ) + + # 创建图表 + fig = go.Figure( + data=[edge_trace, node_trace], + layout=go.Layout( + title=f"
{title}", + titlefont_size=16, + showlegend=False, + hovermode="closest", + margin=dict(b=20, l=5, r=5, t=40), + xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + annotations=edge_annotations, # 添加边的注释 + ), + ) + + return fig diff --git a/czsc/utils/st_components.py b/czsc/utils/st_components.py index f5c0c6c5b..52a73cd46 100644 --- a/czsc/utils/st_components.py +++ b/czsc/utils/st_components.py @@ -223,7 +223,9 @@ def show_correlation(df, cols=None, method="pearson", **kwargs): """ cols = cols or df.columns.to_list() dfr = df[cols].corr(method=method) - dfr["average"] = (dfr.sum(axis=1) - 1) / (len(cols) - 1) + dfr = dfr.copy().where(~np.eye(dfr.shape[0], dtype=bool)) + + dfr["average"] = dfr.sum(axis=1) / (len(cols) - 1) dfr = dfr.style.background_gradient(cmap="RdYlGn_r", axis=None).format("{:.4f}", na_rep="MISS") if kwargs.get("use_st_table", False): @@ -486,14 +488,27 @@ def show_weight_backtest(dfw, **kwargs): - n_jobs: int, 并行计算的进程数,默认为 1 """ + from czsc.eda import cal_yearly_days + fee = kwargs.get("fee", 2) digits = kwargs.get("digits", 2) + yearly_days = kwargs.pop("yearly_days", None) + + if not yearly_days: + yearly_days = cal_yearly_days(dts=dfw["dt"].unique()) + if (dfw.isnull().sum().sum() > 0) or (dfw.isna().sum().sum() > 0): st.warning("show_weight_backtest :: 持仓权重数据中存在空值,请检查数据后再试;空值数据如下:") st.dataframe(dfw[dfw.isnull().sum(axis=1) > 0], use_container_width=True) st.stop() - wb = czsc.WeightBacktest(dfw, fee_rate=fee / 10000, digits=digits, n_jobs=kwargs.get("n_jobs", 1)) + wb = czsc.WeightBacktest( + dfw, + fee_rate=fee / 10000, + digits=digits, + n_jobs=kwargs.get("n_jobs", 1), + yearly_days=yearly_days, + ) stat = wb.results["绩效评价"] st.divider() @@ -509,13 +524,14 @@ def show_weight_backtest(dfw, **kwargs): c9.metric("年化波动率", f"{stat['年化波动率']:.2%}") c10.metric("多头占比", f"{stat['多头占比']:.2%}") c11.metric("空头占比", f"{stat['空头占比']:.2%}") + st.caption(f"回测参数:单边手续费 {fee} BP,权重小数位数 {digits} ,年交易天数 {yearly_days}") st.divider() dret = wb.results["品种等权日收益"].copy() dret["dt"] = pd.to_datetime(dret["date"]) dret = dret.set_index("dt").drop(columns=["date"]) # dret.index = pd.to_datetime(dret.index) - show_daily_return(dret, legend_only_cols=dfw["symbol"].unique().tolist(), **kwargs) + show_daily_return(dret, legend_only_cols=dfw["symbol"].unique().tolist(), yearly_days=yearly_days, **kwargs) if kwargs.get("show_drawdowns", False): show_drawdowns(dret, ret_col="total", sub_title="") @@ -532,7 +548,7 @@ def show_weight_backtest(dfw, **kwargs): if kwargs.get("show_splited_daily", False): with st.expander("品种等权日收益分段表现", expanded=False): - show_splited_daily(dret[["total"]].copy(), ret_col="total") + show_splited_daily(dret[["total"]].copy(), ret_col="total", yearly_days=yearly_days) if kwargs.get("show_yearly_stats", False): with st.expander("年度绩效指标", expanded=False): @@ -1541,8 +1557,8 @@ def show_strategies_recent(df, **kwargs): ) # 计算每个时间段的盈利策略数量 - win_count = n_rets.applymap(lambda x: 1 if x > 0 else 0).sum(axis=0) - win_rate = n_rets.applymap(lambda x: 1 if x > 0 else 0).sum(axis=0) / n_rets.shape[0] + win_count = n_rets.map(lambda x: 1 if x > 0 else 0).sum(axis=0) + win_rate = n_rets.map(lambda x: 1 if x > 0 else 0).sum(axis=0) / n_rets.shape[0] dfs = pd.DataFrame({"盈利策略数量": win_count, "盈利策略比例": win_rate}).T dfs = dfs.style.background_gradient(cmap="RdYlGn_r", axis=1).format("{:.4f}", na_rep="-") st.dataframe(dfs, use_container_width=True) @@ -1705,8 +1721,9 @@ def show_classify(df, col1, col2, n=10, method="cut", **kwargs): fig.update_layout(margin=dict(l=0, r=0, t=0, b=0)) st.plotly_chart(fig, use_container_width=True) - dfg = dfg.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["count"]) - dfg = dfg.background_gradient(cmap="RdYlGn_r", axis=None, subset=["mean", "std", "min", "25%", "50%", "75%", "max"]) + dfg = dfg.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["mean"]) + dfg = dfg.background_gradient(cmap="RdYlGn_r", axis=None, subset=["std"]) + dfg = dfg.background_gradient(cmap="RdYlGn_r", axis=None, subset=["min", "25%", "50%", "75%", "max"]) dfg = dfg.format( { "count": "{:.0f}", @@ -1720,3 +1737,128 @@ def show_classify(df, col1, col2, n=10, method="cut", **kwargs): } ) st.dataframe(dfg, use_container_width=True) + + +def show_corr_graph(df, columns=None, threshold=0.2, **kwargs): + """显示相关性矩阵的图形 + + :param df: pd.DataFrame, 需要计算相关性的数据 + :param columns: list, 需要显示的列名 + :param threshold: float, 相关性阈值 + :param kwargs: + + - method: str, 相关性计算方法,默认为 pearson, 可选 pearson, kendall, spearman + """ + import networkx as nx + from czsc.utils.plotly_plot import plot_nx_graph + + method = kwargs.get("method", "pearson") + + if columns is None: + columns = df.columns + + dfr = df[columns].corr(method=method).round(4) + + # 创建一个无向图 + G = nx.Graph() + + # 添加节点,使用列名作为节点名称 + G.add_nodes_from(dfr.columns) + + # 添加边,只有当相关性超过阈值时 + for i, col1 in enumerate(dfr.columns): + for j, col2 in enumerate(dfr.columns): + if i < j: # 避免重复和自环 + if abs(dfr.iat[i, j]) > threshold: + G.add_edge(col1, col2, weight=dfr.iat[i, j]) + + fig = plot_nx_graph(G, node_marker_size=15) + st.plotly_chart(fig, use_container_width=True) + with st.expander("相关性矩阵"): + # 将 dfr 对角线上的 1 填充为 0 + dfr = dfr.copy().where(~np.eye(dfr.shape[0], dtype=bool)) + dfr["average"] = dfr.sum(axis=1) / (len(columns) - 1) + + dfr = dfr.style.background_gradient(cmap="RdYlGn_r", axis=None).format("{:.4f}", na_rep="MISS") + st.dataframe(dfr, use_container_width=True) + + +def show_df_describe(df: pd.DataFrame): + """展示 DataFrame 的描述性统计信息 + + :param df: pd.DataFrame,必须是 df.describe() 的结果 + """ + quantiles = [x for x in df.columns if "%" in x] + df = df.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["mean"]) + df = df.background_gradient(cmap="RdYlGn_r", axis=None, subset=["std"]) + df = df.background_gradient(cmap="RdYlGn_r", axis=None, subset=["max", "min"] + quantiles) + + format_dict = { + "count": "{:.0f}", + "mean": "{:.4f}", + "std": "{:.4f}", + "min": "{:.4f}", + "max": "{:.4f}", + } + for q in quantiles: + format_dict[q] = "{:.4f}" + + df = df.format(format_dict) + st.dataframe(df, use_container_width=True) + + +def show_date_effect(df: pd.DataFrame, ret_col: str, **kwargs): + """分析日收益数据的日历效应 + + :param df: pd.DataFrame, 包含日期的日收益数据 + :param ret_col: str, 收益列名称 + :param kwargs: dict, 其他参数 + + - show_weekday: bool, 是否展示星期效应,默认为 True + - show_month: bool, 是否展示月份效应,默认为 True + - percentiles: list, 分位数,默认为 [0.1, 0.25, 0.5, 0.75, 0.9] + + """ + show_weekday = kwargs.get("show_weekday", True) + show_month = kwargs.get("show_month", True) + percentiles = kwargs.get("percentiles", [0.1, 0.25, 0.5, 0.75, 0.9]) + + assert ret_col in df.columns, f"ret_col 必须是 {df.columns} 中的一个" + assert show_month or show_weekday, "show_month 和 show_weekday 不能同时为 False" + + if not df.index.dtype == "datetime64[ns]": + df["dt"] = pd.to_datetime(df["dt"]) + df.set_index("dt", inplace=True) + + assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换" + df = df.copy() + + st.write( + f"交易区间 {df.index.min().strftime('%Y-%m-%d')} ~ {df.index.max().strftime('%Y-%m-%d')};总天数:{len(df)}" + ) + + if show_weekday: + st.write("##### 星期效应") + df["weekday"] = df.index.weekday + sorted_weekday = sorted(df["weekday"].unique().tolist()) + weekday_map = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"} + df["weekday"] = df["weekday"].map(weekday_map) + sorted_rows = [weekday_map[i] for i in sorted_weekday] + + weekday_effect = df.groupby("weekday")[ret_col].describe(percentiles=percentiles) + weekday_effect = weekday_effect.loc[sorted_rows] + show_df_describe(weekday_effect) + + if show_month: + st.write("##### 月份效应") + df["month"] = df.index.month + month_map = {i: f"{i}月" for i in range(1, 13)} + sorted_month = sorted(df["month"].unique().tolist()) + sorted_rows = [month_map[i] for i in sorted_month] + + df["month"] = df["month"].map(month_map) + month_effect = df.groupby("month")[ret_col].describe(percentiles=percentiles) + month_effect = month_effect.loc[sorted_rows] + show_df_describe(month_effect) + + st.caption("数据说明:count 为样本数量,mean 为均值,std 为标准差,min 为最小值,n% 为分位数,max 为最大值") diff --git a/czsc/utils/stats.py b/czsc/utils/stats.py index 0b3ad1032..cf80c6a02 100644 --- a/czsc/utils/stats.py +++ b/czsc/utils/stats.py @@ -163,7 +163,7 @@ def __min_max(x, min_val, max_val, digits=4): def rolling_daily_performance(df: pd.DataFrame, ret_col, window=252, min_periods=100, **kwargs): - """计算滚动日收益 + """计算滚动日收益的各项指标 :param df: pd.DataFrame, 日收益数据,columns=['dt', ret_col] :param ret_col: str, 收益列名 @@ -173,11 +173,15 @@ def rolling_daily_performance(df: pd.DataFrame, ret_col, window=252, min_periods - yearly_days: int, 252, 一年的交易日数 """ + from czsc.eda import cal_yearly_days + if not df.index.dtype == "datetime64[ns]": df["dt"] = pd.to_datetime(df["dt"]) df.set_index("dt", inplace=True) assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换" + yearly_days = kwargs.get("yearly_days", cal_yearly_days(df.index)) + df = df[[ret_col]].copy().fillna(0) df.sort_index(inplace=True, ascending=True) dts = sorted(df.index.to_list()) @@ -185,7 +189,7 @@ def rolling_daily_performance(df: pd.DataFrame, ret_col, window=252, min_periods for edt in dts[min_periods:]: sdt = edt - pd.Timedelta(days=window) dfg = df[(df.index >= sdt) & (df.index <= edt)].copy() - s = daily_performance(dfg[ret_col].to_list(), yearly_days=kwargs.get("yearly_days", 252)) + s = daily_performance(dfg[ret_col].to_list(), yearly_days=yearly_days) s["sdt"] = sdt s["edt"] = edt res.append(s) diff --git "a/examples/Streamlit\347\273\204\344\273\266\345\272\223\344\275\277\347\224\250\346\241\210\344\276\213/corr_graph.py" "b/examples/Streamlit\347\273\204\344\273\266\345\272\223\344\275\277\347\224\250\346\241\210\344\276\213/corr_graph.py" new file mode 100644 index 000000000..dbaf1e98f --- /dev/null +++ "b/examples/Streamlit\347\273\204\344\273\266\345\272\223\344\275\277\347\224\250\346\241\210\344\276\213/corr_graph.py" @@ -0,0 +1,35 @@ +import sys + +sys.path.insert(0, ".") + +import czsc +import pandas as pd +import numpy as np + + +def main(): + # 设置随机数种子以获得可重复的结果 + np.random.seed(42) + + # 生成样例数据 + data = { + "V01": np.random.normal(0, 1, 100), + "V02": np.random.normal(0, 1, 100), + "V03": np.random.normal(0, 1, 100), + "V04": np.random.normal(0, 1, 100), + "V05": np.random.normal(0, 1, 100), + "V06": np.random.normal(0, 1, 100), + "V07": np.random.normal(0, 1, 100), + "V08": np.random.normal(0, 1, 100), + "V09": np.random.normal(0, 1, 100), + "V10": np.random.normal(0, 1, 100), + } + + # 创建 DataFrame + df = pd.DataFrame(data) + czsc.show_correlation(df) + czsc.show_corr_graph(df, threshold=0.1) + + +if __name__ == "__main__": + main() diff --git "a/examples/develop/ST\346\227\245\346\224\266\347\233\212\347\232\204\346\227\266\351\227\264\346\225\210\345\272\224.py" "b/examples/develop/ST\346\227\245\346\224\266\347\233\212\347\232\204\346\227\266\351\227\264\346\225\210\345\272\224.py" new file mode 100644 index 000000000..5b337a341 --- /dev/null +++ "b/examples/develop/ST\346\227\245\346\224\266\347\233\212\347\232\204\346\227\266\351\227\264\346\225\210\345\272\224.py" @@ -0,0 +1,99 @@ +import pandas as pd +import streamlit as st + +st.set_page_config(layout="wide") + + +def show_df_describe(df: pd.DataFrame): + """展示 DataFrame 的描述性统计信息 + + :param df: pd.DataFrame,必须是 df.describe() 的结果 + """ + quantiles = [x for x in df.columns if "%" in x] + df = df.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["mean"]) + df = df.background_gradient(cmap="RdYlGn_r", axis=None, subset=["std"]) + df = df.background_gradient(cmap="RdYlGn_r", axis=None, subset=["max", "min"] + quantiles) + + format_dict = { + "count": "{:.0f}", + "mean": "{:.4f}", + "std": "{:.4f}", + "min": "{:.4f}", + "max": "{:.4f}", + } + for q in quantiles: + format_dict[q] = "{:.4f}" + + df = df.format(format_dict) + st.dataframe(df, use_container_width=True) + + +def show_date_effect(df: pd.DataFrame, ret_col: str, **kwargs): + """分析日收益数据的日历效应 + + :param df: pd.DataFrame, 包含日期的日收益数据 + :param ret_col: str, 收益列名称 + :param kwargs: dict, 其他参数 + + - show_weekday: bool, 是否展示星期效应,默认为 True + - show_month: bool, 是否展示月份效应,默认为 True + - percentiles: list, 分位数,默认为 [0.1, 0.25, 0.5, 0.75, 0.9] + + """ + show_weekday = kwargs.get("show_weekday", True) + show_month = kwargs.get("show_month", True) + percentiles = kwargs.get("percentiles", [0.1, 0.25, 0.5, 0.75, 0.9]) + + assert ret_col in df.columns, f"ret_col 必须是 {df.columns} 中的一个" + assert show_month or show_weekday, "show_month 和 show_weekday 不能同时为 False" + + if not df.index.dtype == "datetime64[ns]": + df["dt"] = pd.to_datetime(df["dt"]) + df.set_index("dt", inplace=True) + + assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换" + df = df.copy() + + st.write( + f"交易区间 {df.index.min().strftime('%Y-%m-%d')} ~ {df.index.max().strftime('%Y-%m-%d')};总天数:{len(df)}" + ) + + if show_weekday: + st.write("##### 星期效应") + df["weekday"] = df.index.weekday + sorted_weekday = sorted(df["weekday"].unique().tolist()) + weekday_map = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"} + df["weekday"] = df["weekday"].map(weekday_map) + sorted_rows = [weekday_map[i] for i in sorted_weekday] + + weekday_effect = df.groupby("weekday")[ret_col].describe(percentiles=percentiles) + weekday_effect = weekday_effect.loc[sorted_rows] + show_df_describe(weekday_effect) + + if show_month: + st.write("##### 月份效应") + df["month"] = df.index.month + month_map = {i: f"{i}月" for i in range(1, 13)} + sorted_month = sorted(df["month"].unique().tolist()) + sorted_rows = [month_map[i] for i in sorted_month] + + df["month"] = df["month"].map(month_map) + month_effect = df.groupby("month")[ret_col].describe(percentiles=percentiles) + month_effect = month_effect.loc[sorted_rows] + show_df_describe(month_effect) + + st.caption("数据说明:count 为样本数量,mean 为均值,std 为标准差,min 为最小值,n% 为分位数,max 为最大值") + + +def main(): + df = pd.read_feather(r"A:\量化研究\BTC策略1H持仓权重和日收益241201\BTC_2H_001-daily_return.feather") + df["date"] = pd.to_datetime(df["date"]) + df = df[df["date"] >= pd.to_datetime("2021-01-01")].copy() + df.set_index("date", inplace=True) + df["total"] = df.mean(axis=1) * 10000 + + show_date_effect(df, ret_col="total") + + +if __name__ == "__main__": + main() diff --git a/examples/develop/weight_backtest.py b/examples/develop/weight_backtest.py index e508dbdf2..426aae280 100644 --- a/examples/develop/weight_backtest.py +++ b/examples/develop/weight_backtest.py @@ -3,13 +3,1907 @@ sys.path.insert(0, r"A:\ZB\git_repo\waditu\czsc") import czsc +import rs_czsc import pandas as pd -czsc.welcome() +def test_daily_performance(): + rets = [ + 0.003, + -0.0022, + -0.0004, + -0.0048, + -0.0, + 0.005, + 0.0015, + -0.0017, + 0.0017, + 0.0031, + -0.0002, + 0.0003, + -0.0064, + -0.0006, + -0.0031, + 0.0027, + -0.0, + -0.0013, + -0.004, + 0.0013, + -0.0036, + -0.0008, + 0.0, + 0.002, + 0.0001, + -0.0007, + 0.0006, + -0.0006, + 0.0, + 0.0005, + -0.0017, + -0.0001, + 0.0008, + 0.0005, + 0.0, + 0.0019, + -0.003, + -0.0015, + 0.0016, + 0.0009, + -0.0002, + 0.0009, + 0.0004, + 0.0033, + -0.0032, + 0.0057, + -0.0005, + -0.0024, + 0.0002, + 0.0022, + -0.0011, + -0.0039, + -0.0002, + 0.0014, + 0.001, + -0.0012, + 0.0008, + -0.001, + 0.0, + 0.001, + -0.0035, + -0.0014, + -0.0018, + -0.0016, + -0.0002, + -0.0032, + -0.0021, + 0.0015, + 0.0008, + 0.0023, + -0.0034, + 0.0008, + -0.0001, + -0.0034, + 0.0043, + 0.0036, + 0.005, + -0.0005, + 0.0025, + 0.0001, + -0.0005, + 0.0038, + -0.0018, + -0.003, + -0.0003, + 0.0, + -0.0013, + 0.0007, + 0.0015, + -0.001, + 0.0026, + -0.0009, + 0.0012, + 0.005, + -0.0045, + 0.0, + -0.0006, + 0.0011, + -0.0022, + -0.0013, + -0.003, + 0.0, + 0.0027, + -0.0019, + -0.0015, + -0.0001, + 0.0039, + -0.0001, + -0.0028, + 0.0007, + -0.004, + -0.0024, + 0.0007, + 0.005, + 0.0023, + 0.0001, + -0.0, + -0.0011, + -0.0006, + -0.0, + -0.0003, + 0.0012, + -0.0, + 0.0011, + -0.0022, + 0.0002, + 0.0007, + 0.0018, + 0.0001, + 0.0029, + -0.0004, + 0.0062, + -0.0017, + -0.0012, + -0.0, + -0.0004, + 0.003, + 0.0012, + 0.0015, + 0.0003, + 0.0002, + 0.0029, + 0.0008, + -0.0011, + -0.0003, + 0.0054, + -0.0006, + 0.0019, + 0.0012, + -0.0008, + -0.001, + -0.0034, + 0.0002, + -0.0017, + 0.0017, + 0.0003, + -0.0024, + -0.0022, + -0.0, + 0.0006, + -0.0006, + -0.0005, + -0.0013, + 0.003, + -0.0, + 0.0039, + 0.0001, + 0.0011, + -0.0008, + 0.0011, + 0.0001, + 0.0001, + 0.0028, + 0.0038, + 0.0072, + -0.0021, + -0.0001, + -0.0003, + -0.0005, + 0.006, + 0.0009, + 0.0039, + -0.0006, + 0.0071, + -0.0032, + 0.0023, + 0.0003, + -0.0043, + 0.0, + 0.0025, + -0.0019, + 0.0, + -0.0021, + -0.0003, + 0.0005, + 0.0034, + -0.0014, + -0.0015, + 0.0006, + -0.0027, + 0.0003, + 0.0003, + 0.0011, + 0.003, + -0.0003, + 0.0047, + 0.0003, + 0.0035, + 0.0039, + 0.0011, + 0.0089, + 0.001, + 0.0001, + -0.0004, + 0.0003, + 0.0038, + -0.0, + -0.0018, + 0.0004, + -0.0002, + 0.0011, + -0.0025, + 0.0015, + -0.0001, + -0.0012, + -0.0014, + 0.0044, + 0.0007, + 0.0009, + 0.0, + 0.0018, + 0.0003, + -0.0001, + 0.0002, + 0.0006, + -0.0001, + -0.0045, + 0.0005, + -0.0027, + 0.0004, + -0.0004, + 0.0, + 0.0049, + -0.0017, + 0.0054, + -0.005, + 0.0007, + -0.0003, + -0.0026, + -0.0044, + -0.0016, + 0.0004, + 0.0001, + 0.0002, + 0.003, + 0.0026, + 0.0027, + -0.0029, + -0.0005, + 0.0, + -0.0021, + 0.0004, + 0.0057, + 0.0026, + 0.0113, + -0.0003, + 0.0068, + -0.0031, + 0.0068, + 0.0034, + 0.0045, + 0.0, + -0.0011, + -0.004, + 0.0003, + -0.0044, + -0.0017, + -0.0, + -0.0012, + -0.0026, + -0.0016, + -0.0048, + -0.0002, + 0.0001, + 0.0026, + 0.0005, + 0.0025, + 0.0006, + 0.0053, + -0.0044, + -0.0008, + 0.0003, + -0.0006, + -0.0, + -0.0005, + -0.0002, + -0.0005, + 0.0004, + 0.0003, + 0.0002, + 0.0003, + 0.0016, + -0.0003, + 0.0036, + 0.0003, + -0.0001, + -0.0035, + -0.0034, + -0.0009, + 0.0008, + -0.0008, + 0.0, + -0.0002, + 0.0011, + -0.002, + -0.0007, + 0.003, + 0.0004, + 0.0022, + 0.0002, + 0.0019, + -0.0013, + -0.0021, + -0.0002, + -0.0007, + -0.0004, + -0.0001, + -0.0001, + 0.0049, + -0.0, + -0.0007, + -0.0007, + 0.0001, + -0.0006, + -0.0005, + 0.0001, + 0.0031, + 0.0004, + 0.0018, + 0.0014, + 0.0034, + -0.0003, + 0.0025, + 0.0016, + -0.0004, + 0.0004, + 0.0014, + -0.0, + -0.0, + -0.0011, + -0.0011, + -0.0016, + 0.0013, + -0.0001, + 0.002, + 0.0061, + 0.0024, + -0.0004, + -0.0038, + -0.0, + -0.0002, + -0.0004, + 0.0002, + -0.0015, + -0.0001, + 0.0028, + -0.0017, + 0.0003, + -0.0001, + 0.0003, + 0.005, + -0.0005, + -0.0005, + -0.0016, + -0.0001, + 0.0047, + -0.0006, + 0.0005, + 0.004, + 0.0005, + 0.0021, + -0.002, + 0.0009, + 0.0002, + 0.0026, + -0.0018, + 0.0002, + -0.001, + 0.0037, + -0.0002, + 0.0082, + 0.0066, + 0.0019, + -0.0004, + -0.0031, + -0.0006, + -0.0003, + 0.0065, + -0.0063, + -0.0026, + 0.0023, + 0.0008, + -0.002, + -0.0018, + 0.0012, + 0.0006, + -0.0012, + 0.0002, + -0.003, + -0.0024, + -0.0009, + 0.0015, + 0.0019, + 0.0001, + -0.0028, + -0.0013, + 0.0014, + 0.0024, + -0.0001, + -0.0017, + -0.0062, + -0.0008, + -0.0059, + -0.0003, + 0.002, + 0.002, + 0.0001, + 0.0004, + -0.001, + -0.0006, + -0.0033, + -0.0012, + -0.001, + -0.0027, + -0.0019, + -0.0002, + -0.0013, + 0.0014, + 0.0011, + -0.0002, + -0.003, + -0.0002, + -0.0026, + -0.0023, + -0.0004, + 0.0023, + 0.0021, + -0.0, + 0.0019, + 0.0043, + -0.0001, + 0.0056, + 0.0019, + -0.0006, + 0.0053, + 0.0009, + -0.0, + 0.0057, + 0.0059, + 0.0003, + 0.0096, + -0.0089, + 0.0001, + -0.0013, + -0.0012, + -0.0003, + 0.0026, + -0.0018, + 0.0012, + 0.0028, + 0.0059, + 0.0005, + -0.0044, + -0.0006, + 0.0007, + -0.0011, + -0.0041, + -0.0003, + 0.0024, + -0.0025, + 0.0009, + 0.0035, + 0.0002, + 0.0001, + 0.0025, + -0.0008, + 0.0001, + -0.0015, + -0.0042, + -0.0009, + 0.0, + 0.0041, + 0.0012, + -0.0034, + -0.0019, + 0.0004, + -0.0019, + -0.0017, + 0.0013, + 0.0006, + 0.0047, + -0.0031, + -0.0003, + 0.0044, + -0.0066, + 0.0014, + 0.0072, + -0.0045, + 0.0013, + 0.0053, + -0.0008, + -0.0, + 0.0014, + -0.0013, + -0.0022, + 0.0035, + -0.0002, + -0.0004, + 0.0008, + -0.0035, + -0.0002, + -0.0034, + 0.0002, + -0.0032, + -0.0027, + 0.0011, + 0.0015, + -0.0, + 0.0002, + -0.002, + 0.0003, + 0.0005, + 0.0007, + 0.0055, + -0.0005, + 0.0023, + 0.0035, + 0.0011, + 0.0005, + -0.0024, + -0.0002, + -0.0027, + 0.0042, + -0.0043, + -0.001, + 0.008, + -0.0, + -0.0003, + 0.0047, + -0.0067, + 0.001, + -0.0033, + -0.0046, + -0.0013, + 0.0039, + -0.0023, + -0.004, + -0.0059, + -0.0014, + -0.0007, + -0.0026, + -0.0003, + -0.0022, + -0.0006, + -0.0, + -0.0002, + 0.0026, + 0.0047, + 0.0017, + 0.0029, + 0.0, + 0.0034, + 0.0071, + -0.0036, + 0.0042, + -0.0001, + 0.0002, + 0.0026, + 0.0051, + -0.0004, + 0.0033, + -0.0016, + 0.0021, + -0.0002, + -0.0001, + -0.0, + -0.0006, + 0.0003, + -0.0004, + 0.0014, + 0.0052, + -0.0002, + -0.0023, + -0.0029, + -0.0006, + 0.0015, + 0.0012, + 0.0005, + -0.0012, + -0.0044, + -0.001, + -0.0002, + 0.0003, + -0.0039, + -0.0037, + -0.0003, + 0.0012, + 0.0017, + 0.0016, + -0.0018, + 0.0, + 0.0004, + -0.003, + 0.0025, + -0.0002, + -0.0006, + 0.0004, + -0.0014, + -0.0005, + -0.0007, + 0.0012, + -0.0012, + 0.0004, + -0.0014, + 0.0006, + 0.0016, + -0.0018, + -0.0012, + -0.0014, + 0.0009, + 0.0002, + -0.0039, + 0.0, + 0.0019, + 0.0031, + -0.0006, + 0.0009, + -0.0002, + -0.0001, + -0.0025, + 0.0013, + 0.0028, + 0.003, + -0.0017, + 0.0005, + 0.0003, + 0.0017, + -0.0001, + -0.0003, + 0.0019, + -0.0024, + -0.0013, + -0.0012, + -0.0035, + 0.0004, + 0.0034, + -0.0016, + -0.0025, + -0.001, + -0.0026, + 0.0012, + 0.0017, + 0.0016, + -0.0005, + 0.0033, + -0.0015, + -0.0005, + -0.0018, + 0.0018, + -0.0033, + -0.0011, + 0.002, + 0.0029, + -0.0002, + -0.0003, + 0.0021, + 0.0025, + 0.004, + 0.0029, + 0.0015, + 0.0014, + 0.0029, + 0.0046, + 0.002, + 0.004, + 0.0032, + -0.0009, + -0.0066, + 0.0003, + -0.0033, + 0.0, + 0.0078, + 0.0026, + 0.0016, + -0.0034, + 0.0074, + -0.0045, + -0.0023, + 0.0006, + -0.0037, + -0.005, + 0.0003, + -0.0008, + 0.0022, + -0.0009, + -0.0, + 0.0044, + -0.002, + 0.0005, + -0.0011, + -0.0007, + 0.0025, + -0.0022, + -0.0027, + 0.0004, + -0.003, + -0.0005, + -0.0041, + -0.0019, + -0.0002, + 0.0003, + 0.0029, + 0.0047, + -0.0012, + -0.0013, + -0.0019, + -0.0002, + 0.0007, + 0.0031, + 0.0053, + 0.0055, + 0.0037, + -0.0018, + -0.0034, + 0.002, + -0.0002, + -0.0006, + 0.0017, + -0.0005, + 0.0016, + -0.0032, + 0.0006, + 0.0079, + -0.0029, + 0.0002, + 0.0037, + -0.0023, + 0.0077, + -0.0022, + -0.0011, + -0.0001, + -0.0008, + 0.0, + -0.0055, + -0.0022, + -0.0004, + -0.0001, + -0.0025, + -0.0039, + -0.0002, + -0.0035, + 0.0009, + 0.0019, + 0.0024, + 0.0062, + -0.0009, + 0.0034, + -0.0048, + -0.0003, + -0.0033, + 0.003, + -0.0015, + 0.001, + 0.0028, + 0.0032, + 0.0054, + 0.0027, + -0.0027, + -0.0016, + 0.009, + -0.0058, + -0.0026, + 0.0014, + -0.0006, + 0.0005, + 0.0028, + 0.0033, + 0.0015, + 0.0009, + 0.0009, + -0.0002, + 0.0102, + -0.0117, + -0.0024, + 0.0014, + 0.0033, + -0.0002, + 0.0044, + -0.0026, + 0.0062, + 0.0029, + -0.0018, + 0.0004, + 0.0007, + -0.0028, + -0.0006, + 0.0023, + 0.0008, + 0.0007, + -0.0043, + -0.0031, + 0.0005, + 0.0018, + -0.0032, + -0.0007, + 0.0001, + 0.0027, + 0.0013, + 0.0003, + 0.0019, + -0.0004, + 0.0012, + -0.0015, + -0.0012, + -0.0032, + -0.0019, + -0.0007, + -0.0014, + 0.0042, + -0.0049, + -0.0009, + 0.0015, + 0.0004, + 0.0002, + -0.0022, + -0.0013, + -0.0005, + -0.0012, + -0.0002, + 0.0018, + 0.0034, + -0.0012, + -0.0003, + 0.0045, + 0.0003, + 0.0008, + 0.0012, + -0.001, + -0.0039, + -0.0023, + 0.0003, + 0.0013, + -0.0013, + -0.0046, + -0.0024, + 0.0005, + -0.0001, + 0.0026, + 0.0007, + 0.0018, + -0.0008, + -0.0014, + 0.0003, + 0.0008, + -0.0018, + 0.0001, + -0.0029, + 0.0024, + 0.0017, + -0.0015, + 0.0053, + -0.0153, + 0.0045, + 0.0016, + 0.0001, + 0.0026, + 0.0008, + 0.0007, + -0.0039, + -0.0021, + 0.0001, + -0.0001, + -0.0, + 0.0024, + 0.0304, + 0.0084, + -0.0086, + -0.0081, + -0.0016, + 0.0001, + -0.0012, + 0.0016, + 0.0012, + -0.0009, + 0.0019, + -0.0008, + 0.0006, + 0.0036, + 0.0017, + 0.0019, + -0.0028, + -0.0016, + 0.0014, + 0.0113, + 0.0039, + -0.0146, + 0.0032, + 0.0002, + 0.0018, + 0.0185, + 0.0112, + -0.0109, + 0.0093, + 0.019, + 0.0052, + 0.015, + 0.0181, + 0.0241, + -0.0058, + 0.0214, + -0.005, + 0.005, + 0.0064, + -0.0057, + 0.0023, + 0.0019, + -0.0076, + -0.0008, + -0.0018, + 0.0038, + -0.0079, + 0.0083, + -0.0019, + 0.0064, + -0.008, + 0.0011, + -0.0063, + 0.0056, + 0.0068, + 0.0037, + 0.0128, + 0.0071, + -0.0173, + 0.0127, + -0.0008, + -0.0027, + 0.0063, + 0.0098, + -0.0081, + -0.0013, + -0.0023, + -0.0003, + -0.0001, + -0.0035, + -0.0003, + 0.0004, + 0.0108, + 0.0054, + 0.0084, + -0.0076, + 0.0052, + -0.0014, + -0.0077, + -0.0003, + -0.0054, + -0.0012, + -0.0054, + 0.0004, + 0.0019, + 0.0018, + 0.0013, + 0.0041, + 0.0027, + -0.0038, + 0.0026, + 0.0013, + -0.0034, + -0.0029, + 0.0048, + -0.0, + -0.0093, + -0.0011, + -0.0021, + -0.0035, + 0.0008, + 0.0043, + 0.0024, + 0.0008, + -0.0042, + -0.0006, + 0.0044, + -0.0021, + 0.0047, + 0.001, + -0.0059, + 0.0009, + 0.0, + -0.0014, + -0.0036, + 0.0028, + -0.0011, + -0.0013, + 0.0002, + 0.004, + -0.0053, + -0.0001, + 0.001, + 0.0043, + 0.0004, + -0.0013, + 0.0052, + 0.0081, + 0.0089, + -0.0024, + 0.0001, + 0.0026, + 0.0008, + -0.0016, + 0.001, + 0.001, + 0.0001, + 0.011, + 0.0061, + 0.002, + 0.0053, + 0.0072, + 0.0, + -0.0082, + -0.0036, + 0.0027, + -0.0037, + 0.0021, + -0.0012, + -0.0023, + -0.0022, + -0.0036, + 0.0046, + 0.0041, + 0.0004, + -0.0, + 0.0021, + -0.001, + 0.0009, + 0.0004, + 0.0002, + 0.0058, + 0.0046, + 0.0018, + -0.0009, + 0.001, + 0.0011, + -0.003, + 0.0124, + -0.0061, + 0.0025, + -0.0051, + 0.0002, + -0.0018, + -0.0021, + 0.0045, + 0.0026, + 0.0016, + -0.0007, + -0.001, + 0.0024, + 0.0059, + 0.0006, + -0.0023, + -0.0003, + -0.0061, + -0.0033, + -0.0069, + 0.0128, + -0.0, + 0.0015, + 0.0044, + -0.0, + -0.0065, + 0.0027, + -0.0, + 0.0004, + 0.0033, + -0.0052, + -0.0001, + 0.0047, + 0.0015, + 0.0037, + 0.0022, + 0.0057, + 0.0125, + 0.0033, + 0.0019, + -0.0003, + 0.0042, + 0.0013, + -0.0002, + 0.0097, + -0.0008, + -0.003, + -0.0063, + 0.0041, + -0.0018, + 0.0014, + 0.0001, + -0.0053, + -0.0067, + -0.0012, + 0.0022, + 0.0035, + 0.0004, + -0.0049, + 0.0078, + -0.0042, + -0.0024, + -0.0023, + 0.0009, + 0.0006, + 0.0045, + 0.0027, + -0.0018, + 0.0138, + -0.0, + -0.0055, + -0.0047, + 0.0087, + 0.003, + -0.0026, + 0.0004, + -0.0088, + -0.0052, + 0.0023, + 0.0148, + 0.0043, + -0.0018, + -0.0004, + -0.0082, + 0.0008, + -0.0043, + 0.0102, + 0.0012, + -0.0063, + -0.0081, + -0.0038, + 0.0027, + 0.0046, + 0.0051, + 0.0034, + 0.0063, + 0.0072, + 0.0058, + 0.0042, + 0.0011, + 0.0024, + -0.0043, + -0.0089, + 0.0007, + -0.0083, + -0.0008, + -0.0011, + -0.0046, + -0.007, + -0.0013, + -0.0026, + 0.0034, + -0.0002, + 0.0005, + 0.0129, + 0.0039, + 0.0043, + 0.0036, + -0.0056, + -0.0032, + 0.0015, + 0.0005, + -0.0034, + -0.0044, + 0.0029, + 0.0048, + 0.0114, + -0.0002, + 0.0163, + -0.0047, + 0.0059, + -0.0124, + 0.0119, + -0.0013, + 0.0005, + -0.005, + -0.0026, + 0.0076, + 0.0115, + 0.0022, + -0.0114, + 0.0008, + 0.0007, + -0.0088, + 0.0012, + -0.0011, + -0.0016, + -0.003, + 0.012, + 0.0006, + 0.0137, + -0.0013, + -0.0043, + 0.0039, + -0.0084, + -0.0054, + -0.0003, + 0.0004, + 0.0016, + -0.0026, + -0.0019, + -0.0011, + -0.0031, + 0.0011, + -0.0047, + -0.0014, + -0.0046, + 0.0002, + -0.0045, + -0.0047, + 0.0022, + 0.0029, + 0.003, + -0.0005, + 0.0064, + 0.0002, + 0.0016, + 0.0002, + -0.0008, + 0.0001, + -0.0044, + -0.0024, + 0.003, + -0.0028, + 0.0007, + 0.0157, + 0.0053, + 0.0012, + -0.0108, + 0.0062, + 0.0168, + -0.015, + -0.0097, + -0.0005, + 0.0011, + -0.001, + 0.0054, + -0.0017, + 0.006, + 0.0, + -0.0085, + 0.0009, + -0.0017, + -0.0021, + 0.0026, + -0.0013, + 0.0038, + 0.0057, + 0.006, + -0.0031, + 0.0014, + 0.0012, + 0.0015, + -0.0106, + 0.0065, + -0.0023, + -0.0035, + -0.0031, + 0.0027, + 0.008, + -0.0069, + -0.0006, + -0.0077, + -0.0066, + 0.0061, + 0.0057, + -0.0046, + 0.0003, + -0.0108, + 0.0053, + -0.002, + -0.0018, + 0.0045, + -0.0, + 0.0031, + -0.0198, + 0.0041, + -0.0052, + -0.0021, + -0.0001, + -0.0027, + 0.0049, + -0.0074, + 0.0076, + 0.0016, + 0.0015, + -0.0009, + 0.0116, + -0.003, + 0.0002, + 0.0029, + -0.0, + 0.002, + 0.0003, + 0.0023, + 0.004, + -0.0121, + -0.0002, + 0.0022, + -0.0054, + 0.0014, + -0.0004, + 0.0035, + 0.0012, + -0.0058, + 0.0009, + 0.0012, + 0.0031, + 0.0111, + -0.0001, + -0.0088, + 0.0002, + 0.0052, + 0.0028, + 0.0009, + -0.0, + 0.0026, + -0.001, + 0.0056, + -0.0036, + -0.0045, + 0.0013, + 0.0023, + -0.0007, + 0.0018, + 0.0062, + -0.0028, + -0.0012, + 0.0116, + 0.0041, + 0.0183, + 0.0081, + -0.0134, + 0.0017, + 0.0005, + 0.005, + 0.006, + -0.0019, + 0.0089, + 0.0123, + 0.0069, + 0.003, + 0.0018, + -0.0065, + 0.0048, + 0.0039, + 0.0174, + 0.0047, + 0.0001, + 0.0182, + 0.0074, + -0.0315, + -0.0073, + 0.0057, + 0.0002, + 0.0096, + -0.0166, + -0.0112, + 0.0051, + 0.0164, + -0.0, + 0.0169, + 0.0039, + 0.0299, + -0.0271, + 0.0015, + -0.0003, + -0.0006, + 0.0006, + -0.0144, + -0.0118, + -0.0074, + 0.0002, + 0.0013, + 0.0085, + -0.0066, + -0.0035, + 0.001, + -0.0001, + 0.0081, + -0.0027, + -0.003, + 0.0088, + -0.0124, + 0.0014, + -0.0043, + 0.0038, + 0.0068, + -0.0095, + 0.014, + -0.0032, + -0.0056, + 0.0039, + -0.0067, + 0.0005, + -0.0051, + -0.0009, + -0.0036, + 0.0059, + 0.0067, + -0.005, + -0.0018, + -0.0009, + -0.0076, + -0.0021, + 0.0043, + -0.0023, + -0.0117, + 0.0007, + 0.0012, + -0.009, + -0.0018, + -0.0059, + -0.003, + 0.0003, + -0.0025, + 0.0008, + 0.0006, + 0.0015, + 0.0049, + -0.0029, + -0.0003, + 0.0003, + 0.0021, + -0.0006, + -0.0039, + 0.0028, + 0.0069, + -0.0066, + 0.006, + 0.0014, + -0.0111, + -0.0015, + -0.0031, + 0.0018, + -0.0037, + -0.0016, + -0.0073, + 0.0007, + 0.005, + 0.0094, + -0.0021, + 0.0059, + -0.0172, + -0.0056, + 0.0068, + -0.0117, + 0.0025, + 0.0004, + -0.0094, + 0.0018, + 0.0012, + -0.0006, + -0.0002, + -0.0003, + -0.0001, + 0.0003, + 0.0038, + -0.0051, + -0.0048, + -0.0016, + 0.0017, + 0.0103, + 0.0079, + 0.0263, + 0.0043, + -0.0135, + 0.0203, + -0.0287, + -0.0034, + 0.0048, + 0.0012, + 0.0117, + 0.0017, + -0.0054, + -0.0111, + -0.0004, + 0.0007, + -0.0024, + -0.0071, + 0.0058, + 0.0015, + 0.0021, + -0.0006, + -0.0005, + 0.0081, + -0.0009, + -0.0059, + 0.0064, + -0.0046, + 0.0069, + 0.0023, + -0.0004, + -0.0045, + -0.0, + 0.002, + 0.0049, + 0.005, + 0.0021, + 0.0058, + -0.0083, + -0.0033, + -0.0013, + 0.0039, + 0.0024, + 0.012, + -0.0053, + 0.002, + 0.0013, + 0.0033, + 0.0006, + 0.0087, + -0.0011, + 0.0022, + 0.0032, + -0.0144, + 0.0092, + 0.0, + -0.0002, + -0.0036, + -0.0044, + -0.0046, + -0.008, + -0.0024, + 0.0003, + -0.0065, + -0.0004, + 0.0003, + 0.0041, + 0.0066, + 0.0017, + 0.0048, + -0.0016, + -0.0031, + 0.001, + 0.0023, + 0.0125, + 0.0086, + -0.0113, + -0.0067, + 0.0002, + 0.0014, + 0.0084, + -0.0024, + 0.0137, + 0.0173, + 0.0017, + 0.0029, + -0.001, + 0.0035, + -0.0015, + 0.001, + -0.0002, + -0.0094, + 0.0063, + -0.0006, + -0.0062, + 0.0144, + 0.0008, + -0.0116, + 0.0088, + 0.001, + -0.0104, + 0.0126, + 0.0004, + 0.0065, + 0.0172, + -0.0026, + 0.0094, + -0.0138, + -0.0008, + 0.0013, + -0.0094, + -0.0033, + 0.0008, + -0.0087, + -0.0007, + 0.0008, + -0.0136, + 0.0047, + 0.011, + 0.0078, + -0.0023, + -0.0123, + -0.0015, + -0.0033, + 0.0054, + -0.0065, + 0.0003, + -0.0089, + -0.0049, + -0.0048, + -0.0065, + 0.0014, + -0.0, + -0.0116, + 0.0017, + 0.0044, + 0.0077, + 0.0041, + -0.0, + -0.0095, + 0.0024, + 0.0044, + 0.0005, + -0.004, + 0.0003, + -0.0033, + -0.0007, + 0.001, + 0.008, + -0.0091, + -0.0011, + 0.0056, + -0.003, + -0.0039, + 0.0037, + 0.0173, + -0.0055, + -0.0038, + -0.0075, + -0.0029, + -0.0004, + 0.0072, + -0.0063, + -0.0028, + 0.01, + -0.0111, + 0.0004, + 0.0079, + 0.0006, + -0.0055, + 0.0012, + 0.0169, + 0.0006, + -0.0083, + 0.0023, + -0.0054, + 0.0049, + -0.0009, + 0.0057, + 0.0026, + 0.0026, + -0.0033, + -0.0027, + 0.0013, + -0.0016, + 0.0024, + 0.0002, + 0.0112, + 0.0022, + -0.0, + 0.0023, + -0.0062, + -0.0007, + -0.0004, + 0.0019, + 0.0004, + 0.0033, + 0.0045, + 0.0019, + 0.0048, + 0.0048, + -0.0001, + -0.0112, + 0.0099, + -0.0031, + 0.0055, + 0.0011, + 0.0005, + 0.0036, + -0.0048, + 0.0024, + 0.0019, + -0.0, + 0.0028, + 0.0003, + -0.0081, + -0.0074, + -0.0011, + 0.0017, + 0.0015, + -0.0079, + 0.0047, + -0.0014, + 0.0023, + 0.0116, + 0.0002, + -0.0019, + 0.0022, + 0.0049, + -0.0011, + -0.0074, + -0.002, + 0.0062, + -0.0043, + -0.0033, + 0.0014, + 0.0028, + 0.0011, + -0.0111, + -0.0022, + -0.0047, + -0.0022, + -0.0013, + -0.0021, + -0.0023, + 0.0017, + -0.0042, + -0.0006, + -0.0043, + 0.0002, + 0.0006, + 0.0069, + 0.0018, + 0.0002, + 0.0006, + 0.0102, + -0.0016, + 0.0026, + 0.0047, + -0.0158, + -0.0052, + 0.0067, + 0.0034, + 0.0033, + 0.0024, + 0.0018, + -0.0047, + 0.0022, + 0.0013, + 0.0035, + 0.0002, + 0.0002, + -0.0077, + -0.0036, + 0.001, + -0.0065, + -0.0001, + 0.0015, + 0.0011, + 0.0077, + -0.0008, + -0.0033, + 0.0006, + -0.0046, + -0.0032, + -0.0046, + -0.0026, + -0.0065, + 0.001, + 0.0008, + 0.0004, + 0.0042, + -0.0046, + 0.0026, + -0.0015, + 0.0062, + 0.0019, + 0.0069, + 0.0014, + -0.0015, + -0.0006, + 0.0015, + -0.004, + -0.0017, + 0.0013, + -0.0037, + -0.0002, + 0.0016, + 0.0026, + -0.0029, + 0.0011, + 0.0039, + 0.0063, + 0.0017, + 0.0067, + 0.0071, + 0.0003, + -0.0005, + -0.0011, + -0.0007, + 0.0034, + -0.0007, + -0.0067, + -0.0002, + -0.0071, + -0.0032, + 0.0005, + 0.0001, + 0.0089, + 0.0003, + -0.0019, + 0.0049, + -0.001, + 0.0087, + 0.0151, + 0.0054, + -0.0138, + -0.0003, + -0.003, + 0.0019, + 0.0023, + 0.0068, + 0.0044, + -0.0007, + -0.0003, + 0.0002, + -0.002, + 0.0083, + 0.0044, + 0.0001, + 0.0089, + 0.0008, + 0.013, + 0.0015, + -0.0111, + -0.0002, + 0.0014, + -0.0044, + -0.0031, + -0.0021, + -0.0052, + 0.0001, + -0.006, + -0.0022, + 0.0009, + -0.0013, + 0.0006, + -0.0002, + 0.0058, + -0.0083, + -0.0012, + -0.0002, + 0.0012, + -0.0004, + -0.0049, + -0.0024, + 0.0014, + -0.0007, + -0.0052, + -0.0019, + 0.002, + -0.0004, + -0.0004, + -0.004, + -0.001, + -0.0028, + -0.0042, + 0.0021, + 0.0006, + 0.0063, + 0.0015, + 0.0092, + 0.0118, + -0.0093, + -0.001, + -0.0004, + 0.0124, + 0.0069, + -0.0033, + -0.0004, + 0.0001, + 0.0008, + 0.0118, + 0.0062, + -0.0024, + -0.0004, + 0.0003, + -0.0019, + ] -def test_ensemble_weight(): + s1 = czsc.daily_performance(rets) + s2 = rs_czsc.daily_performance(rets) + assert s1 == s2 + + +def test_weight_backtest(): """从持仓权重样例数据中回测""" dfw = pd.read_feather(r"C:\Users\zengb\Downloads\weight_example.feather") - wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0002, res_path=r"C:\Users\zengb\Desktop\weight_example") - # res = wb.backtest() + + pw = czsc.WeightBacktest(dfw.copy(), digits=2, fee_rate=0.0002, n_jobs=1) + print(sorted(pw.stats.items())) + + rw = rs_czsc.WeightBacktest(dfw.copy(), digits=2, fee_rate=0.0002, n_jobs=1) + print(sorted(rw.stats.items())) diff --git a/examples/test_offline/test_weight_backtest.py b/examples/test_offline/test_weight_backtest.py index a7be4e278..14561d35b 100644 --- a/examples/test_offline/test_weight_backtest.py +++ b/examples/test_offline/test_weight_backtest.py @@ -5,18 +5,71 @@ import czsc import pandas as pd -assert czsc.WeightBacktest.version == "V240627" +# assert czsc.WeightBacktest.version == "V240627" def run_by_weights(): """从持仓权重样例数据中回测""" dfw = pd.read_feather(r"C:\Users\zengb\Downloads\weight_example.feather") - wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0002, n_jobs=1) + wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0, n_jobs=1, weight_type="ts") # wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0002) dailys = wb.dailys print(wb.stats) print(wb.alpha_stats) print(wb.bench_stats) + print(wb.long_stats) + print(wb.short_stats) + # 计算等权组合的超额 + df1 = dailys.groupby("date").agg({"return": "mean", "n1b": "mean"}) + df1["alpha"] = df1["return"] - df1["n1b"] + + # ------------------------------------------------------------------------------------ + # 查看绩效评价 + # ------------------------------------------------------------------------------------ + print(wb.results["绩效评价"]) + # {'开始日期': '20170103', + # '结束日期': '20230731', + # '年化': 0.093, # 品种等权之后的年化收益率 + # '夏普': 1.19, # 品种等权之后的夏普比率 + # '最大回撤': 0.1397, # 品种等权之后的最大回撤 + # '卡玛': 0.67, + # '日胜率': 0.5228, # 品种等权之后的日胜率 + # '年化波动率': 0.0782, + # '非零覆盖': 1.0, + # '盈亏平衡点': 0.9782, # 品种等权之后的盈亏平衡点,这个值越小越好,正常策略的范围应该在 0.85~0.98 之间 + # '单笔收益': 25.6, # 将所有品种的单笔汇总之后的平均收益,单位是 BP,即 0.01% + # '交易胜率': 0.3717, # 将所有品种的单笔汇总之后的交易胜率 + # '持仓天数': 3.69, # 将所有品种的单笔汇总之后的平均持仓天数 + # '持仓K线数': 971.66} # 将所有品种的单笔汇总之后的平均持仓 K 线数 + + # ------------------------------------------------------------------------------------ + # 获取指定品种的回测结果 + # ------------------------------------------------------------------------------------ + symbol_res = wb.results[wb.symbols[0]] + print(symbol_res) + + # wb.report(res_path=r"C:\Users\zengb\Desktop\231005\weight_example") + + +def run_weights_by_cs(): + """从持仓权重样例数据中回测""" + from czsc import cross_sectional_strategy + + dfw = pd.read_feather(r"C:\Users\zengb\Downloads\weight_example.feather") + dfw.rename({"weight": "factor"}, axis=1, inplace=True) + # 仅保留 15:00 的数据 + dfw = dfw[dfw["dt"].dt.time == pd.to_datetime("15:00").time()].copy().reset_index(drop=True) + dfw = cross_sectional_strategy(dfw, factor="factor", long=2, short=2, norm=True) + + wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0, n_jobs=1, weight_type="cs") + # wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0002) + dailys = wb.dailys + print(wb.stats) + print(wb.alpha_stats) + print(wb.bench_stats) + + print(wb.long_stats) + print(wb.short_stats) # 计算等权组合的超额 df1 = dailys.groupby("date").agg({"return": "mean", "n1b": "mean"}) @@ -47,7 +100,7 @@ def run_by_weights(): symbol_res = wb.results[wb.symbols[0]] print(symbol_res) - wb.report(res_path=r"C:\Users\zengb\Desktop\231005\weight_example") + # wb.report(res_path=r"C:\Users\zengb\Desktop\231005\weight_example") if __name__ == "__main__": diff --git "a/examples/\346\234\237\350\264\247\345\245\227\345\210\251\345\233\240\345\255\220\346\240\267\344\276\213.py" "b/examples/\346\234\237\350\264\247\345\245\227\345\210\251\345\233\240\345\255\220\346\240\267\344\276\213.py" index b4f3814cb..d17e405ed 100644 --- "a/examples/\346\234\237\350\264\247\345\245\227\345\210\251\345\233\240\345\255\220\346\240\267\344\276\213.py" +++ "b/examples/\346\234\237\350\264\247\345\245\227\345\210\251\345\233\240\345\255\220\346\240\267\344\276\213.py" @@ -67,6 +67,7 @@ def main(): import czsc from czsc.connectors import cooperation as coo + # 构建策略 df1 = coo.get_raw_bars(symbol="DLy9001", freq="日线", sdt="20170101", edt="20221231", raw_bars=False, fq="后复权") df2 = coo.get_raw_bars(symbol="DLp9001", freq="日线", sdt="20170101", edt="20221231", raw_bars=False, fq="后复权") df = pd.concat([df1, df2], axis=0) @@ -76,6 +77,8 @@ def main(): df["price"] = df["close"] dfw = df[["dt", "symbol", "price", "weight"]].copy() + + # 执行回测 st.title("期货套利研究") czsc.show_weight_backtest( dfw, fee_rate=0.0002, show_drawdowns=True, show_yearly_stats=True, show_monthly_return=True diff --git a/requirements.txt b/requirements.txt index cabef51c9..da4e2e270 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +Cython requests>=2.24.0 pyecharts>=1.9.1 tqdm>=4.66.4 @@ -29,4 +30,5 @@ pytz flask scipy requests_toolbelt -pandas-ta \ No newline at end of file +pandas-ta +networkx \ No newline at end of file diff --git a/setup.py b/setup.py index fb250ad0c..69473de35 100644 --- a/setup.py +++ b/setup.py @@ -16,13 +16,13 @@ version=czsc.__version__, author=czsc.__author__, author_email=czsc.__email__, - keywords=["缠论", "技术分析", "A股", "期货", "缠中说禅"], + keywords=["缠论", "技术分析", "A股", "期货", "缠中说禅", "量化", "QUANT", "程序化交易"], description="缠中说禅技术分析工具", long_description=long_description, long_description_content_type="text/markdown", license="Apache Software License", url="https://github.com/waditu/czsc", - packages=find_packages(exclude=["test", "images", "docs", "examples", "hist"]), + packages=find_packages(include=["czsc", "czsc.*"]), include_package_data=True, install_requires=install_requires, package_data={"": ["utils/china_calendar.feather", "utils/minutes_split.feather"]}, diff --git a/test/test_cross_sectional_strategy.py b/test/test_cross_sectional_strategy.py new file mode 100644 index 000000000..143d4fbef --- /dev/null +++ b/test/test_cross_sectional_strategy.py @@ -0,0 +1,68 @@ +# tests/test_cross_sectional_strategy.py +import pytest +import pandas as pd +from czsc.eda import cross_sectional_strategy + + +@pytest.fixture +def sample_data(): + data = { + "dt": [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + "2023-01-08", + "2023-01-09", + "2023-01-10", + ] + * 5, + "symbol": ["A"] * 10 + ["B"] * 10 + ["C"] * 10 + ["D"] * 10 + ["E"] * 10, + "factor": list(range(1, 51)), + } + return pd.DataFrame(data) + + +def test_cross_sectional_strategy_positive(sample_data): + result = cross_sectional_strategy(sample_data, factor="factor", long=0.5, short=0.5, factor_direction="positive") + assert "weight" in result.columns + assert result["weight"].sum() == 0 # Long and short positions should balance out + + +def test_cross_sectional_strategy_negative(sample_data): + result = cross_sectional_strategy(sample_data, factor="factor", long=0.5, short=0.5, factor_direction="negative") + assert "weight" in result.columns + assert result["weight"].sum() == 0 # Long and short positions should balance out + print(result) + + +def test_cross_sectional_strategy_negative_norm(sample_data): + result = cross_sectional_strategy( + sample_data, factor="factor", long=0.5, short=0.5, factor_direction="negative", norm=False + ) + assert "weight" in result.columns + assert result["weight"].sum() == 0 # Long and short positions should balance out + print(result) + + +def test_cross_sectional_strategy_no_positions(sample_data): + result = cross_sectional_strategy(sample_data, factor="factor", long=0, short=0) + assert "weight" in result.columns + assert result["weight"].sum() == 0 # No positions should be taken + + +def test_cross_sectional_strategy_invalid_factor(sample_data): + with pytest.raises(AssertionError): + cross_sectional_strategy(sample_data, factor="invalid_factor", long=0.5, short=0.5) + + +def test_cross_sectional_strategy_invalid_factor_direction(sample_data): + with pytest.raises(AssertionError): + cross_sectional_strategy(sample_data, factor="factor", long=0.5, short=0.5, factor_direction="invalid") + + +if __name__ == "__main__": + pytest.main() diff --git a/test/test_eda.py b/test/test_eda.py index fd6c40542..b257338d4 100644 --- a/test/test_eda.py +++ b/test/test_eda.py @@ -69,5 +69,39 @@ def test_weights_simple_ensemble_only_long(): pd.testing.assert_series_equal(result["weight"], expected) +def test_limit_leverage(): + from czsc.eda import limit_leverage + + data = { + "dt": pd.date_range(start="2023-01-01", periods=10, freq="D"), + "symbol": ["TEST"] * 10, + "weight": [0.1, 0.2, -0.3, 3, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0], + "price": [100 + i for i in range(10)], + } + df = pd.DataFrame(data) + + # Test with leverage = 1.0 + df_result = limit_leverage(df, leverage=1.0, copy=True, window=3, min_periods=2) + assert df_result["weight"].max() <= 1.0 + assert df_result["weight"].min() >= -1.0 + + # Test with leverage = 2.0 + df_result = limit_leverage(df, leverage=2.0, copy=True, window=3, min_periods=2) + assert df_result["weight"].max() <= 2.0 + assert df_result["weight"].min() >= -2.0 + + # Test with different window and min_periods + df_result = limit_leverage(df, leverage=1.0, window=5, min_periods=2, copy=True) + assert df_result["weight"].max() <= 1.0 + assert df_result["weight"].min() >= -1.0 + + df1 = df.copy() + df1.rename(columns={"weight": "weight1"}, inplace=True) + # Test with leverage = 1.0 + df_result = limit_leverage(df1, leverage=1.0, copy=True, window=3, min_periods=2, weight="weight1") + assert df_result["weight1"].max() <= 1.0 + assert df_result["weight1"].min() >= -1.0 + + if __name__ == "__main__": pytest.main()