From 9cc98eb4c3535d8488bd8f10532704b66a1cf35c Mon Sep 17 00:00:00 2001 From: Andrew Campbell Date: Mon, 23 Nov 2015 16:54:20 -0500 Subject: [PATCH] ENH Replace linear cone with bootstrapped non-parametric cone --- pyfolio/plotting.py | 139 ++++++++++++-------------- pyfolio/tears.py | 5 +- pyfolio/tests/test_timeseries.py | 28 ++++++ pyfolio/timeseries.py | 166 ++++++++++--------------------- 4 files changed, 145 insertions(+), 193 deletions(-) diff --git a/pyfolio/plotting.py b/pyfolio/plotting.py index 29a751f0..1d581de1 100644 --- a/pyfolio/plotting.py +++ b/pyfolio/plotting.py @@ -521,21 +521,22 @@ def show_perf_stats(returns, factor_returns, live_start_date=None): print(perf_stats) -def plot_rolling_returns( - returns, - factor_returns=None, - live_start_date=None, - cone_std=None, - legend_loc='best', - volatility_match=False, - ax=None, **kwargs): - """Plots cumulative rolling returns versus some benchmarks'. +def plot_rolling_returns(returns, + factor_returns=None, + live_start_date=None, + cone_std=None, + legend_loc='best', + volatility_match=False, + cone_function=timeseries.forecast_cone_bootstrap, + ax=None, **kwargs): + """ + Plots cumulative rolling returns versus some benchmarks'. Backtest returns are in green, and out-of-sample (live trading) returns are in red. - Additionally, a linear cone plot may be added to the out-of-sample - returns region. + Additionally, a non-parametric cone plot may be added to the + out-of-sample returns region. Parameters ---------- @@ -546,19 +547,26 @@ def plot_rolling_returns( Daily noncumulative returns of a risk factor. - This is in the same style as returns. live_start_date : datetime, optional - The point in time when the strategy began live trading, after - its backtest period. + The date when the strategy began live trading, after + its backtest period. This date should be normalized. cone_std : float, or tuple, optional If float, The standard deviation to use for the cone plots. 
If tuple, Tuple of standard deviation values to use for the cone plots - - The cone is a normal distribution with this standard deviation - centered around a linear regression. + - See timeseries.forecast_cone_bootstrap for more details. legend_loc : matplotlib.loc, optional The location of the legend on the plot. volatility_match : bool, optional Whether to normalize the volatility of the returns to those of the benchmark returns. This helps compare strategies with different volatilities. Requires passing of benchmark_rets. + cone_function : function, optional + Function to use when generating forecast probability cone. + The function signature must follow the form: + def cone(in_sample_returns (pd.Series), + days_to_project_forward (int), + cone_std= (float, or tuple), + starting_value= (int, or float)) + See timeseries.forecast_cone_bootstrap for an example. ax : matplotlib.Axes, optional Axes upon which to plot. **kwargs, optional @@ -570,27 +578,12 @@ def plot_rolling_returns( The axes that were plotted on. 
""" - def draw_cone(returns, num_stdev, live_start_date, ax): - cone_df = timeseries.cone_rolling( - returns, - num_stdev=num_stdev, - cone_fit_end_date=live_start_date) - - cone_in_sample = cone_df[cone_df.index < live_start_date] - cone_out_of_sample = cone_df[cone_df.index > live_start_date] - cone_out_of_sample = cone_out_of_sample[ - cone_out_of_sample.index < returns.index[-1]] - - ax.fill_between(cone_out_of_sample.index, - cone_out_of_sample.sd_down, - cone_out_of_sample.sd_up, - color='steelblue', alpha=0.25) - - return cone_in_sample, cone_out_of_sample - if ax is None: ax = plt.gca() + ax.set_ylabel('Cumulative returns') + ax.set_xlabel('') + if volatility_match and factor_returns is None: raise ValueError('volatility_match requires passing of' 'factor_returns.') @@ -598,65 +591,55 @@ def draw_cone(returns, num_stdev, live_start_date, ax): bmark_vol = factor_returns.loc[returns.index].std() returns = (returns / returns.std()) * bmark_vol - df_cum_rets = timeseries.cum_returns(returns, 1.0) + cum_rets = timeseries.cum_returns(returns, 1.0) y_axis_formatter = FuncFormatter(utils.one_dec_places) ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter)) if factor_returns is not None: - timeseries.cum_returns(factor_returns[df_cum_rets.index], 1.0).plot( - lw=2, color='gray', label=factor_returns.name, alpha=0.60, - ax=ax, **kwargs) + cum_factor_returns = timeseries.cum_returns( + factor_returns[cum_rets.index], 1.0) + cum_factor_returns.plot(lw=2, color='gray', + label=factor_returns.name, alpha=0.60, + ax=ax, **kwargs) + if live_start_date is not None: live_start_date = utils.get_utc_timestamp(live_start_date) - - if (live_start_date is None) or (df_cum_rets.index[-1] <= - live_start_date): - df_cum_rets.plot(lw=3, color='forestgreen', alpha=0.6, - label='Backtest', ax=ax, **kwargs) + is_cum_returns = cum_rets.loc[cum_rets.index < live_start_date] + oos_cum_returns = cum_rets.loc[cum_rets.index >= live_start_date] else: - 
df_cum_rets[:live_start_date].plot( - lw=3, color='forestgreen', alpha=0.6, - label='Backtest', ax=ax, **kwargs) - df_cum_rets[live_start_date:].plot( - lw=4, color='red', alpha=0.6, - label='Live', ax=ax, **kwargs) + is_cum_returns = cum_rets + oos_cum_returns = pd.Series([]) + + is_cum_returns.plot(lw=3, color='forestgreen', alpha=0.6, + label='Backtest', ax=ax, **kwargs) + + if len(oos_cum_returns) > 0: + oos_cum_returns.plot(lw=4, color='red', alpha=0.6, + label='Live', ax=ax, **kwargs) if cone_std is not None: - # check to see if cone_std was passed as a single value and, - # if so, just convert to list automatically - if isinstance(cone_std, float): + if isinstance(cone_std, (float, int)): cone_std = [cone_std] - for cone_i in cone_std: - cone_in_sample, cone_out_of_sample = draw_cone( - returns, - cone_i, - live_start_date, - ax) - - cone_in_sample['line'].plot( - ax=ax, - ls='--', - label='Backtest trend', - lw=2, - color='forestgreen', - alpha=0.7, - **kwargs) - cone_out_of_sample['line'].plot( - ax=ax, - ls='--', - label='Predicted trend', - lw=2, - color='red', - alpha=0.7, - **kwargs) + is_returns = returns.loc[returns.index < live_start_date] + cone_bounds = cone_function( + is_returns, + len(oos_cum_returns), + cone_std=cone_std, + starting_value=is_cum_returns[-1]) + + cone_bounds = cone_bounds.set_index(oos_cum_returns.index) + + for std in cone_std: + ax.fill_between(cone_bounds.index, + cone_bounds[float(std)], + cone_bounds[float(-std)], + color='steelblue', alpha=0.5) + if legend_loc is not None: + ax.legend(loc=legend_loc) ax.axhline(1.0, linestyle='--', color='black', lw=2) - ax.set_ylabel('Cumulative returns') - ax.set_title('Cumulative Returns') - ax.legend(loc=legend_loc) - ax.set_xlabel('') return ax diff --git a/pyfolio/tears.py b/pyfolio/tears.py index 333fa1e4..32355703 100644 --- a/pyfolio/tears.py +++ b/pyfolio/tears.py @@ -120,7 +120,7 @@ def create_full_tear_sheet(returns, - See txn.adjust_returns_for_slippage for more details. 
live_start_date : datetime, optional The point in time when the strategy began live trading, - after its backtest period. + after its backtest period. This datetime should be normalized. hide_positions : bool, optional If True, will not output any symbol names. bayesian: boolean, optional @@ -275,6 +275,8 @@ def create_returns_tear_sheet(returns, live_start_date=None, live_start_date=live_start_date, cone_std=cone_std, ax=ax_rolling_returns) + ax_rolling_returns.set_title( + 'Cumulative Returns') plotting.plot_rolling_returns( returns, @@ -282,6 +284,7 @@ def create_returns_tear_sheet(returns, live_start_date=None, live_start_date=live_start_date, cone_std=None, volatility_match=True, + legend_loc=None, ax=ax_rolling_returns_vol_match) ax_rolling_returns_vol_match.set_title( 'Cumulative returns volatility matched to benchmark.') diff --git a/pyfolio/tests/test_timeseries.py b/pyfolio/tests/test_timeseries.py index 822e332d..2a486012 100644 --- a/pyfolio/tests/test_timeseries.py +++ b/pyfolio/tests/test_timeseries.py @@ -2,6 +2,7 @@ from unittest import TestCase from nose_parameterized import parameterized +from numpy.testing import assert_allclose import numpy as np import pandas as pd @@ -365,3 +366,30 @@ def test_calc_multifactor(self, returns, factors, expected): returns, factors).values.tolist(), expected) + + +class TestCone(TestCase): + def test_bootstrap_cone_against_linear_cone_normal_returns(self): + random_seed = 100 + np.random.seed(random_seed) + days_forward = 200 + cone_stdevs = [1, 1.5, 2] + mu = .005 + sigma = .002 + rets = pd.Series(np.random.normal(mu, sigma, 10000)) + + midline = np.cumprod(1 + (rets.mean() * np.ones(days_forward))) + stdev = rets.std() * midline * np.sqrt(np.arange(days_forward)+1) + + normal_cone = pd.DataFrame(columns=pd.Float64Index([])) + for s in cone_stdevs: + normal_cone[s] = midline + s * stdev + normal_cone[-s] = midline - s * stdev + + bootstrap_cone = timeseries.forecast_cone_bootstrap( + rets, days_forward, 
cone_stdevs, starting_value=1, + random_seed=random_seed, num_samples=10000) + + for col, vals in bootstrap_cone.iteritems(): + expected = normal_cone[col].values + assert_allclose(vals.values, expected, rtol=.005) diff --git a/pyfolio/timeseries.py b/pyfolio/timeseries.py index 33313948..e5504e8f 100644 --- a/pyfolio/timeseries.py +++ b/pyfolio/timeseries.py @@ -985,129 +985,67 @@ def rolling_sharpe(returns, rolling_sharpe_window): * np.sqrt(APPROX_BDAYS_PER_YEAR) -def cone_rolling( - input_rets, - num_stdev=1.0, - warm_up_days_pct=0.5, - std_scale_factor=APPROX_BDAYS_PER_YEAR, - update_std_oos_rolling=False, - cone_fit_end_date=None, - extend_fit_trend=True, - create_future_cone=True): - """Computes a rolling cone to place in the cumulative returns - plot. See plotting.plot_rolling_returns. +def forecast_cone_bootstrap(is_returns, num_days, cone_std=[1, 1.5, 2], + starting_value=1, num_samples=1000, + random_seed=None): """ + Determines the upper and lower bounds of an n standard deviation + cone of forecasted cumulative returns. Future cumulative mean and + standard deviation are computed by repeatedly sampling from the + in-sample daily returns (i.e. bootstrap). This cone is non-parametric, + meaning it does not assume that returns are normally distributed. - # if specifying 'cone_fit_end_date' please use a pandas compatible format, - # e.g. 
'2015-8-4', 'YYYY-MM-DD' - - warm_up_days = int(warm_up_days_pct * input_rets.size) - - # create initial linear fit from beginning of timeseries thru warm_up_days - # or the specified 'cone_fit_end_date' - if cone_fit_end_date is None: - returns = input_rets[:warm_up_days] - else: - returns = input_rets[input_rets.index < cone_fit_end_date] - - perf_ts = cum_returns(returns, 1) - - X = list(range(0, perf_ts.size)) - X = sm.add_constant(X) - sm.OLS(perf_ts, list(range(0, len(perf_ts)))) - line_ols = sm.OLS(perf_ts.values, X).fit() - fit_line_ols_coef = line_ols.params[1] - fit_line_ols_inter = line_ols.params[0] - - x_points = list(range(0, perf_ts.size)) - x_points = np.array(x_points) * fit_line_ols_coef + fit_line_ols_inter + Parameters + ---------- + is_returns : pd.Series + In-sample daily returns of the strategy, noncumulative. + - See full explanation in tears.create_full_tear_sheet. + num_days : int + Number of days to project the probability cone forward. + cone_std : int, float, or list of int/float + Number of standard deviations to use in the boundaries of + the cone. If multiple values are passed, cone bounds will + be generated for each value. + starting_value : int or float + Starting value of the out of sample period. + num_samples : int + Number of samples to draw from the in-sample daily returns. + Each sample will be an array with length num_days. + A higher number of samples will generate a more accurate + bootstrap cone. + random_seed : int + Seed for the pseudorandom number generator used by the pandas + sample method. - perf_ts_r = pd.DataFrame(perf_ts) - perf_ts_r.columns = ['perf'] + Returns + ------- + pd.DataFrame + Contains upper and lower cone boundaries. Column names are + floats corresponding to the number of standard deviations + above (positive) or below (negative) the projected mean + cumulative returns. 
+ """ - warm_up_std_pct = np.std(perf_ts.pct_change().dropna()) - std_pct = warm_up_std_pct * np.sqrt(std_scale_factor) + samples = np.empty((num_samples, num_days)) + seed = np.random.RandomState(seed=random_seed) + for i in range(num_samples): + samples[i, :] = is_returns.sample(num_days, replace=True, + random_state=seed) - perf_ts_r['line'] = x_points - perf_ts_r['sd_up'] = perf_ts_r['line'] * (1 + num_stdev * std_pct) - perf_ts_r['sd_down'] = perf_ts_r['line'] * (1 - num_stdev * std_pct) + cum_samples = np.cumprod(1 + samples, axis=1) * starting_value - std_pct = warm_up_std_pct * np.sqrt(std_scale_factor) + cum_mean = cum_samples.mean(axis=0) + cum_std = cum_samples.std(axis=0) - last_backtest_day_index = returns.index[-1] - cone_end_rets = input_rets[input_rets.index > last_backtest_day_index] - new_cone_day_scale_factor = int(1) - oos_intercept_shift = perf_ts_r.perf[-1] - perf_ts_r.line[-1] + if isinstance(cone_std, (float, int)): + cone_std = [cone_std] - # make the cone for the out-of-sample/live papertrading period - for i in cone_end_rets.index: - returns = input_rets[:i] - perf_ts = cum_returns(returns, 1) + cone_bounds = pd.DataFrame(columns=pd.Float64Index([])) + for num_std in cone_std: + cone_bounds.loc[:, float(num_std)] = cum_mean + cum_std * num_std + cone_bounds.loc[:, float(-num_std)] = cum_mean - cum_std * num_std - if extend_fit_trend: - line_ols_coef = fit_line_ols_coef - line_ols_inter = fit_line_ols_inter - else: - X = list(range(0, perf_ts.size)) - X = sm.add_constant(X) - sm.OLS(perf_ts, list(range(0, len(perf_ts)))) - line_ols = sm.OLS(perf_ts.values, X).fit() - line_ols_coef = line_ols.params[1] - line_ols_inter = line_ols.params[0] - - x_points = list(range(0, perf_ts.size)) - x_points = np.array(x_points) * line_ols_coef + \ - line_ols_inter + oos_intercept_shift - - temp_line = x_points - if update_std_oos_rolling: - std_pct = np.sqrt(new_cone_day_scale_factor) * \ - np.std(perf_ts.pct_change().dropna()) - else: - std_pct = 
np.sqrt(new_cone_day_scale_factor) * warm_up_std_pct - - temp_sd_up = temp_line * (1 + num_stdev * std_pct) - temp_sd_down = temp_line * (1 - num_stdev * std_pct) - - new_daily_cone = pd.DataFrame(index=[i], - data={'perf': perf_ts[i], - 'line': temp_line[-1], - 'sd_up': temp_sd_up[-1], - 'sd_down': temp_sd_down[-1]}) - - perf_ts_r = perf_ts_r.append(new_daily_cone) - new_cone_day_scale_factor += 1 - - if create_future_cone: - extend_ahead_days = APPROX_BDAYS_PER_YEAR - future_cone_dates = pd.date_range( - cone_end_rets.index[-1], periods=extend_ahead_days, freq='B') - - future_cone_intercept_shift = perf_ts_r.perf[-1] - perf_ts_r.line[-1] - - future_days_scale_factor = np.linspace( - 1, - extend_ahead_days, - extend_ahead_days) - std_pct = np.sqrt(future_days_scale_factor) * warm_up_std_pct - - x_points = list(range(perf_ts.size, perf_ts.size + extend_ahead_days)) - x_points = np.array(x_points) * line_ols_coef + line_ols_inter + \ - oos_intercept_shift + future_cone_intercept_shift - temp_line = x_points - temp_sd_up = temp_line * (1 + num_stdev * std_pct) - temp_sd_down = temp_line * (1 - num_stdev * std_pct) - - future_cone = pd.DataFrame(index=list(map(np.datetime64, - future_cone_dates)), - data={'perf': temp_line, - 'line': temp_line, - 'sd_up': temp_sd_up, - 'sd_down': temp_sd_down}) - - perf_ts_r = perf_ts_r.append(future_cone) - - return perf_ts_r + return cone_bounds def extract_interesting_date_ranges(returns):