ENH Replace linear cone with bootstrapped non-parametric cone

quantopian · Dec 4, 2015 · 9cc98eb · 9cc98eb
1 parent 19f6503
commit 9cc98eb
Show file tree

Hide file tree

Showing 4 changed files with 145 additions and 193 deletions.
diff --git a/pyfolio/plotting.py b/pyfolio/plotting.py
@@ -521,21 +521,22 @@ def show_perf_stats(returns, factor_returns, live_start_date=None):
     print(perf_stats)
 
 
-def plot_rolling_returns(
-        returns,
-        factor_returns=None,
-        live_start_date=None,
-        cone_std=None,
-        legend_loc='best',
-        volatility_match=False,
-        ax=None, **kwargs):
-    """Plots cumulative rolling returns versus some benchmarks'.
+def plot_rolling_returns(returns,
+                         factor_returns=None,
+                         live_start_date=None,
+                         cone_std=None,
+                         legend_loc='best',
+                         volatility_match=False,
+                         cone_function=timeseries.forecast_cone_bootstrap,
+                         ax=None, **kwargs):
+    """
+    Plots cumulative rolling returns versus some benchmarks.
 
     Backtest returns are in green, and out-of-sample (live trading)
     returns are in red.
 
-    Additionally, a linear cone plot may be added to the out-of-sample
-    returns region.
+    Additionally, a non-parametric cone plot may be added to the
+    out-of-sample returns region.
 
     Parameters
     ----------
@@ -546,19 +547,26 @@ def plot_rolling_returns(
         Daily noncumulative returns of a risk factor.
          - This is in the same style as returns.
     live_start_date : datetime, optional
-        The point in time when the strategy began live trading, after
-        its backtest period.
+        The date when the strategy began live trading, after
+        its backtest period. This date should be normalized.
     cone_std : float, or tuple, optional
         If float, The standard deviation to use for the cone plots.
         If tuple, Tuple of standard deviation values to use for the cone plots
-         - The cone is a normal distribution with this standard deviation
-             centered around a linear regression.
+         - See timeseries.forecast_cone_bootstrap for more details.
     legend_loc : matplotlib.loc, optional
         The location of the legend on the plot.
     volatility_match : bool, optional
         Whether to normalize the volatility of the returns to those of the
         benchmark returns. This helps compare strategies with different
         volatilities. Requires passing of benchmark_rets.
+    cone_function : function, optional
+        Function to use when generating forecast probability cone.
+        The function signature must follow the form:
+        def cone(in_sample_returns (pd.Series),
+                 days_to_project_forward (int),
+                 cone_std= (float, or tuple),
+                 starting_value= (int, or float))
+        See timeseries.forecast_cone_bootstrap for an example.
     ax : matplotlib.Axes, optional
         Axes upon which to plot.
     **kwargs, optional
@@ -570,93 +578,68 @@ def plot_rolling_returns(
         The axes that were plotted on.
 
 """
-    def draw_cone(returns, num_stdev, live_start_date, ax):
-        cone_df = timeseries.cone_rolling(
-            returns,
-            num_stdev=num_stdev,
-            cone_fit_end_date=live_start_date)
-
-        cone_in_sample = cone_df[cone_df.index < live_start_date]
-        cone_out_of_sample = cone_df[cone_df.index > live_start_date]
-        cone_out_of_sample = cone_out_of_sample[
-            cone_out_of_sample.index < returns.index[-1]]
-
-        ax.fill_between(cone_out_of_sample.index,
-                        cone_out_of_sample.sd_down,
-                        cone_out_of_sample.sd_up,
-                        color='steelblue', alpha=0.25)
-
-        return cone_in_sample, cone_out_of_sample
-
     if ax is None:
         ax = plt.gca()
 
+    ax.set_ylabel('Cumulative returns')
+    ax.set_xlabel('')
+
     if volatility_match and factor_returns is None:
         raise ValueError('volatility_match requires passing of'
                          'factor_returns.')
     elif volatility_match and factor_returns is not None:
         bmark_vol = factor_returns.loc[returns.index].std()
         returns = (returns / returns.std()) * bmark_vol
 
-    df_cum_rets = timeseries.cum_returns(returns, 1.0)
+    cum_rets = timeseries.cum_returns(returns, 1.0)
 
     y_axis_formatter = FuncFormatter(utils.one_dec_places)
     ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
 
     if factor_returns is not None:
-        timeseries.cum_returns(factor_returns[df_cum_rets.index], 1.0).plot(
-            lw=2, color='gray', label=factor_returns.name, alpha=0.60,
-            ax=ax, **kwargs)
+        cum_factor_returns = timeseries.cum_returns(
+            factor_returns[cum_rets.index], 1.0)
+        cum_factor_returns.plot(lw=2, color='gray',
+                                label=factor_returns.name, alpha=0.60,
+                                ax=ax, **kwargs)
+
     if live_start_date is not None:
         live_start_date = utils.get_utc_timestamp(live_start_date)
-
-    if (live_start_date is None) or (df_cum_rets.index[-1] <=
-                                     live_start_date):
-        df_cum_rets.plot(lw=3, color='forestgreen', alpha=0.6,
-                         label='Backtest', ax=ax, **kwargs)
+        is_cum_returns = cum_rets.loc[cum_rets.index < live_start_date]
+        oos_cum_returns = cum_rets.loc[cum_rets.index >= live_start_date]
     else:
-        df_cum_rets[:live_start_date].plot(
-            lw=3, color='forestgreen', alpha=0.6,
-            label='Backtest', ax=ax, **kwargs)
-        df_cum_rets[live_start_date:].plot(
-            lw=4, color='red', alpha=0.6,
-            label='Live', ax=ax, **kwargs)
+        is_cum_returns = cum_rets
+        oos_cum_returns = pd.Series([])
+
+    is_cum_returns.plot(lw=3, color='forestgreen', alpha=0.6,
+                        label='Backtest', ax=ax, **kwargs)
+
+    if len(oos_cum_returns) > 0:
+        oos_cum_returns.plot(lw=4, color='red', alpha=0.6,
+                             label='Live', ax=ax, **kwargs)
 
         if cone_std is not None:
-            # check to see if cone_std was passed as a single value and,
-            # if so, just convert to list automatically
-            if isinstance(cone_std, float):
+            if isinstance(cone_std, (float, int)):
                 cone_std = [cone_std]
 
-            for cone_i in cone_std:
-                cone_in_sample, cone_out_of_sample = draw_cone(
-                    returns,
-                    cone_i,
-                    live_start_date,
-                    ax)
-
-            cone_in_sample['line'].plot(
-                ax=ax,
-                ls='--',
-                label='Backtest trend',
-                lw=2,
-                color='forestgreen',
-                alpha=0.7,
-                **kwargs)
-            cone_out_of_sample['line'].plot(
-                ax=ax,
-                ls='--',
-                label='Predicted trend',
-                lw=2,
-                color='red',
-                alpha=0.7,
-                **kwargs)
+            is_returns = returns.loc[returns.index < live_start_date]
+            cone_bounds = cone_function(
+                is_returns,
+                len(oos_cum_returns),
+                cone_std=cone_std,
+                starting_value=is_cum_returns[-1])
+
+            cone_bounds = cone_bounds.set_index(oos_cum_returns.index)
+
+            for std in cone_std:
+                ax.fill_between(cone_bounds.index,
+                                cone_bounds[float(std)],
+                                cone_bounds[float(-std)],
+                                color='steelblue', alpha=0.5)
 
+    if legend_loc is not None:
+        ax.legend(loc=legend_loc)
     ax.axhline(1.0, linestyle='--', color='black', lw=2)
-    ax.set_ylabel('Cumulative returns')
-    ax.set_title('Cumulative Returns')
-    ax.legend(loc=legend_loc)
-    ax.set_xlabel('')
 
     return ax
 

diff --git a/pyfolio/tears.py b/pyfolio/tears.py
@@ -120,7 +120,7 @@ def create_full_tear_sheet(returns,
         - See txn.adjust_returns_for_slippage for more details.
     live_start_date : datetime, optional
         The point in time when the strategy began live trading,
-        after its backtest period.
+        after its backtest period. This datetime should be normalized.
     hide_positions : bool, optional
         If True, will not output any symbol names.
     bayesian: boolean, optional
@@ -275,13 +275,16 @@ def create_returns_tear_sheet(returns, live_start_date=None,
         live_start_date=live_start_date,
         cone_std=cone_std,
         ax=ax_rolling_returns)
+    ax_rolling_returns.set_title(
+        'Cumulative Returns')
 
     plotting.plot_rolling_returns(
         returns,
         factor_returns=benchmark_rets,
         live_start_date=live_start_date,
         cone_std=None,
         volatility_match=True,
+        legend_loc=None,
         ax=ax_rolling_returns_vol_match)
     ax_rolling_returns_vol_match.set_title(
         'Cumulative returns volatility matched to benchmark.')

diff --git a/pyfolio/tests/test_timeseries.py b/pyfolio/tests/test_timeseries.py
@@ -2,6 +2,7 @@
 
 from unittest import TestCase
 from nose_parameterized import parameterized
+from numpy.testing import assert_allclose
 
 import numpy as np
 import pandas as pd
@@ -365,3 +366,30 @@ def test_calc_multifactor(self, returns, factors, expected):
                 returns,
                 factors).values.tolist(),
             expected)
+
+
+class TestCone(TestCase):
+    def test_bootstrap_cone_against_linear_cone_normal_returns(self):
+        random_seed = 100
+        np.random.seed(random_seed)
+        days_forward = 200
+        cone_stdevs = [1, 1.5, 2]
+        mu = .005
+        sigma = .002
+        rets = pd.Series(np.random.normal(mu, sigma, 10000))
+
+        midline = np.cumprod(1 + (rets.mean() * np.ones(days_forward)))
+        stdev = rets.std() * midline * np.sqrt(np.arange(days_forward)+1)
+
+        normal_cone = pd.DataFrame(columns=pd.Float64Index([]))
+        for s in cone_stdevs:
+            normal_cone[s] = midline + s * stdev
+            normal_cone[-s] = midline - s * stdev
+
+        bootstrap_cone = timeseries.forecast_cone_bootstrap(
+            rets, days_forward, cone_stdevs, starting_value=1,
+            random_seed=random_seed, num_samples=10000)
+
+        for col, vals in bootstrap_cone.iteritems():
+            expected = normal_cone[col].values
+            assert_allclose(vals.values, expected, rtol=.005)