From 9cc98eb4c3535d8488bd8f10532704b66a1cf35c Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrew@quantopian.com>
Date: Mon, 23 Nov 2015 16:54:20 -0500
Subject: [PATCH] ENH Replace linear cone with bootstrapped non-parametric cone

---
 pyfolio/plotting.py              | 139 ++++++++++++--------------
 pyfolio/tears.py                 |   5 +-
 pyfolio/tests/test_timeseries.py |  28 ++++++
 pyfolio/timeseries.py            | 166 ++++++++++---------------------
 4 files changed, 145 insertions(+), 193 deletions(-)

diff --git a/pyfolio/plotting.py b/pyfolio/plotting.py
index 29a751f0..1d581de1 100644
--- a/pyfolio/plotting.py
+++ b/pyfolio/plotting.py
@@ -521,21 +521,22 @@ def show_perf_stats(returns, factor_returns, live_start_date=None):
     print(perf_stats)
 
 
-def plot_rolling_returns(
-        returns,
-        factor_returns=None,
-        live_start_date=None,
-        cone_std=None,
-        legend_loc='best',
-        volatility_match=False,
-        ax=None, **kwargs):
-    """Plots cumulative rolling returns versus some benchmarks'.
+def plot_rolling_returns(returns,
+                         factor_returns=None,
+                         live_start_date=None,
+                         cone_std=None,
+                         legend_loc='best',
+                         volatility_match=False,
+                         cone_function=timeseries.forecast_cone_bootstrap,
+                         ax=None, **kwargs):
+    """
+    Plots cumulative rolling returns versus some benchmarks.
 
     Backtest returns are in green, and out-of-sample (live trading)
     returns are in red.
 
-    Additionally, a linear cone plot may be added to the out-of-sample
-    returns region.
+    Additionally, a non-parametric cone plot may be added to the
+    out-of-sample returns region.
 
     Parameters
     ----------
@@ -546,19 +547,26 @@ def plot_rolling_returns(
         Daily noncumulative returns of a risk factor.
          - This is in the same style as returns.
     live_start_date : datetime, optional
-        The point in time when the strategy began live trading, after
-        its backtest period.
+        The date when the strategy began live trading, after
+        its backtest period. This date should be normalized.
     cone_std : float, or tuple, optional
         If float, The standard deviation to use for the cone plots.
         If tuple, Tuple of standard deviation values to use for the cone plots
-         - The cone is a normal distribution with this standard deviation
-             centered around a linear regression.
+         - See timeseries.forecast_cone_bootstrap for more details.
     legend_loc : matplotlib.loc, optional
         The location of the legend on the plot.
     volatility_match : bool, optional
         Whether to normalize the volatility of the returns to those of the
         benchmark returns. This helps compare strategies with different
         volatilities. Requires passing of benchmark_rets.
+    cone_function : function, optional
+        Function to use when generating forecast probability cone.
+        The function signature must follow the form:
+        def cone(in_sample_returns (pd.Series),
+                 days_to_project_forward (int),
+                 cone_std= (float, or tuple),
+                 starting_value= (int, or float))
+        See timeseries.forecast_cone_bootstrap for an example.
     ax : matplotlib.Axes, optional
         Axes upon which to plot.
     **kwargs, optional
@@ -570,27 +578,12 @@ def plot_rolling_returns(
         The axes that were plotted on.
 
 """
-    def draw_cone(returns, num_stdev, live_start_date, ax):
-        cone_df = timeseries.cone_rolling(
-            returns,
-            num_stdev=num_stdev,
-            cone_fit_end_date=live_start_date)
-
-        cone_in_sample = cone_df[cone_df.index < live_start_date]
-        cone_out_of_sample = cone_df[cone_df.index > live_start_date]
-        cone_out_of_sample = cone_out_of_sample[
-            cone_out_of_sample.index < returns.index[-1]]
-
-        ax.fill_between(cone_out_of_sample.index,
-                        cone_out_of_sample.sd_down,
-                        cone_out_of_sample.sd_up,
-                        color='steelblue', alpha=0.25)
-
-        return cone_in_sample, cone_out_of_sample
-
     if ax is None:
         ax = plt.gca()
 
+    ax.set_ylabel('Cumulative returns')
+    ax.set_xlabel('')
+
     if volatility_match and factor_returns is None:
         raise ValueError('volatility_match requires passing of'
                          'factor_returns.')
@@ -598,65 +591,55 @@ def draw_cone(returns, num_stdev, live_start_date, ax):
         bmark_vol = factor_returns.loc[returns.index].std()
         returns = (returns / returns.std()) * bmark_vol
 
-    df_cum_rets = timeseries.cum_returns(returns, 1.0)
+    cum_rets = timeseries.cum_returns(returns, 1.0)
 
     y_axis_formatter = FuncFormatter(utils.one_dec_places)
     ax.yaxis.set_major_formatter(FuncFormatter(y_axis_formatter))
 
     if factor_returns is not None:
-        timeseries.cum_returns(factor_returns[df_cum_rets.index], 1.0).plot(
-            lw=2, color='gray', label=factor_returns.name, alpha=0.60,
-            ax=ax, **kwargs)
+        cum_factor_returns = timeseries.cum_returns(
+            factor_returns[cum_rets.index], 1.0)
+        cum_factor_returns.plot(lw=2, color='gray',
+                                label=factor_returns.name, alpha=0.60,
+                                ax=ax, **kwargs)
+
     if live_start_date is not None:
         live_start_date = utils.get_utc_timestamp(live_start_date)
-
-    if (live_start_date is None) or (df_cum_rets.index[-1] <=
-                                     live_start_date):
-        df_cum_rets.plot(lw=3, color='forestgreen', alpha=0.6,
-                         label='Backtest', ax=ax, **kwargs)
+        is_cum_returns = cum_rets.loc[cum_rets.index < live_start_date]
+        oos_cum_returns = cum_rets.loc[cum_rets.index >= live_start_date]
     else:
-        df_cum_rets[:live_start_date].plot(
-            lw=3, color='forestgreen', alpha=0.6,
-            label='Backtest', ax=ax, **kwargs)
-        df_cum_rets[live_start_date:].plot(
-            lw=4, color='red', alpha=0.6,
-            label='Live', ax=ax, **kwargs)
+        is_cum_returns = cum_rets
+        oos_cum_returns = pd.Series([])
+
+    is_cum_returns.plot(lw=3, color='forestgreen', alpha=0.6,
+                        label='Backtest', ax=ax, **kwargs)
+
+    if len(oos_cum_returns) > 0:
+        oos_cum_returns.plot(lw=4, color='red', alpha=0.6,
+                             label='Live', ax=ax, **kwargs)
 
         if cone_std is not None:
-            # check to see if cone_std was passed as a single value and,
-            # if so, just convert to list automatically
-            if isinstance(cone_std, float):
+            if isinstance(cone_std, (float, int)):
                 cone_std = [cone_std]
 
-            for cone_i in cone_std:
-                cone_in_sample, cone_out_of_sample = draw_cone(
-                    returns,
-                    cone_i,
-                    live_start_date,
-                    ax)
-
-            cone_in_sample['line'].plot(
-                ax=ax,
-                ls='--',
-                label='Backtest trend',
-                lw=2,
-                color='forestgreen',
-                alpha=0.7,
-                **kwargs)
-            cone_out_of_sample['line'].plot(
-                ax=ax,
-                ls='--',
-                label='Predicted trend',
-                lw=2,
-                color='red',
-                alpha=0.7,
-                **kwargs)
+            is_returns = returns.loc[returns.index < live_start_date]
+            cone_bounds = cone_function(
+                is_returns,
+                len(oos_cum_returns),
+                cone_std=cone_std,
+                starting_value=is_cum_returns[-1])
+
+            cone_bounds = cone_bounds.set_index(oos_cum_returns.index)
+
+            for std in cone_std:
+                ax.fill_between(cone_bounds.index,
+                                cone_bounds[float(std)],
+                                cone_bounds[float(-std)],
+                                color='steelblue', alpha=0.5)
 
+    if legend_loc is not None:
+        ax.legend(loc=legend_loc)
     ax.axhline(1.0, linestyle='--', color='black', lw=2)
-    ax.set_ylabel('Cumulative returns')
-    ax.set_title('Cumulative Returns')
-    ax.legend(loc=legend_loc)
-    ax.set_xlabel('')
 
     return ax
 
diff --git a/pyfolio/tears.py b/pyfolio/tears.py
index 333fa1e4..32355703 100644
--- a/pyfolio/tears.py
+++ b/pyfolio/tears.py
@@ -120,7 +120,7 @@ def create_full_tear_sheet(returns,
         - See txn.adjust_returns_for_slippage for more details.
     live_start_date : datetime, optional
         The point in time when the strategy began live trading,
-        after its backtest period.
+        after its backtest period. This datetime should be normalized.
     hide_positions : bool, optional
         If True, will not output any symbol names.
     bayesian: boolean, optional
@@ -275,6 +275,8 @@ def create_returns_tear_sheet(returns, live_start_date=None,
         live_start_date=live_start_date,
         cone_std=cone_std,
         ax=ax_rolling_returns)
+    ax_rolling_returns.set_title(
+        'Cumulative Returns')
 
     plotting.plot_rolling_returns(
         returns,
@@ -282,6 +284,7 @@ def create_returns_tear_sheet(returns, live_start_date=None,
         live_start_date=live_start_date,
         cone_std=None,
         volatility_match=True,
+        legend_loc=None,
         ax=ax_rolling_returns_vol_match)
     ax_rolling_returns_vol_match.set_title(
         'Cumulative returns volatility matched to benchmark.')
diff --git a/pyfolio/tests/test_timeseries.py b/pyfolio/tests/test_timeseries.py
index 822e332d..2a486012 100644
--- a/pyfolio/tests/test_timeseries.py
+++ b/pyfolio/tests/test_timeseries.py
@@ -2,6 +2,7 @@
 
 from unittest import TestCase
 from nose_parameterized import parameterized
+from numpy.testing import assert_allclose
 
 import numpy as np
 import pandas as pd
@@ -365,3 +366,30 @@ def test_calc_multifactor(self, returns, factors, expected):
                 returns,
                 factors).values.tolist(),
             expected)
+
+
+class TestCone(TestCase):
+    def test_bootstrap_cone_against_linear_cone_normal_returns(self):
+        random_seed = 100
+        np.random.seed(random_seed)
+        days_forward = 200
+        cone_stdevs = [1, 1.5, 2]
+        mu = .005
+        sigma = .002
+        rets = pd.Series(np.random.normal(mu, sigma, 10000))
+
+        midline = np.cumprod(1 + (rets.mean() * np.ones(days_forward)))
+        stdev = rets.std() * midline * np.sqrt(np.arange(days_forward)+1)
+
+        normal_cone = pd.DataFrame(columns=pd.Float64Index([]))
+        for s in cone_stdevs:
+            normal_cone[s] = midline + s * stdev
+            normal_cone[-s] = midline - s * stdev
+
+        bootstrap_cone = timeseries.forecast_cone_bootstrap(
+            rets, days_forward, cone_stdevs, starting_value=1,
+            random_seed=random_seed, num_samples=10000)
+
+        for col, vals in bootstrap_cone.iteritems():
+            expected = normal_cone[col].values
+            assert_allclose(vals.values, expected, rtol=.005)
diff --git a/pyfolio/timeseries.py b/pyfolio/timeseries.py
index 33313948..e5504e8f 100644
--- a/pyfolio/timeseries.py
+++ b/pyfolio/timeseries.py
@@ -985,129 +985,67 @@ def rolling_sharpe(returns, rolling_sharpe_window):
         * np.sqrt(APPROX_BDAYS_PER_YEAR)
 
 
-def cone_rolling(
-        input_rets,
-        num_stdev=1.0,
-        warm_up_days_pct=0.5,
-        std_scale_factor=APPROX_BDAYS_PER_YEAR,
-        update_std_oos_rolling=False,
-        cone_fit_end_date=None,
-        extend_fit_trend=True,
-        create_future_cone=True):
-    """Computes a rolling cone to place in the cumulative returns
-    plot. See plotting.plot_rolling_returns.
+def forecast_cone_bootstrap(is_returns, num_days, cone_std=[1, 1.5, 2],
+                            starting_value=1, num_samples=1000,
+                            random_seed=None):
     """
+    Determines the upper and lower bounds of an n standard deviation
+    cone of forecasted cumulative returns. Future cumulative mean and
+    standard deviation are computed by repeatedly sampling from the
+    in-sample daily returns (i.e. bootstrap). This cone is non-parametric,
+    meaning it does not assume that returns are normally distributed.
 
-    # if specifying 'cone_fit_end_date' please use a pandas compatible format,
-    # e.g. '2015-8-4', 'YYYY-MM-DD'
-
-    warm_up_days = int(warm_up_days_pct * input_rets.size)
-
-    # create initial linear fit from beginning of timeseries thru warm_up_days
-    # or the specified 'cone_fit_end_date'
-    if cone_fit_end_date is None:
-        returns = input_rets[:warm_up_days]
-    else:
-        returns = input_rets[input_rets.index < cone_fit_end_date]
-
-    perf_ts = cum_returns(returns, 1)
-
-    X = list(range(0, perf_ts.size))
-    X = sm.add_constant(X)
-    sm.OLS(perf_ts, list(range(0, len(perf_ts))))
-    line_ols = sm.OLS(perf_ts.values, X).fit()
-    fit_line_ols_coef = line_ols.params[1]
-    fit_line_ols_inter = line_ols.params[0]
-
-    x_points = list(range(0, perf_ts.size))
-    x_points = np.array(x_points) * fit_line_ols_coef + fit_line_ols_inter
+    Parameters
+    ----------
+    is_returns : pd.Series
+        In-sample daily returns of the strategy, noncumulative.
+         - See full explanation in tears.create_full_tear_sheet.
+    num_days : int
+        Number of days to project the probability cone forward.
+    cone_std : int, float, or list of int/float
+        Number of standard deviations to use in the boundaries of
+        the cone. If multiple values are passed, cone bounds will
+        be generated for each value.
+    starting_value : int or float
+        Starting value of the out of sample period.
+    num_samples : int
+        Number of samples to draw from the in-sample daily returns.
+        Each sample will be an array with length num_days.
+        A higher number of samples will generate a more accurate
+        bootstrap cone.
+    random_seed : int
+        Seed for the pseudorandom number generator used by the pandas
+        sample method.
 
-    perf_ts_r = pd.DataFrame(perf_ts)
-    perf_ts_r.columns = ['perf']
+    Returns
+    -------
+    pd.DataFrame
+        Contains upper and lower cone boundaries. Column names are
+        floats corresponding to the number of standard deviations
+        above (positive) or below (negative) the projected mean
+        cumulative returns.
+    """
 
-    warm_up_std_pct = np.std(perf_ts.pct_change().dropna())
-    std_pct = warm_up_std_pct * np.sqrt(std_scale_factor)
+    samples = np.empty((num_samples, num_days))
+    seed = np.random.RandomState(seed=random_seed)
+    for i in range(num_samples):
+        samples[i, :] = is_returns.sample(num_days, replace=True,
+                                          random_state=seed)
 
-    perf_ts_r['line'] = x_points
-    perf_ts_r['sd_up'] = perf_ts_r['line'] * (1 + num_stdev * std_pct)
-    perf_ts_r['sd_down'] = perf_ts_r['line'] * (1 - num_stdev * std_pct)
+    cum_samples = np.cumprod(1 + samples, axis=1) * starting_value
 
-    std_pct = warm_up_std_pct * np.sqrt(std_scale_factor)
+    cum_mean = cum_samples.mean(axis=0)
+    cum_std = cum_samples.std(axis=0)
 
-    last_backtest_day_index = returns.index[-1]
-    cone_end_rets = input_rets[input_rets.index > last_backtest_day_index]
-    new_cone_day_scale_factor = int(1)
-    oos_intercept_shift = perf_ts_r.perf[-1] - perf_ts_r.line[-1]
+    if isinstance(cone_std, (float, int)):
+        cone_std = [cone_std]
 
-    # make the cone for the out-of-sample/live papertrading period
-    for i in cone_end_rets.index:
-        returns = input_rets[:i]
-        perf_ts = cum_returns(returns, 1)
+    cone_bounds = pd.DataFrame(columns=pd.Float64Index([]))
+    for num_std in cone_std:
+        cone_bounds.loc[:, float(num_std)] = cum_mean + cum_std * num_std
+        cone_bounds.loc[:, float(-num_std)] = cum_mean - cum_std * num_std
 
-        if extend_fit_trend:
-            line_ols_coef = fit_line_ols_coef
-            line_ols_inter = fit_line_ols_inter
-        else:
-            X = list(range(0, perf_ts.size))
-            X = sm.add_constant(X)
-            sm.OLS(perf_ts, list(range(0, len(perf_ts))))
-            line_ols = sm.OLS(perf_ts.values, X).fit()
-            line_ols_coef = line_ols.params[1]
-            line_ols_inter = line_ols.params[0]
-
-        x_points = list(range(0, perf_ts.size))
-        x_points = np.array(x_points) * line_ols_coef + \
-            line_ols_inter + oos_intercept_shift
-
-        temp_line = x_points
-        if update_std_oos_rolling:
-            std_pct = np.sqrt(new_cone_day_scale_factor) * \
-                np.std(perf_ts.pct_change().dropna())
-        else:
-            std_pct = np.sqrt(new_cone_day_scale_factor) * warm_up_std_pct
-
-        temp_sd_up = temp_line * (1 + num_stdev * std_pct)
-        temp_sd_down = temp_line * (1 - num_stdev * std_pct)
-
-        new_daily_cone = pd.DataFrame(index=[i],
-                                      data={'perf': perf_ts[i],
-                                            'line': temp_line[-1],
-                                            'sd_up': temp_sd_up[-1],
-                                            'sd_down': temp_sd_down[-1]})
-
-        perf_ts_r = perf_ts_r.append(new_daily_cone)
-        new_cone_day_scale_factor += 1
-
-    if create_future_cone:
-        extend_ahead_days = APPROX_BDAYS_PER_YEAR
-        future_cone_dates = pd.date_range(
-            cone_end_rets.index[-1], periods=extend_ahead_days, freq='B')
-
-        future_cone_intercept_shift = perf_ts_r.perf[-1] - perf_ts_r.line[-1]
-
-        future_days_scale_factor = np.linspace(
-            1,
-            extend_ahead_days,
-            extend_ahead_days)
-        std_pct = np.sqrt(future_days_scale_factor) * warm_up_std_pct
-
-        x_points = list(range(perf_ts.size, perf_ts.size + extend_ahead_days))
-        x_points = np.array(x_points) * line_ols_coef + line_ols_inter + \
-            oos_intercept_shift + future_cone_intercept_shift
-        temp_line = x_points
-        temp_sd_up = temp_line * (1 + num_stdev * std_pct)
-        temp_sd_down = temp_line * (1 - num_stdev * std_pct)
-
-        future_cone = pd.DataFrame(index=list(map(np.datetime64,
-                                                  future_cone_dates)),
-                                   data={'perf': temp_line,
-                                         'line': temp_line,
-                                         'sd_up': temp_sd_up,
-                                         'sd_down': temp_sd_down})
-
-        perf_ts_r = perf_ts_r.append(future_cone)
-
-    return perf_ts_r
+    return cone_bounds
 
 
 def extract_interesting_date_ranges(returns):