diff --git a/backtesting/_stats.py b/backtesting/_stats.py new file mode 100644 index 00000000..8435605c --- /dev/null +++ b/backtesting/_stats.py @@ -0,0 +1,153 @@ +from typing import List, TYPE_CHECKING, Union + +import numpy as np +import pandas as pd + +from ._util import _data_period + +if TYPE_CHECKING: + from .backtesting import Strategy, Trade + + +def compute_drawdown_duration_peaks(dd: pd.Series): + iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1]) + iloc = pd.Series(iloc, index=dd.index[iloc]) + df = iloc.to_frame('iloc').assign(prev=iloc.shift()) + df = df[df['iloc'] > df['prev'] + 1].astype(int) + + # If no drawdown since no trade, avoid below for pandas sake and return nan series + if not len(df): + return (dd.replace(0, np.nan),) * 2 + + df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__) + df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1) + df = df.reindex(dd.index) + return df['duration'], df['peak_dd'] + + +def geometric_mean(returns: pd.Series) -> float: + returns = returns.fillna(0) + 1 + if np.any(returns <= 0): + return 0 + return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1 + + +def compute_stats( + trades: Union[List['Trade'], pd.DataFrame], + equity: np.ndarray, + ohlc_data: pd.DataFrame, + strategy_instance: 'Strategy', + risk_free_rate: float = 0, +) -> pd.Series: + assert -1 < risk_free_rate < 1 + + index = ohlc_data.index + dd = 1 - equity / np.maximum.accumulate(equity) + dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index)) + + equity_df = pd.DataFrame({ + 'Equity': equity, + 'DrawdownPct': dd, + 'DrawdownDuration': dd_dur}, + index=index) + + if isinstance(trades, pd.DataFrame): + trades_df = trades + else: + # Came straight from Backtest.run() + trades_df = pd.DataFrame({ + 'Size': [t.size for t in trades], + 'EntryBar': [t.entry_bar for t in trades], + 'ExitBar': [t.exit_bar for t in trades], + 'EntryPrice': [t.entry_price for t in trades], + 'ExitPrice': [t.exit_price for t in trades], + 'PnL': [t.pl for t in trades], + 'ReturnPct': [t.pl_pct for t in trades], + 'EntryTime': [t.entry_time for t in trades], + 'ExitTime': [t.exit_time for t in trades], + }) + trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime'] + del trades + + pl = trades_df['PnL'] + returns = trades_df['ReturnPct'] + durations = trades_df['Duration'] + + def _round_timedelta(value, _period=_data_period(index)): + if not isinstance(value, pd.Timedelta): + return value + resolution = getattr(_period, 'resolution_string', None) or _period.resolution + return value.ceil(resolution) + + s = pd.Series(dtype=object) + s.loc['Start'] = index[0] + s.loc['End'] = index[-1] + s.loc['Duration'] = s.End - s.Start + + have_position = np.repeat(0, len(index)) + for t in trades_df.itertuples(index=False): + have_position[t.EntryBar:t.ExitBar + 1] = 1 + + s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time + s.loc['Equity Final [$]'] = equity[-1] + s.loc['Equity Peak [$]'] = equity.max() + s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100 + c = ohlc_data.Close.values + s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100 # long-only return + + gmean_day_return: float = 0 + day_returns = np.array(np.nan) + annual_trading_days = np.nan + if isinstance(index, pd.DatetimeIndex): + day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change() + gmean_day_return = geometric_mean(day_returns) + annual_trading_days = float( + 365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else + 252) + + # Annualized return and risk metrics are computed based on the (mostly correct) + # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517 + # Our annualized return matches `empyrical.annual_return(day_returns)` whereas + # our risk doesn't; they use the simpler approach below. + annualized_return = (1 + gmean_day_return)**annual_trading_days - 1 + s.loc['Return (Ann.) [%]'] = annualized_return * 100 + s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100 # noqa: E501 + # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100 + # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100 + + # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return + # and simple standard deviation + s.loc['Sharpe Ratio'] = np.clip((s.loc['Return (Ann.) [%]'] - risk_free_rate) / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf) # noqa: E501 + # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return + s.loc['Sortino Ratio'] = np.clip((annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf) # noqa: E501 + max_dd = -np.nan_to_num(dd.max()) + s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf) + s.loc['Max. Drawdown [%]'] = max_dd * 100 + s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100 + s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max()) + s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean()) + s.loc['# Trades'] = n_trades = len(trades_df) + s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100 # noqa: E501 + s.loc['Best Trade [%]'] = returns.max() * 100 + s.loc['Worst Trade [%]'] = returns.min() * 100 + mean_return = geometric_mean(returns) + s.loc['Avg. Trade [%]'] = mean_return * 100 + s.loc['Max. Trade Duration'] = _round_timedelta(durations.max()) + s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean()) + s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501 + s.loc['Expectancy [%]'] = returns.mean() * 100 + s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan) + + s.loc['_strategy'] = strategy_instance + s.loc['_equity_curve'] = equity_df + s.loc['_trades'] = trades_df + + s = _Stats(s) + return s + + +class _Stats(pd.Series): + def __repr__(self): + # Prevent expansion due to _equity and _trades dfs + with pd.option_context('max_colwidth', 20): + return super().__repr__() diff --git a/backtesting/backtesting.py b/backtesting/backtesting.py index d4535f44..9c820596 100644 --- a/backtesting/backtesting.py +++ b/backtesting/backtesting.py @@ -29,7 +29,8 @@ def _tqdm(seq, **_): return seq from ._plotting import plot -from ._util import _as_str, _Indicator, _Data, _data_period, try_ +from ._stats import compute_stats +from ._util import _as_str, _Indicator, _Data, try_ __pdoc__ = { 'Strategy.__init__': False, @@ -1086,7 +1087,7 @@ def __init__(self, exclusive_orders=exclusive_orders, index=data.index, ) self._strategy = strategy - self._results = None + self._results: Optional[pd.Series] = None def run(self, **kwargs) -> pd.Series: """ @@ -1177,7 +1178,15 @@ def run(self, **kwargs) -> pd.Series: # for future `indicator._opts['data'].index` calls to work data._set_length(len(self._data)) - self._results = self._compute_stats(broker, strategy) + equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values + self._results = compute_stats( + trades=broker.closed_trades, + equity=equity, + ohlc_data=self._data, + risk_free_rate=0.0, + strategy_instance=strategy, + ) + return self._results def optimize(self, *, @@ -1485,134 +1494,6 @@ def _mp_task(backtest_uuid, batch_index): _mp_backtests: Dict[float, Tuple['Backtest', List, Callable]] = {} - @staticmethod - def _compute_drawdown_duration_peaks(dd: pd.Series): - iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1]) - iloc = pd.Series(iloc, index=dd.index[iloc]) - df = iloc.to_frame('iloc').assign(prev=iloc.shift()) - df = df[df['iloc'] > df['prev'] + 1].astype(int) - # If no drawdown since no trade, avoid below for pandas sake and return nan series - if not len(df): - return (dd.replace(0, np.nan),) * 2 - df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__) - df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1) - df = df.reindex(dd.index) - return df['duration'], df['peak_dd'] - - def _compute_stats(self, broker: _Broker, strategy: Strategy) -> pd.Series: - data = self._data - index = data.index - - equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values - dd = 1 - equity / np.maximum.accumulate(equity) - dd_dur, dd_peaks = self._compute_drawdown_duration_peaks(pd.Series(dd, index=index)) - - equity_df = pd.DataFrame({ - 'Equity': equity, - 'DrawdownPct': dd, - 'DrawdownDuration': dd_dur}, - index=index) - - trades = broker.closed_trades - trades_df = pd.DataFrame({ - 'Size': [t.size for t in trades], - 'EntryBar': [t.entry_bar for t in trades], - 'ExitBar': [t.exit_bar for t in trades], - 'EntryPrice': [t.entry_price for t in trades], - 'ExitPrice': [t.exit_price for t in trades], - 'PnL': [t.pl for t in trades], - 'ReturnPct': [t.pl_pct for t in trades], - 'EntryTime': [t.entry_time for t in trades], - 'ExitTime': [t.exit_time for t in trades], - }) - trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime'] - - pl = trades_df['PnL'] - returns = trades_df['ReturnPct'] - durations = trades_df['Duration'] - - def _round_timedelta(value, _period=_data_period(index)): - if not isinstance(value, pd.Timedelta): - return value - resolution = getattr(_period, 'resolution_string', None) or _period.resolution - return value.ceil(resolution) - - s = pd.Series(dtype=object) - s.loc['Start'] = index[0] - s.loc['End'] = index[-1] - s.loc['Duration'] = s.End - s.Start - - have_position = np.repeat(0, len(index)) - for t in trades: - have_position[t.entry_bar:t.exit_bar + 1] = 1 # type: ignore - - s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time - s.loc['Equity Final [$]'] = equity[-1] - s.loc['Equity Peak [$]'] = equity.max() - s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100 - c = data.Close.values - s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100 # long-only return - - def geometric_mean(returns): - returns = returns.fillna(0) + 1 - return (0 if np.any(returns <= 0) else - np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1) - - day_returns = gmean_day_return = np.array(np.nan) - annual_trading_days = np.nan - if isinstance(index, pd.DatetimeIndex): - day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change() - gmean_day_return = geometric_mean(day_returns) - annual_trading_days = float( - 365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else - 252) - - # Annualized return and risk metrics are computed based on the (mostly correct) - # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517 - # Our annualized return matches `empyrical.annual_return(day_returns)` whereas - # our risk doesn't; they use the simpler approach below. - annualized_return = (1 + gmean_day_return)**annual_trading_days - 1 - s.loc['Return (Ann.) [%]'] = annualized_return * 100 - s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100 # noqa: E501 - # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100 - # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100 - - # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return - # and simple standard deviation - s.loc['Sharpe Ratio'] = np.clip(s.loc['Return (Ann.) [%]'] / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf) # noqa: E501 - # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return - s.loc['Sortino Ratio'] = np.clip(annualized_return / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf) # noqa: E501 - max_dd = -np.nan_to_num(dd.max()) - s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf) - s.loc['Max. Drawdown [%]'] = max_dd * 100 - s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100 - s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max()) - s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean()) - s.loc['# Trades'] = n_trades = len(trades) - s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100 # noqa: E501 - s.loc['Best Trade [%]'] = returns.max() * 100 - s.loc['Worst Trade [%]'] = returns.min() * 100 - mean_return = geometric_mean(returns) - s.loc['Avg. Trade [%]'] = mean_return * 100 - s.loc['Max. Trade Duration'] = _round_timedelta(durations.max()) - s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean()) - s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501 - s.loc['Expectancy [%]'] = returns.mean() * 100 - s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan) - - s.loc['_strategy'] = strategy - s.loc['_equity_curve'] = equity_df - s.loc['_trades'] = trades_df - - s = Backtest._Stats(s) - return s - - class _Stats(pd.Series): - def __repr__(self): - # Prevent expansion due to _equity and _trades dfs - with pd.option_context('max_colwidth', 20): - return super().__repr__() - def plot(self, *, results: pd.Series = None, filename=None, plot_width=None, plot_equity=True, plot_return=False, plot_pl=True, plot_volume=True, plot_drawdown=False, diff --git a/backtesting/lib.py b/backtesting/lib.py index fafbae7a..09f5ebcf 100644 --- a/backtesting/lib.py +++ b/backtesting/lib.py @@ -22,6 +22,7 @@ from .backtesting import Strategy from ._plotting import plot_heatmaps as _plot_heatmaps +from ._stats import compute_stats as _compute_stats from ._util import _Array, _as_str __pdoc__ = {} @@ -164,6 +165,38 @@ def quantile(series: Sequence, quantile: Union[None, float] = None): return np.nanpercentile(series, quantile * 100) +def compute_stats( + *, + stats: pd.Series, + data: pd.DataFrame, + trades: pd.DataFrame = None, + risk_free_rate: float = 0.) -> pd.Series: + """ + (Re-)compute strategy performance metrics. + + `stats` is the statistics series as returned by `Backtest.run()`. + `data` is OHLC data as passed to the `Backtest` the `stats` were obtained in. + `trades` can be a dataframe subset of `stats._trades` (e.g. only long trades). + You can also tune `risk_free_rate`, used in calculation of Sharpe and Sortino ratios. + + >>> stats = Backtest(GOOG, MyStrategy).run() + >>> only_long_trades = stats._trades[stats._trades.Size > 0] + >>> long_stats = compute_stats(stats=stats, trades=only_long_trades, + ... data=GOOG, risk_free_rate=.02) + """ + equity = stats._equity_curve.Equity + if trades is None: + trades = stats._trades + else: + # XXX: Is this buggy? + equity = equity.copy() + equity[:] = stats._equity_curve.Equity.iloc[0] + for t in trades.itertuples(index=False): + equity.iloc[t.EntryBar:] += t.PnL + return _compute_stats(trades=trades, equity=equity, ohlc_data=data, + risk_free_rate=risk_free_rate, strategy_instance=stats._strategy) + + def resample_apply(rule: str, func: Optional[Callable[..., Sequence]], series: Union[pd.Series, pd.DataFrame, _Array], diff --git a/backtesting/test/_test.py b/backtesting/test/_test.py index c2698a93..a943c986 100644 --- a/backtesting/test/_test.py +++ b/backtesting/test/_test.py @@ -13,11 +13,14 @@ import numpy as np import pandas as pd +from pandas.testing import assert_frame_equal from backtesting import Backtest, Strategy +from backtesting._stats import compute_drawdown_duration_peaks from backtesting.lib import ( OHLCV_AGG, barssince, + compute_stats, cross, crossover, quantile, @@ -242,7 +245,7 @@ def test_strategy_str(self): def test_compute_drawdown(self): dd = pd.Series([0, 1, 7, 0, 4, 0, 0]) - durations, peaks = Backtest._compute_drawdown_duration_peaks(dd) + durations, peaks = compute_drawdown_duration_peaks(dd) np.testing.assert_array_equal(durations, pd.Series([3, 2], index=[3, 5]).reindex(dd.index)) np.testing.assert_array_equal(peaks, pd.Series([7, 4], index=[3, 5]).reindex(dd.index)) @@ -835,6 +838,18 @@ def test_random_ohlc_data(self): self.assertEqual(new_data.shape, GOOG.shape) self.assertEqual(list(new_data.columns), list(GOOG.columns)) + def test_compute_stats(self): + stats = Backtest(GOOG, SmaCross).run() + only_long_trades = stats._trades[stats._trades.Size > 0] + long_stats = compute_stats(stats=stats, trades=only_long_trades, + data=GOOG, risk_free_rate=.02) + self.assertNotEqual(list(stats._equity_curve.Equity), + list(long_stats._equity_curve.Equity)) + self.assertNotEqual(stats['Sharpe Ratio'], long_stats['Sharpe Ratio']) + self.assertEqual(long_stats['# Trades'], len(only_long_trades)) + self.assertEqual(stats._strategy, long_stats._strategy) + assert_frame_equal(long_stats._trades, only_long_trades) + def test_SignalStrategy(self): class S(SignalStrategy): def init(self):