ENH: Extract strategy performance method compute_stats #281

Merged · 14 commits · Aug 3, 2021
153 changes: 153 additions & 0 deletions backtesting/_stats.py
@@ -0,0 +1,153 @@
from typing import List, TYPE_CHECKING, Union

import numpy as np
import pandas as pd

from ._util import _data_period

if TYPE_CHECKING:
from .backtesting import Strategy, Trade


def compute_drawdown_duration_peaks(dd: pd.Series):
iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
iloc = pd.Series(iloc, index=dd.index[iloc])
df = iloc.to_frame('iloc').assign(prev=iloc.shift())
df = df[df['iloc'] > df['prev'] + 1].astype(int)

# If there was no drawdown (i.e. no trades), skip the logic below,
# which trips up pandas, and return all-NaN series
if not len(df):
return (dd.replace(0, np.nan),) * 2

df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
df = df.reindex(dd.index)
return df['duration'], df['peak_dd']


def geometric_mean(returns: pd.Series) -> float:
returns = returns.fillna(0) + 1
if np.any(returns <= 0):
return 0
return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1


def compute_stats(
trades: Union[List['Trade'], pd.DataFrame],
equity: np.ndarray,
ohlc_data: pd.DataFrame,
strategy_instance: 'Strategy',
risk_free_rate: float = 0,
) -> pd.Series:
assert -1 < risk_free_rate < 1

index = ohlc_data.index
dd = 1 - equity / np.maximum.accumulate(equity)
dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))

equity_df = pd.DataFrame({
'Equity': equity,
'DrawdownPct': dd,
'DrawdownDuration': dd_dur},
index=index)

if isinstance(trades, pd.DataFrame):
trades_df = trades
else:
# Came straight from Backtest.run()
trades_df = pd.DataFrame({
'Size': [t.size for t in trades],
'EntryBar': [t.entry_bar for t in trades],
'ExitBar': [t.exit_bar for t in trades],
'EntryPrice': [t.entry_price for t in trades],
'ExitPrice': [t.exit_price for t in trades],
'PnL': [t.pl for t in trades],
'ReturnPct': [t.pl_pct for t in trades],
'EntryTime': [t.entry_time for t in trades],
'ExitTime': [t.exit_time for t in trades],
})
trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
del trades

pl = trades_df['PnL']
returns = trades_df['ReturnPct']
durations = trades_df['Duration']

def _round_timedelta(value, _period=_data_period(index)):
if not isinstance(value, pd.Timedelta):
return value
resolution = getattr(_period, 'resolution_string', None) or _period.resolution
return value.ceil(resolution)

s = pd.Series(dtype=object)
s.loc['Start'] = index[0]
s.loc['End'] = index[-1]
s.loc['Duration'] = s.End - s.Start

have_position = np.repeat(0, len(index))
for t in trades_df.itertuples(index=False):
have_position[t.EntryBar:t.ExitBar + 1] = 1

s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time
s.loc['Equity Final [$]'] = equity[-1]
s.loc['Equity Peak [$]'] = equity.max()
s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
c = ohlc_data.Close.values
s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100 # long-only return

gmean_day_return: float = 0
day_returns = np.array(np.nan)
annual_trading_days = np.nan
if isinstance(index, pd.DatetimeIndex):
day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
gmean_day_return = geometric_mean(day_returns)
annual_trading_days = float(
365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
252)
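# Heuristic: dayofweek 5-6 are Saturday/Sunday. If weekend bars account for
# more than ~60% of their full-calendar share (2/7), assume the asset trades
# every calendar day (365, e.g. crypto); otherwise use a 252-day trading year.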

# Annualized return and risk metrics are computed based on the (mostly correct)
# assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
# Our annualized return matches `empyrical.annual_return(day_returns)` whereas
# our risk doesn't; they use the simpler approach below.
annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
s.loc['Return (Ann.) [%]'] = annualized_return * 100
s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100 # noqa: E501
# s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
# s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
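# Closed form used above: for i.i.d. daily returns with variance s^2 and
# mean gross return (1+g), the variance of the T-day compounded return is
#   E[(1+r)^2]^T - (E[1+r])^(2T) = (s^2 + (1+g)^2)^T - (1+g)^(2T),
# here with the geometric mean g standing in for the arithmetic mean.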

# Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
# and simple standard deviation
s.loc['Sharpe Ratio'] = np.clip((s.loc['Return (Ann.) [%]'] - risk_free_rate) / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf) # noqa: E501
# Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
s.loc['Sortino Ratio'] = np.clip((annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf) # noqa: E501
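# Note the np.clip(..., 0, np.inf): negative Sharpe/Sortino (and Calmar below)
# values are floored at zero. Also mind the units: `risk_free_rate` is a
# fraction, while 'Return (Ann.) [%]' and 'Volatility (Ann.) [%]' in the
# Sharpe line above are percentages.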
max_dd = -np.nan_to_num(dd.max())
s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
s.loc['Max. Drawdown [%]'] = max_dd * 100
s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
s.loc['# Trades'] = n_trades = len(trades_df)
s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100 # noqa: E501
s.loc['Best Trade [%]'] = returns.max() * 100
s.loc['Worst Trade [%]'] = returns.min() * 100
mean_return = geometric_mean(returns)
s.loc['Avg. Trade [%]'] = mean_return * 100
s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501
s.loc['Expectancy [%]'] = returns.mean() * 100
s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
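# SQN is Van Tharp's System Quality Number: sqrt(n_trades) * mean(PnL) / std(PnL)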

s.loc['_strategy'] = strategy_instance
s.loc['_equity_curve'] = equity_df
s.loc['_trades'] = trades_df

s = _Stats(s)
return s


class _Stats(pd.Series):
def __repr__(self):
# Prevent expansion due to _equity and _trades dfs
with pd.option_context('max_colwidth', 20):
return super().__repr__()
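
For orientation, a minimal sketch (not part of this diff) of how the extracted
helpers behave on synthetic data, assuming the new backtesting/_stats.py module
above is importable:

import numpy as np
import pandas as pd
from backtesting._stats import compute_drawdown_duration_peaks, geometric_mean

# Toy equity curve: rises, draws down twice, recovers.
index = pd.date_range('2021-01-01', periods=8, freq='D')
equity = np.array([100, 110, 105, 95, 100, 115, 112, 120], dtype=float)
dd = 1 - equity / np.maximum.accumulate(equity)  # drawdown fraction off the running peak

dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
print(dd_dur.max())    # longest drawdown duration: Timedelta('4 days')
print(dd_peaks.max())  # deepest drawdown: 15/110 ~= 0.136

day_returns = pd.Series(equity, index=index).pct_change()
print(geometric_mean(day_returns))  # compounded mean daily return
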
143 changes: 12 additions & 131 deletions backtesting/backtesting.py
@@ -29,7 +29,8 @@ def _tqdm(seq, **_):
return seq

from ._plotting import plot
from ._util import _as_str, _Indicator, _Data, _data_period, try_
from ._stats import compute_stats
from ._util import _as_str, _Indicator, _Data, try_

__pdoc__ = {
'Strategy.__init__': False,
@@ -1086,7 +1087,7 @@ def __init__(self,
exclusive_orders=exclusive_orders, index=data.index,
)
self._strategy = strategy
self._results = None
self._results: Optional[pd.Series] = None

def run(self, **kwargs) -> pd.Series:
"""
@@ -1177,7 +1178,15 @@ def run(self, **kwargs) -> pd.Series:
# for future `indicator._opts['data'].index` calls to work
data._set_length(len(self._data))

self._results = self._compute_stats(broker, strategy)
equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
self._results = compute_stats(
trades=broker.closed_trades,
equity=equity,
ohlc_data=self._data,
risk_free_rate=0.0,
strategy_instance=strategy,
)

return self._results

def optimize(self, *,
@@ -1485,134 +1494,6 @@ def _mp_task(backtest_uuid, batch_index):

_mp_backtests: Dict[float, Tuple['Backtest', List, Callable]] = {}

@staticmethod
def _compute_drawdown_duration_peaks(dd: pd.Series):
iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
iloc = pd.Series(iloc, index=dd.index[iloc])
df = iloc.to_frame('iloc').assign(prev=iloc.shift())
df = df[df['iloc'] > df['prev'] + 1].astype(int)
# If there was no drawdown (i.e. no trades), skip the logic below,
# which trips up pandas, and return all-NaN series
if not len(df):
return (dd.replace(0, np.nan),) * 2
df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
df = df.reindex(dd.index)
return df['duration'], df['peak_dd']

def _compute_stats(self, broker: _Broker, strategy: Strategy) -> pd.Series:
data = self._data
index = data.index

equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
dd = 1 - equity / np.maximum.accumulate(equity)
dd_dur, dd_peaks = self._compute_drawdown_duration_peaks(pd.Series(dd, index=index))

equity_df = pd.DataFrame({
'Equity': equity,
'DrawdownPct': dd,
'DrawdownDuration': dd_dur},
index=index)

trades = broker.closed_trades
trades_df = pd.DataFrame({
'Size': [t.size for t in trades],
'EntryBar': [t.entry_bar for t in trades],
'ExitBar': [t.exit_bar for t in trades],
'EntryPrice': [t.entry_price for t in trades],
'ExitPrice': [t.exit_price for t in trades],
'PnL': [t.pl for t in trades],
'ReturnPct': [t.pl_pct for t in trades],
'EntryTime': [t.entry_time for t in trades],
'ExitTime': [t.exit_time for t in trades],
})
trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']

pl = trades_df['PnL']
returns = trades_df['ReturnPct']
durations = trades_df['Duration']

def _round_timedelta(value, _period=_data_period(index)):
if not isinstance(value, pd.Timedelta):
return value
resolution = getattr(_period, 'resolution_string', None) or _period.resolution
return value.ceil(resolution)

s = pd.Series(dtype=object)
s.loc['Start'] = index[0]
s.loc['End'] = index[-1]
s.loc['Duration'] = s.End - s.Start

have_position = np.repeat(0, len(index))
for t in trades:
have_position[t.entry_bar:t.exit_bar + 1] = 1 # type: ignore

s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time
s.loc['Equity Final [$]'] = equity[-1]
s.loc['Equity Peak [$]'] = equity.max()
s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
c = data.Close.values
s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100 # long-only return

def geometric_mean(returns):
returns = returns.fillna(0) + 1
return (0 if np.any(returns <= 0) else
np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1)

day_returns = gmean_day_return = np.array(np.nan)
annual_trading_days = np.nan
if isinstance(index, pd.DatetimeIndex):
day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
gmean_day_return = geometric_mean(day_returns)
annual_trading_days = float(
365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
252)

# Annualized return and risk metrics are computed based on the (mostly correct)
# assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
# Our annualized return matches `empyrical.annual_return(day_returns)` whereas
# our risk doesn't; they use the simpler approach below.
annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
s.loc['Return (Ann.) [%]'] = annualized_return * 100
s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100 # noqa: E501
# s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
# s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100

# Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
# and simple standard deviation
s.loc['Sharpe Ratio'] = np.clip(s.loc['Return (Ann.) [%]'] / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf) # noqa: E501
# Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
s.loc['Sortino Ratio'] = np.clip(annualized_return / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf) # noqa: E501
max_dd = -np.nan_to_num(dd.max())
s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
s.loc['Max. Drawdown [%]'] = max_dd * 100
s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
s.loc['# Trades'] = n_trades = len(trades)
s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100 # noqa: E501
s.loc['Best Trade [%]'] = returns.max() * 100
s.loc['Worst Trade [%]'] = returns.min() * 100
mean_return = geometric_mean(returns)
s.loc['Avg. Trade [%]'] = mean_return * 100
s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501
s.loc['Expectancy [%]'] = returns.mean() * 100
s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)

s.loc['_strategy'] = strategy
s.loc['_equity_curve'] = equity_df
s.loc['_trades'] = trades_df

s = Backtest._Stats(s)
return s

class _Stats(pd.Series):
def __repr__(self):
# Prevent expansion due to _equity and _trades dfs
with pd.option_context('max_colwidth', 20):
return super().__repr__()

def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
plot_equity=True, plot_return=False, plot_pl=True,
plot_volume=True, plot_drawdown=False,
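
A quick sketch (with hypothetical values) of the equity preparation that run()
now performs before delegating to compute_stats(): bars with no recorded equity
are back-filled from the next recorded value, and anything still missing falls
back to the starting cash:

import numpy as np
import pandas as pd

raw_equity = [np.nan, np.nan, 10_500., np.nan, 10_800.]  # stand-in for broker._equity
cash = 10_000.                                           # stand-in for broker._cash
equity = pd.Series(raw_equity).bfill().fillna(cash).values
print(equity)  # [10500. 10500. 10500. 10800. 10800.]
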
33 changes: 33 additions & 0 deletions backtesting/lib.py
@@ -22,6 +22,7 @@

from .backtesting import Strategy
from ._plotting import plot_heatmaps as _plot_heatmaps
from ._stats import compute_stats as _compute_stats
from ._util import _Array, _as_str

__pdoc__ = {}
@@ -164,6 +165,38 @@ def quantile(series: Sequence, quantile: Union[None, float] = None):
return np.nanpercentile(series, quantile * 100)


def compute_stats(
*,
stats: pd.Series,
data: pd.DataFrame,
trades: pd.DataFrame = None,
risk_free_rate: float = 0.) -> pd.Series:
"""
(Re-)compute strategy performance metrics.

`stats` is the statistics series as returned by `Backtest.run()`.
`data` is the OHLC data that was passed to the `Backtest` from which the
`stats` were obtained.
`trades` can be a dataframe subset of `stats._trades` (e.g. only long trades).
You can also tune `risk_free_rate`, used in the calculation of Sharpe and
Sortino ratios.

>>> stats = Backtest(GOOG, MyStrategy).run()
>>> only_long_trades = stats._trades[stats._trades.Size > 0]
>>> long_stats = compute_stats(stats=stats, trades=only_long_trades,
... data=GOOG, risk_free_rate=.02)
"""
equity = stats._equity_curve.Equity
if trades is None:
trades = stats._trades
else:
# XXX: Is this buggy?
equity = equity.copy()
equity[:] = stats._equity_curve.Equity.iloc[0]
for t in trades.itertuples(index=False):
equity.iloc[t.EntryBar:] += t.PnL
return _compute_stats(trades=trades, equity=equity, ohlc_data=data,
risk_free_rate=risk_free_rate, strategy_instance=stats._strategy)


def resample_apply(rule: str,
func: Optional[Callable[..., Sequence]],
series: Union[pd.Series, pd.DataFrame, _Array],
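
The trade-subset branch above (the one the author flags with "XXX: Is this
buggy?") rebuilds the equity curve by starting flat at the initial equity and
adding each kept trade's full PnL from its entry bar onward. A standalone
sketch with hypothetical numbers:

import pandas as pd

initial_equity = 10_000.
equity = pd.Series(initial_equity, index=range(6))
trades = pd.DataFrame({'EntryBar': [1, 3], 'PnL': [200., -50.]})  # hypothetical subset
for t in trades.itertuples(index=False):
    equity.iloc[t.EntryBar:] += t.PnL
print(equity.tolist())  # [10000.0, 10200.0, 10200.0, 10150.0, 10150.0, 10150.0]
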