Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove statsmodels, scikit-learn, and other dependencies; add gross leverage to performance stats summary #347

Merged
merged 13 commits into from
Nov 5, 2016
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@ before_install:
- cp pyfolio/tests/matplotlibrc .

install:
- conda create -q -n testenv --yes python=$TRAVIS_PYTHON_VERSION ipython pyzmq numpy scipy nose matplotlib pandas Cython patsy statsmodels flake8 scikit-learn seaborn runipy pytables networkx pandas-datareader matplotlib-tests joblib
- conda create -q -n testenv --yes python=$TRAVIS_PYTHON_VERSION ipython pyzmq numpy scipy nose matplotlib pandas Cython patsy flake8 seaborn runipy pytables networkx pandas-datareader matplotlib-tests joblib
- source activate testenv
- pip install nose_parameterized
#- pip install --no-deps git+https://github.com/quantopian/zipline
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes mock enum34; fi
- pip install -e .[bayesian]

before_script:
Expand Down
6 changes: 0 additions & 6 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,12 @@ requirements:

run:
- python
- funcsigs >=0.4
- matplotlib >=1.4.0
- mock >=1.1.2
- numpy >=1.9.1
- pandas >=0.18.0
- pyparsing >=2.0.3
- python-dateutil >=2.4.2
- pytz >=2014.10
- scikit-learn >=0.15.0
- scipy >=0.14.0
- seaborn >=0.6.0
- statsmodels >=0.5.0
- pandas-datareader >=0.2
- ipython
- empyrical >=0.2.1
Expand Down
33 changes: 18 additions & 15 deletions pyfolio/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
from matplotlib import figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

from sklearn import preprocessing

from . import utils
from . import timeseries
from . import pos
Expand Down Expand Up @@ -499,8 +497,8 @@ def plot_perf_stats(returns, factor_returns, ax=None):
return ax


def show_perf_stats(returns, factor_returns, live_start_date=None,
bootstrap=False):
def show_perf_stats(returns, factor_returns, gross_lev=None,
live_start_date=None, bootstrap=False):
"""Prints some performance metrics of the strategy.

- Shows amount of time the strategy has been run in backtest and
Expand Down Expand Up @@ -537,25 +535,35 @@ def show_perf_stats(returns, factor_returns, live_start_date=None,
returns_backtest = returns[returns.index < live_start_date]
returns_live = returns[returns.index > live_start_date]

gross_lev_backtest = None
gross_lev_live = None
if gross_lev is not None:
gross_lev_backtest = gross_lev[gross_lev.index < live_start_date]
gross_lev_live = gross_lev[gross_lev.index > live_start_date]

perf_stats_live = perf_func(
returns_live,
factor_returns=factor_returns)
factor_returns=factor_returns,
gross_lev=gross_lev_live)

perf_stats_all = perf_func(
returns,
factor_returns=factor_returns)
factor_returns=factor_returns,
gross_lev=gross_lev)

print('Out-of-Sample Months: ' +
str(int(len(returns_live) / APPROX_BDAYS_PER_MONTH)))
else:
returns_backtest = returns
gross_lev_backtest = gross_lev

print('Backtest Months: ' +
str(int(len(returns_backtest) / APPROX_BDAYS_PER_MONTH)))

perf_stats = perf_func(
returns_backtest,
factor_returns=factor_returns)
factor_returns=factor_returns,
gross_lev=gross_lev_backtest)

if live_start_date is not None:
perf_stats = pd.concat(OrderedDict([
Expand Down Expand Up @@ -1425,8 +1433,7 @@ def plot_daily_volume(returns, transactions, ax=None, **kwargs):


def plot_daily_returns_similarity(returns_backtest, returns_live,
title='', scale_kws=None, ax=None,
**kwargs):
title='', ax=None, **kwargs):
"""Plots overlapping distributions of in-sample (backtest) returns
and out-of-sample (live trading) returns.

Expand All @@ -1438,8 +1445,6 @@ def plot_daily_returns_similarity(returns_backtest, returns_live,
Daily returns of the strategy's live trading, noncumulative.
title : str, optional
The title to use for the plot.
scale_kws : dict, optional
Additional arguments passed to preprocessing.scale.
ax : matplotlib.Axes, optional
Axes upon which to plot.
**kwargs, optional
Expand All @@ -1454,13 +1459,11 @@ def plot_daily_returns_similarity(returns_backtest, returns_live,

if ax is None:
ax = plt.gca()
if scale_kws is None:
scale_kws = {}

sns.kdeplot(preprocessing.scale(returns_backtest, **scale_kws),
sns.kdeplot(utils.standardize_data(returns_backtest),
bw='scott', shade=True, label='backtest',
color='forestgreen', ax=ax, **kwargs)
sns.kdeplot(preprocessing.scale(returns_live, **scale_kws),
sns.kdeplot(utils.standardize_data(returns_live),
bw='scott', shade=True, label='out-of-sample',
color='red', ax=ax, **kwargs)
ax.set_title(title)
Expand Down
3 changes: 3 additions & 0 deletions pyfolio/tears.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def create_full_tear_sheet(returns,
create_returns_tear_sheet(
returns,
live_start_date=live_start_date,
gross_lev=gross_lev,
cone_std=cone_std,
benchmark_rets=benchmark_rets,
bootstrap=bootstrap,
Expand Down Expand Up @@ -204,6 +205,7 @@ def create_full_tear_sheet(returns,

@plotting_context
def create_returns_tear_sheet(returns, live_start_date=None,
gross_lev=None,
cone_std=(1.0, 1.5, 2.0),
benchmark_rets=None,
bootstrap=False,
Expand Down Expand Up @@ -256,6 +258,7 @@ def create_returns_tear_sheet(returns, live_start_date=None,
print('\n')

plotting.show_perf_stats(returns, benchmark_rets,
gross_lev=gross_lev,
bootstrap=bootstrap,
live_start_date=live_start_date)

Expand Down
27 changes: 0 additions & 27 deletions pyfolio/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,33 +276,6 @@ def test_beta(self, returns, benchmark_rets, rolling_window, expected):
expected)


class TestMultifactor(TestCase):
    """Regression test for ``timeseries.calc_multifactor``."""

    # 500 daily observations: three days of 10% returns, then flat.
    simple_rets = pd.Series(
        data=[0.1] * 3 + [0] * 497,
        index=pd.date_range(start='2000-1-1', periods=500, freq='D'))

    # Single-column factor frame on the same dates: four days of 3%
    # returns, then flat.
    simple_benchmark_rets = pd.DataFrame(
        data=pd.Series(
            data=[0.03] * 4 + [0] * 496,
            index=pd.date_range(start='2000-1-1', periods=500, freq='D')),
        columns=['bm'])

    @parameterized.expand([
        (simple_rets[:4], simple_benchmark_rets[:4], [2.5000000000000004]),
    ])
    def test_calc_multifactor(self, returns, factors, expected):
        # Compare the fitted parameters as a plain list so the check does
        # not depend on the index labels of the returned object.
        fitted = timeseries.calc_multifactor(returns, factors)
        self.assertEqual(fitted.values.tolist(), expected)


class TestCone(TestCase):
def test_bootstrap_cone_against_linear_cone_normal_returns(self):
random_seed = 100
Expand Down
45 changes: 13 additions & 32 deletions pyfolio/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def common_sense_ratio(returns):
stats.skew,
stats.kurtosis,
tail_ratio,
common_sense_ratio,
common_sense_ratio
]

FACTOR_STAT_FUNCS = [
Expand Down Expand Up @@ -554,34 +554,6 @@ def aggregate_returns(returns, convert_to):
return empyrical.aggregate_returns(returns, convert_to=convert_to)


def calc_multifactor(returns, factors):
    """Fit an ordinary least squares regression of returns on factors.

    Parameters
    ----------
    returns : pd.Series
        Daily returns of the strategy, noncumulative.
         - See full explanation in tears.create_full_tear_sheet.
    factors : pd.Series or pd.DataFrame
        Explanatory series to regress the returns on. They are aligned
        to the index of ``returns`` and passed through
        ``statsmodels.api.add_constant``; rows that contain NaN after
        that step are dropped before fitting.

    Returns
    -------
    Fit parameters
        The ``params`` attribute of the fitted statsmodels OLS results.

    """

    # Imported lazily so statsmodels is only required when this
    # function is actually called.
    import statsmodels.api as sm

    # Align factors to the return dates, run them through add_constant,
    # then discard incomplete rows.
    design = sm.add_constant(factors.loc[returns.index]).dropna(axis=0)

    # Regress only on the dates that survived the NaN filter.
    model = sm.OLS(returns[design.index], design)

    return model.fit().params


def rolling_beta(returns, factor_returns,
rolling_window=APPROX_BDAYS_PER_MONTH * 6):
"""Determines the rolling beta of a strategy.
Expand Down Expand Up @@ -658,7 +630,7 @@ def rolling_fama_french(returns, factor_returns=None,
rolling_window=rolling_window)


def perf_stats(returns, factor_returns=None):
def perf_stats(returns, factor_returns=None, gross_lev=None):
"""Calculates various performance metrics of a strategy, for use in
plotting.show_perf_stats.

Expand All @@ -671,6 +643,8 @@ def perf_stats(returns, factor_returns=None):
Daily noncumulative returns of the benchmark.
- This is in the same style as returns.
If None, do not compute alpha, beta, and information ratio.
gross_lev : pd.Series (optional)
Daily gross leverage of the strategy.

Returns
-------
Expand All @@ -680,10 +654,12 @@ def perf_stats(returns, factor_returns=None):
"""

stats = pd.Series()

for stat_func in SIMPLE_STAT_FUNCS:
stats[stat_func.__name__] = stat_func(returns)

if gross_lev is not None:
stats['mean_gross_leverage'] = gross_lev.mean()

if factor_returns is not None:
for stat_func in FACTOR_STAT_FUNCS:
stats[stat_func.__name__] = stat_func(returns,
Expand All @@ -692,7 +668,8 @@ def perf_stats(returns, factor_returns=None):
return stats


def perf_stats_bootstrap(returns, factor_returns=None, return_stats=True):
def perf_stats_bootstrap(returns, factor_returns=None, gross_lev=None,
return_stats=True):
"""Calculates various bootstrapped performance metrics of a strategy.

Parameters
Expand Down Expand Up @@ -726,6 +703,10 @@ def perf_stats_bootstrap(returns, factor_returns=None, return_stats=True):
bootstrap_values[stat_name] = calc_bootstrap(stat_func,
returns)

if gross_lev is not None:
bootstrap_values['mean_gross_leverage'] = calc_bootstrap(np.mean,
gross_lev)

if factor_returns is not None:
for stat_func in FACTOR_STAT_FUNCS:
stat_name = stat_func.__name__
Expand Down
21 changes: 20 additions & 1 deletion pyfolio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from os.path import expanduser, join, getmtime, isdir
import warnings

import numpy as np
from IPython.display import display
import pandas as pd
from pandas.tseries.offsets import BDay
Expand Down Expand Up @@ -475,7 +476,8 @@ def get_symbol_rets(symbol, start=None, end=None):


def print_table(table, name=None, fmt=None):
"""Pretty print a pandas DataFrame.
"""
Pretty print a pandas DataFrame.

Uses HTML output if running inside Jupyter Notebook, otherwise
formatted text output.
Expand Down Expand Up @@ -506,3 +508,20 @@ def print_table(table, name=None, fmt=None):

if fmt is not None:
pd.set_option('display.float_format', prev_option)


def standardize_data(x):
    """
    Standardize an array with mean and standard deviation.

    The data is centered on its mean and divided by its population
    standard deviation (``np.std`` with its default ``ddof=0``).

    If the standard deviation is exactly zero (all values identical),
    the centered data is returned unscaled instead of dividing by zero,
    so constant input yields zeros rather than NaN.

    Parameters
    ----------
    x : np.array
        Array to standardize.

    Returns
    -------
    np.array
        Standardized array.
    """
    centered = x - np.mean(x)
    scale = np.std(x)

    # Only guard the scalar case: when np.std returns a non-scalar
    # (e.g. per-column results for some container types) fall through
    # to the plain element-wise division used previously.
    if np.ndim(scale) == 0 and scale == 0:
        return centered

    return centered / scale
9 changes: 1 addition & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,15 @@
'Operating System :: OS Independent']

install_reqs = [
'funcsigs>=0.4',
'ipython>=3.2.3',
'matplotlib>=1.4.0',
'mock>=1.1.2',
'numpy>=1.9.1',
'pandas>=0.18.0',
'pyparsing>=2.0.3',
'python-dateutil>=2.4.2',
'pytz>=2014.10',
'scipy>=0.14.0',
'seaborn>=0.7.1',
'pandas-datareader>=0.2',
'scikit-learn>=0.17',
'empyrical>=0.2.1',
'statsmodels>=0.6.1',
'jsonschema>=2.5.1',
'empyrical>=0.2.1'
]

test_reqs = ['nose>=1.3.7', 'nose-parameterized>=0.5.0', 'runipy>=0.1.3']
Expand Down