Skip to content

use z-scores in rolling skew/kurt calculations #8270

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y,
sum_wt = 1.
sum_wt2 = 1.
old_wt = 1.

for i from 1 <= i < N:
cur_x = input_x[i]
cur_y = input_y[i]
Expand Down Expand Up @@ -1117,7 +1117,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y,
elif is_observation:
mean_x = cur_x
mean_y = cur_y

if nobs >= minp:
if not bias:
numerator = sum_wt * sum_wt
Expand Down Expand Up @@ -1344,10 +1344,32 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
#-------------------------------------------------------------------------------
# Rolling skewness

def roll_skew(ndarray[double_t] input, int win, int minp):
@cython.boundscheck(False)
@cython.wraparound(False)
def _get_zscores(ndarray[double_t] inp):
    """
    Center `inp` on the mean of its finite entries and rescale them.

    The mean of the finite values is subtracted from every element.  The
    finite entries are then divided by their standard deviation, unless
    that deviation is not positive or is within ``np.isclose`` tolerance
    of zero, in which case the centered values are left unscaled.

    When `inp` contains no finite value at all, `inp` itself is returned
    unchanged (no copy is made).
    """
    cdef:
        ndarray[double_t] centered
        ndarray[np.uint8_t, ndim=1, cast=True] finite
        double_t center, scale

    finite = np.isfinite(inp)
    if finite.any():
        # statistics are taken over the finite entries only
        center = inp[finite].mean()
        centered = inp - center
        scale = centered[finite].std()

        # skip the division for (near-)constant data
        if scale > 0 and not np.isclose(scale, 0.0):
            centered[finite] /= scale

        return centered

    # nothing finite to standardize
    return inp

def roll_skew(ndarray[double_t] inp, int win, int minp):
cdef double val, prev
cdef double x = 0, xx = 0, xxx = 0
cdef Py_ssize_t nobs = 0, i
cdef ndarray[double_t] input = _get_zscores(inp)
cdef Py_ssize_t N = len(input)

cdef ndarray[double_t] output = np.empty(N, dtype=float)
Expand Down Expand Up @@ -1405,11 +1427,12 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
# Rolling kurtosis


def roll_kurt(ndarray[double_t] input,
def roll_kurt(ndarray[double_t] inp,
int win, int minp):
cdef double val, prev
cdef double x = 0, xx = 0, xxx = 0, xxxx = 0
cdef Py_ssize_t nobs = 0, i
cdef ndarray[double_t] input = _get_zscores(inp)
cdef Py_ssize_t N = len(input)

cdef ndarray[double_t] output = np.empty(N, dtype=float)
Expand Down
27 changes: 21 additions & 6 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,21 @@ def test_rolling_kurt(self):
self._check_moment_func(mom.rolling_kurt,
lambda x: kurtosis(x, bias=False))

def test_affine_invariance(self):
    """
    rolling skew/kurt should be invariant under affine transformations

    Computes ``mom.rolling_skew`` and ``mom.rolling_kurt`` on a random
    sample and checks that the result is (almost) unchanged when the
    sample is replaced by ``a * xs + b`` for several positive scales
    ``a`` and offsets ``b``.
    """
    # Use a private, seeded generator so the sample is identical on every
    # run; a failure can then be reproduced instead of depending on the
    # state of the global np.random stream.
    rng = np.random.RandomState(8270)
    xs = rng.rand(50)
    window = 10

    for f in mom.rolling_skew, mom.rolling_kurt:
        # reference result on the untransformed data
        left = f(xs, window)

        for a, b in [(1, 100), (1, 5000), (100, 100), (100, 5000)]:
            right = f(a * xs + b, window)
            assert_almost_equal(left, right)

def test_fperr_robustness(self):
# TODO: remove this once python 2.5 out of picture
if PY3:
Expand Down Expand Up @@ -524,7 +539,7 @@ def test_ewma(self):
self.assertTrue(np.abs(result - 1) < 1e-2)

s = Series([1.0, 2.0, 4.0, 8.0])

expected = Series([1.0, 1.6, 2.736842, 4.923077])
for f in [lambda s: mom.ewma(s, com=2.0, adjust=True),
lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False),
Expand Down Expand Up @@ -750,7 +765,7 @@ def _non_null_values(x):

for (std, var, cov) in [(std_biased, var_biased, cov_biased),
(std_unbiased, var_unbiased, cov_unbiased)]:

# check that var(x), std(x), and cov(x) are all >= 0
var_x = var(x)
std_x = std(x)
Expand All @@ -762,7 +777,7 @@ def _non_null_values(x):

# check that var(x) == cov(x, x)
assert_equal(var_x, cov_x_x)

# check that var(x) == std(x)^2
assert_equal(var_x, std_x * std_x)

Expand Down Expand Up @@ -796,7 +811,7 @@ def _non_null_values(x):
cov_x_y = cov(x, y)
cov_y_x = cov(y, x)
assert_equal(cov_x_y, cov_y_x)

# check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
var_x_plus_y = var(x + y)
var_y = var(y)
Expand Down Expand Up @@ -1007,7 +1022,7 @@ def test_rolling_consistency(self):
expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j],
window=window, min_periods=min_periods, center=center)
assert_panel_equal(rolling_f_result, expected)

# binary moments
def test_rolling_cov(self):
A = self.series
Expand Down Expand Up @@ -1432,7 +1447,7 @@ def test_expanding_corr_pairwise_diff_length(self):
assert_frame_equal(result2, expected)
assert_frame_equal(result3, expected)
assert_frame_equal(result4, expected)

def test_pairwise_stats_column_names_order(self):
# GH 7738
df1s = [DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[0,1]),
Expand Down