pandas-dev · behzadnouri · Sep 13, 2014
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
@@ -1087,7 +1087,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y,
     sum_wt = 1.
     sum_wt2 = 1.
     old_wt = 1.
-    
+
     for i from 1 <= i < N:
         cur_x = input_x[i]
         cur_y = input_y[i]
@@ -1117,7 +1117,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y,
         elif is_observation:
             mean_x = cur_x
             mean_y = cur_y
-        
+
         if nobs >= minp:
             if not bias:
                 numerator = sum_wt * sum_wt
@@ -1344,10 +1344,32 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
 #-------------------------------------------------------------------------------
 # Rolling skewness
 
-def roll_skew(ndarray[double_t] input, int win, int minp):
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def _get_zscores(ndarray[double_t] inp):
+    """Z-score `inp` using the mean and std of its finite entries only."""
+    cdef:
+        ndarray[double_t] out
+        ndarray[np.uint8_t, ndim=1, cast=True] mask  # True where inp is finite
+        double_t mu, sigma
+
+    mask = np.isfinite(inp)  # NaN and +/-inf are left out of the statistics
+    if not mask.any():
+        return inp  # nothing finite to normalise: return the input itself
+
+    mu = inp[mask].mean()  # mean over the finite entries
+    out = inp - mu  # new array, so the caller's `inp` is not modified
+    sigma = out[mask].std()  # ndarray.std() with its default arguments
+    if sigma > 0 and not np.isclose(sigma, 0.0):  # skip if (nearly) constant
+        out[mask] /= sigma  # only the finite entries are rescaled
+
+    return out
+
+def roll_skew(ndarray[double_t] inp, int win, int minp):
     cdef double val, prev
     cdef double x = 0, xx = 0, xxx = 0
     cdef Py_ssize_t nobs = 0, i
+    cdef ndarray[double_t] input = _get_zscores(inp)
     cdef Py_ssize_t N = len(input)
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
@@ -1405,11 +1427,12 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
 # Rolling kurtosis
 
 
-def roll_kurt(ndarray[double_t] input,
+def roll_kurt(ndarray[double_t] inp,
                int win, int minp):
     cdef double val, prev
     cdef double x = 0, xx = 0, xxx = 0, xxxx = 0
     cdef Py_ssize_t nobs = 0, i
+    cdef ndarray[double_t] input = _get_zscores(inp)
     cdef Py_ssize_t N = len(input)
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)

diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
@@ -321,6 +321,21 @@ def test_rolling_kurt(self):
         self._check_moment_func(mom.rolling_kurt,
                                 lambda x: kurtosis(x, bias=False))
 
+    def test_affine_invariance(self):
+        """
+        rolling skew/kurt of a*xs + b must match rolling skew/kurt of xs
+        """
+
+        xs = np.random.rand(50)  # NOTE(review): no seed is set in this method
+        window = 10
+
+        for f in mom.rolling_skew, mom.rolling_kurt:
+            left = f(xs, window)  # baseline on the untransformed data
+
+            # (a, b) = (scale, shift); only positive scales are tried here
+            for a, b in [(1, 100), (1, 5000), (100, 100), (100, 5000)]:
+                right = f(a*xs  + b, window)
+                assert_almost_equal(left, right)
+
     def test_fperr_robustness(self):
         # TODO: remove this once python 2.5 out of picture
         if PY3:
@@ -524,7 +539,7 @@ def test_ewma(self):
         self.assertTrue(np.abs(result - 1) < 1e-2)
 
         s = Series([1.0, 2.0, 4.0, 8.0])
-        
+
         expected = Series([1.0, 1.6, 2.736842, 4.923077])
         for f in [lambda s: mom.ewma(s, com=2.0, adjust=True),
                   lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False),
@@ -750,7 +765,7 @@ def _non_null_values(x):
 
             for (std, var, cov) in [(std_biased, var_biased, cov_biased),
                                     (std_unbiased, var_unbiased, cov_unbiased)]:
-                
+
                 # check that var(x), std(x), and cov(x) are all >= 0
                 var_x = var(x)
                 std_x = std(x)
@@ -762,7 +777,7 @@ def _non_null_values(x):
 
                     # check that var(x) == cov(x, x)
                     assert_equal(var_x, cov_x_x)
-                
+
                 # check that var(x) == std(x)^2
                 assert_equal(var_x, std_x * std_x)
 
@@ -796,7 +811,7 @@ def _non_null_values(x):
                             cov_x_y = cov(x, y)
                             cov_y_x = cov(y, x)
                             assert_equal(cov_x_y, cov_y_x)
-                    
+
                             # check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
                             var_x_plus_y = var(x + y)
                             var_y = var(y)
@@ -1007,7 +1022,7 @@ def test_rolling_consistency(self):
                                         expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j],
                                                                            window=window, min_periods=min_periods, center=center)
                                 assert_panel_equal(rolling_f_result, expected)
-    
+
     # binary moments
     def test_rolling_cov(self):
         A = self.series
@@ -1432,7 +1447,7 @@ def test_expanding_corr_pairwise_diff_length(self):
         assert_frame_equal(result2, expected)
         assert_frame_equal(result3, expected)
         assert_frame_equal(result4, expected)
-    
+
     def test_pairwise_stats_column_names_order(self):
         # GH 7738
         df1s = [DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[0,1]),