Skip to content

Commit b864812

Browse files
committed
BUG: Fix inaccurate rolling.var calculation
1 parent 1798c9d commit b864812

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

pandas/_libs/window.pyx

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -661,9 +661,9 @@ cdef inline void add_var(double val, double *nobs, double *mean_x,
661661
if val == val:
662662
nobs[0] = nobs[0] + 1
663663

664-
delta = (val - mean_x[0])
664+
delta = val - mean_x[0]
665665
mean_x[0] = mean_x[0] + delta / nobs[0]
666-
ssqdm_x[0] = ssqdm_x[0] + delta * (val - mean_x[0])
666+
ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0]
667667

668668

669669
cdef inline void remove_var(double val, double *nobs, double *mean_x,
@@ -675,9 +675,9 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x,
675675
if val == val:
676676
nobs[0] = nobs[0] - 1
677677
if nobs[0]:
678-
delta = (val - mean_x[0])
678+
delta = val - mean_x[0]
679679
mean_x[0] = mean_x[0] - delta / nobs[0]
680-
ssqdm_x[0] = ssqdm_x[0] - delta * (val - mean_x[0])
680+
ssqdm_x[0] = ssqdm_x[0] - ((nobs[0] + 1) * delta ** 2) / nobs[0]
681681
else:
682682
mean_x[0] = 0
683683
ssqdm_x[0] = 0
@@ -689,7 +689,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
689689
Numerically stable implementation using Welford's method.
690690
"""
691691
cdef:
692-
double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta
692+
double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old
693693
int64_t s, e
694694
bint is_variable
695695
Py_ssize_t i, j, N
@@ -760,10 +760,12 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
760760

761761
# Adding one observation and removing another one
762762
delta = val - prev
763-
prev -= mean_x
763+
mean_x_old = mean_x
764+
764765
mean_x += delta / nobs
765-
val -= mean_x
766-
ssqdm_x += (val + prev) * delta
766+
ssqdm_x += ((nobs - 1) * val
767+
+ (nobs + 1) * prev
768+
- 2 * nobs * mean_x_old) * delta / nobs
767769

768770
else:
769771
add_var(val, &nobs, &mean_x, &ssqdm_x)

0 commit comments

Comments
 (0)