From 737c03305d2376f2991ef6216a8c1ac08b561d65 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 6 Feb 2018 21:43:37 +0700 Subject: [PATCH 01/20] First stab at using deque over full iterations --- pandas/_libs/window.pyx | 51 +++++++++++++++++++++++++++++------------ pandas/core/window.py | 1 + setup.py | 5 ++-- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index cacb073da581c..fe90a6759bde5 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -3,6 +3,7 @@ cimport cython from cython cimport Py_ssize_t +from libcpp.deque cimport deque from libc.stdlib cimport malloc, free @@ -1224,6 +1225,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, bint is_variable, should_replace int64_t s, e, N, i, j, removed Py_ssize_t nobs = 0 + deque Q[int64_t] ndarray[int64_t] starti, endi ndarray[numeric, ndim=1] output cdef: @@ -1242,29 +1244,48 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, output = np.empty(N, dtype=input.dtype) + Q = deque[int64_t]() + if is_variable: with nogil: - for i in range(N): - s = starti[i] - e = endi[i] + for i from 0 <= i < win: + while not Q.empty() and input[i] >= input[Q.back()]: + Q.pop_back() + Q.push_back(i) - r = input[s] - nobs = 0 - for j in range(s, e): + for i from win <= i < N: + output[i-1] = input[Q.front()] - # adds, death at the i offset - ai = init_mm(input[j], &nobs, is_max) + while not Q.empty() and input[i] >= input[Q.back()]: + Q.pop_back() - if is_max: - if ai > r: - r = ai - else: - if ai < r: - r = ai + while not Q.empty() and Q.front() <= i-win: + Q.pop_front() + + Q.push_back(i) - output[i] = calc_mm(minp, nobs, r) + output[N-1] = input[Q[0]] +# for i in range(N): +# s = starti[i] +# e = endi[i] +# +# r = input[s] +# nobs = 0 +# for j in range(s, e): +# +# # adds, death at the i offset +# ai = init_mm(input[j], &nobs, is_max) +# +# if is_max: +# if ai > r: +# r = ai +# else: +# if ai < r: +# 
r = ai +# +# output[i] = calc_mm(minp, nobs, r) else: diff --git a/pandas/core/window.py b/pandas/core/window.py index a3f19ef50459d..7acf64c916701 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1689,6 +1689,7 @@ def _apply(self, func, **kwargs): y : type of input argument """ + import ipdb; ipdb.set_trace() blocks, obj, index = self._create_blocks() results = [] for b in blocks: diff --git a/setup.py b/setup.py index 5397a1b84dc4d..6cb8d28a01e51 100755 --- a/setup.py +++ b/setup.py @@ -616,7 +616,8 @@ def pxd(name): 'pyxfile': '_libs/testing'}, '_libs.window': { 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist', '_libs/src/util']}, + 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], + 'language': 'c++'}, '_libs.writers': { 'pyxfile': '_libs/writers', 'pxdfiles': ['_libs/src/util']}, @@ -639,11 +640,11 @@ def pxd(name): sources=sources, depends=data.get('depends', []), include_dirs=include, + language=data.get('language', 'c'), extra_compile_args=extra_compile_args) extensions.append(obj) - # ---------------------------------------------------------------------- # msgpack From 15d2563fbfb6789eb539d1ea4e6c3c69ea469577 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 6 Feb 2018 22:11:26 +0700 Subject: [PATCH 02/20] Working deque implementation of min/max --- pandas/_libs/window.pyx | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index fe90a6759bde5..cacfa4b68645d 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1251,22 +1251,34 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, with nogil: for i from 0 <= i < win: - while not Q.empty() and input[i] >= input[Q.back()]: - Q.pop_back() + ai = init_mm(input[i], &nobs, is_max) + + if is_max: + while not Q.empty() and ai >= input[Q.back()]: + Q.pop_back() + else: + while not Q.empty() and ai <= input[Q.back()]: + Q.pop_back() Q.push_back(i) for i from 
win <= i < N: - output[i-1] = input[Q.front()] + output[i-1] = calc_mm(minp, nobs, input[Q.front()]) - while not Q.empty() and input[i] >= input[Q.back()]: - Q.pop_back() + ai = init_mm(input[i], &nobs, is_max) + + if is_max: + while not Q.empty() and ai >= input[Q.back()]: + Q.pop_back() + else: + while not Q.empty() and ai <= input[Q.back()]: + Q.pop_back() while not Q.empty() and Q.front() <= i-win: Q.pop_front() Q.push_back(i) - output[N-1] = input[Q[0]] + output[N-1] = calc_mm(minp, nobs, input[Q[0]]) # for i in range(N): # s = starti[i] # e = endi[i] @@ -1288,7 +1300,6 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, # output[i] = calc_mm(minp, nobs, r) else: - # setup the rings of death! ring = malloc(win * sizeof(numeric)) death = malloc(win * sizeof(int64_t)) From 06f26585c7389026b0db6d7c7dc32e42100b0d8a Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 6 Feb 2018 22:17:35 +0700 Subject: [PATCH 03/20] oops --- pandas/core/window.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 7acf64c916701..a3f19ef50459d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1689,7 +1689,6 @@ def _apply(self, func, **kwargs): y : type of input argument """ - import ipdb; ipdb.set_trace() blocks, obj, index = self._create_blocks() results = [] for b in blocks: From 8089e676c902487c09db9cf7029ff0ab44229a9a Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 6 Feb 2018 22:26:13 +0700 Subject: [PATCH 04/20] Remove some extraneous variables --- pandas/_libs/window.pyx | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index cacfa4b68645d..3d6f9ba6b9882 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1223,10 +1223,11 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: numeric ai bint is_variable, should_replace - int64_t s, e, N, i, j, 
removed + int64_t N, i, removed Py_ssize_t nobs = 0 deque Q[int64_t] - ndarray[int64_t] starti, endi + ndarray[int64_t] _ + # ndarray[int64_t] starti, endi ndarray[numeric, ndim=1] output cdef: int64_t* death @@ -1238,7 +1239,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: cdef numeric r - starti, endi, N, win, minp, is_variable = get_window_indexer( + _, _ , N, win, minp, is_variable = get_window_indexer( input, win, minp, index, closed) @@ -1279,25 +1280,6 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, Q.push_back(i) output[N-1] = calc_mm(minp, nobs, input[Q[0]]) -# for i in range(N): -# s = starti[i] -# e = endi[i] -# -# r = input[s] -# nobs = 0 -# for j in range(s, e): -# -# # adds, death at the i offset -# ai = init_mm(input[j], &nobs, is_max) -# -# if is_max: -# if ai > r: -# r = ai -# else: -# if ai < r: -# r = ai -# -# output[i] = calc_mm(minp, nobs, r) else: # setup the rings of death! From 08ff55330f87dd2ddd18925997d2df76965a8664 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 6 Feb 2018 22:38:25 +0700 Subject: [PATCH 05/20] Get rid of some of the branches in the code --- pandas/_libs/window.pyx | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 3d6f9ba6b9882..ab501d2bb042c 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1223,6 +1223,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: numeric ai bint is_variable, should_replace + int is_max_multiplier = (2 * is_max - 1) int64_t N, i, removed Py_ssize_t nobs = 0 deque Q[int64_t] @@ -1254,12 +1255,8 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, for i from 0 <= i < win: ai = init_mm(input[i], &nobs, is_max) - if is_max: - while not Q.empty() and ai >= input[Q.back()]: - Q.pop_back() - else: - while not Q.empty() and ai <= input[Q.back()]: - Q.pop_back() + while Q.empty() and 
is_max_multiplier * (ai - input[Q.back()]) >= 0: + Q.pop_back() Q.push_back(i) for i from win <= i < N: @@ -1267,12 +1264,8 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, ai = init_mm(input[i], &nobs, is_max) - if is_max: - while not Q.empty() and ai >= input[Q.back()]: - Q.pop_back() - else: - while not Q.empty() and ai <= input[Q.back()]: - Q.pop_back() + while Q.empty() and is_max_multiplier * (ai - input[Q.back()]) >= 0: + Q.pop_back() while not Q.empty() and Q.front() <= i-win: Q.pop_front() From 6ef87b2010eb8bb0e195f50d6d12a84985cbad59 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 6 Feb 2018 22:43:36 +0700 Subject: [PATCH 06/20] Revert "Get rid of some of the branches in the code" This reverts commit 08ff55330f87dd2ddd18925997d2df76965a8664. --- pandas/_libs/window.pyx | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index ab501d2bb042c..3d6f9ba6b9882 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1223,7 +1223,6 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: numeric ai bint is_variable, should_replace - int is_max_multiplier = (2 * is_max - 1) int64_t N, i, removed Py_ssize_t nobs = 0 deque Q[int64_t] @@ -1255,8 +1254,12 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, for i from 0 <= i < win: ai = init_mm(input[i], &nobs, is_max) - while Q.empty() and is_max_multiplier * (ai - input[Q.back()]) >= 0: - Q.pop_back() + if is_max: + while not Q.empty() and ai >= input[Q.back()]: + Q.pop_back() + else: + while not Q.empty() and ai <= input[Q.back()]: + Q.pop_back() Q.push_back(i) for i from win <= i < N: @@ -1264,8 +1267,12 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, ai = init_mm(input[i], &nobs, is_max) - while Q.empty() and is_max_multiplier * (ai - input[Q.back()]) >= 0: - Q.pop_back() + if is_max: + while not Q.empty() and ai >= 
input[Q.back()]: + Q.pop_back() + else: + while not Q.empty() and ai <= input[Q.back()]: + Q.pop_back() while not Q.empty() and Q.front() <= i-win: Q.pop_front() From 6e8c0414b00caf02148fdebddf68bc18136b8567 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 7 Feb 2018 10:23:20 +0700 Subject: [PATCH 07/20] Prefer cmath over math.h for cpp --- pandas/_libs/src/headers/math.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/headers/math.h b/pandas/_libs/src/headers/math.h index 34ad9f24a58f9..eb9e13e7d621a 100644 --- a/pandas/_libs/src/headers/math.h +++ b/pandas/_libs/src/headers/math.h @@ -2,10 +2,10 @@ #define _PANDAS_MATH_H_ #if defined(_MSC_VER) && (_MSC_VER < 1800) -#include +#include __inline int signbit(double num) { return _copysign(1.0, num) < 0; } #else -#include +#include #endif #endif From b0a0ef63185935997201d5ecc253d1d0df775abd Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 7 Feb 2018 13:08:57 +0700 Subject: [PATCH 08/20] Change to std namespace --- pandas/_libs/window.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 3d6f9ba6b9882..de59afadc2edf 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -13,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t cnp.import_array() -cdef extern from "../src/headers/math.h": +cdef extern from "../src/headers/math.h" namespace "std": int signbit(double) nogil double sqrt(double x) nogil From b19774e2b5c2c340de1b3accd104ac39511ab432 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 7 Feb 2018 14:38:45 +0700 Subject: [PATCH 09/20] Fix issue with variable window size --- pandas/_libs/window.pyx | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index de59afadc2edf..678b879e3c8e7 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1226,8 +1226,7 @@ cdef 
_roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, int64_t N, i, removed Py_ssize_t nobs = 0 deque Q[int64_t] - ndarray[int64_t] _ - # ndarray[int64_t] starti, endi + ndarray[int64_t] starti, endi ndarray[numeric, ndim=1] output cdef: int64_t* death @@ -1239,7 +1238,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: cdef numeric r - _, _ , N, win, minp, is_variable = get_window_indexer( + starti, endi , N, win, minp, is_variable = get_window_indexer( input, win, minp, index, closed) @@ -1251,7 +1250,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, with nogil: - for i from 0 <= i < win: + for i from 0 <= i < endi[0]: ai = init_mm(input[i], &nobs, is_max) if is_max: @@ -1262,7 +1261,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, Q.pop_back() Q.push_back(i) - for i from win <= i < N: + for i from endi[0] <= i < N: output[i-1] = calc_mm(minp, nobs, input[Q.front()]) ai = init_mm(input[i], &nobs, is_max) @@ -1279,7 +1278,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, Q.push_back(i) - output[N-1] = calc_mm(minp, nobs, input[Q[0]]) + output[N-1] = calc_mm(minp, nobs, input[Q.front()]) else: # setup the rings of death! 
From 92857eee1f22f0009f95efaa31abf16e609545e1 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 7 Feb 2018 15:07:17 +0700 Subject: [PATCH 10/20] Oh right variable window size ;) --- pandas/_libs/window.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 678b879e3c8e7..ea303558e33d8 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1223,7 +1223,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: numeric ai bint is_variable, should_replace - int64_t N, i, removed + int64_t N, i, removed, window_i Py_ssize_t nobs = 0 deque Q[int64_t] ndarray[int64_t] starti, endi @@ -1250,7 +1250,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, with nogil: - for i from 0 <= i < endi[0]: + for i from starti[0] <= i < endi[0]: ai = init_mm(input[i], &nobs, is_max) if is_max: @@ -1273,7 +1273,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, while not Q.empty() and ai <= input[Q.back()]: Q.pop_back() - while not Q.empty() and Q.front() <= i-win: + while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]): Q.pop_front() Q.push_back(i) From 832ff9d0c302ecc67652221b2f3e622cf5c17d47 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 7 Feb 2018 16:06:30 +0700 Subject: [PATCH 11/20] Use std namespace for windows compilation --- pandas/_libs/src/headers/math.h | 11 ----------- pandas/_libs/window.pyx | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) delete mode 100644 pandas/_libs/src/headers/math.h diff --git a/pandas/_libs/src/headers/math.h b/pandas/_libs/src/headers/math.h deleted file mode 100644 index eb9e13e7d621a..0000000000000 --- a/pandas/_libs/src/headers/math.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _PANDAS_MATH_H_ -#define _PANDAS_MATH_H_ - -#if defined(_MSC_VER) && (_MSC_VER < 1800) -#include -__inline int signbit(double num) { return _copysign(1.0, num) < 0; } -#else -#include 
-#endif - -#endif diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index ea303558e33d8..d14d237519587 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -13,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t cnp.import_array() -cdef extern from "../src/headers/math.h" namespace "std": +cdef extern from "../src/headers/cmath" namespace "std": int signbit(double) nogil double sqrt(double x) nogil From f00e994d93956f2c2f333381c8a3b47c7b002c19 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 7 Feb 2018 18:49:29 +0700 Subject: [PATCH 12/20] Add cmath so build will complete --- pandas/_libs/src/headers/cmath | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 pandas/_libs/src/headers/cmath diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath new file mode 100644 index 0000000000000..7df50021d3678 --- /dev/null +++ b/pandas/_libs/src/headers/cmath @@ -0,0 +1,11 @@ +#ifndef _PANDAS_MATH_H_ +#define _PANDAS_MATH_H_ + +#if defined(_MSC_VER) && (_MSC_VER < 1800) +#include +__inline int std::signbit(double num) { return _copysign(1.0, num) < 0; } +#else +#include +#endif + +#endif From 1ab4e2161b6ac12b46c44f22df8834b4e96be014 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Thu, 8 Feb 2018 09:57:38 +0700 Subject: [PATCH 13/20] Fix linting error in window.pyx --- pandas/_libs/window.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index d14d237519587..8fd8e5c1602ca 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1238,13 +1238,13 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: cdef numeric r - starti, endi , N, win, minp, is_variable = get_window_indexer( + starti, endi, N, win, minp, is_variable = get_window_indexer( input, win, minp, index, closed) output = np.empty(N, dtype=input.dtype) - Q = deque[int64_t]() + Q = deque[int64_t]() if is_variable: From 
d5b60cd9d83aea2754d0b27484498a08052f268a Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Thu, 8 Feb 2018 10:17:17 +0700 Subject: [PATCH 14/20] I think this will fix MSVC build --- pandas/_libs/src/headers/cmath | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index 7df50021d3678..e8cd1e01613a0 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -3,7 +3,9 @@ #if defined(_MSC_VER) && (_MSC_VER < 1800) #include -__inline int std::signbit(double num) { return _copysign(1.0, num) < 0; } +using namespace std; + +__inline int signbit(double num) { return _copysign(1.0, num) < 0; } #else #include #endif From 42f8fdfd58a02e6b410f62b30aa7a08d896f7f2a Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Thu, 8 Feb 2018 16:49:04 +0700 Subject: [PATCH 15/20] I think this is what I want to do --- pandas/_libs/src/headers/cmath | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index e8cd1e01613a0..e552c0a1b1359 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -3,9 +3,9 @@ #if defined(_MSC_VER) && (_MSC_VER < 1800) #include -using namespace std; - -__inline int signbit(double num) { return _copysign(1.0, num) < 0; } +namespace std { + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } +} #else #include #endif From 38e3f707af97095965c14ad6781a6e4d77b77ce4 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Fri, 9 Feb 2018 10:31:21 +0700 Subject: [PATCH 16/20] Add documentation --- pandas/_libs/src/headers/cmath | 2 ++ pandas/_libs/window.pyx | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index e552c0a1b1359..15ce4480f0e8d 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -1,6 +1,8 @@ #ifndef _PANDAS_MATH_H_ #define 
_PANDAS_MATH_H_ +# In older versions of Visual Studio there wasn't a std::signbit defined +# This defines it using _copysign #if defined(_MSC_VER) && (_MSC_VER < 1800) #include namespace std { diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 8fd8e5c1602ca..aa13f03d8e9e4 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1250,6 +1250,11 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, with nogil: + # This is a modified version of the C++ code in this + # SO post: http://bit.ly/2nOoHlY + # The original implementation only handled fixed window sizes, + # so it has been adapted here to handle variable window sizes + for i from starti[0] <= i < endi[0]: ai = init_mm(input[i], &nobs, is_max) From 7f4abf96fb07a6c6036e1ca964d50a3238d9027b Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Fri, 9 Feb 2018 10:47:00 +0700 Subject: [PATCH 17/20] Add another benchmark to test variable window methods --- asv_bench/benchmarks/rolling.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 75990d83f8212..7eeaae04b51ad 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -16,12 +16,20 @@ class Methods(object): def setup(self, constructor, window, dtype, method): N = 10**5 - arr = np.random.random(N).astype(dtype) + arr = (100 * np.random.random(N)).astype(dtype) self.roll = getattr(pd, constructor)(arr).rolling(window) def time_rolling(self, constructor, window, dtype, method): getattr(self.roll, method)() +class VariableWindowMethods(Methods): + params[1] = ['50s', '1h', '1d'] + + def setup(self, constructor, window, dtype, method): + N = 10**5 + arr = (100 * np.random.random(N)).astype(dtype) + index = pd.date_range('2017-01-01', periods=N, freq='5s') + self.roll = getattr(pd, constructor)(arr, index=index) class Pairwise(object): From 23fe8160f170bf1703a79188bad8671f79ed33da Mon Sep 17 00:00:00 2001 From: Matthew
Kirk Date: Fri, 9 Feb 2018 11:10:27 +0700 Subject: [PATCH 18/20] Ugh use // not # --- pandas/_libs/src/headers/cmath | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index 15ce4480f0e8d..d8e2239406cae 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -1,8 +1,8 @@ #ifndef _PANDAS_MATH_H_ #define _PANDAS_MATH_H_ -# In older versions of Visual Studio there wasn't a std::signbit defined -# This defines it using _copysign +// In older versions of Visual Studio there wasn't a std::signbit defined +// This defines it using _copysign #if defined(_MSC_VER) && (_MSC_VER < 1800) #include namespace std { From 060dfb77bd14a1c3d25ee64928d8ae7ecd4f84ca Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Fri, 9 Feb 2018 13:18:25 +0700 Subject: [PATCH 19/20] New better benchmark --- asv_bench/benchmarks/rolling.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 7eeaae04b51ad..ba25ad6c5eda6 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -23,13 +23,19 @@ def time_rolling(self, constructor, window, dtype, method): getattr(self.roll, method)() class VariableWindowMethods(Methods): - params[1] = ['50s', '1h', '1d'] + sample_time = 0.2 + params = (['DataFrame', 'Series'], + ['50s', '1h', '1d'], + ['int', 'float'], + ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', + 'sum']) + param_names = ['contructor', 'window', 'dtype', 'method'] def setup(self, constructor, window, dtype, method): N = 10**5 arr = (100 * np.random.random(N)).astype(dtype) index = pd.date_range('2017-01-01', periods=N, freq='5s') - self.roll = getattr(pd, constructor)(arr, index=index) + self.roll = getattr(pd, constructor)(arr, index=index).rolling(window) class Pairwise(object): From aeb9b9b7af38cd4086670646dd79b22061d943e0 Mon Sep 17 00:00:00 2001 
From: Matthew Kirk Date: Mon, 12 Feb 2018 10:21:33 +0700 Subject: [PATCH 20/20] Add whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6c4fce35529ad..b1aae0fecf50d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -580,6 +580,7 @@ Performance Improvements - Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) - Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) - Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) +- Improved performance of variable-window ``.rolling()`` with ``.min()`` and ``.max()`` operations (:issue:`19521`) .. _whatsnew_0230.docs: