Skip to content

Commit

Permalink
MAINT: Feature/cython fixes (stefan-jansen#49)
Browse files Browse the repository at this point in the history
* fix unreachable code

* fixed missing typed index

* clean up and fix

* use NPY_NO_DEPRECATED_API

* remove unused options
  • Loading branch information
MBounouar committed Oct 27, 2021
1 parent 0214e38 commit c67d24e
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 79 deletions.
44 changes: 35 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,53 +80,79 @@ def window_specialization(typename):
name=f"zipline.lib._{typename}window",
sources=[f"src/zipline/lib/_{typename}window.pyx"],
depends=["src/zipline/lib/_windowtemplate.pxi"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
)


ext_options = dict(
compiler_directives=dict(profile=True, language_level="3"), annotate=True
compiler_directives=dict(profile=True, language_level="3"),
annotate=True,
)
ext_modules = [
Extension(
name="zipline.assets._assets", sources=["src/zipline/assets/_assets.pyx"]
name="zipline.assets._assets",
sources=["src/zipline/assets/_assets.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.assets.continuous_futures",
sources=["src/zipline/assets/continuous_futures.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.lib.adjustment", sources=["src/zipline/lib/adjustment.pyx"]
name="zipline.lib.adjustment",
sources=["src/zipline/lib/adjustment.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.lib._factorize", sources=["src/zipline/lib/_factorize.pyx"]
name="zipline.lib._factorize",
sources=["src/zipline/lib/_factorize.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
window_specialization("float64"),
window_specialization("int64"),
window_specialization("int64"),
window_specialization("uint8"),
window_specialization("label"),
Extension(name="zipline.lib.rank", sources=["src/zipline/lib/rank.pyx"]),
Extension(
name="zipline.data._equities", sources=["src/zipline/data/_equities.pyx"]
name="zipline.lib.rank",
sources=["src/zipline/lib/rank.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.data._equities",
sources=["src/zipline/data/_equities.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.data._adjustments",
sources=["src/zipline/data/_adjustments.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline._protocol",
sources=["src/zipline/_protocol.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(name="zipline._protocol", sources=["src/zipline/_protocol.pyx"]),
Extension(
name="zipline.finance._finance_ext",
sources=["src/zipline/finance/_finance_ext.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.gens.sim_engine", sources=["src/zipline/gens/sim_engine.pyx"]
name="zipline.gens.sim_engine",
sources=["src/zipline/gens/sim_engine.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.data._minute_bar_internal",
sources=["src/zipline/data/_minute_bar_internal.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
name="zipline.data._resample", sources=["src/zipline/data/_resample.pyx"]
name="zipline.data._resample",
sources=["src/zipline/data/_resample.pyx"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
]
for ext_module in ext_modules:
Expand Down
8 changes: 5 additions & 3 deletions src/zipline/gens/sim_engine.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

cimport cython
cimport numpy as np
import numpy as np
import pandas as pd
cimport cython
from cpython cimport bool

cdef np.int64_t _nanos_in_minute = 60000000000
Expand Down Expand Up @@ -62,8 +62,8 @@ cdef class MinuteSimulationClock:
for session_idx, session_nano in enumerate(self.sessions_nanos):
minutes_nanos = np.arange(
self.market_opens_nanos[session_idx],
self.market_closes_nanos[session_idx] + _nanos_in_minute,
_nanos_in_minute
self.market_closes_nanos[session_idx] + NANOS_IN_MINUTE,
NANOS_IN_MINUTE
)
minutes_by_session[session_nano] = pd.to_datetime(
minutes_nanos, utc=True
Expand All @@ -73,6 +73,8 @@ cdef class MinuteSimulationClock:
def __iter__(self):
minute_emission = self.minute_emission

cdef Py_ssize_t idx

for idx, session_nano in enumerate(self.sessions_nanos):
yield pd.Timestamp(session_nano, tz='UTC'), SESSION_START

Expand Down
16 changes: 8 additions & 8 deletions src/zipline/lib/adjustment.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,15 @@ cdef type _choose_adjustment_type(AdjustmentKind adjustment_kind,
if adjustment_kind in (ADD, MULTIPLY):
if column_type is np.float64_t:
return _float_adjustment_types[adjustment_kind]

raise TypeError(
"Can't construct %s Adjustment with value of type %r.\n"
"ADD and MULTIPLY adjustments are only supported for "
"floating point data." % (
ADJUSTMENT_KIND_NAMES[adjustment_kind],
type(value),
else:
raise TypeError(
"Can't construct %s Adjustment with value of type %r.\n"
"ADD and MULTIPLY adjustments are only supported for "
"floating point data." % (
ADJUSTMENT_KIND_NAMES[adjustment_kind],
type(value),
)
)
)

elif adjustment_kind == OVERWRITE:
if column_type is np.float64_t:
Expand Down
92 changes: 33 additions & 59 deletions src/zipline/lib/rank.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,25 @@
Functions for ranking and sorting.
"""
cimport cython
cimport numpy as np

import numpy as np
from cpython cimport bool
from numpy cimport (
float64_t,
import_array,
intp_t,
int64_t,
ndarray,
NPY_DOUBLE,
NPY_MERGESORT,
PyArray_ArgSort,
PyArray_DIMS,
PyArray_EMPTY,
uint8_t,
)
from numpy import apply_along_axis, float64, isnan, nan, zeros_like
from scipy.stats import rankdata

from zipline.utils.numpy_utils import (
is_missing,
float64_dtype,
int64_dtype,
datetime64ns_dtype,
)
from zipline.utils.numpy_utils import is_missing


import_array()
np.import_array()


def rankdata_1d_descending(ndarray data, str method):
def rankdata_1d_descending(np.ndarray data, str method):
"""
1D descending version of scipy.stats.rankdata.
"""
return rankdata(-(data.view(float64)), method=method)
return rankdata(-(data.view(np.float64)), method=method)


def masked_rankdata_2d(ndarray data,
ndarray mask,
def masked_rankdata_2d(np.ndarray data,
np.ndarray mask,
object missing_value,
str method,
bool ascending):
Expand All @@ -51,11 +33,11 @@ def masked_rankdata_2d(ndarray data,
"Can't compute rankdata on array of dtype %r." % dtype_name
)

cdef ndarray missing_locations = (~mask | is_missing(data, missing_value))
cdef np.ndarray missing_locations = (~mask | is_missing(data, missing_value))

# Interpret the bytes of integral data as floats for sorting.
data = data.copy().view(float64)
data[missing_locations] = nan
data = data.copy().view(np.float64)
data[missing_locations] = np.nan
if not ascending:
data = -data

Expand All @@ -67,7 +49,7 @@ def masked_rankdata_2d(ndarray data,
# FUTURE OPTIMIZATION:
# Write a less general "apply to rows" method that doesn't do all
# the extra work that apply_along_axis does.
result = apply_along_axis(rankdata, 1, data, method=method)
result = np.apply_along_axis(rankdata, 1, data, method=method)

# On SciPy >= 0.17, rankdata returns integers for any method except
# average.
Expand All @@ -76,33 +58,30 @@ def masked_rankdata_2d(ndarray data,

# rankdata will sort missing values into last place, but we want our nans
# to propagate, so explicitly re-apply.
result[missing_locations] = nan
result[missing_locations] = np.nan
return result


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.embedsignature(True)
cpdef rankdata_2d_ordinal(ndarray[float64_t, ndim=2] array):
cpdef rankdata_2d_ordinal(np.ndarray[np.float64_t, ndim=2] array):
"""
Equivalent to:
numpy.apply_over_axis(scipy.stats.rankdata, 1, array, method='ordinal')
"""
cdef:
int nrows, ncols
ndarray[Py_ssize_t, ndim=2] sort_idxs
ndarray[float64_t, ndim=2] out

nrows = array.shape[0]
ncols = array.shape[1]
Py_ssize_t nrows = np.PyArray_DIMS(array)[0]
Py_ssize_t ncols = np.PyArray_DIMS(array)[1]
Py_ssize_t[:, ::1] sort_idxs
np.ndarray[np.float64_t, ndim=2] out

# scipy.stats.rankdata explicitly uses MERGESORT instead of QUICKSORT for
# the ordinal branch. c.f. commit ab21d2fee2d27daca0b2c161bbb7dba7e73e70ba
sort_idxs = PyArray_ArgSort(array, 1, NPY_MERGESORT)
sort_idxs = np.PyArray_ArgSort(array, 1, np.NPY_MERGESORT)

# Roughly, "out = np.empty_like(array)"
out = PyArray_EMPTY(2, PyArray_DIMS(array), NPY_DOUBLE, False)
out = np.PyArray_EMPTY(2, np.PyArray_DIMS(array), np.NPY_DOUBLE, False)

cdef Py_ssize_t i
cdef Py_ssize_t j
Expand All @@ -115,23 +94,21 @@ cpdef rankdata_2d_ordinal(ndarray[float64_t, ndim=2] array):


@cython.embedsignature(True)
cpdef grouped_masked_is_maximal(ndarray[int64_t, ndim=2] data,
ndarray[int64_t, ndim=2] groupby,
ndarray[uint8_t, ndim=2] mask):
cpdef grouped_masked_is_maximal(np.ndarray[np.int64_t, ndim=2] data,
np.int64_t[:, ::1] groupby,
np.uint8_t[:, ::1] mask):
"""Build a mask of the top value for each row in ``data``, grouped by
``groupby`` and masked by ``mask``.
Parameters
----------
data : np.array[int64_t]
data : np.array[np.int64_t]
Data on which we should find maximal values for each row.
groupby : np.array[int64_t]
groupby : np.array[np.int64_t]
Grouping labels for rows of ``data``. We choose one entry in each
row for each unique grouping key in that row.
mask : np.array[uint8_t]
mask : np.array[np.uint8_t]
Boolean mask of locations to consider as possible maximal values.
Locations with a 0 in ``mask`` are ignored.
Returns
-------
maximal_locations : np.array[bool]
Expand All @@ -152,15 +129,12 @@ cpdef grouped_masked_is_maximal(ndarray[int64_t, ndim=2] data,
cdef:
Py_ssize_t i
Py_ssize_t j
Py_ssize_t nrows
Py_ssize_t ncols
int64_t group
int64_t value
ndarray[uint8_t, ndim=2] out = zeros_like(mask)
np.int64_t group
np.int64_t value
np.ndarray[np.uint8_t, ndim=2] out = np.zeros_like(mask)
dict best_per_group = {}

nrows = data.shape[0]
ncols = data.shape[1]
Py_ssize_t nrows = np.PyArray_DIMS(data)[0]
Py_ssize_t ncols = np.PyArray_DIMS(data)[1]

for i in range(nrows):
best_per_group.clear()
Expand All @@ -184,4 +158,4 @@ cpdef grouped_masked_is_maximal(ndarray[int64_t, ndim=2] data,
for j in best_per_group.values():
out[i, j] = 1

return out.view(bool)
return out.view(bool)

0 comments on commit c67d24e

Please sign in to comment.