Skip to content

Commit

Permalink
Merge pull request #74 from asmeurer/tuples-of-arrays
Browse files Browse the repository at this point in the history
Implement Tuples of array indices
  • Loading branch information
scopatz authored Aug 26, 2020
2 parents 61440fd + 75a20d8 commit d15a974
Show file tree
Hide file tree
Showing 24 changed files with 820 additions and 592 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ install:
- conda config --add channels conda-forge
- conda update -q conda
- conda info -a
- conda create -n test-environment python=$TRAVIS_PYTHON_VERSION pyflakes pytest pytest-doctestplus numpy sympy hypothesis doctr sphinx myst-parser sphinx_rtd_theme pytest-cov pytest-flakes
- conda create -n test-environment python=$TRAVIS_PYTHON_VERSION pyflakes pytest pytest-doctestplus sympy hypothesis doctr sphinx myst-parser sphinx_rtd_theme pytest-cov pytest-flakes
- source activate test-environment
- pip install git+https://github.com/numpy/numpy.git

script:
- set -e
Expand Down
2 changes: 2 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,5 @@ These classes are only intended for internal use in ndindex.
.. autoclass:: ndindex.slice.default

.. autofunction:: ndindex.ndindex.asshape

.. autofunction:: ndindex.ndindex.operator_index
10 changes: 8 additions & 2 deletions docs/type-confusion.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,10 @@ Additionally, some advice for specific types:

- `ellipsis` is **not** singletonized, unlike the built-in `...`. It would
also be impossible to make `ellipsis() is ...` return True. If you are using
ndindex, **you should use `==` to compare against `...`**, and avoid using `is`.
ndindex, **you should use `==` to compare against `...`**, and avoid using
`is`. Note that as long as you know `idx` is an ndindex type, this is safe
to do, since even the array index types `IntegerArray` and `BooleanArray`
allow `==` comparison (unlike NumPy arrays).

**Right:**

Expand Down Expand Up @@ -278,7 +281,10 @@ Note that `np.newaxis` is just an alias for `None`.
- `Newaxis` is **not** singletonized, unlike the built-in `None`. It would
also be impossible to make `Newaxis() is np.newaxis` or `Newaxis() is None`
return True. If you are using ndindex, **you should use `==` to compare
against `np.newaxis` or `None`**, and avoid using `is`.
against `np.newaxis` or `None`**, and avoid using `is`. Note that as long as
you know `idx` is an ndindex type, this is safe to do, since even the array
index types `IntegerArray` and `BooleanArray` allow `==` comparison (unlike
NumPy arrays).

**Right:**

Expand Down
8 changes: 6 additions & 2 deletions ndindex/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class ArrayIndex(NDIndex):
# Subclasses should redefine this
dtype = None

def _typecheck(self, idx, shape=None):
def _typecheck(self, idx, shape=None, _copy=True):
if self.dtype is None:
raise TypeError("Do not instantiate the superclass ArrayIndex directly")

Expand All @@ -34,12 +34,16 @@ def _typecheck(self, idx, shape=None):
# filtered out anyway since they produce object arrays.
with warnings.catch_warnings(record=True):
a = asarray(idx)
if a is idx:
if a is idx and _copy:
a = a.copy()
if isinstance(idx, list) and 0 in a.shape:
if not _copy:
raise ValueError("_copy=False is not allowed with list input")
a = a.astype(self.dtype)
if self.dtype == intp and issubclass(a.dtype.type, integer):
if a.dtype != self.dtype:
if not _copy:
raise ValueError("If _copy=False, the input array dtype must already be intp")
a = a.astype(self.dtype)
if a.dtype != self.dtype:
raise TypeError(f"The input array to {self.__class__.__name__} must have dtype {self.dtype.__name__}, not {a.dtype}")
Expand Down
12 changes: 7 additions & 5 deletions ndindex/booleanarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ class BooleanArray(ArrayIndex):
and `a` is an array of shape `s = (s1, ..., sn, ..., sm)`, `a[idx]`
replaces the first `n` dimensions of `a` with a single dimension of size
`np.nonzero(idx)`, where each entry is included if the corresponding
element of `idx` is True.
element of `idx` is True. Each axis of the index shape must either match the
corresponding axis of the array shape or be 0; otherwise, indexing raises
`IndexError`.
The typical way of creating a mask is to use boolean operations on an
array, then index the array with that. For example, if `a` is an array of
Expand All @@ -24,12 +26,12 @@ class BooleanArray(ArrayIndex):
and replace them with a single flat dimension whose size is the
number of `True` elements in the index.
2. A boolean array index `idx` works the same as the integer index
2. A boolean array index `idx` works the same as the integer array index
`np.nonzero(idx)`. In particular, the elements of the index are always
iterated in row-major, C-style order. This does not apply to
0-dimensional boolean indices.
3. A 0-dimension boolean index (i.e., just the scalar `True` or `False`)
3. A 0-dimensional boolean index (i.e., just the scalar `True` or `False`)
can still be thought of as removing 0 dimensions and adding a single
dimension of length 1 for True or 0 for False. Hence, if `a` has shape
`(s1, ..., sn)`, then `a[True]` has shape `(1, s1, ..., sn)`, and
Expand Down Expand Up @@ -129,7 +131,8 @@ def reduce(self, shape=None, axis=0):
raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional, but {self.ndim + axis} were indexed")

for i in range(axis, axis+self.ndim):
if self.shape[i-axis] != 0 and shape[i] != 0 and 0 not in shape and shape[i] != self.shape[i-axis]:
if self.shape[i-axis] != 0 and shape[i] != self.shape[i-axis]:

raise IndexError(f"boolean index did not match indexed array along dimension {i}; dimension is {shape[i]} but corresponding boolean dimension is {self.shape[i-axis]}")

return self
Expand All @@ -140,7 +143,6 @@ def newshape(self, shape):

# reduce will raise IndexError if it should be raised
self.reduce(shape)

return (self.count_nonzero,) + shape[self.ndim:]

def isempty(self, shape=None):
Expand Down
8 changes: 2 additions & 6 deletions ndindex/integer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import operator

from .ndindex import NDIndex, asshape
from .ndindex import NDIndex, asshape, operator_index

class Integer(NDIndex):
"""
Expand Down Expand Up @@ -29,8 +27,7 @@ class Integer(NDIndex):
"""
def _typecheck(self, idx):
idx = operator.index(idx)

idx = operator_index(idx)
return (idx,)

def __index__(self):
Expand Down Expand Up @@ -96,7 +93,6 @@ def newshape(self, shape):

# reduce will raise IndexError if it should be raised
self.reduce(shape)

return shape[1:]

def as_subindex(self, index):
Expand Down
24 changes: 9 additions & 15 deletions ndindex/integerarray.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from numpy import intp, zeros
from numpy import intp

from .array import ArrayIndex
from .ndindex import asshape
Expand All @@ -14,7 +14,11 @@ class IntegerArray(ArrayIndex):
Integer arrays can also appear as part of tuple indices. In that case,
they replace the axis being indexed. If more than one integer array
appears inside of a tuple index, they are broadcast together.
appears inside of a tuple index, they are broadcast together and iterated
as one. Furthermore, if an integer array appears in a tuple index, all
integer indices in the tuple are treated as scalar integer arrays and are
also broadcast. In general, an :any:`Integer` index semantically behaves
the same as a scalar (`shape=()`) `IntegerArray`.
A list of integers may also be used in place of an integer array. Note
that NumPy treats a direct list of integers as a tuple index, but this
Expand Down Expand Up @@ -48,8 +52,9 @@ def reduce(self, shape=None, axis=0):
Reduce an `IntegerArray` index on an array of shape `shape`.
The result will either be `IndexError` if the index is invalid for the
given shape, or an `IntegerArray` index where the values are all
nonnegative.
given shape, an `IntegerArray` index where the values are all
nonnegative, or, if `self` is a scalar array index (`self.shape ==
()`), an `Integer` whose value is nonnegative.
>>> from ndindex import IntegerArray
>>> idx = IntegerArray([-5, 2])
Expand Down Expand Up @@ -81,16 +86,6 @@ def reduce(self, shape=None, axis=0):
return self

shape = asshape(shape, axis=axis)
if 0 in shape[:axis] + shape[axis+1:]:
# There are no bounds checks for empty arrays if one of the
# non-indexed axes is 0. This behavior will be deprecated in NumPy
# 1.20. Once 1.20 is released, we will change the ndindex behavior
# to match it, since we want to match all post-deprecation NumPy
# behavior. But it is impossible to test against the
# post-deprecation behavior reliably until a version of NumPy is
# released that raises the deprecation warning, so for now, we
# just match the NumPy 1.19 behavior.
return IntegerArray(zeros(self.shape, dtype=intp))

size = shape[axis]
new_array = self.array.copy()
Expand All @@ -107,7 +102,6 @@ def newshape(self, shape):

# reduce will raise IndexError if it should be raised
self.reduce(shape)

return self.shape + shape[1:]

def isempty(self, shape=None):
Expand Down
78 changes: 55 additions & 23 deletions ndindex/ndindex.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import inspect
import operator
import numbers
import warnings

from numpy import ndarray, newaxis, asarray, integer, bool_, intp
from numpy import ndarray, bool_, newaxis

def ndindex(obj):
"""
Expand All @@ -23,25 +21,21 @@ def ndindex(obj):
if isinstance(obj, NDIndex):
return obj

# TODO: Replace this with calls to the IntegerArray() and BooleanArray()
# constructors.
if isinstance(obj, (list, ndarray, bool)):
# Ignore deprecation warnings for things like [1, []]. These will be
# filtered out anyway since they produce object arrays.
with warnings.catch_warnings(record=True):
a = asarray(obj)
if isinstance(obj, list) and 0 in a.shape:
a = a.astype(intp)
if issubclass(a.dtype.type, integer):
return IntegerArray(a)
elif a.dtype == bool_:
return BooleanArray(a)
if isinstance(obj, (list, ndarray, bool, bool_)):
try:
return IntegerArray(obj)
except TypeError:
pass
try:
return BooleanArray(obj)
except TypeError:
pass

# Match the NumPy exceptions
if isinstance(obj, ndarray):
raise IndexError("arrays used as indices must be of integer (or boolean) type")
else:
# Match the NumPy exceptions
if isinstance(obj, ndarray):
raise IndexError("arrays used as indices must be of integer (or boolean) type")
else:
raise IndexError("only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices")
raise IndexError("only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices")

try:
# If operator.index() works, use that
Expand Down Expand Up @@ -420,7 +414,7 @@ def asshape(shape, axis=None):
"did you mean to use the built-in tuple type?")

if isinstance(shape, numbers.Number):
shape = (operator.index(shape),)
shape = (operator_index(shape),)

try:
l = len(shape)
Expand All @@ -432,7 +426,7 @@ def asshape(shape, axis=None):
# match that
for i in range(l):
# Raise TypeError if invalid
newshape.append(operator.index(shape[i]))
newshape.append(operator_index(shape[i]))

if shape[i] < 0:
raise ValueError("unknown (negative) dimensions are not supported")
Expand All @@ -442,3 +436,41 @@ def asshape(shape, axis=None):
raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional, but {axis + 1} were indexed")

return tuple(newshape)


def operator_index(idx):
    """
    Return `idx` as an integer via `__index__()`, rejecting boolean types.

    This behaves like `operator.index()`, except that `bool` and
    `numpy.bool_` inputs raise `TypeError` instead of being converted.

    Rejecting booleans is a slight break in NumPy compatibility: NumPy
    accepts bools in some contexts where `__index__()` is used (for instance,
    in slices) but disallows them in others (such as shapes). They are
    disallowed entirely here for two reasons:

    1. `numpy.bool_.__index__()` is deprecated (it currently matches the
       built-in `bool.__index__()` and returns the object unchanged, but
       prints a deprecation warning).
    2. For raw indices, booleans and `0`/`1` are completely different, i.e.,
       `a[True]` is *not* the same as `a[1]`.

    >>> from ndindex.ndindex import operator_index
    >>> operator_index(1)
    1
    >>> operator_index(1.0)
    Traceback (most recent call last):
    ...
    TypeError: 'float' object cannot be interpreted as an integer
    >>> operator_index(True)
    Traceback (most recent call last):
    ...
    TypeError: 'bool' object cannot be interpreted as an integer

    """
    import operator

    # Pick the rejection message first; anything that is not a boolean type
    # falls through to the regular operator.index() conversion, which raises
    # its own TypeError for non-integer input.
    if isinstance(idx, bool):
        message = "'bool' object cannot be interpreted as an integer"
    elif isinstance(idx, bool_):
        message = "'np.bool_' object cannot be interpreted as an integer"
    else:
        return operator.index(idx)
    raise TypeError(message)
10 changes: 4 additions & 6 deletions ndindex/slice.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import operator

from sympy.ntheory.modular import crt
from sympy import ilcm, Rational

from .ndindex import NDIndex, asshape
from .ndindex import NDIndex, asshape, operator_index

class default:
"""
Expand Down Expand Up @@ -65,11 +63,11 @@ def _typecheck(self, start, stop=default, step=None):
raise ValueError("slice step cannot be zero")

if start is not None:
start = operator.index(start)
start = operator_index(start)
if stop is not None:
stop = operator.index(stop)
stop = operator_index(stop)
if step is not None:
step = operator.index(step)
step = operator_index(step)

args = (start, stop, step)

Expand Down
Loading

0 comments on commit d15a974

Please sign in to comment.