Merge pull request #74 from asmeurer/tuples-of-arrays

Implement Tuples of array indices
Quansight-Labs · Aug 26, 2020 · d15a974 · d15a974
2 parents 61440fd + 75a20d8
commit d15a974
Show file tree

Hide file tree

Showing 24 changed files with 820 additions and 592 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -27,8 +27,9 @@ install:
   - conda config --add channels conda-forge
   - conda update -q conda
   - conda info -a
-  - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION pyflakes pytest pytest-doctestplus numpy sympy hypothesis doctr sphinx myst-parser sphinx_rtd_theme pytest-cov pytest-flakes
+  - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION pyflakes pytest pytest-doctestplus sympy hypothesis doctr sphinx myst-parser sphinx_rtd_theme pytest-cov pytest-flakes
   - source activate test-environment
+  - pip install git+https://github.com/numpy/numpy.git
 
 script:
   - set -e

diff --git a/docs/api.rst b/docs/api.rst
@@ -97,3 +97,5 @@ These classes are only intended for internal use in ndindex.
 .. autoclass:: ndindex.slice.default
 
 .. autofunction:: ndindex.ndindex.asshape
+
+.. autofunction:: ndindex.ndindex.operator_index
diff --git a/docs/type-confusion.md b/docs/type-confusion.md
@@ -208,7 +208,10 @@ Additionally, some advice for specific types:
 
 - `ellipsis` is **not** singletonized, unlike the built-in `...`. It would
   also be impossible to make `ellipsis() is ...` return True. If you are using
-  ndindex, **you should use `==` to compare against `...`**, and avoid using `is`.
+  ndindex, **you should use `==` to compare against `...`**, and avoid using
+  `is`. Note that as long as you know `idx` is an ndindex type, this is safe
+  to do, since even the array index types `IntegerArray` and `BooleanArray`
+  allow `==` comparison (unlike NumPy arrays, where `==` is elementwise).
 
   **Right:**
 
@@ -278,7 +281,10 @@ Note that `np.newaxis` is just an alias for `None`.
 - `Newaxis` is **not** singletonized, unlike the built-in `None`. It would
   also be impossible to make `Newaxis() is np.newaxis` or `Newaxis() is None`
   return True. If you are using ndindex, **you should use `==` to compare
-  against `np.newaxis` or `None`**, and avoid using `is`.
+  against `np.newaxis` or `None`**, and avoid using `is`. Note that as long as
+  you know `idx` is an ndindex type, this is safe to do, since even the array
+  index types `IntegerArray` and `BooleanArray` allow `==` comparison (unlike
+  NumPy arrays, where `==` is elementwise).
 
   **Right:**
 

diff --git a/ndindex/array.py b/ndindex/array.py
@@ -17,7 +17,7 @@ class ArrayIndex(NDIndex):
     # Subclasses should redefine this
     dtype = None
 
-    def _typecheck(self, idx, shape=None):
+    def _typecheck(self, idx, shape=None, _copy=True):
         if self.dtype is None:
             raise TypeError("Do not instantiate the superclass ArrayIndex directly")
 
@@ -34,12 +34,16 @@ def _typecheck(self, idx, shape=None):
             # filtered out anyway since they produce object arrays.
             with warnings.catch_warnings(record=True):
                 a = asarray(idx)
-                if a is idx:
+                if a is idx and _copy:
                     a = a.copy()
                 if isinstance(idx, list) and 0 in a.shape:
+                    if not _copy:
+                        raise ValueError("_copy=False is not allowed with list input")
                     a = a.astype(self.dtype)
             if self.dtype == intp and issubclass(a.dtype.type, integer):
                 if a.dtype != self.dtype:
+                    if not _copy:
+                        raise ValueError("If _copy=False, the input array dtype must already be intp")
                     a = a.astype(self.dtype)
             if a.dtype != self.dtype:
                 raise TypeError(f"The input array to {self.__class__.__name__} must have dtype {self.dtype.__name__}, not {a.dtype}")

diff --git a/ndindex/booleanarray.py b/ndindex/booleanarray.py
@@ -11,7 +11,9 @@ class BooleanArray(ArrayIndex):
     and `a` is an array of shape `s = (s1, ..., sn, ..., sm)`, `a[idx]`
     replaces the first `n` dimensions of `a` with a single dimensions of size
     `np.nonzero(idx)`, where each entry is included if the corresponding
-    element of `idx` is True.
+    element of `idx` is True. Each axis of the index shape must either be 0 or
+    match the corresponding axis of the array shape; otherwise, indexing
+    raises `IndexError`.
 
     The typical way of creating a mask is to use boolean operations on an
     array, then index the array with that. For example, if `a` is an array of
@@ -24,12 +26,12 @@ class BooleanArray(ArrayIndex):
        and replace them with a single flat dimension which is the size of the
        number of `True` elements in the index.
 
-    2. A boolean array index `idx` works the same as the integer index
+    2. A boolean array index `idx` works the same as the integer array index
        `np.nonzero(idx)`. In particular, the elements of the index are always
        iterated in row-major, C-style order. This does not apply to
        0-dimensional boolean indices.
 
-    3. A 0-dimension boolean index (i.e., just the scalar `True` or `False`)
+    3. A 0-dimensional boolean index (i.e., just the scalar `True` or `False`)
        can still be thought of as removing 0 dimensions and adding a single
        dimension of length 1 for True or 0 for False. Hence, if `a` has shape
        `(s1, ..., sn)`, then `a[True]` has shape `(1, s1, ..., sn)`, and
@@ -129,7 +131,8 @@ def reduce(self, shape=None, axis=0):
             raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional, but {self.ndim + axis} were indexed")
 
         for i in range(axis, axis+self.ndim):
-            if self.shape[i-axis] != 0 and shape[i] != 0 and 0 not in shape and shape[i] != self.shape[i-axis]:
+            if self.shape[i-axis] != 0 and shape[i] != self.shape[i-axis]:
+
                 raise IndexError(f"boolean index did not match indexed array along dimension {i}; dimension is {shape[i]} but corresponding boolean dimension is {self.shape[i-axis]}")
 
         return self
@@ -140,7 +143,6 @@ def newshape(self, shape):
 
         # reduce will raise IndexError if it should be raised
         self.reduce(shape)
-
         return (self.count_nonzero,) + shape[self.ndim:]
 
     def isempty(self, shape=None):

diff --git a/ndindex/integer.py b/ndindex/integer.py
@@ -1,6 +1,4 @@
-import operator
-
-from .ndindex import NDIndex, asshape
+from .ndindex import NDIndex, asshape, operator_index
 
 class Integer(NDIndex):
     """
@@ -29,8 +27,7 @@ class Integer(NDIndex):
 
     """
     def _typecheck(self, idx):
-        idx = operator.index(idx)
-
+        idx = operator_index(idx)
         return (idx,)
 
     def __index__(self):
@@ -96,7 +93,6 @@ def newshape(self, shape):
 
         # reduce will raise IndexError if it should be raised
         self.reduce(shape)
-
         return shape[1:]
 
     def as_subindex(self, index):

diff --git a/ndindex/integerarray.py b/ndindex/integerarray.py
@@ -1,4 +1,4 @@
-from numpy import intp, zeros
+from numpy import intp
 
 from .array import ArrayIndex
 from .ndindex import asshape
@@ -14,7 +14,11 @@ class IntegerArray(ArrayIndex):
 
     Integer arrays can also appear as part of tuple indices. In that case,
     they replace the axis being indexed. If more than one integer array
-    appears inside of a tuple index, they are broadcast together.
+    appears inside of a tuple index, they are broadcast together and iterated
+    as one. Furthermore, if an integer array appears in a tuple index, all
+    integer indices in the tuple are treated as scalar integer arrays and are
+    also broadcast. In general, an :any:`Integer` index semantically behaves
+    the same as a scalar (`shape=()`) `IntegerArray`.
 
     A list of integers may also be used in place of an integer array. Note
     that NumPy treats a direct list of integers as a tuple index, but this
@@ -48,8 +52,9 @@ def reduce(self, shape=None, axis=0):
         Reduce an `IntegerArray` index on an array of shape `shape`.
 
         The result will either be `IndexError` if the index is invalid for the
-        given shape, or an `IntegerArray` index where the values are all
-        nonnegative.
+        given shape, an `IntegerArray` index where the values are all
+        nonnegative, or, if `self` is a scalar array index (`self.shape ==
+        ()`), an `Integer` whose value is nonnegative.
 
         >>> from ndindex import IntegerArray
         >>> idx = IntegerArray([-5, 2])
@@ -81,16 +86,6 @@ def reduce(self, shape=None, axis=0):
             return self
 
         shape = asshape(shape, axis=axis)
-        if 0 in shape[:axis] + shape[axis+1:]:
-            # There are no bounds checks for empty arrays if one of the
-            # non-indexed axes is 0. This behavior will be deprecated in NumPy
-            # 1.20. Once 1.20 is released, we will change the ndindex behavior
-            # to match it, since we want to match all post-deprecation NumPy
-            # behavior. But it is impossible to test against the
-            # post-deprecation behavior reliably until a version of NumPy is
-            # released that raises the deprecation warning, so for now, we
-            # just match the NumPy 1.19 behavior.
-            return IntegerArray(zeros(self.shape, dtype=intp))
 
         size = shape[axis]
         new_array = self.array.copy()
@@ -107,7 +102,6 @@ def newshape(self, shape):
 
         # reduce will raise IndexError if it should be raised
         self.reduce(shape)
-
         return self.shape + shape[1:]
 
     def isempty(self, shape=None):

diff --git a/ndindex/ndindex.py b/ndindex/ndindex.py
@@ -1,9 +1,7 @@
 import inspect
-import operator
 import numbers
-import warnings
 
-from numpy import ndarray, newaxis, asarray, integer, bool_, intp
+from numpy import ndarray, bool_, newaxis
 
 def ndindex(obj):
     """
@@ -23,25 +21,21 @@ def ndindex(obj):
     if isinstance(obj, NDIndex):
         return obj
 
-    # TODO: Replace this with calls to the IntegerArray() and BooleanArray()
-    # constructors.
-    if isinstance(obj, (list, ndarray, bool)):
-        # Ignore deprecation warnings for things like [1, []]. These will be
-        # filtered out anyway since they produce object arrays.
-        with warnings.catch_warnings(record=True):
-            a = asarray(obj)
-            if isinstance(obj, list) and 0 in a.shape:
-                a = a.astype(intp)
-        if issubclass(a.dtype.type, integer):
-            return IntegerArray(a)
-        elif a.dtype == bool_:
-            return BooleanArray(a)
+    if isinstance(obj, (list, ndarray, bool, bool_)):
+        try:
+            return IntegerArray(obj)
+        except TypeError:
+            pass
+        try:
+            return BooleanArray(obj)
+        except TypeError:
+            pass
+
+        # Match the NumPy exceptions
+        if isinstance(obj, ndarray):
+            raise IndexError("arrays used as indices must be of integer (or boolean) type")
         else:
-            # Match the NumPy exceptions
-            if isinstance(obj, ndarray):
-                raise IndexError("arrays used as indices must be of integer (or boolean) type")
-            else:
-                raise IndexError("only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices")
+            raise IndexError("only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices")
 
     try:
         # If operator.index() works, use that
@@ -420,7 +414,7 @@ def asshape(shape, axis=None):
                         "did you mean to use the built-in tuple type?")
 
     if isinstance(shape, numbers.Number):
-        shape = (operator.index(shape),)
+        shape = (operator_index(shape),)
 
     try:
         l = len(shape)
@@ -432,7 +426,7 @@ def asshape(shape, axis=None):
     # match that
     for i in range(l):
         # Raise TypeError if invalid
-        newshape.append(operator.index(shape[i]))
+        newshape.append(operator_index(shape[i]))
 
         if shape[i] < 0:
             raise ValueError("unknown (negative) dimensions are not supported")
@@ -442,3 +436,41 @@ def asshape(shape, axis=None):
             raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional, but {axis + 1} were indexed")
 
     return tuple(newshape)
+
+
+def operator_index(idx):
+    """
+    Convert `idx` into an integer index using `__index__()` or raise
+    `TypeError`.
+
+    This is the same as `operator.index()` except it disallows boolean types.
+
+    This is a slight break in NumPy compatibility, as NumPy allows bools in
+    some contexts where `__index__()` is used, for instance, in slices. It
+    does disallow it in others, such as in shapes. The main motivation for
+    disallowing bools entirely is 1) `numpy.bool_.__index__()` is deprecated
+    (currently it matches the built-in `bool.__index__()` and returns the
+    object unchanged, but prints a deprecation warning), and 2) for raw
+    indices, booleans and `0`/`1` are completely different, i.e., `a[True]` is
+    *not* the same as `a[1]`.
+
+    >>> from ndindex.ndindex import operator_index
+    >>> operator_index(1)
+    1
+    >>> operator_index(1.0)
+    Traceback (most recent call last):
+    ...
+    TypeError: 'float' object cannot be interpreted as an integer
+    >>> operator_index(True)
+    Traceback (most recent call last):
+    ...
+    TypeError: 'bool' object cannot be interpreted as an integer
+
+    """
+    import operator  # function-scope import; `operator` is not imported at module level
+
+    if isinstance(idx, bool):  # bool subclasses int, so operator.index() would accept it
+        raise TypeError("'bool' object cannot be interpreted as an integer")
+    if isinstance(idx, bool_):  # numpy.bool_ has a deprecated __index__() (see docstring)
+        raise TypeError("'np.bool_' object cannot be interpreted as an integer")
+    return operator.index(idx)  # raises TypeError for objects without __index__()
diff --git a/ndindex/slice.py b/ndindex/slice.py
@@ -1,9 +1,7 @@
-import operator
-
 from sympy.ntheory.modular import crt
 from sympy import ilcm, Rational
 
-from .ndindex import NDIndex, asshape
+from .ndindex import NDIndex, asshape, operator_index
 
 class default:
     """
@@ -65,11 +63,11 @@ def _typecheck(self, start, stop=default, step=None):
             raise ValueError("slice step cannot be zero")
 
         if start is not None:
-            start = operator.index(start)
+            start = operator_index(start)
         if stop is not None:
-            stop = operator.index(stop)
+            stop = operator_index(stop)
         if step is not None:
-            step = operator.index(step)
+            step = operator_index(step)
 
         args = (start, stop, step)