From b7069efa4c1a074a7adcb37ed810774bae94ef5e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Feb 2018 15:29:55 -0600 Subject: [PATCH 001/119] ENH: non-interval changes --- pandas/core/arrays/base.py | 14 +- pandas/core/dtypes/missing.py | 16 +- pandas/core/frame.py | 20 +- pandas/core/indexes/base.py | 34 ++- pandas/core/indexes/category.py | 3 + pandas/core/indexes/datetimes.py | 7 + pandas/core/internals.py | 29 +- pandas/core/series.py | 31 ++- pandas/tests/extension_arrays/__init__.py | 0 pandas/tests/extension_arrays/base.py | 312 ++++++++++++++++++++++ pandas/tests/indexes/datetimelike.py | 11 + pandas/tests/indexes/test_base.py | 5 + pandas/tests/indexes/test_category.py | 5 + 13 files changed, 447 insertions(+), 40 deletions(-) create mode 100644 pandas/tests/extension_arrays/__init__.py create mode 100644 pandas/tests/extension_arrays/base.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1556b653819a6..8d44c5133f740 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,6 @@ """An interface for extending pandas with custom arrays.""" +import numpy as np + from pandas.errors import AbstractMethodError _not_implemented_message = "{} does not implement {}." @@ -24,7 +26,6 @@ class ExtensionArray(object): * take * copy * _formatting_values - * _concat_same_type Some additional methods are required to satisfy pandas' internal, private block API. @@ -51,9 +52,6 @@ class ExtensionArray(object): Extension arrays should be able to be constructed with instances of the class, i.e. ``ExtensionArray(extension_array)`` should return an instance, not error. - - Additionally, certain methods and interfaces are required for proper - this array to be properly stored inside a ``DataFrame`` or ``Series``. """ # ------------------------------------------------------------------------ # Must be a Sequence @@ -177,9 +175,9 @@ def take(self, indexer, allow_fill=True, fill_value=None): Examples -------- - Suppose the extension array somehow backed by a NumPy structured array - and that the underlying structured array is stored as ``self.data``. - Then ``take`` may be written as + Suppose the extension array somehow backed by a NumPy array and that + the underlying structured array is stored as ``self.data``. Then + ``take`` may be written as .. code-block:: python @@ -219,7 +217,7 @@ def _formatting_values(self): # type: () -> np.ndarray # At the moment, this has to be an array since we use result.dtype """An array of values to be printed in, e.g. 
the Series repr""" - raise AbstractMethodError(self) + raise np.array(self) @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ffac702476af1..4ed6ddec00289 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -13,6 +13,7 @@ is_complex_dtype, is_categorical_dtype, is_string_like_dtype, is_bool_dtype, is_integer_dtype, is_dtype_equal, + is_extension_array_dtype, needs_i8_conversion, _ensure_object, pandas_dtype, is_scalar, @@ -52,12 +53,15 @@ def isna(obj): def _isna_new(obj): + from ..arrays import ExtensionArray + if is_scalar(obj): return libmissing.checknull(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") - elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): + elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, + ExtensionArray)): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=isna)) @@ -124,11 +128,14 @@ def _use_inf_as_na(key): def _isna_ndarraylike(obj): + from ..arrays import ExtensionArray values = getattr(obj, 'values', obj) dtype = values.dtype - if is_string_dtype(dtype): + if isinstance(values, ExtensionArray): + result = values.isna() + elif is_string_dtype(dtype): if is_categorical_dtype(values): from pandas import Categorical if not isinstance(values, Categorical): @@ -406,4 +413,7 @@ def remove_na_arraylike(arr): """ Return array-like containing only true/non-NaN values, possibly empty. """ - return arr[notna(lib.values_from_object(arr))] + if is_extension_array_dtype(arr): + return arr[notna(arr)] + else: + return arr[notna(lib.values_from_object(arr))] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96d28581cfdd9..ea3e7b33fb5b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -39,6 +39,7 @@ is_categorical_dtype, is_object_dtype, is_extension_type, + is_extension_array_dtype, is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, @@ -71,7 +72,7 @@ create_block_manager_from_arrays, create_block_manager_from_blocks) from pandas.core.series import Series -from pandas.core.arrays import Categorical +from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.algorithms as algorithms from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) @@ -511,7 +512,7 @@ def _get_axes(N, K, index=index, columns=columns): index, columns = _get_axes(len(values), 1) return _arrays_to_mgr([values], columns, index, columns, dtype=dtype) - elif is_datetimetz(values): + elif (is_datetimetz(values) or is_extension_array_dtype(values)): # GH19157 if columns is None: columns = [0] @@ -2796,7 +2797,7 @@ def reindexer(value): # now align rows value = reindexer(value).T - elif isinstance(value, Categorical): + elif isinstance(value, ExtensionArray): value = value.copy() elif isinstance(value, Index) or is_sequence(value): @@ -2804,7 +2805,7 @@ def reindexer(value): # turn me into an ndarray value = _sanitize_index(value, self.index, copy=False) - if not isinstance(value, (np.ndarray, Index)): + if not isinstance(value, (np.ndarray, Index, ExtensionArray)): if isinstance(value, list) and len(value) > 0: value = maybe_convert_platform(value) else: @@ -2826,7 +2827,7 @@ def reindexer(value): value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly - if is_extension_type(value): + 
if is_extension_type(value) or is_extension_array_dtype(value): return value # broadcast across multiple columns if necessary @@ -3355,12 +3356,9 @@ class max type new_obj = self.copy() def _maybe_casted_values(index, labels=None): - if isinstance(index, PeriodIndex): - values = index.astype(object).values - elif isinstance(index, DatetimeIndex) and index.tz is not None: - values = index - else: - values = index.values + values = index._as_best_array() + # TODO: Check if nescessary... + if not isinstance(index, (PeriodIndex, DatetimeIndex)): if values.dtype == np.object_: values = lib.maybe_convert_objects(values) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 626f3dc86556a..37a408a8f6c11 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -13,6 +13,7 @@ from pandas import compat from pandas.core.accessor import CachedAccessor +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCMultiIndex, @@ -1038,6 +1039,31 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.values.copy() + def _as_best_array(self): + # type: () -> Union[ExtensionArray, ndarary] + """Return the underlying values as the best array type. + + Indexes backed by ExtensionArrays will return the ExtensionArray. + Otherwise, an ndarray is returned. + + Examples + -------- + >>> pd.Index([0, 1, 2])._as_best_array() + array([0, 1, 2]) + + >>> pd.CategoricalIndex(['a', 'a', 'b'])._as_best_array() + [a, a, b] + Categories (2, object): [a, b] + + >>> pd.IntervalIndex.from_breaks([0, 1, 2])._as_best_array() + IntervalArray([(0, 1], (1, 2]]) + """ + # We need this since CategoricalIndex.values -> Categorical + # but IntervalIndex.values -> ndarray[object] + # TODO: IntervalIndex defines _array_values. Would be nice to + # have an unambiguous way of getting an ndarray (or just use asarray?) + return self.values + _index_shared_docs['astype'] = """ Create an Index with values cast to dtypes. The class of a new Index is determined by dtype. When conversion is impossible, a ValueError @@ -1946,6 +1972,12 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): if is_categorical_dtype(values.dtype): values = np.array(values) + + elif isinstance(values, ExtensionArray): + # This is still un-exercised within pandas, since all our + # extension dtypes have custom indexes. + values = values._formatting_values() + elif is_object_dtype(values.dtype): values = lib.maybe_convert_objects(values, safe=1) @@ -2525,7 +2557,7 @@ def get_value(self, series, key): # if we have something that is Index-like, then # use this, e.g. 
DatetimeIndex s = getattr(series, '_values', None) - if isinstance(s, Index) and is_scalar(key): + if isinstance(s, (ExtensionArray, Index)) and is_scalar(key): try: return s[key] except (IndexError, ValueError): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c7be2b21f959..90541c58b2ef9 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -297,6 +297,9 @@ def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() + def _as_best_array(self): + return self._data + def tolist(self): return self._data.tolist() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e09fa87477122..2ba010aeba467 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1034,6 +1034,13 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.values.copy() + def _as_best_array(self): + # no-tz -> ndarray + # tz -> DatetimeIndex (for now) + if self.tz is not None: + return self + return self.values + def to_pydatetime(self): """ Return DatetimeIndex as object ndarray of datetime.datetime objects diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f553e1a02c9d6..364a3b2ae027e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -56,7 +56,10 @@ is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat -from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCDatetimeIndex, + ABCIndexClass) import pandas.core.common as com import pandas.core.algorithms as algos @@ -1854,6 +1857,20 @@ class ExtensionBlock(NonConsolidatableMixIn, Block): ExtensionArrays are limited to 1-D. """ + + def __init__(self, values, placement, ndim=None): + values = self._maybe_coerce_values(values) + super().__init__(values, placement, ndim) + + def _maybe_coerce_values(self, values): + # Unboxes Series / Index + # Doesn't change any underlying dtypes. + if isinstance(values, ABCSeries): + values = values.values + elif isinstance(values, ABCIndexClass): + values = values._as_best_array() + return values + @property def _holder(self): # For extension blocks, the holder is values-dependent. @@ -4101,7 +4118,8 @@ def set(self, item, value, check=False): # FIXME: refactor, clearly separate broadcasting & zip-like assignment # can prob also fix the various if tests for sparse/categorical - value_is_extension_type = is_extension_type(value) + value_is_extension_type = (is_extension_type(value) or + is_extension_array_dtype(value)) # categorical/spares/datetimetz if value_is_extension_type: @@ -4834,13 +4852,10 @@ def form_blocks(arrays, names, axes): if len(items_dict['ExtensionBlock']): external_blocks = [] + for i, _, array in items_dict['ExtensionBlock']: - if isinstance(array, ABCSeries): - array = array.values - # Allow our internal arrays to chose their block type. 
- block_type = getattr(array, '_block_type', ExtensionBlock) external_blocks.append( - make_block(array, klass=block_type, + make_block(array, klass=ExtensionBlock, fastpath=True, placement=[i])) blocks.extend(external_blocks) diff --git a/pandas/core/series.py b/pandas/core/series.py index 78b4c3a70a519..6bd6bfc1c8ae9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,6 +14,7 @@ import numpy.ma as ma from pandas.core.accessor import CachedAccessor +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, @@ -173,12 +174,15 @@ def __init__(self, data=None, index=None, dtype=None, name=None, raise NotImplementedError("initializing a Series from a " "MultiIndex is not supported") elif isinstance(data, Index): - # need to copy to avoid aliasing issues if name is None: name = data.name - data = data._to_embed(keep_tz=True, dtype=dtype) - copy = False + if dtype is not None: + data = data.astype(dtype) + + # need to copy to avoid aliasing issues + data = data._as_best_array().copy() + elif isinstance(data, np.ndarray): pass elif isinstance(data, Series): @@ -234,6 +238,10 @@ def __init__(self, data=None, index=None, dtype=None, name=None, copy=copy) elif copy: data = data.copy() + elif isinstance(data, ExtensionArray): + if copy: + data = data.copy() + data = SingleBlockManager(data, index, fastpath=True) else: data = _sanitize_array(data, index, dtype, copy, raise_cast_failure=True) @@ -2570,7 +2578,11 @@ def _reindex_indexer(self, new_index, indexer, copy): return self # be subclass-friendly - new_values = algorithms.take_1d(self.get_values(), indexer) + if isinstance(self.values, ExtensionArray): + new_values = self.values.take(indexer) + else: + new_values = algorithms.take_1d(self.get_values(), indexer) + return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): @@ -3117,11 +3129,8 @@ def _sanitize_index(data, index, copy=False): raise ValueError('Length of values does not match length of ' 'index') if isinstance(data, ABCIndexClass) and not copy: - pass - elif isinstance(data, PeriodIndex): - data = data.astype(object).values - elif isinstance(data, DatetimeIndex): - data = data._to_embed(keep_tz=True) + data = data._as_best_array() + elif isinstance(data, np.ndarray): # coerce datetimelike types @@ -3194,11 +3203,12 @@ def _try_cast(arr, take_fast_path): # we will try to copy be-definition here subarr = _try_cast(data, True) - elif isinstance(data, Categorical): + elif isinstance(data, ExtensionArray): subarr = data if copy: subarr = data.copy() + # XXX: This is the only early return. See if it can be avoided. 
return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: @@ -3221,6 +3231,7 @@ def _try_cast(arr, take_fast_path): start, stop, step = get_range_parameters(data) arr = np.arange(start, stop, step, dtype='int64') subarr = _try_cast(arr, False) + else: subarr = _try_cast(data, False) diff --git a/pandas/tests/extension_arrays/__init__.py b/pandas/tests/extension_arrays/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py new file mode 100644 index 0000000000000..7e91ea661721e --- /dev/null +++ b/pandas/tests/extension_arrays/base.py @@ -0,0 +1,312 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.compat import StringIO +from pandas.core.internals import ExtensionBlock +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class BaseDtypeTests(object): + """Base class for ExtensionDtype classes""" + + @pytest.fixture + def dtype(self): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = set('biufcmMOSUV') + if dtype.kind is not None: + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + +class BaseArrayTests(object): + """Base class for extension array classes. + + Subclasses should implement the following fixtures + + * test_data + * test_data_missing + """ + + @pytest.fixture + def test_data(self): + """Length-100 array for this type.""" + raise NotImplementedError + + @pytest.fixture + def test_data_missing(self): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + @pytest.fixture + def na_cmp(self): + """Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. 
+ + By defult, uses ``operator.or`` + """ + return operator.is_ + + def test_len(self, test_data): + assert len(test_data) == 100 + + def test_ndim(self, test_data): + assert test_data.ndim == 1 + + def test_can_hold_na_valid(self, test_data): + assert test_data._can_hold_na() in {True, False} + + def test_series_constructor(self, test_data): + result = pd.Series(test_data) + assert result.dtype == test_data.dtype + assert len(result) == len(test_data) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + @pytest.mark.parametrize("from_series", [True, False]) + def test_dataframe_constructor(self, test_data, from_series): + if from_series: + test_data = pd.Series(test_data) + result = pd.DataFrame({"A": test_data}) + assert result.dtypes['A'] == test_data.dtype + assert result.shape == (len(test_data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_concat(self, test_data): + result = pd.concat([ + pd.Series(test_data), + pd.Series(test_data), + ], ignore_index=True) + assert len(result) == len(test_data) * 2 + + def test_iloc(self, test_data): + ser = pd.Series(test_data) + result = ser.iloc[:4] + expected = pd.Series(test_data[:4]) + tm.assert_series_equal(result, expected) + + result = ser.iloc[[0, 1, 2, 3]] + tm.assert_series_equal(result, expected) + + def test_loc(self, test_data): + ser = pd.Series(test_data) + result = ser.loc[:3] + expected = pd.Series(test_data[:4]) + tm.assert_series_equal(result, expected) + + result = ser.loc[[0, 1, 2, 3]] + tm.assert_series_equal(result, expected) + + def test_repr(self, test_data): + ser = pd.Series(test_data) + assert test_data.dtype.name in repr(ser) + + df = pd.DataFrame({"A": test_data}) + repr(df) + + def test_dtype_name_in_info(self, test_data): + buf = StringIO() + pd.DataFrame({"A": test_data}).info(buf=buf) + result = buf.getvalue() + assert test_data.dtype.name in result + + def test_memory_usage(self, test_data): + s = pd.Series(test_data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_is_extension_array_dtype(self, test_data): + assert is_extension_array_dtype(test_data) + assert is_extension_array_dtype(test_data.dtype) + assert is_extension_array_dtype(pd.Series(test_data)) + assert isinstance(test_data.dtype, ExtensionDtype) + + def test_array_interface(self, test_data): + result = np.array(test_data) + assert result[0] == test_data[0] + + def test_getitem_scalar(self, test_data): + result = test_data[0] + assert isinstance(result, test_data.dtype.type) + + result = pd.Series(test_data)[0] + assert isinstance(result, test_data.dtype.type) + + def test_getitem_scalar_na(self, test_data_missing, na_cmp): + result = test_data_missing[0] + assert na_cmp(result, test_data_missing._fill_value) + + def test_getitem_mask(self, test_data): + # Empty mask, raw array + mask = np.zeros(len(test_data), dtype=bool) + result = test_data[mask] + assert len(result) == 0 + assert isinstance(result, type(test_data)) + + # Empty mask, in series + mask = np.zeros(len(test_data), dtype=bool) + result = pd.Series(test_data)[mask] + assert len(result) == 0 + assert result.dtype == test_data.dtype + + # non-empty mask, raw array + mask[0] = True + result = test_data[mask] + assert len(result) == 1 + assert isinstance(result, type(test_data)) + + # non-empty mask, in series + result = pd.Series(test_data)[mask] + assert len(result) == 1 + assert result.dtype == test_data.dtype + + def test_getitem_slice(self, test_data): + # getitem[slice] should return an array + result = 
test_data[slice(0)] # empty + assert isinstance(result, type(test_data)) + + result = test_data[slice(1)] # scalar + assert isinstance(result, type(test_data)) + + def test_take_sequence(self, test_data): + result = pd.Series(test_data[[0, 1, 3]]) + assert result.iloc[0] == test_data[0] + assert result.iloc[1] == test_data[1] + assert result.iloc[2] == test_data[3] + + def test_isna(self, test_data_missing): + if test_data_missing._can_hold_na: + expected = np.array([True, False]) + else: + expected = np.array([False, False]) + + result = pd.isna(test_data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(test_data_missing).isna() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", [ + "mean", "sum", "prod", "mad", "sem", "var", "std", + "skew", "kurt", "median" + ]) + def test_nuisance_dropped(self, test_data, method): + test_data = test_data[:5] + func = operator.methodcaller(method) + df = pd.DataFrame({"A": np.arange(len(test_data)), + "B": test_data}) + assert len(func(df)) == 1 + + @pytest.mark.parametrize("method", [min, max]) + def test_reduction_orderable(self, test_data, method): + test_data = test_data[:5] + func = operator.methodcaller(method.__name__) + df = pd.DataFrame({"A": np.arange(len(test_data)), + "B": test_data}) + result = func(df) + assert len(result) == 2 + + expected = method(test_data) + assert result['B'] == expected + + @pytest.mark.parametrize("method", ['cummax', 'cummin']) + @pytest.mark.xfail(reason="Assumes comparable to floating.") + def test_cumulative_orderable(self, test_data, method): + # Upcast to object + # https://github.com/pandas-dev/pandas/issues/19296 + # assert result.dtypes['B'] == test_data.dtype + test_data = test_data[:5] + func = operator.methodcaller(method) + df = pd.DataFrame({"A": np.arange(len(test_data)), + "B": test_data}) + result = func(df) + assert result.shape == df.shape + + @pytest.mark.parametrize("binop", [ + operator.add, + operator.sub, + operator.lt, + operator.le, + operator.ge, + operator.gt, + operator.pow, + ], ids=lambda x: x.__name__) + def test_binops(self, test_data, binop): + # Assert that binops work between DataFrames / Series with this type + # if binops work between arrays of this type. 
Extra tests will be + # needed for, e.g., Array + scalar + test_data = test_data[:5] + df = pd.DataFrame({ + "A": np.arange(len(test_data)), + "B": test_data + }) + + try: + expected = pd.DataFrame({ + "A": binop(df['A'], df['A']), + "B": binop(df['B'].values, df['B'].values), + }) + except Exception: + msg = "{} not supported for {}".format(binop.__name__, + test_data.dtype.name) + raise pytest.skip(msg) + + result = binop(df, df) + tm.assert_frame_equal(result, expected) + + # homogeneous frame + result = binop(df[['B']], df[['B']]) + tm.assert_frame_equal(result, expected[['B']]) + + # series + result = binop(df['B'], df['B']) + tm.assert_series_equal(result, expected['B']) + + def test_as_ndarray(self, test_data): + np.array(test_data, dtype=test_data.dtype.kind) + + def test_align(self, test_data): + a = test_data[:3] + b = test_data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # TODO: assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(test_data)(list(a) + [test_data._fill_value])) + e2 = pd.Series(type(test_data)([test_data._fill_value] + list(b))) + tm.assert_series_equal(r1, e1) + tm.assert_series_equal(r2, e2) + + @pytest.mark.xfail(reason="GH-19342") + def test_series_given_index(self, test_data): + result = pd.Series(test_data[:3], index=[0, 1, 2, 3, 4]) + assert result.dtype == test_data.dtype + assert len(result) == 5 + assert len(result.values) == 5 + assert pd.isna(result.loc[[3, 4]]).all() diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 7d01a2a70145d..b47ca3618342d 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -83,3 +83,14 @@ def test_asobject_deprecated(self): with tm.assert_produces_warning(FutureWarning): i = d.asobject assert isinstance(i, pd.Index) + + def test_as_best_array(self): + result = pd.DatetimeIndex(['2017-01-01', + '2017-01-02'])._as_best_array() + expected = np.array(['2017-01-01', '2017-01-02'], dtype='M8[ns]') + tm.assert_numpy_array_equal(result, expected) + + def test_as_best_array_tz(self): + arr = pd.DatetimeIndex(['2017-01-01', '2017-01-02'], tz='US/Central') + result = arr._as_best_array() + tm.assert_index_equal(arr, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 974099f1fbbe9..20b61eaf38e81 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2282,6 +2282,11 @@ def test_comparison_tzawareness_compat(self, op): # TODO: implement _assert_tzawareness_compat for the reverse # comparison with the Series on the left-hand side + def test_as_best_array(self): + result = pd.Index([0, 1, 2])._as_best_array() + expected = np.array([0, 1, 2]) + tm.assert_numpy_array_equal(result, expected) + class TestIndexUtils(object): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index c2e40c79f8914..54dd0851ac2d5 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1080,3 +1080,8 @@ def test_take_invalid_kwargs(self): msg = "the 'mode' parameter is not supported" tm.assert_raises_regex(ValueError, msg, idx.take, indices, mode='clip') + + def test_as_best_array(self): + result = pd.CategoricalIndex([0, 1, 2])._as_best_array() + expected = pd.Categorical([0, 1, 2]) + tm.assert_categorical_equal(result, expected) From 9cd92c73eb35c4ba38866d77cfabdc1a8341e9dd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 07:00:20 -0600 Subject: 
[PATCH 002/119] COMPAT: py2 Super --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 364a3b2ae027e..feff9ef24d8bc 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1860,7 +1860,7 @@ class ExtensionBlock(NonConsolidatableMixIn, Block): def __init__(self, values, placement, ndim=None): values = self._maybe_coerce_values(values) - super().__init__(values, placement, ndim) + super(ExtensionBlock, self).__init__(values, placement, ndim) def _maybe_coerce_values(self, values): # Unboxes Series / Index From 9211bbdbde9537b2dffc51697afd0985f8ba2648 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 07:00:33 -0600 Subject: [PATCH 003/119] BUG: Use original object for extension array --- pandas/core/dtypes/missing.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 4ed6ddec00289..10c52c857ad12 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -10,7 +10,7 @@ is_datetimelike_v_numeric, is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_interval_dtype, - is_complex_dtype, is_categorical_dtype, + is_complex_dtype, is_string_like_dtype, is_bool_dtype, is_integer_dtype, is_dtype_equal, is_extension_array_dtype, @@ -128,20 +128,15 @@ def _use_inf_as_na(key): def _isna_ndarraylike(obj): - from ..arrays import ExtensionArray - values = getattr(obj, 'values', obj) dtype = values.dtype - if isinstance(values, ExtensionArray): - result = values.isna() + if is_extension_array_dtype(obj): + # work on the original object + result = obj.isna() elif is_string_dtype(dtype): - if is_categorical_dtype(values): - from pandas import Categorical - if not isinstance(values, Categorical): - values = values.values - result = values.isna() - elif is_interval_dtype(values): + if is_interval_dtype(values): + # TODO(IntervalArray): remove this if block from pandas import IntervalIndex result = IntervalIndex(obj).isna() else: From 80f83a6d78652d76955535407cbc410a860e5907 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 13:41:34 -0600 Subject: [PATCH 004/119] Consistent boxing / unboxing NumPy compat --- pandas/core/dtypes/common.py | 3 ++- pandas/core/dtypes/missing.py | 9 +++++++-- pandas/tests/indexes/datetimelike.py | 10 ++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c66e7fcfc6978..2344091f85a88 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1708,9 +1708,10 @@ def is_extension_array_dtype(arr_or_dtype): """ from pandas.core.arrays import ExtensionArray - # we want to unpack series, anything else? 
if isinstance(arr_or_dtype, ABCSeries): arr_or_dtype = arr_or_dtype._values + elif isinstance(arr_or_dtype, ABCIndexClass): + arr_or_dtype = arr_or_dtype._as_best_array() return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 10c52c857ad12..c7cd97d5ceb87 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -132,8 +132,13 @@ def _isna_ndarraylike(obj): dtype = values.dtype if is_extension_array_dtype(obj): - # work on the original object - result = obj.isna() + if isinstance(obj, ABCIndexClass): + values = obj._as_best_array() + elif isinstance(obj, ABCSeries): + values = obj._values + else: + values = obj + result = values.isna() elif is_string_dtype(dtype): if is_interval_dtype(values): # TODO(IntervalArray): remove this if block diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index b47ca3618342d..64fc1ee8c9680 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -85,12 +85,14 @@ def test_asobject_deprecated(self): assert isinstance(i, pd.Index) def test_as_best_array(self): - result = pd.DatetimeIndex(['2017-01-01', - '2017-01-02'])._as_best_array() - expected = np.array(['2017-01-01', '2017-01-02'], dtype='M8[ns]') + result = pd.DatetimeIndex(['2017-01-01T00:00:00', + '2017-01-02T00:00:00'])._as_best_array() + expected = np.array(['2017-01-01T00:00:00', + '2017-01-02T00:00:00'], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) def test_as_best_array_tz(self): - arr = pd.DatetimeIndex(['2017-01-01', '2017-01-02'], tz='US/Central') + arr = pd.DatetimeIndex(['2017-01-01T00:00:00', + '2017-01-02T00:00:00'], tz='US/Central') result = arr._as_best_array() tm.assert_index_equal(arr, result) From ca004d8219a43a7da21a44030be03a78e077194b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 16:03:16 -0600 Subject: [PATCH 005/119] 32-bit compat --- pandas/tests/indexes/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 20b61eaf38e81..7e90260ae62c7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2284,7 +2284,7 @@ def test_comparison_tzawareness_compat(self, op): def test_as_best_array(self): result = pd.Index([0, 1, 2])._as_best_array() - expected = np.array([0, 1, 2]) + expected = np.array([0, 1, 2], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) From 5d4a68617ebafa1f3fdef8c209cf1d55709b0ab6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 06:56:27 -0600 Subject: [PATCH 006/119] Add a test array --- pandas/core/arrays/base.py | 8 +- pandas/core/dtypes/base.py | 12 +- pandas/tests/extension_arrays/base.py | 216 ++++++++++----------- pandas/tests/extension_arrays/test_json.py | 154 +++++++++++++++ 4 files changed, 276 insertions(+), 114 deletions(-) create mode 100644 pandas/tests/extension_arrays/test_json.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8d44c5133f740..f51c3bb12fe45 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -20,18 +20,20 @@ class ExtensionArray(object): * __getitem__ * __len__ + * __iter__ * dtype * nbytes * isna * take * copy - * _formatting_values + * _concat_same_type Some additional methods are required to satisfy pandas' internal, private block API. 
- * _concat_same_type * _can_hold_na + * _formatting_values + * _fill_value This class does not inherit from 'abc.ABCMeta' for performance reasons. Methods and properties required by the interface raise @@ -217,7 +219,7 @@ def _formatting_values(self): # type: () -> np.ndarray # At the moment, this has to be an array since we use result.dtype """An array of values to be printed in, e.g. the Series repr""" - raise np.array(self) + return np.array(self) @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c7c5378801f02..2f071a3b3cf71 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,4 +1,6 @@ """Extend pandas with custom array types""" +import inspect + from pandas.errors import AbstractMethodError @@ -106,7 +108,8 @@ def is_dtype(cls, dtype): Parameters ---------- - dtype : str or dtype + dtype : str, object, or type + The dtype to check. Returns ------- @@ -118,12 +121,15 @@ def is_dtype(cls, dtype): 1. ``cls.construct_from_string(dtype)`` is an instance of ``cls``. - 2. 'dtype' is ``cls`` or a subclass of ``cls``. + 2. ``dtype`` is an object and is an instance of ``cls`` + 3. 'dtype' is a class and is ``cls`` or a subclass of ``cls``. """ if isinstance(dtype, str): try: return isinstance(cls.construct_from_string(dtype), cls) except TypeError: return False - else: + elif inspect.isclass(dtype): return issubclass(dtype, cls) + else: + return isinstance(dtype, cls) diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 7e91ea661721e..2a8a3c314e142 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -49,17 +49,17 @@ class BaseArrayTests(object): Subclasses should implement the following fixtures - * test_data - * test_data_missing + * data + * data_missing """ @pytest.fixture - def test_data(self): + def data(self): """Length-100 array for this type.""" raise NotImplementedError @pytest.fixture - def test_data_missing(self): + def data_missing(self): """Length-2 array with [NA, Valid]""" raise NotImplementedError @@ -74,142 +74,142 @@ def na_cmp(self): """ return operator.is_ - def test_len(self, test_data): - assert len(test_data) == 100 + def test_len(self, data): + assert len(data) == 100 - def test_ndim(self, test_data): - assert test_data.ndim == 1 + def test_ndim(self, data): + assert data.ndim == 1 - def test_can_hold_na_valid(self, test_data): - assert test_data._can_hold_na() in {True, False} + def test_can_hold_na_valid(self, data): + assert data._can_hold_na() in {True, False} - def test_series_constructor(self, test_data): - result = pd.Series(test_data) - assert result.dtype == test_data.dtype - assert len(result) == len(test_data) + def test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) assert isinstance(result._data.blocks[0], ExtensionBlock) @pytest.mark.parametrize("from_series", [True, False]) - def test_dataframe_constructor(self, test_data, from_series): + def dataframe_constructor(self, data, from_series): if from_series: - test_data = pd.Series(test_data) - result = pd.DataFrame({"A": test_data}) - assert result.dtypes['A'] == test_data.dtype - assert result.shape == (len(test_data), 1) + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes['A'] == data.dtype + assert result.shape == (len(data), 1) assert isinstance(result._data.blocks[0], ExtensionBlock) - def 
test_concat(self, test_data): + def test_concat(self, data): result = pd.concat([ - pd.Series(test_data), - pd.Series(test_data), + pd.Series(data), + pd.Series(data), ], ignore_index=True) - assert len(result) == len(test_data) * 2 + assert len(result) == len(data) * 2 - def test_iloc(self, test_data): - ser = pd.Series(test_data) + def test_iloc(self, data): + ser = pd.Series(data) result = ser.iloc[:4] - expected = pd.Series(test_data[:4]) + expected = pd.Series(data[:4]) tm.assert_series_equal(result, expected) result = ser.iloc[[0, 1, 2, 3]] tm.assert_series_equal(result, expected) - def test_loc(self, test_data): - ser = pd.Series(test_data) + def test_loc(self, data): + ser = pd.Series(data) result = ser.loc[:3] - expected = pd.Series(test_data[:4]) + expected = pd.Series(data[:4]) tm.assert_series_equal(result, expected) result = ser.loc[[0, 1, 2, 3]] tm.assert_series_equal(result, expected) - def test_repr(self, test_data): - ser = pd.Series(test_data) - assert test_data.dtype.name in repr(ser) + def test_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) - df = pd.DataFrame({"A": test_data}) + df = pd.DataFrame({"A": data}) repr(df) - def test_dtype_name_in_info(self, test_data): + def test_dtype_name_in_info(self, data): buf = StringIO() - pd.DataFrame({"A": test_data}).info(buf=buf) + pd.DataFrame({"A": data}).info(buf=buf) result = buf.getvalue() - assert test_data.dtype.name in result + assert data.dtype.name in result - def test_memory_usage(self, test_data): - s = pd.Series(test_data) + def test_memory_usage(self, data): + s = pd.Series(data) result = s.memory_usage(index=False) assert result == s.nbytes - def test_is_extension_array_dtype(self, test_data): - assert is_extension_array_dtype(test_data) - assert is_extension_array_dtype(test_data.dtype) - assert is_extension_array_dtype(pd.Series(test_data)) - assert isinstance(test_data.dtype, ExtensionDtype) + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) - def test_array_interface(self, test_data): - result = np.array(test_data) - assert result[0] == test_data[0] + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] - def test_getitem_scalar(self, test_data): - result = test_data[0] - assert isinstance(result, test_data.dtype.type) + def test_getitem_scalar(self, data): + result = data[0] + assert isinstance(result, data.dtype.type) - result = pd.Series(test_data)[0] - assert isinstance(result, test_data.dtype.type) + result = pd.Series(data)[0] + assert isinstance(result, data.dtype.type) - def test_getitem_scalar_na(self, test_data_missing, na_cmp): - result = test_data_missing[0] - assert na_cmp(result, test_data_missing._fill_value) + def test_getitem_scalar_na(self, data_missing, na_cmp): + result = data_missing[0] + assert na_cmp(result, data_missing._fill_value) - def test_getitem_mask(self, test_data): + def test_getitem_mask(self, data): # Empty mask, raw array - mask = np.zeros(len(test_data), dtype=bool) - result = test_data[mask] + mask = np.zeros(len(data), dtype=bool) + result = data[mask] assert len(result) == 0 - assert isinstance(result, type(test_data)) + assert isinstance(result, type(data)) # Empty mask, in series - mask = np.zeros(len(test_data), dtype=bool) - result = pd.Series(test_data)[mask] + mask = np.zeros(len(data), dtype=bool) + result = 
pd.Series(data)[mask] assert len(result) == 0 - assert result.dtype == test_data.dtype + assert result.dtype == data.dtype # non-empty mask, raw array mask[0] = True - result = test_data[mask] + result = data[mask] assert len(result) == 1 - assert isinstance(result, type(test_data)) + assert isinstance(result, type(data)) # non-empty mask, in series - result = pd.Series(test_data)[mask] + result = pd.Series(data)[mask] assert len(result) == 1 - assert result.dtype == test_data.dtype + assert result.dtype == data.dtype - def test_getitem_slice(self, test_data): + def test_getitem_slice(self, data): # getitem[slice] should return an array - result = test_data[slice(0)] # empty - assert isinstance(result, type(test_data)) + result = data[slice(0)] # empty + assert isinstance(result, type(data)) - result = test_data[slice(1)] # scalar - assert isinstance(result, type(test_data)) + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) - def test_take_sequence(self, test_data): - result = pd.Series(test_data[[0, 1, 3]]) - assert result.iloc[0] == test_data[0] - assert result.iloc[1] == test_data[1] - assert result.iloc[2] == test_data[3] + def test_take_sequence(self, data): + result = pd.Series(data[[0, 1, 3]]) + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] - def test_isna(self, test_data_missing): - if test_data_missing._can_hold_na: + def test_isna(self, data_missing): + if data_missing._can_hold_na: expected = np.array([True, False]) else: expected = np.array([False, False]) - result = pd.isna(test_data_missing) + result = pd.isna(data_missing) tm.assert_numpy_array_equal(result, expected) - result = pd.Series(test_data_missing).isna() + result = pd.Series(data_missing).isna() expected = pd.Series(expected) tm.assert_series_equal(result, expected) @@ -217,35 +217,35 @@ def test_isna(self, test_data_missing): "mean", "sum", "prod", "mad", "sem", "var", "std", "skew", "kurt", "median" ]) - def test_nuisance_dropped(self, test_data, method): - test_data = test_data[:5] + def test_nuisance_dropped(self, data, method): + data = data[:5] func = operator.methodcaller(method) - df = pd.DataFrame({"A": np.arange(len(test_data)), - "B": test_data}) + df = pd.DataFrame({"A": np.arange(len(data)), + "B": data}) assert len(func(df)) == 1 @pytest.mark.parametrize("method", [min, max]) - def test_reduction_orderable(self, test_data, method): - test_data = test_data[:5] + def test_reduction_orderable(self, data, method): + data = data[:5] func = operator.methodcaller(method.__name__) - df = pd.DataFrame({"A": np.arange(len(test_data)), - "B": test_data}) + df = pd.DataFrame({"A": np.arange(len(data)), + "B": data}) result = func(df) assert len(result) == 2 - expected = method(test_data) + expected = method(data) assert result['B'] == expected @pytest.mark.parametrize("method", ['cummax', 'cummin']) @pytest.mark.xfail(reason="Assumes comparable to floating.") - def test_cumulative_orderable(self, test_data, method): + def test_cumulative_orderable(self, data, method): # Upcast to object # https://github.com/pandas-dev/pandas/issues/19296 - # assert result.dtypes['B'] == test_data.dtype - test_data = test_data[:5] + # assert result.dtypes['B'] == data.dtype + data = data[:5] func = operator.methodcaller(method) - df = pd.DataFrame({"A": np.arange(len(test_data)), - "B": test_data}) + df = pd.DataFrame({"A": np.arange(len(data)), + "B": data}) result = func(df) assert result.shape == df.shape @@ -258,14 +258,14 @@ def 
test_cumulative_orderable(self, test_data, method): operator.gt, operator.pow, ], ids=lambda x: x.__name__) - def test_binops(self, test_data, binop): + def test_binops(self, data, binop): # Assert that binops work between DataFrames / Series with this type # if binops work between arrays of this type. Extra tests will be # needed for, e.g., Array + scalar - test_data = test_data[:5] + data = data[:5] df = pd.DataFrame({ - "A": np.arange(len(test_data)), - "B": test_data + "A": np.arange(len(data)), + "B": data }) try: @@ -275,7 +275,7 @@ def test_binops(self, test_data, binop): }) except Exception: msg = "{} not supported for {}".format(binop.__name__, - test_data.dtype.name) + data.dtype.name) raise pytest.skip(msg) result = binop(df, df) @@ -289,24 +289,24 @@ def test_binops(self, test_data, binop): result = binop(df['B'], df['B']) tm.assert_series_equal(result, expected['B']) - def test_as_ndarray(self, test_data): - np.array(test_data, dtype=test_data.dtype.kind) + def test_as_ndarray(self, data): + np.array(data, dtype=data.dtype.kind) - def test_align(self, test_data): - a = test_data[:3] - b = test_data[2:5] + def test_align(self, data): + a = data[:3] + b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) # TODO: assumes that the ctor can take a list of scalars of the type - e1 = pd.Series(type(test_data)(list(a) + [test_data._fill_value])) - e2 = pd.Series(type(test_data)([test_data._fill_value] + list(b))) + e1 = pd.Series(type(data)(list(a) + [data._fill_value])) + e2 = pd.Series(type(data)([data._fill_value] + list(b))) tm.assert_series_equal(r1, e1) tm.assert_series_equal(r2, e2) @pytest.mark.xfail(reason="GH-19342") - def test_series_given_index(self, test_data): - result = pd.Series(test_data[:3], index=[0, 1, 2, 3, 4]) - assert result.dtype == test_data.dtype + def test_series_given_index(self, data): + result = pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + assert result.dtype == data.dtype assert len(result) == 5 assert len(result.values) == 5 assert pd.isna(result.loc[[3, 4]]).all() diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py new file mode 100644 index 0000000000000..7685c7693b122 --- /dev/null +++ b/pandas/tests/extension_arrays/test_json.py @@ -0,0 +1,154 @@ +import collections +import itertools +import numbers +import operator +import random +import string +import sys + +import numpy as np +import pytest + + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.arrays import ExtensionArray + +from .base import BaseArrayTests, BaseDtypeTests + + +class JSONDtype(ExtensionDtype): + type = collections.Mapping + name = 'json' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class JSONArray(ExtensionArray): + dtype = JSONDtype() + + def __init__(self, values): + for val in values: + if not isinstance(val, collections.Mapping): + raise TypeError + self.data = values + + def __getitem__(self, item): + # TDOO: fancy indexing + if isinstance(item, numbers.Integral): + return self.data[item] + elif isinstance(item, np.ndarray) and item.dtype == 'bool': + return type(self)([x for x, m in zip(self, item) if m]) + elif isinstance(item, collections.Sequence): + return type(self)([self.data[i] for i in item]) + else: + return type(self)(self.data[item]) + + def __len__(self): + return len(self.data) + + def __iter__(self): + return iter(self.data) + + 
def __repr__(self): + return 'JSONArary({!r})'.format(self.data) + + @property + def nbytes(self): + return sys.getsizeof(self.data) + + def isna(self): + return np.array([x == {} for x in self.data]) + + def take(self, indexer, allow_fill=True, fill_value=None): + output = [self.data[loc] if loc != -1 else {} + for loc in indexer] + return type(self)(output) + + def copy(self, deep=False): + return type(self)(self.data.copy(deep=deep)) + + @property + def _fill_value(self): + return {} + + @classmethod + def _concat_same_type(cls, to_concat): + data = list(itertools.chain.from_iterable([x.data for x in to_concat])) + return cls(data) + + + +def make_data(): + return [{random.choice(string.ascii_letters): random.randint(0, 100) + for _ in range(random.randint(0, 10))} + for _ in range(100)] + + +class TestJSONDtype(BaseDtypeTests): + @pytest.fixture + def dtype(self): + return JSONDtype() + + +class TestJSON(BaseArrayTests): + + @pytest.fixture + def data(self): + """Length-100 PeriodArray for semantics test.""" + return JSONArray(make_data()) + + @pytest.fixture + def data_missing(self): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {'a': 10}]) + + @pytest.fixture + def na_cmp(self): + return operator.eq + + @pytest.mark.skip(reason="Unorderable") + def test_reduction_orderable(self, data, method): + pass + + + +# def test_concat_mixed_closed_raises(): +# one = IntervalArray.from_breaks([0, 1, 2], closed='left') +# two = IntervalArray.from_breaks([1, 2, 3], closed='right') +# +# with tm.assert_raises_regex(ValueError, "Intervals must all be closed"): +# IntervalArray._concat_same_type([one, two]) +# +# +# def test_series_constructor_intervalindex(): +# result = pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2])) +# assert result.dtype == 'interval[int64]' +# +# +# def dataframe_constructor_intervalindex(): +# result = pd.DataFrame({"A": pd.IntervalIndex.from_breaks([0, 1, 2])}) +# assert result.dtypes['A'] == 'interval[int64]' +# +# +# def dataframe_set_intervalarray(): +# df = pd.DataFrame({"A": [1, 2]}) +# arr = IntervalArray.from_breaks([0, 1, 2]) +# df['B'] = arr +# +# expected = pd.DataFrame({"A": [1, 2], "B": arr}) +# tm.assert_frame_equal(df, expected) +# +# +# def dataframe_set_intervalindex(): +# df = pd.DataFrame({"A": [1, 2]}) +# arr = pd.IntervalIndex.from_breaks([0, 1, 2]) +# df['B'] = arr +# +# expected = pd.DataFrame({"A": [1, 2], "B": arr}) +# tm.assert_frame_equal(df, expected) From 9f4ad42734f679d5e9a4bf38fa975e1a176dae12 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 07:00:46 -0600 Subject: [PATCH 007/119] linting --- pandas/tests/extension_arrays/test_json.py | 41 +--------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index 7685c7693b122..3fd9d310a9c1c 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -83,11 +83,9 @@ def _concat_same_type(cls, to_concat): return cls(data) - def make_data(): return [{random.choice(string.ascii_letters): random.randint(0, 100) - for _ in range(random.randint(0, 10))} - for _ in range(100)] + for _ in range(random.randint(0, 10))} for _ in range(100)] class TestJSONDtype(BaseDtypeTests): @@ -115,40 +113,3 @@ def na_cmp(self): @pytest.mark.skip(reason="Unorderable") def test_reduction_orderable(self, data, method): pass - - - -# def test_concat_mixed_closed_raises(): -# one = IntervalArray.from_breaks([0, 1, 2], closed='left') -# 
two = IntervalArray.from_breaks([1, 2, 3], closed='right') -# -# with tm.assert_raises_regex(ValueError, "Intervals must all be closed"): -# IntervalArray._concat_same_type([one, two]) -# -# -# def test_series_constructor_intervalindex(): -# result = pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2])) -# assert result.dtype == 'interval[int64]' -# -# -# def dataframe_constructor_intervalindex(): -# result = pd.DataFrame({"A": pd.IntervalIndex.from_breaks([0, 1, 2])}) -# assert result.dtypes['A'] == 'interval[int64]' -# -# -# def dataframe_set_intervalarray(): -# df = pd.DataFrame({"A": [1, 2]}) -# arr = IntervalArray.from_breaks([0, 1, 2]) -# df['B'] = arr -# -# expected = pd.DataFrame({"A": [1, 2], "B": arr}) -# tm.assert_frame_equal(df, expected) -# -# -# def dataframe_set_intervalindex(): -# df = pd.DataFrame({"A": [1, 2]}) -# arr = pd.IntervalIndex.from_breaks([0, 1, 2]) -# df['B'] = arr -# -# expected = pd.DataFrame({"A": [1, 2], "B": arr}) -# tm.assert_frame_equal(df, expected) From b1db4e8d39d1193fe54c5b2435fb156b6d899be1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 08:00:41 -0600 Subject: [PATCH 008/119] Default __iter__ --- pandas/core/arrays/base.py | 11 ++++++++++- pandas/tests/extension_arrays/base.py | 2 +- pandas/tests/extension_arrays/test_json.py | 6 ------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index f51c3bb12fe45..08625453450f4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -20,7 +20,6 @@ class ExtensionArray(object): * __getitem__ * __len__ - * __iter__ * dtype * nbytes * isna @@ -105,6 +104,16 @@ def __len__(self): # type: () -> int raise AbstractMethodError(self) + def __iter__(self): + """Iterate over elements. + + This needs to be implemented so that pandas recognizes extension arrays + as list-like. The default implementation makes successive calls to + ``__getitem__``, which may be slower than necessary. 
+ """ + for i in range(len(self)): + yield self[i] + # ------------------------------------------------------------------------ # Required attributes # ------------------------------------------------------------------------ diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 2a8a3c314e142..a393b51ff2762 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -195,7 +195,7 @@ def test_getitem_slice(self, data): assert isinstance(result, type(data)) def test_take_sequence(self, data): - result = pd.Series(data[[0, 1, 3]]) + result = pd.Series(data)[[0, 1, 3]] assert result.iloc[0] == data[0] assert result.iloc[1] == data[1] assert result.iloc[2] == data[3] diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index 3fd9d310a9c1c..9b08e8b97f0ce 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -39,22 +39,16 @@ def __init__(self, values): self.data = values def __getitem__(self, item): - # TDOO: fancy indexing if isinstance(item, numbers.Integral): return self.data[item] elif isinstance(item, np.ndarray) and item.dtype == 'bool': return type(self)([x for x, m in zip(self, item) if m]) - elif isinstance(item, collections.Sequence): - return type(self)([self.data[i] for i in item]) else: return type(self)(self.data[item]) def __len__(self): return len(self.data) - def __iter__(self): - return iter(self.data) - def __repr__(self): return 'JSONArary({!r})'.format(self.data) From 00d6bb33174f9c0d2d188e8d787894088bbe4fab Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 10:29:14 -0600 Subject: [PATCH 009/119] Tests for value_counts --- pandas/tests/extension_arrays/base.py | 25 +++++++++++++++++++++- pandas/tests/extension_arrays/test_json.py | 10 ++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index a393b51ff2762..529d63215d7ba 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -63,6 +63,13 @@ def data_missing(self): """Length-2 array with [NA, Valid]""" raise NotImplementedError + @pytest.fixture(params=['data', 'data_missing']) + def all_data(self, request, data, data_missing): + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing + @pytest.fixture def na_cmp(self): """Binary operator for comparing NA values. 
@@ -222,7 +229,10 @@ def test_nuisance_dropped(self, data, method): func = operator.methodcaller(method) df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) - assert len(func(df)) == 1 + obj = pd.DataFrame({"A": np.arange(len(data)), + "B": np.array(data, dtype=object)}) + + assert len(func(df)) == len(func(obj)) @pytest.mark.parametrize("method", [min, max]) def test_reduction_orderable(self, data, method): @@ -310,3 +320,16 @@ def test_series_given_index(self, data): assert len(result) == 5 assert len(result.values) == 5 assert pd.isna(result.loc[[3, 4]]).all() + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index 9b08e8b97f0ce..f1dc043af6c96 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -34,7 +34,7 @@ class JSONArray(ExtensionArray): def __init__(self, values): for val in values: - if not isinstance(val, collections.Mapping): + if not isinstance(val, self.dtype.type): raise TypeError self.data = values @@ -57,10 +57,10 @@ def nbytes(self): return sys.getsizeof(self.data) def isna(self): - return np.array([x == {} for x in self.data]) + return np.array([x == self._fill_value for x in self.data]) def take(self, indexer, allow_fill=True, fill_value=None): - output = [self.data[loc] if loc != -1 else {} + output = [self.data[loc] if loc != -1 else self._fill_value for loc in indexer] return type(self)(output) @@ -107,3 +107,7 @@ def na_cmp(self): @pytest.mark.skip(reason="Unorderable") def test_reduction_orderable(self, data, method): pass + + @pytest.mark.skip(reason="Unhashable") + def test_value_counts(self, all_data, dropna): + pass From 1608e3d3ace66ba4ef066c241a919131c6b0e416 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 10:31:02 -0600 Subject: [PATCH 010/119] Implement value_counts --- pandas/core/algorithms.py | 3 ++- pandas/core/arrays/base.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c754c063fce8e..427ec5af270bb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -15,6 +15,7 @@ is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, is_object_dtype, + is_extension_array_dtype, is_categorical_dtype, is_sparse, is_period_dtype, is_numeric_dtype, is_float_dtype, @@ -542,7 +543,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, else: - if is_categorical_dtype(values) or is_sparse(values): + if is_extension_array_dtype(values) or is_sparse(values): # handle Categorical and sparse, result = Series(values).values.value_counts(dropna=dropna) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 08625453450f4..b4b1c18b5adf7 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -254,3 +254,11 @@ def _can_hold_na(self): Setting this to false will optimize some operations like fillna. 
""" return True + + def value_counts(self, dropna=True): + from pandas import value_counts + + if dropna: + self = self[~self.isna()] + + return value_counts(np.array(self)) From 52e21802054d2801ac59003bda87319df04673b2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 10:51:16 -0600 Subject: [PATCH 011/119] Py2 compat --- pandas/tests/extension_arrays/test_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index f1dc043af6c96..3d33953b59056 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -65,7 +65,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): return type(self)(output) def copy(self, deep=False): - return type(self)(self.data.copy(deep=deep)) + return type(self)(self.data[:]) @property def _fill_value(self): From e6d06e2f0eb669e36f5d21645b3fb3e158ba7de7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 15:37:48 -0600 Subject: [PATCH 012/119] Fixed dropna --- pandas/core/arrays/base.py | 1 + pandas/core/internals.py | 5 +++++ pandas/tests/extension_arrays/base.py | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b4b1c18b5adf7..070f6a34b7987 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -245,6 +245,7 @@ def _concat_same_type(cls, to_concat): """ raise AbstractMethodError(cls) + @property def _can_hold_na(self): # type: () -> bool """Whether your array can hold missing values. True by default. diff --git a/pandas/core/internals.py b/pandas/core/internals.py index feff9ef24d8bc..b72048f15575f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1876,6 +1876,11 @@ def _holder(self): # For extension blocks, the holder is values-dependent. 
return type(self.values) + @property + def _can_hold_na(self): + # The default ExtensionBlock._can_hold_na is True + return self._holder._can_hold_na + @property def is_view(self): """Extension arrays are never treated as views.""" diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 529d63215d7ba..3e5a429386cfa 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -220,6 +220,11 @@ def test_isna(self, data_missing): expected = pd.Series(expected) tm.assert_series_equal(result, expected) + def test_dropna(self, data_missing): + result = pd.Series(data_missing).dropna() + expected = pd.Series(data_missing).iloc[[1]] + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("method", [ "mean", "sum", "prod", "mad", "sem", "var", "std", "skew", "kurt", "median" From d356f191a569b0b3d8bdb15c63e2eb81889a4ecf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Feb 2018 20:45:39 -0600 Subject: [PATCH 013/119] Test fixups --- pandas/core/internals.py | 2 ++ pandas/tests/extension_arrays/base.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b72048f15575f..8d07686988143 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5693,6 +5693,8 @@ def is_na(self): if not values._null_fill_value and values.sp_index.ngaps > 0: return False values_flat = values.ravel(order='K') + elif isinstance(self.block, ExtensionBlock): + values_flat = values else: values_flat = values.ravel(order='K') total_len = values_flat.shape[0] diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 3e5a429386cfa..19f56b1e672dd 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -88,7 +88,7 @@ def test_ndim(self, data): assert data.ndim == 1 def test_can_hold_na_valid(self, data): - assert data._can_hold_na() in {True, False} + assert data._can_hold_na in {True, False} def test_series_constructor(self, data): result = pd.Series(data) From a6ae340b409cb018852fe7d1263f1e1d3742d08d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 6 Feb 2018 06:37:34 -0600 Subject: [PATCH 014/119] Started setitem --- pandas/core/arrays/base.py | 32 +++++++++++++++++++++- pandas/core/internals.py | 5 ++++ pandas/tests/extension_arrays/base.py | 29 ++++++++++++++++++++ pandas/tests/extension_arrays/test_json.py | 12 ++++++++ 4 files changed, 77 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 070f6a34b7987..5aa478a51efdc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -89,7 +89,37 @@ def __getitem__(self, item): raise AbstractMethodError(self) def __setitem__(self, key, value): - # type: (Any, Any) -> None + # type: (Union[int, np.ndarray], Any) -> None + """Set one or more values inplace. + + Parameters + ---------- + key : int or ndarray + When called from, e.g. ``Series.__setitem__``, ``key`` will + always be an ndarray of integers. + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + ExtensionArrays may + + Notes + ----- + This method is not required to satisfy the interface. If an + ExtensionArray chooses to implement __setitem__, then some semantics + should be observed. + + * Setting multiple values : ExtensionArrays should support setting + multiple values at once, ``key`` will be a sequence of integers. 
+ + * Broadcasting : For a sequence ``key`` and a scalar ``value``, + each position in ``key`` should be set to ``value``. + + * Coercion : Most users will expect basic coercion to work. For + example, a string like ``'2018-01-01'`` is coerced to a datetime + when setting on a datetime64ns array. In general, if the + ``__init__`` method coerces that value, then so should ``__setitem__``. + + When called from, e.g. ``Series.__setitem__``, ``key`` will always + be an ndarray of positions. + """ raise NotImplementedError(_not_implemented_message.format( type(self), '__setitem__') ) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8d07686988143..c32a663d51482 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1938,6 +1938,11 @@ def _slice(self, slicer): return self.values[slicer] + def setitem(self, indexer, value, mgr=None): + print(indexer, value) + self.values[indexer] = value + return self + def formatting_values(self): return self.values._formatting_values() diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 19f56b1e672dd..80a807a27f45a 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -207,6 +207,35 @@ def test_take_sequence(self, data): assert result.iloc[1] == data[1] assert result.iloc[2] == data[3] + def test_setitem_scalar(self, data): + arr = pd.Series(data) + arr[0] = data[1] + assert arr[0] == data[1] + + def test_setitem_sequence(self, data): + arr = pd.Series(data) + original = data.copy() + + arr[[0, 1]] = [data[1], data[0]] + assert arr[0] == original[1] + assert arr[1] == original[0] + + def test_setitem_sequence_broadcasts(self, data): + arr = pd.Series(data) + + arr[[0, 1]] = data[2] + assert arr[0] == data[2] + assert arr[1] == data[2] + + def test_loc_set_scalar(self, data): + arr = pd.Series(data) + arr.loc[0] = data[1] + assert arr[0] == data[1] + + df = pd.DataFrame({"A": data}) + df.loc[0, 'A'] = data[1] + assert df.loc[0, 'A'] == data[1] + def test_isna(self, data_missing): if data_missing._can_hold_na: expected = np.array([True, False]) diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index 3d33953b59056..a3e3c119561cc 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -46,6 +46,18 @@ def __getitem__(self, item): else: return type(self)(self.data[item]) + def __setitem__(self, key, value): + if isinstance(key, numbers.Integral): + self.data[key] = value + else: + if not isinstance(value, collections.Sequence): + # broadcast value + value = itertools.cycle([value]) + + for k, v in zip(key, value): + assert isinstance(v, self.dtype.type) + self.data[k] = v + def __len__(self): return len(self.data) From 41f09d899c4eaa726f0f0f7ffbc55d924a5dcab7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 14:13:57 -0600 Subject: [PATCH 015/119] REF/Clean: Internal / External values --- doc/source/internals.rst | 15 +++++ pandas/core/base.py | 48 +++++++++++--- pandas/core/dtypes/concat.py | 15 +++-- pandas/core/indexes/base.py | 65 ++++++++++++------- pandas/core/indexes/category.py | 25 +++++-- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 9 +++ pandas/core/indexes/multi.py | 38 ++++++----- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 42 +++++++----- pandas/core/series.py | 4 +- pandas/io/formats/format.py | 2 +- pandas/io/pytables.py | 2 +- 
pandas/plotting/_converter.py | 6 +- pandas/tests/indexes/common.py | 6 +- .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_period.py | 6 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/test_base.py | 65 ++++++++++++++++++- 19 files changed, 265 insertions(+), 93 deletions(-) diff --git a/doc/source/internals.rst b/doc/source/internals.rst index ee4df879d9478..29aaed318b802 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,6 +89,21 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. +Values +~~~~~~ + +Pandas extends NumPy's type system in a few places, so we have multiple notions of "values" floating around. +For 1-D containers (``Index`` classes and ``Series``) we have the following convention: + +* ``cls._ndarray_values`` is *always* and ``ndarray`` +* ``cls._values`` refers is the "best possible" array. This could be an ``ndarray``, ``ExtensionArray``, or + in ``Index`` subclass (note: we're in the process of removing the index subclasses here so that it's + always an ``ndarray`` or ``ExtensionArray``). + +So, for example, ``Series[category]._values`` is a ``Categorical``, while ``Series[category]._ndarray_values`` is +the underlying ndarray. + + .. _ref-subclassing-pandas: Subclassing pandas Data Structures diff --git a/pandas/core/base.py b/pandas/core/base.py index d5b204dba063e..52b1f82e8824d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -7,7 +7,8 @@ import numpy as np from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCSeries, ABCIndexClass, ABCDatetimeIndex) from pandas.core.dtypes.common import ( is_object_dtype, is_list_like, @@ -706,7 +707,7 @@ def transpose(self, *args, **kwargs): @property def shape(self): """ return a tuple of the shape of the underlying data """ - return self._values.shape + return self._ndarray_values.shape @property def ndim(self): @@ -734,22 +735,22 @@ def data(self): @property def itemsize(self): """ return the size of the dtype of the item of the underlying data """ - return self._values.itemsize + return self._ndarray_values.itemsize @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self._values.nbytes + return self._ndarray_values.nbytes @property def strides(self): """ return the strides of the underlying data """ - return self._values.strides + return self._ndarray_values.strides @property def size(self): """ return the number of elements in the underlying data """ - return self._values.size + return self._ndarray_values.size @property def flags(self): @@ -763,9 +764,34 @@ def base(self): """ return self.values.base + @property + def _ndarray_values(self): + """The data as an ndarray. See '_values' for more.""" + # type: () -> np.ndarray + return self.values + @property def _values(self): - """ the internal implementation """ + # type: () -> Union[ExtensionArray, Index] + # TODO: remove index types as they become is extension arrays + """ The best array representation. + + This is an ndarray, ExtensionArray, or Index subclass. This differs + from '._ndarray_values', which always returns an ndarray. 
It may differ + from the public '.values' + + index | values | _values + ----------------- | -------------- -| ---------- + CategoricalIndex | Categorical | Categorical + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] + PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) + IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) + + See Also + -------- + values + _ndarray_values + """ return self.values @property @@ -816,7 +842,7 @@ def tolist(self): if is_datetimelike(self): return [com._maybe_box_datetimelike(x) for x in self._values] else: - return self._values.tolist() + return self._ndarray_values.tolist() def __iter__(self): """ @@ -973,8 +999,12 @@ def value_counts(self, normalize=False, sort=True, ascending=False, @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) def unique(self): values = self._values - + if isinstance(values, ABCDatetimeIndex): + values = values._ndarray_values + # TODO: Make unique part of the ExtensionArray interface. + # else, this could be surprising. if hasattr(values, 'unique'): + result = values.unique() else: from pandas.core.algorithms import unique1d diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ddecbe85087d8..a49a2680e4daa 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -480,7 +480,7 @@ def _concat_datetimetz(to_concat, name=None): def _concat_index_same_dtype(indexes, klass=None): klass = klass if klass is not None else indexes[0].__class__ - return klass(np.concatenate([x._values for x in indexes])) + return klass(np.concatenate([x._ndarray_values for x in indexes])) def _concat_index_asobject(to_concat, name=None): @@ -498,9 +498,16 @@ def _concat_index_asobject(to_concat, name=None): attribs = self._get_attributes_dict() attribs['name'] = name - to_concat = [x._values if isinstance(x, Index) else x - for x in to_concat] - return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) + arrays = [] + for x in to_concat: + if is_categorical_dtype(x): + arrays.append(np.asarray(x, dtype=object)) + elif isinstance(x, Index): + arrays.append(x._values) + else: + arrays.append(x) + + return self._shallow_copy_with_infer(np.concatenate(arrays), **attribs) def _concat_sparse(to_concat, axis=0, typs=None): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1e1bb0d49b3df..450e0f47ef6ff 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -392,7 +392,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): values = np.array(values, copy=False) if is_object_dtype(values): values = cls(values, name=name, dtype=dtype, - **kwargs)._values + **kwargs)._ndarray_values result = object.__new__(cls) result._data = values @@ -644,7 +644,7 @@ def ravel(self, order='C'): -------- numpy.ndarray.ravel """ - return self._values.ravel(order=order) + return self._ndarray_values.ravel(order=order) # construction helpers @classmethod @@ -1577,7 +1577,7 @@ def _constructor(self): @cache_readonly def _engine(self): # property, for now, slow to look up - return self._engine_type(lambda: self._values, len(self)) + return self._engine_type(lambda: self._ndarray_values, len(self)) def _validate_index_level(self, level): """ @@ -2208,27 +2208,37 @@ def union(self, other): other = other.astype('O') return this.union(other) + if is_categorical_dtype(self): + lvals = self.values + else: + lvals = self._ndarray_values + + if is_categorical_dtype(other): + rvals = other.values + else: + rvals = other._ndarray_values + if 
self.is_monotonic and other.is_monotonic: try: - result = self._outer_indexer(self._values, other._values)[0] + result = self._outer_indexer(lvals, rvals)[0] except TypeError: # incomparable objects - result = list(self._values) + result = list(lvals) # worth making this faster? a very unusual case - value_set = set(self._values) - result.extend([x for x in other._values if x not in value_set]) + value_set = set(lvals) + result.extend([x for x in rvals if x not in value_set]) else: indexer = self.get_indexer(other) indexer, = (indexer == -1).nonzero() if len(indexer) > 0: - other_diff = algos.take_nd(other._values, indexer, + other_diff = algos.take_nd(rvals, indexer, allow_fill=False) - result = _concat._concat_compat((self._values, other_diff)) + result = _concat._concat_compat((lvals, other_diff)) try: - self._values[0] < other_diff[0] + lvals[0] < other_diff[0] except TypeError as e: warnings.warn("%s, sort order is undefined for " "incomparable objects" % e, RuntimeWarning, @@ -2240,7 +2250,7 @@ def union(self, other): result.sort() else: - result = self._values + result = lvals try: result = np.sort(result) @@ -2293,18 +2303,21 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._values, other._values)[0] + result = self._inner_indexer(self._ndarray_values, + other._ndarray_values)[0] return self._wrap_union_result(other, result) except TypeError: pass try: - indexer = Index(other._values).get_indexer(self._values) + indexer = Index(other._ndarray_values).get_indexer( + self._ndarray_values) indexer = indexer.take((indexer != -1).nonzero()[0]) except Exception: # duplicates indexer = algos.unique1d( - Index(other._values).get_indexer_non_unique(self._values)[0]) + Index(other._ndarray_values).get_indexer_non_unique( + self._ndarray_values)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) @@ -2680,7 +2693,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): raise ValueError('limit argument only valid if doing pad, ' 'backfill or nearest reindexing') - indexer = self._engine.get_indexer(target._values) + indexer = self._engine.get_indexer(target._ndarray_values) return _ensure_platform_int(indexer) @@ -2696,12 +2709,13 @@ def _get_fill_indexer(self, target, method, limit=None, tolerance=None): if self.is_monotonic_increasing and target.is_monotonic_increasing: method = (self._engine.get_pad_indexer if method == 'pad' else self._engine.get_backfill_indexer) - indexer = method(target._values, limit) + indexer = method(target._ndarray_values, limit) else: indexer = self._get_fill_indexer_searchsorted(target, method, limit) if tolerance is not None: - indexer = self._filter_indexer_tolerance(target._values, indexer, + indexer = self._filter_indexer_tolerance(target._ndarray_values, + indexer, tolerance) return indexer @@ -2792,7 +2806,7 @@ def get_indexer_non_unique(self, target): self = Index(self.asi8) tgt_values = target.asi8 else: - tgt_values = target._values + tgt_values = target._ndarray_values indexer, missing = self._engine.get_indexer_non_unique(tgt_values) return _ensure_platform_int(indexer), missing @@ -3227,16 +3241,17 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self._values], - [other._values], how=how, + left_idx, right_idx = _get_join_indexers([self._ndarray_values], + 
[other._ndarray_values], + how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self._values.take(left_idx)) + join_index = np.asarray(self._ndarray_values.take(left_idx)) mask = left_idx == -1 - np.putmask(join_index, mask, other._values.take(right_idx)) + np.putmask(join_index, mask, other._ndarray_values.take(right_idx)) join_index = self._wrap_joined_index(join_index, other) @@ -3383,8 +3398,8 @@ def _join_monotonic(self, other, how='left', return_indexers=False): else: return ret_index - sv = self._values - ov = other._values + sv = self._ndarray_values + ov = other._ndarray_values if self.is_unique and other.is_unique: # We can perform much better than the general case @@ -3736,7 +3751,7 @@ def insert(self, loc, item): item = self._na_value _self = np.asarray(self) - item = self._coerce_scalar_to_index(item)._values + item = self._coerce_scalar_to_index(item)._ndarray_values idx = np.concatenate((_self[:loc], item, _self[loc:])) return self._shallow_copy_with_infer(idx) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c7be2b21f959..5b01f7d2cbe95 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -227,7 +227,7 @@ def _is_dtype_compat(self, other): """ if is_categorical_dtype(other): if isinstance(other, CategoricalIndex): - other = other._values + other = other.values if not other.is_dtype_equal(self): raise TypeError("categories must match existing categories " "when appending") @@ -293,6 +293,23 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data + @property + def _values(self): + return self._data + + @property + def _ndarray_values(self): + return self._data.codes + + @property + def itemsize(self): + return self.values.itemsize + + @property + def nbytes(self): + """ return the number of bytes in the underlying data """ + return self.values.nbytes + def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() @@ -386,8 +403,8 @@ def is_monotonic_decreasing(self): def unique(self, level=None): if level is not None: self._validate_index_level(level) - result = base.IndexOpsMixin.unique(self) - # CategoricalIndex._shallow_copy uses keeps original categories + result = self.values.unique() + # CategoricalIndex._shallow_copy keeps original categories # and ordered if not otherwise specified return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) @@ -762,7 +779,7 @@ def _evaluate_compare(self, other): def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ - method = getattr(self._values, name) + method = getattr(self.values, name) if 'inplace' in kwargs: raise ValueError("cannot use inplace with CategoricalIndex") res = method(*args, **kwargs) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e77c7a7fa48c..94500a58edd4c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -389,7 +389,7 @@ def sort_values(self, return_indexer=False, ascending=True): sorted_index = self.take(_as) return sorted_index, _as else: - sorted_values = np.sort(self._values) + sorted_values = np.sort(self._ndarray_values) attribs = self._get_attributes_dict() freq = attribs['freq'] diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e09fa87477122..c32d7ce930a7c 100644 --- 
a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -678,6 +678,15 @@ def _assert_tzawareness_compat(self, other): raise TypeError('Cannot compare tz-naive and tz-aware ' 'datetime-like objects') + @property + def _values(self): + # tz-naive -> ndarray + # tz-aware -> DatetimeIndex + if self.tz is not None: + return self + else: + return self.values + @property def tzinfo(self): """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 510f7245cebd8..1478012aa9dbe 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -799,9 +799,11 @@ def values(self): box = hasattr(lev, '_box_values') # Try to minimize boxing. if box and len(lev) > len(lab): - taken = lev._box_values(algos.take_1d(lev._values, lab)) + taken = lev._box_values(algos.take_1d(lev._values, + lab)) elif box: - taken = algos.take_1d(lev._box_values(lev._values), lab, + taken = algos.take_1d(lev._box_values(lev._ndarray_values), + lab, fill_value=_get_na_value(lev.dtype.type)) else: taken = algos.take_1d(np.asarray(lev._values), lab) @@ -1317,7 +1319,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): - tuples = tuples._values + tuples = tuples._ndarray_values arrays = list(lib.tuples_to_object_array(tuples).T) elif isinstance(tuples, list): @@ -2410,7 +2412,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): mapper = Series(indexer) indexer = labels.take(_ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) - m = result.map(mapper)._values + m = result.map(mapper)._ndarray_values else: m = np.zeros(len(labels), dtype=bool) @@ -2569,7 +2571,7 @@ def _update_indexer(idxr, indexer=indexer): else: from .numeric import Int64Index # no matches we are done - return Int64Index([])._values + return Int64Index([])._ndarray_values elif com.is_null_slice(k): # empty slice @@ -2589,8 +2591,8 @@ def _update_indexer(idxr, indexer=indexer): # empty indexer if indexer is None: - return Int64Index([])._values - return indexer._values + return Int64Index([])._ndarray_values + return indexer._ndarray_values def truncate(self, before=None, after=None): """ @@ -2639,7 +2641,7 @@ def equals(self, other): if not isinstance(other, MultiIndex): other_vals = com._values_from_object(_ensure_index(other)) - return array_equivalent(self._values, other_vals) + return array_equivalent(self._ndarray_values, other_vals) if self.nlevels != other.nlevels: return False @@ -2650,13 +2652,15 @@ def equals(self, other): for i in range(self.nlevels): slabels = self.labels[i] slabels = slabels[slabels != -1] - svalues = algos.take_nd(np.asarray(self.levels[i]._values), - slabels, allow_fill=False) + svalues = algos.take_nd( + np.asarray(self.levels[i]._values), + slabels, allow_fill=False) olabels = other.labels[i] olabels = olabels[olabels != -1] - ovalues = algos.take_nd(np.asarray(other.levels[i]._values), - olabels, allow_fill=False) + ovalues = algos.take_nd( + np.asarray(other.levels[i]._values), + olabels, allow_fill=False) # since we use NaT both datetime64 and timedelta64 # we can have a situation where a level is typed say @@ -2704,7 +2708,8 @@ def union(self, other): if len(other) == 0 or self.equals(other): return self - uniq_tuples = lib.fast_unique_multiple([self._values, other._values]) + uniq_tuples = lib.fast_unique_multiple([self._ndarray_values, + other._ndarray_values]) return 
MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) @@ -2726,8 +2731,8 @@ def intersection(self, other): if self.equals(other): return self - self_tuples = self._values - other_tuples = other._values + self_tuples = self._ndarray_values + other_tuples = other._ndarray_values uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: return MultiIndex(levels=[[]] * self.nlevels, @@ -2756,7 +2761,8 @@ def difference(self, other): labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) - difference = sorted(set(self._values) - set(other._values)) + difference = sorted(set(self._ndarray_values) - + set(other._ndarray_values)) if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b02aee0495d8c..a4558116bfa63 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -378,7 +378,7 @@ def equals(self, other): if (not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape): return False - left, right = self._values, other._values + left, right = self._ndarray_values, other._ndarray_values return ((left == right) | (self._isnan & other._isnan)).all() except (TypeError, ValueError): return False diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1f8542ed5ee60..c8b7d6063e378 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - result = get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._ndarray_values, base) return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring @@ -82,7 +82,7 @@ def _period_index_cmp(opname, cls, nat_result=False): def wrapper(self, other): if isinstance(other, Period): - func = getattr(self._values, opname) + func = getattr(self._ndarray_values, opname) other_base, _ = _gfc(other.freq) if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) @@ -94,7 +94,8 @@ def wrapper(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - result = getattr(self._values, opname)(other._values) + op = getattr(self._ndarray_values, opname) + result = op(other._ndarray_values) mask = self._isnan | other._isnan if mask.any(): @@ -102,11 +103,11 @@ def wrapper(self, other): return result elif other is tslib.NaT: - result = np.empty(len(self._values), dtype=bool) + result = np.empty(len(self._ndarray_values), dtype=bool) result.fill(nat_result) else: other = Period(other, freq=self.freq) - func = getattr(self._values, opname) + func = getattr(self._ndarray_values, opname) result = func(other.ordinal) if self.hasnans: @@ -275,11 +276,11 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq - data = data._values + data = data._ndarray_values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) - data = period.period_asfreq_arr(data._values, + data = period.period_asfreq_arr(data._ndarray_values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) @@ -374,7 +375,7 @@ def _shallow_copy(self, values=None, freq=None, **kwargs): if freq is None: freq = self.freq if values is None: - values = self._values + values = self._ndarray_values return 
super(PeriodIndex, self)._shallow_copy(values=values, freq=freq, **kwargs) @@ -407,7 +408,7 @@ def __contains__(self, key): @property def asi8(self): - return self._values.view('i8') + return self._ndarray_values.view('i8') @cache_readonly def _int64index(self): @@ -419,6 +420,12 @@ def values(self): @property def _values(self): + # TODO: return PeriodArray + return self.values + + @property + def _ndarray_values(self): + # Ordinals return self._data def __array__(self, dtype=None): @@ -489,13 +496,15 @@ def asof_locs(self, where, mask): if isinstance(where_idx, DatetimeIndex): where_idx = PeriodIndex(where_idx.values, freq=self.freq) - locs = self._values[mask].searchsorted(where_idx._values, side='right') + locs = self._ndarray_values[mask].searchsorted( + where_idx._ndarray_values, side='right') locs = np.where(locs > 0, locs - 1, 0) result = np.arange(len(self))[mask].take(locs) first = mask.argmax() - result[(locs == 0) & (where_idx._values < self._values[first])] = -1 + result[(locs == 0) & (where_idx._ndarray_values < + self._ndarray_values[first])] = -1 return result @@ -523,7 +532,8 @@ def searchsorted(self, value, side='left', sorter=None): elif isinstance(value, compat.string_types): value = Period(value, freq=self.freq).ordinal - return self._values.searchsorted(value, side=side, sorter=sorter) + return self._ndarray_values.searchsorted(value, side=side, + sorter=sorter) @property def is_all_dates(self): @@ -664,7 +674,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - new_data = period.periodarr_to_dt64arr(new_data._values, base) + new_data = period.periodarr_to_dt64arr(new_data._ndarray_values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) def _maybe_convert_timedelta(self, other): @@ -744,7 +754,7 @@ def shift(self, n): ------- shifted : PeriodIndex """ - values = self._values + n * self.freq.n + values = self._ndarray_values + n * self.freq.n if self.hasnans: values[self._isnan] = tslib.iNaT return self._shallow_copy(values=values) @@ -775,7 +785,7 @@ def get_value(self, series, key): grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) - vals = self._values + vals = self._ndarray_values # if our data is higher resolution than requested key, slice if grp < freqn: @@ -786,7 +796,7 @@ def get_value(self, series, key): if ord2 < vals[0] or ord1 > vals[-1]: raise KeyError(key) - pos = np.searchsorted(self._values, [ord1, ord2]) + pos = np.searchsorted(self._ndarray_values, [ord1, ord2]) key = slice(pos[0], pos[1] + 1) return series[key] elif grp == freqn: diff --git a/pandas/core/series.py b/pandas/core/series.py index e4b8979d6393a..b0ad76d12f1d9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1303,7 +1303,9 @@ def unique(self): # to return an object array of tz-aware Timestamps # TODO: it must return DatetimeArray with tz in pandas 2.0 - result = result.astype(object).values + # XXX: This surely will have issues around DST boundaries. 
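+            # ``result`` comes back from the parent ``unique`` as tz-naive
+            # datetime64[ns] values in UTC, so localize to UTC and convert to
+            # this Series' own timezone before boxing into an object array of
+            # tz-aware Timestamps.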
+ result = (DatetimeIndex(result, tz='UTC').tz_convert(self.dtype.tz) + .astype(object).values) return result diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 269c81b380b5e..bbeb9e162452d 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1884,7 +1884,7 @@ def _format(x): vals = self.values if isinstance(vals, Index): - vals = vals._values + vals = vals._ndarray_values elif isinstance(vals, ABCSparseArray): vals = vals.values diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0d833807602e1..2437b7d396e84 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4430,7 +4430,7 @@ def _convert_index(index, encoding=None, format_type=None): elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() # avoid to store ndarray of Period objects - return IndexCol(index._values, 'integer', atom, + return IndexCol(index._ndarray_values, 'integer', atom, freq=getattr(index, 'freq', None), index_name=index_name) diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 07163615c6ba4..9ca06475290e4 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -249,11 +249,11 @@ def _convert_1d(values, units, axis): is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values + return values.asfreq(axis.freq)._ndarray_values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values + return PeriodIndex(values, freq=axis.freq)._ndarray_values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values @@ -642,7 +642,7 @@ def _daily_finder(vmin, vmax, freq): info = np.zeros(span, dtype=[('val', np.int64), ('maj', bool), ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values + info['val'][:] = dates_._ndarray_values info['fmt'][:] = '' info['maj'][[0, -1]] = True # .. 
and set some shortcuts diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..2d8d70aa2ac84 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -314,7 +314,8 @@ def test_ensure_copied_data(self): # .values an object array of Period, thus copied result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') elif isinstance(index, IntervalIndex): # checked in test_interval.py @@ -323,7 +324,8 @@ def test_ensure_copied_data(self): result = index_type(index.values, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index.values, result.values, check_same='same') - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') def test_copy_and_deepcopy(self, indices): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 639a9272c3808..eca80d17b1dc3 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -119,8 +119,8 @@ def test_constructor_fromarraylike(self): tm.assert_index_equal(PeriodIndex(idx.values), idx) tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - pytest.raises(ValueError, PeriodIndex, idx._values) - pytest.raises(ValueError, PeriodIndex, list(idx._values)) + pytest.raises(ValueError, PeriodIndex, idx._ndarray_values) + pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values)) pytest.raises(TypeError, PeriodIndex, data=Period('2007', freq='A')) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6fc7fa5486f82..e3b1256fa0584 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -205,7 +205,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') @@ -213,7 +213,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') @@ -222,7 +222,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) def test_period_index_length(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 0e72cadb5d494..f5a62371ae799 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -22,7 +22,7 @@ class TestPeriodRepresentation(object): def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) - 
tm.assert_numpy_array_equal(rng._values, exp) + tm.assert_numpy_array_equal(rng.asi8, exp) def test_annual(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index df2547fc7b0da..5a67aa3f989ae 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -338,8 +338,9 @@ def test_ops(self): if not isinstance(o, PeriodIndex): expected = getattr(o.values, op)() else: - expected = pd.Period(ordinal=getattr(o._values, op)(), - freq=o.freq) + expected = pd.Period( + ordinal=getattr(o._ndarray_values, op)(), + freq=o.freq) try: assert result == expected except TypeError: @@ -450,7 +451,7 @@ def test_value_counts_unique_nunique_null(self): for orig in self.objs: o = orig.copy() klass = type(o) - values = o._values + values = o._ndarray_values if not self._allow_na_ops(o): continue @@ -1175,3 +1176,61 @@ def test_iter_box(self): assert isinstance(res, pd.Period) assert res.freq == 'M' assert res == exp + + +@pytest.mark.parametrize('arr, expected', [ + (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), + (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), + pd.DatetimeIndex(['2017'], tz='US/Eastern')), +]) +def test_unique_datetime_index(arr, expected): + result = arr.unique() + + if isinstance(expected, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + if isinstance(expected, pd.Series): + tm.assert_series_equal(result, expected) + if isinstance(expected, pd.DatetimeIndex): + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('arr, expected', [ + (pd.Series(pd.DatetimeIndex(['2017', '2017'])), + np.array(['2017'], dtype='M8[ns]')), + (pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')), + np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)), +]) +def test_unique_datetime_series(arr, expected): + result = arr.unique() + + if isinstance(expected, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + if isinstance(expected, pd.Series): + tm.assert_series_equal(result, expected) + if isinstance(expected, pd.DatetimeIndex): + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('array, expected_type', [ + (np.array([0, 1]), np.ndarray), + (np.array(['a', 'b']), np.ndarray), + (pd.Categorical(['a', 'b']), pd.Categorical), + (pd.DatetimeIndex(['2017', '2018']), np.ndarray), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray), + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex), +]) +def test_values_consistent(array, expected_type): + l_values = pd.Series(array)._values + r_values = pd.Index(array)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + if isinstance(l_values, np.ndarray): + tm.assert_numpy_array_equal(l_values, r_values) + elif isinstance(l_values, pd.Index): + tm.assert_index_equal(l_values, r_values) + elif pd.api.types.is_categorical(l_values): + tm.assert_categorical_equal(l_values, r_values) + else: + raise TypeError("Unexpected type {}".format(type(l_values))) From 29cfd7c22dd0b5b67c44144f1520f0bce8bf0e74 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 6 Feb 2018 14:34:22 -0600 Subject: [PATCH 016/119] Move to index base --- pandas/core/base.py | 24 ------------------------ pandas/core/indexes/base.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 52b1f82e8824d..ab4c969810c93 100644 --- a/pandas/core/base.py +++ 
b/pandas/core/base.py @@ -770,30 +770,6 @@ def _ndarray_values(self): # type: () -> np.ndarray return self.values - @property - def _values(self): - # type: () -> Union[ExtensionArray, Index] - # TODO: remove index types as they become is extension arrays - """ The best array representation. - - This is an ndarray, ExtensionArray, or Index subclass. This differs - from '._ndarray_values', which always returns an ndarray. It may differ - from the public '.values' - - index | values | _values - ----------------- | -------------- -| ---------- - CategoricalIndex | Categorical | Categorical - DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] - PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) - IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) - - See Also - -------- - values - _ndarray_values - """ - return self.values - @property def empty(self): return not self.size diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 450e0f47ef6ff..d84c4dcb58f83 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -574,6 +574,30 @@ def values(self): """ return the underlying data as an ndarray """ return self._data.view(np.ndarray) + @property + def _values(self): + # type: () -> Union[ExtensionArray, Index] + # TODO: remove index types as they become is extension arrays + """The best array representation. + + This is an ndarray, ExtensionArray, or Index subclass. This differs + from '._ndarray_values', which always returns an ndarray. It may differ + from the public '.values' + + index | values | _values + ----------------- | -------------- -| ---------- + CategoricalIndex | Categorical | Categorical + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] + PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) + IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) + + See Also + -------- + values + _ndarray_values + """ + return self.values + def get_values(self): """ return the underlying data as an ndarray """ return self.values From 82fd0c6ae185755e2e7f8c06b31155d4c2cefbf7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 06:14:59 -0600 Subject: [PATCH 017/119] Setitem tests, decimal example --- pandas/core/internals.py | 4 +- pandas/tests/extension_arrays/base.py | 186 ++++++++++++------ pandas/tests/extension_arrays/test_decimal.py | 147 ++++++++++++++ pandas/tests/extension_arrays/test_json.py | 29 +++ 4 files changed, 304 insertions(+), 62 deletions(-) create mode 100644 pandas/tests/extension_arrays/test_decimal.py diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c32a663d51482..ac490f946e5ee 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1939,7 +1939,9 @@ def _slice(self, slicer): return self.values[slicer] def setitem(self, indexer, value, mgr=None): - print(indexer, value) + print(indexer) + if isinstance(indexer, tuple): + indexer = indexer[0] self.values[indexer] = value return self diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 80a807a27f45a..bfbd0d96c2615 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -53,6 +53,9 @@ class BaseArrayTests(object): * data_missing """ + # ------------------------------------------------------------------------ + # Fixtures + # ------------------------------------------------------------------------ @pytest.fixture def data(self): """Length-100 array for this type.""" @@ -81,6 +84,10 @@ def na_cmp(self): """ return 
operator.is_ + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + def test_len(self, data): assert len(data) == 100 @@ -90,6 +97,35 @@ def test_ndim(self, data): def test_can_hold_na_valid(self, data): assert data._can_hold_na in {True, False} + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + def test_as_ndarray_with_dtype_kind(self, data): + np.array(data, dtype=data.dtype.kind) + + def test_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result + + # ------------------------------------------------------------------------ + # Constructors + # ------------------------------------------------------------------------ + def test_series_constructor(self, data): result = pd.Series(data) assert result.dtype == data.dtype @@ -105,6 +141,18 @@ def dataframe_constructor(self, data, from_series): assert result.shape == (len(data), 1) assert isinstance(result._data.blocks[0], ExtensionBlock) + @pytest.mark.xfail(reason="GH-19342") + def test_series_given_index(self, data): + result = pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + assert result.dtype == data.dtype + assert len(result) == 5 + assert len(result.values) == 5 + assert pd.isna(result.loc[[3, 4]]).all() + + # ------------------------------------------------------------------------ + # Reshaping + # ------------------------------------------------------------------------ + def test_concat(self, data): result = pd.concat([ pd.Series(data), @@ -112,6 +160,10 @@ def test_concat(self, data): ], ignore_index=True) assert len(result) == len(data) * 2 + # ------------------------------------------------------------------------ + # Indexing - getting + # ------------------------------------------------------------------------ + def test_iloc(self, data): ser = pd.Series(data) result = ser.iloc[:4] @@ -130,34 +182,12 @@ def test_loc(self, data): result = ser.loc[[0, 1, 2, 3]] tm.assert_series_equal(result, expected) - def test_repr(self, data): - ser = pd.Series(data) - assert data.dtype.name in repr(ser) - - df = pd.DataFrame({"A": data}) - repr(df) - - def test_dtype_name_in_info(self, data): - buf = StringIO() - pd.DataFrame({"A": data}).info(buf=buf) - result = buf.getvalue() - assert data.dtype.name in result - - def test_memory_usage(self, data): - s = pd.Series(data) - result = s.memory_usage(index=False) - assert result == s.nbytes - def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data) assert is_extension_array_dtype(data.dtype) assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) - def test_array_interface(self, data): - result = np.array(data) - assert result[0] == data[0] - def test_getitem_scalar(self, data): result = data[0] assert isinstance(result, data.dtype.type) @@ -207,6 +237,10 @@ def test_take_sequence(self, data): assert result.iloc[1] == data[1] assert result.iloc[2] == data[3] + # ------------------------------------------------------------------------ + # Indexing - Setting + # 
------------------------------------------------------------------------ + def test_setitem_scalar(self, data): arr = pd.Series(data) arr[0] = data[1] @@ -227,14 +261,51 @@ def test_setitem_sequence_broadcasts(self, data): assert arr[0] == data[2] assert arr[1] == data[2] - def test_loc_set_scalar(self, data): + @pytest.mark.parametrize('setter', ['loc', 'iloc']) + def test_set_scalar(self, data, setter): arr = pd.Series(data) - arr.loc[0] = data[1] + setter = getattr(arr, setter) + operator.setitem(setter, 0, data[1]) assert arr[0] == data[1] + def test_set_loc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.loc[0, 'B'] = data[1] + assert df.loc[0, 'B'] == data[1] + + def test_set_loc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.loc[10, 'B'] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_set_loc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.loc[10, 'B'] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_set_iloc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.iloc[0, 1] = data[1] + assert df.loc[0, 'B'] == data[1] + + def test_set_iloc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.iloc[10, 0] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_set_iloc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.iloc[10, 1] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_setitem_expand_columns(self, data): df = pd.DataFrame({"A": data}) - df.loc[0, 'A'] = data[1] - assert df.loc[0, 'A'] == data[1] + df['B'] = 1 + assert len(df.columns) == 2 + + # ------------------------------------------------------------------------ + # Methods + # ------------------------------------------------------------------------ def test_isna(self, data_missing): if data_missing._can_hold_na: @@ -254,6 +325,34 @@ def test_dropna(self, data_missing): expected = pd.Series(data_missing).iloc[[1]] tm.assert_series_equal(result, expected) + def test_align(self, data): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # TODO: assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(data)(list(a) + [data._fill_value])) + e2 = pd.Series(type(data)([data._fill_value] + list(b))) + tm.assert_series_equal(r1, e1) + tm.assert_series_equal(r2, e2) + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------------ + # Ops + # ------------------------------------------------------------------------ + @pytest.mark.parametrize("method", [ "mean", "sum", "prod", "mad", "sem", "var", "std", "skew", "kurt", "median" @@ -332,38 +431,3 @@ def test_binops(self, data, binop): # series result = binop(df['B'], df['B']) tm.assert_series_equal(result, expected['B']) - - def test_as_ndarray(self, data): - np.array(data, dtype=data.dtype.kind) - - def test_align(self, data): - a = data[:3] - b = data[2:5] - r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) - - # TODO: assumes that the ctor can take a list of scalars of 
the type - e1 = pd.Series(type(data)(list(a) + [data._fill_value])) - e2 = pd.Series(type(data)([data._fill_value] + list(b))) - tm.assert_series_equal(r1, e1) - tm.assert_series_equal(r2, e2) - - @pytest.mark.xfail(reason="GH-19342") - def test_series_given_index(self, data): - result = pd.Series(data[:3], index=[0, 1, 2, 3, 4]) - assert result.dtype == data.dtype - assert len(result) == 5 - assert len(result.values) == 5 - assert pd.isna(result.loc[[3, 4]]).all() - - @pytest.mark.parametrize('dropna', [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() - - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension_arrays/test_decimal.py b/pandas/tests/extension_arrays/test_decimal.py new file mode 100644 index 0000000000000..f6b281516e115 --- /dev/null +++ b/pandas/tests/extension_arrays/test_decimal.py @@ -0,0 +1,147 @@ +import decimal +import numbers +import random +import sys + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest + +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.base import ExtensionDtype + +from .base import BaseDtypeTests, BaseArrayTests + + +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = 'decimal' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class DecimalArray(ExtensionArray): + dtype = DecimalDtype() + + def __init__(self, values): + values = np.asarray(values, dtype=object) + + self.values = values + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.values[item] + elif isinstance(item, np.ndarray) and item.dtype == 'bool': + return type(self)([x for x, m in zip(self, item) if m]) + else: + return type(self)(self.values[item]) + + def copy(self, deep=False): + if deep: + return type(self)(self.values.copy()) + return type(self)(self) + + def __setitem__(self, key, value): + if pd.api.types.is_list_like(value): + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + self.values[key] = value + + def __len__(self): + return len(self.values) + + def __repr__(self): + return repr(self.values) + + @property + def nbytes(self): + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self.values]) + + def take(self, indexer, allow_fill=True, fill_value=None): + mask = indexer == -1 + + out = self.values.take(indexer) + out[mask] = self._fill_value + + return type(self)(out) + + @property + def _fill_value(self): + return decimal.Decimal('NaN') + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x.values for x in to_concat])) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] + + +class TestDecimalDtype(BaseDtypeTests): + + @pytest.fixture + def dtype(self): + return DecimalDtype() + + +class TestDecimalArray(BaseArrayTests): + + @pytest.fixture + def data(self): + return DecimalArray(make_data()) + + @pytest.fixture + def data_missing(self): + return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) + + @pytest.fixture + def na_cmp(self): + 
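+        # decimal.Decimal('NaN') compares unequal to everything, including
+        # itself, and two NaN instances are not ``is``-identical, so the
+        # default ``operator.is_`` comparison is replaced with an ``is_nan``
+        # check on both operands.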
return lambda x, y: x.is_nan() and y.is_nan() + + @pytest.mark.skip(reason="Who knows.") + def test_repr(self, data): + super().test_repr(data) + + def test_align(self, data): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # NaN handling + e1 = pd.Series(type(data)(list(a) + [data._fill_value])) + e2 = pd.Series(type(data)([data._fill_value] + list(b))) + tm.assert_series_equal(r1.iloc[:3], e1.iloc[:3]) + assert r1[3].is_nan() + assert e1[3].is_nan() + + tm.assert_series_equal(r2.iloc[1:], e2.iloc[1:]) + assert r2[0].is_nan() + assert e2[0].is_nan() + + @pytest.mark.skip(reason="NaN Sorting") + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index a3e3c119561cc..e48209c143fd0 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -116,6 +116,7 @@ def data_missing(self): def na_cmp(self): return operator.eq + # Having trouble setting a sized object like {'a': 1} into a scalar slot @pytest.mark.skip(reason="Unorderable") def test_reduction_orderable(self, data, method): pass @@ -123,3 +124,31 @@ def test_reduction_orderable(self, data, method): @pytest.mark.skip(reason="Unhashable") def test_value_counts(self, all_data, dropna): pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_scalar(self): + pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_loc_scalar_mixed(self): + pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_loc_scalar_single(self): + pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_loc_scalar_multiple_homogoneous(self): + pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_iloc_scalar_mixed(self): + pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_iloc_scalar_single(self): + pass + + @pytest.mark.xfail(reason="Difficulty setting sized objects.") + def test_set_iloc_scalar_multiple_homogoneous(self): + pass From 8b1e7d61bd8411b641c63b27f8e444b8b49dc51b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 06:30:33 -0600 Subject: [PATCH 018/119] Compat --- pandas/core/internals.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ac490f946e5ee..21457d10303a8 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1879,7 +1879,9 @@ def _holder(self): @property def _can_hold_na(self): # The default ExtensionBlock._can_hold_na is True - return self._holder._can_hold_na + # Needed getattr to pass our old extension tests + # Check if geopandas needs this. + return getattr(self._holder, '_can_hold_na', True) @property def is_view(self): From 10af4b6b934234a23c2ba35fce02e3af23546a2c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 08:09:24 -0600 Subject: [PATCH 019/119] Fixed extension block tests. The only "API change" was that you can't just inherit from NonConsolidatableMixin, which is OK since 1. it's a mixin 2. 
geopandas also inherits from Block --- pandas/core/internals.py | 4 +--- pandas/tests/internals/test_external_block.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 21457d10303a8..ac490f946e5ee 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1879,9 +1879,7 @@ def _holder(self): @property def _can_hold_na(self): # The default ExtensionBlock._can_hold_na is True - # Needed getattr to pass our old extension tests - # Check if geopandas needs this. - return getattr(self._holder, '_can_hold_na', True) + return self._holder._can_hold_na @property def is_view(self): diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py index 2487363df8f99..991da41168aa0 100644 --- a/pandas/tests/internals/test_external_block.py +++ b/pandas/tests/internals/test_external_block.py @@ -5,12 +5,12 @@ import pandas as pd from pandas.core.internals import ( - BlockManager, SingleBlockManager, ExtensionBlock) + BlockManager, SingleBlockManager, NonConsolidatableMixIn, Block) import pytest -class CustomBlock(ExtensionBlock): +class CustomBlock(NonConsolidatableMixIn, Block): _holder = np.ndarray From cd5f1eb37d8fc46b959d032becb21789f897bbdd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 08:44:41 -0600 Subject: [PATCH 020/119] Clarify binop tests Make it clearer which bit might raise --- pandas/tests/extension_arrays/base.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index bfbd0d96c2615..269d4d7760930 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -412,14 +412,16 @@ def test_binops(self, data, binop): }) try: - expected = pd.DataFrame({ - "A": binop(df['A'], df['A']), - "B": binop(df['B'].values, df['B'].values), - }) - except Exception: + expected_array = binop(data, data) + except TypeError: msg = "{} not supported for {}".format(binop.__name__, data.dtype.name) - raise pytest.skip(msg) + pytest.skip(msg) + + expected = pd.DataFrame({ + "A": binop(df['A'], df['A']), + "B": expected_array, + }) result = binop(df, df) tm.assert_frame_equal(result, expected) From 0a9d9fd94988245317852c9712cb856b85f85f36 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 09:25:58 -0600 Subject: [PATCH 021/119] TST: Removed ops tests --- pandas/core/internals.py | 1 - pandas/tests/extension_arrays/base.py | 85 ------------------- pandas/tests/extension_arrays/test_decimal.py | 4 - pandas/tests/extension_arrays/test_json.py | 5 -- 4 files changed, 95 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ac490f946e5ee..2f61da7f719c0 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1939,7 +1939,6 @@ def _slice(self, slicer): return self.values[slicer] def setitem(self, indexer, value, mgr=None): - print(indexer) if isinstance(indexer, tuple): indexer = indexer[0] self.values[indexer] = value diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 269d4d7760930..b4362b6795737 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -348,88 +348,3 @@ def test_value_counts(self, all_data, dropna): expected = pd.Series(other).value_counts(dropna=dropna).sort_index() tm.assert_series_equal(result, expected) - - # 
------------------------------------------------------------------------ - # Ops - # ------------------------------------------------------------------------ - - @pytest.mark.parametrize("method", [ - "mean", "sum", "prod", "mad", "sem", "var", "std", - "skew", "kurt", "median" - ]) - def test_nuisance_dropped(self, data, method): - data = data[:5] - func = operator.methodcaller(method) - df = pd.DataFrame({"A": np.arange(len(data)), - "B": data}) - obj = pd.DataFrame({"A": np.arange(len(data)), - "B": np.array(data, dtype=object)}) - - assert len(func(df)) == len(func(obj)) - - @pytest.mark.parametrize("method", [min, max]) - def test_reduction_orderable(self, data, method): - data = data[:5] - func = operator.methodcaller(method.__name__) - df = pd.DataFrame({"A": np.arange(len(data)), - "B": data}) - result = func(df) - assert len(result) == 2 - - expected = method(data) - assert result['B'] == expected - - @pytest.mark.parametrize("method", ['cummax', 'cummin']) - @pytest.mark.xfail(reason="Assumes comparable to floating.") - def test_cumulative_orderable(self, data, method): - # Upcast to object - # https://github.com/pandas-dev/pandas/issues/19296 - # assert result.dtypes['B'] == data.dtype - data = data[:5] - func = operator.methodcaller(method) - df = pd.DataFrame({"A": np.arange(len(data)), - "B": data}) - result = func(df) - assert result.shape == df.shape - - @pytest.mark.parametrize("binop", [ - operator.add, - operator.sub, - operator.lt, - operator.le, - operator.ge, - operator.gt, - operator.pow, - ], ids=lambda x: x.__name__) - def test_binops(self, data, binop): - # Assert that binops work between DataFrames / Series with this type - # if binops work between arrays of this type. Extra tests will be - # needed for, e.g., Array + scalar - data = data[:5] - df = pd.DataFrame({ - "A": np.arange(len(data)), - "B": data - }) - - try: - expected_array = binop(data, data) - except TypeError: - msg = "{} not supported for {}".format(binop.__name__, - data.dtype.name) - pytest.skip(msg) - - expected = pd.DataFrame({ - "A": binop(df['A'], df['A']), - "B": expected_array, - }) - - result = binop(df, df) - tm.assert_frame_equal(result, expected) - - # homogeneous frame - result = binop(df[['B']], df[['B']]) - tm.assert_frame_equal(result, expected[['B']]) - - # series - result = binop(df['B'], df['B']) - tm.assert_series_equal(result, expected['B']) diff --git a/pandas/tests/extension_arrays/test_decimal.py b/pandas/tests/extension_arrays/test_decimal.py index f6b281516e115..687e645825a75 100644 --- a/pandas/tests/extension_arrays/test_decimal.py +++ b/pandas/tests/extension_arrays/test_decimal.py @@ -113,10 +113,6 @@ def data_missing(self): def na_cmp(self): return lambda x, y: x.is_nan() and y.is_nan() - @pytest.mark.skip(reason="Who knows.") - def test_repr(self, data): - super().test_repr(data) - def test_align(self, data): a = data[:3] b = data[2:5] diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index e48209c143fd0..343168a63da60 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -116,11 +116,6 @@ def data_missing(self): def na_cmp(self): return operator.eq - # Having trouble setting a sized object like {'a': 1} into a scalar slot - @pytest.mark.skip(reason="Unorderable") - def test_reduction_orderable(self, data, method): - pass - @pytest.mark.skip(reason="Unhashable") def test_value_counts(self, all_data, dropna): pass From 3185f4e08fdde6736a02edb52da2647cae8d599c Mon 
Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 13:17:40 -0600 Subject: [PATCH 022/119] Cleanup unique handling --- pandas/core/base.py | 4 +--- pandas/core/indexes/datetimes.py | 12 ++++++++++++ pandas/core/series.py | 4 +--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index ab4c969810c93..7a8b5f9b608c7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -975,10 +975,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) def unique(self): values = self._values - if isinstance(values, ABCDatetimeIndex): - values = values._ndarray_values + # TODO: Make unique part of the ExtensionArray interface. - # else, this could be surprising. if hasattr(values, 'unique'): result = values.unique() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c32d7ce930a7c..d749f8aec50cd 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1095,6 +1095,18 @@ def snap(self, freq='S'): # we know it conforms; skip check return DatetimeIndex(snapped, freq=freq, verify_integrity=False) + def unique(self, level=None): + # Override here since IndexOpsMixin.unique uses self._values.unique + # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error + # So we extract the tz-naive DatetimeIndex, unique that, and wrap the + # result with out TZ. + if self.tz is not None: + naive = type(self)(self._ndarray_values, copy=False) + else: + naive = self + result = super(DatetimeIndex, naive).unique(level=level) + return self._simple_new(result, name=self.name, tz=self.tz, freq=self.freq) + def union(self, other): """ Specialized union for DatetimeIndex objects. If combine diff --git a/pandas/core/series.py b/pandas/core/series.py index b0ad76d12f1d9..e4b8979d6393a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1303,9 +1303,7 @@ def unique(self): # to return an object array of tz-aware Timestamps # TODO: it must return DatetimeArray with tz in pandas 2.0 - # XXX: This surely will have issues around DST boundaries. 
- result = (DatetimeIndex(result, tz='UTC').tz_convert(self.dtype.tz) - .astype(object).values) + result = result.astype(object).values return result From 476f75d3b8cf07fb9965a1fa96dcdf932a01bde8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 14:29:02 -0600 Subject: [PATCH 023/119] Simplify object concat --- pandas/core/dtypes/concat.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index a49a2680e4daa..d6b55d03ebccd 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -493,20 +493,11 @@ def _concat_index_asobject(to_concat, name=None): to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] - from pandas import Index self = to_concat[0] attribs = self._get_attributes_dict() attribs['name'] = name - arrays = [] - for x in to_concat: - if is_categorical_dtype(x): - arrays.append(np.asarray(x, dtype=object)) - elif isinstance(x, Index): - arrays.append(x._values) - else: - arrays.append(x) - + arrays = [np.array(x, copy=False, dtype=object) for x in to_concat] return self._shallow_copy_with_infer(np.concatenate(arrays), **attribs) From b15ee5a000003e42bf65389308c7277b6461fd05 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 14:38:58 -0600 Subject: [PATCH 024/119] Use values for intersection I think eventually we'll want to ndarray_values for this, but it'll require a bit more work to support. Currently, using ndarary_values causes occasional failures on categorical. --- pandas/core/indexes/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dd4c8ac2e86a3..70c0c822fb5e8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2347,8 +2347,7 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._ndarray_values, - other._ndarray_values)[0] + result = self._inner_indexer(self._values, other._values)[0] return self._wrap_union_result(other, result) except TypeError: pass From 659073f8a67e513267048d467da715c60d885c51 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 15:14:32 -0600 Subject: [PATCH 025/119] hmm --- pandas/core/indexes/base.py | 22 +++++++++++++++++++++- pandas/core/indexes/category.py | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 70c0c822fb5e8..260016661a735 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2310,6 +2310,24 @@ def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None return self.__class__(result, name=name) + def _ensure_join(self, values): + """Ensure that the 'values' are ready for our join indexer. + + The default join indexers are object, so this just returns 'values'. + This is called before calling those. + + + Parameters + ---------- + values : array-like + + Returns + ------- + values : ndarray + Expected to have the correct type for self.inner_indexer + """ + return values + def intersection(self, other): """ Form the intersection of two Index objects. 
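As a concrete, hedged illustration of the reasoning in the commit message above (the example values are invented; `_ndarray_values` for a categorical is its integer codes, as the `test_ndarray_values` parameters added later in this series show): two categorical indexes can share identical codes while holding different labels, which is why a join computed on `_ndarray_values` alone can silently match the wrong elements.

    import pandas as pd

    a = pd.CategoricalIndex(['a', 'b'])   # categories ['a', 'b'], codes [0, 1]
    b = pd.CategoricalIndex(['b', 'c'])   # categories ['b', 'c'], codes [0, 1]

    # Both expose codes [0, 1] even though only 'b' is a shared label, so an
    # intersection on the raw codes would wrongly pair 'a' with 'b'.
    a._ndarray_values   # array([0, 1], dtype=int8)
    b._ndarray_values   # array([0, 1], dtype=int8)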
@@ -2347,7 +2365,9 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._values, other._values)[0] + lvals = self._ensure_join(self._ndarray_values) + rvals = self._ensure_join(other._ndarray_values) + result = self._inner_indexer(lvals, rvals)[0] return self._wrap_union_result(other, result) except TypeError: pass diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5b01f7d2cbe95..48cdd28911487 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,5 +1,6 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs import join as libjoin from pandas import compat from pandas.compat.numpy import function as nv @@ -8,6 +9,8 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, _ensure_platform_int, + _ensure_int32, + _ensure_int64, is_list_like, is_interval_dtype, is_scalar) @@ -214,6 +217,14 @@ def _shallow_copy(self, values=None, categories=None, ordered=None, values=values, categories=categories, ordered=ordered, **kwargs) + @cache_readonly + def _inner_indexer(self): + if self.codes.dtype.itemsize <= 4: + # int8, int16, int32 + return libjoin.inner_join_indexer_int32 + else: + return libjoin.inner_join_indexer_int64 + def _is_dtype_compat(self, other): """ *this is an internal non-public method* @@ -787,6 +798,12 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) + def _ensure_join(self, values): + if self.codes.dtype.itemsize <= 4: + return _ensure_int32(values) + else: + return _ensure_int64(values) + @classmethod def _add_accessors(cls): """ add in Categorical accessor methods """ From b15ecac9a4aaaa0b7fbc4d4df0644affc25a5f20 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 16:52:38 -0600 Subject: [PATCH 026/119] More failing tests --- pandas/tests/extension_arrays/base.py | 69 ++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index b4362b6795737..d0f82db3a4d14 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -131,6 +131,7 @@ def test_series_constructor(self, data): assert result.dtype == data.dtype assert len(result) == len(data) assert isinstance(result._data.blocks[0], ExtensionBlock) + assert result._data.blocks[0].values is data @pytest.mark.parametrize("from_series", [True, False]) def dataframe_constructor(self, data, from_series): @@ -159,12 +160,14 @@ def test_concat(self, data): pd.Series(data), ], ignore_index=True) assert len(result) == len(data) * 2 + assert result.dtype == data.dtype + assert isinstance(result._data.blocks[0], ExtensionBlock) # ------------------------------------------------------------------------ # Indexing - getting # ------------------------------------------------------------------------ - def test_iloc(self, data): + def test_iloc_series(self, data): ser = pd.Series(data) result = ser.iloc[:4] expected = pd.Series(data[:4]) @@ -173,7 +176,29 @@ def test_iloc(self, data): result = ser.iloc[[0, 1, 2, 3]] tm.assert_series_equal(result, expected) - def test_loc(self, data): + def test_iloc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.iloc[:4, [0]] + tm.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.iloc[[0, 
1, 2, 3], [0]] + tm.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.iloc[:4, 0] + tm.assert_series_equal(result, expected) + + # sequence -> series + result = df.iloc[:4, 0] + tm.assert_series_equal(result, expected) + + def test_loc_series(self, data): ser = pd.Series(data) result = ser.loc[:3] expected = pd.Series(data[:4]) @@ -182,6 +207,28 @@ def test_loc(self, data): result = ser.loc[[0, 1, 2, 3]] tm.assert_series_equal(result, expected) + def test_loc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.loc[:3, ['A']] + tm.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.loc[[0, 1, 2, 3], ['A']] + tm.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.loc[:3, 'A'] + tm.assert_series_equal(result, expected) + + # sequence -> series + result = df.loc[:3, 'A'] + tm.assert_series_equal(result, expected) + def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data) assert is_extension_array_dtype(data.dtype) @@ -298,6 +345,24 @@ def test_set_iloc_scalar_multiple_homogoneous(self, data): df.iloc[10, 1] = data[1] assert df.loc[10, 'B'] == data[1] + def test_set_mask_aligned(self, data): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + ser[mask] = data[5:7] + assert ser[0] == data[5] + assert ser[1] == data[6] + + def test_set_mask_broadcast(self, data): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + ser[mask] = data[10] + assert ser[0] == data[10] + assert ser[1] == data[10] + def test_setitem_expand_columns(self, data): df = pd.DataFrame({"A": data}) df['B'] = 1 From 88b8f4fea5b588b7fb1c76abd3b599e100b4a8c3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 16:54:17 -0600 Subject: [PATCH 027/119] remove bad test --- pandas/tests/extension_arrays/base.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index d0f82db3a4d14..939fb6fd05bc2 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -142,14 +142,6 @@ def dataframe_constructor(self, data, from_series): assert result.shape == (len(data), 1) assert isinstance(result._data.blocks[0], ExtensionBlock) - @pytest.mark.xfail(reason="GH-19342") - def test_series_given_index(self, data): - result = pd.Series(data[:3], index=[0, 1, 2, 3, 4]) - assert result.dtype == data.dtype - assert len(result) == 5 - assert len(result.values) == 5 - assert pd.isna(result.loc[[3, 4]]).all() - # ------------------------------------------------------------------------ # Reshaping # ------------------------------------------------------------------------ From 349ac1ab06bfb0f51793a75f9270737139001a4f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 21:45:07 -0600 Subject: [PATCH 028/119] better setitem --- pandas/core/indexing.py | 2 + pandas/core/internals.py | 5 +- pandas/tests/extension_arrays/test_json.py | 54 +++++++++++++++------- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9463512ac11de..8f0f88a24552d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -618,6 +618,8 @@ def can_do_equal_len(): return if isinstance(value, (ABCSeries, dict)): 
+ # TODO (maybe) this causes issues with setting for + # extensionarrays that store dicts. value = self._align_series(indexer, Series(value)) elif isinstance(value, ABCDataFrame): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 2f61da7f719c0..4d328bc8af189 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -15,6 +15,7 @@ from pandas.core.base import PandasObject +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( ExtensionDtype, DatetimeTZDtype, CategoricalDtype) @@ -3481,7 +3482,9 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, aligned_args = dict((k, kwargs[k]) for k in align_keys - if hasattr(kwargs[k], 'values')) + if hasattr(kwargs[k], 'values') + # eww + and not isinstance(kwargs[k], ExtensionArray)) for b in self.blocks: if filter is not None: diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index 343168a63da60..b4c23de6852b1 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -50,13 +50,21 @@ def __setitem__(self, key, value): if isinstance(key, numbers.Integral): self.data[key] = value else: - if not isinstance(value, collections.Sequence): + if not isinstance(value, (type(self), + collections.Sequence)): # broadcast value value = itertools.cycle([value]) - for k, v in zip(key, value): - assert isinstance(v, self.dtype.type) - self.data[k] = v + if isinstance(key, np.ndarray) and key.dtype == 'bool': + # masking + for i, (k, v) in enumerate(zip(key, value)): + if k: + assert isinstance(v, self.dtype.type) + self.data[i] = v + else: + for k, v in zip(key, value): + assert isinstance(v, self.dtype.type) + self.data[k] = v def __len__(self): return len(self.data) @@ -90,8 +98,10 @@ def _concat_same_type(cls, to_concat): def make_data(): - return [{random.choice(string.ascii_letters): random.randint(0, 100) - for _ in range(random.randint(0, 10))} for _ in range(100)] + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + return [collections.UserDict([ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10))]) for _ in range(100)] class TestJSONDtype(BaseDtypeTests): @@ -120,30 +130,42 @@ def na_cmp(self): def test_value_counts(self, all_data, dropna): pass - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_scalar(self): - pass + # @pytest.mark.xfail(reason="Difficulty setting sized objects.") + # def test_set_scalar(self): + # pass + # @pytest.mark.xfail(reason="Difficulty setting sized objects.") def test_set_loc_scalar_mixed(self): + # This fails on an np.ndarary(dict) call in _setitem_with_indexer pass - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_loc_scalar_single(self): - pass + # @pytest.mark.xfail(reason="Difficulty setting sized objects.") + # def test_set_loc_scalar_single(self): + # pass + # @pytest.mark.xfail(reason="Difficulty setting sized objects.") def test_set_loc_scalar_multiple_homogoneous(self): + # This fails in _setitem_with_indexer with a + # ValueError: Must have equal len keys and value when setting with + # and iterable pass @pytest.mark.xfail(reason="Difficulty setting sized objects.") def test_set_iloc_scalar_mixed(self): + # This fails in _setitem_with_indexer with a + # ValueError: Must have equal len keys and value when setting with an + # iterable pass - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_iloc_scalar_single(self): - pass - + # @pytest.mark.xfail(reason="Difficulty setting sized objects.") + # def test_set_iloc_scalar_single(self): + # pass + # @pytest.mark.xfail(reason="Difficulty setting sized objects.") def test_set_iloc_scalar_multiple_homogoneous(self): + # this fails in _setitem_with_indexer with a + # ValueError: Must have equal len keys and value when setting with an + # iterable pass From 27ab045e3d83871a9b28d532c8d44b0adc238fff Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 22:15:01 -0600 Subject: [PATCH 029/119] Dropna works. --- pandas/core/frame.py | 2 +- pandas/core/internals.py | 7 ++++++ pandas/tests/extension_arrays/base.py | 32 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ec0b805b590fe..f565154d5c678 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5622,7 +5622,7 @@ def count(self, axis=0, level=None, numeric_only=False): if len(frame._get_axis(axis)) == 0: result = Series(0, index=frame._get_agg_axis(axis)) else: - if frame._is_mixed_type: + if frame._is_mixed_type or frame._data.any_extension_types: result = notna(frame).sum(axis=axis) else: counts = notna(frame.values).sum(axis=axis) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4d328bc8af189..fe59c0fe6ee2b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -103,6 +103,7 @@ class Block(PandasObject): is_object = False is_categorical = False is_sparse = False + is_extension = False _box_to_block_values = True _can_hold_na = False _can_consolidate = True @@ -1858,6 +1859,7 @@ class ExtensionBlock(NonConsolidatableMixIn, Block): ExtensionArrays are limited to 1-D. 
""" + is_extension = True def __init__(self, values, placement, ndim=None): values = self._maybe_coerce_values(values) @@ -3727,6 +3729,11 @@ def is_datelike_mixed_type(self): self._consolidate_inplace() return any(block.is_datelike for block in self.blocks) + @property + def any_extension_types(self): + """Whether any of the blocks in this manager are extension blocks""" + return any(block.is_extension for block in self.blocks) + @property def is_view(self): """ return a boolean if we are a single block and are a view """ diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 939fb6fd05bc2..a2f87d3bfb302 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -405,3 +405,35 @@ def test_value_counts(self, all_data, dropna): expected = pd.Series(other).value_counts(dropna=dropna).sort_index() tm.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis='columns') + expected = pd.Series([0, 1]) + tm.assert_series_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + tm.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = df.dropna() + expected = df.iloc[[1]] + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis='columns') + expected = pd.DataFrame(index=[0, 1]) + tm.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, + "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + tm.assert_frame_equal(result, expected) From 8358fb10de6bc49d7f435e1332aabe9d5a31b85b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 08:15:50 -0600 Subject: [PATCH 030/119] Restore xfail test --- pandas/tests/extension_arrays/base.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index a2f87d3bfb302..7a5a3e788b384 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -142,6 +142,14 @@ def dataframe_constructor(self, data, from_series): assert result.shape == (len(data), 1) assert isinstance(result._data.blocks[0], ExtensionBlock) + @pytest.mark.xfail(reason="GH-19342") + def test_series_given_mismatched_index_raises(self, data): + msg = 'Wrong number of items passed 3, placement implies 4' + with tm.assert_raises_regex(ValueError, None) as m: + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + assert m.match(msg) + # ------------------------------------------------------------------------ # Reshaping # ------------------------------------------------------------------------ From 8ef34a96c359e2b1798803f83f1193f243d51328 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 08:16:09 -0600 Subject: [PATCH 031/119] Test Categorical --- pandas/core/arrays/categorical.py | 5 ++ .../extension_arrays/test_categorical.py | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 pandas/tests/extension_arrays/test_categorical.py diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 62c6a6b16cbe9..6c5b0c9d2be98 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2137,6 +2137,10 @@ def repeat(self, repeats, *args, **kwargs): def 
_can_hold_na(self): return True + @property + def _fill_value(self): + return np.nan + @classmethod def _concat_same_type(self, to_concat): from pandas.core.dtypes.concat import _concat_categorical @@ -2146,6 +2150,7 @@ def _concat_same_type(self, to_concat): def _formatting_values(self): return self + # The Series.cat accessor diff --git a/pandas/tests/extension_arrays/test_categorical.py b/pandas/tests/extension_arrays/test_categorical.py new file mode 100644 index 0000000000000..237963bc38415 --- /dev/null +++ b/pandas/tests/extension_arrays/test_categorical.py @@ -0,0 +1,63 @@ +import string + +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype +from pandas import Categorical +from .base import BaseArrayTests, BaseDtypeTests + + +class TestCategoricalDtype(BaseDtypeTests): + @pytest.fixture + def dtype(self): + return CategoricalDtype() + + +def make_data(): + return np.random.choice(list(string.ascii_letters), size=100) + + +class TestCategoricalArray(BaseArrayTests): + + @pytest.fixture + def data(self): + """Length-100 PeriodArray for semantics test.""" + return Categorical(make_data()) + + @pytest.fixture + def data_missing(self): + """Length 2 array with [NA, Valid]""" + return Categorical([np.nan, 'A']) + + @pytest.mark.skip(reason="Memory usage doesn't match") + def test_memory_usage(self): + # Is this deliberate? + pass + + @pytest.mark.skip(reason="Backwards compatability") + def test_getitem_scalar(self): + # CategoricalDtype.type isn't "correct" since it should + # be a parent of the elements (object). But don't want + # to break things by changing. + pass + + def test_align(self, data): + # Override to pass through dtype + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # TODO: assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(data)(list(a) + [data._fill_value], + dtype=data.dtype)) + e2 = pd.Series(type(data)([data._fill_value] + list(b), + dtype=data.dtype)) + tm.assert_series_equal(r1, e1) + tm.assert_series_equal(r2, e2) + + @pytest.mark.skip(reason="Different value_counts semantics.") + def test_value_counts(self, all_data, dropna): + pass From 340d11be7b4415238a7a89fea539abee7c07e338 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 08:30:20 -0600 Subject: [PATCH 032/119] Xfail setitem tests --- pandas/core/indexing.py | 5 +++-- pandas/core/internals.py | 12 +++--------- pandas/tests/extension_arrays/base.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8f0f88a24552d..515fe7d3b1d9d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -618,8 +618,9 @@ def can_do_equal_len(): return if isinstance(value, (ABCSeries, dict)): - # TODO (maybe) this causes issues with setting for - # extensionarrays that store dicts. + # TODO: ExtensionBlock.setitem this causes issues with setting for + # extensionarrays that store dicts. Need to decide if it's worth + # supporting that case. 
value = self._align_series(indexer, Series(value)) elif isinstance(value, ABCDataFrame): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fe59c0fe6ee2b..b778900157743 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1941,12 +1941,6 @@ def _slice(self, slicer): return self.values[slicer] - def setitem(self, indexer, value, mgr=None): - if isinstance(indexer, tuple): - indexer = indexer[0] - self.values[indexer] = value - return self - def formatting_values(self): return self.values._formatting_values() @@ -3482,11 +3476,11 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, else: align_keys = [] + # TODO: may interfere with ExtensionBlock.setitem for blocks + # with a .values attribute. aligned_args = dict((k, kwargs[k]) for k in align_keys - if hasattr(kwargs[k], 'values') - # eww - and not isinstance(kwargs[k], ExtensionArray)) + if hasattr(kwargs[k], 'values')) for b in self.blocks: if filter is not None: diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension_arrays/base.py index 7a5a3e788b384..dc9bca653e6f3 100644 --- a/pandas/tests/extension_arrays/base.py +++ b/pandas/tests/extension_arrays/base.py @@ -288,11 +288,13 @@ def test_take_sequence(self, data): # Indexing - Setting # ------------------------------------------------------------------------ + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_setitem_scalar(self, data): arr = pd.Series(data) arr[0] = data[1] assert arr[0] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_setitem_sequence(self, data): arr = pd.Series(data) original = data.copy() @@ -301,6 +303,7 @@ def test_setitem_sequence(self, data): assert arr[0] == original[1] assert arr[1] == original[0] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_setitem_sequence_broadcasts(self, data): arr = pd.Series(data) @@ -308,6 +311,7 @@ def test_setitem_sequence_broadcasts(self, data): assert arr[0] == data[2] assert arr[1] == data[2] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") @pytest.mark.parametrize('setter', ['loc', 'iloc']) def test_set_scalar(self, data, setter): arr = pd.Series(data) @@ -315,36 +319,43 @@ def test_set_scalar(self, data, setter): operator.setitem(setter, 0, data[1]) assert arr[0] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_loc_scalar_mixed(self, data): df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) df.loc[0, 'B'] = data[1] assert df.loc[0, 'B'] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_loc_scalar_single(self, data): df = pd.DataFrame({"B": data}) df.loc[10, 'B'] = data[1] assert df.loc[10, 'B'] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_loc_scalar_multiple_homogoneous(self, data): df = pd.DataFrame({"A": data, "B": data}) df.loc[10, 'B'] = data[1] assert df.loc[10, 'B'] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_iloc_scalar_mixed(self, data): df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) df.iloc[0, 1] = data[1] assert df.loc[0, 'B'] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_iloc_scalar_single(self, data): df = pd.DataFrame({"B": data}) df.iloc[10, 0] = data[1] assert df.loc[10, 'B'] == data[1] + 
@pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_iloc_scalar_multiple_homogoneous(self, data): df = pd.DataFrame({"A": data, "B": data}) df.iloc[10, 1] = data[1] assert df.loc[10, 'B'] == data[1] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_mask_aligned(self, data): ser = pd.Series(data) mask = np.zeros(len(data), dtype=bool) @@ -354,6 +365,7 @@ def test_set_mask_aligned(self, data): assert ser[0] == data[5] assert ser[1] == data[6] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_set_mask_broadcast(self, data): ser = pd.Series(data) mask = np.zeros(len(data), dtype=bool) @@ -363,6 +375,7 @@ def test_set_mask_broadcast(self, data): assert ser[0] == data[10] assert ser[1] == data[10] + @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") def test_setitem_expand_columns(self, data): df = pd.DataFrame({"A": data}) df['B'] = 1 From 82978886564fb299462b4a5752ff9ca9a47a48c3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 08:34:55 -0600 Subject: [PATCH 033/119] TST: Skip JSON tests on py2 --- pandas/tests/extension_arrays/test_json.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension_arrays/test_json.py index b4c23de6852b1..515272a4850f9 100644 --- a/pandas/tests/extension_arrays/test_json.py +++ b/pandas/tests/extension_arrays/test_json.py @@ -16,6 +16,10 @@ from .base import BaseArrayTests, BaseDtypeTests +pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, + reason="Py2 doesn't have a UserDict") + + class JSONDtype(ExtensionDtype): type = collections.Mapping name = 'json' From 9b8d2a51857a4d8c78ce09c6e54097ab9eddbb08 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 08:54:19 -0600 Subject: [PATCH 034/119] Additional testing --- pandas/tests/test_base.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 5a67aa3f989ae..0dbced114ce51 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1211,16 +1211,17 @@ def test_unique_datetime_series(arr, expected): tm.assert_index_equal(result, expected) -@pytest.mark.parametrize('array, expected_type', [ - (np.array([0, 1]), np.ndarray), - (np.array(['a', 'b']), np.ndarray), - (pd.Categorical(['a', 'b']), pd.Categorical), - (pd.DatetimeIndex(['2017', '2018']), np.ndarray), - (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray), - (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray), - (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex), +@pytest.mark.parametrize('array, expected_type, dtype', [ + (np.array([0, 1]), np.ndarray, 'int64'), + (np.array(['a', 'b']), np.ndarray, 'object'), + (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), + (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, + 'datetime64[ns, US/Central]'), ]) -def test_values_consistent(array, expected_type): +def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values r_values = pd.Index(array)._values assert type(l_values) is expected_type @@ -1234,3 +1235,13 @@ def test_values_consistent(array, expected_type): 
tm.assert_categorical_equal(l_values, r_values) else: raise TypeError("Unexpected type {}".format(type(l_values))) + + assert l_values.dtype == dtype + assert r_values.dtype == dtype + + +def test_values_periodindex(): + arr = pd.period_range("2017", periods=4, freq='D') + result = arr._values + expected = np.array(arr.astype(object)) + tm.assert_numpy_array_equal(result, expected) From 9fbac2959dc34f64133b44fa8274189abcc07655 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 13:55:34 -0600 Subject: [PATCH 035/119] More tests --- pandas/tests/test_base.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 0dbced114ce51..94449663b580b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1216,10 +1216,11 @@ def test_unique_datetime_series(arr, expected): (np.array(['a', 'b']), np.ndarray, 'object'), (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), - (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), - (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, 'datetime64[ns, US/Central]'), + (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), ]) def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values @@ -1245,3 +1246,24 @@ def test_values_periodindex(): result = arr._values expected = np.array(arr.astype(object)) tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize('array, expected', [ + (np.array([0, 1]), np.array([0, 1])), + (np.array(['0', '1']), np.array(['0', '1'], dtype=object)), + (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')), + (pd.DatetimeIndex(['2017-01-01T00:00:00']), + np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), + (pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"), + np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')), + (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')), + pytest.mark.xfail(reason='PeriodArray not implemented')(( + pd.PeriodIndex(['2017', '2018'], freq='D'), + np.array([17167, 17532]), + )), +]) +def test_ndarray_values(array, expected): + l_values = pd.Series(array)._ndarray_values + r_values = pd.Index(array)._ndarray_values + tm.assert_numpy_array_equal(l_values, r_values) + tm.assert_numpy_array_equal(l_values, expected) From 55305dc197cf7444aa50eab3ba426d5b7244672a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 14:29:08 -0600 Subject: [PATCH 036/119] ndarray_values --- pandas/core/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/base.py b/pandas/core/base.py index 62f237e253c96..dd950a7b8ff00 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -772,6 +772,11 @@ def base(self): def _ndarray_values(self): """The data as an ndarray. 
See '_values' for more.""" # type: () -> np.ndarray + from pandas.core.dtypes.common import is_categorical_dtype + + if is_categorical_dtype(self): + return self._values.codes + return self.values @property From 0e637086e1e89ed7c580e5b731b030d524431a34 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 15:01:28 -0600 Subject: [PATCH 037/119] API: Default ExtensionArray.astype (cherry picked from commit 943a915562b72bed147c857de927afa0daf31c1a) (cherry picked from commit fbf0a0672380e210d3cb3c527fa8045a204d81be) --- pandas/core/arrays/base.py | 30 +++++++++++++++++ pandas/tests/extension_arrays/test_common.py | 34 ++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 pandas/tests/extension_arrays/test_common.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1556b653819a6..8c3d033dffba7 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,6 @@ """An interface for extending pandas with custom arrays.""" +import numpy as np + from pandas.errors import AbstractMethodError _not_implemented_message = "{} does not implement {}." @@ -138,6 +140,34 @@ def nbytes(self): # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ + def astype(self, dtype, copy=True): + """Cast to a NumPy array with 'dtype'. + + The default implementation only allows casting to 'object' dtype. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. + """ + np_dtype = np.dtype(dtype) + + if np_dtype != 'object': + msg = ("{} can only be coerced to 'object' dtype, " + "not '{}'.").format(type(self).__name__, dtype) + raise ValueError(msg) + + return np.array(self, dtype=np_dtype, copy=copy) + def isna(self): # type: () -> np.ndarray """Boolean NumPy array indicating if each value is missing. 
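A short usage sketch of the default `astype` added above (illustrative only, assuming the patched tree; it reuses the `DecimalArray` from `test_decimal.py` earlier in this series, which does not override `astype` and so falls back to this default):

    import decimal
    from pandas.tests.extension_arrays.test_decimal import DecimalArray

    arr = DecimalArray([decimal.Decimal('1.0'), decimal.Decimal('2.0')])

    arr.astype(object)     # object-dtype ndarray holding the Decimal objects
    arr.astype('float64')  # raises ValueError: the default only allows 'object'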
diff --git a/pandas/tests/extension_arrays/test_common.py b/pandas/tests/extension_arrays/test_common.py new file mode 100644 index 0000000000000..7feb7fdf09ec6 --- /dev/null +++ b/pandas/tests/extension_arrays/test_common.py @@ -0,0 +1,34 @@ +import numpy as np + +import pandas.util.testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyArray(ExtensionArray): + + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + +def test_astype(): + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype('object') + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_raises(): + arr = DummyArray(np.array([1, 2, 3])) + + xpr = ("DummyArray can only be coerced to 'object' dtype, not " + "''") + + with tm.assert_raises_regex(ValueError, xpr): + arr.astype(int) From fbbbc8a08b9bfe66cbe06621795163d65dbd3c77 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 15:22:43 -0600 Subject: [PATCH 038/119] Simplify concat_as_object --- pandas/core/dtypes/concat.py | 10 +++++++--- pandas/tests/indexes/test_category.py | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d6b55d03ebccd..b36dc03bbc82b 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -488,8 +488,11 @@ def _concat_index_asobject(to_concat, name=None): concat all inputs as object. DatetimeIndex, TimedeltaIndex and PeriodIndex are converted to object dtype before concatenation """ + from pandas import Index + from pandas.core.arrays import ExtensionArray - klasses = ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex + klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, + ExtensionArray) to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] @@ -497,8 +500,9 @@ def _concat_index_asobject(to_concat, name=None): attribs = self._get_attributes_dict() attribs['name'] = name - arrays = [np.array(x, copy=False, dtype=object) for x in to_concat] - return self._shallow_copy_with_infer(np.concatenate(arrays), **attribs) + to_concat = [x._values if isinstance(x, Index) else x + for x in to_concat] + return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) def _concat_sparse(to_concat, axis=0, typs=None): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index c2e40c79f8914..e9fddfde90348 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -353,6 +353,14 @@ def test_append(self): expected = Index(list('caaabbca')) tm.assert_index_equal(result, expected, exact=True) + def test_append_to_another(self): + # hits _concat_index_asobject + fst = Index(['a', 'b']) + snd = CategoricalIndex(['d', 'e']) + result = fst.append(snd) + expected = Index(['a', 'b', 'd', 'e']) + tm.assert_index_equal(result, expected) + def test_insert(self): ci = self.create_index() From 46a0a49352a1242077e616056f802b0ce35eb8d9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 15:46:36 -0600 Subject: [PATCH 039/119] Py2 compat (cherry picked from commit b20e12cae68dd86ff51597464045656763d369f7) --- pandas/tests/extension_arrays/test_common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension_arrays/test_common.py 
b/pandas/tests/extension_arrays/test_common.py index 7feb7fdf09ec6..f19754482b04f 100644 --- a/pandas/tests/extension_arrays/test_common.py +++ b/pandas/tests/extension_arrays/test_common.py @@ -27,8 +27,10 @@ def test_astype(): def test_astype_raises(): arr = DummyArray(np.array([1, 2, 3])) + # type int for py2 + # class int for py3 xpr = ("DummyArray can only be coerced to 'object' dtype, not " - "''") + "'<.* 'int'>'") with tm.assert_raises_regex(ValueError, xpr): arr.astype(int) From 2c4445a365d19979b400295ce6a7c671396cb0da Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 16:30:11 -0600 Subject: [PATCH 040/119] Set-ops ugliness --- pandas/core/indexes/base.py | 52 +++++++++++++-------------------- pandas/core/indexes/category.py | 6 ---- 2 files changed, 21 insertions(+), 37 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 260016661a735..3ce3ecce1c140 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -31,12 +31,14 @@ is_object_dtype, is_categorical_dtype, is_interval_dtype, + is_period_dtype, is_bool, is_bool_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, is_integer_dtype, is_float_dtype, is_datetime64_any_dtype, + is_datetime64tz_dtype, is_timedelta64_dtype, needs_i8_conversion, is_iterator, is_list_like, @@ -2252,15 +2254,15 @@ def union(self, other): other = other.astype('O') return this.union(other) - if is_categorical_dtype(self): - lvals = self.values - else: + # TODO: setops-refactor, clean all this up + if is_period_dtype(self) or is_datetime64tz_dtype(self): lvals = self._ndarray_values - - if is_categorical_dtype(other): - rvals = other.values else: + lvals = self._values + if is_period_dtype(other) or is_datetime64tz_dtype(other): rvals = other._ndarray_values + else: + rvals = other._values if self.is_monotonic and other.is_monotonic: try: @@ -2310,24 +2312,6 @@ def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None return self.__class__(result, name=name) - def _ensure_join(self, values): - """Ensure that the 'values' are ready for our join indexer. - - The default join indexers are object, so this just returns 'values'. - This is called before calling those. - - - Parameters - ---------- - values : array-like - - Returns - ------- - values : ndarray - Expected to have the correct type for self.inner_indexer - """ - return values - def intersection(self, other): """ Form the intersection of two Index objects. 
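For reference, a hedged illustration (values borrowed from the `test_values_consistent` / `test_ndarray_values` parameters added earlier in this series) of why the tz-aware and period cases are routed through `_ndarray_values` here: their `_values` is not a plain ndarray, whereas `_ndarray_values` exposes a datetime64[ns] (UTC-based) ndarray that the low-level join helpers can consume.

    import pandas as pd

    idx = pd.DatetimeIndex(['2017-01-01T00:00:00'], tz='US/Eastern')

    idx._values          # the tz-aware DatetimeIndex itself
    idx._ndarray_values  # array(['2017-01-01T05:00:00'], dtype='datetime64[ns]')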
@@ -2363,24 +2347,30 @@ def intersection(self, other): other = other.astype('O') return this.intersection(other) + # TODO: setops-refactor, clean all this up + if is_period_dtype(self): + lvals = self._ndarray_values + else: + lvals = self._values + if is_period_dtype(other): + rvals = other._ndarray_values + else: + rvals = other._values + if self.is_monotonic and other.is_monotonic: try: - lvals = self._ensure_join(self._ndarray_values) - rvals = self._ensure_join(other._ndarray_values) result = self._inner_indexer(lvals, rvals)[0] return self._wrap_union_result(other, result) except TypeError: pass try: - indexer = Index(other._ndarray_values).get_indexer( - self._ndarray_values) + indexer = Index(rvals).get_indexer(lvals) indexer = indexer.take((indexer != -1).nonzero()[0]) except Exception: - # duplicates + # duplicateters indexer = algos.unique1d( - Index(other._ndarray_values).get_indexer_non_unique( - self._ndarray_values)[0]) + Index(rvals).get_indexer_non_unique(lvals)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 4381b35f6cb86..93ed2507cb829 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -805,12 +805,6 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - def _ensure_join(self, values): - if self.codes.dtype.itemsize <= 4: - return _ensure_int32(values) - else: - return _ensure_int64(values) - @classmethod def _add_accessors(cls): """ add in Categorical accessor methods """ From 5612cda29f77b5865df92bb97c6e7a2abde6bcb6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 16:46:02 -0600 Subject: [PATCH 041/119] better docstrings --- pandas/core/base.py | 9 ++++++++- pandas/core/indexes/base.py | 27 ++++++++++++++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index dd950a7b8ff00..744d448b16682 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -770,7 +770,14 @@ def base(self): @property def _ndarray_values(self): - """The data as an ndarray. See '_values' for more.""" + """The data as an ndarray, possibly losing information. + + The expectation is that this is cheap to compute. + + - categorical -> codes + + See '_values' for more. + """ # type: () -> np.ndarray from pandas.core.dtypes.common import is_categorical_dtype diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3ce3ecce1c140..afefa5de2477e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -603,15 +603,24 @@ def _values(self): """The best array representation. This is an ndarray, ExtensionArray, or Index subclass. This differs - from '._ndarray_values', which always returns an ndarray. It may differ - from the public '.values' - - index | values | _values - ----------------- | -------------- -| ---------- - CategoricalIndex | Categorical | Categorical - DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] - PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) - IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) + from ``_ndarray_values``, which always returns an ndarray. + + Both ``_values`` and ``_ndarray_values`` are consistent between + ``Series`` and ``Index``. + + It may differ from the public '.values' method. 
+ + index | values | _values | _ndarray_values | + ----------------- | -------------- -| ----------- | --------------- | + CategoricalIndex | Categorical | Categorical | codes | + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | datetime@UTC | + + In the near-future, we'll implement two more. + + index | values | _values | _ndarray_values | + ----------------- | --------------- | ----------- | --------------- | + PeriodIndex | ndarray[object] | PeriodArray | ordinals | + IntervalIndex | ndarray[object] | IVArray | ndarray[object] | See Also -------- From b012c1967b6de548b999514fe4b560ba9b7ee635 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 17:03:15 -0600 Subject: [PATCH 042/119] tolist --- pandas/core/base.py | 3 +++ pandas/core/indexes/base.py | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 744d448b16682..f3b0fb9dbe142 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -14,6 +14,7 @@ is_list_like, is_scalar, is_datetimelike, + is_categorical_dtype, is_extension_type) from pandas.util._validators import validate_bool_kwarg @@ -833,6 +834,8 @@ def tolist(self): if is_datetimelike(self): return [com._maybe_box_datetimelike(x) for x in self._values] + elif is_categorical_dtype(self): + return self.values.tolist() else: return self._ndarray_values.tolist() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index afefa5de2477e..9eb0ac1276280 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -617,10 +617,10 @@ def _values(self): In the near-future, we'll implement two more. - index | values | _values | _ndarray_values | - ----------------- | --------------- | ----------- | --------------- | - PeriodIndex | ndarray[object] | PeriodArray | ordinals | - IntervalIndex | ndarray[object] | IVArray | ndarray[object] | + index | values | _values | ndarray_values | + ----------------- | --------------- | ----------- | -------------- | + PeriodIndex | ndarray[object] | PeriodArray + IntervalIndex | IntervalArray | ndarray[Interval] See Also -------- From d49e6aa649a0b02ce612b9d18b663668ade6485a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 17:05:46 -0600 Subject: [PATCH 043/119] linting --- pandas/core/indexes/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 788005531efe1..22ce690b3d420 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1105,7 +1105,8 @@ def unique(self, level=None): else: naive = self result = super(DatetimeIndex, naive).unique(level=level) - return self._simple_new(result, name=self.name, tz=self.tz, freq=self.freq) + return self._simple_new(result, name=self.name, tz=self.tz, + freq=self.freq) def union(self, other): """ From d7d31eecc1411f9d68755bd86f80b2a97a34776e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 08:21:51 -0600 Subject: [PATCH 044/119] Moved dtypes (cherry picked from commit d1362271bca8a7b183f3241e5c2f040c422118b8) --- pandas/tests/dtypes/test_dtypes.py | 32 +------------------- pandas/tests/extension_arrays/test_common.py | 29 ++++++++++++++++++ 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index eca4dd4cf2106..d800a7b92b559 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,14 +10,12 @@ Series, 
Categorical, CategoricalIndex, IntervalIndex, date_range) from pandas.compat import string_types -from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype, ExtensionDtype) + IntervalDtype, CategoricalDtype) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, - is_extension_array_dtype, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, @@ -744,31 +742,3 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) - - -class DummyArray(ExtensionArray): - pass - - -class DummyDtype(ExtensionDtype): - pass - - -class TestExtensionArrayDtype(object): - - @pytest.mark.parametrize('values', [ - pd.Categorical([]), - pd.Categorical([]).dtype, - pd.Series(pd.Categorical([])), - DummyDtype(), - DummyArray(), - ]) - def test_is_extension_array_dtype(self, values): - assert is_extension_array_dtype(values) - - @pytest.mark.parametrize('values', [ - np.array([]), - pd.Series(np.array([])), - ]) - def test_is_not_extension_array_dtype(self, values): - assert not is_extension_array_dtype(values) diff --git a/pandas/tests/extension_arrays/test_common.py b/pandas/tests/extension_arrays/test_common.py index f19754482b04f..1fc4526aff951 100644 --- a/pandas/tests/extension_arrays/test_common.py +++ b/pandas/tests/extension_arrays/test_common.py @@ -1,7 +1,15 @@ import numpy as np +import pytest +import pandas as pd import pandas.util.testing as tm from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class DummyDtype(ExtensionDtype): + pass class DummyArray(ExtensionArray): @@ -13,7 +21,28 @@ def __array__(self, dtype): return self.data +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + def test_astype(): + arr = DummyArray(np.array([1, 2, 3])) expected = np.array([1, 2, 3], dtype=object) From 7b89f1b3dc80c23d02c8b57c9c5d94cd491082c8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 08:36:44 -0600 Subject: [PATCH 045/119] clean --- pandas/tests/extension_arrays/test_common.py | 65 -------------------- 1 file changed, 65 deletions(-) delete mode 100644 pandas/tests/extension_arrays/test_common.py diff --git a/pandas/tests/extension_arrays/test_common.py b/pandas/tests/extension_arrays/test_common.py deleted file mode 100644 index 1fc4526aff951..0000000000000 --- a/pandas/tests/extension_arrays/test_common.py +++ /dev/null @@ -1,65 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -import pandas.util.testing as tm -from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.dtypes.dtypes import ExtensionDtype - - -class DummyDtype(ExtensionDtype): - pass - - -class DummyArray(ExtensionArray): - 
- def __init__(self, data): - self.data = data - - def __array__(self, dtype): - return self.data - - -class TestExtensionArrayDtype(object): - - @pytest.mark.parametrize('values', [ - pd.Categorical([]), - pd.Categorical([]).dtype, - pd.Series(pd.Categorical([])), - DummyDtype(), - DummyArray(np.array([1, 2])), - ]) - def test_is_extension_array_dtype(self, values): - assert is_extension_array_dtype(values) - - @pytest.mark.parametrize('values', [ - np.array([]), - pd.Series(np.array([])), - ]) - def test_is_not_extension_array_dtype(self, values): - assert not is_extension_array_dtype(values) - - -def test_astype(): - - arr = DummyArray(np.array([1, 2, 3])) - expected = np.array([1, 2, 3], dtype=object) - - result = arr.astype(object) - tm.assert_numpy_array_equal(result, expected) - - result = arr.astype('object') - tm.assert_numpy_array_equal(result, expected) - - -def test_astype_raises(): - arr = DummyArray(np.array([1, 2, 3])) - - # type int for py2 - # class int for py3 - xpr = ("DummyArray can only be coerced to 'object' dtype, not " - "'<.* 'int'>'") - - with tm.assert_raises_regex(ValueError, xpr): - arr.astype(int) From b0dbffd72376d88bfc1dd8d4d89c890978686d4e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 09:34:39 -0600 Subject: [PATCH 046/119] cleanup --- pandas/core/indexes/base.py | 10 +++--- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/tests/test_base.py | 56 ++++++++++++++++++++++++++++----- 4 files changed, 56 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9eb0ac1276280..d8b4a65a91ecc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -613,14 +613,14 @@ def _values(self): index | values | _values | _ndarray_values | ----------------- | -------------- -| ----------- | --------------- | CategoricalIndex | Categorical | Categorical | codes | - DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | datetime@UTC | + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] | In the near-future, we'll implement two more. - index | values | _values | ndarray_values | - ----------------- | --------------- | ----------- | -------------- | - PeriodIndex | ndarray[object] | PeriodArray - IntervalIndex | IntervalArray | ndarray[Interval] + index | values | _values | _ndarray_values | + ----------------- | --------------- | ----------- | --------------- | + PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] | + IntervalIndex | ndarray[object] | PeriodArray | ndarray[object] | See Also -------- diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 93ed2507cb829..166832cbe6bb1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -797,7 +797,7 @@ def _evaluate_compare(self, other): def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ - method = getattr(self.values, name) + method = getattr(self._values, name) if 'inplace' in kwargs: raise ValueError("cannot use inplace with CategoricalIndex") res = method(*args, **kwargs) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1478012aa9dbe..a257a1ba26128 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -799,7 +799,7 @@ def values(self): box = hasattr(lev, '_box_values') # Try to minimize boxing. 
if box and len(lev) > len(lab): - taken = lev._box_values(algos.take_1d(lev._values, + taken = lev._box_values(algos.take_1d(lev._ndarray_values, lab)) elif box: taken = algos.take_1d(lev._box_values(lev._ndarray_values), diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 94449663b580b..66ec2d37c680e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1241,13 +1241,6 @@ def test_values_consistent(array, expected_type, dtype): assert r_values.dtype == dtype -def test_values_periodindex(): - arr = pd.period_range("2017", periods=4, freq='D') - result = arr._values - expected = np.array(arr.astype(object)) - tm.assert_numpy_array_equal(result, expected) - - @pytest.mark.parametrize('array, expected', [ (np.array([0, 1]), np.array([0, 1])), (np.array(['0', '1']), np.array(['0', '1'], dtype=object)), @@ -1267,3 +1260,52 @@ def test_ndarray_values(array, expected): r_values = pd.Index(array)._ndarray_values tm.assert_numpy_array_equal(l_values, r_values) tm.assert_numpy_array_equal(l_values, expected) + + +def test_values_multiindex_datetimesindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_datetimesindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) From 66b936f00b72e3152df807e6e5913f1111084cef Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 09:42:37 -0600 Subject: [PATCH 047/119] NumPy compat --- pandas/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 66ec2d37c680e..e649667e3dda1 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1196,7 +1196,7 @@ def test_unique_datetime_index(arr, expected): @pytest.mark.parametrize('arr, expected', [ (pd.Series(pd.DatetimeIndex(['2017', '2017'])), - np.array(['2017'], dtype='M8[ns]')), + np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), (pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')), np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)), ]) From 32ee0eff6893bd02ed1469330054b0c37914306e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:10:15 -0600 Subject: [PATCH 048/119] Use base _values for CategoricalIndex --- pandas/core/indexes/category.py | 4 ---- 1 file changed, 4 deletions(-) 
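With this change, ``CategoricalIndex`` picks up ``_values`` from the base ``Index``
implementation, so ``_values`` stays the ``Categorical`` itself while
``_ndarray_values`` drops down to the integer codes. A rough, illustrative sketch of
that convention (not part of the patch itself; behavior as described by this series,
and the dev-doc update that follows):

.. code-block:: python

    import pandas as pd

    ci = pd.CategoricalIndex(['a', 'b', 'a'])

    ci.values           # the Categorical: ['a', 'b', 'a']
    ci._values          # the same Categorical, now via the base Index property
    ci._ndarray_values  # the codes, e.g. array([0, 1, 0], dtype=int8)
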
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 166832cbe6bb1..f03f8571121f0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -304,10 +304,6 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data - @property - def _values(self): - return self._data - @property def _ndarray_values(self): return self._data.codes From a9882e23defc47272f941932c4ce53af9b5ba0e6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:10:34 -0600 Subject: [PATCH 049/119] Update dev docs --- doc/source/internals.rst | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 29aaed318b802..957f82fd9eba7 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -92,16 +92,20 @@ if you compute the levels and labels yourself, please be careful. Values ~~~~~~ -Pandas extends NumPy's type system in a few places, so we have multiple notions of "values" floating around. -For 1-D containers (``Index`` classes and ``Series``) we have the following convention: - -* ``cls._ndarray_values`` is *always* and ``ndarray`` -* ``cls._values`` refers is the "best possible" array. This could be an ``ndarray``, ``ExtensionArray``, or - in ``Index`` subclass (note: we're in the process of removing the index subclasses here so that it's - always an ``ndarray`` or ``ExtensionArray``). - -So, for example, ``Series[category]._values`` is a ``Categorical``, while ``Series[category]._ndarray_values`` is -the underlying ndarray. +Pandas extends NumPy's type system with custom types, like ``Categorical`` or +datetimes with a timezone, so we have multiple notions of "values". For 1-D +containers (``Index`` classes and ``Series``) we have the following convention: + +* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally, + ``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``, + this returns the codes, not the array of objects. +* ``cls._values`` refers is the "best possible" array. This could be an + ``ndarray``, ``ExtensionArray``, or in ``Index`` subclass (note: we're in the + process of removing the index subclasses here so that it's always an + ``ndarray`` or ``ExtensionArray``). + +So, for example, ``Series[category]._values`` is a ``Categorical``, while +``Series[category]._ndarray_values`` is the underlying codes. .. 
_ref-subclassing-pandas: From 242562108b099b4e7a205541ee15b9272dcb5265 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:59:22 -0600 Subject: [PATCH 050/119] cleanup --- pandas/core/dtypes/cast.py | 2 +- pandas/core/indexes/category.py | 13 +------------ pandas/core/indexes/multi.py | 8 +++----- pandas/core/indexes/period.py | 5 ----- 4 files changed, 5 insertions(+), 23 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b2816343fc8eb..55919fb2bea0d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -927,7 +927,7 @@ def try_timedelta(v): # will try first with a string & object conversion from pandas import to_timedelta try: - return to_timedelta(v)._values.reshape(shape) + return to_timedelta(v)._ndarray_values.reshape(shape) except Exception: return v.reshape(shape) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f03f8571121f0..5aa940499a368 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,6 +1,5 @@ import numpy as np from pandas._libs import index as libindex -from pandas._libs import join as libjoin from pandas import compat from pandas.compat.numpy import function as nv @@ -9,8 +8,6 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, _ensure_platform_int, - _ensure_int32, - _ensure_int64, is_list_like, is_interval_dtype, is_scalar) @@ -217,14 +214,6 @@ def _shallow_copy(self, values=None, categories=None, ordered=None, values=values, categories=categories, ordered=ordered, **kwargs) - @cache_readonly - def _inner_indexer(self): - if self.codes.dtype.itemsize <= 4: - # int8, int16, int32 - return libjoin.inner_join_indexer_int32 - else: - return libjoin.inner_join_indexer_int64 - def _is_dtype_compat(self, other): """ *this is an internal non-public method* @@ -238,7 +227,7 @@ def _is_dtype_compat(self, other): """ if is_categorical_dtype(other): if isinstance(other, CategoricalIndex): - other = other.values + other = other._values if not other.is_dtype_equal(self): raise TypeError("categories must match existing categories " "when appending") diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a257a1ba26128..907bbb2e8762e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2507,6 +2507,7 @@ def get_locs(self, seq): MultiIndex.slice_locs : Get slice location given start label(s) and end label(s). 
""" + from .numeric import Int64Index # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] @@ -2532,7 +2533,6 @@ def _convert_to_indexer(r): "that is not the same length as the " "index") r = r.nonzero()[0] - from .numeric import Int64Index return Int64Index(r) def _update_indexer(idxr, indexer=indexer): @@ -2569,7 +2569,6 @@ def _update_indexer(idxr, indexer=indexer): if indexers is not None: indexer = _update_indexer(indexers, indexer=indexer) else: - from .numeric import Int64Index # no matches we are done return Int64Index([])._ndarray_values @@ -2652,9 +2651,8 @@ def equals(self, other): for i in range(self.nlevels): slabels = self.labels[i] slabels = slabels[slabels != -1] - svalues = algos.take_nd( - np.asarray(self.levels[i]._values), - slabels, allow_fill=False) + svalues = algos.take_nd(np.asarray(self.levels[i]._values), + slabels, allow_fill=False) olabels = other.labels[i] olabels = olabels[olabels != -1] diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c8b7d6063e378..e90d3827fe84e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -418,11 +418,6 @@ def _int64index(self): def values(self): return self.astype(object).values - @property - def _values(self): - # TODO: return PeriodArray - return self.values - @property def _ndarray_values(self): # Ordinals From 6abe9da01ee0be4bb2d87f649b2c6066d4ea3835 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:59:22 -0600 Subject: [PATCH 051/119] cleanup (cherry picked from commit 242562108b099b4e7a205541ee15b9272dcb5265) --- pandas/core/dtypes/cast.py | 2 +- pandas/core/indexes/multi.py | 3 +-- pandas/core/indexes/period.py | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b2816343fc8eb..55919fb2bea0d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -927,7 +927,7 @@ def try_timedelta(v): # will try first with a string & object conversion from pandas import to_timedelta try: - return to_timedelta(v)._values.reshape(shape) + return to_timedelta(v)._ndarray_values.reshape(shape) except Exception: return v.reshape(shape) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 510f7245cebd8..aca81aed29c62 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2505,6 +2505,7 @@ def get_locs(self, seq): MultiIndex.slice_locs : Get slice location given start label(s) and end label(s). 
""" + from .numeric import Int64Index # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] @@ -2530,7 +2531,6 @@ def _convert_to_indexer(r): "that is not the same length as the " "index") r = r.nonzero()[0] - from .numeric import Int64Index return Int64Index(r) def _update_indexer(idxr, indexer=indexer): @@ -2567,7 +2567,6 @@ def _update_indexer(idxr, indexer=indexer): if indexers is not None: indexer = _update_indexer(indexers, indexer=indexer) else: - from .numeric import Int64Index # no matches we are done return Int64Index([])._values diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1f8542ed5ee60..b797d3734380c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -418,7 +418,8 @@ def values(self): return self.astype(object).values @property - def _values(self): + def _ndarray_values(self): + # Ordinals return self._data def __array__(self, dtype=None): From 0b112f21a80818d3ad9e7bb6f00c351edd9d1713 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:26:52 -0600 Subject: [PATCH 052/119] cleanup --- pandas/core/arrays/categorical.py | 1 - pandas/core/dtypes/common.py | 5 ++--- pandas/core/dtypes/missing.py | 4 +--- pandas/core/frame.py | 3 +-- pandas/core/indexes/base.py | 25 ------------------------- pandas/core/indexes/category.py | 3 --- pandas/core/indexes/datetimes.py | 7 ------- pandas/core/internals.py | 7 ++----- pandas/core/series.py | 9 +++++---- pandas/tests/indexes/datetimelike.py | 13 ------------- pandas/tests/indexes/test_base.py | 5 ----- pandas/tests/indexes/test_category.py | 5 ----- 12 files changed, 11 insertions(+), 76 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6c5b0c9d2be98..d5e8fc5e0b190 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2150,7 +2150,6 @@ def _concat_same_type(self, to_concat): def _formatting_values(self): return self - # The Series.cat accessor diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 2344091f85a88..197b35de88896 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1708,10 +1708,9 @@ def is_extension_array_dtype(arr_or_dtype): """ from pandas.core.arrays import ExtensionArray - if isinstance(arr_or_dtype, ABCSeries): + if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): arr_or_dtype = arr_or_dtype._values - elif isinstance(arr_or_dtype, ABCIndexClass): - arr_or_dtype = arr_or_dtype._as_best_array() + return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index c7cd97d5ceb87..002839af6daf2 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -132,9 +132,7 @@ def _isna_ndarraylike(obj): dtype = values.dtype if is_extension_array_dtype(obj): - if isinstance(obj, ABCIndexClass): - values = obj._as_best_array() - elif isinstance(obj, ABCSeries): + if isinstance(obj, (ABCIndexClass, ABCSeries)): values = obj._values else: values = obj diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7059495ed6467..e91a93827fc84 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3370,8 +3370,7 @@ class max type new_obj = self.copy() def _maybe_casted_values(index, labels=None): - values = index._as_best_array() - # TODO: Check if nescessary... 
+ values = index._values if not isinstance(index, (PeriodIndex, DatetimeIndex)): if values.dtype == np.object_: values = lib.maybe_convert_objects(values) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6c2b2dc1eb67a..fdb20995805fd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1095,31 +1095,6 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.values.copy() - def _as_best_array(self): - # type: () -> Union[ExtensionArray, ndarary] - """Return the underlying values as the best array type. - - Indexes backed by ExtensionArrays will return the ExtensionArray. - Otherwise, an ndarray is returned. - - Examples - -------- - >>> pd.Index([0, 1, 2])._as_best_array() - array([0, 1, 2]) - - >>> pd.CategoricalIndex(['a', 'a', 'b'])._as_best_array() - [a, a, b] - Categories (2, object): [a, b] - - >>> pd.IntervalIndex.from_breaks([0, 1, 2])._as_best_array() - IntervalArray([(0, 1], (1, 2]]) - """ - # We need this since CategoricalIndex.values -> Categorical - # but IntervalIndex.values -> ndarray[object] - # TODO: IntervalIndex defines _array_values. Would be nice to - # have an unambiguous way of getting an ndarray (or just use asarray?) - return self.values - _index_shared_docs['astype'] = """ Create an Index with values cast to dtypes. The class of a new Index is determined by dtype. When conversion is impossible, a ValueError diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f211d41ac2f4c..5aa940499a368 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -310,9 +310,6 @@ def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() - def _as_best_array(self): - return self._data - def tolist(self): return self._data.tolist() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index db66dac67bbea..22ce690b3d420 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1043,13 +1043,6 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.values.copy() - def _as_best_array(self): - # no-tz -> ndarray - # tz -> DatetimeIndex (for now) - if self.tz is not None: - return self - return self.values - def to_pydatetime(self): """ Return DatetimeIndex as object ndarray of datetime.datetime objects diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b778900157743..c69ce53fbf53f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1868,10 +1868,8 @@ def __init__(self, values, placement, ndim=None): def _maybe_coerce_values(self, values): # Unboxes Series / Index # Doesn't change any underlying dtypes. 
- if isinstance(values, ABCSeries): + if isinstance(values, (ABCIndexClass, ABCSeries)): values = values.values - elif isinstance(values, ABCIndexClass): - values = values._as_best_array() return values @property @@ -4133,8 +4131,7 @@ def set(self, item, value, check=False): # FIXME: refactor, clearly separate broadcasting & zip-like assignment # can prob also fix the various if tests for sparse/categorical - value_is_extension_type = (is_extension_type(value) or - is_extension_array_dtype(value)) + value_is_extension_type = is_extension_type(value) # categorical/spares/datetimetz if value_is_extension_type: diff --git a/pandas/core/series.py b/pandas/core/series.py index 41240dcbda0d1..7884794c6b5f4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -181,7 +181,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None, data = data.astype(dtype) # need to copy to avoid aliasing issues - data = data._as_best_array().copy() + data = data._values.copy() + copy = False elif isinstance(data, np.ndarray): pass @@ -3137,7 +3138,9 @@ def _sanitize_index(data, index, copy=False): raise ValueError('Length of values does not match length of ' 'index') if isinstance(data, ABCIndexClass) and not copy: - data = data._as_best_array() + pass + elif isinstance(data, (PeriodIndex, DatetimeIndex)): + data = data._values elif isinstance(data, np.ndarray): @@ -3216,7 +3219,6 @@ def _try_cast(arr, take_fast_path): if copy: subarr = data.copy() - # XXX: This is the only early return. See if it can be avoided. return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: @@ -3239,7 +3241,6 @@ def _try_cast(arr, take_fast_path): start, stop, step = get_range_parameters(data) arr = np.arange(start, stop, step, dtype='int64') subarr = _try_cast(arr, False) - else: subarr = _try_cast(data, False) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 64fc1ee8c9680..7d01a2a70145d 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -83,16 +83,3 @@ def test_asobject_deprecated(self): with tm.assert_produces_warning(FutureWarning): i = d.asobject assert isinstance(i, pd.Index) - - def test_as_best_array(self): - result = pd.DatetimeIndex(['2017-01-01T00:00:00', - '2017-01-02T00:00:00'])._as_best_array() - expected = np.array(['2017-01-01T00:00:00', - '2017-01-02T00:00:00'], dtype='M8[ns]') - tm.assert_numpy_array_equal(result, expected) - - def test_as_best_array_tz(self): - arr = pd.DatetimeIndex(['2017-01-01T00:00:00', - '2017-01-02T00:00:00'], tz='US/Central') - result = arr._as_best_array() - tm.assert_index_equal(arr, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 900e413b2c2db..90edcb526bb2e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2284,11 +2284,6 @@ def test_comparison_tzawareness_compat(self, op): # TODO: implement _assert_tzawareness_compat for the reverse # comparison with the Series on the left-hand side - def test_as_best_array(self): - result = pd.Index([0, 1, 2])._as_best_array() - expected = np.array([0, 1, 2], dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - class TestIndexUtils(object): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 0fda05252c74e..e9fddfde90348 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1088,8 +1088,3 @@ def test_take_invalid_kwargs(self): msg = "the 
'mode' parameter is not supported" tm.assert_raises_regex(ValueError, msg, idx.take, indices, mode='clip') - - def test_as_best_array(self): - result = pd.CategoricalIndex([0, 1, 2])._as_best_array() - expected = pd.Categorical([0, 1, 2]) - tm.assert_categorical_equal(result, expected) From 170d0c7959a54276fff730b002195f46ec64de63 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:33:49 -0600 Subject: [PATCH 053/119] Linting --- pandas/core/base.py | 3 +-- pandas/tests/test_base.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f3b0fb9dbe142..01dba132e00c5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -7,8 +7,7 @@ import numpy as np from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCSeries, ABCIndexClass, ABCDatetimeIndex) +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass from pandas.core.dtypes.common import ( is_object_dtype, is_list_like, diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index e649667e3dda1..31fa278f906f5 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1262,7 +1262,7 @@ def test_ndarray_values(array, expected): tm.assert_numpy_array_equal(l_values, expected) -def test_values_multiindex_datetimesindex(): +def test_values_multiindex_datetimeindex(): # Test to ensure we hit the boxing / nobox part of MI.values ints = np.arange(10**18, 10**18 + 5) naive = pd.DatetimeIndex(ints) @@ -1287,7 +1287,7 @@ def test_values_multiindex_datetimesindex(): tm.assert_index_equal(inner, aware[:2]) -def test_values_multiindex_datetimesindex(): +def test_values_multiindex_periodindex(): # Test to ensure we hit the boxing / nobox part of MI.values ints = np.arange(2007, 2012) pidx = pd.PeriodIndex(ints, freq='D') From 402620f3ca75d14dd203f809226ec528113ae54c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:35:24 -0600 Subject: [PATCH 054/119] Precision in tests --- pandas/tests/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 31fa278f906f5..ce1e3d492741d 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1212,7 +1212,7 @@ def test_unique_datetime_series(arr, expected): @pytest.mark.parametrize('array, expected_type, dtype', [ - (np.array([0, 1]), np.ndarray, 'int64'), + (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'), (np.array(['a', 'b']), np.ndarray, 'object'), (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), @@ -1242,7 +1242,7 @@ def test_values_consistent(array, expected_type, dtype): @pytest.mark.parametrize('array, expected', [ - (np.array([0, 1]), np.array([0, 1])), + (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)), (np.array(['0', '1']), np.array(['0', '1'], dtype=object)), (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')), (pd.DatetimeIndex(['2017-01-01T00:00:00']), From 268aabcb88f8fcb803693bd5796b0cfcf244fab2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:42:26 -0600 Subject: [PATCH 055/119] Linting --- pandas/core/indexing.py | 6 +++--- pandas/core/internals.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 97b7d1064e8bc..1d07900a4d5df 100755 --- a/pandas/core/indexing.py +++ 
b/pandas/core/indexing.py @@ -618,9 +618,9 @@ def can_do_equal_len(): return if isinstance(value, (ABCSeries, dict)): - # TODO: ExtensionBlock.setitem this causes issues with setting for - # extensionarrays that store dicts. Need to decide if it's worth - # supporting that case. + # TODO: ExtensionBlock.setitem this causes issues with setting + # for extensionarrays that store dicts. Need to decide if it's + # worth supporting that or now value = self._align_series(indexer, Series(value)) elif isinstance(value, ABCDataFrame): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c69ce53fbf53f..fffbe18d3008c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -15,7 +15,6 @@ from pandas.core.base import PandasObject -from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( ExtensionDtype, DatetimeTZDtype, CategoricalDtype) From d671259c25413d849ae015e13d9db195aa467876 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:47:22 -0600 Subject: [PATCH 056/119] Move to extension --- pandas/tests/{extension_arrays => extension}/__init__.py | 0 pandas/tests/{extension_arrays => extension}/base.py | 0 pandas/tests/{extension_arrays => extension}/test_categorical.py | 0 pandas/tests/{extension_arrays => extension}/test_decimal.py | 0 pandas/tests/{extension_arrays => extension}/test_json.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/{extension_arrays => extension}/__init__.py (100%) rename pandas/tests/{extension_arrays => extension}/base.py (100%) rename pandas/tests/{extension_arrays => extension}/test_categorical.py (100%) rename pandas/tests/{extension_arrays => extension}/test_decimal.py (100%) rename pandas/tests/{extension_arrays => extension}/test_json.py (100%) diff --git a/pandas/tests/extension_arrays/__init__.py b/pandas/tests/extension/__init__.py similarity index 100% rename from pandas/tests/extension_arrays/__init__.py rename to pandas/tests/extension/__init__.py diff --git a/pandas/tests/extension_arrays/base.py b/pandas/tests/extension/base.py similarity index 100% rename from pandas/tests/extension_arrays/base.py rename to pandas/tests/extension/base.py diff --git a/pandas/tests/extension_arrays/test_categorical.py b/pandas/tests/extension/test_categorical.py similarity index 100% rename from pandas/tests/extension_arrays/test_categorical.py rename to pandas/tests/extension/test_categorical.py diff --git a/pandas/tests/extension_arrays/test_decimal.py b/pandas/tests/extension/test_decimal.py similarity index 100% rename from pandas/tests/extension_arrays/test_decimal.py rename to pandas/tests/extension/test_decimal.py diff --git a/pandas/tests/extension_arrays/test_json.py b/pandas/tests/extension/test_json.py similarity index 100% rename from pandas/tests/extension_arrays/test_json.py rename to pandas/tests/extension/test_json.py From 815d202e96e910a64a292f6815737447ffdc1847 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Feb 2018 14:13:50 -0600 Subject: [PATCH 057/119] Push _ndarray_values to ExtensionArray Now IndexOpsMixin._ndarray_values will dispatch all the way down to the EA. Subclasses like Categorical can override it as they see fit. 
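As a rough sketch (illustrative only, not code from this patch): with the hook added
below on the base class, a third-party extension array could expose a cheap ndarray
view by overriding ``_ndarray_values``, the same way ``Categorical`` returns its
codes; otherwise the default of ``np.array(self)`` applies. The class and attribute
names here are hypothetical.

.. code-block:: python

    import numpy as np

    from pandas.core.arrays import ExtensionArray


    class MyDictEncodedArray(ExtensionArray):
        # Toy example: only the piece relevant to this commit is shown;
        # the rest of the ExtensionArray interface is omitted.
        def __init__(self, codes, categories):
            self.codes = np.asarray(codes, dtype='int8')
            self.categories = list(categories)

        @property
        def _ndarray_values(self):
            # Cheap, lossy ndarray view -- mirrors Categorical returning
            # its codes rather than materializing the objects.
            return self.codes
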
--- pandas/core/arrays/base.py | 12 ++++++++++++ pandas/core/arrays/categorical.py | 4 ++++ pandas/core/base.py | 15 ++++++--------- pandas/core/dtypes/common.py | 2 +- pandas/core/indexes/category.py | 4 ---- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 553e1e0ac2066..e618dc6b69b2d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -266,3 +266,15 @@ def _can_hold_na(self): Setting this to false will optimize some operations like fillna. """ return True + + @property + def _ndarray_values(self): + # type: () -> np.ndarray + """Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. + """ + return np.array(self) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 62c6a6b16cbe9..8d2cf9d2b2f92 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -410,6 +410,10 @@ def dtype(self): """The :class:`~pandas.api.types.CategoricalDtype` for this instance""" return self._dtype + @property + def _ndarray_values(self): + return self.codes + @property def _constructor(self): return Categorical diff --git a/pandas/core/base.py b/pandas/core/base.py index 01dba132e00c5..0e70e3eb64fcb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -14,7 +14,8 @@ is_scalar, is_datetimelike, is_categorical_dtype, - is_extension_type) + is_extension_type, + is_extension_array_dtype) from pandas.util._validators import validate_bool_kwarg @@ -772,18 +773,14 @@ def base(self): def _ndarray_values(self): """The data as an ndarray, possibly losing information. - The expectation is that this is cheap to compute. + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. - categorical -> codes - - See '_values' for more. """ # type: () -> np.ndarray - from pandas.core.dtypes.common import is_categorical_dtype - - if is_categorical_dtype(self): - return self._values.codes - + if is_extension_array_dtype(self): + return self.values._ndarray_values return self.values @property diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c66e7fcfc6978..c2b71bc316fe8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1709,7 +1709,7 @@ def is_extension_array_dtype(arr_or_dtype): from pandas.core.arrays import ExtensionArray # we want to unpack series, anything else? 
- if isinstance(arr_or_dtype, ABCSeries): + if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): arr_or_dtype = arr_or_dtype._values return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5aa940499a368..d71b7ea774f52 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -293,10 +293,6 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data - @property - def _ndarray_values(self): - return self._data.codes - @property def itemsize(self): return self.values.itemsize From a727b217f42e959f9ebb355e911f3ec641db0b49 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Feb 2018 14:27:46 -0600 Subject: [PATCH 058/119] Clean up tolist --- pandas/core/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 0e70e3eb64fcb..0b4c03d6b4b25 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -830,10 +830,8 @@ def tolist(self): if is_datetimelike(self): return [com._maybe_box_datetimelike(x) for x in self._values] - elif is_categorical_dtype(self): - return self.values.tolist() else: - return self._ndarray_values.tolist() + return self._values.tolist() def __iter__(self): """ From f368c29d6a45832f95181a8a6e8b7411d87763c7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Feb 2018 14:33:46 -0600 Subject: [PATCH 059/119] Move test locations --- .../tests/indexes/datetimes/test_datetime.py | 15 ++++ pandas/tests/indexes/test_multi.py | 48 +++++++++++ pandas/tests/test_base.py | 82 ------------------- 3 files changed, 63 insertions(+), 82 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a75ace2933b71..e9176e749564e 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -469,3 +469,18 @@ def test_factorize_dst(self): arr, res = obj.factorize() tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) + + @pytest.mark.parametrize('arr, expected', [ + (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), + (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), + pd.DatetimeIndex(['2017'], tz='US/Eastern')), + ]) + def test_unique(self, arr, expected): + result = arr.unique() + + if isinstance(expected, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + if isinstance(expected, pd.Series): + tm.assert_series_equal(result, expected) + if isinstance(expected, pd.DatetimeIndex): + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index e59456b8a2d5e..97370b279245c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -962,6 +962,54 @@ def test_values_boxed(self): # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + def test_values_multiindex_datetimeindex(self): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + 
tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + + def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + def test_append(self): result = self.index[:3].append(self.index[3:]) assert result.equals(self.index) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index ce1e3d492741d..4b5ad336139b0 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1178,39 +1178,6 @@ def test_iter_box(self): assert res == exp -@pytest.mark.parametrize('arr, expected', [ - (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), - (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), - pd.DatetimeIndex(['2017'], tz='US/Eastern')), -]) -def test_unique_datetime_index(arr, expected): - result = arr.unique() - - if isinstance(expected, np.ndarray): - tm.assert_numpy_array_equal(result, expected) - if isinstance(expected, pd.Series): - tm.assert_series_equal(result, expected) - if isinstance(expected, pd.DatetimeIndex): - tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize('arr, expected', [ - (pd.Series(pd.DatetimeIndex(['2017', '2017'])), - np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), - (pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')), - np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)), -]) -def test_unique_datetime_series(arr, expected): - result = arr.unique() - - if isinstance(expected, np.ndarray): - tm.assert_numpy_array_equal(result, expected) - if isinstance(expected, pd.Series): - tm.assert_series_equal(result, expected) - if isinstance(expected, pd.DatetimeIndex): - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('array, expected_type, dtype', [ (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'), (np.array(['a', 'b']), np.ndarray, 'object'), @@ -1260,52 +1227,3 @@ def test_ndarray_values(array, expected): r_values = pd.Index(array)._ndarray_values tm.assert_numpy_array_equal(l_values, r_values) tm.assert_numpy_array_equal(l_values, expected) - - -def test_values_multiindex_datetimeindex(): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(10**18, 10**18 + 5) - naive = pd.DatetimeIndex(ints) - aware = pd.DatetimeIndex(ints, tz='US/Central') - - idx = pd.MultiIndex.from_arrays([naive, aware]) - result = idx.values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive[:2]) - - inner = pd.DatetimeIndex([x[1] for x in result]) - 
tm.assert_index_equal(inner, aware[:2]) - - -def test_values_multiindex_periodindex(): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(2007, 2012) - pidx = pd.PeriodIndex(ints, freq='D') - - idx = pd.MultiIndex.from_arrays([ints, pidx]) - result = idx.values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints)) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx[:2]) From d74c5c96040882378e3598e0df27e59aff57de51 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 06:33:05 -0600 Subject: [PATCH 060/119] Fixed test --- pandas/tests/indexes/test_multi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 97370b279245c..cd6a5c761d0c2 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -986,8 +986,7 @@ def test_values_multiindex_datetimeindex(self): inner = pd.DatetimeIndex([x[1] for x in result]) tm.assert_index_equal(inner, aware[:2]) - - def test_values_multiindex_periodindex(): + def test_values_multiindex_periodindex(self): # Test to ensure we hit the boxing / nobox part of MI.values ints = np.arange(2007, 2012) pidx = pd.PeriodIndex(ints, freq='D') From 8104ee5d8a887454fec6869eb1f4e63fe74d72e6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 08:40:56 -0600 Subject: [PATCH 061/119] REF: Update per comments --- pandas/core/base.py | 2 +- pandas/core/dtypes/concat.py | 2 +- pandas/core/indexes/category.py | 6 +----- pandas/core/indexes/multi.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/tests/indexes/datetimes/test_datetime.py | 8 +------- 6 files changed, 6 insertions(+), 16 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 0b4c03d6b4b25..8081e20faaeb3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -745,7 +745,7 @@ def itemsize(self): @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self._ndarray_values.nbytes + return self.values.nbytes @property def strides(self): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b36dc03bbc82b..d306d0d78f1f4 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -480,7 +480,7 @@ def _concat_datetimetz(to_concat, name=None): def _concat_index_same_dtype(indexes, klass=None): klass = klass if klass is not None else indexes[0].__class__ - return klass(np.concatenate([x._ndarray_values for x in indexes])) + return klass(np.concatenate([x._values for x in indexes])) def _concat_index_asobject(to_concat, name=None): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d71b7ea774f52..7d4a864b465e8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -295,13 +295,9 @@ def values(self): @property def itemsize(self): + # Size of the items in categories, not codes. 
return self.values.itemsize - @property - def nbytes(self): - """ return the number of bytes in the underlying data """ - return self.values.nbytes - def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 907bbb2e8762e..94dbd8b884e47 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1319,7 +1319,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): - tuples = tuples._ndarray_values + tuples = tuples._values arrays = list(lib.tuples_to_object_array(tuples).T) elif isinstance(tuples, list): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d590499faa65e..621641747f376 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1897,7 +1897,7 @@ def _format(x): vals = self.values if isinstance(vals, Index): - vals = vals._ndarray_values + vals = vals._values elif isinstance(vals, ABCSparseArray): vals = vals.values diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e9176e749564e..05678b0c8dd45 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -477,10 +477,4 @@ def test_factorize_dst(self): ]) def test_unique(self, arr, expected): result = arr.unique() - - if isinstance(expected, np.ndarray): - tm.assert_numpy_array_equal(result, expected) - if isinstance(expected, pd.Series): - tm.assert_series_equal(result, expected) - if isinstance(expected, pd.DatetimeIndex): - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) From f8e29b918f7b4cc306ff7b18efa549e17aedbbe9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 09:53:55 -0600 Subject: [PATCH 062/119] lint --- pandas/core/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 8081e20faaeb3..cf48b419b7df1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,6 @@ is_list_like, is_scalar, is_datetimelike, - is_categorical_dtype, is_extension_type, is_extension_array_dtype) From 0cd9faa5b42df01c96a8dddb7f7a73cea32d0a91 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 13:04:50 -0600 Subject: [PATCH 063/119] REF: Use _values for size and shape --- pandas/core/base.py | 4 ++-- pandas/core/indexes/datetimes.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index cf48b419b7df1..f6f1ba982e1d9 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -711,7 +711,7 @@ def transpose(self, *args, **kwargs): @property def shape(self): """ return a tuple of the shape of the underlying data """ - return self._ndarray_values.shape + return self._values.shape @property def ndim(self): @@ -754,7 +754,7 @@ def strides(self): @property def size(self): """ return the number of elements in the underlying data """ - return self._ndarray_values.size + return self._values.size @property def flags(self): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 22ce690b3d420..689610af7603f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -694,6 +694,20 @@ def tzinfo(self): """ return self.tz + @property + def size(self): + # TODO: Remove this when we 
have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.size + + @property + def shape(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.shape + @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil""" From 8fcdb7040345e1d0017367695354d9c858c71e09 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 13:09:13 -0600 Subject: [PATCH 064/119] PERF: Implement size, shape for IntervalIndex --- pandas/core/indexes/interval.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3bf783b5a2faa..d431ea1e51e31 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -680,6 +680,16 @@ def length(self): 'e.g. Intervals with string endpoints') raise TypeError(msg) + @property + def size(self): + # Avoid materializing self.values + return self.left.size + + @property + def shape(self): + # Avoid materializing self.values + return self.left.shape + def __len__(self): return len(self.left) From 34a6a22e2255eb11e5c6b6c5478350fb84ce656e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 13:11:00 -0600 Subject: [PATCH 065/119] PERF: Avoid materializing values for PeriodIndex shape, size --- pandas/core/indexes/period.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e90d3827fe84e..8f2d7d382a16e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -477,6 +477,16 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.astype(object).values + @property + def size(self): + # Avoid materializing self._values + return self._ndarray_values.size + + @property + def shape(self): + # Avoid materializing self._values + return self._ndarray_values.shape + @property def _formatter_func(self): return lambda x: "'%s'" % x From d6e8051d1ebab7cf99bd7ac23eea348d0e3a0d4c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 20:55:19 -0600 Subject: [PATCH 066/119] Cleanup --- pandas/core/base.py | 3 +-- pandas/core/indexes/base.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f6f1ba982e1d9..0ca029ffd4c25 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -744,7 +744,7 @@ def itemsize(self): @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self.values.nbytes + return self._values.nbytes @property def strides(self): @@ -988,7 +988,6 @@ def value_counts(self, normalize=False, sort=True, ascending=False, def unique(self): values = self._values - # TODO: Make unique part of the ExtensionArray interface. if hasattr(values, 'unique'): result = values.unique() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a09a4c59a819a..be7c1624936bf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -599,7 +599,7 @@ def values(self): @property def _values(self): # type: () -> Union[ExtensionArray, Index] - # TODO: remove index types as they become is extension arrays + # TODO(EA): remove index types as they become extension arrays """The best array representation. This is an ndarray, ExtensionArray, or Index subclass. 
This differs @@ -2264,7 +2264,7 @@ def union(self, other): other = other.astype('O') return this.union(other) - # TODO: setops-refactor, clean all this up + # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self) or is_datetime64tz_dtype(self): lvals = self._ndarray_values else: @@ -2357,7 +2357,7 @@ def intersection(self, other): other = other.astype('O') return this.intersection(other) - # TODO: setops-refactor, clean all this up + # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self): lvals = self._ndarray_values else: From 3af8a21ea0e13ba5fc73db464f6e327552c71b0e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 05:54:27 -0600 Subject: [PATCH 067/119] Override nbytes --- pandas/core/indexes/datetimes.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 689610af7603f..cc9ce1f3fd5eb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -708,6 +708,13 @@ def shape(self): # for TZ-aware return self._ndarray_values.shape + @property + def nbytes(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.nbytes + @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil""" From 1e8e87e7ed20d07f422fd7b518b33f3c0fbc0512 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 06:42:01 -0600 Subject: [PATCH 068/119] Remove unused change --- pandas/core/indexes/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 47ded9c6f4cd2..281618ffefef4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2004,11 +2004,6 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): if is_categorical_dtype(values.dtype): values = np.array(values) - elif isinstance(values, ExtensionArray): - # This is still un-exercised within pandas, since all our - # extension dtypes have custom indexes. - values = values._formatting_values() - elif is_object_dtype(values.dtype): values = lib.maybe_convert_objects(values, safe=1) From 0f5e4f06478a1ed5a956a33220a2114399551377 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 06:49:35 -0600 Subject: [PATCH 069/119] Docs --- pandas/core/arrays/base.py | 36 +++++++++++++++++++++++++----------- pandas/core/indexing.py | 6 +++--- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3646a045fa465..e9d56a0e95461 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -204,6 +204,25 @@ def isna(self): """ raise AbstractMethodError(self) + def value_counts(self, dropna=True): + """Compute a histogram of the counts of non-null values. 
+ + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN + + Returns + ------- + value_counts : Series + """ + from pandas import value_counts + + if dropna: + self = self[~self.isna()] + + return value_counts(np.array(self)) + # ------------------------------------------------------------------------ # Indexing methods # ------------------------------------------------------------------------ @@ -235,9 +254,8 @@ def take(self, indexer, allow_fill=True, fill_value=None): Examples -------- - Suppose the extension array somehow backed by a NumPy array and that - the underlying structured array is stored as ``self.data``. Then - ``take`` may be written as + Suppose the extension array is backed by a NumPy array stored as + ``self.data``. Then ``take`` may be written as .. code-block:: python @@ -246,6 +264,10 @@ def take(self, indexer, allow_fill=True, fill_value=None): result = self.data.take(indexer) result[mask] = self._fill_value return type(self)(result) + + See Also + -------- + numpy.take """ raise AbstractMethodError(self) @@ -305,14 +327,6 @@ def _can_hold_na(self): """ return True - def value_counts(self, dropna=True): - from pandas import value_counts - - if dropna: - self = self[~self.isna()] - - return value_counts(np.array(self)) - @property def _ndarray_values(self): # type: () -> np.ndarray diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1d07900a4d5df..50f3c7a6b3d3d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -618,9 +618,9 @@ def can_do_equal_len(): return if isinstance(value, (ABCSeries, dict)): - # TODO: ExtensionBlock.setitem this causes issues with setting - # for extensionarrays that store dicts. Need to decide if it's - # worth supporting that or now + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. value = self._align_series(indexer, Series(value)) elif isinstance(value, ABCDataFrame): From c4dab88c29b72a4efb3a4cee7df210cdf9555361 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 08:56:33 -0600 Subject: [PATCH 070/119] Test cleanpu --- pandas/core/internals.py | 2 +- pandas/tests/extension/base.py | 7 +--- pandas/tests/extension/test_categorical.py | 1 - pandas/tests/extension/test_json.py | 40 ---------------------- 4 files changed, 2 insertions(+), 48 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fffbe18d3008c..e48c4202a7da8 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3473,7 +3473,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, else: align_keys = [] - # TODO: may interfere with ExtensionBlock.setitem for blocks + # TODO(EA): may interfere with ExtensionBlock.setitem for blocks # with a .values attribute. 
aligned_args = dict((k, kwargs[k]) for k in align_keys diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index dc9bca653e6f3..51d9da1fe8bab 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -398,17 +398,12 @@ def test_isna(self, data_missing): expected = pd.Series(expected) tm.assert_series_equal(result, expected) - def test_dropna(self, data_missing): - result = pd.Series(data_missing).dropna() - expected = pd.Series(data_missing).iloc[[1]] - tm.assert_series_equal(result, expected) - def test_align(self, data): a = data[:3] b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) - # TODO: assumes that the ctor can take a list of scalars of the type + # Assumes that the ctor can take a list of scalars of the type e1 = pd.Series(type(data)(list(a) + [data._fill_value])) e2 = pd.Series(type(data)([data._fill_value] + list(b))) tm.assert_series_equal(r1, e1) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 237963bc38415..402c53706294b 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -50,7 +50,6 @@ def test_align(self, data): b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) - # TODO: assumes that the ctor can take a list of scalars of the type e1 = pd.Series(type(data)(list(a) + [data._fill_value], dtype=data.dtype)) e2 = pd.Series(type(data)([data._fill_value] + list(b), diff --git a/pandas/tests/extension/test_json.py b/pandas/tests/extension/test_json.py index 515272a4850f9..6d2d227a709fe 100644 --- a/pandas/tests/extension/test_json.py +++ b/pandas/tests/extension/test_json.py @@ -133,43 +133,3 @@ def na_cmp(self): @pytest.mark.skip(reason="Unhashable") def test_value_counts(self, all_data, dropna): pass - - # @pytest.mark.xfail(reason="Difficulty setting sized objects.") - # def test_set_scalar(self): - # pass - # - - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_loc_scalar_mixed(self): - # This fails on an np.ndarary(dict) call in _setitem_with_indexer - pass - - # @pytest.mark.xfail(reason="Difficulty setting sized objects.") - # def test_set_loc_scalar_single(self): - # pass - # - - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_loc_scalar_multiple_homogoneous(self): - # This fails in _setitem_with_indexer with a - # ValueError: Must have equal len keys and value when setting with - # and iterable - pass - - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_iloc_scalar_mixed(self): - # This fails in _setitem_with_indexer with a - # ValueError: Must have equal len keys and value when setting with an - # iterable - pass - - # @pytest.mark.xfail(reason="Difficulty setting sized objects.") - # def test_set_iloc_scalar_single(self): - # pass - # - @pytest.mark.xfail(reason="Difficulty setting sized objects.") - def test_set_iloc_scalar_multiple_homogoneous(self): - # this fails in _setitem_with_indexer with a - # ValueError: Must have equal len keys and value when setting with an - # iterable - pass From a312ba5c59c2e96854a286bde74d7fd4562afbf8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 11:10:22 -0600 Subject: [PATCH 071/119] Always set PANDAS_TESTING_MODE --- .travis.yml | 7 ++++--- circle.yml | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4cbe7f86bd2fa..0129582acdefa 100644 --- a/.travis.yml +++ 
b/.travis.yml @@ -20,6 +20,7 @@ env: # cd pandas-dev/pandas # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA=" + - PANDAS_TESTING_MODE: "deprecate" git: # for cloning @@ -55,7 +56,7 @@ matrix: - JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true - dist: trusty env: - - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true COVERAGE=true + - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" CONDA_FORGE=true COVERAGE=true # In allow_failures - dist: trusty env: @@ -71,7 +72,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" # In allow_failures - dist: trusty env: @@ -96,7 +97,7 @@ matrix: - xsel - dist: trusty env: - - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - JOB="3.6_ASV" ASV=true diff --git a/circle.yml b/circle.yml index 9d49145af54e3..dd322c80d73a0 100644 --- a/circle.yml +++ b/circle.yml @@ -2,6 +2,7 @@ machine: environment: # these are globally set MINICONDA_DIR: /home/ubuntu/miniconda3 + PANDAS_TESTING_MODE: deprecate database: From 758689feb26851a0cdddef61f7b0227c4b23ad20 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 15:44:20 -0600 Subject: [PATCH 072/119] Revert "Always set PANDAS_TESTING_MODE" This reverts commit a312ba5c59c2e96854a286bde74d7fd4562afbf8. 
--- .travis.yml | 7 +++---- circle.yml | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0129582acdefa..4cbe7f86bd2fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,6 @@ env: # cd pandas-dev/pandas # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA=" - - PANDAS_TESTING_MODE: "deprecate" git: # for cloning @@ -56,7 +55,7 @@ matrix: - JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true - dist: trusty env: - - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" CONDA_FORGE=true COVERAGE=true + - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true COVERAGE=true # In allow_failures - dist: trusty env: @@ -72,7 +71,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - dist: trusty env: @@ -97,7 +96,7 @@ matrix: - xsel - dist: trusty env: - - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - dist: trusty env: - JOB="3.6_ASV" ASV=true diff --git a/circle.yml b/circle.yml index dd322c80d73a0..9d49145af54e3 100644 --- a/circle.yml +++ b/circle.yml @@ -2,7 +2,6 @@ machine: environment: # these are globally set MINICONDA_DIR: /home/ubuntu/miniconda3 - PANDAS_TESTING_MODE: deprecate database: From 02c3d401771a308b88b1b5d98827c1bb489f223b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 16:31:18 -0600 Subject: [PATCH 073/119] Explicitly catch warnings or not --- pandas/tests/io/test_parquet.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 11cbea8ce6331..7434e58610a34 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -154,10 +154,21 @@ def check_round_trip(df, engine=None, path=None, write_kwargs['engine'] = engine read_kwargs['engine'] = engine + should_warn = (engine == 'pyarrow' and + pyarrow.__version__ <= LooseVersion("0.8.0") and + any(pd.api.types.is_datetime64tz_dtype(dtype) + for dtype in df.dtypes)) + + if should_warn: + warning_type = DeprecationWarning + else: + warning_type = None + def compare(repeat): for _ in range(repeat): df.to_parquet(path, **write_kwargs) - with catch_warnings(record=True): + with tm.assert_produces_warning(warning_type, + check_stacklevel=False): actual = read_parquet(path, **read_kwargs) tm.assert_frame_equal(expected, actual, check_names=check_names) From 9e17037cfb914f715a136df19995e98aa4449ede Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 17:08:50 -0600 Subject: [PATCH 074/119] fastparquet warnings --- pandas/tests/io/test_parquet.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 7434e58610a34..01446962dccef 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -154,12 +154,19 @@ def check_round_trip(df, engine=None, path=None, write_kwargs['engine'] = engine read_kwargs['engine'] = engine - should_warn = (engine == 
'pyarrow' and - pyarrow.__version__ <= LooseVersion("0.8.0") and - any(pd.api.types.is_datetime64tz_dtype(dtype) - for dtype in df.dtypes)) - - if should_warn: + if (engine == 'pyarrow' and + pyarrow.__version__ <= LooseVersion("0.8.0") and + any(pd.api.types.is_datetime64tz_dtype(dtype) + for dtype in df.dtypes)): + # Use of deprecated fastpath in make_block + warning_type = DeprecationWarning + elif (engine == 'fastparquet' and + fastparquet.__version__ <= LooseVersion("0.1.4") and + df.select_dtypes(['bool', 'object']) + .isin([True, False]).any().any() + and (path is None or not path.startswith('s3://'))): + # use of deprecated np.fromstring for boolean columns + # https://github.com/dask/fastparquet/issues/302 warning_type = DeprecationWarning else: warning_type = None From 4599db453346e09ffdb84c5289e343d79213aed0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Feb 2018 06:31:45 -0600 Subject: [PATCH 075/119] Unicode literals strikes again. Only catch fp warning for newer numpy --- pandas/tests/io/test_parquet.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 01446962dccef..b7ee42d6d66f5 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -155,17 +155,23 @@ def check_round_trip(df, engine=None, path=None, read_kwargs['engine'] = engine if (engine == 'pyarrow' and - pyarrow.__version__ <= LooseVersion("0.8.0") and + LooseVersion(pyarrow.__version__) <= LooseVersion("0.8.0") and any(pd.api.types.is_datetime64tz_dtype(dtype) for dtype in df.dtypes)): # Use of deprecated fastpath in make_block + # Deprecated in pandas 0.23 and removed in pyarrow 0.9 + # Remove this when all pyarrow builds >= 0.9 warning_type = DeprecationWarning elif (engine == 'fastparquet' and - fastparquet.__version__ <= LooseVersion("0.1.4") and + LooseVersion(fastparquet.__version__) <= LooseVersion("0.1.4") and + LooseVersion(np.__version__) >= LooseVersion("1.14.0") and df.select_dtypes(['bool', 'object']) - .isin([True, False]).any().any() - and (path is None or not path.startswith('s3://'))): + .isin([True, False]).any().any() and + (path is None or not path.startswith('s3://'))): # use of deprecated np.fromstring for boolean columns + # Deprecated in numpy 1.14 + # Used in fastparquet <= 0.1.4 + # Remove when all fastparquet builds >= 0.1.5 # https://github.com/dask/fastparquet/issues/302 warning_type = DeprecationWarning else: From d34d9cadd8526adf06dda9ff53b2104a13530d4e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Feb 2018 13:33:10 -0600 Subject: [PATCH 076/119] Restore circle env var --- circle.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/circle.yml b/circle.yml index 9d49145af54e3..dd322c80d73a0 100644 --- a/circle.yml +++ b/circle.yml @@ -2,6 +2,7 @@ machine: environment: # these are globally set MINICONDA_DIR: /home/ubuntu/miniconda3 + PANDAS_TESTING_MODE: deprecate database: From 29d252827514f5c14433f8b874a5a41d5a22372f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Feb 2018 14:55:54 -0600 Subject: [PATCH 077/119] More parquet test catching --- pandas/tests/io/test_parquet.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b7ee42d6d66f5..4ba7336a986e4 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -166,14 +166,19 @@ def check_round_trip(df, engine=None, path=None, 
LooseVersion(fastparquet.__version__) <= LooseVersion("0.1.4") and LooseVersion(np.__version__) >= LooseVersion("1.14.0") and df.select_dtypes(['bool', 'object']) - .isin([True, False]).any().any() and - (path is None or not path.startswith('s3://'))): + .isin([True, False]).any().any()): # use of deprecated np.fromstring for boolean columns # Deprecated in numpy 1.14 # Used in fastparquet <= 0.1.4 # Remove when all fastparquet builds >= 0.1.5 # https://github.com/dask/fastparquet/issues/302 warning_type = DeprecationWarning + elif (engine == 'fastparquet' and + LooseVersion(fastparquet.__version__) <= LooseVersion("0.1.4") and + any(pd.api.types.is_bool_dtype(df[col]) for col in df.columns)): + # Use of deprecated `dtype` in `make_block` that's hit only for + # bool dtypes with no Nones. + warning_type = DeprecationWarning else: warning_type = None @@ -248,7 +253,16 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp): with tm.ensure_clean() as path: df.to_parquet(path, engine=pa, compression=None) - result = read_parquet(path, engine=fp) + if (LooseVersion(fastparquet.__version__) <= LooseVersion('0.1.4') and + LooseVersion(np.__version__) >= LooseVersion('1.14.0')): + # fastparquet used np.fromstring, deprecated in numpy 1.14.0 + expected_warning = DeprecationWarning + else: + expected_warning = None + + with tm.assert_produces_warning(expected_warning): + result = read_parquet(path, engine=fp) + tm.assert_frame_equal(result, df) result = read_parquet(path, engine=fp, columns=['a', 'd']) From 412c951c6e9e2af49866da2ce5f3cf9015bf88a7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Feb 2018 16:55:31 -0600 Subject: [PATCH 078/119] No stacklevel --- pandas/tests/io/test_parquet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 4ba7336a986e4..9ba2c92844995 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -260,7 +260,8 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp): else: expected_warning = None - with tm.assert_produces_warning(expected_warning): + with tm.assert_produces_warning(expected_warning, + check_stacklevel=False): result = read_parquet(path, engine=fp) tm.assert_frame_equal(result, df) From 78834f1c165a2a7ffec5a06abc8972bb5631390c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 15 Feb 2018 06:58:51 -0600 Subject: [PATCH 079/119] Lower bound on FP --- pandas/tests/io/test_parquet.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9ba2c92844995..5c2553979cc6d 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -154,6 +154,15 @@ def check_round_trip(df, engine=None, path=None, write_kwargs['engine'] = engine read_kwargs['engine'] = engine + fastparquet_make_block_dtype = ( + # Use of deprecated `dtype` in `make_block` that's hit only for + # bool dtypes with no Nones. 
+ engine == 'fastparquet' and + LooseVersion("0.1.1") < LooseVersion(fastparquet.__version__) <= + LooseVersion("0.1.4") and + any(pd.api.types.is_bool_dtype(df[col]) for col in df.columns) + ) + if (engine == 'pyarrow' and LooseVersion(pyarrow.__version__) <= LooseVersion("0.8.0") and any(pd.api.types.is_datetime64tz_dtype(dtype) @@ -173,11 +182,7 @@ def check_round_trip(df, engine=None, path=None, # Remove when all fastparquet builds >= 0.1.5 # https://github.com/dask/fastparquet/issues/302 warning_type = DeprecationWarning - elif (engine == 'fastparquet' and - LooseVersion(fastparquet.__version__) <= LooseVersion("0.1.4") and - any(pd.api.types.is_bool_dtype(df[col]) for col in df.columns)): - # Use of deprecated `dtype` in `make_block` that's hit only for - # bool dtypes with no Nones. + elif fastparquet_make_block_dtype: warning_type = DeprecationWarning else: warning_type = None From f8eac55e3f1ece94bc4a173cd84874faeb73fc5a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 15 Feb 2018 08:27:22 -0600 Subject: [PATCH 080/119] Exact bound for FP --- pandas/tests/io/test_parquet.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 5c2553979cc6d..69b651839f80a 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -158,8 +158,7 @@ def check_round_trip(df, engine=None, path=None, # Use of deprecated `dtype` in `make_block` that's hit only for # bool dtypes with no Nones. engine == 'fastparquet' and - LooseVersion("0.1.1") < LooseVersion(fastparquet.__version__) <= - LooseVersion("0.1.4") and + LooseVersion(fastparquet.__version__) == LooseVersion("0.1.4") and any(pd.api.types.is_bool_dtype(df[col]) for col in df.columns) ) @@ -171,6 +170,9 @@ def check_round_trip(df, engine=None, path=None, # Deprecated in pandas 0.23 and removed in pyarrow 0.9 # Remove this when all pyarrow builds >= 0.9 warning_type = DeprecationWarning + # elif (engine == 'fastparquet' and + # LooseVersion(fastparquet.__version__) == LooseVersion('0.1.3')): + # warning_type = DeprecationWarning elif (engine == 'fastparquet' and LooseVersion(fastparquet.__version__) <= LooseVersion("0.1.4") and LooseVersion(np.__version__) >= LooseVersion("1.14.0") and From f09c86334493cfe57b994547f3fdacb2afbc9f4c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 15 Feb 2018 10:37:44 -0600 Subject: [PATCH 081/119] Don't use fastpath for ExtensionBlock make_block --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e48c4202a7da8..b42138343de19 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4867,7 +4867,7 @@ def form_blocks(arrays, names, axes): for i, _, array in items_dict['ExtensionBlock']: external_blocks.append( make_block(array, klass=ExtensionBlock, - fastpath=True, placement=[i])) + placement=[i])) blocks.extend(external_blocks) if len(extra_locs): From cedb63d5b53c80c13d712dd6152acaefb5622801 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 09:12:04 -0600 Subject: [PATCH 082/119] Consistently use _values --- pandas/core/algorithms.py | 2 +- pandas/core/series.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 427ec5af270bb..099a1411ebae7 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -546,7 +546,7 @@ def value_counts(values, sort=True, 
ascending=False, normalize=False, if is_extension_array_dtype(values) or is_sparse(values): # handle Categorical and sparse, - result = Series(values).values.value_counts(dropna=dropna) + result = Series(values)._values.value_counts(dropna=dropna) result.name = name counts = result.values diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e36b95c95120..b8338dcfdec63 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2568,8 +2568,8 @@ def _reindex_indexer(self, new_index, indexer, copy): return self # be subclass-friendly - if isinstance(self.values, ExtensionArray): - new_values = self.values.take(indexer) + if isinstance(self._values, ExtensionArray): + new_values = self._values.take(indexer) else: new_values = algorithms.take_1d(self.get_values(), indexer) From cae2c26c0c17b91c466d94d0ce7e0483598ad8fc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 09:30:24 -0600 Subject: [PATCH 083/119] TST: Additional constructor tests --- pandas/tests/extension/base.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index 51d9da1fe8bab..3639c042a9336 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -133,8 +133,13 @@ def test_series_constructor(self, data): assert isinstance(result._data.blocks[0], ExtensionBlock) assert result._data.blocks[0].values is data + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + assert isinstance(result2._data.blocks[0], ExtensionBlock) + @pytest.mark.parametrize("from_series", [True, False]) - def dataframe_constructor(self, data, from_series): + def test_dataframe_constructor_from_dict(self, data, from_series): if from_series: data = pd.Series(data) result = pd.DataFrame({"A": data}) @@ -142,6 +147,12 @@ def dataframe_constructor(self, data, from_series): assert result.shape == (len(data), 1) assert isinstance(result._data.blocks[0], ExtensionBlock) + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + @pytest.mark.xfail(reason="GH-19342") def test_series_given_mismatched_index_raises(self, data): msg = 'Wrong number of items passed 3, placement implies 4' From 808809612512fcfe960f081be05096ab5f679486 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 09:30:54 -0600 Subject: [PATCH 084/119] CLN: de-nested a bit --- pandas/core/dtypes/missing.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 002839af6daf2..170fd518f55a6 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -137,22 +137,20 @@ def _isna_ndarraylike(obj): else: values = obj result = values.isna() + elif is_interval_dtype(values): + # TODO(IntervalArray): remove this if block + from pandas import IntervalIndex + result = IntervalIndex(obj).isna() elif is_string_dtype(dtype): - if is_interval_dtype(values): - # TODO(IntervalArray): remove this if block - from pandas import IntervalIndex - result = IntervalIndex(obj).isna() - else: - - # Working around NumPy ticket 1542 - shape = values.shape + # Working around NumPy ticket 1542 + shape = values.shape - if is_string_like_dtype(dtype): - result = np.zeros(values.shape, dtype=bool) - else: - 
result = np.empty(shape, dtype=bool) - vec = libmissing.isnaobj(values.ravel()) - result[...] = vec.reshape(shape) + if is_string_like_dtype(dtype): + result = np.zeros(values.shape, dtype=bool) + else: + result = np.empty(shape, dtype=bool) + vec = libmissing.isnaobj(values.ravel()) + result[...] = vec.reshape(shape) elif needs_i8_conversion(obj): # this is the NaT pattern From 8aed325b43e3f2c11cf58504b9755c7a836923a2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 13:38:46 -0600 Subject: [PATCH 085/119] _fill_value handling --- pandas/core/arrays/base.py | 24 ++++++++++++----------- pandas/core/arrays/categorical.py | 2 +- pandas/core/internals.py | 11 +++++------ pandas/tests/categorical/test_missing.py | 25 ++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e9d56a0e95461..9e416eb8d064c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -27,7 +27,7 @@ class ExtensionArray(object): * copy * _concat_same_type - Some additional methods are required to satisfy pandas' internal, private + Some additional methods are available to satisfy pandas' internal, private block API. * _can_hold_na @@ -98,16 +98,17 @@ def __setitem__(self, key, value): When called from, e.g. ``Series.__setitem__``, ``key`` will always be an ndarray of integers. value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object - ExtensionArrays may + value or values to be set of ``key``. Notes ----- This method is not required to satisfy the interface. If an ExtensionArray chooses to implement __setitem__, then some semantics - should be observed. + should be observed: * Setting multiple values : ExtensionArrays should support setting - multiple values at once, ``key`` will be a sequence of integers. + multiple values at once, ``key`` will be a sequence of integers and + ``value`` will be a same-length sequence. * Broadcasting : For a sequence ``key`` and a scalar ``value``, each position in ``key`` should be set to ``value``. @@ -116,9 +117,6 @@ def __setitem__(self, key, value): example, a string like ``'2018-01-01'`` is coerced to a datetime when setting on a datetime64ns array. In general, if the ``__init__`` method coerces that value, then so should ``__setitem__``. - - When called from, e.g. ``Series.__setitem__``, ``key`` will always - be an ndarray of positions. """ raise NotImplementedError(_not_implemented_message.format( type(self), '__setitem__') @@ -240,8 +238,8 @@ def take(self, indexer, allow_fill=True, fill_value=None): will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. fill_value : any, default None - Fill value to replace -1 values with. By default, this uses - the missing value sentinel for this type, ``self._fill_value``. + Fill value to replace -1 values with. If applicable, this should + use the sentinel missing value for this type. Notes ----- @@ -262,7 +260,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): def take(self, indexer, allow_fill=True, fill_value=None): mask = indexer == -1 result = self.data.take(indexer) - result[mask] = self._fill_value + result[mask] = self._fill_value # NA for this type return type(self)(result) See Also @@ -292,7 +290,11 @@ def copy(self, deep=False): @property def _fill_value(self): # type: () -> Any - """The missing value for this type, e.g. np.nan""" + """The missing value for this type, e.g. np.nan. Default None. 
+ + This is not currently used by pandas directly. It is used in the + provided test suite for extension arrays. + """ return None def _formatting_values(self): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d1b231b21f496..784844256d79d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2143,7 +2143,7 @@ def _can_hold_na(self): @property def _fill_value(self): - return np.nan + return self.categories._na_value @classmethod def _concat_same_type(self, to_concat): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b42138343de19..7127da1ace5cb 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1878,7 +1878,7 @@ def _holder(self): @property def _can_hold_na(self): - # The default ExtensionBlock._can_hold_na is True + # The default ExtensionArray._can_hold_na is True return self._holder._can_hold_na @property @@ -4862,12 +4862,11 @@ def form_blocks(arrays, names, axes): if len(items_dict['ExtensionBlock']): - external_blocks = [] + external_blocks = [ + make_block(array, klass=ExtensionBlock, placement=[i]) + for i, _, array in items_dict['ExtensionBlock'] + ] - for i, _, array in items_dict['ExtensionBlock']: - external_blocks.append( - make_block(array, klass=ExtensionBlock, - placement=[i])) blocks.extend(external_blocks) if len(extra_locs): diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index 79758dee5cfda..061f7f91faae9 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- - import numpy as np +import pytest import pandas.util.testing as tm -from pandas import (Categorical, Index, isna) +from pandas import (Categorical, Index, DatetimeIndex, isna, NaT, + TimedeltaIndex) from pandas.compat import lrange from pandas.core.dtypes.dtypes import CategoricalDtype @@ -53,3 +54,23 @@ def test_set_item_nan(self): exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize('arr', [ + DatetimeIndex(['2017', '2018']), + DatetimeIndex(['2017', '2018'], tz='US/Central'), + DatetimeIndex(['2017', '2018'], tz='US/Central'), + TimedeltaIndex(['10s', '201s']), + ]) + def test_fill_value_nat(self, arr): + cat = Categorical(arr) + assert cat._fill_value is NaT + + @pytest.mark.parametrize('arr', [ + [0, 1], + [True, False], + ['a', 'b'], + [0.0, 1.0], + ]) + def test_fill_value_nan(self, arr): + cat = Categorical(arr) + assert isna(cat._fill_value) From 453728a09e38380dfc9cecc1b9a6c68a8d4f1384 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 14:10:42 -0600 Subject: [PATCH 086/119] Handle user provided dtype in constructors. When the dtype matches, we allow it to proceed. When the dtype would require coercion, we raise. --- pandas/core/series.py | 21 ++++++++++---- pandas/tests/extension/test_decimal.py | 38 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b8338dcfdec63..9e98908f601c8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,6 +21,7 @@ is_integer, is_integer_dtype, is_float_dtype, is_extension_type, + is_extension_array_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_list_like, @@ -208,13 +209,15 @@ def __init__(self, data=None, index=None, dtype=None, name=None, '`data` argument and a different ' '`index` argument. 
`copy` must ' 'be False.') - elif isinstance(data, Categorical): + + elif is_extension_array_dtype(data) and dtype is not None: # GH12574: Allow dtype=category only, otherwise error - if ((dtype is not None) and - not is_categorical_dtype(dtype)): - raise ValueError("cannot specify a dtype with a " - "Categorical unless " - "dtype='category'") + if not data.dtype.is_dtype(dtype): + raise ValueError("Cannot specify a dtype '{}' with an " + "extension array of a different " + "dtype ('{}').".format(dtype, + data.dtype)) + elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): data = list(data) @@ -3206,6 +3209,12 @@ def _try_cast(arr, take_fast_path): elif isinstance(data, ExtensionArray): subarr = data + if dtype is not None and not data.dtype.is_dtype(dtype): + msg = ("Cannot coerce extension array to dtype '{typ}'. " + "Do the coercion before passing to the constructor " + "instead.".format(typ=dtype)) + raise ValueError(msg) + if copy: subarr = data.copy() return subarr diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index 687e645825a75..62f7966455b48 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -141,3 +141,41 @@ def test_value_counts(self, all_data, dropna): expected = pd.Series(other).value_counts(dropna=dropna).sort_index() tm.assert_series_equal(result, expected) + + +def test_series_constructor_with_dtype_coercion_raises(): + xpr = ("Cannot coerce data to extension dtype 'decimal'. Pass the " + "extension array for 'decimal' directly instead.") + with tm.assert_raises_regex(ValueError, xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + +def test_series_constructor_with_same_dtype_ok(): + arr = DecimalArray([decimal.Decimal('10.0')]) + result = pd.Series(arr, dtype=DecimalDtype()) + expected = pd.Series(arr) + tm.assert_series_equal(result, expected) + + +def test_series_constructor_with_different_dtype_raises(): + arr = DecimalArray([decimal.Decimal('10.0')]) + xpr = "Cannot specify a dtype 'int64' .* \('decimal'\)." + + with tm.assert_raises_regex(ValueError, xpr): + pd.Series(arr, dtype='int64') + + +def test_dataframe_constructor_with_same_dtype_ok(): + arr = DecimalArray([decimal.Decimal('10.0')]) + + result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) + expected = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_constructor_with_different_dtype_raises(): + arr = DecimalArray([decimal.Decimal('10.0')]) + + xpr = "Cannot coerce extension array to dtype 'int64'. " + with tm.assert_raises_regex(ValueError, xpr): + pd.DataFrame({"A": arr}, dtype='int64') From cc13c8dd66513dce6cee9e69bb9623579c120cee Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 14:22:25 -0600 Subject: [PATCH 087/119] Document ExtensionBlock._maybe_coerce_values Also changes to use _values as we should --- pandas/core/internals.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7127da1ace5cb..986a6674128be 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1865,10 +1865,21 @@ def __init__(self, values, placement, ndim=None): super(ExtensionBlock, self).__init__(values, placement, ndim) def _maybe_coerce_values(self, values): - # Unboxes Series / Index - # Doesn't change any underlying dtypes. + """Unbox to an extension array. + + This will unbox an ExtensionArray stored in an Index or Series. 
+ ExtensionArrays pass through. No dtype coercion is done. + + Parameters + ---------- + values : Index, Series, ExtensionArray + + Returns + ------- + ExtensionArray + """ if isinstance(values, (ABCIndexClass, ABCSeries)): - values = values.values + values = values._values return values @property From f90ac0732915d40f213f7a9571606acfafec355b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 14:56:21 -0600 Subject: [PATCH 088/119] Created ABCExtensionArray --- pandas/core/arrays/base.py | 1 + pandas/core/dtypes/base.py | 2 +- pandas/core/dtypes/generic.py | 2 ++ pandas/core/dtypes/missing.py | 7 +++---- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9e416eb8d064c..8db518552559f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -54,6 +54,7 @@ class ExtensionArray(object): the class, i.e. ``ExtensionArray(extension_array)`` should return an instance, not error. """ + _typ = 'extension' # For pandas.core.dtypes.generic.ABCExtensionArray # ------------------------------------------------------------------------ # Must be a Sequence # ------------------------------------------------------------------------ diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 2f071a3b3cf71..17d375e67808b 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -122,7 +122,7 @@ def is_dtype(cls, dtype): 1. ``cls.construct_from_string(dtype)`` is an instance of ``cls``. 2. ``dtype`` is an object and is an instance of ``cls`` - 3. 'dtype' is a class and is ``cls`` or a subclass of ``cls``. + 3. ``dtype`` is a class and is ``cls`` or a subclass of ``cls``. """ if isinstance(dtype, str): try: diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index b032cb6f14d4c..b841322bf93e1 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -57,6 +57,8 @@ def _check(cls, inst): ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", ("dateoffset",)) ABCInterval = create_pandas_abc_type("ABCInterval", "_typ", ("interval", )) +ABCExtensionArray = create_pandas_abc_type("ABCExtensionArray", "_typ", + ("extension",)) class _ABCGeneric(type): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 170fd518f55a6..b4a05a24aabc9 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -5,7 +5,8 @@ from pandas._libs import lib, missing as libmissing from pandas._libs.tslib import NaT, iNaT from .generic import (ABCMultiIndex, ABCSeries, - ABCIndexClass, ABCGeneric) + ABCIndexClass, ABCGeneric, + ABCExtensionArray) from .common import (is_string_dtype, is_datetimelike, is_datetimelike_v_numeric, is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -53,15 +54,13 @@ def isna(obj): def _isna_new(obj): - from ..arrays import ExtensionArray - if is_scalar(obj): return libmissing.checknull(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, - ExtensionArray)): + ABCExtensionArray)): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=isna)) From 4a03b26170b39b41727a64d6cadd43652685ce01 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 14:56:44 -0600 Subject: [PATCH 089/119] TST: Tests for is_object_dtype and is_string_dtype and EAs 
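
(Illustrative sketch, not part of the patch below: the new test names encode the
expectation that a third-party extension dtype registers as neither object nor
string. The snippet reuses the DecimalDtype defined in
pandas/tests/extension/test_decimal.py earlier in this series; the import path
and the asserts are assumed from those diffs and from the test names, not taken
from this patch itself.)

    from pandas.api.types import is_object_dtype, is_string_dtype
    from pandas.tests.extension.test_decimal import DecimalDtype

    dtype = DecimalDtype()
    # The property the new tests are named for: an ExtensionDtype should not
    # be mistaken for an object or string dtype by these predicates.
    assert not is_object_dtype(dtype)
    assert not is_string_dtype(dtype)
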
--- pandas/tests/extension/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index 3639c042a9336..eb55c2c8c28f6 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -43,6 +43,12 @@ def test_is_dtype_from_self(self, dtype): result = type(dtype).is_dtype(dtype) assert result is True + def test_is_not_string_type(self, dtype): + return not pd.api.types.is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not pd.api.types.is_object_dtype(dtype) + class BaseArrayTests(object): """Base class for extension array classes. From 635223fc006a3ade4a3e8cdb01b51043ebb74983 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 21:03:49 -0600 Subject: [PATCH 090/119] fixup! Handle user provided dtype in constructors. --- pandas/core/series.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e98908f601c8..7e98ff4deb5d7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3152,6 +3152,13 @@ def _sanitize_array(data, index, dtype=None, copy=False, if dtype is not None: dtype = pandas_dtype(dtype) + if is_extension_array_dtype(dtype) and not is_extension_array_dtype(data): + # Just check for any extension dtype data here. We validatate that + # the exact types match later. + raise ValueError("Cannot coerce data to extension dtype '{type}'. " + "Pass the extension array for '{type}' " + "directly instead.".format(type=dtype)) + if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): From cf423a72b9d345d69c22bc8e91a3fe20845ea0d8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 21:34:55 -0600 Subject: [PATCH 091/119] Doc for setitem --- pandas/core/arrays/base.py | 10 ++++++++-- pandas/core/frame.py | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8db518552559f..3d32adbc35f53 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -96,8 +96,14 @@ def __setitem__(self, key, value): Parameters ---------- key : int or ndarray - When called from, e.g. ``Series.__setitem__``, ``key`` will - always be an ndarray of integers. + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. + * boolean ndarray + * slice object + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object value or values to be set of ``key``. 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 86b6405a2617a..5eb729ffd77b1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5638,6 +5638,8 @@ def count(self, axis=0, level=None, numeric_only=False): result = Series(0, index=frame._get_agg_axis(axis)) else: if frame._is_mixed_type or frame._data.any_extension_types: + # the or any_extension_types is really only hit for single- + # column frames with an extension array result = notna(frame).sum(axis=axis) else: counts = notna(frame.values).sum(axis=axis) From 2d1a66c4ff44ba88210533e0e281c6fb411ef9fe Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 22:07:20 -0600 Subject: [PATCH 092/119] Split base tests --- pandas/tests/extension/base.py | 243 +++++---------------- pandas/tests/extension/conftest.py | 41 ++++ pandas/tests/extension/test_categorical.py | 64 ++++-- pandas/tests/extension/test_decimal.py | 54 +++-- pandas/tests/extension/test_json.py | 60 +++-- 5 files changed, 214 insertions(+), 248 deletions(-) create mode 100644 pandas/tests/extension/conftest.py diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index eb55c2c8c28f6..fb60b79e75ac5 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -1,5 +1,3 @@ -import operator - import numpy as np import pytest @@ -14,11 +12,6 @@ class BaseDtypeTests(object): """Base class for ExtensionDtype classes""" - @pytest.fixture - def dtype(self): - """A fixture providing the ExtensionDtype to validate.""" - raise NotImplementedError - def test_name(self, dtype): assert isinstance(dtype.name, str) @@ -50,46 +43,8 @@ def test_is_not_object_type(self, dtype): return not pd.api.types.is_object_dtype(dtype) -class BaseArrayTests(object): - """Base class for extension array classes. - - Subclasses should implement the following fixtures - - * data - * data_missing - """ - - # ------------------------------------------------------------------------ - # Fixtures - # ------------------------------------------------------------------------ - @pytest.fixture - def data(self): - """Length-100 array for this type.""" - raise NotImplementedError - - @pytest.fixture - def data_missing(self): - """Length-2 array with [NA, Valid]""" - raise NotImplementedError - - @pytest.fixture(params=['data', 'data_missing']) - def all_data(self, request, data, data_missing): - if request.param == 'data': - return data - elif request.param == 'data_missing': - return data_missing - - @pytest.fixture - def na_cmp(self): - """Binary operator for comparing NA values. - - Should return a function of two arguments that returns - True if both arguments are (scalar) NA for your type. 
- - By defult, uses ``operator.or`` - """ - return operator.is_ - +class BaseInterfaceTests(object): + """Tests that the basic interface is satisfied.""" # ------------------------------------------------------------------------ # Interface # ------------------------------------------------------------------------ @@ -128,9 +83,14 @@ def test_dtype_name_in_info(self, data): result = buf.getvalue() assert data.dtype.name in result - # ------------------------------------------------------------------------ - # Constructors - # ------------------------------------------------------------------------ + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) + + +class BaseConstructorsTests(object): def test_series_constructor(self, data): result = pd.Series(data) @@ -167,10 +127,9 @@ def test_series_given_mismatched_index_raises(self, data): assert m.match(msg) - # ------------------------------------------------------------------------ - # Reshaping - # ------------------------------------------------------------------------ +class BaseReshapingTests(object): + """Tests for reshaping and concatenation.""" def test_concat(self, data): result = pd.concat([ pd.Series(data), @@ -180,9 +139,20 @@ def test_concat(self, data): assert result.dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) - # ------------------------------------------------------------------------ - # Indexing - getting - # ------------------------------------------------------------------------ + def test_align(self, data): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(data)(list(a) + [data._fill_value])) + e2 = pd.Series(type(data)([data._fill_value] + list(b))) + tm.assert_series_equal(r1, e1) + tm.assert_series_equal(r2, e2) + + +class BaseGetitemTests(object): + """Tests for ExtensionArray.__getitem__.""" def test_iloc_series(self, data): ser = pd.Series(data) @@ -246,12 +216,6 @@ def test_loc_frame(self, data): result = df.loc[:3, 'A'] tm.assert_series_equal(result, expected) - def test_is_extension_array_dtype(self, data): - assert is_extension_array_dtype(data) - assert is_extension_array_dtype(data.dtype) - assert is_extension_array_dtype(pd.Series(data)) - assert isinstance(data.dtype, ExtensionDtype) - def test_getitem_scalar(self, data): result = data[0] assert isinstance(result, data.dtype.type) @@ -301,107 +265,8 @@ def test_take_sequence(self, data): assert result.iloc[1] == data[1] assert result.iloc[2] == data[3] - # ------------------------------------------------------------------------ - # Indexing - Setting - # ------------------------------------------------------------------------ - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_setitem_scalar(self, data): - arr = pd.Series(data) - arr[0] = data[1] - assert arr[0] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_setitem_sequence(self, data): - arr = pd.Series(data) - original = data.copy() - - arr[[0, 1]] = [data[1], data[0]] - assert arr[0] == original[1] - assert arr[1] == original[0] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_setitem_sequence_broadcasts(self, data): - arr = pd.Series(data) - 
- arr[[0, 1]] = data[2] - assert arr[0] == data[2] - assert arr[1] == data[2] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - @pytest.mark.parametrize('setter', ['loc', 'iloc']) - def test_set_scalar(self, data, setter): - arr = pd.Series(data) - setter = getattr(arr, setter) - operator.setitem(setter, 0, data[1]) - assert arr[0] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_loc_scalar_mixed(self, data): - df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) - df.loc[0, 'B'] = data[1] - assert df.loc[0, 'B'] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_loc_scalar_single(self, data): - df = pd.DataFrame({"B": data}) - df.loc[10, 'B'] = data[1] - assert df.loc[10, 'B'] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_loc_scalar_multiple_homogoneous(self, data): - df = pd.DataFrame({"A": data, "B": data}) - df.loc[10, 'B'] = data[1] - assert df.loc[10, 'B'] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_iloc_scalar_mixed(self, data): - df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) - df.iloc[0, 1] = data[1] - assert df.loc[0, 'B'] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_iloc_scalar_single(self, data): - df = pd.DataFrame({"B": data}) - df.iloc[10, 0] = data[1] - assert df.loc[10, 'B'] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_iloc_scalar_multiple_homogoneous(self, data): - df = pd.DataFrame({"A": data, "B": data}) - df.iloc[10, 1] = data[1] - assert df.loc[10, 'B'] == data[1] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_mask_aligned(self, data): - ser = pd.Series(data) - mask = np.zeros(len(data), dtype=bool) - mask[:2] = True - - ser[mask] = data[5:7] - assert ser[0] == data[5] - assert ser[1] == data[6] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_set_mask_broadcast(self, data): - ser = pd.Series(data) - mask = np.zeros(len(data), dtype=bool) - mask[:2] = True - - ser[mask] = data[10] - assert ser[0] == data[10] - assert ser[1] == data[10] - - @pytest.mark.xfail(reason="ExtensionBlock.__setitem__ not implemented.") - def test_setitem_expand_columns(self, data): - df = pd.DataFrame({"A": data}) - df['B'] = 1 - assert len(df.columns) == 2 - - # ------------------------------------------------------------------------ - # Methods - # ------------------------------------------------------------------------ +class BaseMissingTests(object): def test_isna(self, data_missing): if data_missing._can_hold_na: expected = np.array([True, False]) @@ -415,36 +280,6 @@ def test_isna(self, data_missing): expected = pd.Series(expected) tm.assert_series_equal(result, expected) - def test_align(self, data): - a = data[:3] - b = data[2:5] - r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) - - # Assumes that the ctor can take a list of scalars of the type - e1 = pd.Series(type(data)(list(a) + [data._fill_value])) - e2 = pd.Series(type(data)([data._fill_value] + list(b))) - tm.assert_series_equal(r1, e1) - tm.assert_series_equal(r2, e2) - - @pytest.mark.parametrize('dropna', [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - 
else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() - - tm.assert_series_equal(result, expected) - - def test_count(self, data_missing): - df = pd.DataFrame({"A": data_missing}) - result = df.count(axis='columns') - expected = pd.Series([0, 1]) - tm.assert_series_equal(result, expected) - def test_dropna_series(self, data_missing): ser = pd.Series(data_missing) result = ser.dropna() @@ -470,3 +305,27 @@ def test_dropna_frame(self, data_missing): result = df.dropna() expected = df.iloc[:0] tm.assert_frame_equal(result, expected) + + +class BaseMethodsTests(object): + """Various Series and DataFrame methods.""" + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis='columns') + expected = pd.Series([0, 1]) + tm.assert_series_equal(result, expected) + diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py new file mode 100644 index 0000000000000..d49c6184c5494 --- /dev/null +++ b/pandas/tests/extension/conftest.py @@ -0,0 +1,41 @@ +import operator + +import pytest + + +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + +@pytest.fixture +def data(): + """Length-100 array for this type.""" + raise NotImplementedError + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing + + +@pytest.fixture +def na_cmp(): + """Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + By defult, uses ``operator.or`` + """ + return operator.is_ diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 402c53706294b..cc22fdc953859 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -7,43 +7,46 @@ import pandas.util.testing as tm from pandas.api.types import CategoricalDtype from pandas import Categorical -from .base import BaseArrayTests, BaseDtypeTests - - -class TestCategoricalDtype(BaseDtypeTests): - @pytest.fixture - def dtype(self): - return CategoricalDtype() +from . 
import base def make_data(): return np.random.choice(list(string.ascii_letters), size=100) -class TestCategoricalArray(BaseArrayTests): +@pytest.fixture +def dtype(): + return CategoricalDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return Categorical(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return Categorical([np.nan, 'A']) + - @pytest.fixture - def data(self): - """Length-100 PeriodArray for semantics test.""" - return Categorical(make_data()) +class TestDtype(base.BaseDtypeTests): + pass - @pytest.fixture - def data_missing(self): - """Length 2 array with [NA, Valid]""" - return Categorical([np.nan, 'A']) +class TestInterface(base.BaseInterfaceTests): @pytest.mark.skip(reason="Memory usage doesn't match") def test_memory_usage(self): # Is this deliberate? pass - @pytest.mark.skip(reason="Backwards compatability") - def test_getitem_scalar(self): - # CategoricalDtype.type isn't "correct" since it should - # be a parent of the elements (object). But don't want - # to break things by changing. - pass +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): def test_align(self, data): # Override to pass through dtype a = data[:3] @@ -57,6 +60,23 @@ def test_align(self, data): tm.assert_series_equal(r1, e1) tm.assert_series_equal(r2, e2) + +class TestGetitem(base.BaseGetitemTests): + @pytest.mark.skip(reason="Backwards compatability") + def test_getitem_scalar(self): + # CategoricalDtype.type isn't "correct" since it should + # be a parent of the elements (object). But don't want + # to break things by changing. + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + pass + @pytest.mark.skip(reason="Different value_counts semantics.") def test_value_counts(self, all_data, dropna): pass diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index 62f7966455b48..c574c9556f508 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -11,7 +11,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.base import ExtensionDtype -from .base import BaseDtypeTests, BaseArrayTests +from . 
import base class DecimalDtype(ExtensionDtype): @@ -92,27 +92,39 @@ def make_data(): return [decimal.Decimal(random.random()) for _ in range(100)] -class TestDecimalDtype(BaseDtypeTests): +@pytest.fixture +def dtype(): + return DecimalDtype() - @pytest.fixture - def dtype(self): - return DecimalDtype() +@pytest.fixture +def data(): + return DecimalArray(make_data()) -class TestDecimalArray(BaseArrayTests): - @pytest.fixture - def data(self): - return DecimalArray(make_data()) +@pytest.fixture +def data_missing(): + return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) - @pytest.fixture - def data_missing(self): - return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) - @pytest.fixture - def na_cmp(self): - return lambda x, y: x.is_nan() and y.is_nan() +@pytest.fixture +def na_cmp(): + return lambda x, y: x.is_nan() and y.is_nan() + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): def test_align(self, data): a = data[:3] b = data[2:5] @@ -129,7 +141,17 @@ def test_align(self, data): assert r2[0].is_nan() assert e2[0].is_nan() - @pytest.mark.skip(reason="NaN Sorting") + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.xfail(reason="NaN Sorting") def test_value_counts(self, all_data, dropna): all_data = all_data[:10] if dropna: diff --git a/pandas/tests/extension/test_json.py b/pandas/tests/extension/test_json.py index 6d2d227a709fe..466b8c191b533 100644 --- a/pandas/tests/extension/test_json.py +++ b/pandas/tests/extension/test_json.py @@ -13,8 +13,7 @@ from pandas.core.dtypes.base import ExtensionDtype from pandas.core.arrays import ExtensionArray -from .base import BaseArrayTests, BaseDtypeTests - +from . 
import base pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, reason="Py2 doesn't have a UserDict") @@ -108,28 +107,53 @@ def make_data(): for _ in range(random.randint(0, 10))]) for _ in range(100)] -class TestJSONDtype(BaseDtypeTests): - @pytest.fixture - def dtype(self): - return JSONDtype() +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return JSONArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {'a': 10}]) + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass -class TestJSON(BaseArrayTests): +class TestGetitem(base.BaseGetitemTests): + pass - @pytest.fixture - def data(self): - """Length-100 PeriodArray for semantics test.""" - return JSONArray(make_data()) - @pytest.fixture - def data_missing(self): - """Length 2 array with [NA, Valid]""" - return JSONArray([{}, {'a': 10}]) +class TestMissing(base.BaseMissingTests): + pass - @pytest.fixture - def na_cmp(self): - return operator.eq +class TestMethods(base.BaseMethodsTests): @pytest.mark.skip(reason="Unhashable") def test_value_counts(self, all_data, dropna): pass From c849865fccbd9f35dbe91fac6c583c6d920d9990 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 22:14:46 -0600 Subject: [PATCH 093/119] Revert test_parquet changes --- circle.yml | 1 - pandas/tests/io/test_parquet.py | 50 ++------------------------------- 2 files changed, 2 insertions(+), 49 deletions(-) diff --git a/circle.yml b/circle.yml index dd322c80d73a0..9d49145af54e3 100644 --- a/circle.yml +++ b/circle.yml @@ -2,7 +2,6 @@ machine: environment: # these are globally set MINICONDA_DIR: /home/ubuntu/miniconda3 - PANDAS_TESTING_MODE: deprecate database: diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 69b651839f80a..11cbea8ce6331 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -154,46 +154,10 @@ def check_round_trip(df, engine=None, path=None, write_kwargs['engine'] = engine read_kwargs['engine'] = engine - fastparquet_make_block_dtype = ( - # Use of deprecated `dtype` in `make_block` that's hit only for - # bool dtypes with no Nones. 
- engine == 'fastparquet' and - LooseVersion(fastparquet.__version__) == LooseVersion("0.1.4") and - any(pd.api.types.is_bool_dtype(df[col]) for col in df.columns) - ) - - if (engine == 'pyarrow' and - LooseVersion(pyarrow.__version__) <= LooseVersion("0.8.0") and - any(pd.api.types.is_datetime64tz_dtype(dtype) - for dtype in df.dtypes)): - # Use of deprecated fastpath in make_block - # Deprecated in pandas 0.23 and removed in pyarrow 0.9 - # Remove this when all pyarrow builds >= 0.9 - warning_type = DeprecationWarning - # elif (engine == 'fastparquet' and - # LooseVersion(fastparquet.__version__) == LooseVersion('0.1.3')): - # warning_type = DeprecationWarning - elif (engine == 'fastparquet' and - LooseVersion(fastparquet.__version__) <= LooseVersion("0.1.4") and - LooseVersion(np.__version__) >= LooseVersion("1.14.0") and - df.select_dtypes(['bool', 'object']) - .isin([True, False]).any().any()): - # use of deprecated np.fromstring for boolean columns - # Deprecated in numpy 1.14 - # Used in fastparquet <= 0.1.4 - # Remove when all fastparquet builds >= 0.1.5 - # https://github.com/dask/fastparquet/issues/302 - warning_type = DeprecationWarning - elif fastparquet_make_block_dtype: - warning_type = DeprecationWarning - else: - warning_type = None - def compare(repeat): for _ in range(repeat): df.to_parquet(path, **write_kwargs) - with tm.assert_produces_warning(warning_type, - check_stacklevel=False): + with catch_warnings(record=True): actual = read_parquet(path, **read_kwargs) tm.assert_frame_equal(expected, actual, check_names=check_names) @@ -260,17 +224,7 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp): with tm.ensure_clean() as path: df.to_parquet(path, engine=pa, compression=None) - if (LooseVersion(fastparquet.__version__) <= LooseVersion('0.1.4') and - LooseVersion(np.__version__) >= LooseVersion('1.14.0')): - # fastparquet used np.fromstring, deprecated in numpy 1.14.0 - expected_warning = DeprecationWarning - else: - expected_warning = None - - with tm.assert_produces_warning(expected_warning, - check_stacklevel=False): - result = read_parquet(path, engine=fp) - + result = read_parquet(path, engine=fp) tm.assert_frame_equal(result, df) result = read_parquet(path, engine=fp, columns=['a', 'd']) From c3ec8226ed3bf37361f45b5760cc3a3f51ea9fc5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 22:32:24 -0600 Subject: [PATCH 094/119] API: Removed _fill_value from the interface --- pandas/core/arrays/base.py | 10 --------- pandas/core/arrays/categorical.py | 4 ---- pandas/tests/categorical/test_missing.py | 24 +--------------------- pandas/tests/extension/base.py | 10 ++++----- pandas/tests/extension/conftest.py | 6 ++++++ pandas/tests/extension/test_categorical.py | 11 +++++++--- pandas/tests/extension/test_decimal.py | 15 +++++++++----- pandas/tests/extension/test_json.py | 11 +++++++--- 8 files changed, 38 insertions(+), 53 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3d32adbc35f53..9c7c395ef994f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -32,7 +32,6 @@ class ExtensionArray(object): * _can_hold_na * _formatting_values - * _fill_value This class does not inherit from 'abc.ABCMeta' for performance reasons. 
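    A very rough sketch of a minimal subclass, backed by a plain NumPy object
    array, is shown below. ``MyDtype``, ``MyArray`` and ``self.data`` are
    illustrative names only, and private block-API pieces such as
    ``_can_hold_na`` are omitted, so treat this as a starting point rather
    than a complete implementation.

    .. code-block:: python

        import numpy as np

        from pandas.core.arrays import ExtensionArray
        from pandas.core.dtypes.base import ExtensionDtype


        class MyDtype(ExtensionDtype):
            name = 'my_dtype'
            type = object

            @classmethod
            def construct_from_string(cls, string):
                if string == cls.name:
                    return cls()
                raise TypeError("Cannot construct a '{}' from "
                                "'{}'".format(cls.__name__, string))


        class MyArray(ExtensionArray):
            dtype = MyDtype()

            def __init__(self, values):
                # store the data in a plain object-dtype ndarray
                self.data = np.asarray(values, dtype=object)

            def __getitem__(self, item):
                if isinstance(item, (int, np.integer)):
                    # scalar key -> scalar value
                    return self.data[item]
                # slices and boolean masks -> a new MyArray
                return type(self)(self.data[item])

            def __len__(self):
                return len(self.data)

            @property
            def nbytes(self):
                return self.data.nbytes

            def isna(self):
                return np.array([v is None for v in self.data], dtype=bool)

            def take(self, indexer, allow_fill=True, fill_value=None):
                indexer = np.asarray(indexer)
                out = self.data.take(indexer)
                if allow_fill:
                    # a -1 in the indexer means "missing" in this sketch
                    out[indexer == -1] = None
                return type(self)(out)

            def copy(self, deep=False):
                return type(self)(self.data.copy())

            def _formatting_values(self):
                return self.data

            @classmethod
            def _concat_same_type(cls, to_concat):
                return cls(np.concatenate([array.data for array in to_concat]))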
Methods and properties required by the interface raise @@ -294,15 +293,6 @@ def copy(self, deep=False): # ------------------------------------------------------------------------ # Block-related methods # ------------------------------------------------------------------------ - @property - def _fill_value(self): - # type: () -> Any - """The missing value for this type, e.g. np.nan. Default None. - - This is not currently used by pandas directly. It is used in the - provided test suite for extension arrays. - """ - return None def _formatting_values(self): # type: () -> np.ndarray diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 784844256d79d..bcf9cb7646704 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2141,10 +2141,6 @@ def repeat(self, repeats, *args, **kwargs): def _can_hold_na(self): return True - @property - def _fill_value(self): - return self.categories._na_value - @classmethod def _concat_same_type(self, to_concat): from pandas.core.dtypes.concat import _concat_categorical diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index 061f7f91faae9..c8ac6a6ef14f8 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -1,10 +1,8 @@ # -*- coding: utf-8 -*- import numpy as np -import pytest import pandas.util.testing as tm -from pandas import (Categorical, Index, DatetimeIndex, isna, NaT, - TimedeltaIndex) +from pandas import Categorical, Index, isna from pandas.compat import lrange from pandas.core.dtypes.dtypes import CategoricalDtype @@ -54,23 +52,3 @@ def test_set_item_nan(self): exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) - - @pytest.mark.parametrize('arr', [ - DatetimeIndex(['2017', '2018']), - DatetimeIndex(['2017', '2018'], tz='US/Central'), - DatetimeIndex(['2017', '2018'], tz='US/Central'), - TimedeltaIndex(['10s', '201s']), - ]) - def test_fill_value_nat(self, arr): - cat = Categorical(arr) - assert cat._fill_value is NaT - - @pytest.mark.parametrize('arr', [ - [0, 1], - [True, False], - ['a', 'b'], - [0.0, 1.0], - ]) - def test_fill_value_nan(self, arr): - cat = Categorical(arr) - assert isna(cat._fill_value) diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index fb60b79e75ac5..815f4a61dea7e 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -139,14 +139,14 @@ def test_concat(self, data): assert result.dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) - def test_align(self, data): + def test_align(self, data, na_value): a = data[:3] b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) # Assumes that the ctor can take a list of scalars of the type - e1 = pd.Series(type(data)(list(a) + [data._fill_value])) - e2 = pd.Series(type(data)([data._fill_value] + list(b))) + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) tm.assert_series_equal(r1, e1) tm.assert_series_equal(r2, e2) @@ -223,9 +223,9 @@ def test_getitem_scalar(self, data): result = pd.Series(data)[0] assert isinstance(result, data.dtype.type) - def test_getitem_scalar_na(self, data_missing, na_cmp): + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): result = data_missing[0] - assert na_cmp(result, data_missing._fill_value) + assert na_cmp(result, na_value) def test_getitem_mask(self, data): # Empty mask, raw array diff 
--git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index d49c6184c5494..3e57dde385f6c 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -39,3 +39,9 @@ def na_cmp(): By defult, uses ``operator.or`` """ return operator.is_ + + +@pytest.fixture +def na_value(self): + """The scalar missing value for this type.""" + return None diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index cc22fdc953859..af56afed07f4d 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -31,6 +31,11 @@ def data_missing(): return Categorical([np.nan, 'A']) +@pytest.fixture +def na_value(): + return np.nan + + class TestDtype(base.BaseDtypeTests): pass @@ -47,15 +52,15 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): - def test_align(self, data): + def test_align(self, data, na_value): # Override to pass through dtype a = data[:3] b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) - e1 = pd.Series(type(data)(list(a) + [data._fill_value], + e1 = pd.Series(type(data)(list(a) + [na_value], dtype=data.dtype)) - e2 = pd.Series(type(data)([data._fill_value] + list(b), + e2 = pd.Series(type(data)([na_value] + list(b), dtype=data.dtype)) tm.assert_series_equal(r1, e1) tm.assert_series_equal(r2, e2) diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index c574c9556f508..1758d0ed89d49 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -75,12 +75,12 @@ def take(self, indexer, allow_fill=True, fill_value=None): mask = indexer == -1 out = self.values.take(indexer) - out[mask] = self._fill_value + out[mask] = self._na_value return type(self)(out) @property - def _fill_value(self): + def _na_value(self): return decimal.Decimal('NaN') @classmethod @@ -112,6 +112,11 @@ def na_cmp(): return lambda x, y: x.is_nan() and y.is_nan() +@pytest.fixture +def na_value(): + return decimal.Decimal("NaN") + + class TestDtype(base.BaseDtypeTests): pass @@ -125,14 +130,14 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): - def test_align(self, data): + def test_align(self, data, na_value): a = data[:3] b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) # NaN handling - e1 = pd.Series(type(data)(list(a) + [data._fill_value])) - e2 = pd.Series(type(data)([data._fill_value] + list(b))) + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) tm.assert_series_equal(r1.iloc[:3], e1.iloc[:3]) assert r1[3].is_nan() assert e1[3].is_nan() diff --git a/pandas/tests/extension/test_json.py b/pandas/tests/extension/test_json.py index 466b8c191b533..547ed31de8d72 100644 --- a/pandas/tests/extension/test_json.py +++ b/pandas/tests/extension/test_json.py @@ -80,10 +80,10 @@ def nbytes(self): return sys.getsizeof(self.data) def isna(self): - return np.array([x == self._fill_value for x in self.data]) + return np.array([x == self._na_value for x in self.data]) def take(self, indexer, allow_fill=True, fill_value=None): - output = [self.data[loc] if loc != -1 else self._fill_value + output = [self.data[loc] if loc != -1 else self._na_value for loc in indexer] return type(self)(output) @@ -91,7 +91,7 @@ def copy(self, deep=False): return type(self)(self.data[:]) @property - def _fill_value(self): + def _na_value(self): 
return {} @classmethod @@ -124,6 +124,11 @@ def data_missing(): return JSONArray([{}, {'a': 10}]) +@pytest.fixture +def na_value(): + return {} + + @pytest.fixture def na_cmp(): return operator.eq From f4cf45c2f122c4c81e40da7a864dbd69b70512eb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Feb 2018 22:40:09 -0600 Subject: [PATCH 095/119] Push coercion to extension dtype till later --- pandas/core/series.py | 7 ------- pandas/tests/extension/test_decimal.py | 7 ------- 2 files changed, 14 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7e98ff4deb5d7..9e98908f601c8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3152,13 +3152,6 @@ def _sanitize_array(data, index, dtype=None, copy=False, if dtype is not None: dtype = pandas_dtype(dtype) - if is_extension_array_dtype(dtype) and not is_extension_array_dtype(data): - # Just check for any extension dtype data here. We validatate that - # the exact types match later. - raise ValueError("Cannot coerce data to extension dtype '{type}'. " - "Pass the extension array for '{type}' " - "directly instead.".format(type=dtype)) - if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index 1758d0ed89d49..dad2bc45c5e08 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -170,13 +170,6 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) -def test_series_constructor_with_dtype_coercion_raises(): - xpr = ("Cannot coerce data to extension dtype 'decimal'. Pass the " - "extension array for 'decimal' directly instead.") - with tm.assert_raises_regex(ValueError, xpr): - pd.Series([0, 1, 2], dtype=DecimalDtype()) - - def test_series_constructor_with_same_dtype_ok(): arr = DecimalArray([decimal.Decimal('10.0')]) result = pd.Series(arr, dtype=DecimalDtype()) From 9c5d47953184a21049eaa076fabbd74e36c28ce7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 06:03:52 -0600 Subject: [PATCH 096/119] Linting --- pandas/core/arrays/base.py | 1 + pandas/core/dtypes/missing.py | 6 +++--- pandas/tests/extension/base.py | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9c7c395ef994f..ed3cb99e8a998 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -57,6 +57,7 @@ class ExtensionArray(object): # ------------------------------------------------------------------------ # Must be a Sequence # ------------------------------------------------------------------------ + def __getitem__(self, item): # type (Any) -> Any """Select a subset of self. 
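As a point of reference, the contract exercised by ``BaseGetitemTests`` is roughly: an integer key returns a scalar of ``dtype.type``, while slices and boolean masks return another array of the same class. A condensed illustration, assuming the ``DecimalArray`` defined in these tests:

.. code-block:: python

    import decimal

    import numpy as np

    arr = DecimalArray([decimal.Decimal('1.0'), decimal.Decimal('2.0')])

    assert isinstance(arr[0], decimal.Decimal)    # scalar key -> scalar
    assert isinstance(arr[:1], DecimalArray)      # slice -> array
    assert isinstance(arr[np.array([True, False])], DecimalArray)  # mask -> array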
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index b4a05a24aabc9..36dbb0ee4b98f 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -137,9 +137,9 @@ def _isna_ndarraylike(obj): values = obj result = values.isna() elif is_interval_dtype(values): - # TODO(IntervalArray): remove this if block - from pandas import IntervalIndex - result = IntervalIndex(obj).isna() + # TODO(IntervalArray): remove this if block + from pandas import IntervalIndex + result = IntervalIndex(obj).isna() elif is_string_dtype(dtype): # Working around NumPy ticket 1542 shape = values.shape diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index 815f4a61dea7e..40d619d1516a7 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -328,4 +328,3 @@ def test_count(self, data_missing): result = df.count(axis='columns') expected = pd.Series([0, 1]) tm.assert_series_equal(result, expected) - From 1175c0dc1abc6af4b62bddfdc32f4237e736f5ad Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 06:19:38 -0600 Subject: [PATCH 097/119] ERR: Better error message for coercion to 3rd party dtypes --- pandas/core/series.py | 9 +++++++++ pandas/tests/extension/test_decimal.py | 9 ++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e98908f601c8..70f7444fae69d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3173,8 +3173,17 @@ def _try_cast(arr, take_fast_path): subarr = np.array(subarr, dtype=dtype, copy=copy) except (ValueError, TypeError): if is_categorical_dtype(dtype): + # We *do* allow casting to categorical, since we know + # that Categorical is the only array type for 'category'. subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) + elif is_extension_array_dtype(dtype): + # We don't allow casting to third party dtypes, since we don't + # know what array belongs to which type. + msg = ("Cannot cast data to extension dtype '{}'. " + "Pass the extension array directly.".format(dtype)) + raise ValueError(msg) + elif dtype is not None and raise_cast_failure: raise else: diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index dad2bc45c5e08..d6ddd09d1f356 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -170,6 +170,13 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) +def test_series_constructor_coerce_data_to_extension_dtype_raises(): + xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the " + "extension array directly.") + with tm.assert_raises_regex(ValueError, xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + def test_series_constructor_with_same_dtype_ok(): arr = DecimalArray([decimal.Decimal('10.0')]) result = pd.Series(arr, dtype=DecimalDtype()) @@ -177,7 +184,7 @@ def test_series_constructor_with_same_dtype_ok(): tm.assert_series_equal(result, expected) -def test_series_constructor_with_different_dtype_raises(): +def test_series_constructor_coerce_extension_array_to_dtype_raises(): arr = DecimalArray([decimal.Decimal('10.0')]) xpr = "Cannot specify a dtype 'int64' .* \('decimal'\)." 
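Taken together, the constructor tests above pin down the intended behaviour at the user level. A rough illustration, reusing the ``DecimalArray`` and ``DecimalDtype`` defined in this test module:

.. code-block:: python

    import decimal

    import pandas as pd
    import pandas.util.testing as tm

    arr = DecimalArray([decimal.Decimal('1.0'), decimal.Decimal('2.0')])

    # Passing the extension array itself is fine, with or without its dtype.
    pd.Series(arr)
    pd.Series(arr, dtype=DecimalDtype())

    # Asking the constructor to coerce plain data to a third-party dtype
    # raises, since pandas cannot know which array class backs that dtype.
    with tm.assert_raises_regex(ValueError, "Cannot cast data"):
        pd.Series([0, 1, 2], dtype=DecimalDtype())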
From c816d99b83f05abd37094f720fe0a2b04473df79 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 06:34:13 -0600 Subject: [PATCH 098/119] CLN: Make take_nd EA aware --- pandas/core/algorithms.py | 19 ++++++++++++++----- pandas/core/series.py | 7 +------ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 099a1411ebae7..20ca7eaaf2a53 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1291,10 +1291,12 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, """ Specialized Cython take which sets NaN values in one pass + This dispatches to ``take`` defined on ExtensionArrays. + Parameters ---------- - arr : ndarray - Input array + arr : ndarray, ExtensionArray, DatetimeIndex, IntervalIndex, SparseArray + Input array. SparseArray is densified with ``get_values`` indexer : ndarray 1-D array of indices to take, subarrays corresponding to -1 value indicies are filed with fill_value @@ -1314,16 +1316,23 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, If False, indexer is assumed to contain no -1 values so no filling will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. + + Returns + ------- + subarray : object + May be the same type as the input, or cast to an ndarray. """ + # TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs # dispatch to internal type takes - if is_categorical(arr): - return arr.take_nd(indexer, fill_value=fill_value, - allow_fill=allow_fill) + if is_extension_array_dtype(arr): + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_interval_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + elif is_sparse(arr): + arr = arr.get_values() if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) diff --git a/pandas/core/series.py b/pandas/core/series.py index 70f7444fae69d..0a078401fd4e9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2570,12 +2570,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self.copy() return self - # be subclass-friendly - if isinstance(self._values, ExtensionArray): - new_values = self._values.take(indexer) - else: - new_values = algorithms.take_1d(self.get_values(), indexer) - + new_values = algorithms.take_1d(self._values, indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): From 9c9f59ec125fa5d7aeda0be63529807daf928a7f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 13:15:52 -0600 Subject: [PATCH 099/119] Revert sparse changes --- pandas/core/algorithms.py | 9 ++++----- pandas/core/series.py | 8 +++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 20ca7eaaf2a53..d22fe1e3bcb47 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -20,7 +20,7 @@ is_period_dtype, is_numeric_dtype, is_float_dtype, is_bool_dtype, needs_i8_conversion, - is_categorical, is_datetimetz, + is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_interval_dtype, is_scalar, is_list_like, @@ -1291,12 +1291,13 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, """ Specialized 
Cython take which sets NaN values in one pass - This dispatches to ``take`` defined on ExtensionArrays. + This dispatches to ``take`` defined on ExtensionArrays. It does not + currently dispatch to ``SparseArray.take`` for sparse ``arr``. Parameters ---------- arr : ndarray, ExtensionArray, DatetimeIndex, IntervalIndex, SparseArray - Input array. SparseArray is densified with ``get_values`` + Input array. indexer : ndarray 1-D array of indices to take, subarrays corresponding to -1 value indicies are filed with fill_value @@ -1331,8 +1332,6 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_interval_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) - elif is_sparse(arr): - arr = arr.get_values() if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0a078401fd4e9..ea7bf36dc4a0a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -29,6 +29,7 @@ is_iterator, is_dict_like, is_scalar, + is_sparse, _is_unorderable_exception, _ensure_platform_int, pandas_dtype) @@ -2570,7 +2571,12 @@ def _reindex_indexer(self, new_index, indexer, copy): return self.copy() return self - new_values = algorithms.take_1d(self._values, indexer) + if is_sparse(self): + arr = self.get_values() + else: + arr = self._values + + new_values = algorithms.take_1d(arr, indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): From 08af9a36e8700e7f1aefec4f514a7dde6a04f16c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 14:12:58 -0600 Subject: [PATCH 100/119] Other _typ for ABCExtensionArray --- pandas/core/arrays/base.py | 6 ++++-- pandas/core/dtypes/generic.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ed3cb99e8a998..887326625e2ad 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -53,7 +53,9 @@ class ExtensionArray(object): the class, i.e. ``ExtensionArray(extension_array)`` should return an instance, not error. """ - _typ = 'extension' # For pandas.core.dtypes.generic.ABCExtensionArray + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. + # Don't override this. 
+ _typ = 'extension' # ------------------------------------------------------------------------ # Must be a Sequence # ------------------------------------------------------------------------ @@ -267,7 +269,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): def take(self, indexer, allow_fill=True, fill_value=None): mask = indexer == -1 result = self.data.take(indexer) - result[mask] = self._fill_value # NA for this type + result[mask] = np.nan # NA for this type return type(self)(result) See Also diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index b841322bf93e1..cb54c94d29205 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -58,7 +58,7 @@ def _check(cls, inst): ("dateoffset",)) ABCInterval = create_pandas_abc_type("ABCInterval", "_typ", ("interval", )) ABCExtensionArray = create_pandas_abc_type("ABCExtensionArray", "_typ", - ("extension",)) + ("extension", "categorical",)) class _ABCGeneric(type): From 2e992f7fe6780db48054c9b425448d95138174a9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 14:14:06 -0600 Subject: [PATCH 101/119] Test cleanup and expansion. Tests for concating and aligning frames --- pandas/tests/extension/base.py | 67 ++++++++++++++++++++-- pandas/tests/extension/conftest.py | 3 +- pandas/tests/extension/test_categorical.py | 19 ++---- pandas/tests/extension/test_decimal.py | 25 +++++++- 4 files changed, 92 insertions(+), 22 deletions(-) diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index 40d619d1516a7..4e69123aa3cbc 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -1,3 +1,37 @@ +"""Base test suite for extension arrays. + +These tests are intended for third-party libraries to subclass to validate +that their extension arrays and dtypes satisfy the interface. Moving or +renaming the tests should not be done lightly. + +Libraries are expected to implement a few pytest fixtures to provide data +for the tests. The fixtures may be located in either + +* The same module as your test class. +* A ``conftest.py`` in the same directory as your test class. + +The full list of fixtures may be found in the ``conftest.py`` next to this +file. + +.. code-block:: python + + import pytest + from pandas.tests.extension.base import BaseDtypeTests + + + @pytest.fixture + def dtype(): + return MyDtype() + + + class TestMyDtype(BaseDtypeTests): + pass + + +Your class ``TestDtype`` will inherit all the tests defined on +``BaseDtypeTests``. pytest's fixture discover will supply your ``dtype`` +wherever the test requires it. You're free to implement additional tests. 
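Most of the other test classes also rely on the data fixtures declared in
``conftest.py`` next to this file. A sketch of those fixtures for a
hypothetical decimal-backed array (``DecimalDtype`` and ``DecimalArray``
stand in for your own classes):

.. code-block:: python

    import decimal

    import pytest


    @pytest.fixture
    def dtype():
        return DecimalDtype()


    @pytest.fixture
    def data():
        # Length-100 array of valid values for the "happy path" tests.
        return DecimalArray([decimal.Decimal(i) for i in range(100)])


    @pytest.fixture
    def data_missing():
        # Length-2 array with [NA, valid].
        return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])


    @pytest.fixture
    def na_cmp():
        # How two scalar NA values of this type should be compared.
        return lambda x, y: x.is_nan() and y.is_nan()


    @pytest.fixture
    def na_value():
        # The scalar missing value for this type.
        return decimal.Decimal('NaN')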
+""" import numpy as np import pytest @@ -130,13 +164,21 @@ def test_series_given_mismatched_index_raises(self, data): class BaseReshapingTests(object): """Tests for reshaping and concatenation.""" - def test_concat(self, data): - result = pd.concat([ - pd.Series(data), - pd.Series(data), - ], ignore_index=True) + @pytest.mark.parametrize('in_frame', [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped = pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + assert len(result) == len(data) * 2 - assert result.dtype == data.dtype + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = result.dtype + + assert dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) def test_align(self, data, na_value): @@ -150,6 +192,19 @@ def test_align(self, data, na_value): tm.assert_series_equal(r1, e1) tm.assert_series_equal(r2, e2) + def test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + tm.assert_frame_equal(r1, e1) + tm.assert_frame_equal(r2, e2) + class BaseGetitemTests(object): """Tests for ExtensionArray.__getitem__.""" diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 3e57dde385f6c..9664d237afcb6 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -23,6 +23,7 @@ def data_missing(): @pytest.fixture(params=['data', 'data_missing']) def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" if request.param == 'data': return data elif request.param == 'data_missing': @@ -43,5 +44,5 @@ def na_cmp(): @pytest.fixture def na_value(self): - """The scalar missing value for this type.""" + """The scalar missing value for this type. Default 'None'""" return None diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index af56afed07f4d..c1732653e993a 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -3,8 +3,6 @@ import pytest import numpy as np -import pandas as pd -import pandas.util.testing as tm from pandas.api.types import CategoricalDtype from pandas import Categorical from . 
import base @@ -52,18 +50,13 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): + @pytest.mark.skip(reason="Unobserved categories preseved in concat.") def test_align(self, data, na_value): - # Override to pass through dtype - a = data[:3] - b = data[2:5] - r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + pass - e1 = pd.Series(type(data)(list(a) + [na_value], - dtype=data.dtype)) - e2 = pd.Series(type(data)([na_value] + list(b), - dtype=data.dtype)) - tm.assert_series_equal(r1, e1) - tm.assert_series_equal(r2, e2) + @pytest.mark.skip(reason="Unobserved categories preseved in concat.") + def test_align_frame(self, data, na_value): + pass class TestGetitem(base.BaseGetitemTests): @@ -82,6 +75,6 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): pass - @pytest.mark.skip(reason="Different value_counts semantics.") + @pytest.mark.skip(reason="Unobserved categories included") def test_value_counts(self, all_data, dropna): pass diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index d6ddd09d1f356..cb0e96b59b919 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -38,8 +38,6 @@ def __init__(self, values): def __getitem__(self, item): if isinstance(item, numbers.Integral): return self.values[item] - elif isinstance(item, np.ndarray) and item.dtype == 'bool': - return type(self)([x for x, m in zip(self, item) if m]) else: return type(self)(self.values[item]) @@ -130,7 +128,10 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): + def test_align(self, data, na_value): + # Have to override since assert_series_equal doesn't + # compare Decimal(NaN) properly. 
a = data[:3] b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) @@ -146,6 +147,26 @@ def test_align(self, data, na_value): assert r2[0].is_nan() assert e2[0].is_nan() + def test_align_frame(self, data, na_value): + # Override for Decimal(NaN) comparison + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + + tm.assert_frame_equal(r1.iloc[:3], e1.iloc[:3]) + assert r1.loc[3, 'A'].is_nan() + assert e1.loc[3, 'A'].is_nan() + + tm.assert_frame_equal(r2.iloc[1:], e2.iloc[1:]) + assert r2.loc[0, 'A'].is_nan() + assert e2.loc[0, 'A'].is_nan() + class TestGetitem(base.BaseGetitemTests): pass From cc5cc3e2e2d8e9c98a78752fd4f83e54f327c708 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 14:14:31 -0600 Subject: [PATCH 102/119] Copy if copy --- pandas/core/series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index ea7bf36dc4a0a..47ea5743d2a52 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3134,6 +3134,8 @@ def _sanitize_index(data, index, copy=False): pass elif isinstance(data, (PeriodIndex, DatetimeIndex)): data = data._values + if copy: + data = data.copy() elif isinstance(data, np.ndarray): From 704ee67378a747fd454ba8e751b5a5802c36b520 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Feb 2018 19:26:48 -0600 Subject: [PATCH 103/119] TST: remove self param for fixture --- pandas/tests/extension/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 9664d237afcb6..f86849b9cbd61 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -43,6 +43,6 @@ def na_cmp(): @pytest.fixture -def na_value(self): +def na_value(): """The scalar missing value for this type. 
Default 'None'""" return None From 8bf0334eaafd949756b52711a60961e6adf5e57f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 09:15:42 -0600 Subject: [PATCH 104/119] Remove unnescessary EA handling in Series ctor --- pandas/core/series.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d18cd4baa297b..ec4d914f10b0c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -243,10 +243,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None, copy=copy) elif copy: data = data.copy() - elif isinstance(data, ExtensionArray): - if copy: - data = data.copy() - data = SingleBlockManager(data, index, fastpath=True) else: data = _sanitize_array(data, index, dtype, copy, raise_cast_failure=True) From c8d88da0d68ee70ea7f68462e8e29da4a7c22d1e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 09:24:48 -0600 Subject: [PATCH 105/119] API: Removed value_counts Moved setitem notes to comment --- pandas/core/algorithms.py | 2 +- pandas/core/arrays/base.py | 60 ++++++++++---------------- pandas/tests/extension/test_decimal.py | 3 +- 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index d22fe1e3bcb47..2ea714fa2738c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -543,7 +543,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, else: - if is_extension_array_dtype(values) or is_sparse(values): + if is_categorical_dtype(values) or is_sparse(values): # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 887326625e2ad..0df256fa0cddd 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -95,9 +95,12 @@ def __setitem__(self, key, value): # type: (Union[int, np.ndarray], Any) -> None """Set one or more values inplace. + This method is not required to satisfy the pandas extension array + interface. + Parameters ---------- - key : int or ndarray + key : int, ndarray, or slice When called from, e.g. ``Series.__setitem__``, ``key`` will be one of @@ -109,24 +112,26 @@ def __setitem__(self, key, value): value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object value or values to be set of ``key``. - Notes - ----- - This method is not required to satisfy the interface. If an - ExtensionArray chooses to implement __setitem__, then some semantics - should be observed: - - * Setting multiple values : ExtensionArrays should support setting - multiple values at once, ``key`` will be a sequence of integers and - ``value`` will be a same-length sequence. - - * Broadcasting : For a sequence ``key`` and a scalar ``value``, - each position in ``key`` should be set to ``value``. - - * Coercion : Most users will expect basic coercion to work. For - example, a string like ``'2018-01-01'`` is coerced to a datetime - when setting on a datetime64ns array. In general, if the - ``__init__`` method coerces that value, then so should ``__setitem__``. + Returns + ------- + None """ + # Some notes to the ExtensionArray implementor who may have ended up + # here. 
While this method is not required for the interface, if you + # *do* choose to implement __setitem__, then some semantics should be + # observed: + # + # * Setting multiple values : ExtensionArrays should support setting + # multiple values at once, 'key' will be a sequence of integers and + # 'value' will be a same-length sequence. + # + # * Broadcasting : For a sequence 'key' and a scalar 'value', + # each position in 'key' should be set to 'value'. + # + # * Coercion : Most users will expect basic coercion to work. For + # example, a string like '2018-01-01' is coerced to a datetime + # when setting on a datetime64ns array. In general, if the + # __init__ method coerces that value, then so should __setitem__ raise NotImplementedError(_not_implemented_message.format( type(self), '__setitem__') ) @@ -211,25 +216,6 @@ def isna(self): """ raise AbstractMethodError(self) - def value_counts(self, dropna=True): - """Compute a histogram of the counts of non-null values. - - Parameters - ---------- - dropna : bool, default True - Don't include counts of NaN - - Returns - ------- - value_counts : Series - """ - from pandas import value_counts - - if dropna: - self = self[~self.isna()] - - return value_counts(np.array(self)) - # ------------------------------------------------------------------------ # Indexing methods # ------------------------------------------------------------------------ diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index cb0e96b59b919..9cd19840bb274 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -177,7 +177,8 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - @pytest.mark.xfail(reason="NaN Sorting") + @pytest.mark.parametrize('dropna', [True, False]) + @pytest.mark.xfail(reason="value_counts not implemented yet.") def test_value_counts(self, all_data, dropna): all_data = all_data[:10] if dropna: From 24f3b6095b9da19e0a56f93e12fed2d1909c8527 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 09:41:41 -0600 Subject: [PATCH 106/119] More doc notes --- pandas/core/arrays/base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0df256fa0cddd..b89080349058c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -147,12 +147,12 @@ def __len__(self): raise AbstractMethodError(self) def __iter__(self): - """Iterate over elements. + """Iterate over elements of the array. - This needs to be implemented so that pandas recognizes extension arrays - as list-like. The default implementation makes successive calls to - ``__getitem__``, which may be slower than necessary. """ + # This needs to be implemented so that pandas recognizes extension arrays + # as list-like. The default implementation makes successive calls to + # ``__getitem__``, which may be slower than necessary. for i in range(len(self)): yield self[i] @@ -181,9 +181,9 @@ def nbytes(self): # type: () -> int """The number of bytes needed to store this object in memory. - If this is expensive to compute, return an approximate lower bound - on the number of bytes needed. """ + # If this is expensive to compute, return an approximate lower bound + # on the number of bytes needed. 
raise AbstractMethodError(self) # ------------------------------------------------------------------------ From 50bd5dd89ee586847f317a773742730c3320ed5e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 10:08:37 -0600 Subject: [PATCH 107/119] Handle expanding a DataFrame with an EA --- pandas/core/internals.py | 8 ++++++-- pandas/tests/extension/base.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1dccbf2e7ff96..bad0626206e80 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -59,6 +59,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDatetimeIndex, + ABCExtensionArray, ABCIndexClass) import pandas.core.common as com import pandas.core.algorithms as algos @@ -4141,7 +4142,10 @@ def set(self, item, value, check=False): # FIXME: refactor, clearly separate broadcasting & zip-like assignment # can prob also fix the various if tests for sparse/categorical - value_is_extension_type = is_extension_type(value) + # TODO(EA): Remove an is_extension_ when all extension types satisfy + # the interface + value_is_extension_type = (is_extension_type(value) or + is_extension_array_dtype(value)) # categorical/spares/datetimetz if value_is_extension_type: @@ -5198,7 +5202,7 @@ def _safe_reshape(arr, new_shape): """ if isinstance(arr, ABCSeries): arr = arr._values - if not isinstance(arr, Categorical): + if not isinstance(arr, ABCExtensionArray): arr = arr.reshape(new_shape) return arr diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index 4e69123aa3cbc..a7d80e870d550 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -205,6 +205,18 @@ def test_align_frame(self, data, na_value): tm.assert_frame_equal(r1, e1) tm.assert_frame_equal(r2, e2) + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df['B'] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + tm.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({'A': data}) + df['B'] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + tm.assert_frame_equal(df, expected) + class BaseGetitemTests(object): """Tests for ExtensionArray.__getitem__.""" From 879bc8425a8f45ed5cdbc8403455b8fd6bd7f25c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 11:38:56 -0600 Subject: [PATCH 108/119] Added ExtensionDtype.__eq__ Support for astype --- pandas/core/dtypes/base.py | 27 ++++++++++++++++++++++ pandas/core/dtypes/dtypes.py | 7 ------ pandas/tests/extension/base.py | 23 +++++++++++++++++- pandas/tests/extension/test_categorical.py | 5 ++++ pandas/tests/extension/test_decimal.py | 4 ++++ pandas/tests/extension/test_json.py | 5 ++++ 6 files changed, 63 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 17d375e67808b..65e8cb5dd00a7 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,6 +1,7 @@ """Extend pandas with custom array types""" import inspect +from pandas import compat from pandas.errors import AbstractMethodError @@ -25,6 +26,32 @@ class ExtensionDtype(object): def __str__(self): return self.name + def __eq__(self, other): + """Check whether 'other' is equal to self. + + By default, 'other' is considered equal if + + * it's a string matching 'self.name'. + * it's an instance of this type. 
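        For instance, for a hypothetical subclass named ``MyDtype`` whose
        ``name`` is ``'my_dtype'`` and which does not override ``__eq__``:

        .. code-block:: python

            dtype = MyDtype()

            dtype == 'my_dtype'           # True: the string matches dtype.name
            dtype == MyDtype()            # True: an instance of the same type
            dtype == 'my_dtype-suffix'    # False
            dtype == np.dtype('object')   # False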
+ + Parameters + ---------- + other : Any + + Returns + ------- + bool + """ + if isinstance(other, compat.string_types): + return other == self.name + elif isinstance(other, type(self)): + return True + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + @property def type(self): # type: () -> type diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 99e4033f104db..a972cb942c620 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -66,13 +66,6 @@ def __hash__(self): raise NotImplementedError("sub-classes should implement an __hash__ " "method") - def __eq__(self, other): - raise NotImplementedError("sub-classes should implement an __eq__ " - "method") - - def __ne__(self, other): - return not self.__eq__(other) - def __getstate__(self): # pickle support; we don't want to pickle the cache return {k: getattr(self, k, None) for k in self._metadata} diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index a7d80e870d550..fbf084617f252 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -38,7 +38,7 @@ class TestMyDtype(BaseDtypeTests): import pandas as pd import pandas.util.testing as tm from pandas.compat import StringIO -from pandas.core.internals import ExtensionBlock +from pandas.core.internals import ExtensionBlock, ObjectBlock from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype @@ -76,6 +76,13 @@ def test_is_not_string_type(self, dtype): def test_is_not_object_type(self, dtype): return not pd.api.types.is_object_dtype(dtype) + def test_eq_with_str(self, dtype): + assert dtype == dtype.name + assert dtype != dtype.name + '-suffix' + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype('object') + class BaseInterfaceTests(object): """Tests that the basic interface is satisfied.""" @@ -395,3 +402,17 @@ def test_count(self, data_missing): result = df.count(axis='columns') expected = pd.Series([0, 1]) tm.assert_series_equal(result, expected) + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) + + +class BaseCastingTests(object): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series({"A": all_data}) + result = ser.astype(object) + assert isinstance(result._data.blocks[0], ObjectBlock) + diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index c1732653e993a..a7d0c02a5b2a2 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -78,3 +78,8 @@ class TestMethods(base.BaseMethodsTests): @pytest.mark.skip(reason="Unobserved categories included") def test_value_counts(self, all_data, dropna): pass + + +class TestCasting(base.BaseCastingTests): + pass + diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/test_decimal.py index 9cd19840bb274..46ca08f94c72b 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/test_decimal.py @@ -192,6 +192,10 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) +class TestCasting(base.BaseCastingTests): + pass + + def test_series_constructor_coerce_data_to_extension_dtype_raises(): xpr = ("Cannot cast data to extension dtype 'decimal'. 
Pass the " "extension array directly.") diff --git a/pandas/tests/extension/test_json.py b/pandas/tests/extension/test_json.py index 547ed31de8d72..a662465425045 100644 --- a/pandas/tests/extension/test_json.py +++ b/pandas/tests/extension/test_json.py @@ -162,3 +162,8 @@ class TestMethods(base.BaseMethodsTests): @pytest.mark.skip(reason="Unhashable") def test_value_counts(self, all_data, dropna): pass + + +class TestCasting(base.BaseCastingTests): + pass + From 33c9d1f1142e91f23fc01fb4ce0ddeb7e4461e88 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Feb 2018 06:58:23 -0600 Subject: [PATCH 109/119] linting --- pandas/core/arrays/base.py | 6 +++--- pandas/tests/extension/base.py | 1 - pandas/tests/extension/test_categorical.py | 1 - pandas/tests/extension/test_json.py | 1 - 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b89080349058c..cec881394a021 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -150,9 +150,9 @@ def __iter__(self): """Iterate over elements of the array. """ - # This needs to be implemented so that pandas recognizes extension arrays - # as list-like. The default implementation makes successive calls to - # ``__getitem__``, which may be slower than necessary. + # This needs to be implemented so that pandas recognizes extension + # arrays as list-like. The default implementation makes successive + # calls to ``__getitem__``, which may be slower than necessary. for i in range(len(self)): yield self[i] diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index fbf084617f252..deaa48a6f6a26 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -415,4 +415,3 @@ def test_astype_object_series(self, all_data): ser = pd.Series({"A": all_data}) result = ser.astype(object) assert isinstance(result._data.blocks[0], ObjectBlock) - diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index a7d0c02a5b2a2..fbec835a72ce8 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -82,4 +82,3 @@ def test_value_counts(self, all_data, dropna): class TestCasting(base.BaseCastingTests): pass - diff --git a/pandas/tests/extension/test_json.py b/pandas/tests/extension/test_json.py index a662465425045..ecaa36b6db9c9 100644 --- a/pandas/tests/extension/test_json.py +++ b/pandas/tests/extension/test_json.py @@ -166,4 +166,3 @@ def test_value_counts(self, all_data, dropna): class TestCasting(base.BaseCastingTests): pass - From f07c166e0447dd927d09f624b05724f7f8914ff3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Feb 2018 09:07:36 -0600 Subject: [PATCH 110/119] REF: is_dtype_equal refactor Moved from PandasExtensionDtype to ExtensionDtype with one modification: catch TypeError explicitly. 
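After the move, the shared implementation accepts a dtype instance, a registered string name, or any object carrying a ``.dtype`` attribute. A rough illustration, using CategoricalDtype as an example of a dtype that picks up the shared logic:

.. code-block:: python

    import numpy as np
    import pandas as pd
    from pandas.api.types import CategoricalDtype

    cat = pd.Series(['a', 'b', 'a'], dtype='category')

    CategoricalDtype.is_dtype(CategoricalDtype())   # True: instance of cls
    CategoricalDtype.is_dtype('category')           # True: construct_from_string
    CategoricalDtype.is_dtype(cat)                  # True: unboxes cat.dtype
    CategoricalDtype.is_dtype('not-a-dtype')        # False: TypeError is caught
    CategoricalDtype.is_dtype(np.dtype('float64'))  # False: plain NumPy dtype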
--- pandas/core/dtypes/base.py | 29 +++++++++++++++++------------ pandas/core/dtypes/dtypes.py | 18 ------------------ pandas/tests/extension/base.py | 3 +++ 3 files changed, 20 insertions(+), 30 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 65e8cb5dd00a7..a1f20870e1124 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,5 +1,5 @@ """Extend pandas with custom array types""" -import inspect +import numpy as np from pandas import compat from pandas.errors import AbstractMethodError @@ -131,12 +131,12 @@ def construct_from_string(cls, string): @classmethod def is_dtype(cls, dtype): - """Check if we match 'dtype' + """Check if we match 'dtype'. Parameters ---------- dtype : str, object, or type - The dtype to check. + The object to check. Returns ------- @@ -150,13 +150,18 @@ def is_dtype(cls, dtype): of ``cls``. 2. ``dtype`` is an object and is an instance of ``cls`` 3. ``dtype`` is a class and is ``cls`` or a subclass of ``cls``. + 4. ``dtype`` has a ``dtype`` attribute, and any of the above + conditions is true for ``dtype.dtype``. """ - if isinstance(dtype, str): - try: - return isinstance(cls.construct_from_string(dtype), cls) - except TypeError: - return False - elif inspect.isclass(dtype): - return issubclass(dtype, cls) - else: - return isinstance(dtype, cls) + dtype = getattr(dtype, 'dtype', dtype) + + if isinstance(dtype, np.dtype): + return False + elif dtype is None: + return False + elif isinstance(dtype, cls): + return True + try: + return cls.construct_from_string(dtype) is not None + except TypeError: + return False diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index a972cb942c620..d262a71933915 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -75,24 +75,6 @@ def reset_cache(cls): """ clear the cache """ cls._cache = {} - @classmethod - def is_dtype(cls, dtype): - """ Return a boolean if the passed type is an actual dtype that - we can match (via string or type) - """ - if hasattr(dtype, 'dtype'): - dtype = dtype.dtype - if isinstance(dtype, np.dtype): - return False - elif dtype is None: - return False - elif isinstance(dtype, cls): - return True - try: - return cls.construct_from_string(dtype) is not None - except: - return False - class CategoricalDtypeType(type): """ diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py index deaa48a6f6a26..6ec93734a2787 100644 --- a/pandas/tests/extension/base.py +++ b/pandas/tests/extension/base.py @@ -66,6 +66,9 @@ def test_is_dtype_from_name(self, dtype): result = type(dtype).is_dtype(dtype.name) assert result is True + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + def test_is_dtype_from_self(self, dtype): result = type(dtype).is_dtype(dtype) assert result is True From 79d43b1aafb3e0ec67bbf0a5c9d6d6312a76d14b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Feb 2018 09:24:13 -0600 Subject: [PATCH 111/119] Remove reference to dtype being a class --- pandas/core/dtypes/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index a1f20870e1124..d54d980d02ffa 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -135,7 +135,7 @@ def is_dtype(cls, dtype): Parameters ---------- - dtype : str, object, or type + dtype : object The object to check. Returns @@ -149,8 +149,7 @@ def is_dtype(cls, dtype): 1. 
``cls.construct_from_string(dtype)`` is an instance of ``cls``. 2. ``dtype`` is an object and is an instance of ``cls`` - 3. ``dtype`` is a class and is ``cls`` or a subclass of ``cls``. - 4. ``dtype`` has a ``dtype`` attribute, and any of the above + 3. ``dtype`` has a ``dtype`` attribute, and any of the above conditions is true for ``dtype.dtype``. """ dtype = getattr(dtype, 'dtype', dtype) From a1ebf5301e4dc12d77a8a76f78f6cea117fa1917 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Feb 2018 19:55:07 -0600 Subject: [PATCH 112/119] move --- pandas/tests/{internals => extension}/test_external_block.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/{internals => extension}/test_external_block.py (100%) diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/extension/test_external_block.py similarity index 100% rename from pandas/tests/internals/test_external_block.py rename to pandas/tests/extension/test_external_block.py From aa57cad421cb96a559318ae72ef53eadd2d818ef Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 08:27:20 -0600 Subject: [PATCH 113/119] Moved sparse check to take_nd --- pandas/core/algorithms.py | 3 +++ pandas/core/series.py | 7 +------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a545bb1005405..c175fe4c9ebff 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1335,6 +1335,9 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, elif is_interval_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + if is_sparse(arr): + arr = arr.get_values() + if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() diff --git a/pandas/core/series.py b/pandas/core/series.py index ec4d914f10b0c..c9df1d60895d2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2565,12 +2565,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self.copy() return self - if is_sparse(self): - arr = self.get_values() - else: - arr = self._values - - new_values = algorithms.take_1d(arr, indexer) + new_values = algorithms.take_1d(self._values, indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): From c82748cb51461460a19ce6de6c3fb3424fce5cc4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 08:30:54 -0600 Subject: [PATCH 114/119] Docstring --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c175fe4c9ebff..d616e3f92aa4d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1298,7 +1298,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, Parameters ---------- - arr : ndarray, ExtensionArray, DatetimeIndex, IntervalIndex, SparseArray + arr : array-like Input array. indexer : ndarray 1-D array of indices to take, subarrays corresponding to -1 value @@ -1322,7 +1322,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, Returns ------- - subarray : object + subarray : array-like May be the same type as the input, or cast to an ndarray. 
""" From e91934364cb2eb161ea6bf3623fcb49a673c3b7e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 08:51:05 -0600 Subject: [PATCH 115/119] Split tests --- pandas/tests/extension/base.py | 420 ------------------ pandas/tests/extension/base/__init__.py | 42 ++ pandas/tests/extension/base/casting.py | 11 + pandas/tests/extension/base/constructors.py | 43 ++ pandas/tests/extension/base/dtype.py | 46 ++ pandas/tests/extension/base/getitem.py | 119 +++++ pandas/tests/extension/base/interface.py | 53 +++ pandas/tests/extension/base/methods.py | 32 ++ pandas/tests/extension/base/missing.py | 45 ++ pandas/tests/extension/base/reshaping.py | 61 +++ pandas/tests/extension/category/__init__.py | 0 .../{ => category}/test_categorical.py | 2 +- pandas/tests/extension/decimal/__init__.py | 0 .../tests/extension/decimal/decimal_array.py | 86 ++++ .../extension/{ => decimal}/test_decimal.py | 84 +--- pandas/tests/extension/json/__init__.py | 0 .../{test_json.py => json/json_array.py} | 69 --- pandas/tests/extension/json/test_json.py | 73 +++ 18 files changed, 614 insertions(+), 572 deletions(-) delete mode 100644 pandas/tests/extension/base.py create mode 100644 pandas/tests/extension/base/__init__.py create mode 100644 pandas/tests/extension/base/casting.py create mode 100644 pandas/tests/extension/base/constructors.py create mode 100644 pandas/tests/extension/base/dtype.py create mode 100644 pandas/tests/extension/base/getitem.py create mode 100644 pandas/tests/extension/base/interface.py create mode 100644 pandas/tests/extension/base/methods.py create mode 100644 pandas/tests/extension/base/missing.py create mode 100644 pandas/tests/extension/base/reshaping.py create mode 100644 pandas/tests/extension/category/__init__.py rename pandas/tests/extension/{ => category}/test_categorical.py (97%) create mode 100644 pandas/tests/extension/decimal/__init__.py create mode 100644 pandas/tests/extension/decimal/decimal_array.py rename pandas/tests/extension/{ => decimal}/test_decimal.py (66%) create mode 100644 pandas/tests/extension/json/__init__.py rename pandas/tests/extension/{test_json.py => json/json_array.py} (72%) create mode 100644 pandas/tests/extension/json/test_json.py diff --git a/pandas/tests/extension/base.py b/pandas/tests/extension/base.py deleted file mode 100644 index 6ec93734a2787..0000000000000 --- a/pandas/tests/extension/base.py +++ /dev/null @@ -1,420 +0,0 @@ -"""Base test suite for extension arrays. - -These tests are intended for third-party libraries to subclass to validate -that their extension arrays and dtypes satisfy the interface. Moving or -renaming the tests should not be done lightly. - -Libraries are expected to implement a few pytest fixtures to provide data -for the tests. The fixtures may be located in either - -* The same module as your test class. -* A ``conftest.py`` in the same directory as your test class. - -The full list of fixtures may be found in the ``conftest.py`` next to this -file. - -.. code-block:: python - - import pytest - from pandas.tests.extension.base import BaseDtypeTests - - - @pytest.fixture - def dtype(): - return MyDtype() - - - class TestMyDtype(BaseDtypeTests): - pass - - -Your class ``TestDtype`` will inherit all the tests defined on -``BaseDtypeTests``. pytest's fixture discover will supply your ``dtype`` -wherever the test requires it. You're free to implement additional tests. 
-""" -import numpy as np -import pytest - -import pandas as pd -import pandas.util.testing as tm -from pandas.compat import StringIO -from pandas.core.internals import ExtensionBlock, ObjectBlock -from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.dtypes.dtypes import ExtensionDtype - - -class BaseDtypeTests(object): - """Base class for ExtensionDtype classes""" - - def test_name(self, dtype): - assert isinstance(dtype.name, str) - - def test_kind(self, dtype): - valid = set('biufcmMOSUV') - if dtype.kind is not None: - assert dtype.kind in valid - - def test_construct_from_string_own_name(self, dtype): - result = dtype.construct_from_string(dtype.name) - assert type(result) is type(dtype) - - # check OK as classmethod - result = type(dtype).construct_from_string(dtype.name) - assert type(result) is type(dtype) - - def test_is_dtype_from_name(self, dtype): - result = type(dtype).is_dtype(dtype.name) - assert result is True - - def test_is_dtype_unboxes_dtype(self, data, dtype): - assert dtype.is_dtype(data) is True - - def test_is_dtype_from_self(self, dtype): - result = type(dtype).is_dtype(dtype) - assert result is True - - def test_is_not_string_type(self, dtype): - return not pd.api.types.is_string_dtype(dtype) - - def test_is_not_object_type(self, dtype): - return not pd.api.types.is_object_dtype(dtype) - - def test_eq_with_str(self, dtype): - assert dtype == dtype.name - assert dtype != dtype.name + '-suffix' - - def test_eq_with_numpy_object(self, dtype): - assert dtype != np.dtype('object') - - -class BaseInterfaceTests(object): - """Tests that the basic interface is satisfied.""" - # ------------------------------------------------------------------------ - # Interface - # ------------------------------------------------------------------------ - - def test_len(self, data): - assert len(data) == 100 - - def test_ndim(self, data): - assert data.ndim == 1 - - def test_can_hold_na_valid(self, data): - assert data._can_hold_na in {True, False} - - def test_memory_usage(self, data): - s = pd.Series(data) - result = s.memory_usage(index=False) - assert result == s.nbytes - - def test_array_interface(self, data): - result = np.array(data) - assert result[0] == data[0] - - def test_as_ndarray_with_dtype_kind(self, data): - np.array(data, dtype=data.dtype.kind) - - def test_repr(self, data): - ser = pd.Series(data) - assert data.dtype.name in repr(ser) - - df = pd.DataFrame({"A": data}) - repr(df) - - def test_dtype_name_in_info(self, data): - buf = StringIO() - pd.DataFrame({"A": data}).info(buf=buf) - result = buf.getvalue() - assert data.dtype.name in result - - def test_is_extension_array_dtype(self, data): - assert is_extension_array_dtype(data) - assert is_extension_array_dtype(data.dtype) - assert is_extension_array_dtype(pd.Series(data)) - assert isinstance(data.dtype, ExtensionDtype) - - -class BaseConstructorsTests(object): - - def test_series_constructor(self, data): - result = pd.Series(data) - assert result.dtype == data.dtype - assert len(result) == len(data) - assert isinstance(result._data.blocks[0], ExtensionBlock) - assert result._data.blocks[0].values is data - - # Series[EA] is unboxed / boxed correctly - result2 = pd.Series(result) - assert result2.dtype == data.dtype - assert isinstance(result2._data.blocks[0], ExtensionBlock) - - @pytest.mark.parametrize("from_series", [True, False]) - def test_dataframe_constructor_from_dict(self, data, from_series): - if from_series: - data = pd.Series(data) - result = pd.DataFrame({"A": data}) - 
assert result.dtypes['A'] == data.dtype - assert result.shape == (len(data), 1) - assert isinstance(result._data.blocks[0], ExtensionBlock) - - def test_dataframe_from_series(self, data): - result = pd.DataFrame(pd.Series(data)) - assert result.dtypes[0] == data.dtype - assert result.shape == (len(data), 1) - assert isinstance(result._data.blocks[0], ExtensionBlock) - - @pytest.mark.xfail(reason="GH-19342") - def test_series_given_mismatched_index_raises(self, data): - msg = 'Wrong number of items passed 3, placement implies 4' - with tm.assert_raises_regex(ValueError, None) as m: - pd.Series(data[:3], index=[0, 1, 2, 3, 4]) - - assert m.match(msg) - - -class BaseReshapingTests(object): - """Tests for reshaping and concatenation.""" - @pytest.mark.parametrize('in_frame', [True, False]) - def test_concat(self, data, in_frame): - wrapped = pd.Series(data) - if in_frame: - wrapped = pd.DataFrame(wrapped) - result = pd.concat([wrapped, wrapped], ignore_index=True) - - assert len(result) == len(data) * 2 - - if in_frame: - dtype = result.dtypes[0] - else: - dtype = result.dtype - - assert dtype == data.dtype - assert isinstance(result._data.blocks[0], ExtensionBlock) - - def test_align(self, data, na_value): - a = data[:3] - b = data[2:5] - r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) - - # Assumes that the ctor can take a list of scalars of the type - e1 = pd.Series(type(data)(list(a) + [na_value])) - e2 = pd.Series(type(data)([na_value] + list(b))) - tm.assert_series_equal(r1, e1) - tm.assert_series_equal(r2, e2) - - def test_align_frame(self, data, na_value): - a = data[:3] - b = data[2:5] - r1, r2 = pd.DataFrame({'A': a}).align( - pd.DataFrame({'A': b}, index=[1, 2, 3]) - ) - - # Assumes that the ctor can take a list of scalars of the type - e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) - e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) - tm.assert_frame_equal(r1, e1) - tm.assert_frame_equal(r2, e2) - - def test_set_frame_expand_regular_with_extension(self, data): - df = pd.DataFrame({"A": [1] * len(data)}) - df['B'] = data - expected = pd.DataFrame({"A": [1] * len(data), "B": data}) - tm.assert_frame_equal(df, expected) - - def test_set_frame_expand_extension_with_regular(self, data): - df = pd.DataFrame({'A': data}) - df['B'] = [1] * len(data) - expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) - tm.assert_frame_equal(df, expected) - - -class BaseGetitemTests(object): - """Tests for ExtensionArray.__getitem__.""" - - def test_iloc_series(self, data): - ser = pd.Series(data) - result = ser.iloc[:4] - expected = pd.Series(data[:4]) - tm.assert_series_equal(result, expected) - - result = ser.iloc[[0, 1, 2, 3]] - tm.assert_series_equal(result, expected) - - def test_iloc_frame(self, data): - df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) - expected = pd.DataFrame({"A": data[:4]}) - - # slice -> frame - result = df.iloc[:4, [0]] - tm.assert_frame_equal(result, expected) - - # sequence -> frame - result = df.iloc[[0, 1, 2, 3], [0]] - tm.assert_frame_equal(result, expected) - - expected = pd.Series(data[:4], name='A') - - # slice -> series - result = df.iloc[:4, 0] - tm.assert_series_equal(result, expected) - - # sequence -> series - result = df.iloc[:4, 0] - tm.assert_series_equal(result, expected) - - def test_loc_series(self, data): - ser = pd.Series(data) - result = ser.loc[:3] - expected = pd.Series(data[:4]) - tm.assert_series_equal(result, expected) - - result = ser.loc[[0, 1, 2, 3]] - tm.assert_series_equal(result, expected) - - 
def test_loc_frame(self, data): - df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) - expected = pd.DataFrame({"A": data[:4]}) - - # slice -> frame - result = df.loc[:3, ['A']] - tm.assert_frame_equal(result, expected) - - # sequence -> frame - result = df.loc[[0, 1, 2, 3], ['A']] - tm.assert_frame_equal(result, expected) - - expected = pd.Series(data[:4], name='A') - - # slice -> series - result = df.loc[:3, 'A'] - tm.assert_series_equal(result, expected) - - # sequence -> series - result = df.loc[:3, 'A'] - tm.assert_series_equal(result, expected) - - def test_getitem_scalar(self, data): - result = data[0] - assert isinstance(result, data.dtype.type) - - result = pd.Series(data)[0] - assert isinstance(result, data.dtype.type) - - def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): - result = data_missing[0] - assert na_cmp(result, na_value) - - def test_getitem_mask(self, data): - # Empty mask, raw array - mask = np.zeros(len(data), dtype=bool) - result = data[mask] - assert len(result) == 0 - assert isinstance(result, type(data)) - - # Empty mask, in series - mask = np.zeros(len(data), dtype=bool) - result = pd.Series(data)[mask] - assert len(result) == 0 - assert result.dtype == data.dtype - - # non-empty mask, raw array - mask[0] = True - result = data[mask] - assert len(result) == 1 - assert isinstance(result, type(data)) - - # non-empty mask, in series - result = pd.Series(data)[mask] - assert len(result) == 1 - assert result.dtype == data.dtype - - def test_getitem_slice(self, data): - # getitem[slice] should return an array - result = data[slice(0)] # empty - assert isinstance(result, type(data)) - - result = data[slice(1)] # scalar - assert isinstance(result, type(data)) - - def test_take_sequence(self, data): - result = pd.Series(data)[[0, 1, 3]] - assert result.iloc[0] == data[0] - assert result.iloc[1] == data[1] - assert result.iloc[2] == data[3] - - -class BaseMissingTests(object): - def test_isna(self, data_missing): - if data_missing._can_hold_na: - expected = np.array([True, False]) - else: - expected = np.array([False, False]) - - result = pd.isna(data_missing) - tm.assert_numpy_array_equal(result, expected) - - result = pd.Series(data_missing).isna() - expected = pd.Series(expected) - tm.assert_series_equal(result, expected) - - def test_dropna_series(self, data_missing): - ser = pd.Series(data_missing) - result = ser.dropna() - expected = ser.iloc[[1]] - tm.assert_series_equal(result, expected) - - def test_dropna_frame(self, data_missing): - df = pd.DataFrame({"A": data_missing}) - - # defaults - result = df.dropna() - expected = df.iloc[[1]] - tm.assert_frame_equal(result, expected) - - # axis = 1 - result = df.dropna(axis='columns') - expected = pd.DataFrame(index=[0, 1]) - tm.assert_frame_equal(result, expected) - - # multiple - df = pd.DataFrame({"A": data_missing, - "B": [1, np.nan]}) - result = df.dropna() - expected = df.iloc[:0] - tm.assert_frame_equal(result, expected) - - -class BaseMethodsTests(object): - """Various Series and DataFrame methods.""" - - @pytest.mark.parametrize('dropna', [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() - - tm.assert_series_equal(result, expected) - - def test_count(self, data_missing): - df = pd.DataFrame({"A": data_missing}) - result 
= df.count(axis='columns') - expected = pd.Series([0, 1]) - tm.assert_series_equal(result, expected) - - def test_apply_simple_series(self, data): - result = pd.Series(data).apply(id) - assert isinstance(result, pd.Series) - - -class BaseCastingTests(object): - """Casting to and from ExtensionDtypes""" - - def test_astype_object_series(self, all_data): - ser = pd.Series({"A": all_data}) - result = ser.astype(object) - assert isinstance(result._data.blocks[0], ObjectBlock) diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py new file mode 100644 index 0000000000000..2273ef1f3e110 --- /dev/null +++ b/pandas/tests/extension/base/__init__.py @@ -0,0 +1,42 @@ +"""Base test suite for extension arrays. + +These tests are intended for third-party libraries to subclass to validate +that their extension arrays and dtypes satisfy the interface. Moving or +renaming the tests should not be done lightly. + +Libraries are expected to implement a few pytest fixtures to provide data +for the tests. The fixtures may be located in either + +* The same module as your test class. +* A ``conftest.py`` in the same directory as your test class. + +The full list of fixtures may be found in the ``conftest.py`` next to this +file. + +.. code-block:: python + + import pytest + from pandas.tests.extension.base import BaseDtypeTests + + + @pytest.fixture + def dtype(): + return MyDtype() + + + class TestMyDtype(BaseDtypeTests): + pass + + +Your class ``TestDtype`` will inherit all the tests defined on +``BaseDtypeTests``. pytest's fixture discover will supply your ``dtype`` +wherever the test requires it. You're free to implement additional tests. +""" +from .casting import BaseCastingTests # noqa +from .constructors import BaseConstructorsTests # noqa +from .dtype import BaseDtypeTests # noqa +from .getitem import BaseGetitemTests # noqa +from .interface import BaseInterfaceTests # noqa +from .methods import BaseMethodsTests # noqa +from .missing import BaseMissingTests # noqa +from .reshaping import BaseReshapingTests # noqa diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py new file mode 100644 index 0000000000000..bcfbf0a247269 --- /dev/null +++ b/pandas/tests/extension/base/casting.py @@ -0,0 +1,11 @@ +import pandas as pd +from pandas.core.internals import ObjectBlock + + +class BaseCastingTests(object): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series({"A": all_data}) + result = ser.astype(object) + assert isinstance(result._data.blocks[0], ObjectBlock) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py new file mode 100644 index 0000000000000..7ad100e6289e9 --- /dev/null +++ b/pandas/tests/extension/base/constructors.py @@ -0,0 +1,43 @@ +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.internals import ExtensionBlock + + +class BaseConstructorsTests(object): + + def test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) + assert isinstance(result._data.blocks[0], ExtensionBlock) + assert result._data.blocks[0].values is data + + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + assert isinstance(result2._data.blocks[0], ExtensionBlock) + + @pytest.mark.parametrize("from_series", [True, False]) + def 
test_dataframe_constructor_from_dict(self, data, from_series): + if from_series: + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes['A'] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + @pytest.mark.xfail(reason="GH-19342") + def test_series_given_mismatched_index_raises(self, data): + msg = 'Wrong number of items passed 3, placement implies 4' + with tm.assert_raises_regex(ValueError, None) as m: + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + assert m.match(msg) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py new file mode 100644 index 0000000000000..f5015bd469f13 --- /dev/null +++ b/pandas/tests/extension/base/dtype.py @@ -0,0 +1,46 @@ +import numpy as np +import pandas as pd + + +class BaseDtypeTests(object): + """Base class for ExtensionDtype classes""" + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = set('biufcmMOSUV') + if dtype.kind is not None: + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + def test_is_not_string_type(self, dtype): + return not pd.api.types.is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not pd.api.types.is_object_dtype(dtype) + + def test_eq_with_str(self, dtype): + assert dtype == dtype.name + assert dtype != dtype.name + '-suffix' + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype('object') diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py new file mode 100644 index 0000000000000..f43971e928cac --- /dev/null +++ b/pandas/tests/extension/base/getitem.py @@ -0,0 +1,119 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class BaseGetitemTests(object): + """Tests for ExtensionArray.__getitem__.""" + + def test_iloc_series(self, data): + ser = pd.Series(data) + result = ser.iloc[:4] + expected = pd.Series(data[:4]) + tm.assert_series_equal(result, expected) + + result = ser.iloc[[0, 1, 2, 3]] + tm.assert_series_equal(result, expected) + + def test_iloc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.iloc[:4, [0]] + tm.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.iloc[[0, 1, 2, 3], [0]] + tm.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.iloc[:4, 0] + tm.assert_series_equal(result, expected) + + # sequence -> series + result = df.iloc[:4, 0] + tm.assert_series_equal(result, expected) + + def test_loc_series(self, data): + ser = pd.Series(data) 
+ result = ser.loc[:3] + expected = pd.Series(data[:4]) + tm.assert_series_equal(result, expected) + + result = ser.loc[[0, 1, 2, 3]] + tm.assert_series_equal(result, expected) + + def test_loc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.loc[:3, ['A']] + tm.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.loc[[0, 1, 2, 3], ['A']] + tm.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.loc[:3, 'A'] + tm.assert_series_equal(result, expected) + + # sequence -> series + result = df.loc[:3, 'A'] + tm.assert_series_equal(result, expected) + + def test_getitem_scalar(self, data): + result = data[0] + assert isinstance(result, data.dtype.type) + + result = pd.Series(data)[0] + assert isinstance(result, data.dtype.type) + + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): + result = data_missing[0] + assert na_cmp(result, na_value) + + def test_getitem_mask(self, data): + # Empty mask, raw array + mask = np.zeros(len(data), dtype=bool) + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + # Empty mask, in series + mask = np.zeros(len(data), dtype=bool) + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + # non-empty mask, raw array + mask[0] = True + result = data[mask] + assert len(result) == 1 + assert isinstance(result, type(data)) + + # non-empty mask, in series + result = pd.Series(data)[mask] + assert len(result) == 1 + assert result.dtype == data.dtype + + def test_getitem_slice(self, data): + # getitem[slice] should return an array + result = data[slice(0)] # empty + assert isinstance(result, type(data)) + + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) + + def test_take_sequence(self, data): + result = pd.Series(data)[[0, 1, 3]] + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py new file mode 100644 index 0000000000000..8f17131a9482b --- /dev/null +++ b/pandas/tests/extension/base/interface.py @@ -0,0 +1,53 @@ +import numpy as np + +import pandas as pd +from pandas.compat import StringIO +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class BaseInterfaceTests(object): + """Tests that the basic interface is satisfied.""" + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + + def test_len(self, data): + assert len(data) == 100 + + def test_ndim(self, data): + assert data.ndim == 1 + + def test_can_hold_na_valid(self, data): + assert data._can_hold_na in {True, False} + + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + def test_as_ndarray_with_dtype_kind(self, data): + np.array(data, dtype=data.dtype.kind) + + def test_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + 
result = buf.getvalue() + assert data.dtype.name in result + + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py new file mode 100644 index 0000000000000..c77811ca63926 --- /dev/null +++ b/pandas/tests/extension/base/methods.py @@ -0,0 +1,32 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class BaseMethodsTests(object): + """Various Series and DataFrame methods.""" + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis='columns') + expected = pd.Series([0, 1]) + tm.assert_series_equal(result, expected) + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py new file mode 100644 index 0000000000000..1d6f2eea1f1f9 --- /dev/null +++ b/pandas/tests/extension/base/missing.py @@ -0,0 +1,45 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class BaseMissingTests(object): + def test_isna(self, data_missing): + if data_missing._can_hold_na: + expected = np.array([True, False]) + else: + expected = np.array([False, False]) + + result = pd.isna(data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + tm.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = df.dropna() + expected = df.iloc[[1]] + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis='columns') + expected = pd.DataFrame(index=[0, 1]) + tm.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, + "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py new file mode 100644 index 0000000000000..d8f577c6fa50d --- /dev/null +++ b/pandas/tests/extension/base/reshaping.py @@ -0,0 +1,61 @@ +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.internals import ExtensionBlock + + +class BaseReshapingTests(object): + """Tests for reshaping and concatenation.""" + @pytest.mark.parametrize('in_frame', [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped = pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + + assert len(result) == len(data) * 2 + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = 
result.dtype + + assert dtype == data.dtype + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_align(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) + tm.assert_series_equal(r1, e1) + tm.assert_series_equal(r2, e2) + + def test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + tm.assert_frame_equal(r1, e1) + tm.assert_frame_equal(r2, e2) + + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df['B'] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + tm.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({'A': data}) + df['B'] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/category/test_categorical.py similarity index 97% rename from pandas/tests/extension/test_categorical.py rename to pandas/tests/extension/category/test_categorical.py index fbec835a72ce8..ec548fca6d901 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -5,7 +5,7 @@ from pandas.api.types import CategoricalDtype from pandas import Categorical -from . 
import base +from pandas.tests.extension import base def make_data(): diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/decimal/decimal_array.py b/pandas/tests/extension/decimal/decimal_array.py new file mode 100644 index 0000000000000..f526ac5996a10 --- /dev/null +++ b/pandas/tests/extension/decimal/decimal_array.py @@ -0,0 +1,86 @@ +import decimal +import numbers +import random +import sys + +import numpy as np + +import pandas as pd +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.base import ExtensionDtype + + +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = 'decimal' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class DecimalArray(ExtensionArray): + dtype = DecimalDtype() + + def __init__(self, values): + values = np.asarray(values, dtype=object) + + self.values = values + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.values[item] + else: + return type(self)(self.values[item]) + + def copy(self, deep=False): + if deep: + return type(self)(self.values.copy()) + return type(self)(self) + + def __setitem__(self, key, value): + if pd.api.types.is_list_like(value): + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + self.values[key] = value + + def __len__(self): + return len(self.values) + + def __repr__(self): + return repr(self.values) + + @property + def nbytes(self): + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self.values]) + + def take(self, indexer, allow_fill=True, fill_value=None): + mask = indexer == -1 + + out = self.values.take(indexer) + out[mask] = self._na_value + + return type(self)(out) + + @property + def _na_value(self): + return decimal.Decimal('NaN') + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x.values for x in to_concat])) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py similarity index 66% rename from pandas/tests/extension/test_decimal.py rename to pandas/tests/extension/decimal/test_decimal.py index 46ca08f94c72b..9f65ae9a35b6c 100644 --- a/pandas/tests/extension/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -1,93 +1,13 @@ import decimal -import numbers -import random -import sys import numpy as np import pandas as pd import pandas.util.testing as tm import pytest -from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.base import ExtensionDtype +from pandas.tests.extension import base -from . 
import base - - -class DecimalDtype(ExtensionDtype): - type = decimal.Decimal - name = 'decimal' - - @classmethod - def construct_from_string(cls, string): - if string == cls.name: - return cls() - else: - raise TypeError("Cannot construct a '{}' from " - "'{}'".format(cls, string)) - - -class DecimalArray(ExtensionArray): - dtype = DecimalDtype() - - def __init__(self, values): - values = np.asarray(values, dtype=object) - - self.values = values - - def __getitem__(self, item): - if isinstance(item, numbers.Integral): - return self.values[item] - else: - return type(self)(self.values[item]) - - def copy(self, deep=False): - if deep: - return type(self)(self.values.copy()) - return type(self)(self) - - def __setitem__(self, key, value): - if pd.api.types.is_list_like(value): - value = [decimal.Decimal(v) for v in value] - else: - value = decimal.Decimal(value) - self.values[key] = value - - def __len__(self): - return len(self.values) - - def __repr__(self): - return repr(self.values) - - @property - def nbytes(self): - n = len(self) - if n: - return n * sys.getsizeof(self[0]) - return 0 - - def isna(self): - return np.array([x.is_nan() for x in self.values]) - - def take(self, indexer, allow_fill=True, fill_value=None): - mask = indexer == -1 - - out = self.values.take(indexer) - out[mask] = self._na_value - - return type(self)(out) - - @property - def _na_value(self): - return decimal.Decimal('NaN') - - @classmethod - def _concat_same_type(cls, to_concat): - return cls(np.concatenate([x.values for x in to_concat])) - - -def make_data(): - return [decimal.Decimal(random.random()) for _ in range(100)] +from .decimal_array import DecimalDtype, DecimalArray, make_data @pytest.fixture diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/test_json.py b/pandas/tests/extension/json/json_array.py similarity index 72% rename from pandas/tests/extension/test_json.py rename to pandas/tests/extension/json/json_array.py index ecaa36b6db9c9..90aac93c68f64 100644 --- a/pandas/tests/extension/test_json.py +++ b/pandas/tests/extension/json/json_array.py @@ -1,23 +1,15 @@ import collections import itertools import numbers -import operator import random import string import sys import numpy as np -import pytest - from pandas.core.dtypes.base import ExtensionDtype from pandas.core.arrays import ExtensionArray -from . 
import base - -pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, - reason="Py2 doesn't have a UserDict") - class JSONDtype(ExtensionDtype): type = collections.Mapping @@ -105,64 +97,3 @@ def make_data(): return [collections.UserDict([ (random.choice(string.ascii_letters), random.randint(0, 100)) for _ in range(random.randint(0, 10))]) for _ in range(100)] - - -@pytest.fixture -def dtype(): - return JSONDtype() - - -@pytest.fixture -def data(): - """Length-100 PeriodArray for semantics test.""" - return JSONArray(make_data()) - - -@pytest.fixture -def data_missing(): - """Length 2 array with [NA, Valid]""" - return JSONArray([{}, {'a': 10}]) - - -@pytest.fixture -def na_value(): - return {} - - -@pytest.fixture -def na_cmp(): - return operator.eq - - -class TestDtype(base.BaseDtypeTests): - pass - - -class TestInterface(base.BaseInterfaceTests): - pass - - -class TestConstructors(base.BaseConstructorsTests): - pass - - -class TestReshaping(base.BaseReshapingTests): - pass - - -class TestGetitem(base.BaseGetitemTests): - pass - - -class TestMissing(base.BaseMissingTests): - pass - - -class TestMethods(base.BaseMethodsTests): - @pytest.mark.skip(reason="Unhashable") - def test_value_counts(self, all_data, dropna): - pass - - -class TestCasting(base.BaseCastingTests): - pass diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py new file mode 100644 index 0000000000000..4790dcd973542 --- /dev/null +++ b/pandas/tests/extension/json/test_json.py @@ -0,0 +1,73 @@ +import operator +import sys + +import pytest + + +from pandas.tests.extension import base + +from .json_array import JSONArray, JSONDtype, make_data + +pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, + reason="Py2 doesn't have a UserDict") + + +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return JSONArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {'a': 10}]) + + +@pytest.fixture +def na_value(): + return {} + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="Unhashable") + def test_value_counts(self, all_data, dropna): + pass + + +class TestCasting(base.BaseCastingTests): + pass From 1ea74dac9eb850a256d0023d5b55b7388681102f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 09:00:14 -0600 Subject: [PATCH 116/119] Revert index change --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ec877ec31e49c..1b781be8fa2b3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2829,7 +2829,7 @@ def reindexer(value): # turn me into an ndarray value = _sanitize_index(value, self.index, copy=False) - if not isinstance(value, (np.ndarray, Index, ExtensionArray)): + if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: value = maybe_convert_platform(value) else: From 0c41a341aaff31248a5b34511f1dfbc81b7de65f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 
22 Feb 2018 09:00:31 -0600 Subject: [PATCH 117/119] Copy changes --- pandas/core/series.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c9df1d60895d2..87132c5d2f313 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -180,10 +180,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None, name = data.name if dtype is not None: + # astype copies data = data.astype(dtype) - - # need to copy to avoid aliasing issues - data = data._values.copy() + else: + # need to copy to avoid aliasing issues + data = data._values.copy() copy = False elif isinstance(data, np.ndarray): From 009beceb3977652e2bcee443c5216cb54451cabb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 09:17:57 -0600 Subject: [PATCH 118/119] Simplify EA implementation names comments for object vs. str missing values --- pandas/core/dtypes/missing.py | 2 ++ pandas/tests/extension/decimal/{decimal_array.py => array.py} | 0 pandas/tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/{json_array.py => array.py} | 0 pandas/tests/extension/json/test_json.py | 2 +- 5 files changed, 4 insertions(+), 2 deletions(-) rename pandas/tests/extension/decimal/{decimal_array.py => array.py} (100%) rename pandas/tests/extension/json/{json_array.py => array.py} (100%) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 36dbb0ee4b98f..01c88c269e7e0 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -145,8 +145,10 @@ def _isna_ndarraylike(obj): shape = values.shape if is_string_like_dtype(dtype): + # object array of strings result = np.zeros(values.shape, dtype=bool) else: + # object array of non-strings result = np.empty(shape, dtype=bool) vec = libmissing.isnaobj(values.ravel()) result[...] 
= vec.reshape(shape) diff --git a/pandas/tests/extension/decimal/decimal_array.py b/pandas/tests/extension/decimal/array.py similarity index 100% rename from pandas/tests/extension/decimal/decimal_array.py rename to pandas/tests/extension/decimal/array.py diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 9f65ae9a35b6c..7b4d079ecad87 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -7,7 +7,7 @@ from pandas.tests.extension import base -from .decimal_array import DecimalDtype, DecimalArray, make_data +from .array import DecimalDtype, DecimalArray, make_data @pytest.fixture diff --git a/pandas/tests/extension/json/json_array.py b/pandas/tests/extension/json/array.py similarity index 100% rename from pandas/tests/extension/json/json_array.py rename to pandas/tests/extension/json/array.py diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 4790dcd973542..e0721bb1d8d1a 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -6,7 +6,7 @@ from pandas.tests.extension import base -from .json_array import JSONArray, JSONDtype, make_data +from .array import JSONArray, JSONDtype, make_data pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, reason="Py2 doesn't have a UserDict") From ea5562b99d55062b2bb0a5b359bd5375adb3a46e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 10:07:34 -0600 Subject: [PATCH 119/119] Linting --- pandas/core/series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 87132c5d2f313..12865bfe44a3b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -29,7 +29,6 @@ is_iterator, is_dict_like, is_scalar, - is_sparse, _is_unorderable_exception, _ensure_platform_int, pandas_dtype)
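For reference, a minimal sketch of the semantics of the reworked ``ExtensionDtype.is_dtype`` (including the unboxing of a ``dtype`` attribute exercised by ``test_is_dtype_unboxes_dtype``), run against the patched ``pandas.core.dtypes.base`` above. ``DemoDtype`` and ``Boxed`` are hypothetical names used only for illustration and are not part of the patch series.

.. code-block:: python

    import numpy as np

    from pandas.core.dtypes.base import ExtensionDtype


    class DemoDtype(ExtensionDtype):
        # hypothetical dtype, defined only to exercise ``is_dtype``
        type = object
        name = 'demo'

        @classmethod
        def construct_from_string(cls, string):
            if string == cls.name:
                return cls()
            raise TypeError("Cannot construct a 'DemoDtype' from "
                            "'{}'".format(string))


    class Boxed(object):
        # stand-in for an array- or Series-like object exposing ``.dtype``
        dtype = DemoDtype()


    assert DemoDtype.is_dtype(DemoDtype())            # instance of cls
    assert DemoDtype.is_dtype('demo')                 # via construct_from_string
    assert DemoDtype.is_dtype(Boxed())                # unboxes ``.dtype``
    assert not DemoDtype.is_dtype(np.dtype('int64'))  # never matches a np.dtype
    assert not DemoDtype.is_dtype(None)

Because ``is_dtype`` now unboxes a ``dtype`` attribute, callers can pass an extension array (or a Series backed by one) directly and the check is applied to its ``.dtype``.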