From 1231b9e2623fb0c68fb0c1124e399b0a18d74ffd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Feb 2018 12:01:26 -0600 Subject: [PATCH 1/5] API: Added ExtensionArray constructors Adds two new (private, but part of the interface) constructors to EA. Closes #19906 --- pandas/core/arrays/base.py | 49 +++++++++++++++++++++++++ pandas/tests/extension/decimal/array.py | 8 ++++ pandas/tests/extension/json/array.py | 16 ++++++-- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cec881394a021..78b598a2b1a78 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -18,6 +18,8 @@ class ExtensionArray(object): The interface includes the following abstract methods that must be implemented by subclasses: + * _from_extension_array + * _from_scalars * __getitem__ * __len__ * dtype @@ -56,6 +58,53 @@ class ExtensionArray(object): # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. _typ = 'extension' + + # ------------------------------------------------------------------------ + # Constructors + # ------------------------------------------------------------------------ + @classmethod + def _from_extension_array(cls, array, copy=True): + """Construct a new ExtensionArray from an existing instance. + + Parameters + ---------- + array : ExtensionArray + An extension array of the same type as cls. + copy : bool, default True + Whether a copy should be made using ``array.copy``. Note that + even if ``copy=False`` there's no guarantee that the underlying + data of the two arrays is the same. + + Returns + ------- + ExtensionArray + + See Also + -------- + _from_scalars + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_scalars(cls, scalars): + """Construct a new ExtensionArray from a sequence of scalars. + + Parameters + ---------- + scalars : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type``. + + Returns + ------- + ExtensionArray + + See Also + -------- + _from_extension_array + """ + raise AbstractMethodError(cls) + # ------------------------------------------------------------------------ # Must be a Sequence # ------------------------------------------------------------------------ diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 8b2eaadeca99e..bf32d5f8e36da 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -32,6 +32,14 @@ def __init__(self, values): self.values = values + @classmethod + def _from_extension_array(cls, array, copy=True): + return cls(array) + + @classmethod + def _from_scalars(cls, scalars): + return cls(scalars) + def __getitem__(self, item): if isinstance(item, numbers.Integral): return self.values[item] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 90aac93c68f64..28332527957b0 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -33,13 +33,21 @@ def __init__(self, values): raise TypeError self.data = values + @classmethod + def _from_extension_array(cls, array, copy=True): + return cls(array) + + @classmethod + def _from_scalars(cls, scalars): + return cls(scalars) + def __getitem__(self, item): if isinstance(item, numbers.Integral): return self.data[item] elif isinstance(item, np.ndarray) and item.dtype == 'bool': - return type(self)([x for x, m in zip(self, item) if m]) + return self._from_scalars([x for x, m in zip(self, item) if m]) else: - return type(self)(self.data[item]) + return self._from_extension_array(self.data[item]) def __setitem__(self, key, value): if isinstance(key, numbers.Integral): @@ -77,10 +85,10 @@ def isna(self): def take(self, indexer, allow_fill=True, fill_value=None): output = [self.data[loc] if loc != -1 else self._na_value for loc in indexer] - return type(self)(output) + return self._from_scalars(output) def copy(self, deep=False): - return type(self)(self.data[:]) + return self._from_extension_array(self.data[:]) @property def _na_value(self): From a5e1a56f9495da9d6f7b066d45821d65f4d4938c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Feb 2018 20:42:24 -0600 Subject: [PATCH 2/5] Removed _from_extension_array --- pandas/core/arrays/base.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 78b598a2b1a78..2b4723099e9e1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -18,7 +18,6 @@ class ExtensionArray(object): The interface includes the following abstract methods that must be implemented by subclasses: - * _from_extension_array * _from_scalars * __getitem__ * __len__ @@ -62,29 +61,6 @@ class ExtensionArray(object): # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ - @classmethod - def _from_extension_array(cls, array, copy=True): - """Construct a new ExtensionArray from an existing instance. - - Parameters - ---------- - array : ExtensionArray - An extension array of the same type as cls. - copy : bool, default True - Whether a copy should be made using ``array.copy``. Note that - even if ``copy=False`` there's no guarantee that the underlying - data of the two arrays is the same. - - Returns - ------- - ExtensionArray - - See Also - -------- - _from_scalars - """ - raise AbstractMethodError(cls) - @classmethod def _from_scalars(cls, scalars): """Construct a new ExtensionArray from a sequence of scalars. @@ -98,10 +74,6 @@ def _from_scalars(cls, scalars): Returns ------- ExtensionArray - - See Also - -------- - _from_extension_array """ raise AbstractMethodError(cls) From 10170882b351c60bd1874967a0e3afc1cb92c705 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Feb 2018 20:42:34 -0600 Subject: [PATCH 3/5] Added test for _from_scalars --- pandas/core/arrays/categorical.py | 4 ++++ pandas/tests/extension/base/constructors.py | 5 +++++ pandas/tests/extension/decimal/array.py | 4 ---- pandas/tests/extension/json/array.py | 8 ++------ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6eeabf0148d0..4afced39ff200 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -364,6 +364,10 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, self._dtype = self._dtype.update_dtype(dtype) self._codes = coerce_indexer_dtype(codes, dtype.categories) + @classmethod + def _from_scalars(cls, scalars): + return cls(scalars) + @property def categories(self): """The categories of this categorical. diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 2d5d747aec5a7..142bc92e7ee1e 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -9,6 +9,11 @@ class BaseConstructorsTests(BaseExtensionTests): + def test_array_from_scalars(self, data): + scalars = [data[0], data[1], data[2]] + result = data._from_scalars(scalars) + assert isinstance(result, type(data)) + def test_series_constructor(self, data): result = pd.Series(data) assert result.dtype == data.dtype diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index bf32d5f8e36da..ea72f28e00428 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -32,10 +32,6 @@ def __init__(self, values): self.values = values - @classmethod - def _from_extension_array(cls, array, copy=True): - return cls(array) - @classmethod def _from_scalars(cls, scalars): return cls(scalars) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 28332527957b0..7aa7cbc7a3830 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -33,10 +33,6 @@ def __init__(self, values): raise TypeError self.data = values - @classmethod - def _from_extension_array(cls, array, copy=True): - return cls(array) - @classmethod def _from_scalars(cls, scalars): return cls(scalars) @@ -47,7 +43,7 @@ def __getitem__(self, item): elif isinstance(item, np.ndarray) and item.dtype == 'bool': return self._from_scalars([x for x, m in zip(self, item) if m]) else: - return self._from_extension_array(self.data[item]) + return type(self)(self.data[item]) def __setitem__(self, key, value): if isinstance(key, numbers.Integral): @@ -88,7 +84,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): return self._from_scalars(output) def copy(self, deep=False): - return self._from_extension_array(self.data[:]) + return type(self)(self.data[:]) @property def _na_value(self): From 7cd22ce4fae69d098406e214c736f3346abfad33 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 06:32:25 -0600 Subject: [PATCH 4/5] Rename --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/tests/extension/base/constructors.py | 2 +- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/extension/json/array.py | 8 +++++--- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 2b4723099e9e1..f950ab6caaf5c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -62,7 +62,7 @@ class ExtensionArray(object): # Constructors # ------------------------------------------------------------------------ @classmethod - def _from_scalars(cls, scalars): + def _constructor_from_sequence(cls, scalars): """Construct a new ExtensionArray from a sequence of scalars. Parameters diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4afced39ff200..e23dc3b3e5b89 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -365,7 +365,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, self._codes = coerce_indexer_dtype(codes, dtype.categories) @classmethod - def _from_scalars(cls, scalars): + def _constructor_from_sequence(cls, scalars): return cls(scalars) @property diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 142bc92e7ee1e..4ac04d71338fd 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -11,7 +11,7 @@ class BaseConstructorsTests(BaseExtensionTests): def test_array_from_scalars(self, data): scalars = [data[0], data[1], data[2]] - result = data._from_scalars(scalars) + result = data._constructor_from_sequence(scalars) assert isinstance(result, type(data)) def test_series_constructor(self, data): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index ea72f28e00428..736556e4be20d 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -33,7 +33,7 @@ def __init__(self, values): self.values = values @classmethod - def _from_scalars(cls, scalars): + def _constructor_from_sequence(cls, scalars): return cls(scalars) def __getitem__(self, item): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 7aa7cbc7a3830..21addf9d1549f 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -34,14 +34,16 @@ def __init__(self, values): self.data = values @classmethod - def _from_scalars(cls, scalars): + def _constructor_from_sequence(cls, scalars): return cls(scalars) def __getitem__(self, item): if isinstance(item, numbers.Integral): return self.data[item] elif isinstance(item, np.ndarray) and item.dtype == 'bool': - return self._from_scalars([x for x, m in zip(self, item) if m]) + return self._constructor_from_sequence([ + x for x, m in zip(self, item) if m + ]) else: return type(self)(self.data[item]) @@ -81,7 +83,7 @@ def isna(self): def take(self, indexer, allow_fill=True, fill_value=None): output = [self.data[loc] if loc != -1 else self._na_value for loc in indexer] - return self._from_scalars(output) + return self._constructor_from_sequence(output) def copy(self, deep=False): return type(self)(self.data[:]) From 8406f119ef8daebcd8e4eccc7d7b3cc21643910c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 06:51:12 -0600 Subject: [PATCH 5/5] DOC: last one --- pandas/core/arrays/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index f950ab6caaf5c..37074b563efbd 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -18,7 +18,7 @@ class ExtensionArray(object): The interface includes the following abstract methods that must be implemented by subclasses: - * _from_scalars + * _constructor_from_sequence * __getitem__ * __len__ * dtype @@ -70,7 +70,6 @@ def _constructor_from_sequence(cls, scalars): scalars : Sequence Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. - Returns ------- ExtensionArray