diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 8a515661920f3..51555c57b2288 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -3,8 +3,8 @@ register_index_accessor, register_series_accessor) from pandas.core.algorithms import take # noqa -from pandas.core.arrays.base import (ExtensionArray, # noqa - ExtensionScalarOpsMixin) +from pandas.core.arrays import (ExtensionArray, # noqa + ExtensionScalarOpsMixin) from pandas.core.dtypes.dtypes import ( # noqa ExtensionDtype, register_extension_dtype ) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 713a5b1120beb..59c162251c58f 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -56,6 +56,8 @@ def load_reduce(self): # If classes are moved, provide compat here. _class_locations_map = { + ('pandas.core.sparse.array', 'SparseArray'): + ('pandas.core.arrays', 'SparseArray'), # 15477 ('pandas.core.base', 'FrozenNDArray'): @@ -88,7 +90,7 @@ def load_reduce(self): # 15998 top-level dirs moving ('pandas.sparse.array', 'SparseArray'): - ('pandas.core.sparse.array', 'SparseArray'), + ('pandas.core.arrays.sparse', 'SparseArray'), ('pandas.sparse.series', 'SparseSeries'): ('pandas.core.sparse.series', 'SparseSeries'), ('pandas.sparse.frame', 'SparseDataFrame'): diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 29f258bf1b29e..0537b79541641 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -8,3 +8,4 @@ from .timedeltas import TimedeltaArrayMixin # noqa from .integer import ( # noqa IntegerArray, integer_array) +from .sparse import SparseArray # noqa diff --git a/pandas/core/sparse/array.py b/pandas/core/arrays/sparse.py similarity index 85% rename from pandas/core/sparse/array.py rename to pandas/core/arrays/sparse.py index 15b5118db2230..f5e54e4425444 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/arrays/sparse.py 
@@ -4,6 +4,7 @@ from __future__ import division # pylint: disable=E1101,E1103,W0231 +import re import operator import numbers import numpy as np @@ -16,8 +17,10 @@ from pandas.errors import PerformanceWarning from pandas.compat.numpy import function as nv -from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.generic import ( ABCSparseSeries, ABCSeries, ABCIndexClass ) @@ -45,7 +48,252 @@ import pandas.core.algorithms as algos import pandas.io.formats.printing as printing -from pandas.core.sparse.dtype import SparseDtype + +# ---------------------------------------------------------------------------- +# Dtype + +@register_extension_dtype +class SparseDtype(ExtensionDtype): + """ + Dtype for data stored in :class:`SparseArray`. + + This dtype implements the pandas ExtensionDtype interface. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 + The dtype of the underlying array storing the non-fill value values. + fill_value : scalar, optional. + The scalar value not stored in the SparseArray. By default, this + depends on `dtype`. + + ========== ========== + dtype na_value + ========== ========== + float ``np.nan`` + int ``0`` + bool ``False`` + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + ========== ========== + + The default value may be overridden by specifying a `fill_value`. + """ + # We include `_is_na_fill_value` in the metadata to avoid hash collisions + # between SparseDtype(float, 0.0) and SparseDtype(float, nan). + # Without is_na_fill_value in the comparison, those would be equal since + # hash(nan) is (sometimes?) 0. 
+ _metadata = ('_dtype', '_fill_value', '_is_na_fill_value') + + def __init__(self, dtype=np.float64, fill_value=None): + # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None + from pandas.core.dtypes.missing import na_value_for_dtype + from pandas.core.dtypes.common import ( + pandas_dtype, is_string_dtype, is_scalar + ) + + if isinstance(dtype, type(self)): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + dtype = pandas_dtype(dtype) + if is_string_dtype(dtype): + dtype = np.dtype('object') + + if fill_value is None: + fill_value = na_value_for_dtype(dtype) + + if not is_scalar(fill_value): + raise ValueError("fill_value must be a scalar. Got {} " + "instead".format(fill_value)) + self._dtype = dtype + self._fill_value = fill_value + + def __hash__(self): + # Python3 doesn't inherit __hash__ when a base class overrides + # __eq__, so we explicitly do it here. + return super(SparseDtype, self).__hash__() + + def __eq__(self, other): + # We have to override __eq__ to handle NA values in _metadata. + # The base class does simple == checks, which fail for NA. + if isinstance(other, compat.string_types): + try: + other = self.construct_from_string(other) + except TypeError: + return False + + if isinstance(other, type(self)): + subtype = self.subtype == other.subtype + if self._is_na_fill_value: + # this case is complicated by two things: + # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) + # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) + # i.e. we want to treat any floating-point NaN as equal, but + # not a floating-point NaN and a datetime NaT. + fill_value = ( + other._is_na_fill_value and + isinstance(self.fill_value, type(other.fill_value)) or + isinstance(other.fill_value, type(self.fill_value)) + ) + else: + fill_value = self.fill_value == other.fill_value + + return subtype and fill_value + return False + + @property + def fill_value(self): + """ + The fill value of the array. 
+ + Converting the SparseArray to a dense ndarray will fill the + array with this value. + + .. warning:: + + It's possible to end up with a SparseArray that has ``fill_value`` + values in ``sp_values``. This can occur, for example, when setting + ``SparseArray.fill_value`` directly. + """ + return self._fill_value + + @property + def _is_na_fill_value(self): + from pandas.core.dtypes.missing import isna + return isna(self.fill_value) + + @property + def _is_numeric(self): + from pandas.core.dtypes.common import is_object_dtype + return not is_object_dtype(self.subtype) + + @property + def _is_boolean(self): + from pandas.core.dtypes.common import is_bool_dtype + return is_bool_dtype(self.subtype) + + @property + def kind(self): + return self.subtype.kind + + @property + def type(self): + return self.subtype.type + + @property + def subtype(self): + return self._dtype + + @property + def name(self): + return 'Sparse[{}, {}]'.format(self.subtype.name, self.fill_value) + + def __repr__(self): + return self.name + + @classmethod + def construct_array_type(cls): + return SparseArray + + @classmethod + def construct_from_string(cls, string): + """ + Construct a SparseDtype from a string form. + + Parameters + ---------- + string : str + Can take the following forms. + + string dtype + ================ ============================ + 'int' SparseDtype[np.int64, 0] + 'Sparse' SparseDtype[np.float64, nan] + 'Sparse[int]' SparseDtype[np.int64, 0] + 'Sparse[int, 0]' SparseDtype[np.int64, 0] + ================ ============================ + + It is not possible to specify non-default fill values + with a string. An argument like ``'Sparse[int, 1]'`` + will raise a ``TypeError`` because the default fill value + for integers is 0. 
+ + Returns + ------- + SparseDtype + """ + msg = "Could not construct SparseDtype from '{}'".format(string) + if string.startswith("Sparse"): + try: + sub_type, has_fill_value = cls._parse_subtype(string) + result = SparseDtype(sub_type) + except Exception: + raise TypeError(msg) + else: + msg = ("Could not construct SparseDtype from '{}'.\n\nIt " + "looks like the fill_value in the string is not " + "the default for the dtype. Non-default fill_values " + "are not supported. Use the 'SparseDtype()' " + "constructor instead.") + if has_fill_value and str(result) != string: + raise TypeError(msg.format(string)) + return result + else: + raise TypeError(msg) + + @staticmethod + def _parse_subtype(dtype): + """ + Parse a string to get the subtype + + Parameters + ---------- + dtype : str + A string like + + * Sparse[subtype] + * Sparse[subtype, fill_value] + + Returns + ------- + subtype : str + + Raises + ------ + ValueError + When the subtype cannot be extracted. + """ + xpr = re.compile( + r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$" + ) + m = xpr.match(dtype) + has_fill_value = False + if m: + subtype = m.groupdict()['subtype'] + has_fill_value = m.groupdict()['fill_value'] or has_fill_value + elif dtype == "Sparse": + subtype = 'float64' + else: + raise ValueError("Cannot parse {}".format(dtype)) + return subtype, has_fill_value + + @classmethod + def is_dtype(cls, dtype): + dtype = getattr(dtype, 'dtype', dtype) + if (isinstance(dtype, compat.string_types) and + dtype.startswith("Sparse")): + sub_type, _ = cls._parse_subtype(dtype) + dtype = np.dtype(sub_type) + elif isinstance(dtype, cls): + return True + return isinstance(dtype, np.dtype) or dtype == 'Sparse' + +# ---------------------------------------------------------------------------- +# Array _sparray_doc_kwargs = dict(klass='SparseArray') diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 7a4e7022f7819..22da546355df6 100644 --- a/pandas/core/dtypes/common.py +++ 
b/pandas/core/dtypes/common.py @@ -1,5 +1,4 @@ """ common type operations """ - import numpy as np from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) @@ -12,7 +11,6 @@ PeriodDtype, IntervalDtype, PandasExtensionDtype, ExtensionDtype, _pandas_registry) -from pandas.core.sparse.dtype import SparseDtype from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, @@ -23,7 +21,6 @@ is_file_like, is_re, is_re_compilable, is_sequence, is_nested_list_like, is_named_tuple, is_array_like, is_decimal, is_complex, is_interval) - _POSSIBLY_CAST_DTYPES = {np.dtype(t).name for t in ['O', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64']} @@ -181,7 +178,7 @@ def is_sparse(arr): >>> is_sparse(bsr_matrix([1, 2, 3])) False """ - from pandas.core.sparse.dtype import SparseDtype + from pandas.core.arrays.sparse import SparseDtype dtype = getattr(arr, 'dtype', arr) return isinstance(dtype, SparseDtype) @@ -1928,10 +1925,13 @@ def _get_dtype_type(arr_or_dtype): elif is_interval_dtype(arr_or_dtype): return Interval return _get_dtype_type(np.dtype(arr_or_dtype)) - elif isinstance(arr_or_dtype, (ABCSparseSeries, ABCSparseArray, - SparseDtype)): - dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) - return dtype.type + else: + from pandas.core.arrays.sparse import SparseDtype + if isinstance(arr_or_dtype, (ABCSparseSeries, + ABCSparseArray, + SparseDtype)): + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return dtype.type try: return arr_or_dtype.dtype.type except AttributeError: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ac824708245d2..91fbaf736aae8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -556,7 +556,7 @@ def _concat_sparse(to_concat, axis=0, typs=None): a single array, preserving the combined dtypes """ - from pandas.core.sparse.array import 
SparseArray + from pandas.core.arrays import SparseArray fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e9be7a3e9afb8..064a1b72eb4c8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1763,7 +1763,7 @@ def to_sparse(self, fill_value=None, kind='block'): >>> type(sdf) <class 'pandas.core.sparse.frame.SparseDataFrame'> """ - from pandas.core.sparse.frame import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame(self._series, index=self.index, columns=self.columns, default_kind=kind, default_fill_value=fill_value) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3667d7c5e39dc..dd0bb1ab8bacb 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -29,7 +29,7 @@ from pandas.core.base import PandasObject import pandas.core.algorithms as algos -from pandas.core.sparse.array import _maybe_to_sparse +from pandas.core.arrays.sparse import _maybe_to_sparse from pandas.core.index import Index, MultiIndex, ensure_index from pandas.core.indexing import maybe_convert_indices diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 640b2812d3e85..8d1ed6486a456 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2116,7 +2116,7 @@ def _sparse_series_op(left, right, op, name): new_index = left.index new_name = get_op_result_name(left, right) - from pandas.core.sparse.array import _sparse_array_op + from pandas.core.arrays.sparse import _sparse_array_op lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name) result = _sparse_array_op(lvalues, rvalues, op, name) return left._constructor(result, index=new_index, name=new_name) @@ -2130,7 +2130,7 @@ def _arith_method_SPARSE_ARRAY(cls, op, special): op_name = _get_op_name(op, special) def wrapper(self, other): - from pandas.core.sparse.array import ( + from pandas.core.arrays.sparse import ( SparseArray, _sparse_array_op, 
_wrap_result, _get_fill) if isinstance(other, np.ndarray): if len(self) != len(other): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 7bee1ba0e2eb2..03b77f0e787f0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -19,7 +19,7 @@ from pandas.core.frame import DataFrame from pandas.core.sparse.api import SparseDataFrame, SparseSeries -from pandas.core.sparse.array import SparseArray +from pandas.core.arrays import SparseArray from pandas._libs.sparse import IntIndex from pandas.core.arrays import Categorical diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f6bca93d377b..b4566ebd36d13 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1384,7 +1384,7 @@ def to_sparse(self, kind='block', fill_value=None): """ # TODO: deprecate from pandas.core.sparse.series import SparseSeries - from pandas.core.sparse.array import SparseArray + from pandas.core.arrays import SparseArray values = SparseArray(self, kind=kind, fill_value=fill_value) return SparseSeries( diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py index 0fb0396e34669..e3be241bcdd70 100644 --- a/pandas/core/sparse/api.py +++ b/pandas/core/sparse/api.py @@ -1,6 +1,5 @@ # pylint: disable=W0611 # flake8: noqa -from pandas.core.sparse.array import SparseArray +from pandas.core.arrays.sparse import SparseArray, SparseDtype from pandas.core.sparse.series import SparseSeries from pandas.core.sparse.frame import SparseDataFrame -from pandas.core.sparse.dtype import SparseDtype diff --git a/pandas/core/sparse/dtype.py b/pandas/core/sparse/dtype.py deleted file mode 100644 index 7f99bf8b58847..0000000000000 --- a/pandas/core/sparse/dtype.py +++ /dev/null @@ -1,249 +0,0 @@ -import re - -import numpy as np - -from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.dtypes import register_extension_dtype -from pandas import compat - - -@register_extension_dtype -class 
SparseDtype(ExtensionDtype): - """ - Dtype for data stored in :class:`SparseArray`. - - This dtype implements the pandas ExtensionDtype interface. - - .. versionadded:: 0.24.0 - - Parameters - ---------- - dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 - The dtype of the underlying array storing the non-fill value values. - fill_value : scalar, optional. - The scalar value not stored in the SparseArray. By default, this - depends on `dtype`. - - ========== ========== - dtype na_value - ========== ========== - float ``np.nan`` - int ``0`` - bool ``False`` - datetime64 ``pd.NaT`` - timedelta64 ``pd.NaT`` - ========== ========== - - The default value may be overridden by specifying a `fill_value`. - """ - # We include `_is_na_fill_value` in the metadata to avoid hash collisions - # between SparseDtype(float, 0.0) and SparseDtype(float, nan). - # Without is_na_fill_value in the comparison, those would be equal since - # hash(nan) is (sometimes?) 0. - _metadata = ('_dtype', '_fill_value', '_is_na_fill_value') - - def __init__(self, dtype=np.float64, fill_value=None): - # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None - from pandas.core.dtypes.missing import na_value_for_dtype - from pandas.core.dtypes.common import ( - pandas_dtype, is_string_dtype, is_scalar - ) - - if isinstance(dtype, type(self)): - if fill_value is None: - fill_value = dtype.fill_value - dtype = dtype.subtype - - dtype = pandas_dtype(dtype) - if is_string_dtype(dtype): - dtype = np.dtype('object') - - if fill_value is None: - fill_value = na_value_for_dtype(dtype) - - if not is_scalar(fill_value): - raise ValueError("fill_value must be a scalar. Got {} " - "instead".format(fill_value)) - self._dtype = dtype - self._fill_value = fill_value - - def __hash__(self): - # Python3 doesn't inherit __hash__ when a base class overrides - # __eq__, so we explicitly do it here. 
- return super(SparseDtype, self).__hash__() - - def __eq__(self, other): - # We have to override __eq__ to handle NA values in _metadata. - # The base class does simple == checks, which fail for NA. - if isinstance(other, compat.string_types): - try: - other = self.construct_from_string(other) - except TypeError: - return False - - if isinstance(other, type(self)): - subtype = self.subtype == other.subtype - if self._is_na_fill_value: - # this case is complicated by two things: - # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) - # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) - # i.e. we want to treat any floating-point NaN as equal, but - # not a floating-point NaN and a datetime NaT. - fill_value = ( - other._is_na_fill_value and - isinstance(self.fill_value, type(other.fill_value)) or - isinstance(other.fill_value, type(self.fill_value)) - ) - else: - fill_value = self.fill_value == other.fill_value - - return subtype and fill_value - return False - - @property - def fill_value(self): - """ - The fill value of the array. - - Converting the SparseArray to a dense ndarray will fill the - array with this value. - - .. warning:: - - It's possible to end up with a SparseArray that has ``fill_value`` - values in ``sp_values``. This can occur, for example, when setting - ``SparseArray.fill_value`` directly. 
- """ - return self._fill_value - - @property - def _is_na_fill_value(self): - from pandas.core.dtypes.missing import isna - return isna(self.fill_value) - - @property - def _is_numeric(self): - from pandas.core.dtypes.common import is_object_dtype - return not is_object_dtype(self.subtype) - - @property - def _is_boolean(self): - from pandas.core.dtypes.common import is_bool_dtype - return is_bool_dtype(self.subtype) - - @property - def kind(self): - return self.subtype.kind - - @property - def type(self): - return self.subtype.type - - @property - def subtype(self): - return self._dtype - - @property - def name(self): - return 'Sparse[{}, {}]'.format(self.subtype.name, self.fill_value) - - def __repr__(self): - return self.name - - @classmethod - def construct_array_type(cls): - from .array import SparseArray - return SparseArray - - @classmethod - def construct_from_string(cls, string): - """ - Construct a SparseDtype from a string form. - - Parameters - ---------- - string : str - Can take the following forms. - - string dtype - ================ ============================ - 'int' SparseDtype[np.int64, 0] - 'Sparse' SparseDtype[np.float64, nan] - 'Sparse[int]' SparseDtype[np.int64, 0] - 'Sparse[int, 0]' SparseDtype[np.int64, 0] - ================ ============================ - - It is not possible to specify non-default fill values - with a string. An argument like ``'Sparse[int, 1]'`` - will raise a ``TypeError`` because the default fill value - for integers is 0. - - Returns - ------- - SparseDtype - """ - msg = "Could not construct SparseDtype from '{}'".format(string) - if string.startswith("Sparse"): - try: - sub_type, has_fill_value = cls._parse_subtype(string) - result = SparseDtype(sub_type) - except Exception: - raise TypeError(msg) - else: - msg = ("Could not construct SparseDtype from '{}'.\n\nIt " - "looks like the fill_value in the string is not " - "the default for the dtype. Non-default fill_values " - "are not supported. 
Use the 'SparseDtype()' " - "constructor instead.") - if has_fill_value and str(result) != string: - raise TypeError(msg.format(string)) - return result - else: - raise TypeError(msg) - - @staticmethod - def _parse_subtype(dtype): - """ - Parse a string to get the subtype - - Parameters - ---------- - dtype : str - A string like - - * Sparse[subtype] - * Sparse[subtype, fill_value] - - Returns - ------- - subtype : str - - Raises - ------ - ValueError - When the subtype cannot be extracted. - """ - xpr = re.compile( - r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$" - ) - m = xpr.match(dtype) - has_fill_value = False - if m: - subtype = m.groupdict()['subtype'] - has_fill_value = m.groupdict()['fill_value'] or has_fill_value - elif dtype == "Sparse": - subtype = 'float64' - else: - raise ValueError("Cannot parse {}".format(dtype)) - return subtype, has_fill_value - - @classmethod - def is_dtype(cls, dtype): - dtype = getattr(dtype, 'dtype', dtype) - if (isinstance(dtype, compat.string_types) and - dtype.startswith("Sparse")): - sub_type, _ = cls._parse_subtype(dtype) - dtype = np.dtype(sub_type) - elif isinstance(dtype, cls): - return True - return isinstance(dtype, np.dtype) or dtype == 'Sparse' diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 36b6ea089f459..2ed275e3bbd2d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -22,8 +22,8 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays) import pandas.core.generic as generic -from pandas.core.sparse.series import SparseSeries, SparseArray -from pandas.core.sparse.dtype import SparseDtype +from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.sparse.series import SparseSeries from pandas._libs.sparse import BlockIndex, get_blocks from pandas.util._decorators import Appender import pandas.core.ops as ops diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index eebf26bbb9708..35ddd623878d0 100644 
--- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -24,7 +24,7 @@ import pandas._libs.index as libindex from pandas.util._decorators import Appender, Substitution -from pandas.core.sparse.array import ( +from pandas.core.arrays import ( SparseArray, ) from pandas._libs.sparse import BlockIndex, IntIndex diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e41885d525653..6a2cfd4d4a7b3 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -2,7 +2,6 @@ data hash pandas / numpy objects """ import itertools - import numpy as np from pandas._libs import hashing, tslibs from pandas.core.dtypes.generic import ( diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 638b76c780852..135f9e89eaaef 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -70,7 +70,7 @@ from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager, make_block, _safe_reshape from pandas.core.sparse.api import SparseSeries, SparseDataFrame -from pandas.core.sparse.array import BlockIndex, IntIndex +from pandas.core.arrays.sparse import BlockIndex, IntIndex from pandas.io.common import get_filepath_or_buffer, _stringify_path from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index de193db846c50..9cceff30c9e0e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -50,7 +50,7 @@ from pandas.core.internals import (BlockManager, make_block, _block2d_to_blocknd, _factor_indexer, _block_shape) -from pandas.core.sparse.array import BlockIndex, IntIndex +from pandas.core.arrays.sparse import BlockIndex, IntIndex from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing diff --git a/pandas/tests/arrays/sparse/__init__.py b/pandas/tests/arrays/sparse/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/pandas/tests/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py similarity index 100% rename from pandas/tests/sparse/test_arithmetics.py rename to pandas/tests/arrays/sparse/test_arithmetics.py diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py similarity index 100% rename from pandas/tests/sparse/test_array.py rename to pandas/tests/arrays/sparse/test_array.py diff --git a/pandas/tests/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py similarity index 100% rename from pandas/tests/sparse/test_dtype.py rename to pandas/tests/arrays/sparse/test_dtype.py diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py similarity index 99% rename from pandas/tests/sparse/test_libsparse.py rename to pandas/tests/arrays/sparse/test_libsparse.py index 3b90d93cee7a4..3d867cdda1d42 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index +from pandas.core.arrays.sparse import IntIndex, BlockIndex, _make_index import pandas._libs.sparse as splib TEST_LENGTH = 20 diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 11bf1cb6e9f05..ca0435141c2e2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -2,8 +2,7 @@ import pandas as pd import numpy as np -from pandas.core.sparse.dtype import SparseDtype -from pandas import SparseArray +from pandas import SparseArray, SparseDtype from pandas.errors import PerformanceWarning from pandas.tests.extension import base import pandas.util.testing as tm diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index f27600d830a93..bf7247caa5d4a 100644 --- 
a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -223,13 +223,15 @@ def test_concat_empty_series_dtypes(self): result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64')]) # TODO: release-note: concat sparse dtype - assert result.dtype == pd.core.sparse.dtype.SparseDtype(np.float64) + expected = pd.core.sparse.api.SparseDtype(np.float64) + assert result.dtype == expected assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='object')]) # TODO: release-note: concat sparse dtype - assert result.dtype == pd.core.sparse.dtype.SparseDtype('object') + expected = pd.core.sparse.api.SparseDtype('object') + assert result.dtype == expected assert result.ftype == 'object:sparse' def test_combine_first_dt64(self): diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index d539dfa456740..70e44a9d2d40f 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -2,7 +2,7 @@ # pylint: disable-msg=E1101,W0612 import numpy as np import pandas as pd -from pandas.core.sparse.dtype import SparseDtype +from pandas import SparseDtype import pandas.util.testing as tm diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index a1ec8314841e3..7a8b5b5ad407b 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -18,11 +18,10 @@ from pandas.compat import range, PY36 from pandas.core.reshape.util import cartesian_product -from pandas.core.sparse.api import SparseDtype import pandas.core.sparse.frame as spf from pandas._libs.sparse import BlockIndex, IntIndex -from pandas.core.sparse.api import SparseSeries +from pandas import SparseSeries, SparseDtype from pandas.tests.series.test_api import SharedWithSparse