-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: Move boxing logic to BlockManager #12752
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,8 @@ | |
from collections import defaultdict | ||
|
||
import numpy as np | ||
from numpy import percentile as _quantile | ||
|
||
from pandas.core.base import PandasObject | ||
|
||
from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE, | ||
|
@@ -131,6 +133,8 @@ def get_values(self, dtype=None): | |
return an internal format, currently just the ndarray | ||
this is often overriden to handle to_dense like operations | ||
""" | ||
if com.is_object_dtype(dtype): | ||
return self.values.astype(object) | ||
return self.values | ||
|
||
def to_dense(self): | ||
|
@@ -141,6 +145,10 @@ def to_object_block(self, mgr): | |
values = self.get_values(dtype=object) | ||
return self.make_block(values, klass=ObjectBlock) | ||
|
||
@property | ||
def _na_value(self): | ||
return np.nan | ||
|
||
@property | ||
def fill_value(self): | ||
return np.nan | ||
|
@@ -1247,6 +1255,19 @@ def equals(self, other): | |
return False | ||
return array_equivalent(self.values, other.values) | ||
|
||
def quantile(self, values, qs, **kwargs): | ||
if len(values) == 0: | ||
if com.is_list_like(qs): | ||
return np.array([self.fill_value]) | ||
else: | ||
return self._na_value | ||
|
||
if com.is_list_like(qs): | ||
values = [_quantile(values, x * 100, **kwargs) for x in qs] | ||
return np.array(values) | ||
else: | ||
return _quantile(values, qs * 100, **kwargs) | ||
|
||
|
||
class NonConsolidatableMixIn(object): | ||
""" hold methods for the nonconsolidatable blocks """ | ||
|
@@ -1455,15 +1476,55 @@ def should_store(self, value): | |
return com.is_integer_dtype(value) and value.dtype == self.dtype | ||
|
||
|
||
class TimeDeltaBlock(IntBlock): | ||
class DatetimeLikeBlockMixin(object): | ||
|
||
@property | ||
def _na_value(self): | ||
return tslib.NaT | ||
|
||
@property | ||
def fill_value(self): | ||
return tslib.iNaT | ||
|
||
def _try_operate(self, values): | ||
""" return a version to operate on """ | ||
return values.view('i8') | ||
|
||
def get_values(self, dtype=None): | ||
""" | ||
return object dtype as boxed values, such as Timestamps/Timedelta | ||
""" | ||
if com.is_object_dtype(dtype): | ||
return lib.map_infer(self.values.ravel(), | ||
self._box_func).reshape(self.values.shape) | ||
return self.values | ||
|
||
def quantile(self, values, qs, **kwargs): | ||
values = values.view('i8') | ||
mask = values == self.fill_value | ||
if mask.any(): | ||
values = values[~mask] | ||
result = Block.quantile(self, values, qs, **kwargs) | ||
|
||
if com.is_datetime64tz_dtype(self): | ||
# ToDo: Temp logic to avoid GH 12619 and GH 12772 | ||
# which affects to DatetimeBlockTZ_try_coerce_result for np.ndarray | ||
if isinstance(result, np.ndarray) and values.ndim > 0: | ||
result = self._holder(result, tz='UTC') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — ok for now. yeah trying to avoid check like this! thanks! — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — but actually could/should this logic actually be in — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — Ideally yes, but to avoid any side effect ATM. |
||
result = result.tz_convert(self.values.tz) | ||
return result | ||
return self._try_coerce_result(result) | ||
|
||
|
||
class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): | ||
__slots__ = () | ||
is_timedelta = True | ||
_can_hold_na = True | ||
is_numeric = False | ||
|
||
@property | ||
def fill_value(self): | ||
return tslib.iNaT | ||
def _box_func(self): | ||
return lambda x: tslib.Timedelta(x, unit='ns') | ||
|
||
def fillna(self, value, **kwargs): | ||
|
||
|
@@ -1516,19 +1577,15 @@ def _try_coerce_args(self, values, other): | |
|
||
return values, values_mask, other, other_mask | ||
|
||
def _try_operate(self, values): | ||
""" return a version to operate on """ | ||
return values.view('i8') | ||
|
||
def _try_coerce_result(self, result): | ||
""" reverse of try_coerce_args / try_operate """ | ||
if isinstance(result, np.ndarray): | ||
mask = isnull(result) | ||
if result.dtype.kind in ['i', 'f', 'O']: | ||
result = result.astype('m8[ns]') | ||
result[mask] = tslib.iNaT | ||
elif isinstance(result, np.integer): | ||
result = lib.Timedelta(result) | ||
elif isinstance(result, (np.integer, np.float)): | ||
result = self._box_func(result) | ||
return result | ||
|
||
def should_store(self, value): | ||
|
@@ -1558,13 +1615,6 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, | |
dtype=object) | ||
return rvalues | ||
|
||
def get_values(self, dtype=None): | ||
# return object dtypes as Timedelta | ||
if dtype == object: | ||
return lib.map_infer(self.values.ravel(), | ||
lib.Timedelta).reshape(self.values.shape) | ||
return self.values | ||
|
||
|
||
class BoolBlock(NumericBlock): | ||
__slots__ = () | ||
|
@@ -1954,7 +2004,7 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): | |
return values.reshape(1, len(values)) | ||
|
||
|
||
class DatetimeBlock(Block): | ||
class DatetimeBlock(DatetimeLikeBlockMixin, Block): | ||
__slots__ = () | ||
is_datetime = True | ||
_can_hold_na = True | ||
|
@@ -1998,10 +2048,6 @@ def _try_cast(self, element): | |
except: | ||
return element | ||
|
||
def _try_operate(self, values): | ||
""" return a version to operate on """ | ||
return values.view('i8') | ||
|
||
def _try_coerce_args(self, values, other): | ||
""" | ||
Coerce values and other to dtype 'i8'. NaN and NaT convert to | ||
|
@@ -2029,7 +2075,7 @@ def _try_coerce_args(self, values, other): | |
other = tslib.iNaT | ||
other_mask = True | ||
elif isinstance(other, (datetime, np.datetime64, date)): | ||
other = lib.Timestamp(other) | ||
other = self._box_func(other) | ||
if getattr(other, 'tz') is not None: | ||
raise TypeError("cannot coerce a Timestamp with a tz on a " | ||
"naive Block") | ||
|
@@ -2056,13 +2102,13 @@ def _try_coerce_result(self, result): | |
if isinstance(result, np.ndarray): | ||
if result.dtype.kind in ['i', 'f', 'O']: | ||
result = result.astype('M8[ns]') | ||
elif isinstance(result, (np.integer, np.datetime64)): | ||
result = lib.Timestamp(result) | ||
elif isinstance(result, (np.integer, np.float, np.datetime64)): | ||
result = self._box_func(result) | ||
return result | ||
|
||
@property | ||
def fill_value(self): | ||
return tslib.iNaT | ||
def _box_func(self): | ||
return tslib.Timestamp | ||
|
||
def to_native_types(self, slicer=None, na_rep=None, date_format=None, | ||
quoting=None, **kwargs): | ||
|
@@ -2098,13 +2144,6 @@ def set(self, locs, values, check=False): | |
|
||
self.values[locs] = values | ||
|
||
def get_values(self, dtype=None): | ||
# return object dtype as Timestamps | ||
if dtype == object: | ||
return lib.map_infer( | ||
self.values.ravel(), lib.Timestamp).reshape(self.values.shape) | ||
return self.values | ||
|
||
|
||
class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): | ||
""" implement a datetime64 block with a tz attribute """ | ||
|
@@ -2145,7 +2184,7 @@ def external_values(self): | |
|
||
def get_values(self, dtype=None): | ||
# return object dtype as Timestamps with the zones | ||
if dtype == object: | ||
if com.is_object_dtype(dtype): | ||
f = lambda x: lib.Timestamp(x, tz=self.values.tz) | ||
return lib.map_infer( | ||
self.values.ravel(), f).reshape(self.values.shape) | ||
|
@@ -2228,10 +2267,14 @@ def _try_coerce_result(self, result): | |
|
||
if isinstance(result, np.ndarray): | ||
result = self._holder(result, tz=self.values.tz) | ||
elif isinstance(result, (np.integer, np.datetime64)): | ||
elif isinstance(result, (np.integer, np.float, np.datetime64)): | ||
result = lib.Timestamp(result, tz=self.values.tz) | ||
return result | ||
|
||
@property | ||
def _box_func(self): | ||
return lambda x: tslib.Timestamp(x, tz=self.dtype.tz) | ||
|
||
def shift(self, periods, axis=0, mgr=None): | ||
""" shift the block by periods """ | ||
|
||
|
@@ -3852,6 +3895,14 @@ def get_values(self): | |
""" return a dense type view """ | ||
return np.array(self._block.to_dense(), copy=False) | ||
|
||
@property | ||
def asobject(self): | ||
""" | ||
return a object dtype array. datetime/timedelta like values are boxed | ||
to Timestamp/Timedelta instances. | ||
""" | ||
return self._block.get_values(dtype=object) | ||
|
||
@property | ||
def itemsize(self): | ||
return self._block.values.itemsize | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think maybe move this to `compat`, as in numpy > 1.9 we can import `nanpercentile`
(not sure we have enough test coverage; maybe make another issue about this and we can fix it later)