-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: de-duplicate DataFrame/SparseDataFrame arithmetic code #23414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cb79d12
9c8a9c7
32a327b
e737d45
08f80fa
7f7ed51
ea4ce51
4d99777
f4c99ed
7810dc6
635c575
bf88f23
2ddb05a
86fa938
3c2f323
8f3e9e9
63544b4
7ecb03f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4935,6 +4935,33 @@ def reorder_levels(self, order, axis=0): | |
# ---------------------------------------------------------------------- | ||
# Arithmetic / combination related | ||
|
||
def _wrap_dispatched_op(self, result, other, func, axis=None): | ||
""" | ||
Wrap the result of an arithmetic/comparison operation performed | ||
via ops.dispatch_to_series in a properly-indexed DataFrame. | ||
|
||
Parameters | ||
---------- | ||
result : dict[int:Series] | ||
other : DataFrame, Series, or scalar | ||
func : binary operator | ||
axis : {0, 1, "index", "columns"} | ||
|
||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Returns | ||
------- | ||
DataFrame | ||
|
||
Notes | ||
----- | ||
The `other`, `func`, and `axis` arguments are not used here but are | ||
included to keep the signature consistent with that in SparseDataFrame. | ||
""" | ||
result = self._constructor(result, index=self.index, copy=False) | ||
# Pin columns instead of passing to constructor for compat with | ||
# non-unique columns case | ||
result.columns = self.columns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this need to call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This maintains the current behavior. At some point we can/should make a concerted effort to be internally-consistent about calling |
||
return result | ||
|
||
def _combine_frame(self, other, func, fill_value=None, level=None): | ||
this, other = self.align(other, join='outer', level=level, copy=False) | ||
new_index, new_columns = this.index, this.columns | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -339,9 +339,10 @@ def _apply_columns(self, func): | |
for col, series in compat.iteritems(self): | ||
new_data[col] = func(series) | ||
|
||
return self._constructor( | ||
data=new_data, index=self.index, columns=self.columns, | ||
default_fill_value=self.default_fill_value).__finalize__(self) | ||
# pass dummy arguments for func and axis; `other` just needs to be | ||
# a scalar. | ||
return self._wrap_dispatched_op(new_data, other=None, | ||
func=None, axis=None) | ||
|
||
def astype(self, dtype): | ||
return self._apply_columns(lambda x: x.astype(dtype)) | ||
|
@@ -547,6 +548,31 @@ def xs(self, key, axis=0, copy=False): | |
# ---------------------------------------------------------------------- | ||
# Arithmetic-related methods | ||
|
||
def _wrap_dispatched_op(self, result, other, func, axis=None): | ||
""" | ||
Wrap the result of an arithmetic/comparison operation performed | ||
via ops.dispatch_to_series in a properly-indexed DataFrame. | ||
|
||
Parameters | ||
---------- | ||
result : dict[int:Series] | ||
other : DataFrame, Series, scalar | ||
func : binary function | ||
axis : {None, "columns"} | ||
|
||
Returns | ||
------- | ||
SparseDataFrame | ||
""" | ||
|
||
fill_value = self._get_op_result_fill_value(other, func, axis) | ||
|
||
res = self._constructor(result, index=self.index, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you passing columns to the constructor here, whereas in DataFrame you are setting? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm very specifically not changing the existing behavior. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well, let's do that then. in reality you could actually call the superclass (maybe should just do that), if you have the ability to pass a fill_value |
||
columns=self.columns, | ||
default_fill_value=fill_value) | ||
|
||
return res.__finalize__(self) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why would this call finalize? why would the DataFrame one not? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No idea, that's why I opened #23028. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, we basically need to always call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll do this, but am not wild about it. Current There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm this actually makes things less consistent within DataFrame (though slightly more consistent between DataFrame - SparseDataFrame) since lots of ops dont go through this path. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you just call the super function here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The DataFrame constructor doesn't take a |
||
|
||
def _combine_frame(self, other, func, fill_value=None, level=None): | ||
if level is not None: | ||
raise NotImplementedError("'level' argument is not supported") | ||
|
@@ -557,46 +583,33 @@ def _combine_frame(self, other, func, fill_value=None, level=None): | |
if self.empty and other.empty: | ||
return self._constructor(index=new_index).__finalize__(self) | ||
|
||
new_data = {} | ||
if fill_value is not None: | ||
# TODO: be a bit more intelligent here | ||
for col in new_columns: | ||
if col in this and col in other: | ||
dleft = this[col].to_dense() | ||
dright = other[col].to_dense() | ||
result = dleft._binop(dright, func, fill_value=fill_value) | ||
result = result.to_sparse(fill_value=this[col].fill_value) | ||
new_data[col] = result | ||
else: | ||
def _arith_op(left, right): | ||
# analogous to _arith_op defined in DataFrame._combine_frame | ||
dleft = left.to_dense() | ||
dright = right.to_dense() | ||
result = dleft._binop(dright, func, fill_value=fill_value) | ||
result = result.to_sparse(fill_value=left.fill_value) | ||
return result | ||
|
||
for col in new_columns: | ||
if col in this and col in other: | ||
new_data[col] = func(this[col], other[col]) | ||
assert all(col in this and col in other for col in new_columns) | ||
|
||
new_fill_value = self._get_op_result_fill_value(other, func) | ||
new_data = {col: _arith_op(this[col], other[col]) | ||
for col in new_columns} | ||
|
||
return self._constructor(data=new_data, index=new_index, | ||
columns=new_columns, | ||
default_fill_value=new_fill_value | ||
).__finalize__(self) | ||
return this._wrap_dispatched_op(new_data, other, func, axis=None) | ||
|
||
def _combine_match_index(self, other, func, level=None): | ||
new_data = {} | ||
|
||
if level is not None: | ||
raise NotImplementedError("'level' argument is not supported") | ||
|
||
this, other = self.align(other, join='outer', axis=0, level=level, | ||
copy=False) | ||
|
||
for col, series in compat.iteritems(this): | ||
new_data[col] = func(series.values, other.values) | ||
new_data = {col: func(this[col].values, other.values) | ||
for col in this.columns} | ||
|
||
fill_value = self._get_op_result_fill_value(other, func) | ||
|
||
return self._constructor( | ||
new_data, index=this.index, columns=self.columns, | ||
default_fill_value=fill_value).__finalize__(self) | ||
return self._wrap_dispatched_op(new_data, other, func, axis=None) | ||
|
||
def _combine_match_columns(self, other, func, level=None): | ||
# patched version of DataFrame._combine_match_columns to account for | ||
|
@@ -611,29 +624,33 @@ def _combine_match_columns(self, other, func, level=None): | |
copy=False) | ||
assert left.columns.equals(right.index) | ||
|
||
new_data = {} | ||
|
||
for col in left.columns: | ||
new_data[col] = func(left[col], float(right[col])) | ||
new_data = {col: func(left[col], float(right[col])) | ||
for col in left.columns} | ||
|
||
return self._constructor( | ||
new_data, index=left.index, columns=left.columns, | ||
default_fill_value=self.default_fill_value).__finalize__(self) | ||
return self._wrap_dispatched_op(new_data, other, func, "columns") | ||
|
||
def _combine_const(self, other, func): | ||
return self._apply_columns(lambda x: func(x, other)) | ||
new_data = {col: func(self[col], other) for col in self.columns} | ||
|
||
return self._wrap_dispatched_op(new_data, other, func, axis=None) | ||
|
||
def _get_op_result_fill_value(self, other, func): | ||
def _get_op_result_fill_value(self, other, func, axis=None): | ||
own_default = self.default_fill_value | ||
|
||
if isinstance(other, DataFrame): | ||
axis = self._get_axis_name(axis) if axis is not None else None | ||
|
||
if axis == "columns": | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# i.e. called via _combine_match_columns | ||
fill_value = self.default_fill_value | ||
|
||
elif isinstance(other, DataFrame): | ||
# i.e. called from _combine_frame | ||
|
||
other_default = getattr(other, 'default_fill_value', np.nan) | ||
|
||
# if the fill values are the same use them? or use a valid one | ||
if own_default == other_default: | ||
# TOOD: won't this evaluate as False if both are np.nan? | ||
# TODO: won't this evaluate as False if both are np.nan? | ||
fill_value = own_default | ||
elif np.isnan(own_default) and not np.isnan(other_default): | ||
fill_value = other_default | ||
|
@@ -652,6 +669,10 @@ def _get_op_result_fill_value(self, other, func): | |
fill_value = func(np.float64(own_default), | ||
np.float64(other.fill_value)) | ||
|
||
elif np.ndim(other) == 0: | ||
# i.e. called via _combine_const | ||
fill_value = self.default_fill_value | ||
|
||
else: | ||
raise NotImplementedError(type(other)) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.