Skip to content

Commit

Permalink
Fix inplace operations without inplace keyword on empty dataframes (#983
Browse files Browse the repository at this point in the history
)

* Resolves #982
* Adds clause for operations that default to pandas that do not return
  any value
  * In these cases, we build the object based on the class name of the
    object in pandas
* Add some error checking cases to the default code
* Add more test cases for the empty dataframe case.
  • Loading branch information
devin-petersohn authored Jan 10, 2020
1 parent 11082ec commit c36592d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
22 changes: 19 additions & 3 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,23 @@ def _default_to_pandas(self, op, *args, **kwargs):
k: v._to_pandas() if hasattr(v, "_to_pandas") else v
for k, v in kwargs.items()
}
pandas_obj = self._to_pandas()
if callable(op):
result = op(self._to_pandas(), *args, **kwargs)
result = op(pandas_obj, *args, **kwargs)
elif isinstance(op, str):
# The inner `getattr` is ensuring that we are treating this object (whether
# it is a DataFrame, Series, etc.) as a pandas object. The outer `getattr`
# will get the operation (`op`) from the pandas version of the class and run
# it on the object after we have converted it to pandas.
result = getattr(getattr(pandas, self.__name__), op)(
self._to_pandas(), *args, **kwargs
pandas_obj, *args, **kwargs
)
# SparseDataFrames cannot be serialize by arrow and cause problems for Modin.
else:
ErrorMessage.catch_bugs_and_request_email(
failure_condition=True,
extra_log="{} is an unsupported operation".format(op),
)
# SparseDataFrames cannot be serialized by arrow and cause problems for Modin.
# For now we will use pandas.
if isinstance(result, type(self)) and not isinstance(
result, (pandas.SparseDataFrame, pandas.SparseSeries)
Expand All @@ -243,6 +249,14 @@ def _default_to_pandas(self, op, *args, **kwargs):
from .series import Series

return Series(result)
# inplace
elif result is None:
import modin.pandas as pd

return self._create_or_update_from_compiler(
getattr(pd, type(pandas_obj).__name__)(pandas_obj)._query_compiler,
inplace=True,
)
else:
try:
if (
Expand Down Expand Up @@ -3415,6 +3429,8 @@ def __getattribute__(self, item):
"_reduce_dimension",
"__repr__",
"__len__",
"_create_or_update_from_compiler",
"_update_inplace",
]
if item not in default_behaviors:
method = object.__getattribute__(self, item)
Expand Down
12 changes: 12 additions & 0 deletions modin/pandas/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,18 @@ def test_empty_df(self):
assert len(df.index) == 0
assert len(df.columns) == 0

df = pd.DataFrame()
pd_df = pandas.DataFrame()
df["a"] = [1, 2, 3, 4, 5]
pd_df["a"] = [1, 2, 3, 4, 5]
df_equals(df, pd_df)

df = pd.DataFrame()
pd_df = pandas.DataFrame()
df["a"] = list("ABCDEF")
pd_df["a"] = list("ABCDEF")
df_equals(df, pd_df)

@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_abs(self, request, data):
modin_df = pd.DataFrame(data)
Expand Down

0 comments on commit c36592d

Please sign in to comment.