Skip to content

Commit

Permalink
Refactor and some fixes
Browse files: browse the repository at this point in the history
Signed-off-by: Igoshev, Yaroslav <yaroslav.igoshev@intel.com>
  • Loading branch information
YarShev committed Feb 22, 2022
1 parent 2296535 commit c83c529
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 45 deletions.
2 changes: 2 additions & 0 deletions modin/core/dataframe/base/dataframe/protocol/test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Tests for the DataFrame exchange protocol."""
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Perform basic sanity checks for the DataFrame exchange protocol."""
"""Basic sanity checks for the DataFrame exchange protocol."""

import pytest
import modin.pandas as pd


def test_sanity():
"""Test that the DataFrame protocol module is valid and could be imported correctly."""
from modin.core.dataframe.base.dataframe.protocol.dataframe import ( # noqa
from modin.core.dataframe.base.dataframe.protocol import ( # noqa
ProtocolDataframe,
)

Expand All @@ -31,7 +31,7 @@ class TestPassed(BaseException):
pass

def dummy_io_method(*args, **kwargs):
"""Dummy method emulating that the code path reached exchange protocol implementation."""
"""Dummy method emulating that the code path reached the exchange protocol implementation."""
raise TestPassed

from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Test common utility functions for the DataFrame exchange protocol."""
"""Tests for common utility functions of the DataFrame exchange protocol."""

import pytest
import pandas
Expand Down
2 changes: 1 addition & 1 deletion modin/core/execution/dispatching/factories/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def _from_non_pandas(cls, *args, **kwargs):
@classmethod
@doc(
_doc_io_method_template,
source="a DataFrame object supporting exchange protocol (__dataframe__())",
source="a DataFrame object supporting exchange protocol `__dataframe__()`",
params=_doc_io_method_all_params,
method="utils.from_dataframe",
)
Expand Down
4 changes: 2 additions & 2 deletions modin/core/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,12 @@ def from_arrow(cls, at):
@classmethod
def from_dataframe(cls, df):
"""
Create a Modin `query_compiler` from a DataFrame object supporting exchange protocol (__dataframe__()).
Create a Modin QueryCompiler from a DataFrame supporting the DataFrame exchange protocol `__dataframe__()`.
Parameters
----------
df : DataFrame
The DataFrame object supporting dataframe exchange protocol.
The DataFrame object supporting the DataFrame exchange protocol.
Returns
-------
Expand Down
14 changes: 7 additions & 7 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def to_numpy(self, **kwargs): # noqa: PR02

# END To NumPy

# Exchange protocol
# Dataframe exchange protocol

def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True) -> dict:
"""
Expand All @@ -388,21 +388,21 @@ def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True) -> di
Returns
-------
dict
A dictionary object following the dataframe protocol specification.
A dictionary object following the DataFrame protocol specification.
"""
raise NotImplementedError(
"The selected execution does not implement DataFrame exchange protocol."
"The selected execution does not implement the DataFrame exchange protocol."
)

@classmethod
def from_dataframe(cls, df, data_cls):
"""
Build QueryCompiler from a DataFrame object supporting dataframe exchange protocol (__dataframe__()).
Build QueryCompiler from a DataFrame object supporting the DataFrame exchange protocol `__dataframe__()`.
Parameters
----------
df : DataFrame
The DataFrame object supporting dataframe exchange protocol.
The DataFrame object supporting the DataFrame exchange protocol.
data_cls : type
:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class
(or its descendant) to convert to.
Expand All @@ -413,10 +413,10 @@ def from_dataframe(cls, df, data_cls):
QueryCompiler containing data from the DataFrame.
"""
raise NotImplementedError(
"The selected execution does not implement import via dataframe exchange protocol."
"The selected execution does not implement import via the DataFrame exchange protocol."
)

# END Exchange protocol
# END Dataframe exchange protocol

# Abstract inter-data operations (e.g. add, sub)
# These operations require two DataFrames and will change the shape of the
Expand Down
29 changes: 0 additions & 29 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,35 +556,6 @@ def __constructor__(self, *args, **kwargs):
"""
return type(self)(*args, **kwargs)

def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> dict:
"""
Get a DataFrame exchange protocol object representing data of the Modin DataFrame.
See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.
Parameters
----------
nan_as_null : bool, default: False
A keyword intended for the consumer to tell the producer
to overwrite null values in the data with ``NaN`` (or ``NaT``).
This currently has no effect; once support for nullable extension
dtypes is added, this value should be propagated to columns.
allow_copy : bool, default: True
A keyword that defines whether or not the library is allowed
to make a copy of the data. For example, copying data would be necessary
if a library supports strided buffers, given that this protocol
specifies contiguous buffers. Currently, if the flag is set to ``False``
and a copy is needed, a ``RuntimeError`` will be raised.
Returns
-------
dict
A dictionary object following the dataframe protocol specification.
"""
return self._query_compiler.to_dataframe(
nan_as_null=nan_as_null, allow_copy=allow_copy
)

def abs(self):
self._validate_dtypes(numeric_only=True)
return self.__constructor__(query_compiler=self._query_compiler.abs())
Expand Down
29 changes: 29 additions & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2623,6 +2623,35 @@ def __delitem__(self, key):
__rmod__ = rmod
__rdiv__ = rdiv

def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> dict:
"""
Get a DataFrame exchange protocol object representing the data of the Modin DataFrame.
See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.
Parameters
----------
nan_as_null : bool, default: False
A keyword intended for the consumer to tell the producer
to overwrite null values in the data with ``NaN`` (or ``NaT``).
This currently has no effect; once support for nullable extension
dtypes is added, this value should be propagated to columns.
allow_copy : bool, default: True
A keyword that defines whether or not the library is allowed
to make a copy of the data. For example, copying data would be necessary
if a library supports strided buffers, given that this protocol
specifies contiguous buffers. Currently, if the flag is set to ``False``
and a copy is needed, a ``RuntimeError`` will be raised.
Returns
-------
dict
A dictionary object following the dataframe protocol specification.
"""
return self._query_compiler.to_dataframe(
nan_as_null=nan_as_null, allow_copy=allow_copy
)

@property
def attrs(self): # noqa: D200
"""
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,14 @@ def from_arrow(at):

def from_dataframe(df):
"""
Convert a DataFrame implementing dataframe exchange protocol to a Modin DataFrame.
Convert a DataFrame implementing the dataframe exchange protocol to a Modin DataFrame.
See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.
Parameters
----------
df : DataFrame
The DataFrame object supporting dataframe exchange protocol.
The DataFrame object supporting the dataframe exchange protocol.
Returns
-------
Expand Down

0 comments on commit c83c529

Please sign in to comment.