From 0e57d6b28ab5a1c1ea9ab4264c567d04e662d2c4 Mon Sep 17 00:00:00 2001 From: sfc-gh-mvashishtha Date: Wed, 15 Jan 2025 20:55:51 -0800 Subject: [PATCH 1/2] REFACTOR-#7418: Rename to_dataframe to to_interchange_dataframe. Signed-off-by: sfc-gh-mvashishtha --- modin/conftest.py | 4 +++- modin/core/storage_formats/base/query_compiler.py | 7 ++++++- modin/core/storage_formats/pandas/native_query_compiler.py | 4 +++- modin/core/storage_formats/pandas/query_compiler.py | 4 +++- modin/pandas/dataframe.py | 2 +- .../interchange/dataframe_protocol/base/test_sanity.py | 2 +- modin/tests/test_executions_api.py | 2 +- 7 files changed, 18 insertions(+), 7 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index a025b9783c7..5967590bed0 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -183,7 +183,9 @@ def from_arrow(cls, at, data_cls): def free(self): pass - def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): + def to_interchange_dataframe( + self, nan_as_null: bool = False, allow_copy: bool = True + ): raise NotImplementedError( "The selected execution does not implement the DataFrame exchange protocol." ) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index dc7f2fce17b..f7be7ecd02c 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -45,6 +45,9 @@ StrDefault, StructDefault, ) +from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import ( + ProtocolDataframe, +) from modin.error_message import ErrorMessage from modin.logging import ClassLogger from modin.logging.config import LogLevel @@ -472,7 +475,9 @@ def to_numpy(self, **kwargs): # noqa: PR02 # Dataframe exchange protocol @abc.abstractmethod - def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): + def to_interchange_dataframe( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> ProtocolDataframe: """ Get a DataFrame exchange protocol object representing data of the Modin DataFrame. diff --git a/modin/core/storage_formats/pandas/native_query_compiler.py b/modin/core/storage_formats/pandas/native_query_compiler.py index 12f9da6ef46..1d1e39ac414 100644 --- a/modin/core/storage_formats/pandas/native_query_compiler.py +++ b/modin/core/storage_formats/pandas/native_query_compiler.py @@ -1236,7 +1236,9 @@ def finalize(self): # Dataframe exchange protocol - def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): + def to_interchange_dataframe( + self, nan_as_null: bool = False, allow_copy: bool = True + ): return self._modin_frame.__dataframe__( nan_as_null=nan_as_null, allow_copy=allow_copy ) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index b62f2aa4474..473ff65cc8b 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -381,7 +381,9 @@ def from_arrow(cls, at, data_cls): # Dataframe exchange protocol - def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): + def to_interchange_dataframe( + self, nan_as_null: bool = False, allow_copy: bool = True + ): return self._modin_frame.__dataframe__( nan_as_null=nan_as_null, allow_copy=allow_copy ) diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index 4f47c9374e9..b85a2f6c8a2 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -2917,7 +2917,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): ProtocolDataframe A dataframe object following the dataframe protocol specification. """ - return self._query_compiler.to_dataframe( + return self._query_compiler.to_interchange_dataframe( nan_as_null=nan_as_null, allow_copy=allow_copy ) diff --git a/modin/tests/interchange/dataframe_protocol/base/test_sanity.py b/modin/tests/interchange/dataframe_protocol/base/test_sanity.py index 4d96250e433..646039624c2 100644 --- a/modin/tests/interchange/dataframe_protocol/base/test_sanity.py +++ b/modin/tests/interchange/dataframe_protocol/base/test_sanity.py @@ -39,7 +39,7 @@ def dummy_io_method(*args, **kwargs): query_compiler_cls = get_unique_base_execution query_compiler_cls.from_dataframe = dummy_io_method - query_compiler_cls.to_dataframe = dummy_io_method + query_compiler_cls.to_interchange_dataframe = dummy_io_method from modin.pandas.io import from_dataframe diff --git a/modin/tests/test_executions_api.py b/modin/tests/test_executions_api.py index f1cd635232b..01bf4ec2072 100644 --- a/modin/tests/test_executions_api.py +++ b/modin/tests/test_executions_api.py @@ -30,7 +30,7 @@ def test_base_abstract_methods(): "from_arrow", "default_to_pandas", "from_dataframe", - "to_dataframe", + "to_interchange_dataframe", ] not_implemented_methods = BASE_EXECUTION.__abstractmethods__.difference( From fb6bba720e39d8490c477f670d2bc26c22e4641c Mon Sep 17 00:00:00 2001 From: sfc-gh-mvashishtha Date: Thu, 16 Jan 2025 11:16:58 -0800 Subject: [PATCH 2/2] Rename internal from_dataframe methods to from_interchange_dataframe Signed-off-by: sfc-gh-mvashishtha --- modin/conftest.py | 2 +- .../core/dataframe/pandas/dataframe/dataframe.py | 2 +- .../execution/dispatching/factories/dispatcher.py | 6 +++--- .../execution/dispatching/factories/factories.py | 6 +++--- modin/core/io/io.py | 4 ++-- modin/core/storage_formats/base/query_compiler.py | 4 ++-- .../pandas/native_query_compiler.py | 5 ++++- .../core/storage_formats/pandas/query_compiler.py | 7 +++++-- modin/pandas/io.py | 15 ++++++++++----- .../dataframe_protocol/base/test_sanity.py | 2 +- modin/tests/test_executions_api.py | 2 +- 11 files changed, 33 insertions(+), 22 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index 5967590bed0..9a12688358a 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -191,7 +191,7 @@ def to_interchange_dataframe( ) @classmethod - def from_dataframe(cls, df, data_cls): + def from_interchange_dataframe(cls, df, data_cls): raise NotImplementedError( "The selected execution does not implement the DataFrame exchange protocol." ) diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py index 5456f28f127..043de08f432 100644 --- a/modin/core/dataframe/pandas/dataframe/dataframe.py +++ b/modin/core/dataframe/pandas/dataframe/dataframe.py @@ -4807,7 +4807,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): ) @classmethod - def from_dataframe(cls, df: ProtocolDataframe) -> PandasDataframe: + def from_interchange_dataframe(cls, df: ProtocolDataframe) -> PandasDataframe: """ Convert a DataFrame implementing the dataframe exchange protocol to a Core Modin Dataframe. diff --git a/modin/core/execution/dispatching/factories/dispatcher.py b/modin/core/execution/dispatching/factories/dispatcher.py index 0bbe84af9aa..b7a0bb4b3d7 100644 --- a/modin/core/execution/dispatching/factories/dispatcher.py +++ b/modin/core/execution/dispatching/factories/dispatcher.py @@ -177,9 +177,9 @@ def from_non_pandas(cls, *args, **kwargs): return cls.get_factory()._from_non_pandas(*args, **kwargs) @classmethod - @_inherit_docstrings(factories.BaseFactory._from_dataframe) - def from_dataframe(cls, *args, **kwargs): - return cls.get_factory()._from_dataframe(*args, **kwargs) + @_inherit_docstrings(factories.BaseFactory._from_interchange_dataframe) + def from_interchange_dataframe(cls, *args, **kwargs): + return cls.get_factory()._from_interchange_dataframe(*args, **kwargs) @classmethod @_inherit_docstrings(factories.BaseFactory._from_ray) diff --git a/modin/core/execution/dispatching/factories/factories.py b/modin/core/execution/dispatching/factories/factories.py index deda5113287..ba6d1eef6a1 100644 --- a/modin/core/execution/dispatching/factories/factories.py +++ b/modin/core/execution/dispatching/factories/factories.py @@ -200,10 +200,10 @@ def _from_non_pandas(cls, *args, **kwargs): _doc_io_method_template, source="a DataFrame object supporting exchange protocol `__dataframe__()`", params=_doc_io_method_all_params, - method="io.from_dataframe", + method="io.from_interchange_dataframe", ) - def _from_dataframe(cls, *args, **kwargs): - return cls.io_cls.from_dataframe(*args, **kwargs) + def _from_interchange_dataframe(cls, *args, **kwargs): + return cls.io_cls.from_interchange_dataframe(*args, **kwargs) @classmethod @doc( diff --git a/modin/core/io/io.py b/modin/core/io/io.py index cb7647e2207..42d0cbdbe84 100644 --- a/modin/core/io/io.py +++ b/modin/core/io/io.py @@ -100,7 +100,7 @@ def from_arrow(cls, at): return cls.query_compiler_cls.from_arrow(at, cls.frame_cls) @classmethod - def from_dataframe(cls, df): + def from_interchange_dataframe(cls, df): """ Create a Modin QueryCompiler from a DataFrame supporting the DataFrame exchange protocol `__dataframe__()`. @@ -114,7 +114,7 @@ def from_dataframe(cls, df): BaseQueryCompiler QueryCompiler containing data from the DataFrame. """ - return cls.query_compiler_cls.from_dataframe(df, cls.frame_cls) + return cls.query_compiler_cls.from_interchange_dataframe(df, cls.frame_cls) @classmethod def from_ray(cls, ray_obj): diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index f7be7ecd02c..1b4284a1d15 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -506,13 +506,13 @@ def to_interchange_dataframe( @classmethod @abc.abstractmethod - def from_dataframe(cls, df, data_cls): + def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls): """ Build QueryCompiler from a DataFrame object supporting the dataframe exchange protocol `__dataframe__()`. Parameters ---------- - df : DataFrame + df : ProtocolDataframe The DataFrame object supporting the dataframe exchange protocol. data_cls : type :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class diff --git a/modin/core/storage_formats/pandas/native_query_compiler.py b/modin/core/storage_formats/pandas/native_query_compiler.py index 1d1e39ac414..7660c05f2f4 100644 --- a/modin/core/storage_formats/pandas/native_query_compiler.py +++ b/modin/core/storage_formats/pandas/native_query_compiler.py @@ -24,6 +24,9 @@ import pandas from pandas.core.dtypes.common import is_list_like, is_scalar +from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import ( + ProtocolDataframe, +) from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler from modin.core.storage_formats.pandas.query_compiler_caster import QueryCompilerCaster from modin.utils import ( @@ -1244,7 +1247,7 @@ def to_interchange_dataframe( ) @classmethod - def from_dataframe(cls, df, data_cls): + def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls): return cls(pandas.api.interchange.from_dataframe(df)) # END Dataframe exchange protocol diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 473ff65cc8b..656b636dbf5 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -59,6 +59,9 @@ GroupByDefault, SeriesGroupByDefault, ) +from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import ( + ProtocolDataframe, +) from modin.core.dataframe.pandas.metadata import ( DtypesDescriptor, ModinDtypes, @@ -389,8 +392,8 @@ def to_interchange_dataframe( ) @classmethod - def from_dataframe(cls, df, data_cls): - return cls(data_cls.from_dataframe(df)) + def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls): + return cls(data_cls.from_interchange_dataframe(df)) # END Dataframe exchange protocol diff --git a/modin/pandas/io.py b/modin/pandas/io.py index 508d1b2a4d5..ecd539358db 100644 --- a/modin/pandas/io.py +++ b/modin/pandas/io.py @@ -65,6 +65,9 @@ from pandas.io.parsers.readers import _c_parser_defaults from modin.config import ModinNumpy +from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import ( + ProtocolDataframe, +) from modin.error_message import ErrorMessage from modin.logging import ClassLogger, enable_logging from modin.utils import ( @@ -1013,16 +1016,16 @@ def from_arrow(at) -> DataFrame: return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_arrow(at)) -def from_dataframe(df) -> DataFrame: +def from_dataframe(df: ProtocolDataframe) -> DataFrame: """ - Convert a DataFrame implementing the dataframe exchange protocol to a Modin DataFrame. + Convert a DataFrame implementing the dataframe interchange protocol to a Modin DataFrame. See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html. Parameters ---------- - df : DataFrame - The DataFrame object supporting the dataframe exchange protocol. + df : ProtocolDataframe + An object supporting the dataframe interchange protocol. Returns ------- @@ -1031,7 +1034,9 @@ def from_dataframe(df) -> DataFrame: """ from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher - return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_dataframe(df)) + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.from_interchange_dataframe(df) + ) def from_ray(ray_obj) -> DataFrame: diff --git a/modin/tests/interchange/dataframe_protocol/base/test_sanity.py b/modin/tests/interchange/dataframe_protocol/base/test_sanity.py index 646039624c2..ddd8fcfb08a 100644 --- a/modin/tests/interchange/dataframe_protocol/base/test_sanity.py +++ b/modin/tests/interchange/dataframe_protocol/base/test_sanity.py @@ -38,7 +38,7 @@ def dummy_io_method(*args, **kwargs): raise TestPassed query_compiler_cls = get_unique_base_execution - query_compiler_cls.from_dataframe = dummy_io_method + query_compiler_cls.from_interchange_dataframe = dummy_io_method query_compiler_cls.to_interchange_dataframe = dummy_io_method from modin.pandas.io import from_dataframe diff --git a/modin/tests/test_executions_api.py b/modin/tests/test_executions_api.py index 01bf4ec2072..cde0d7f37af 100644 --- a/modin/tests/test_executions_api.py +++ b/modin/tests/test_executions_api.py @@ -29,7 +29,7 @@ def test_base_abstract_methods(): "from_pandas", "from_arrow", "default_to_pandas", - "from_dataframe", + "from_interchange_dataframe", "to_interchange_dataframe", ]