
Commit ed474fe

FIX-#6022: support lazy import of modin.pandas module (#6023)
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
1 parent f2422e9 commit ed474fe

File tree

5 files changed: +81 -17 lines changed


.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions

@@ -663,6 +663,9 @@ jobs:
       - run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py
       - run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
       - run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
+      - run: |
+          python -m pip install lazy_import
+          mpiexec -n 1 python -m pytest modin/pandas/test/integrations/
       - uses: ./.github/workflows/upload-coverage

   test-all:
@@ -823,6 +826,10 @@ jobs:
         if: matrix.engine == 'python' || matrix.test_task == 'group_4'
       - run: python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
         if: matrix.engine == 'python' || matrix.test_task == 'group_4'
+      - run: |
+          python -m pip install lazy_import
+          python -m pytest modin/pandas/test/integrations/
+        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
       - uses: ./.github/workflows/upload-coverage

   test-experimental:

modin/core/execution/ray/implementations/pandas_on_ray/partitioning/virtual_partition.py

Lines changed: 20 additions & 8 deletions

@@ -26,12 +26,6 @@
 from modin.utils import _inherit_docstrings


-# If Ray has not been initialized yet by Modin,
-# it will be initialized when calling `RayWrapper.put`.
-_DEPLOY_AXIS_FUNC = RayWrapper.put(PandasDataframeAxisPartition.deploy_axis_func)
-_DRAIN = RayWrapper.put(PandasDataframeAxisPartition.drain)
-
-
 class PandasOnRayDataframeVirtualPartition(PandasDataframeAxisPartition):
     """
     The class implements the interface in ``PandasDataframeAxisPartition``.
@@ -58,6 +52,24 @@ class PandasOnRayDataframeVirtualPartition(PandasDataframeAxisPartition):
     instance_type = ray.ObjectRef
     axis = None

+    # these variables are intentionally initialized at runtime (see #6023)
+    _DEPLOY_AXIS_FUNC = None
+    _DRAIN_FUNC = None
+
+    @classmethod
+    def _get_deploy_axis_func(cls):  # noqa: GL08
+        if cls._DEPLOY_AXIS_FUNC is None:
+            cls._DEPLOY_AXIS_FUNC = RayWrapper.put(
+                PandasDataframeAxisPartition.deploy_axis_func
+            )
+        return cls._DEPLOY_AXIS_FUNC
+
+    @classmethod
+    def _get_drain_func(cls):  # noqa: GL08
+        if cls._DRAIN_FUNC is None:
+            cls._DRAIN_FUNC = RayWrapper.put(PandasDataframeAxisPartition.drain)
+        return cls._DRAIN_FUNC
+
     def __init__(
         self,
         list_of_partitions,
@@ -200,7 +212,7 @@ def deploy_axis_func(
             num_returns=(num_splits if lengths is None else len(lengths)) * 4,
             **({"max_retries": max_retries} if max_retries is not None else {}),
         ).remote(
-            _DEPLOY_AXIS_FUNC,
+            cls._get_deploy_axis_func(),
             axis,
             func,
             f_args,
@@ -473,7 +485,7 @@ def drain_call_queue(self, num_splits=None):
             _ = self.list_of_blocks
             return
         drained = super(PandasOnRayDataframeVirtualPartition, self).apply(
-            _DRAIN, num_splits=num_splits, call_queue=self.call_queue
+            self._get_drain_func(), num_splits=num_splits, call_queue=self.call_queue
         )
         self._list_of_block_partitions = drained
         self.call_queue = []

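This hunk is the core of the fix: the module-level RayWrapper.put calls forced the execution engine to start as a side effect of merely importing the module, which is what broke lazy import of modin.pandas; caching the object ref on first use defers that work until a partition is actually deployed. Below is a minimal, self-contained sketch of the same deferred-put pattern, using a hypothetical stand-in backend rather than Modin's actual RayWrapper, purely to illustrate the idea:

class _FakeBackend:
    # Hypothetical stand-in for RayWrapper/UnidistWrapper; a real put() would
    # initialize the execution engine as a side effect.
    initialized = False

    @classmethod
    def put(cls, obj):
        cls.initialized = True  # simulates engine initialization on first put
        return ("object_ref", obj)


class VirtualPartitionSketch:
    # Cached object ref, intentionally populated at runtime instead of import time.
    _DEPLOY_AXIS_FUNC = None

    @classmethod
    def _get_deploy_axis_func(cls):
        if cls._DEPLOY_AXIS_FUNC is None:
            cls._DEPLOY_AXIS_FUNC = _FakeBackend.put(len)  # any callable works here
        return cls._DEPLOY_AXIS_FUNC


assert not _FakeBackend.initialized             # importing alone starts nothing
VirtualPartitionSketch._get_deploy_axis_func()  # first real use triggers the put
assert _FakeBackend.initialized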
modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/virtual_partition.py

Lines changed: 20 additions & 9 deletions

@@ -25,13 +25,6 @@
 from modin.utils import _inherit_docstrings


-# If unidist has not been initialized yet by Modin,
-# unidist itself handles initialization when calling `unidist.put`,
-# which is called inside of `UnidistWrapper.put`.
-_DEPLOY_AXIS_FUNC = UnidistWrapper.put(PandasDataframeAxisPartition.deploy_axis_func)
-_DRAIN = UnidistWrapper.put(PandasDataframeAxisPartition.drain)
-
-
 class PandasOnUnidistDataframeVirtualPartition(PandasDataframeAxisPartition):
     """
     The class implements the interface in ``PandasDataframeAxisPartition``.
@@ -58,6 +51,24 @@ class PandasOnUnidistDataframeVirtualPartition(PandasDataframeAxisPartition):
     instance_type = unidist.core.base.object_ref.ObjectRef
     axis = None

+    # these variables are intentionally initialized at runtime (see #6023)
+    _DEPLOY_AXIS_FUNC = None
+    _DRAIN_FUNC = None
+
+    @classmethod
+    def _get_deploy_axis_func(cls):  # noqa: GL08
+        if cls._DEPLOY_AXIS_FUNC is None:
+            cls._DEPLOY_AXIS_FUNC = UnidistWrapper.put(
+                PandasDataframeAxisPartition.deploy_axis_func
+            )
+        return cls._DEPLOY_AXIS_FUNC
+
+    @classmethod
+    def _get_drain_func(cls):  # noqa: GL08
+        if cls._DRAIN_FUNC is None:
+            cls._DRAIN_FUNC = UnidistWrapper.put(PandasDataframeAxisPartition.drain)
+        return cls._DRAIN_FUNC
+
     def __init__(
         self,
         list_of_partitions,
@@ -199,7 +210,7 @@ def deploy_axis_func(
             num_returns=(num_splits if lengths is None else len(lengths)) * 4,
             **({"max_retries": max_retries} if max_retries is not None else {}),
         ).remote(
-            _DEPLOY_AXIS_FUNC,
+            cls._get_deploy_axis_func(),
             axis,
             func,
             f_args,
@@ -466,7 +477,7 @@ def drain_call_queue(self, num_splits=None):
             _ = self.list_of_blocks
             return
         drained = super(PandasOnUnidistDataframeVirtualPartition, self).apply(
-            _DRAIN, num_splits=num_splits, call_queue=self.call_queue
+            self._get_drain_func(), num_splits=num_splits, call_queue=self.call_queue
         )
         self._list_of_block_partitions = drained
         self.call_queue = []
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
Lines changed: 22 additions & 0 deletions

@@ -0,0 +1,22 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+
+import lazy_import
+
+pandas = lazy_import.lazy_module("pandas")
+pyarrow = lazy_import.lazy_module("pyarrow")
+from modin import pandas as pd  # noqa: E402
+
+
+def test_dataframe_constructor():
+    pd.DataFrame({"col1": [1, 2, 3], "col2": list("abc")})

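For reference, the end-user scenario this new test guards is roughly the following (a minimal sketch, assuming the third-party lazy_import package is installed): modin.pandas itself can now be wrapped in a lazy module and is only resolved, together with its execution engine, on first use.

import lazy_import

# Nothing from Modin (or its engine) is actually imported at this point.
pd = lazy_import.lazy_module("modin.pandas")

# The real import of modin.pandas happens here, on first attribute access.
df = pd.DataFrame({"col1": [1, 2, 3], "col2": list("abc")})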