Revert #261 and #262 (merlin.core.compat changes) (#263)

* Revert "Run with import without gpu (#261)" This reverts commit 1011afb. * Revert "Update `merlin.core.compat` to use `HAS_GPU` and add add'l libraries (#262)" This reverts commit e365820. --------- Co-authored-by: Julio Perez <37191411+jperez999@users.noreply.github.com>
NVIDIA-Merlin · Mar 29, 2023 · 4593f4a · 4593f4a
1 parent 1011afb
commit 4593f4a
Show file tree

Hide file tree

Showing 16 changed files with 86 additions and 136 deletions.
diff --git a/merlin/core/compat.py b/merlin/core/compat.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,11 @@
 # pylint: disable=unused-import
 import os
 
+try:
+    from numba import cuda
+except ImportError:
+    cuda = None
+
 from dask.distributed.diagnostics import nvml
 
 
@@ -45,42 +50,21 @@ def _get_gpu_count():
 
 HAS_GPU = _get_gpu_count() > 0
 
-if HAS_GPU:
-    try:
-        from numba import cuda
-    except ImportError:
-        cuda = None
-
-    try:
-        import cudf
-    except ImportError:
-        cudf = None
-
-    try:
-        import cupy
-    except ImportError:
-        cupy = None
-
-    try:
-        import dask_cudf
-    except ImportError:
-        dask_cudf = None
-
-else:
-    cuda = None
-    cudf = None
-    cupy = None
-    dask_cudf = None
 
 try:
     import numpy
 except ImportError:
     numpy = None
 
 try:
-    import pandas
+    import cupy
 except ImportError:
-    pandas = None
+    cupy = None
+
+try:
+    import cudf
+except ImportError:
+    cudf = None
 
 try:
     import tensorflow

diff --git a/merlin/core/utils.py b/merlin/core/utils.py
@@ -96,12 +96,12 @@ def device_mem_size(kind="total", cpu=False):
         When kind is provided with an unsupported value.
     """
     # Use psutil (if available) for cpu mode
-    if (cpu or not cuda) and psutil:
+    if cpu and psutil:
         if kind == "total":
             return psutil.virtual_memory().total
         elif kind == "free":
             return psutil.virtual_memory().free
-    elif cpu or not cuda:
+    elif cpu:
         warnings.warn("Please install psutil for full cpu=True support.")
         # Assume 1GB of memory
         return int(1e9)

diff --git a/merlin/dtypes/mappings/cudf.py b/merlin/dtypes/mappings/cudf.py
@@ -15,7 +15,6 @@
 #
 import numpy as np
 
-from merlin.core.compat import cudf
 from merlin.core.dispatch import is_string_dtype
 from merlin.dtypes.mapping import DTypeMapping, NumpyPreprocessor
 from merlin.dtypes.registry import _dtype_registry
@@ -42,16 +41,17 @@ def cudf_translator(raw_dtype) -> np.dtype:
         return category_type
 
 
-if cudf:
-    try:
-        # We only want to register this mapping if cudf is available, even though
-        # the mapping itself doesn't use cudf (yet?)
+try:
+    # We only want to register this mapping if cudf is available, even though
+    # the mapping itself doesn't use cudf (yet?)
 
-        cudf_dtypes = DTypeMapping(
-            translator=NumpyPreprocessor("cudf", cudf_translator, attrs=["_categories"]),
-        )
-        _dtype_registry.register("cudf", cudf_dtypes)
-    except ImportError as exc:
-        from warnings import warn
+    import cudf  # pylint:disable=unused-import # noqa: F401
 
-        warn(f"cuDF dtype mappings did not load successfully due to an error: {exc.msg}")
+    cudf_dtypes = DTypeMapping(
+        translator=NumpyPreprocessor("cudf", cudf_translator, attrs=["_categories"]),
+    )
+    _dtype_registry.register("cudf", cudf_dtypes)
+except ImportError as exc:
+    from warnings import warn
+
+    warn(f"cuDF dtype mappings did not load successfully due to an error: {exc.msg}")
diff --git a/merlin/io/avro.py b/merlin/io/avro.py
@@ -15,11 +15,11 @@
 #
 import warnings
 
+import cudf
 import uavro as ua
 from dask.base import tokenize
 from dask.dataframe.core import new_dd_object
 
-from merlin.core.compat import cudf
 from merlin.io.dataset_engine import DatasetEngine
 
 

diff --git a/merlin/io/csv.py b/merlin/io/csv.py
@@ -16,13 +16,17 @@
 import functools
 
 import dask.dataframe as dd
+
+try:
+    import dask_cudf
+except ImportError:
+    dask_cudf = None
+import numpy as np
 from dask.bytes import read_bytes
 from dask.utils import parse_bytes
 from fsspec.core import get_fs_token_paths
 from fsspec.utils import infer_compression
 
-from merlin.core.compat import dask_cudf
-from merlin.core.compat import numpy as np
 from merlin.io.dataset_engine import DatasetEngine
 
 

diff --git a/merlin/io/dataframe_engine.py b/merlin/io/dataframe_engine.py
@@ -13,10 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+try:
+    import cudf
+    import dask_cudf
+except ImportError:
+    cudf = None
 from dask.dataframe.core import new_dd_object
 from dask.highlevelgraph import HighLevelGraph
 
-from merlin.core.compat import cudf, dask_cudf
 from merlin.io.dataset_engine import DatasetEngine
 
 

diff --git a/merlin/io/dataset.py b/merlin/io/dataset.py
@@ -30,7 +30,7 @@
 from fsspec.core import get_fs_token_paths
 from fsspec.utils import stringify_path
 
-from merlin.core.compat import HAS_GPU, cudf
+from merlin.core.compat import HAS_GPU
 from merlin.core.dispatch import (
     convert_data,
     hex_to_int,
@@ -49,6 +49,12 @@
 from merlin.schema import ColumnSchema, Schema
 from merlin.schema.io.tensorflow_metadata import TensorflowMetadata
 
+try:
+    import cudf
+except ImportError:
+    cudf = None
+
+
 MERLIN_METADATA_DIR_NAME = ".merlin"
 LOG = logging.getLogger("merlin")
 

diff --git a/merlin/io/fsspec_utils.py b/merlin/io/fsspec_utils.py
@@ -28,17 +28,17 @@
 else:
     fsspec_parquet = None
 
-from merlin.core.compat import cudf
-
-if cudf:
+try:
+    import cudf
     from cudf.core.column import as_column, build_categorical_column
 
     if fsspec_parquet and (Version(cudf.__version__) < Version("21.12.0")):
         # This version of cudf does not support
         # reads from python-file objects. Avoid
         # using the `fsspec.parquet` module
         fsspec_parquet = None
-
+except ImportError:
+    cudf = None
 
 #
 # Parquet-Specific Utilities

diff --git a/merlin/io/parquet.py b/merlin/io/parquet.py
@@ -27,13 +27,13 @@
 
 from packaging.version import Version
 
-from merlin.core.compat import cudf
-
-if cudf:
+try:
+    import cudf
     import dask_cudf
     from cudf.io.parquet import ParquetWriter as pwriter_cudf
     from dask_cudf.io.parquet import CudfEngine
-
+except ImportError:
+    cudf = None
 import dask
 import dask.dataframe as dd
 import fsspec

diff --git a/merlin/io/shuffle.py b/merlin/io/shuffle.py
@@ -19,15 +19,14 @@
 import pandas as pd
 from packaging.version import Version
 
-from merlin.core.compat import cudf
-
 _IGNORE_INDEX_SUPPORTED = Version(pd.__version__) >= Version("1.3.0")
 
-if cudf:
-    try:
-        _CUDF_IGNORE_INDEX_SUPPORTED = Version(cudf.__version__) >= Version("22.04.0")
-    except ImportError:
-        _CUDF_IGNORE_INDEX_SUPPORTED = None
+try:
+    import cudf
+
+    _CUDF_IGNORE_INDEX_SUPPORTED = Version(cudf.__version__) >= Version("22.04.0")
+except ImportError:
+    _CUDF_IGNORE_INDEX_SUPPORTED = None
 
 
 class Shuffle(enum.Enum):

diff --git a/merlin/io/worker.py b/merlin/io/worker.py
@@ -18,11 +18,14 @@
 import threading
 
 import pandas as pd
+
+try:
+    import cudf
+except ImportError:
+    cudf = None
 import pyarrow as pa
 from dask.distributed import get_worker
 
-from merlin.core.compat import cudf
-
 # Use global variable as the default
 # cache when there are no distributed workers.
 # Use a thread lock to "handle" multiple Dask

diff --git a/merlin/io/writer.py b/merlin/io/writer.py
@@ -19,10 +19,14 @@
 import threading
 from typing import Optional
 
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
+
 import numpy as np
 from fsspec.core import get_fs_token_paths
 
-from merlin.core.compat import cupy as cp
 from merlin.core.dispatch import annotate
 from merlin.io.shuffle import shuffle_df
 

diff --git a/merlin/table/conversions.py b/merlin/table/conversions.py
@@ -42,20 +42,6 @@ def _from_dlpack_gpu(to, capsule):
 
 
 def to_dlpack_col(column: TensorColumn, to_dlpack_fn: Callable) -> _DlpackColumn:
-    """Creates  dlpack representation of the TensorColumn supplied.
-
-    Parameters
-    ----------
-    column : TensorColumn
-        Original data to be put in dlpack capsule(s)
-    to_dlpack_fn : Callable
-        The logic to use to create dlpack representation
-
-    Returns
-    -------
-    _DlpackColumn
-        A TensorColumn with values and offsets represented as dlpack capsules
-    """
     vals_cap = to_dlpack_fn(column.values)
     offs_cap = to_dlpack_fn(column.offsets) if column.offsets is not None else None
     return _DlpackColumn(vals_cap, offs_cap, column)
@@ -64,24 +50,6 @@ def to_dlpack_col(column: TensorColumn, to_dlpack_fn: Callable) -> _DlpackColumn
 def from_dlpack_col(
     dlpack_col: _DlpackColumn, from_dlpack_fn: Callable, target_col_type: Type
 ) -> TensorColumn:
-    """Unwraps a DLpack representation of a TensorColumn and creates a
-    TensorColumn of the target_col_type. This function is used in conjunction
-    with to_dlpack_col.
-
-    Parameters
-    ----------
-    dlpack_col : _DlpackColumn
-        The dlpack representation of the original TensorColum
-    from_dlpack_fn : Callable
-        Function containing logic to unwrap dlpack capsule
-    target_col_type : Type
-        Desired TensorColumn return type from unwrap of dlpack representation.
-
-    Returns
-    -------
-    TensorColumn
-        A TensorColumn of type target_col_type.
-    """
     target_array_type = target_col_type.array_type()
 
     values = from_dlpack_fn(target_array_type, dlpack_col.values)
@@ -117,25 +85,6 @@ def convert_col(
     _to_dlpack_fn: Callable = None,
     _from_dlpack_fn: Callable = None,
 ):
-    """Convert a TensorColumn to a Different TensorColumn,
-    uses DLPack (zero copy) to transfer between TensorColumns
-
-    Parameters
-    ----------
-    column : TensorColumn
-        The Column to be transformed
-    target_type : Type
-        The desired TensorColumn, to be produced
-    _to_dlpack_fn : Callable, optional
-        cached to_dlpack function, by default None
-    _from_dlpack_fn : Callable, optional
-        cached from_dlpack function, by default None
-
-    Returns
-    -------
-    TensorColumn
-        A TensorColumn of the type identified in target_type parameter.
-    """
     # If there's nothing to do, take a shortcut
     if isinstance(column, target_type):
         return column
@@ -153,9 +102,6 @@ def convert_col(
 
 
 def df_from_tensor_table(table):
-    """
-    Create a dataframe from a TensorTable
-    """
     device = "cpu" if table.device == Device.CPU else None
     df_dict = {}
     for col_name, col_data in table.items():
@@ -185,7 +131,8 @@ def _tensor_table_from_pandas_df(df: pd.DataFrame):
 
 @tensor_table_from_df.register_lazy("cudf")
 def _register_tensor_table_from_cudf_df():
-    from merlin.core.compat import cudf
+    import cudf
+
     from merlin.table import CupyColumn
 
     @tensor_table_from_df.register(cudf.DataFrame)