Skip to content

Commit

Permalink
Revert #261 and #262 (merlin.core.compat changes) (#263)
Browse files Browse the repository at this point in the history
* Revert "Run with import without gpu (#261)"

This reverts commit 1011afb.

* Revert "Update `merlin.core.compat` to use `HAS_GPU` and add add'l libraries (#262)"

This reverts commit e365820.

---------

Co-authored-by: Julio Perez <37191411+jperez999@users.noreply.github.com>
  • Loading branch information
karlhigley and jperez999 authored Mar 29, 2023
1 parent 1011afb commit 4593f4a
Show file tree
Hide file tree
Showing 16 changed files with 86 additions and 136 deletions.
42 changes: 13 additions & 29 deletions merlin/core/compat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,11 @@
# pylint: disable=unused-import
import os

try:
from numba import cuda
except ImportError:
cuda = None

from dask.distributed.diagnostics import nvml


Expand Down Expand Up @@ -45,42 +50,21 @@ def _get_gpu_count():

HAS_GPU = _get_gpu_count() > 0

if HAS_GPU:
try:
from numba import cuda
except ImportError:
cuda = None

try:
import cudf
except ImportError:
cudf = None

try:
import cupy
except ImportError:
cupy = None

try:
import dask_cudf
except ImportError:
dask_cudf = None

else:
cuda = None
cudf = None
cupy = None
dask_cudf = None

try:
import numpy
except ImportError:
numpy = None

try:
import pandas
import cupy
except ImportError:
pandas = None
cupy = None

try:
import cudf
except ImportError:
cudf = None

try:
import tensorflow
Expand Down
4 changes: 2 additions & 2 deletions merlin/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,12 @@ def device_mem_size(kind="total", cpu=False):
When kind is provided with an unsupported value.
"""
# Use psutil (if available) for cpu mode
if (cpu or not cuda) and psutil:
if cpu and psutil:
if kind == "total":
return psutil.virtual_memory().total
elif kind == "free":
return psutil.virtual_memory().free
elif cpu or not cuda:
elif cpu:
warnings.warn("Please install psutil for full cpu=True support.")
# Assume 1GB of memory
return int(1e9)
Expand Down
24 changes: 12 additions & 12 deletions merlin/dtypes/mappings/cudf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#
import numpy as np

from merlin.core.compat import cudf
from merlin.core.dispatch import is_string_dtype
from merlin.dtypes.mapping import DTypeMapping, NumpyPreprocessor
from merlin.dtypes.registry import _dtype_registry
Expand All @@ -42,16 +41,17 @@ def cudf_translator(raw_dtype) -> np.dtype:
return category_type


if cudf:
try:
# We only want to register this mapping if cudf is available, even though
# the mapping itself doesn't use cudf (yet?)
try:
# We only want to register this mapping if cudf is available, even though
# the mapping itself doesn't use cudf (yet?)

cudf_dtypes = DTypeMapping(
translator=NumpyPreprocessor("cudf", cudf_translator, attrs=["_categories"]),
)
_dtype_registry.register("cudf", cudf_dtypes)
except ImportError as exc:
from warnings import warn
import cudf # pylint:disable=unused-import # noqa: F401

warn(f"cuDF dtype mappings did not load successfully due to an error: {exc.msg}")
cudf_dtypes = DTypeMapping(
translator=NumpyPreprocessor("cudf", cudf_translator, attrs=["_categories"]),
)
_dtype_registry.register("cudf", cudf_dtypes)
except ImportError as exc:
from warnings import warn

warn(f"cuDF dtype mappings did not load successfully due to an error: {exc.msg}")
2 changes: 1 addition & 1 deletion merlin/io/avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
#
import warnings

import cudf
import uavro as ua
from dask.base import tokenize
from dask.dataframe.core import new_dd_object

from merlin.core.compat import cudf
from merlin.io.dataset_engine import DatasetEngine


Expand Down
8 changes: 6 additions & 2 deletions merlin/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@
import functools

import dask.dataframe as dd

try:
import dask_cudf
except ImportError:
dask_cudf = None
import numpy as np
from dask.bytes import read_bytes
from dask.utils import parse_bytes
from fsspec.core import get_fs_token_paths
from fsspec.utils import infer_compression

from merlin.core.compat import dask_cudf
from merlin.core.compat import numpy as np
from merlin.io.dataset_engine import DatasetEngine


Expand Down
6 changes: 5 additions & 1 deletion merlin/io/dataframe_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
try:
import cudf
import dask_cudf
except ImportError:
cudf = None
from dask.dataframe.core import new_dd_object
from dask.highlevelgraph import HighLevelGraph

from merlin.core.compat import cudf, dask_cudf
from merlin.io.dataset_engine import DatasetEngine


Expand Down
8 changes: 7 additions & 1 deletion merlin/io/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from fsspec.core import get_fs_token_paths
from fsspec.utils import stringify_path

from merlin.core.compat import HAS_GPU, cudf
from merlin.core.compat import HAS_GPU
from merlin.core.dispatch import (
convert_data,
hex_to_int,
Expand All @@ -49,6 +49,12 @@
from merlin.schema import ColumnSchema, Schema
from merlin.schema.io.tensorflow_metadata import TensorflowMetadata

try:
import cudf
except ImportError:
cudf = None


MERLIN_METADATA_DIR_NAME = ".merlin"
LOG = logging.getLogger("merlin")

Expand Down
8 changes: 4 additions & 4 deletions merlin/io/fsspec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@
else:
fsspec_parquet = None

from merlin.core.compat import cudf

if cudf:
try:
import cudf
from cudf.core.column import as_column, build_categorical_column

if fsspec_parquet and (Version(cudf.__version__) < Version("21.12.0")):
# This version of cudf does not support
# reads from python-file objects. Avoid
# using the `fsspec.parquet` module
fsspec_parquet = None

except ImportError:
cudf = None

#
# Parquet-Specific Utilities
Expand Down
8 changes: 4 additions & 4 deletions merlin/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@

from packaging.version import Version

from merlin.core.compat import cudf

if cudf:
try:
import cudf
import dask_cudf
from cudf.io.parquet import ParquetWriter as pwriter_cudf
from dask_cudf.io.parquet import CudfEngine

except ImportError:
cudf = None
import dask
import dask.dataframe as dd
import fsspec
Expand Down
13 changes: 6 additions & 7 deletions merlin/io/shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@
import pandas as pd
from packaging.version import Version

from merlin.core.compat import cudf

_IGNORE_INDEX_SUPPORTED = Version(pd.__version__) >= Version("1.3.0")

if cudf:
try:
_CUDF_IGNORE_INDEX_SUPPORTED = Version(cudf.__version__) >= Version("22.04.0")
except ImportError:
_CUDF_IGNORE_INDEX_SUPPORTED = None
try:
import cudf

_CUDF_IGNORE_INDEX_SUPPORTED = Version(cudf.__version__) >= Version("22.04.0")
except ImportError:
_CUDF_IGNORE_INDEX_SUPPORTED = None


class Shuffle(enum.Enum):
Expand Down
7 changes: 5 additions & 2 deletions merlin/io/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
import threading

import pandas as pd

try:
import cudf
except ImportError:
cudf = None
import pyarrow as pa
from dask.distributed import get_worker

from merlin.core.compat import cudf

# Use global variable as the default
# cache when there are no distributed workers.
# Use a thread lock to "handle" multiple Dask
Expand Down
6 changes: 5 additions & 1 deletion merlin/io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
import threading
from typing import Optional

try:
import cupy as cp
except ImportError:
cp = None

import numpy as np
from fsspec.core import get_fs_token_paths

from merlin.core.compat import cupy as cp
from merlin.core.dispatch import annotate
from merlin.io.shuffle import shuffle_df

Expand Down
57 changes: 2 additions & 55 deletions merlin/table/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,6 @@ def _from_dlpack_gpu(to, capsule):


def to_dlpack_col(column: TensorColumn, to_dlpack_fn: Callable) -> _DlpackColumn:
"""Creates dlpack representation of the TensorColumn supplied.
Parameters
----------
column : TensorColumn
Original data to be put in dlpack capsule(s)
to_dlpack_fn : Callable
The logic to use to create dlpack representation
Returns
-------
_DlpackColumn
A TensorColumn with values and offsets represented as dlpack capsules
"""
vals_cap = to_dlpack_fn(column.values)
offs_cap = to_dlpack_fn(column.offsets) if column.offsets is not None else None
return _DlpackColumn(vals_cap, offs_cap, column)
Expand All @@ -64,24 +50,6 @@ def to_dlpack_col(column: TensorColumn, to_dlpack_fn: Callable) -> _DlpackColumn
def from_dlpack_col(
dlpack_col: _DlpackColumn, from_dlpack_fn: Callable, target_col_type: Type
) -> TensorColumn:
"""Unwraps a DLpack representation of a TensorColumn and creates a
TensorColumn of the target_col_type. This function is used in conjunction
with to_dlpack_col.
Parameters
----------
dlpack_col : _DlpackColumn
The dlpack representation of the original TensorColum
from_dlpack_fn : Callable
Function containing logic to unwrap dlpack capsule
target_col_type : Type
Desired TensorColumn return type from unwrap of dlpack representation.
Returns
-------
TensorColumn
A TensorColumn of type target_col_type.
"""
target_array_type = target_col_type.array_type()

values = from_dlpack_fn(target_array_type, dlpack_col.values)
Expand Down Expand Up @@ -117,25 +85,6 @@ def convert_col(
_to_dlpack_fn: Callable = None,
_from_dlpack_fn: Callable = None,
):
"""Convert a TensorColumn to a Different TensorColumn,
uses DLPack (zero copy) to transfer between TensorColumns
Parameters
----------
column : TensorColumn
The Column to be transformed
target_type : Type
The desired TensorColumn, to be produced
_to_dlpack_fn : Callable, optional
cached to_dlpack function, by default None
_from_dlpack_fn : Callable, optional
cached from_dlpack function, by default None
Returns
-------
TensorColumn
A TensorColumn of the type identified in target_type parameter.
"""
# If there's nothing to do, take a shortcut
if isinstance(column, target_type):
return column
Expand All @@ -153,9 +102,6 @@ def convert_col(


def df_from_tensor_table(table):
"""
Create a dataframe from a TensorTable
"""
device = "cpu" if table.device == Device.CPU else None
df_dict = {}
for col_name, col_data in table.items():
Expand Down Expand Up @@ -185,7 +131,8 @@ def _tensor_table_from_pandas_df(df: pd.DataFrame):

@tensor_table_from_df.register_lazy("cudf")
def _register_tensor_table_from_cudf_df():
from merlin.core.compat import cudf
import cudf

from merlin.table import CupyColumn

@tensor_table_from_df.register(cudf.DataFrame)
Expand Down
Loading

0 comments on commit 4593f4a

Please sign in to comment.