From b3e934c8c012ac627d3c4f34489b22c13ee475be Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 22 May 2024 13:42:02 -0700
Subject: [PATCH] use explicit mappings

---
 python/cudf/cudf/_lib/pylibcudf/column.pyx  | 25 ++++++++--
 python/cudf/cudf/_lib/pylibcudf/interop.pyx | 42 ++++++++++------
 python/cudf/cudf/_lib/pylibcudf/types.pyx   | 53 --------------------
 python/cudf/cudf/_lib/types.pyx             | 55 +++++++++++++++++++--
 4 files changed, 100 insertions(+), 75 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index 38f4c2691d2..e726eca154f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -22,8 +22,6 @@ import functools
 
 import numpy as np
 
-from .types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
-
 
 cdef class Column:
     """A container of nullable device data as a column of elements.
@@ -353,8 +351,27 @@ cdef class ListColumnView:
 
 @functools.cache
 def _datatype_from_dtype_desc(desc):
-    desc = np.dtype(desc)
-    mapping = SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
+    mapping = {
+        'u1': type_id.UINT8,
+        'u2': type_id.UINT16,
+        'u4': type_id.UINT32,
+        'u8': type_id.UINT64,
+        'i1': type_id.INT8,
+        'i2': type_id.INT16,
+        'i4': type_id.INT32,
+        'i8': type_id.INT64,
+        'f4': type_id.FLOAT32,
+        'f8': type_id.FLOAT64,
+        'b1': type_id.BOOL8,
+        'M8[s]': type_id.TIMESTAMP_SECONDS,
+        'M8[ms]': type_id.TIMESTAMP_MILLISECONDS,
+        'M8[us]': type_id.TIMESTAMP_MICROSECONDS,
+        'M8[ns]': type_id.TIMESTAMP_NANOSECONDS,
+        'm8[s]': type_id.DURATION_SECONDS,
+        'm8[ms]': type_id.DURATION_MILLISECONDS,
+        'm8[us]': type_id.DURATION_MICROSECONDS,
+        'm8[ns]': type_id.DURATION_NANOSECONDS,
+    }
     if desc not in mapping:
         raise ValueError(f"Unsupported dtype: {desc}")
     return DataType(mapping[desc])
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
index 00d2454e864..befdc4463cd 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
@@ -28,18 +28,37 @@ from cudf._lib.pylibcudf.libcudf.wrappers.decimals cimport (
     scale_type,
 )
 
-import numpy as np
-
-from .types import (
-    LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
-    SUPPORTED_NUMPY_TO_LIBCUDF_TYPES,
-)
-
 from .column cimport Column
 from .scalar cimport Scalar
 from .table cimport Table
 from .types cimport DataType, type_id
 
+ARROW_TO_PYLIBCUDF_TYPES = {
+    pa.int8(): type_id.INT8,
+    pa.int16(): type_id.INT16,
+    pa.int32(): type_id.INT32,
+    pa.int64(): type_id.INT64,
+    pa.uint8(): type_id.UINT8,
+    pa.uint16(): type_id.UINT16,
+    pa.uint32(): type_id.UINT32,
+    pa.uint64(): type_id.UINT64,
+    pa.float32(): type_id.FLOAT32,
+    pa.float64(): type_id.FLOAT64,
+    pa.bool_(): type_id.BOOL8,
+    pa.string(): type_id.STRING,
+    pa.duration('s'): type_id.DURATION_SECONDS,
+    pa.duration('ms'): type_id.DURATION_MILLISECONDS,
+    pa.duration('us'): type_id.DURATION_MICROSECONDS,
+    pa.duration('ns'): type_id.DURATION_NANOSECONDS,
+    pa.timestamp('s'): type_id.TIMESTAMP_SECONDS,
+    pa.timestamp('ms'): type_id.TIMESTAMP_MILLISECONDS,
+    pa.timestamp('us'): type_id.TIMESTAMP_MICROSECONDS,
+    pa.timestamp('ns'): type_id.TIMESTAMP_NANOSECONDS,
+}
+
+LIBCUDF_TO_ARROW_TYPES = {
+    v: k for k, v in ARROW_TO_PYLIBCUDF_TYPES.items()
+}
 
 cdef column_metadata _metadata_to_libcudf(metadata):
     """Convert a ColumnMetadata object to C++ column_metadata.
@@ -93,10 +112,7 @@ def _from_arrow_datatype(pyarrow_object):
     elif isinstance(pyarrow_object, pa.ListType):
         return DataType(type_id.LIST)
     else:
-        return DataType(
-            SUPPORTED_NUMPY_TO_LIBCUDF_TYPES.get(
-                np.dtype(pyarrow_object.to_pandas_dtype()))
-            )
+        return DataType(ARROW_TO_PYLIBCUDF_TYPES.get(pyarrow_object))
 
 
 @from_arrow.register(pa.Table)
@@ -201,9 +217,7 @@ def _to_arrow_datatype(cudf_object):
         raise ValueError(
             f"Cannot convert {cudf_object} to PyArrow type"
         )
-    return pa.from_numpy_dtype(
-        LIBCUDF_TO_SUPPORTED_NUMPY_TYPES.get(cudf_object)
-    )
+    return LIBCUDF_TO_ARROW_TYPES.get(cudf_object.id())
 
 
 @to_arrow.register(Table)
diff --git a/python/cudf/cudf/_lib/pylibcudf/types.pyx b/python/cudf/cudf/_lib/pylibcudf/types.pyx
index 42c8324999e..841a600df06 100644
--- a/python/cudf/cudf/_lib/pylibcudf/types.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/types.pyx
@@ -15,8 +15,6 @@ from cudf._lib.pylibcudf.libcudf.types import null_order as NullOrder  # no-cyth
 from cudf._lib.pylibcudf.libcudf.types import order as Order  # no-cython-lint, isort:skip
 from cudf._lib.pylibcudf.libcudf.types import sorted as Sorted  # no-cython-lint, isort:skip
 
-import numpy as np
-
 
 cdef class DataType:
     """Indicator for the logical data type of an element in a column.
@@ -66,54 +64,3 @@ cdef class DataType:
         cdef DataType ret = DataType.__new__(DataType, type_id.EMPTY)
         ret.c_obj = dt
         return ret
-
-SUPPORTED_NUMPY_TO_LIBCUDF_TYPES = {
-    np.dtype("int8"): TypeId.INT8,
-    np.dtype("int16"): TypeId.INT16,
-    np.dtype("int32"): TypeId.INT32,
-    np.dtype("int64"): TypeId.INT64,
-    np.dtype("uint8"): TypeId.UINT8,
-    np.dtype("uint16"): TypeId.UINT16,
-    np.dtype("uint32"): TypeId.UINT32,
-    np.dtype("uint64"): TypeId.UINT64,
-    np.dtype("float32"): TypeId.FLOAT32,
-    np.dtype("float64"): TypeId.FLOAT64,
-    np.dtype("datetime64[s]"): TypeId.TIMESTAMP_SECONDS,
-    np.dtype("datetime64[ms]"): TypeId.TIMESTAMP_MILLISECONDS,
-    np.dtype("datetime64[us]"): TypeId.TIMESTAMP_MICROSECONDS,
-    np.dtype("datetime64[ns]"): TypeId.TIMESTAMP_NANOSECONDS,
-    np.dtype("object"): TypeId.STRING,
-    np.dtype("bool"): TypeId.BOOL8,
-    np.dtype("timedelta64[s]"): TypeId.DURATION_SECONDS,
-    np.dtype("timedelta64[ms]"): TypeId.DURATION_MILLISECONDS,
-    np.dtype("timedelta64[us]"): TypeId.DURATION_MICROSECONDS,
-    np.dtype("timedelta64[ns]"): TypeId.DURATION_NANOSECONDS,
-}
-
-LIBCUDF_TO_SUPPORTED_NUMPY_TYPES = {
-    # There's no equivalent to EMPTY in cudf.  We translate EMPTY
-    # columns from libcudf to ``int8`` columns of all nulls in Python.
-    # ``int8`` is chosen because it uses the least amount of memory.
-    TypeId.EMPTY: np.dtype("int8"),
-    TypeId.INT8: np.dtype("int8"),
-    TypeId.INT16: np.dtype("int16"),
-    TypeId.INT32: np.dtype("int32"),
-    TypeId.INT64: np.dtype("int64"),
-    TypeId.UINT8: np.dtype("uint8"),
-    TypeId.UINT16: np.dtype("uint16"),
-    TypeId.UINT32: np.dtype("uint32"),
-    TypeId.UINT64: np.dtype("uint64"),
-    TypeId.FLOAT32: np.dtype("float32"),
-    TypeId.FLOAT64: np.dtype("float64"),
-    TypeId.BOOL8: np.dtype("bool"),
-    TypeId.TIMESTAMP_SECONDS: np.dtype("datetime64[s]"),
-    TypeId.TIMESTAMP_MILLISECONDS: np.dtype("datetime64[ms]"),
-    TypeId.TIMESTAMP_MICROSECONDS: np.dtype("datetime64[us]"),
-    TypeId.TIMESTAMP_NANOSECONDS: np.dtype("datetime64[ns]"),
-    TypeId.DURATION_SECONDS: np.dtype("timedelta64[s]"),
-    TypeId.DURATION_MILLISECONDS: np.dtype("timedelta64[ms]"),
-    TypeId.DURATION_MICROSECONDS: np.dtype("timedelta64[us]"),
-    TypeId.DURATION_NANOSECONDS: np.dtype("timedelta64[ns]"),
-    TypeId.STRING: np.dtype("object"),
-    TypeId.STRUCT: np.dtype("object"),
-}
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index 48ff0170bc3..895e1afc502 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -20,10 +20,6 @@ from cudf._lib.types cimport (
 
 import cudf
 from cudf._lib import pylibcudf
-from cudf._lib.pylibcudf.types import (
-    LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
-    SUPPORTED_NUMPY_TO_LIBCUDF_TYPES,
-)
 
 size_type_dtype = np.dtype("int32")
 
@@ -79,11 +75,62 @@ class TypeId(IntEnum):
     STRUCT = <underlying_type_t_type_id> libcudf_types.type_id.STRUCT
 
 
+SUPPORTED_NUMPY_TO_LIBCUDF_TYPES = {
+    np.dtype("int8"): TypeId.INT8,
+    np.dtype("int16"): TypeId.INT16,
+    np.dtype("int32"): TypeId.INT32,
+    np.dtype("int64"): TypeId.INT64,
+    np.dtype("uint8"): TypeId.UINT8,
+    np.dtype("uint16"): TypeId.UINT16,
+    np.dtype("uint32"): TypeId.UINT32,
+    np.dtype("uint64"): TypeId.UINT64,
+    np.dtype("float32"): TypeId.FLOAT32,
+    np.dtype("float64"): TypeId.FLOAT64,
+    np.dtype("datetime64[s]"): TypeId.TIMESTAMP_SECONDS,
+    np.dtype("datetime64[ms]"): TypeId.TIMESTAMP_MILLISECONDS,
+    np.dtype("datetime64[us]"): TypeId.TIMESTAMP_MICROSECONDS,
+    np.dtype("datetime64[ns]"): TypeId.TIMESTAMP_NANOSECONDS,
+    np.dtype("object"): TypeId.STRING,
+    np.dtype("bool"): TypeId.BOOL8,
+    np.dtype("timedelta64[s]"): TypeId.DURATION_SECONDS,
+    np.dtype("timedelta64[ms]"): TypeId.DURATION_MILLISECONDS,
+    np.dtype("timedelta64[us]"): TypeId.DURATION_MICROSECONDS,
+    np.dtype("timedelta64[ns]"): TypeId.DURATION_NANOSECONDS,
+}
+
 SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = {
     k: pylibcudf.TypeId(v).value
     for k, v in SUPPORTED_NUMPY_TO_LIBCUDF_TYPES.items()
 }
 
+LIBCUDF_TO_SUPPORTED_NUMPY_TYPES = {
+    # There's no equivalent to EMPTY in cudf.  We translate EMPTY
+    # columns from libcudf to ``int8`` columns of all nulls in Python.
+    # ``int8`` is chosen because it uses the least amount of memory.
+    TypeId.EMPTY: np.dtype("int8"),
+    TypeId.INT8: np.dtype("int8"),
+    TypeId.INT16: np.dtype("int16"),
+    TypeId.INT32: np.dtype("int32"),
+    TypeId.INT64: np.dtype("int64"),
+    TypeId.UINT8: np.dtype("uint8"),
+    TypeId.UINT16: np.dtype("uint16"),
+    TypeId.UINT32: np.dtype("uint32"),
+    TypeId.UINT64: np.dtype("uint64"),
+    TypeId.FLOAT32: np.dtype("float32"),
+    TypeId.FLOAT64: np.dtype("float64"),
+    TypeId.BOOL8: np.dtype("bool"),
+    TypeId.TIMESTAMP_SECONDS: np.dtype("datetime64[s]"),
+    TypeId.TIMESTAMP_MILLISECONDS: np.dtype("datetime64[ms]"),
+    TypeId.TIMESTAMP_MICROSECONDS: np.dtype("datetime64[us]"),
+    TypeId.TIMESTAMP_NANOSECONDS: np.dtype("datetime64[ns]"),
+    TypeId.DURATION_SECONDS: np.dtype("timedelta64[s]"),
+    TypeId.DURATION_MILLISECONDS: np.dtype("timedelta64[ms]"),
+    TypeId.DURATION_MICROSECONDS: np.dtype("timedelta64[us]"),
+    TypeId.DURATION_NANOSECONDS: np.dtype("timedelta64[ns]"),
+    TypeId.STRING: np.dtype("object"),
+    TypeId.STRUCT: np.dtype("object"),
+}
+
 PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = {
     pylibcudf.TypeId(k).value: v
     for k, v in LIBCUDF_TO_SUPPORTED_NUMPY_TYPES.items()