Skip to content

Commit

Permalink
Remove cudf._libs.types.pyx (#17665)
Browse files Browse the repository at this point in the history
Contributes to #17317

1. Moves some Python routines/objects to `cudf/utils/dtypes.py`
2. Moves specific column only routines directly to `cudf/_libs/column.pyx`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #17665
mroeschke authored Jan 9, 2025

Verified

This commit was signed with the committer’s verified signature.
renovate-bot Mend Renovate
1 parent 2310159 commit a8a4197
Showing 26 changed files with 251 additions and 316 deletions.
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx)
set(cython_sources column.pyx scalar.pyx strings_udf.pyx)
set(linked_libraries cudf::cudf)

rapids_cython_create_modules(
4 changes: 3 additions & 1 deletion python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

from typing import Literal

@@ -13,6 +13,8 @@ from pylibcudf.libcudf.column.column_view cimport (
from pylibcudf.libcudf.types cimport size_type
from rmm.librmm.device_buffer cimport device_buffer

cdef dtype_from_lists_column_view(column_view cv)
cdef dtype_from_column_view(column_view cv)

cdef class Column:
cdef public:
100 changes: 86 additions & 14 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.


from typing import Literal
@@ -19,24 +19,21 @@ from cudf.core.buffer import (
as_buffer,
cuda_array_interface_wrapper,
)
from cudf.utils.dtypes import _get_base_dtype
from cudf.utils.dtypes import (
_get_base_dtype,
dtype_to_pylibcudf_type,
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
)

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
from libc.stdint cimport uintptr_t, int32_t
from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from rmm.pylibrmm.device_buffer cimport DeviceBuffer

from cudf._lib.types cimport (
dtype_from_column_view,
dtype_to_pylibcudf_type,
)

from cudf._lib.types import dtype_from_pylibcudf_column

from pylibcudf cimport DataType as plc_DataType
from pylibcudf cimport DataType as plc_DataType, Column as plc_Column
cimport pylibcudf.libcudf.copying as cpp_copying
cimport pylibcudf.libcudf.types as libcudf_types
cimport pylibcudf.libcudf.unary as libcudf_unary
@@ -45,6 +42,7 @@ from pylibcudf.libcudf.column.column_factories cimport (
make_numeric_column
)
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from pylibcudf.libcudf.scalar.scalar cimport scalar

@@ -64,6 +62,80 @@ cdef get_element(column_view col_view, size_type index):
)


def dtype_from_pylibcudf_column(plc_Column col not None):
type_ = col.type()
tid = type_.id()

if tid == pylibcudf.TypeId.LIST:
child = col.list_view().child()
return cudf.ListDtype(dtype_from_pylibcudf_column(child))
elif tid == pylibcudf.TypeId.STRUCT:
fields = {
str(i): dtype_from_pylibcudf_column(col.child(i))
for i in range(col.num_children())
}
return cudf.StructDtype(fields)
elif tid == pylibcudf.TypeId.DECIMAL64:
return cudf.Decimal64Dtype(
precision=cudf.Decimal64Dtype.MAX_PRECISION,
scale=-type_.scale()
)
elif tid == pylibcudf.TypeId.DECIMAL32:
return cudf.Decimal32Dtype(
precision=cudf.Decimal32Dtype.MAX_PRECISION,
scale=-type_.scale()
)
elif tid == pylibcudf.TypeId.DECIMAL128:
return cudf.Decimal128Dtype(
precision=cudf.Decimal128Dtype.MAX_PRECISION,
scale=-type_.scale()
)
else:
return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid]


cdef dtype_from_lists_column_view(column_view cv):
# lists_column_view have no default constructor, so we heap
# allocate it to get around Cython's limitation of requiring
# default constructors for stack allocated objects
cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv)
cdef column_view child = lv.get()[0].child()

if child.type().id() == libcudf_types.type_id.LIST:
return cudf.ListDtype(dtype_from_lists_column_view(child))
else:
return cudf.ListDtype(dtype_from_column_view(child))


cdef dtype_from_column_view(column_view cv):
cdef libcudf_types.type_id tid = cv.type().id()
if tid == libcudf_types.type_id.LIST:
return dtype_from_lists_column_view(cv)
elif tid == libcudf_types.type_id.STRUCT:
fields = {
str(i): dtype_from_column_view(cv.child(i))
for i in range(cv.num_children())
}
return cudf.StructDtype(fields)
elif tid == libcudf_types.type_id.DECIMAL64:
return cudf.Decimal64Dtype(
precision=cudf.Decimal64Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
elif tid == libcudf_types.type_id.DECIMAL32:
return cudf.Decimal32Dtype(
precision=cudf.Decimal32Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
elif tid == libcudf_types.type_id.DECIMAL128:
return cudf.Decimal128Dtype(
precision=cudf.Decimal128Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
else:
return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[<int32_t>(tid)]


cdef class Column:
"""
A Column stores columnar data in device memory.
@@ -361,7 +433,7 @@ cdef class Column:
col = self
data_dtype = col.dtype

cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
cdef libcudf_types.size_type offset = self.offset
cdef vector[mutable_column_view] children
cdef void* data
@@ -424,7 +496,7 @@ cdef class Column:
col = self
data_dtype = col.dtype

cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
cdef libcudf_types.size_type offset = self.offset
cdef vector[column_view] children
cdef void* data
49 changes: 15 additions & 34 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

import copy

@@ -14,17 +14,16 @@ import pylibcudf as plc

import cudf
from cudf.core.dtypes import ListDtype, StructDtype
from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
from cudf.core.missing import NA, NaT
from cudf.utils.dtypes import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES

# We currently need this cimport because some of the implementations here
# access the c_obj of the scalar, and because we need to be able to call
# pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until
# DeviceScalar is phased out entirely from cuDF Cython (at which point
# cudf.Scalar will be directly backed by pylibcudf.Scalar).
from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID
from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar
from pylibcudf cimport Scalar as plc_Scalar
from pylibcudf.libcudf.scalar.scalar cimport scalar


def _replace_nested(obj, check, replacement):
@@ -223,40 +222,22 @@ cdef class DeviceScalar:
return s

cdef void _set_dtype(self, dtype=None):
cdef plc_TypeID cdtype_id = self.c_value.type().id()
cdtype_id = self.c_value.type().id()
if dtype is not None:
self._dtype = dtype
elif cdtype_id in {
plc_TypeID.DECIMAL32,
plc_TypeID.DECIMAL64,
plc_TypeID.DECIMAL128,
plc.TypeID.DECIMAL32,
plc.TypeID.DECIMAL64,
plc.TypeID.DECIMAL128,
}:
raise TypeError(
"Must pass a dtype when constructing from a fixed-point scalar"
)
elif cdtype_id == plc_TypeID.STRUCT:
struct_table_view = (<struct_scalar*>self.get_raw_ptr())[0].view()
self._dtype = StructDtype({
str(i): dtype_from_column_view(struct_table_view.column(i))
for i in range(struct_table_view.num_columns())
})
elif cdtype_id == plc_TypeID.LIST:
if (
<list_scalar*>self.get_raw_ptr()
)[0].view().type().id() == plc_TypeID.LIST:
self._dtype = dtype_from_column_view(
(<list_scalar*>self.get_raw_ptr())[0].view()
)
else:
self._dtype = ListDtype(
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(
(<list_scalar*>self.get_raw_ptr())[0]
.view().type().id()
)
]
)
elif cdtype_id == plc.TypeID.STRUCT:
self._dtype = StructDtype.from_arrow(
plc.interop.to_arrow(self.c_value).type
)
elif cdtype_id == plc.TypeID.LIST:
self._dtype = ListDtype.from_arrow(plc.interop.to_arrow(self.c_value).type)
else:
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(cdtype_id)
]
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[cdtype_id]
11 changes: 0 additions & 11 deletions python/cudf/cudf/_lib/types.pxd

This file was deleted.

Loading

0 comments on commit a8a4197

Please sign in to comment.