rapidsai · rapids-bot · Jun 15, 2021 · May 26, 2021 · May 26, 2021 · May 26, 2021
@@ -41,6 +41,7 @@
 from cudf.core.buffer import Buffer
 from cudf.core.dtypes import (
     CategoricalDtype,
+    Decimal64Dtype,
     IntervalDtype,
     ListDtype,
     StructDtype,
@@ -1267,6 +1268,32 @@ def scatter_to_table(
             }
         )
 
+    def _apply_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
+        """
+        Return this column with the type metadata carried by `dtype` applied.
+
+        * if `dtype` is a CategoricalDtype and this column is not already a
+          categorical column, build a categorical column that uses this
+          column's data as codes and takes its categories and ordering from
+          `dtype`; mask, size, offset and null count are carried over.
+        * if `dtype` is a StructDtype and this column is a struct column,
+          rename its fields to the field names of `dtype`.
+        * if `dtype` is a Decimal64Dtype and this column is a decimal
+          column, overwrite the precision on the column's own dtype object
+          (done in place, no new column is created).
+        * if none of the above, return the column without any changes.
+        """
+        col = self
+
+        if isinstance(dtype, CategoricalDtype):
+            if not isinstance(col, cudf.core.column.CategoricalColumn):
+                # The existing values become the codes of the new column.
+                codes = as_column(col.base_data, dtype=col.dtype)
+                col = build_categorical_column(
+                    categories=dtype.categories._values,
+                    codes=codes,
+                    mask=col.base_mask,
+                    ordered=dtype.ordered,
+                    size=col.size,
+                    offset=col.offset,
+                    null_count=col.null_count,
+                )
+
+        if isinstance(dtype, StructDtype):
+            if isinstance(col, cudf.core.column.StructColumn):
+                col = col._rename_fields(dtype.fields.keys())
+
+        if isinstance(dtype, Decimal64Dtype):
+            if isinstance(col, cudf.core.column.DecimalColumn):
+                # Mutates the dtype attached to the column.
+                col.dtype.precision = dtype.precision
+
+        return col
+
     def _copy_type_metadata(self: ColumnBase, other: ColumnBase) -> ColumnBase:
         """
         Copies type metadata from self onto other, returning a new column.
@@ -1276,6 +1303,8 @@ def _copy_type_metadata(self: ColumnBase, other: ColumnBase) -> ColumnBase:
           and the children of `other`.
         * if none of the above, return `other` without any changes
         """
+        other = other._apply_type_metadata(self.dtype)
+
         # TODO: This logic should probably be moved to a common nested column
         # class.
         if isinstance(other, type(self)):
@@ -2200,6 +2229,17 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase:
     return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size)
 
 
+def _cudf_dtype_from_arrow_type(arrow_type: Dtype) -> Dtype:
+    """
+    Map an arrow type onto the matching cudf dtype.
+
+    Decimal, struct and list arrow types are converted through the
+    `from_arrow` constructor of Decimal64Dtype, StructDtype and ListDtype
+    respectively. Any other type is returned unchanged.
+    """
+    # Checked in order; the first predicate that matches wins.
+    converters = (
+        (pa.types.is_decimal, Decimal64Dtype),
+        (pa.types.is_struct, StructDtype),
+        (pa.types.is_list, ListDtype),
+    )
+    for matches, dtype_cls in converters:
+        if matches(arrow_type):
+            return dtype_cls.from_arrow(arrow_type)
+
+    return arrow_type
+
+
 def _copy_type_metadata_from_arrow(
     arrow_array: pa.array, cudf_column: ColumnBase
 ) -> ColumnBase:
@@ -2211,13 +2251,10 @@ def _copy_type_metadata_from_arrow(
     * When `arrow_array` is decimal type and `cudf_column` is
     Decimal64Dtype, copy precisions.
     """
-    if pa.types.is_decimal(arrow_array.type) and isinstance(
-        cudf_column, cudf.core.column.DecimalColumn
-    ):
-        cudf_column.dtype.precision = arrow_array.type.precision
-    elif pa.types.is_struct(arrow_array.type) and isinstance(
-        cudf_column, cudf.core.column.StructColumn
-    ):
+    cudf_dtype = _cudf_dtype_from_arrow_type(arrow_array.type)
+    cudf_column = cudf_column._apply_type_metadata(cudf_dtype)
+
+    if isinstance(cudf_column, cudf.core.column.StructColumn):
         base_children = tuple(
             _copy_type_metadata_from_arrow(arrow_array.field(i), col_child)
             for i, col_child in enumerate(cudf_column.base_children)
@@ -2226,30 +2263,30 @@ def _copy_type_metadata_from_arrow(
         return cudf.core.column.StructColumn(
             data=None,
             size=cudf_column.base_size,
-            dtype=StructDtype.from_arrow(arrow_array.type),
+            dtype=cudf_dtype,
             mask=cudf_column.base_mask,
             offset=cudf_column.offset,
             null_count=cudf_column.null_count,
             children=base_children,
         )
-    elif pa.types.is_list(arrow_array.type) and isinstance(
-        cudf_column, cudf.core.column.ListColumn
+
+    elif isinstance(cudf_column, cudf.core.column.ListColumn) and (
+        arrow_array.values and cudf_column.base_children
     ):
-        if arrow_array.values and cudf_column.base_children:
-            base_children = (
-                cudf_column.base_children[0],
-                _copy_type_metadata_from_arrow(
-                    arrow_array.values, cudf_column.base_children[1]
-                ),
-            )
-            return cudf.core.column.ListColumn(
-                size=cudf_column.base_size,
-                dtype=ListDtype.from_arrow(arrow_array.type),
-                mask=cudf_column.base_mask,
-                offset=cudf_column.offset,
-                null_count=cudf_column.null_count,
-                children=base_children,
-            )
+        base_children = (
+            cudf_column.base_children[0],
+            _copy_type_metadata_from_arrow(
+                arrow_array.values, cudf_column.base_children[1]
+            ),
+        )
+        return cudf.core.column.ListColumn(
+            size=cudf_column.base_size,
+            dtype=cudf_dtype,
+            mask=cudf_column.base_mask,
+            offset=cudf_column.offset,
+            null_count=cudf_column.null_count,
+            children=base_children,
+        )
 
     return cudf_column
 

@@ -367,6 +367,32 @@ def test_as_column_buffer(data, expected):
     assert_eq(cudf.Series(actual_column), cudf.Series(expected))
 
 
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        # decimal128 arrow array vs. a column built with Decimal64Dtype(3, 0)
+        (
+            pa.array([100, 200, 300], type=pa.decimal128(3)),
+            cudf.core.column.as_column(
+                [100, 200, 300], dtype=cudf.core.dtypes.Decimal64Dtype(3, 0)
+            ),
+        ),
+        # struct rows whose dicts use different keys
+        (
+            pa.array([{"a": 1, "b": 3}, {"c": 2, "d": 4}]),
+            cudf.core.column.as_column([{"a": 1, "b": 3}, {"c": 2, "d": 4}]),
+        ),
+        # list-of-list rows
+        (
+            pa.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]),
+            cudf.core.column.as_column(
+                [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]
+            ),
+        ),
+    ],
+)
+def test_as_column_arrow_array(data, expected):
+    """
+    `as_column` applied to an arrow array must compare equal to the
+    expected column once both are wrapped in a Series.
+    """
+    got = cudf.Series(cudf.core.column.as_column(data))
+    want = cudf.Series(expected)
+    assert_eq(got, want)
+
+
 @pytest.mark.parametrize(
     "pd_dtype,expect_dtype",
     [