Apply Merlin dtypes to Systems code base (#239)

* Apply Merlin dtypes to Systems code base * Adjust `tox.ini` to run against Merlin dtypes branch of Core * Placate `pylint` about `dtype` not being callable * Specify the output dtypes in Feast test and adjust dtypes before casting * Adjust dtype conversion during export to use the `md.dtype` syntax * Use Merlin dtypes in `DictArray` * Adjust `tox.ini` to run against the Core dtypes branch * Update tox.ini to use main fork of Merlin Core * Update tox.ini to use main fork of Merlin Core * Update tox.ini to use main fork of Merlin Core * Update tox.ini to use main fork of Merlin Core * Add `to_numpy` to NVT tests
NVIDIA-Merlin · Jan 17, 2023 · 5c6b8f3 · 5c6b8f3
1 parent 4b9c41c
commit 5c6b8f3
Show file tree

Hide file tree

Showing 8 changed files with 33 additions and 35 deletions.
diff --git a/merlin/systems/dag/dictarray.py b/merlin/systems/dag/dictarray.py
@@ -26,6 +26,7 @@
     is_list_dtype,
 )
 
+import merlin.dtypes as md
 from merlin.core.protocols import SeriesLike
 
 try:
@@ -53,7 +54,7 @@ def __init__(self, values, row_lengths=None):
 
         self.values = _make_array(values)
         self.row_lengths = _make_array(row_lengths)
-        self.dtype = self.values.dtype
+        self.dtype = md.dtype(self.values.dtype)
 
         if isinstance(self.values, np.ndarray):
             self._device = Device.CPU

diff --git a/merlin/systems/dag/ops/feast.py b/merlin/systems/dag/ops/feast.py
@@ -306,7 +306,7 @@ def transform(
 
             feature_value = feast_response[feature_name]
             feature_array = array_lib.array([feature_value]).T.astype(
-                self.output_schema[prefixed_name].dtype
+                self.output_schema[prefixed_name].dtype.to_numpy
             )
             output_tensors[prefixed_name] = feature_array
 
@@ -330,15 +330,15 @@ def transform(
 
             # create a numpy array
             feature_array = array_lib.array(feature_value).T.astype(
-                self.output_schema[prefixed_name].dtype
+                self.output_schema[prefixed_name].dtype.to_numpy
             )
 
             # if we're a list but not ragged, construct row lengths
             if not row_lengths:
                 row_lengths = [len(feature_array)]
 
             feature_row_lengths = array_lib.array(
-                [row_lengths], dtype=self.output_schema[prefixed_name].dtype
+                [row_lengths], dtype=self.output_schema[prefixed_name].dtype.to_numpy
             ).T
 
             output_tensors[prefixed_name] = (feature_array, feature_row_lengths)

diff --git a/merlin/systems/triton/__init__.py b/merlin/systems/triton/__init__.py
@@ -15,6 +15,7 @@
 import json
 import os
 
+import numpy as np
 import pandas as pd
 
 # this needs to be before any modules that import protobuf
@@ -68,25 +69,37 @@ def _convert_df_to_dict(schema, batch, dtype="int32"):
 
             if col_schema.is_ragged:
                 df_dict[col_name + "__values"] = col.list.leaves.values_host.astype(
-                    col_schema.dtype
+                    col_schema.dtype.to_numpy
                 )
                 df_dict[col_name + "__lengths"] = col._column.offsets.values_host.astype(dtype)
             else:
                 values = col.list.leaves.values_host
-                values = values.reshape(*shape).astype(col_schema.dtype)
+                values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)
                 df_dict[col_name] = values
 
         else:
             values = col.values if isinstance(col, pd.Series) else col.values_host
-            values = values.reshape(*shape).astype(col_schema.dtype)
+            values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)
             df_dict[col_name] = values
     return df_dict
 
 
 def _convert_column_to_triton_input(col_name, col_values, input_class=grpcclient.InferInput):
-    dtype = np_to_triton_dtype(col_values.dtype)
+    # Triton's mapping of numpy types to Triton types doesn't know how to handle string types,
+    # so we need to map them to object ourselves before we call np_to_triton_dtype
+    col_dtype = col_values.dtype
+    if isinstance(col_dtype, type(np.dtype("str"))):
+        col_dtype = np.dtype("O")
+
+    dtype = np_to_triton_dtype(col_dtype)
     input_tensor = input_class(col_name, col_values.shape, dtype)
+
+    # set_data_from_numpy checks the type against what was supplied when we created the tensor
+    # using np_to_triton_dtype, so the workaround above isn't enough to make them match here.
+    # Do one last `astype` cast to make absolutely sure the dtypes match.
+    col_values = col_values.astype(col_dtype)
     input_tensor.set_data_from_numpy(col_values)
+
     return input_tensor
 
 

diff --git a/merlin/systems/triton/export.py b/merlin/systems/triton/export.py
@@ -19,6 +19,7 @@
 
 import tritonclient.grpc.model_config_pb2 as model_config  # noqa
 
+import merlin.dtypes as md  # noqa
 from merlin.core.dispatch import is_string_dtype  # noqa
 
 
@@ -45,31 +46,14 @@ def _add_model_param(col_schema, paramclass, params, dims=None):
 
 def _convert_dtype(dtype):
     """converts a dtype to the appropriate triton proto type"""
-
-    if dtype and not isinstance(dtype, str):
-        dtype_name = dtype.name if hasattr(dtype, "name") else dtype.__name__
-    else:
-        dtype_name = dtype
-
-    dtypes = {
-        "float64": model_config.TYPE_FP64,
-        "float32": model_config.TYPE_FP32,
-        "float16": model_config.TYPE_FP16,
-        "int64": model_config.TYPE_INT64,
-        "int32": model_config.TYPE_INT32,
-        "int16": model_config.TYPE_INT16,
-        "int8": model_config.TYPE_INT8,
-        "uint64": model_config.TYPE_UINT64,
-        "uint32": model_config.TYPE_UINT32,
-        "uint16": model_config.TYPE_UINT16,
-        "uint8": model_config.TYPE_UINT8,
-        "bool": model_config.TYPE_BOOL,
-    }
+    dtype = md.dtype(dtype)
+    try:
+        return dtype.to("triton")
+    except ValueError:
+        dtype = dtype.to_numpy
 
     if is_string_dtype(dtype):
         return model_config.TYPE_STRING
-    elif dtype_name in dtypes:
-        return dtypes[dtype_name]
     else:
         raise ValueError(f"Can't convert {dtype} to a Triton dtype")
 

diff --git a/tests/unit/systems/dag/runtimes/local/ops/nvtabular/test_ensemble.py b/tests/unit/systems/dag/runtimes/local/ops/nvtabular/test_ensemble.py
@@ -47,7 +47,7 @@ def test_workflow_op_serving_triton(tmpdir, dataset, engine, runtime):
     input_data = {}
     for col_name, col_schema in workflow.input_schema.column_schemas.items():
         col_dtype = col_schema.dtype
-        input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype)
+        input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype.to_numpy)
     dictarray = DictArray(input_data)
     response = wkflow_ensemble.transform(dictarray, runtime=runtime)
 

diff --git a/tests/unit/systems/dag/runtimes/triton/ops/workflow/test_ensemble.py b/tests/unit/systems/dag/runtimes/triton/ops/workflow/test_ensemble.py
@@ -64,7 +64,7 @@ def test_workflow_op_serving_triton(
     input_data = {}
     inputs = []
     for col_name, col_schema in workflow.input_schema.column_schemas.items():
-        col_dtype = col_schema.dtype
+        col_dtype = col_schema.dtype.to_numpy
         input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype)
 
         triton_input = grpcclient.InferInput(

diff --git a/tests/unit/systems/ops/feast/test_op.py b/tests/unit/systems/ops/feast/test_op.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+import merlin.dtypes as md
 from merlin.dag import ColumnSelector
 from merlin.schema import ColumnSchema, Schema
 from merlin.systems.dag import DictArray
@@ -179,8 +180,8 @@ def test_feast_transform(prefix, is_ragged):
         )
         output_schema = Schema(
             [
-                ColumnSchema(feature_name),
-                ColumnSchema(feature_mh, is_list=True, is_ragged=is_ragged),
+                ColumnSchema(feature_name, dtype=md.float32),
+                ColumnSchema(feature_mh, dtype=md.float32, is_list=True, is_ragged=is_ragged),
             ]
         )
 

diff --git a/tox.ini b/tox.ini
@@ -20,7 +20,6 @@ commands =
     python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/models.git
     python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git@{posargs:main}
     python -m pip install .
-
     python -m pytest --cov-report term --cov=merlin -rxs tests/unit
 
 [testenv:test-gpu]