Skip to content

Commit

Permalink
Apply Merlin dtypes to Systems code base (#239)
Browse files Browse the repository at this point in the history
* Apply Merlin dtypes to Systems code base

* Adjust `tox.ini` to run against Merlin dtypes branch of Core

* Placate `pylint` about `dtype` not being callable

* Specify the output dtypes in Feast test and adjust dtypes before casting

* Adjust dtype conversion during export to use the `md.dtype` syntax

* Use Merlin dtypes in `DictArray`

* Adjust `tox.ini` to run against the Core dtypes branch

* Update tox.ini to use main fork of Merlin Core

* Update tox.ini to use main fork of Merlin Core

* Update tox.ini to use main fork of Merlin Core

* Update tox.ini to use main fork of Merlin Core

* Add `to_numpy` to NVT tests
  • Loading branch information
karlhigley authored Jan 17, 2023
1 parent 4b9c41c commit 5c6b8f3
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 35 deletions.
3 changes: 2 additions & 1 deletion merlin/systems/dag/dictarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
is_list_dtype,
)

import merlin.dtypes as md
from merlin.core.protocols import SeriesLike

try:
Expand Down Expand Up @@ -53,7 +54,7 @@ def __init__(self, values, row_lengths=None):

self.values = _make_array(values)
self.row_lengths = _make_array(row_lengths)
self.dtype = self.values.dtype
self.dtype = md.dtype(self.values.dtype)

if isinstance(self.values, np.ndarray):
self._device = Device.CPU
Expand Down
6 changes: 3 additions & 3 deletions merlin/systems/dag/ops/feast.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def transform(

feature_value = feast_response[feature_name]
feature_array = array_lib.array([feature_value]).T.astype(
self.output_schema[prefixed_name].dtype
self.output_schema[prefixed_name].dtype.to_numpy
)
output_tensors[prefixed_name] = feature_array

Expand All @@ -330,15 +330,15 @@ def transform(

# create a numpy array
feature_array = array_lib.array(feature_value).T.astype(
self.output_schema[prefixed_name].dtype
self.output_schema[prefixed_name].dtype.to_numpy
)

# if we're a list but not ragged, construct row lengths
if not row_lengths:
row_lengths = [len(feature_array)]

feature_row_lengths = array_lib.array(
[row_lengths], dtype=self.output_schema[prefixed_name].dtype
[row_lengths], dtype=self.output_schema[prefixed_name].dtype.to_numpy
).T

output_tensors[prefixed_name] = (feature_array, feature_row_lengths)
Expand Down
21 changes: 17 additions & 4 deletions merlin/systems/triton/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import json
import os

import numpy as np
import pandas as pd

# this needs to be before any modules that import protobuf
Expand Down Expand Up @@ -68,25 +69,37 @@ def _convert_df_to_dict(schema, batch, dtype="int32"):

if col_schema.is_ragged:
df_dict[col_name + "__values"] = col.list.leaves.values_host.astype(
col_schema.dtype
col_schema.dtype.to_numpy
)
df_dict[col_name + "__lengths"] = col._column.offsets.values_host.astype(dtype)
else:
values = col.list.leaves.values_host
values = values.reshape(*shape).astype(col_schema.dtype)
values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)
df_dict[col_name] = values

else:
values = col.values if isinstance(col, pd.Series) else col.values_host
values = values.reshape(*shape).astype(col_schema.dtype)
values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)
df_dict[col_name] = values
return df_dict


def _convert_column_to_triton_input(col_name, col_values, input_class=grpcclient.InferInput):
dtype = np_to_triton_dtype(col_values.dtype)
# Triton's mapping of numpy types to Triton types doesn't know how to handle string types,
# so we need to map them to object ourselves before we call np_to_triton_dtype
col_dtype = col_values.dtype
if isinstance(col_dtype, type(np.dtype("str"))):
col_dtype = np.dtype("O")

dtype = np_to_triton_dtype(col_dtype)
input_tensor = input_class(col_name, col_values.shape, dtype)

# set_data_from_numpy checks the type against what was supplied when we created the tensor
# using np_to_triton_dtype, so the workaround above isn't enough to make them match here.
# Do one last `astype` cast to make absolutely sure the dtypes match.
col_values = col_values.astype(col_dtype)
input_tensor.set_data_from_numpy(col_values)

return input_tensor


Expand Down
28 changes: 6 additions & 22 deletions merlin/systems/triton/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import tritonclient.grpc.model_config_pb2 as model_config # noqa

import merlin.dtypes as md # noqa
from merlin.core.dispatch import is_string_dtype # noqa


Expand All @@ -45,31 +46,14 @@ def _add_model_param(col_schema, paramclass, params, dims=None):

def _convert_dtype(dtype):
"""converts a dtype to the appropriate triton proto type"""

if dtype and not isinstance(dtype, str):
dtype_name = dtype.name if hasattr(dtype, "name") else dtype.__name__
else:
dtype_name = dtype

dtypes = {
"float64": model_config.TYPE_FP64,
"float32": model_config.TYPE_FP32,
"float16": model_config.TYPE_FP16,
"int64": model_config.TYPE_INT64,
"int32": model_config.TYPE_INT32,
"int16": model_config.TYPE_INT16,
"int8": model_config.TYPE_INT8,
"uint64": model_config.TYPE_UINT64,
"uint32": model_config.TYPE_UINT32,
"uint16": model_config.TYPE_UINT16,
"uint8": model_config.TYPE_UINT8,
"bool": model_config.TYPE_BOOL,
}
dtype = md.dtype(dtype)
try:
return dtype.to("triton")
except ValueError:
dtype = dtype.to_numpy

if is_string_dtype(dtype):
return model_config.TYPE_STRING
elif dtype_name in dtypes:
return dtypes[dtype_name]
else:
raise ValueError(f"Can't convert {dtype} to a Triton dtype")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_workflow_op_serving_triton(tmpdir, dataset, engine, runtime):
input_data = {}
for col_name, col_schema in workflow.input_schema.column_schemas.items():
col_dtype = col_schema.dtype
input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype)
input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype.to_numpy)
dictarray = DictArray(input_data)
response = wkflow_ensemble.transform(dictarray, runtime=runtime)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_workflow_op_serving_triton(
input_data = {}
inputs = []
for col_name, col_schema in workflow.input_schema.column_schemas.items():
col_dtype = col_schema.dtype
col_dtype = col_schema.dtype.to_numpy
input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype)

triton_input = grpcclient.InferInput(
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/systems/ops/feast/test_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pytest

import merlin.dtypes as md
from merlin.dag import ColumnSelector
from merlin.schema import ColumnSchema, Schema
from merlin.systems.dag import DictArray
Expand Down Expand Up @@ -179,8 +180,8 @@ def test_feast_transform(prefix, is_ragged):
)
output_schema = Schema(
[
ColumnSchema(feature_name),
ColumnSchema(feature_mh, is_list=True, is_ragged=is_ragged),
ColumnSchema(feature_name, dtype=md.float32),
ColumnSchema(feature_mh, dtype=md.float32, is_list=True, is_ragged=is_ragged),
]
)

Expand Down
1 change: 0 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ commands =
python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/models.git
python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git@{posargs:main}
python -m pip install .

python -m pytest --cov-report term --cov=merlin -rxs tests/unit

[testenv:test-gpu]
Expand Down

0 comments on commit 5c6b8f3

Please sign in to comment.