Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply Merlin dtypes to Systems code base #239

Merged
merged 15 commits into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion merlin/systems/dag/dictarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
is_list_dtype,
)

import merlin.dtypes as md
from merlin.core.protocols import SeriesLike

try:
Expand Down Expand Up @@ -53,7 +54,7 @@ def __init__(self, values, row_lengths=None):

self.values = _make_array(values)
self.row_lengths = _make_array(row_lengths)
self.dtype = self.values.dtype
self.dtype = md.dtype(self.values.dtype)

if isinstance(self.values, np.ndarray):
self._device = Device.CPU
Expand Down
6 changes: 3 additions & 3 deletions merlin/systems/dag/ops/feast.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def transform(

feature_value = feast_response[feature_name]
feature_array = array_lib.array([feature_value]).T.astype(
self.output_schema[prefixed_name].dtype
self.output_schema[prefixed_name].dtype.to_numpy
)
output_tensors[prefixed_name] = feature_array

Expand All @@ -330,15 +330,15 @@ def transform(

# create a numpy array
feature_array = array_lib.array(feature_value).T.astype(
self.output_schema[prefixed_name].dtype
self.output_schema[prefixed_name].dtype.to_numpy
)

# if we're a list but not ragged, construct row lengths
if not row_lengths:
row_lengths = [len(feature_array)]

feature_row_lengths = array_lib.array(
[row_lengths], dtype=self.output_schema[prefixed_name].dtype
[row_lengths], dtype=self.output_schema[prefixed_name].dtype.to_numpy
).T

output_tensors[prefixed_name] = (feature_array, feature_row_lengths)
Expand Down
21 changes: 17 additions & 4 deletions merlin/systems/triton/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import json
import os

import numpy as np
import pandas as pd

# this needs to be before any modules that import protobuf
Expand Down Expand Up @@ -68,25 +69,37 @@ def _convert_df_to_dict(schema, batch, dtype="int32"):

if col_schema.is_ragged:
df_dict[col_name + "__values"] = col.list.leaves.values_host.astype(
col_schema.dtype
col_schema.dtype.to_numpy
)
df_dict[col_name + "__lengths"] = col._column.offsets.values_host.astype(dtype)
else:
values = col.list.leaves.values_host
values = values.reshape(*shape).astype(col_schema.dtype)
values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)
df_dict[col_name] = values

else:
values = col.values if isinstance(col, pd.Series) else col.values_host
values = values.reshape(*shape).astype(col_schema.dtype)
values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)
df_dict[col_name] = values
return df_dict


def _convert_column_to_triton_input(col_name, col_values, input_class=grpcclient.InferInput):
dtype = np_to_triton_dtype(col_values.dtype)
# Triton's mapping of numpy types to Triton types doesn't know how to handle string types,
# so we need to map them to object ourselves before we call np_to_triton_dtype
col_dtype = col_values.dtype
if isinstance(col_dtype, type(np.dtype("str"))):
col_dtype = np.dtype("O")

dtype = np_to_triton_dtype(col_dtype)
input_tensor = input_class(col_name, col_values.shape, dtype)

# set_data_from_numpy checks the type against what was supplied when we created the tensor
# using np_to_triton_dtype, so the workaround above isn't enough to make them match here.
# Do one last `astype` cast to make absolutely sure the dtypes match.
col_values = col_values.astype(col_dtype)
input_tensor.set_data_from_numpy(col_values)

return input_tensor


Expand Down
28 changes: 6 additions & 22 deletions merlin/systems/triton/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import tritonclient.grpc.model_config_pb2 as model_config # noqa

import merlin.dtypes as md # noqa
from merlin.core.dispatch import is_string_dtype # noqa


Expand All @@ -45,31 +46,14 @@ def _add_model_param(col_schema, paramclass, params, dims=None):

def _convert_dtype(dtype):
"""converts a dtype to the appropriate triton proto type"""

if dtype and not isinstance(dtype, str):
dtype_name = dtype.name if hasattr(dtype, "name") else dtype.__name__
else:
dtype_name = dtype

dtypes = {
"float64": model_config.TYPE_FP64,
"float32": model_config.TYPE_FP32,
"float16": model_config.TYPE_FP16,
"int64": model_config.TYPE_INT64,
"int32": model_config.TYPE_INT32,
"int16": model_config.TYPE_INT16,
"int8": model_config.TYPE_INT8,
"uint64": model_config.TYPE_UINT64,
"uint32": model_config.TYPE_UINT32,
"uint16": model_config.TYPE_UINT16,
"uint8": model_config.TYPE_UINT8,
"bool": model_config.TYPE_BOOL,
}
dtype = md.dtype(dtype)
try:
return dtype.to("triton")
except ValueError:
dtype = dtype.to_numpy

if is_string_dtype(dtype):
return model_config.TYPE_STRING
elif dtype_name in dtypes:
return dtypes[dtype_name]
else:
raise ValueError(f"Can't convert {dtype} to a Triton dtype")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_workflow_op_serving_triton(tmpdir, dataset, engine, runtime):
input_data = {}
for col_name, col_schema in workflow.input_schema.column_schemas.items():
col_dtype = col_schema.dtype
input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype)
input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype.to_numpy)
dictarray = DictArray(input_data)
response = wkflow_ensemble.transform(dictarray, runtime=runtime)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_workflow_op_serving_triton(
input_data = {}
inputs = []
for col_name, col_schema in workflow.input_schema.column_schemas.items():
col_dtype = col_schema.dtype
col_dtype = col_schema.dtype.to_numpy
input_data[col_name] = np.array([[2], [3], [4]]).astype(col_dtype)

triton_input = grpcclient.InferInput(
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/systems/ops/feast/test_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pytest

import merlin.dtypes as md
from merlin.dag import ColumnSelector
from merlin.schema import ColumnSchema, Schema
from merlin.systems.dag import DictArray
Expand Down Expand Up @@ -179,8 +180,8 @@ def test_feast_transform(prefix, is_ragged):
)
output_schema = Schema(
[
ColumnSchema(feature_name),
ColumnSchema(feature_mh, is_list=True, is_ragged=is_ragged),
ColumnSchema(feature_name, dtype=md.float32),
ColumnSchema(feature_mh, dtype=md.float32, is_list=True, is_ragged=is_ragged),
]
)

Expand Down
1 change: 0 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ commands =
python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/models.git
python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git@{posargs:main}
python -m pip install .

python -m pytest --cov-report term --cov=merlin -rxs tests/unit

[testenv:test-gpu]
Expand Down