From 979a5f9c5f51b61e4188b67865e7486849a22a47 Mon Sep 17 00:00:00 2001
From: Oliver Holworthy <oholworthy@nvidia.com>
Date: Wed, 12 Apr 2023 11:07:02 +0100
Subject: [PATCH 1/2] Enable CategorifyTransform cpp op to run on int16 and uint16 types

---
 cpp/nvtabular/inference/categorify.cc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cpp/nvtabular/inference/categorify.cc b/cpp/nvtabular/inference/categorify.cc
index 734610f073e..90c3c336228 100644
--- a/cpp/nvtabular/inference/categorify.cc
+++ b/cpp/nvtabular/inference/categorify.cc
@@ -93,6 +93,9 @@ namespace nvtabular
           case 'u':
             switch (dtype.itemsize())
             {
+            case 2:
+              insert_int_mapping<uint16_t>(values);
+              return;
             case 4:
               insert_int_mapping<uint32_t>(values);
               return;
@@ -104,6 +107,9 @@ namespace nvtabular
           case 'i':
             switch (dtype.itemsize())
             {
+            case 2:
+              insert_int_mapping<int16_t>(values);
+              return;
             case 4:
               insert_int_mapping<int32_t>(values);
               return;
@@ -198,6 +204,8 @@ namespace nvtabular
           case 'u':
             switch (itemsize)
             {
+            case 2:
+              return transform_int<uint16_t>(input);
             case 4:
               return transform_int<uint32_t>(input);
             case 8:
@@ -207,6 +215,8 @@ namespace nvtabular
           case 'i':
             switch (itemsize)
             {
+            case 2:
+              return transform_int<int16_t>(input);
             case 4:
               return transform_int<int32_t>(input);
             case 8:

From 82fd2f341cc63aaef77d780b0fe601dea302faf2 Mon Sep 17 00:00:00 2001
From: Oliver Holworthy <oholworthy@nvidia.com>
Date: Wed, 12 Apr 2023 11:51:32 +0100
Subject: [PATCH 2/2] Add test for categorify inference op with different types

---
 tests/unit/ops/test_categorify.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tests/unit/ops/test_categorify.py b/tests/unit/ops/test_categorify.py
index c92030c7e05..2c90488b2b0 100644
--- a/tests/unit/ops/test_categorify.py
+++ b/tests/unit/ops/test_categorify.py
@@ -695,3 +695,29 @@ def test_categorify_joint_list(cpu):
 
     assert compare_a == [1, 5, 2, 3]
     assert compare_e == [2, 3, 1, 4, 1]
+
+
+def test_categorify_inference():  # inference op on str and (u)int16/32/64 inputs -> int64
+    num_rows = 100  # rows per input column
+    a_char, z_char = np.array(["a", "z"]).view("int32")  # code points of "a" and "z"
+    input_tensors = {
+        "unicode_string": np.random.randint(  # random code points in ["a", "z")
+            low=a_char, high=z_char, size=num_rows * 10, dtype="int32"
+        ).view("U10"),  # every 10 int32 code points become one 10-character string
+        "int16_feature": np.random.randint(0, 10, dtype="int16", size=num_rows),
+        "int32_feature": np.random.randint(0, 10, dtype="int32", size=num_rows),
+        "int64_feature": np.random.randint(0, 10, dtype="int64", size=num_rows),
+        "uint16_feature": np.random.randint(0, 10, dtype="uint16", size=num_rows),
+        "uint32_feature": np.random.randint(0, 10, dtype="uint32", size=num_rows),
+        "uint64_feature": np.random.randint(0, 10, dtype="uint64", size=num_rows),
+    }  # each integer column holds values in [0, 10)
+    df = dispatch.make_df(input_tensors)  # presumably one column per dict key - confirm
+    cat_names = df.columns  # every column is fed to Categorify
+    cats = cat_names >> nvt.ops.Categorify()
+    workflow = nvt.Workflow(cats)
+    workflow.fit(nvt.Dataset(df))  # fit on the same data that is transformed below
+    model_config = {}  # empty config handed to inference_initialize
+    inference_op = cats.op.inference_initialize(cats.input_columns, model_config)
+    output_tensors = inference_op.transform(cats.input_columns, input_tensors)
+    for key in input_tensors:
+        assert output_tensors[key].dtype == np.dtype("int64")  # only the dtype is checked