From 65c63738e8c24f9aba0287528a371952871daefe Mon Sep 17 00:00:00 2001 From: Julio Date: Wed, 21 Jun 2023 18:13:17 -0400 Subject: [PATCH 1/5] updates notebooks for multistage with subgraphs --- ...ding-Recommender-Systems-with-Merlin.ipynb | 1693 ++++++++--------- ...lti-stage-RecSys-with-Merlin-Systems.ipynb | 218 ++- 2 files changed, 922 insertions(+), 989 deletions(-) diff --git a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index dd22a1378..990c568ed 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n", + "# Copyright 2023 NVIDIA Corporation. All Rights Reserved.\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", @@ -129,11 +129,11 @@ "outputs": [], "source": [ "# for running this example on GPU, install the following libraries\n", - "# %pip install \"feast==0.31\" faiss-gpu\n", + "# %pip install \"feast<0.20\" faiss-gpu\n", "\n", "# for running this example on CPU, uncomment the following lines\n", - "# %pip install tensorflow-cpu \"feast==0.31\" faiss-cpu\n", - "# %pip uninstall cudf" + "# %pip install tensorflow-cpu \"feast<0.20\" faiss-cpu\n", + "# %pip uninstall cudf\n" ] }, { @@ -146,61 +146,50 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-20 23:45:23.539085: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + "2023-06-21 21:24:41.476144: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n", - "[INFO]: sparse_operation_kit is imported\n", - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", - "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", - "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n" + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-06-20 23:45:31.002019: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-06-21 21:24:43.274327: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n", + "2023-06-21 21:24:43.274369: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 2dca7910ae98\n", + "2023-06-21 21:24:43.274380: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 2dca7910ae98\n", + "2023-06-21 21:24:43.274481: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program\n", + "2023-06-21 21:24:43.274508: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 515.65.1\n", + "2023-06-21 21:24:43.621683: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-06-20 23:45:31.232986: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", - "2023-06-20 23:45:31.233033: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-06-20 23:45:31.233242: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8192 MB memory: -> device: 0, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:06:00.0, compute capability: 7.0\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[SOK INFO] Initialize finished, communication tool: horovod\n" - ] } ], "source": [ "import os\n", - "# for running this example on CPU, comment out the line below\n", - "os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"\n", - "\n", "import nvtabular as nvt\n", "from nvtabular.ops import Rename, Filter, Dropna, LambdaOp, Categorify, \\\n", " TagAsUserFeatures, TagAsUserID, TagAsItemFeatures, TagAsItemID, AddMetadata\n", "\n", "from merlin.schema.tags import Tags\n", - "\n", + "from merlin.dag.ops.subgraph import Subgraph\n", "import merlin.models.tf as mm\n", "from merlin.io.dataset import Dataset\n", "from merlin.datasets.ecommerce import transform_aliccp\n", "import tensorflow as tf\n", "\n", - "import logging" + "# for running this example on CPU, comment out the line below\n", + "os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"" ] }, { @@ -211,6 +200,8 @@ "outputs": [], "source": [ "# disable INFO and DEBUG logging everywhere\n", + "import logging\n", + "\n", "logging.disable(logging.WARNING)" ] }, @@ -251,7 +242,16 @@ "execution_count": 6, "id": "b44b3378-7297-4946-a271-742a9239bc3e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ "from merlin.datasets.synthetic import generate_data\n", "\n", @@ -269,1078 +269,972 @@ }, { "cell_type": "markdown", - "id": "2e428d01-f2f0-42d4-85d0-0986bb83a847", - "metadata": {}, - "source": [ - "### Feature Engineering with NVTabular" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "d4bf870c-30cf-4074-88d3-b75981b3a873", + "id": "7bd843be-dfba-4f8b-bac1-608e6571352d", "metadata": {}, - "outputs": [], "source": [ - "output_path = os.path.join(DATA_FOLDER, \"processed_nvt\")" + "### Set up a feature store with Feast" ] }, { "cell_type": "markdown", - "id": "1e7bfb5c-88ed-4cf9-8a17-98c0284adb36", + "id": "c543b71c-6ba2-4e43-8779-8bffb62d2cee", "metadata": {}, "source": [ - "In the following NVTabular workflow, notice that we apply the `Dropna()` Operator at the end. We add the Operator to remove rows with missing values in the final DataFrame after the preceding transformations. Although, the synthetic dataset that we generate and use in this notebook does not have null entries, you might have null entries in your `user_id` and `item_id` columns in your own custom dataset. Therefore, while applying `Dropna()` we will not be registering null `user_id_raw` and `item_id_raw` values in the feature store, and will be avoiding potential issues that can occur because of any null entries." + "Before we move onto the next step, we need to create a Feast feature repository. [Feast](https://feast.dev/) is an end-to-end open source feature store for machine learning. Feast (Feature Store) is a customizable operational data system that re-uses existing infrastructure to manage and serve machine learning features to real-time models.\n", + "\n", + "We will create the feature repo in the current working directory, which is `BASE_DIR` for us." 
] }, { "cell_type": "code", - "execution_count": 8, - "id": "f91ada78-4e4d-4415-ab94-e351aa454e9e", + "execution_count": 7, + "id": "2e7e96d2-9cd2-40d1-b356-8cd76b57bb4a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Creating a new Feast repository in \u001b[1m\u001b[32m/raid/workshared/merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo\u001b[0m.\n", + "\n" + ] + } + ], "source": [ - "user_id_raw = [\"user_id\"] >> Rename(postfix='_raw') >> LambdaOp(lambda col: col.astype(\"int32\")) >> TagAsUserFeatures()\n", - "item_id_raw = [\"item_id\"] >> Rename(postfix='_raw') >> LambdaOp(lambda col: col.astype(\"int32\")) >> TagAsItemFeatures()\n", - "\n", - "user_id = [\"user_id\"] >> Categorify(dtype=\"int32\") >> TagAsUserID()\n", - "item_id = [\"item_id\"] >> Categorify(dtype=\"int32\") >> TagAsItemID()\n", - "\n", - "item_features = (\n", - " [\"item_category\", \"item_shop\", \"item_brand\"] >> Categorify(dtype=\"int32\") >> TagAsItemFeatures()\n", - ")\n", - "\n", - "user_features = (\n", - " [\n", - " \"user_shops\",\n", - " \"user_profile\",\n", - " \"user_group\",\n", - " \"user_gender\",\n", - " \"user_age\",\n", - " \"user_consumption_2\",\n", - " \"user_is_occupied\",\n", - " \"user_geography\",\n", - " \"user_intentions\",\n", - " \"user_brands\",\n", - " \"user_categories\",\n", - " ] >> Categorify(dtype=\"int32\") >> TagAsUserFeatures()\n", - ")\n", - "\n", - "targets = [\"click\"] >> AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, \"target\"])\n", - "\n", - "outputs = user_id + item_id + item_features + user_features + user_id_raw + item_id_raw + targets\n", - "\n", - "# add dropna op to filter rows with nulls\n", - "outputs = outputs >> Dropna()" + "!rm -rf $BASE_DIR/feast_repo\n", + "!cd $BASE_DIR && feast init feast_repo" ] }, { "cell_type": "markdown", - "id": "71aae006-a161-4127-889a-8f433a9f7362", + "id": "f6d4d773-144e-4e34-82cd-f2b50fce601c", "metadata": {}, "source": [ - "Let's call `transform_aliccp` utility function to be able to perform `fit` and `transform` steps on the raw dataset applying the operators defined in the NVTabular workflow pipeline below, and also save our workflow model. After fit and transform, the processed parquet files are saved to output_path." + "You should be seeing a message like Creating a new Feast repository in ... printed out above. Now, navigate to the `feature_repo` folder and remove the demo parquet file created by default, and `examples.py` file." ] }, { "cell_type": "code", - "execution_count": 9, - "id": "814e8438-642a-4f03-baaf-44dab8d1b5e5", + "execution_count": 8, + "id": "26ba2521-ed1b-4c2b-afdd-26b4a5a9c008", "metadata": {}, "outputs": [], "source": [ - "transform_aliccp(\n", - " (train_raw, valid_raw), output_path, nvt_workflow=outputs, workflow_name=\"workflow\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "09c87748-af61-42b8-8574-1afe3d71118f", - "metadata": {}, - "source": [ - "### Training a Retrieval Model with Two-Tower Model" - ] - }, - { - "cell_type": "markdown", - "id": "e644fcba-7b0b-44c0-97fd-80f4fcb01191", - "metadata": {}, - "source": [ - "We start with the offline candidate retrieval stage. We are going to train a Two-Tower model for item retrieval. To learn more about the Two-tower model you can visit [05-Retrieval-Model.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/05-Retrieval-Model.ipynb)." 
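    The Feast feature definitions themselves live in a Python module inside the repository created below and are not part of this diff. As a rough illustration of what such a definition could look like for the `user_features.parquet` file exported later in this notebook, here is a minimal sketch assuming the Feast 0.31-style API (`Entity`, `FeatureView`, `Field`, `FileSource`); the file name `user_features.py` and the exact field list are assumptions, not taken from this patch.

    ```python
    # user_features.py -- hypothetical feature definition inside feast_repo/feature_repo
    from datetime import timedelta

    from feast import Entity, FeatureView, Field, FileSource
    from feast.types import Int32

    user = Entity(name="user_id", join_keys=["user_id"])

    user_features_source = FileSource(
        path="data/user_features.parquet",      # written later in this notebook
        timestamp_field="datetime",             # the artificial timestamp column added below
        created_timestamp_column="created",
    )

    user_features_view = FeatureView(
        name="user_features",
        entities=[user],
        ttl=timedelta(days=1),
        schema=[
            Field(name="user_shops", dtype=Int32),
            Field(name="user_age", dtype=Int32),
            # ... remaining user_* columns would be listed here ...
        ],
        online=True,
        source=user_features_source,
    )
    ```

    Such definitions would then be registered from inside the repository with `feast apply` before the online store can be materialized.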
+ "feature_repo_path = os.path.join(BASE_DIR, \"feast_repo/feature_repo\")\n", + "if os.path.exists(f\"{feature_repo_path}/example_repo.py\"):\n", + " os.remove(f\"{feature_repo_path}/example_repo.py\")\n", + "if os.path.exists(f\"{feature_repo_path}/data/driver_stats.parquet\"):\n", + " os.remove(f\"{feature_repo_path}/data/driver_stats.parquet\")" ] }, { "cell_type": "markdown", - "id": "cf9bca46-a6b6-4a73-afd8-fe2869c60748", + "id": "24ae0e29-c156-4df9-8977-238786160a8c", "metadata": {}, "source": [ - "#### Feature Engineering with NVTabular" + "### Exporting user and item features" ] }, { - "cell_type": "markdown", - "id": "da2b09cc-09fb-4814-a1cb-7e6168d9eb4b", + "cell_type": "code", + "execution_count": 9, + "id": "ea0b369c-2f01-42e3-9f3c-74c3ff4a6d64", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "We are going to process our raw categorical features by encoding them using `Categorify()` operator and tag the features with `user` or `item` tags in the schema file. To learn more about [NVTabular](https://github.com/NVIDIA-Merlin/NVTabular) and the schema object visit this example [notebook](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/02-Merlin-Models-and-NVTabular-integration.ipynb) in the Merlin Models repo." + "from merlin.models.utils.dataset import unique_rows_by_features\n", + "\n", + "user_features = (\n", + " unique_rows_by_features(train_raw, Tags.USER, Tags.USER_ID)\n", + " .compute()\n", + " .reset_index(drop=True)\n", + ")" ] }, { "cell_type": "markdown", - "id": "f3bc7abd-8d97-452b-a4af-5227821a99c9", + "id": "4f2d12f5-c753-4392-b113-965d97d2fe35", "metadata": {}, "source": [ - "Define a new output path to store the filtered datasets and schema files." + "We will artificially add `datetime` and `created` timestamp columns to our user_features dataframe. This required by Feast to track the user-item features and their creation time and to determine which version to use when we query Feast." ] }, { "cell_type": "code", "execution_count": 10, - "id": "df72a793-194b-44f4-80c3-aaa368a9a01e", + "id": "d30bd2f8-8a78-4df7-9bc4-42bd741c5b99", "metadata": {}, "outputs": [], "source": [ - "output_path2 = os.path.join(DATA_FOLDER, \"processed/retrieval\")" + "from datetime import datetime\n", + "\n", + "user_features[\"datetime\"] = datetime.now()\n", + "user_features[\"datetime\"] = user_features[\"datetime\"].astype(\"datetime64[ns]\")\n", + "user_features[\"created\"] = datetime.now()\n", + "user_features[\"created\"] = user_features[\"created\"].astype(\"datetime64[ns]\")" ] }, { "cell_type": "code", "execution_count": 11, - "id": "251d4697-8f9c-4c93-8de4-c3480a8378de", - "metadata": {}, - "outputs": [], - "source": [ - "train_tt = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"))\n", - "valid_tt = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"))" - ] - }, - { - "cell_type": "markdown", - "id": "ffd7e2ac-a251-49d0-943b-e9272c852ba6", + "id": "d4998cd1-9dcd-4911-8f23-372e197b41e9", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_iduser_shopsuser_profileuser_groupuser_genderuser_ageuser_consumption_1user_consumption_2user_is_occupieduser_geographyuser_intentionsuser_brandsuser_categoriesdatetimecreated
38765811111111191327352023-06-21 21:24:49.0828042023-06-21 21:24:49.085539
\n", + "
" + ], + "text/plain": [ + " user_id user_shops user_profile user_group user_gender user_age \\\n", + "38 7 658 1 1 1 1 \n", + "\n", + " user_consumption_1 user_consumption_2 user_is_occupied user_geography \\\n", + "38 1 1 1 1 \n", + "\n", + " user_intentions user_brands user_categories datetime \\\n", + "38 191 327 35 2023-06-21 21:24:49.082804 \n", + "\n", + " created \n", + "38 2023-06-21 21:24:49.085539 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "We select only positive interaction rows where `click==1` in the dataset with `Filter()` operator." + "user_features[user_features[\"user_id\"] == 7]" ] }, { "cell_type": "code", "execution_count": 12, - "id": "7e085a6d-74ad-4c24-8e7c-4e449c15f471", + "id": "2981b3ed-6156-49f0-aa14-326a3853a58a", "metadata": {}, "outputs": [], "source": [ - "inputs = train_tt.schema.column_names\n", - "outputs = inputs >> Filter(f=lambda df: df[\"click\"] == 1)\n", - "\n", - "workflow2 = nvt.Workflow(outputs)\n", - "\n", - "workflow2.fit(train_tt)\n", - "\n", - "workflow2.transform(train_tt).to_parquet(\n", - " output_path=os.path.join(output_path2, \"train\")\n", - ")\n", - "\n", - "workflow2.transform(valid_tt).to_parquet(\n", - " output_path=os.path.join(output_path2, \"valid\")\n", + "user_features.to_parquet(\n", + " os.path.join(feature_repo_path, \"data\", \"user_features.parquet\")\n", ")" ] }, { - "cell_type": "markdown", - "id": "cc4721ae-7228-4d3f-9586-dcdfefecc19f", + "cell_type": "code", + "execution_count": 13, + "id": "0a33a668-8e2a-4546-8f54-0060d405ba91", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "NVTabular exported the schema file, `schema.pbtxt` a protobuf text file, of our processed dataset. To learn more about the schema object and schema file you can explore [02-Merlin-Models-and-NVTabular-integration.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/02-Merlin-Models-and-NVTabular-integration.ipynb) notebook." + "item_features = (\n", + " unique_rows_by_features(train_raw, Tags.ITEM, Tags.ITEM_ID)\n", + " .compute()\n", + " .reset_index(drop=True)\n", + ")" ] }, { - "cell_type": "markdown", - "id": "aa025b80-0f18-437c-a85f-4edcb89f4222", + "cell_type": "code", + "execution_count": 14, + "id": "68a694d6-926f-4b0f-8edc-8cc7ac85ade7", "metadata": {}, + "outputs": [], "source": [ - "**Read filtered parquet files as Dataset objects.**" + "item_features[\"datetime\"] = datetime.now()\n", + "item_features[\"datetime\"] = item_features[\"datetime\"].astype(\"datetime64[ns]\")\n", + "item_features[\"created\"] = datetime.now()\n", + "item_features[\"created\"] = item_features[\"created\"].astype(\"datetime64[ns]\")" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "252a8e60-b447-46b5-ade6-3557cbafa797", + "execution_count": 15, + "id": "6c03fa22-b112-4243-bbe1-1cd7260cb85b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
item_iditem_categoryitem_shopitem_branditem_intentiondatetimecreated
0291389672333115412023-06-21 21:24:49.1459832023-06-21 21:24:49.147882
141510373571662023-06-21 21:24:49.1459832023-06-21 21:24:49.147882
21779552719048812023-06-21 21:24:49.1459832023-06-21 21:24:49.147882
3155756531961832084712023-06-21 21:24:49.1459832023-06-21 21:24:49.147882
41989621821429912023-06-21 21:24:49.1459832023-06-21 21:24:49.147882
\n", + "
" + ], + "text/plain": [ + " item_id item_category item_shop item_brand item_intention \\\n", + "0 29 138 9672 3331 1541 \n", + "1 4 15 1037 357 166 \n", + "2 17 79 5527 1904 881 \n", + "3 155 756 53196 18320 8471 \n", + "4 19 89 6218 2142 991 \n", + "\n", + " datetime created \n", + "0 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", + "1 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", + "2 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", + "3 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", + "4 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "item_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c312884b-a1f8-4e08-8068-696e06a9bf46", "metadata": {}, "outputs": [], "source": [ - "train_tt = Dataset(os.path.join(output_path2, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", - "valid_tt = Dataset(os.path.join(output_path2, \"valid\", \"*.parquet\"), part_size=\"500MB\")" + "# save to disk\n", + "item_features.to_parquet(\n", + " os.path.join(feature_repo_path, \"data\", \"item_features.parquet\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2e428d01-f2f0-42d4-85d0-0986bb83a847", + "metadata": {}, + "source": [ + "### Feature Engineering with NVTabular" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "71063653-2f39-4b54-8399-145d6f281d4d", + "execution_count": 17, + "id": "d4bf870c-30cf-4074-88d3-b75981b3a873", "metadata": {}, "outputs": [], "source": [ - "schema = train_tt.schema.select_by_tag([Tags.ITEM_ID, Tags.USER_ID, Tags.ITEM, Tags.USER]).without(['user_id_raw', 'item_id_raw', 'click'])\n", - "train_tt.schema = schema\n", - "valid_tt.schema = schema" + "output_path = os.path.join(DATA_FOLDER, \"processed_nvt\")" + ] + }, + { + "cell_type": "markdown", + "id": "1e7bfb5c-88ed-4cf9-8a17-98c0284adb36", + "metadata": {}, + "source": [ + "In the following NVTabular workflow, notice that we apply the `Dropna()` Operator at the end. We add the Operator to remove rows with missing values in the final DataFrame after the preceding transformations. Although, the synthetic dataset that we generate and use in this notebook does not have null entries, you might have null entries in your `user_id` and `item_id` columns in your own custom dataset. Therefore, while applying `Dropna()` we will not be registering null `user_id_raw` and `item_id_raw` values in the feature store, and will be avoiding potential issues that can occur because of any null entries." 
] }, { "cell_type": "code", - "execution_count": 15, - "id": "9312511a-f368-42f2-93d2-eb95aebbf46c", + "execution_count": 18, + "id": "f91ada78-4e4d-4415-ab94-e351aa454e9e", "metadata": {}, "outputs": [], "source": [ - "model_tt = mm.TwoTowerModel(\n", - " schema,\n", - " query_tower=mm.MLPBlock([128, 64], no_activation_last_layer=True),\n", - " samplers=[mm.InBatchSampler()],\n", - " embedding_options=mm.EmbeddingOptions(infer_embedding_sizes=True),\n", - ")" + "user_id_raw = [\"user_id\"] >> Rename(postfix='_raw') >> LambdaOp(lambda col: col.astype(\"int32\")) >> TagAsUserFeatures()\n", + "item_id_raw = [\"item_id\"] >> Rename(postfix='_raw') >> LambdaOp(lambda col: col.astype(\"int32\")) >> TagAsItemFeatures()\n", + "\n", + "\n", + "item_cat = Categorify(dtype=\"int32\")\n", + "items = ([\"item_id\",\"item_category\", \"item_shop\", \"item_brand\"] >> item_cat)\n", + "\n", + "subgraph_item = Subgraph(\n", + " \"item\", \n", + " Subgraph(\"items_cat\", items) + \n", + " (items[\"item_id\"] >> TagAsItemID()) + \n", + " (items[\"item_category\", \"item_shop\", \"item_brand\"] >> TagAsItemFeatures())\n", + ")\n", + "subgraph_user = Subgraph(\n", + " \"user\",\n", + " ([\"user_id\"] >> Categorify(dtype=\"int32\") >> TagAsUserID()) +\n", + " (\n", + " [\n", + " \"user_shops\",\n", + " \"user_profile\",\n", + " \"user_group\",\n", + " \"user_gender\",\n", + " \"user_age\",\n", + " \"user_consumption_2\",\n", + " \"user_is_occupied\",\n", + " \"user_geography\",\n", + " \"user_intentions\",\n", + " \"user_brands\",\n", + " \"user_categories\",\n", + " ] >> Categorify(dtype=\"int32\") >> TagAsUserFeatures()\n", + " )\n", + ")\n", + "\n", + "targets = [\"click\"] >> AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, \"target\"])\n", + "outputs = subgraph_user + subgraph_item + targets\n", + "\n", + "# add dropna op to filter rows with nulls\n", + "outputs = outputs >> Dropna()" + ] + }, + { + "cell_type": "markdown", + "id": "71aae006-a161-4127-889a-8f433a9f7362", + "metadata": {}, + "source": [ + "Let's call `transform_aliccp` utility function to be able to perform `fit` and `transform` steps on the raw dataset applying the operators defined in the NVTabular workflow pipeline below, and also save our workflow model. After fit and transform, the processed parquet files are saved to output_path." ] }, { "cell_type": "code", - "execution_count": 16, - "id": "4d47cb8b-e06a-4932-9a19-fb244ef43152", + "execution_count": 19, + "id": "814e8438-642a-4f03-baaf-44dab8d1b5e5", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
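    The workflow below also wraps the user and item branches in named `Subgraph` blocks (note the new `from merlin.dag.ops.subgraph import Subgraph` import). The benefit, used later in this notebook, is that a named piece of the fitted workflow can be pulled back out with `get_subworkflow` and applied on its own. A minimal sketch of the pattern, using a toy two-column DataFrame that is purely illustrative:

    ```python
    import pandas as pd
    import nvtabular as nvt
    from merlin.dag.ops.subgraph import Subgraph
    from merlin.io.dataset import Dataset
    from nvtabular.ops import Categorify

    # toy stand-in for the raw interactions table
    df = pd.DataFrame({"item_id": [10, 11, 10], "item_category": [1, 2, 1]})

    # group the item-encoding ops under a name so they can be reused later
    items = ["item_id", "item_category"] >> Categorify(dtype="int32")
    workflow = nvt.Workflow(Subgraph("items_cat", items))
    workflow.fit(Dataset(df))

    # retrieve just that piece of the fitted workflow and run it on its own,
    # as done further down when encoding item features for the embedding export
    items_cat = workflow.get_subworkflow("items_cat")
    encoded = items_cat.transform(Dataset(df)).to_ddf().compute()
    ```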
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", " warnings.warn(\n" ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5/5 [==============================] - 18s 712ms/step - loss: 8.9090 - recall_at_10: 0.0069 - ndcg_at_10: 0.0045 - regularization_loss: 0.0000e+00 - loss_batch: 8.5771 - val_loss: 8.9027 - val_recall_at_10: 0.0113 - val_ndcg_at_10: 0.0072 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.7921\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "model_tt.compile(\n", - " optimizer=\"adam\",\n", - " run_eagerly=False,\n", - " loss=\"categorical_crossentropy\",\n", - " metrics=[mm.RecallAt(10), mm.NDCGAt(10)],\n", - ")\n", - "model_tt.fit(train_tt, validation_data=valid_tt, batch_size=1024 * 8, epochs=1)" + "transform_aliccp(\n", + " (train_raw, valid_raw), output_path, nvt_workflow=outputs, workflow_name=\"workflow\"\n", + ")" ] }, { "cell_type": "markdown", - "id": "80d83007-f9e8-408f-9f65-a0e9e19cb586", + "id": "09c87748-af61-42b8-8574-1afe3d71118f", "metadata": {}, "source": [ - "### Exporting query (user) model" + "### Training a Retrieval Model with Two-Tower Model" ] }, { "cell_type": "markdown", - "id": "22af58a9-5525-454a-bf25-a9df0462aa53", - "metadata": {}, - "source": [ - "We export the query tower to use it later during the model deployment stage with Merlin Systems." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "d2370f13-ff9a-4ee0-ba1e-451c7bec0f8a", + "id": "e644fcba-7b0b-44c0-97fd-80f4fcb01191", "metadata": {}, - "outputs": [], "source": [ - "query_tower = model_tt.retrieval_block.query_block()\n", - "query_tower.save(os.path.join(BASE_DIR, \"query_tower\"))" + "We start with the offline candidate retrieval stage. We are going to train a Two-Tower model for item retrieval. To learn more about the Two-tower model you can visit [05-Retrieval-Model.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/05-Retrieval-Model.ipynb)." ] }, { "cell_type": "markdown", - "id": "e16401d4", - "metadata": { - "tags": [] - }, + "id": "cf9bca46-a6b6-4a73-afd8-fe2869c60748", + "metadata": {}, "source": [ - "### Training a Ranking Model with DLRM" + "#### Feature Engineering with NVTabular" ] }, { "cell_type": "markdown", - "id": "b72e8a2a-fc4a-43ab-934c-6d941c56aad2", + "id": "da2b09cc-09fb-4814-a1cb-7e6168d9eb4b", "metadata": {}, "source": [ - "Now we will move onto training an offline ranking model. This ranking model will be used for scoring our retrieved items." + "We are going to process our raw categorical features by encoding them using `Categorify()` operator and tag the features with `user` or `item` tags in the schema file. To learn more about [NVTabular](https://github.com/NVIDIA-Merlin/NVTabular) and the schema object visit this example [notebook](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/02-Merlin-Models-and-NVTabular-integration.ipynb) in the Merlin Models repo." ] }, { "cell_type": "markdown", - "id": "c4f2b234", + "id": "f3bc7abd-8d97-452b-a4af-5227821a99c9", "metadata": {}, "source": [ - "Read processed parquet files. We use the `schema` object to define our model." + "Define a new output path to store the filtered datasets and schema files." 
] }, { "cell_type": "code", - "execution_count": 18, - "id": "cb870461-6ac2-49b2-ba6a-2da6ecb57f1d", + "execution_count": 20, + "id": "df72a793-194b-44f4-80c3-aaa368a9a01e", "metadata": {}, "outputs": [], "source": [ - "# define train and valid dataset objects\n", - "train = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", - "valid = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"), part_size=\"500MB\")\n", - "\n", - "# define schema object\n", - "schema = train.schema.without(['user_id_raw', 'item_id_raw'])" + "output_path2 = os.path.join(DATA_FOLDER, \"processed/retrieval\")" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "30e4ebc2", + "execution_count": 21, + "id": "251d4697-8f9c-4c93-8de4-c3480a8378de", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'click'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] } ], "source": [ - "target_column = schema.select_by_tag(Tags.TARGET).column_names[0]\n", - "target_column" + "train_tt = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"))\n", + "valid_tt = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"))" ] }, { "cell_type": "markdown", - "id": "8f68e26b", - "metadata": {}, - "source": [ - "Deep Learning Recommendation Model [(DLRM)](https://arxiv.org/abs/1906.00091) architecture is a popular neural network model originally proposed by Facebook in 2019. The model was introduced as a personalization deep learning model that uses embeddings to process sparse features that represent categorical data and a multilayer perceptron (MLP) to process dense features, then interacts these features explicitly using the statistical techniques proposed in [here](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=5694074). To learn more about DLRM architetcture please visit `Exploring-different-models` [notebook](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/04-Exporting-ranking-models.ipynb) in the Merlin Models GH repo." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "e4325080", + "id": "ffd7e2ac-a251-49d0-943b-e9272c852ba6", "metadata": {}, - "outputs": [], "source": [ - "model = mm.DLRMModel(\n", - " schema,\n", - " embedding_dim=64,\n", - " bottom_block=mm.MLPBlock([128, 64]),\n", - " top_block=mm.MLPBlock([128, 64, 32]),\n", - " prediction_tasks=mm.BinaryClassificationTask(target_column),\n", - ")" + "We select only positive interaction rows where `click==1` in the dataset with `Filter()` operator." 
] }, { "cell_type": "code", - "execution_count": 21, - "id": "bfe2aa9e", + "execution_count": 22, + "id": "7e085a6d-74ad-4c24-8e7c-4e449c15f471", "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "5/5 [==============================] - 9s 519ms/step - loss: 0.6932 - auc: 0.5008 - regularization_loss: 0.0000e+00 - loss_batch: 0.6931 - val_loss: 0.6932 - val_auc: 0.5034 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6932\n" + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "model.compile(optimizer=\"adam\", run_eagerly=False, metrics=[tf.keras.metrics.AUC()])\n", - "model.fit(train, validation_data=valid, batch_size=16 * 1024)" + "inputs = train_tt.schema.column_names\n", + "outputs = inputs >> Filter(f=lambda df: df[\"click\"] == 1)\n", + "\n", + "workflow2 = nvt.Workflow(outputs)\n", + "\n", + "workflow2.fit(train_tt)\n", + "\n", + "workflow2.transform(train_tt).to_parquet(\n", + " output_path=os.path.join(output_path2, \"train\")\n", + ")\n", + "\n", + "workflow2.transform(valid_tt).to_parquet(\n", + " output_path=os.path.join(output_path2, \"valid\")\n", + ")" ] }, { "cell_type": "markdown", - "id": "498c4d49-7a59-4260-87b9-b86b66f2c67f", + "id": "cc4721ae-7228-4d3f-9586-dcdfefecc19f", "metadata": {}, "source": [ - "Let's save our DLRM model to be able to load back at the deployment stage. " + "NVTabular exported the schema file, `schema.pbtxt` a protobuf text file, of our processed dataset. To learn more about the schema object and schema file you can explore [02-Merlin-Models-and-NVTabular-integration.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/02-Merlin-Models-and-NVTabular-integration.ipynb) notebook." 
] }, { - "cell_type": "code", - "execution_count": 22, - "id": "00447c12-ea80-4d98-ab47-cc1a982a6958", + "cell_type": "markdown", + "id": "aa025b80-0f18-437c-a85f-4edcb89f4222", "metadata": {}, - "outputs": [], "source": [ - "model.save(os.path.join(BASE_DIR, \"dlrm\"))" + "**Read filtered parquet files as Dataset objects.**" ] }, { - "cell_type": "markdown", - "id": "d64a3f3f-81d8-489c-835f-c62f76df22d5", + "cell_type": "code", + "execution_count": 23, + "id": "252a8e60-b447-46b5-ade6-3557cbafa797", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "In the following cells we are going to export the required user and item features files, and save the query (user) tower model and item embeddings to disk. If you want to read more about exporting retrieval models, please visit [05-Retrieval-Model.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/05-Retrieval-Model.ipynb) notebook in Merlin Models library repo." + "train_tt = Dataset(os.path.join(output_path2, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", + "valid_tt = Dataset(os.path.join(output_path2, \"valid\", \"*.parquet\"), part_size=\"500MB\")" ] }, { - "cell_type": "markdown", - "id": "5da1f434-f5a1-4478-b588-7e7ec17e6a88", + "cell_type": "code", + "execution_count": 24, + "id": "71063653-2f39-4b54-8399-145d6f281d4d", "metadata": {}, + "outputs": [], "source": [ - "### Set up a feature store with Feast" + "schema = train_tt.schema.select_by_tag([Tags.ITEM_ID, Tags.USER_ID, Tags.ITEM, Tags.USER]).without(['user_id_raw', 'item_id_raw', 'click'])\n", + "train_tt.schema = schema\n", + "valid_tt.schema = schema" ] }, { - "cell_type": "markdown", - "id": "99a4e939-d3cf-44f0-9012-d2af3264ee25", + "cell_type": "code", + "execution_count": 25, + "id": "9312511a-f368-42f2-93d2-eb95aebbf46c", "metadata": {}, + "outputs": [], "source": [ - "Before we move onto the next step, we need to create a Feast feature repository. [Feast](https://feast.dev/) is an end-to-end open source feature store for machine learning. Feast (Feature Store) is a customizable operational data system that re-uses existing infrastructure to manage and serve machine learning features to real-time models.\n", - "\n", - "We will create the feature repo in the current working directory, which is `BASE_DIR` for us." 
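    Once the processed data is read back, the schema travels with the `Dataset` object, so the tagged columns can be inspected directly. A small illustrative check (the printed column names depend on the synthetic data):

    ```python
    import os

    from merlin.io.dataset import Dataset
    from merlin.schema.tags import Tags

    # illustrative only: inspect the schema NVTabular attached to the processed files
    ds = Dataset(os.path.join(output_path2, "train", "*.parquet"))
    print(ds.schema.select_by_tag(Tags.USER_ID).column_names)  # e.g. ['user_id']
    print(ds.schema.select_by_tag(Tags.ITEM).column_names)     # item-tagged features
    ```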
+ "model_tt = mm.TwoTowerModel(\n", + " schema,\n", + " query_tower=mm.MLPBlock([128, 64], no_activation_last_layer=True),\n", + " samplers=[mm.InBatchSampler()],\n", + " embedding_options=mm.EmbeddingOptions(infer_embedding_sizes=True),\n", + ")" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "2e7e96d2-9cd2-40d1-b356-8cd76b57bb4a", + "execution_count": 26, + "id": "4d47cb8b-e06a-4932-9a19-fb244ef43152", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Creating a new Feast repository in \u001b[1m\u001b[32m/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo\u001b[0m.\n", - "\n" + "5/5 [==============================] - 13s 1s/step - loss: 8.9092 - recall_at_10: 0.0076 - ndcg_at_10: 0.0058 - regularization_loss: 0.0000e+00 - loss_batch: 8.5704 - val_loss: 8.9050 - val_recall_at_10: 0.0121 - val_ndcg_at_10: 0.0097 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.7986\n" ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "!rm -rf $BASE_DIR/feast_repo\n", - "!cd $BASE_DIR && feast init feast_repo" + "model_tt.compile(\n", + " optimizer=\"adam\",\n", + " run_eagerly=False,\n", + " loss=\"categorical_crossentropy\",\n", + " metrics=[mm.RecallAt(10), mm.NDCGAt(10)],\n", + ")\n", + "model_tt.fit(train_tt, validation_data=valid_tt, batch_size=1024 * 8, epochs=1)" ] }, { "cell_type": "markdown", - "id": "5e630e53-8336-487a-9ceb-133b1538acfb", + "id": "80d83007-f9e8-408f-9f65-a0e9e19cb586", "metadata": {}, "source": [ - "You should be seeing a message like Creating a new Feast repository in ... printed out above. Now, navigate to the `feature_repo` folder and remove the demo parquet file created by default, and `examples.py` file." + "### Exporting query (user) model" + ] + }, + { + "cell_type": "markdown", + "id": "22af58a9-5525-454a-bf25-a9df0462aa53", + "metadata": {}, + "source": [ + "We export the query tower to use it later during the model deployment stage with Merlin Systems." 
] }, { "cell_type": "code", - "execution_count": 24, - "id": "26ba2521-ed1b-4c2b-afdd-26b4a5a9c008", + "execution_count": 27, + "id": "d2370f13-ff9a-4ee0-ba1e-451c7bec0f8a", "metadata": {}, "outputs": [], "source": [ - "feature_repo_path = os.path.join(BASE_DIR, \"feast_repo/feature_repo\")\n", - "if os.path.exists(f\"{feature_repo_path}/example_repo.py\"):\n", - " os.remove(f\"{feature_repo_path}/example_repo.py\")\n", - "if os.path.exists(f\"{feature_repo_path}/data/driver_stats.parquet\"):\n", - " os.remove(f\"{feature_repo_path}/data/driver_stats.parquet\")" + "query_tower = model_tt.retrieval_block.query_block()\n", + "query_tower.save(os.path.join(BASE_DIR, \"query_tower\"))" ] }, { "cell_type": "markdown", - "id": "78315676-eb6c-405a-b1fd-3174ea328406", + "id": "e16401d4", + "metadata": { + "tags": [] + }, + "source": [ + "### Training a Ranking Model with DLRM" + ] + }, + { + "cell_type": "markdown", + "id": "b72e8a2a-fc4a-43ab-934c-6d941c56aad2", "metadata": {}, "source": [ - "### Exporting user and item features" + "Now we will move onto training an offline ranking model. This ranking model will be used for scoring our retrieved items." ] }, { - "cell_type": "code", - "execution_count": 25, - "id": "ea0b369c-2f01-42e3-9f3c-74c3ff4a6d64", + "cell_type": "markdown", + "id": "c4f2b234", "metadata": {}, - "outputs": [], "source": [ - "from merlin.models.utils.dataset import unique_rows_by_features\n", - "\n", - "user_features = (\n", - " unique_rows_by_features(train, Tags.USER, Tags.USER_ID)\n", - " .compute()\n", - " .reset_index(drop=True)\n", - ")" + "Read processed parquet files. We use the `schema` object to define our model." ] }, { "cell_type": "code", - "execution_count": 26, - "id": "6b0949f9-e67a-414f-9d74-65f138e820a8", + "execution_count": 28, + "id": "cb870461-6ac2-49b2-ba6a-2da6ecb57f1d", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
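    At deployment time the exported tower is consumed as a TensorFlow SavedModel directory by the serving ensemble built in the next notebook. As a purely illustrative sanity check, once the export cell below has run, the artifact can also be located and reloaded directly (assuming the default SavedModel layout):

    ```python
    import os
    import tensorflow as tf

    query_tower_path = os.path.join(BASE_DIR, "query_tower")

    # the export below produces a standard SavedModel directory
    assert os.path.exists(os.path.join(query_tower_path, "saved_model.pb"))

    # it can be reloaded as a regular Keras model if needed for offline checks
    reloaded_query_tower = tf.keras.models.load_model(query_tower_path)
    ```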
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iduser_shopsuser_profileuser_groupuser_genderuser_ageuser_consumption_2user_is_occupieduser_geographyuser_intentionsuser_brandsuser_categoriesuser_id_raw
03333333333336
14433333334448
25533333335557
36633333336665
47733333337779
\n", - "
" - ], + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# define train and valid dataset objects\n", + "train = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", + "valid = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"), part_size=\"500MB\")\n", + "\n", + "# define schema object\n", + "schema = train.schema.without(['user_id_raw', 'item_id_raw'])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "30e4ebc2", + "metadata": {}, + "outputs": [ + { + "data": { "text/plain": [ - " user_id user_shops user_profile user_group user_gender user_age \\\n", - "0 3 3 3 3 3 3 \n", - "1 4 4 3 3 3 3 \n", - "2 5 5 3 3 3 3 \n", - "3 6 6 3 3 3 3 \n", - "4 7 7 3 3 3 3 \n", - "\n", - " user_consumption_2 user_is_occupied user_geography user_intentions \\\n", - "0 3 3 3 3 \n", - "1 3 3 3 4 \n", - "2 3 3 3 5 \n", - "3 3 3 3 6 \n", - "4 3 3 3 7 \n", - "\n", - " user_brands user_categories user_id_raw \n", - "0 3 3 6 \n", - "1 4 4 8 \n", - "2 5 5 7 \n", - "3 6 6 5 \n", - "4 7 7 9 " + "'click'" ] }, - "execution_count": 26, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "user_features.head()" + "target_column = schema.select_by_tag(Tags.TARGET).column_names[0]\n", + "target_column" ] }, { "cell_type": "markdown", - "id": "4a46bd8c-1337-4c74-a85b-25348a897d90", + "id": "8f68e26b", "metadata": {}, "source": [ - "We will artificially add `datetime` and `created` timestamp columns to our user_features dataframe. This required by Feast to track the user-item features and their creation time and to determine which version to use when we query Feast." + "Deep Learning Recommendation Model [(DLRM)](https://arxiv.org/abs/1906.00091) architecture is a popular neural network model originally proposed by Facebook in 2019. The model was introduced as a personalization deep learning model that uses embeddings to process sparse features that represent categorical data and a multilayer perceptron (MLP) to process dense features, then interacts these features explicitly using the statistical techniques proposed in [here](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=5694074). To learn more about DLRM architetcture please visit `Exploring-different-models` [notebook](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/04-Exporting-ranking-models.ipynb) in the Merlin Models GH repo." 
] }, { "cell_type": "code", - "execution_count": 27, - "id": "d30bd2f8-8a78-4df7-9bc4-42bd741c5b99", + "execution_count": 30, + "id": "e4325080", "metadata": {}, "outputs": [], "source": [ - "from datetime import datetime\n", - "\n", - "user_features[\"datetime\"] = datetime.now()\n", - "user_features[\"datetime\"] = user_features[\"datetime\"].astype(\"datetime64[ns]\")\n", - "user_features[\"created\"] = datetime.now()\n", - "user_features[\"created\"] = user_features[\"created\"].astype(\"datetime64[ns]\")" + "model = mm.DLRMModel(\n", + " schema,\n", + " embedding_dim=64,\n", + " bottom_block=mm.MLPBlock([128, 64]),\n", + " top_block=mm.MLPBlock([128, 64, 32]),\n", + " prediction_tasks=mm.BinaryClassificationTask(target_column),\n", + ")" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "d4998cd1-9dcd-4911-8f23-372e197b41e9", + "execution_count": 31, + "id": "bfe2aa9e", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5/5 [==============================] - 5s 271ms/step - loss: 0.6932 - auc: 0.4989 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.4994 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6932\n" + ] + }, { "data": { - "text/html": [ - "
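    The distinguishing piece of DLRM is the explicit pairwise interaction between the per-feature embedding vectors. A toy, self-contained illustration of that interaction step (not part of the Merlin API, just the underlying idea the paragraph describes):

    ```python
    import tensorflow as tf

    # one embedding vector per feature, for a small toy batch
    batch_size, num_features, emb_dim = 2, 4, 8
    embeddings = tf.random.normal((batch_size, num_features, emb_dim))

    # all pairwise dot products between feature embeddings -> (batch, features, features)
    interactions = tf.matmul(embeddings, embeddings, transpose_b=True)

    # DLRM keeps the unique pairs (upper triangle), flattens them, and concatenates
    # them with the dense features before the top MLP
    ```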
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iduser_shopsuser_profileuser_groupuser_genderuser_ageuser_consumption_2user_is_occupieduser_geographyuser_intentionsuser_brandsuser_categoriesuser_id_rawdatetimecreated
033333333333362023-06-20 23:47:09.4366672023-06-20 23:47:09.438518
144333333344482023-06-20 23:47:09.4366672023-06-20 23:47:09.438518
255333333355572023-06-20 23:47:09.4366672023-06-20 23:47:09.438518
366333333366652023-06-20 23:47:09.4366672023-06-20 23:47:09.438518
477333333377792023-06-20 23:47:09.4366672023-06-20 23:47:09.438518
\n", - "
" - ], "text/plain": [ - " user_id user_shops user_profile user_group user_gender user_age \\\n", - "0 3 3 3 3 3 3 \n", - "1 4 4 3 3 3 3 \n", - "2 5 5 3 3 3 3 \n", - "3 6 6 3 3 3 3 \n", - "4 7 7 3 3 3 3 \n", - "\n", - " user_consumption_2 user_is_occupied user_geography user_intentions \\\n", - "0 3 3 3 3 \n", - "1 3 3 3 4 \n", - "2 3 3 3 5 \n", - "3 3 3 3 6 \n", - "4 3 3 3 7 \n", - "\n", - " user_brands user_categories user_id_raw datetime \\\n", - "0 3 3 6 2023-06-20 23:47:09.436667 \n", - "1 4 4 8 2023-06-20 23:47:09.436667 \n", - "2 5 5 7 2023-06-20 23:47:09.436667 \n", - "3 6 6 5 2023-06-20 23:47:09.436667 \n", - "4 7 7 9 2023-06-20 23:47:09.436667 \n", - "\n", - " created \n", - "0 2023-06-20 23:47:09.438518 \n", - "1 2023-06-20 23:47:09.438518 \n", - "2 2023-06-20 23:47:09.438518 \n", - "3 2023-06-20 23:47:09.438518 \n", - "4 2023-06-20 23:47:09.438518 " + "" ] }, - "execution_count": 28, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "user_features.head()" + "model.compile(optimizer=\"adam\", run_eagerly=False, metrics=[tf.keras.metrics.AUC()])\n", + "model.fit(train, validation_data=valid, batch_size=16 * 1024)" ] }, { - "cell_type": "code", - "execution_count": 29, - "id": "2981b3ed-6156-49f0-aa14-326a3853a58a", + "cell_type": "markdown", + "id": "498c4d49-7a59-4260-87b9-b86b66f2c67f", "metadata": {}, - "outputs": [], "source": [ - "user_features.to_parquet(os.path.join(feature_repo_path, \"data\", \"user_features.parquet\"))" + "Let's save our DLRM model to be able to load back at the deployment stage. " ] }, { "cell_type": "code", - "execution_count": 30, - "id": "0a33a668-8e2a-4546-8f54-0060d405ba91", + "execution_count": 32, + "id": "00447c12-ea80-4d98-ab47-cc1a982a6958", "metadata": {}, "outputs": [], "source": [ - "item_features = (\n", - " unique_rows_by_features(train, Tags.ITEM, Tags.ITEM_ID)\n", - " .compute()\n", - " .reset_index(drop=True)\n", - ")" + "model.save(os.path.join(BASE_DIR, \"dlrm\"))" ] }, { - "cell_type": "code", - "execution_count": 31, - "id": "68a694d6-926f-4b0f-8edc-8cc7ac85ade7", + "cell_type": "markdown", + "id": "d64a3f3f-81d8-489c-835f-c62f76df22d5", "metadata": {}, - "outputs": [], "source": [ - "item_features[\"datetime\"] = datetime.now()\n", - "item_features[\"datetime\"] = item_features[\"datetime\"].astype(\"datetime64[ns]\")\n", - "item_features[\"created\"] = datetime.now()\n", - "item_features[\"created\"] = item_features[\"created\"].astype(\"datetime64[ns]\")" + "In the following cells we are going to export the required user and item features files, and save the query (user) tower model and item embeddings to disk. If you want to read more about exporting retrieval models, please visit [05-Retrieval-Model.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/05-Retrieval-Model.ipynb) notebook in Merlin Models library repo." ] }, { - "cell_type": "code", - "execution_count": 32, - "id": "6c03fa22-b112-4243-bbe1-1cd7260cb85b", + "cell_type": "markdown", + "id": "ff30ceab-b264-4509-9c5b-5a10425e143b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_iditem_categoryitem_shopitem_branditem_id_rawdatetimecreated
0333362023-06-20 23:47:09.5577932023-06-20 23:47:09.559325
1444472023-06-20 23:47:09.5577932023-06-20 23:47:09.559325
25555102023-06-20 23:47:09.5577932023-06-20 23:47:09.559325
3666682023-06-20 23:47:09.5577932023-06-20 23:47:09.559325
4777752023-06-20 23:47:09.5577932023-06-20 23:47:09.559325
\n", - "
" - ], - "text/plain": [ - " item_id item_category item_shop item_brand item_id_raw \\\n", - "0 3 3 3 3 6 \n", - "1 4 4 4 4 7 \n", - "2 5 5 5 5 10 \n", - "3 6 6 6 6 8 \n", - "4 7 7 7 7 5 \n", - "\n", - " datetime created \n", - "0 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", - "1 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", - "2 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", - "3 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", - "4 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 " - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "item_features.head()" + "### Extract and save Item embeddings" ] }, { "cell_type": "code", "execution_count": 33, - "id": "c312884b-a1f8-4e08-8068-696e06a9bf46", - "metadata": {}, - "outputs": [], - "source": [ - "# save to disk\n", - "item_features.to_parquet(\n", - " os.path.join(feature_repo_path, \"data\", \"item_features.parquet\")\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "ff30ceab-b264-4509-9c5b-5a10425e143b", + "id": "e62f65f8-e8f1-447e-9500-5960807c36f2", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "### Extract and save Item embeddings" + "nvt_wkflow = nvt.Workflow.load(output_path + \"/workflow\")\n", + "cat_wkflow = nvt_wkflow.get_subworkflow(\"items_cat\")\n", + "item_features_ds = Dataset(item_features, schema=schema)\n", + "item_feature_cat_ds = cat_wkflow.transform(item_features_ds).to_ddf().compute()" ] }, { "cell_type": "code", "execution_count": 34, - "id": "00f1fe65-882e-4962-bb16-19a130fda215", + "id": "6a4848a7-aa4f-4f8a-8b40-6c8458ac4fcd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + 
" warnings.warn(\n" + ] + } + ], "source": [ "item_embs = model_tt.item_embeddings(\n", - " Dataset(item_features, schema=schema), batch_size=1024\n", + " Dataset(item_feature_cat_ds, schema=schema), batch_size=1024\n", ")\n", - "item_embs_df = item_embs.compute(scheduler=\"synchronous\")" + "item_embs_df = item_embs.compute(scheduler=\"synchronous\")\n", + "item_embs_df[\"item_id\"] = item_features[\"item_id\"]" ] }, { @@ -1409,123 +1303,123 @@ " \n", " \n", " 0\n", - " 3\n", - " -0.055164\n", - " -0.018832\n", - " -0.009478\n", - " -0.016874\n", - " 0.015988\n", - " -0.022928\n", - " 0.022611\n", - " -0.030984\n", - " -0.045701\n", + " 29\n", + " -0.004295\n", + " -0.032384\n", + " -0.044917\n", + " 0.039122\n", + " -0.016758\n", + " -0.059473\n", + " -0.007811\n", + " 0.004419\n", + " -0.044857\n", " ...\n", - " 0.007060\n", - " 0.032204\n", - " 0.011515\n", - " 0.012811\n", - " 0.002650\n", - " 0.023448\n", - " 0.021759\n", - " -0.011316\n", - " -0.035275\n", - " -0.004572\n", + " 0.019839\n", + " 0.031614\n", + " 0.066616\n", + " -0.023459\n", + " 0.039532\n", + " -0.025300\n", + " 0.002040\n", + " 0.010800\n", + " -0.031893\n", + " 0.009897\n", " \n", " \n", " 1\n", " 4\n", - " -0.027412\n", - " -0.007417\n", - " -0.023730\n", - " -0.008385\n", - " 0.028241\n", - " -0.004143\n", - " 0.001301\n", - " -0.040613\n", - " -0.020645\n", + " 0.007210\n", + " -0.004949\n", + " -0.021168\n", + " 0.039533\n", + " -0.004339\n", + " -0.026979\n", + " 0.018726\n", + " -0.034300\n", + " -0.010744\n", " ...\n", - " 0.001835\n", - " 0.010697\n", - " 0.006311\n", - " 0.007290\n", - " -0.014959\n", - " 0.025217\n", - " 0.041697\n", - " -0.012126\n", - " -0.022523\n", - " -0.001903\n", + " 0.021441\n", + " -0.008866\n", + " 0.018915\n", + " 0.001428\n", + " 0.007287\n", + " 0.003946\n", + " -0.029646\n", + " -0.023998\n", + " -0.021912\n", + " 0.005516\n", " \n", " \n", " 2\n", - " 5\n", - " -0.009581\n", - " 0.016263\n", - " -0.027931\n", - " -0.023079\n", - " 0.006483\n", - " 0.006133\n", - " -0.027449\n", - " 0.027797\n", - " 0.045743\n", + " 17\n", + " 0.034115\n", + " -0.007572\n", + " -0.045769\n", + " 0.038766\n", + " -0.018994\n", + " -0.003735\n", + " -0.013748\n", + " 0.003397\n", + " 0.018028\n", " ...\n", - " -0.003662\n", - " 0.054940\n", - " 0.013501\n", - " -0.004127\n", - " -0.001858\n", - " -0.000462\n", - " -0.018047\n", - " 0.036427\n", - " 0.009524\n", - " 0.006689\n", + " 0.002467\n", + " 0.029187\n", + " -0.023114\n", + " 0.007315\n", + " 0.001796\n", + " 0.013247\n", + " 0.011309\n", + " -0.004574\n", + " -0.011722\n", + " 0.004382\n", " \n", " \n", " 3\n", - " 6\n", - " -0.007599\n", - " -0.012074\n", - " 0.024879\n", - " -0.008080\n", - " -0.025010\n", - " -0.000266\n", - " 0.005489\n", - " -0.014263\n", - " -0.019343\n", + " 155\n", + " -0.014619\n", + " -0.001738\n", + " -0.006829\n", + " 0.019568\n", + " -0.025870\n", + " -0.043351\n", + " 0.007577\n", + " -0.038977\n", + " -0.015209\n", " ...\n", - " -0.030220\n", - " 0.011863\n", - " -0.008515\n", - " 0.011286\n", - " -0.000907\n", - " 0.014882\n", - " 0.035699\n", - " -0.007068\n", - " 0.012995\n", - " 0.001644\n", + " -0.011659\n", + " 0.011859\n", + " -0.004721\n", + " 0.002480\n", + " 0.040565\n", + " -0.023915\n", + " -0.039050\n", + " -0.013832\n", + " -0.028899\n", + " 0.034076\n", " \n", " \n", " 4\n", - " 7\n", - " -0.070002\n", - " 0.001031\n", - " -0.001309\n", - " -0.014118\n", - " -0.036672\n", - " -0.012943\n", - " 0.009711\n", - " -0.008856\n", - " -0.032054\n", + " 19\n", + " 0.027284\n", + " -0.039710\n", 
+ " -0.013016\n", + " -0.021763\n", + " -0.019920\n", + " -0.019573\n", + " 0.004436\n", + " 0.005504\n", + " -0.018312\n", " ...\n", - " -0.023113\n", - " 0.000600\n", - " -0.005711\n", - " 0.044277\n", - " -0.004765\n", - " 0.016184\n", - " 0.028223\n", - " 0.002914\n", - " 0.032516\n", - " 0.026521\n", + " 0.007124\n", + " 0.005688\n", + " 0.018035\n", + " 0.018919\n", + " 0.020091\n", + " -0.017181\n", + " 0.027977\n", + " -0.032007\n", + " -0.005940\n", + " 0.013642\n", " \n", " \n", "\n", @@ -1534,25 +1428,25 @@ ], "text/plain": [ " item_id 0 1 2 3 4 5 \\\n", - "0 3 -0.055164 -0.018832 -0.009478 -0.016874 0.015988 -0.022928 \n", - "1 4 -0.027412 -0.007417 -0.023730 -0.008385 0.028241 -0.004143 \n", - "2 5 -0.009581 0.016263 -0.027931 -0.023079 0.006483 0.006133 \n", - "3 6 -0.007599 -0.012074 0.024879 -0.008080 -0.025010 -0.000266 \n", - "4 7 -0.070002 0.001031 -0.001309 -0.014118 -0.036672 -0.012943 \n", + "0 29 -0.004295 -0.032384 -0.044917 0.039122 -0.016758 -0.059473 \n", + "1 4 0.007210 -0.004949 -0.021168 0.039533 -0.004339 -0.026979 \n", + "2 17 0.034115 -0.007572 -0.045769 0.038766 -0.018994 -0.003735 \n", + "3 155 -0.014619 -0.001738 -0.006829 0.019568 -0.025870 -0.043351 \n", + "4 19 0.027284 -0.039710 -0.013016 -0.021763 -0.019920 -0.019573 \n", "\n", " 6 7 8 ... 54 55 56 57 \\\n", - "0 0.022611 -0.030984 -0.045701 ... 0.007060 0.032204 0.011515 0.012811 \n", - "1 0.001301 -0.040613 -0.020645 ... 0.001835 0.010697 0.006311 0.007290 \n", - "2 -0.027449 0.027797 0.045743 ... -0.003662 0.054940 0.013501 -0.004127 \n", - "3 0.005489 -0.014263 -0.019343 ... -0.030220 0.011863 -0.008515 0.011286 \n", - "4 0.009711 -0.008856 -0.032054 ... -0.023113 0.000600 -0.005711 0.044277 \n", + "0 -0.007811 0.004419 -0.044857 ... 0.019839 0.031614 0.066616 -0.023459 \n", + "1 0.018726 -0.034300 -0.010744 ... 0.021441 -0.008866 0.018915 0.001428 \n", + "2 -0.013748 0.003397 0.018028 ... 0.002467 0.029187 -0.023114 0.007315 \n", + "3 0.007577 -0.038977 -0.015209 ... -0.011659 0.011859 -0.004721 0.002480 \n", + "4 0.004436 0.005504 -0.018312 ... 
0.007124 0.005688 0.018035 0.018919 \n", "\n", " 58 59 60 61 62 63 \n", - "0 0.002650 0.023448 0.021759 -0.011316 -0.035275 -0.004572 \n", - "1 -0.014959 0.025217 0.041697 -0.012126 -0.022523 -0.001903 \n", - "2 -0.001858 -0.000462 -0.018047 0.036427 0.009524 0.006689 \n", - "3 -0.000907 0.014882 0.035699 -0.007068 0.012995 0.001644 \n", - "4 -0.004765 0.016184 0.028223 0.002914 0.032516 0.026521 \n", + "0 0.039532 -0.025300 0.002040 0.010800 -0.031893 0.009897 \n", + "1 0.007287 0.003946 -0.029646 -0.023998 -0.021912 0.005516 \n", + "2 0.001796 0.013247 0.011309 -0.004574 -0.011722 0.004382 \n", + "3 0.040565 -0.023915 -0.039050 -0.013832 -0.028899 0.034076 \n", + "4 0.020091 -0.017181 0.027977 -0.032007 -0.005940 0.013642 \n", "\n", "[5 rows x 65 columns]" ] @@ -1614,11 +1508,11 @@ " created_timestamp_column=\"created\",\n", ")\n", "\n", - "user_raw = Entity(name=\"user_id_raw\", value_type=ValueType.INT32, join_keys=[\"user_id_raw\"],)\n", + "user = Entity(name=\"user_id\", value_type=ValueType.INT32, join_keys=[\"user_id\"],)\n", "\n", "user_features_view = FeatureView(\n", " name=\"user_features\",\n", - " entities=[user_raw],\n", + " entities=[user],\n", " ttl=timedelta(0),\n", " schema=[\n", " Field(name=\"user_shops\", dtype=Int32),\n", @@ -1632,7 +1526,6 @@ " Field(name=\"user_intentions\", dtype=Int32),\n", " Field(name=\"user_brands\", dtype=Int32),\n", " Field(name=\"user_categories\", dtype=Int32),\n", - " Field(name=\"user_id\", dtype=Int32),\n", " ],\n", " online=True,\n", " source=user_features,\n", @@ -1676,7 +1569,6 @@ " Field(name=\"item_category\", dtype=Int32),\n", " Field(name=\"item_shop\", dtype=Int32),\n", " Field(name=\"item_brand\", dtype=Int32),\n", - " Field(name=\"item_id_raw\", dtype=Int32),\n", " ],\n", " online=True,\n", " source=item_features,\n", @@ -1749,8 +1641,9 @@ "source": [ "import seedir as sd\n", "\n", + "feature_repo_path = os.path.join(BASE_DIR, \"feast_repo\")\n", "sd.seedir(\n", - " os.path.join(BASE_DIR, \"feast_repo\"),\n", + " feature_repo_path,\n", " style=\"lines\",\n", " itemlimit=10,\n", " depthlimit=3,\n", @@ -1769,6 +1662,14 @@ "\n", "For the next step, move on to the `02-Deploying-multi-stage-Recsys-with-Merlin-Systems.ipynb` notebook to deploy our saved models as an ensemble to TIS and obtain prediction results for a given request." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c5bd646-8121-4f32-bff8-137d50e3b8a2", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index e2b0e5470..965509a13 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n", + "# Copyright 2023 NVIDIA Corporation. 
All Rights Reserved.\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", @@ -75,11 +75,11 @@ "\n", "In case you need to install them for running this example on GPU, execute the following script in a cell.\n", "```\n", - "%pip install \"feast==0.31\" faiss-gpu\n", + "%pip install \"feast<0.31\" faiss-gpu\n", "```\n", "or the following script in a cell for CPU.\n", "```\n", - "%pip install tensorflow-cpu \"feast==0.31\" faiss-cpu\n", + "%pip install tensorflow-cpu \"feast<0.31\" faiss-cpu\n", "```" ] }, @@ -93,13 +93,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " DESCRIPTOR = _descriptor.FileDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:37: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _descriptor.FieldDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:30: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _INTEGERSTATISTICS = _descriptor.Descriptor(\n", - "2023-06-20 23:49:49.177129: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-21 21:37:09.169418: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_shape_pb2.py:18: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", " DESCRIPTOR = _descriptor.FileDescriptor(\n", @@ -121,10 +115,28 @@ " DESCRIPTOR = _descriptor.FileDescriptor(\n", "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/resource_handle_pb2.py:39: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/resource_handle_pb2.py:32: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _RESOURCEHANDLEPROTO_DTYPEANDSHAPE = _descriptor.Descriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_pb2.py:21: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_pb2.py:40: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_pb2.py:33: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _TENSORPROTO = _descriptor.Descriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/attr_value_pb2.py:21: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/attr_value_pb2.py:40: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", "/usr/local/lib/python3.8/dist-packages/nvtabular/loader/__init__.py:19: DeprecationWarning: The `nvtabular.loader` module has moved to a new repository, at https://github.com/NVIDIA-Merlin/dataloader . Support for importing from `nvtabular.loader` is deprecated, and will be removed in a future version. 
Please update your imports to refer to `merlinloader`.\n", - " warnings.warn(\n" + " warnings.warn(\n", + "2023-06-21 21:37:11.305888: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n", + "2023-06-21 21:37:11.305925: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 2dca7910ae98\n", + "2023-06-21 21:37:11.305933: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 2dca7910ae98\n", + "2023-06-21 21:37:11.306011: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 525.85.12\n", + "2023-06-21 21:37:11.306030: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 515.65.1\n", + "2023-06-21 21:37:11.306037: E tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 515.65.1 does not match DSO version 525.85.12 -- cannot find working devices in this configuration\n" ] } ], @@ -140,7 +152,8 @@ "from merlin.systems.dag.ops.softmax_sampling import SoftmaxSampling\n", "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", "from merlin.systems.dag.ops.unroll_features import UnrollFeatures\n", - "from merlin.systems.triton.utils import send_triton_request" + "from merlin.systems.triton.utils import send_triton_request\n", + "from merlin.systems.dag.ops.workflow import TransformWorkflow" ] }, { @@ -184,15 +197,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo/feature_repo\n", - "Created entity \u001b[1m\u001b[32muser_id_raw\u001b[0m\n", - "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", - "Created feature view \u001b[1m\u001b[32muser_features\u001b[0m\n", - "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", - "\n", - "Created sqlite table \u001b[1m\u001b[32mfeast_repo_item_features\u001b[0m\n", - "Created sqlite table \u001b[1m\u001b[32mfeast_repo_user_features\u001b[0m\n", - "\n" + "/raid/workshared/merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo/feature_repo\n", + "\u001b[1m\u001b[94mNo changes to registry\n", + "\u001b[1m\u001b[94mNo changes to infrastructure\n" ] } ], @@ -228,9 +235,9 @@ "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views from \u001b[1m\u001b[32m1995-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", "\u001b[1m\u001b[32muser_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 456/456 [00:00<00:00, 1136.51it/s]\n", + "100%|███████████████████████████████████████████████████████████| 457/457 [00:00<00:00, 2914.62it/s]\n", "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 436/436 [00:00<00:00, 2878.99it/s]\n" + "100%|███████████████████████████████████████████████████████████| 451/451 [00:00<00:00, 8542.45it/s]\n" ] } ], @@ -345,7 +352,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING clustering 436 points to 32 centroids: please provide at least 1248 training points\n" + "WARNING clustering 451 points to 32 centroids: please provide at least 1248 training points\n" ] } ], @@ -394,9 +401,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m 
feature views to \u001b[1m\u001b[32m2023-06-20 23:50:04+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-21 21:37:18+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-20 23:50:04+00:00\u001b[0m:\n" + "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-21 21:37:18+00:00\u001b[0m:\n" ] }, { @@ -410,14 +417,28 @@ "source": [ "from merlin.systems.dag.ops.feast import QueryFeast \n", "\n", - "user_features = [\"user_id_raw\"] >> QueryFeast.from_feature_view(\n", + "user_attributes = [\"user_id\"] >> QueryFeast.from_feature_view(\n", " store=feature_store,\n", " view=\"user_features\",\n", - " column=\"user_id_raw\",\n", - " include_id=False,\n", + " column=\"user_id\",\n", + " include_id=True,\n", ")" ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f11299b6-20d4-4687-bb0e-b855a9bcb9eb", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular import Workflow\n", + "\n", + "nvt_workflow = Workflow.load('/workspace/data/processed_nvt/workflow')\n", + "user_subgraph = nvt_workflow.get_subworkflow(\"user\")\n", + "user_features = user_attributes >> TransformWorkflow(user_subgraph)" + ] + }, { "cell_type": "markdown", "id": "27e25be7-3ff0-49c2-a3fc-03ec4d615e77", @@ -428,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "21139caa-3a51-42e6-b006-21a92c95f1bc", "metadata": {}, "outputs": [ @@ -438,7 +459,7 @@ "" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -447,12 +468,13 @@ "# prevent TF to claim all GPU memory\n", "from merlin.dataloader.tf_utils import configure_tensorflow\n", "\n", + "\n", "configure_tensorflow()" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "47c2d9b1-51dc-4549-977d-d7941ee6486c", "metadata": {}, "outputs": [ @@ -460,24 +482,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-20 23:50:06.005776: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-06-20 23:50:09.981326: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8192 MB memory: -> device: 0, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:06:00.0, compute capability: 7.0\n", - "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 52). 
These functions will not be directly callable after loading.\n" + "2023-06-21 21:37:19.332291: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp7n9o9yv2/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp7n9o9yv2/assets\n" + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" ] } ], @@ -502,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "b270f663-0ae1-4356-acd4-5f8c986abf4d", "metadata": {}, "outputs": [ @@ -510,9 +523,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-20 23:50:17+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-21 21:37:21+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-20 23:50:17+00:00\u001b[0m:\n" + "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-21 21:37:21+00:00\u001b[0m:\n" ] }, { @@ -524,7 +537,7 @@ } ], "source": [ - "item_features = retrieval[\"candidate_ids\"] >> QueryFeast.from_feature_view(\n", + "item_attributes = retrieval[\"candidate_ids\"] >> QueryFeast.from_feature_view(\n", " store=feature_store,\n", " view=\"item_features\",\n", " column=\"candidate_ids\",\n", @@ -533,6 +546,17 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0d0a4531-665c-48a1-98a9-216c955449b7", + "metadata": {}, + "outputs": [], + "source": [ + "item_subgraph = nvt_workflow.get_subworkflow(\"item\")\n", + "item_features = item_attributes >> TransformWorkflow(item_subgraph)" + ] + }, { "cell_type": "markdown", "id": "304a4d09-db05-4666-b520-75dbbbc7ab17", @@ -543,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "id": "eb0ef434-03a5-4a36-afb9-e19a43243c64", "metadata": {}, "outputs": [], @@ -578,32 +602,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "id": "ce31723e-af4d-4827-bb60-3a9fafcd9da6", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 98). 
These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpbt6mf1gw/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpbt6mf1gw/assets\n" - ] - } - ], + "outputs": [], "source": [ "ranking = combined_features >> PredictTensorflow(ranking_model_path)" ] @@ -618,14 +620,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "id": "7f65598b-e3e7-4238-a73e-19d00c3deb26", "metadata": {}, "outputs": [], "source": [ "top_k=10\n", - "ordering = combined_features[\"item_id_raw\"] >> SoftmaxSampling(\n", - " relevance_col=ranking[\"click/binary_classification_task\"], topk=top_k, temperature=20.0\n", + "ordering = combined_features[\"item_id\"] >> SoftmaxSampling(\n", + " relevance_col=ranking[\"click/binary_classification_task\"], topk=top_k, temperature=0.00000001\n", ")" ] }, @@ -650,7 +652,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "id": "b28c452f-543c-45a4-9995-130ca6919669", "metadata": {}, "outputs": [], @@ -669,21 +671,21 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "id": "9c8b7b94-5559-4587-a272-4d9de2d53dd1", "metadata": {}, "outputs": [], "source": [ "request_schema = Schema(\n", " [\n", - " ColumnSchema(\"user_id_raw\", dtype=np.int32),\n", + " ColumnSchema(\"user_id\", dtype=np.int32),\n", " ]\n", ")" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "id": "6c64d686-aed5-42f8-b517-482b4237c69f", "metadata": {}, "outputs": [ @@ -717,7 +719,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "id": "89182219-40a6-458c-af0e-7a8e83f364aa", "metadata": {}, "outputs": [ @@ -726,7 +728,25 @@ "output_type": "stream", "text": [ "poc_ensemble/\n", - "├─0_predicttensorflowtriton/\n", + "├─0_transformworkflowtriton/\n", + "│ ├─1/\n", + "│ │ ├─model.py\n", + "│ │ └─workflow/\n", + "│ │ ├─categories/\n", + "│ │ │ ├─unique.user_age.parquet\n", + "│ │ │ ├─unique.user_brands.parquet\n", + "│ │ │ ├─unique.user_categories.parquet\n", + "│ │ │ ├─unique.user_consumption_2.parquet\n", + "│ │ │ ├─unique.user_gender.parquet\n", + "│ │ │ ├─unique.user_geography.parquet\n", + "│ │ │ ├─unique.user_group.parquet\n", + "│ │ │ ├─unique.user_id.parquet\n", + "│ │ │ ├─unique.user_intentions.parquet\n", + "│ │ │ └─unique.user_is_occupied.parquet\n", + "│ │ ├─metadata.json\n", + "│ │ └─workflow.pkl\n", + "│ └─config.pbtxt\n", + "├─1_predicttensorflowtriton/\n", "│ ├─1/\n", "│ │ └─model.savedmodel/\n", "│ │ ├─assets/\n", @@ -737,7 +757,19 @@ "│ │ ├─variables.data-00000-of-00001\n", "│ │ └─variables.index\n", "│ └─config.pbtxt\n", - "├─1_predicttensorflowtriton/\n", + "├─2_transformworkflowtriton/\n", + "│ ├─1/\n", + "│ │ ├─model.py\n", + "│ │ └─workflow/\n", + "│ │ ├─categories/\n", + "│ │ │ ├─unique.item_brand.parquet\n", + "│ │ │ ├─unique.item_category.parquet\n", + "│ │ │ ├─unique.item_id.parquet\n", + "│ │ │ └─unique.item_shop.parquet\n", + "│ │ ├─metadata.json\n", + "│ │ └─workflow.pkl\n", + "│ └─config.pbtxt\n", + "├─3_predicttensorflowtriton/\n", "│ ├─1/\n", "│ │ └─model.savedmodel/\n", "│ │ ├─.merlin/\n", @@ -816,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "id": "d08a8975-9c32-467b-99ec-df66319f854b", "metadata": {}, "outputs": [ @@ -824,8 +856,8 @@ "name": "stdout", "output_type": "stream", "text": [ - " user_id_raw\n", 
- "0 7\n" + " user_id\n", + "0 7\n" ] } ], @@ -834,8 +866,8 @@ "from merlin.core.dispatch import make_df\n", "\n", "# create a request to be sent to TIS\n", - "request = make_df({\"user_id_raw\": [7]})\n", - "request[\"user_id_raw\"] = request[\"user_id_raw\"].astype(np.int32)\n", + "request = make_df({\"user_id\": [7]})\n", + "request[\"user_id\"] = request[\"user_id\"].astype(np.int32)\n", "print(request)" ] }, @@ -849,20 +881,20 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "id": "74ec62f2-5935-45c6-8058-e1cdade6f80f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ordered_ids': array([[266, 381, 145, 6, 232, 651, 83, 244, 107, 69]], dtype=int32),\n", - " 'ordered_scores': array([[0.50194645, 0.50282484, 0.50340647, 0.5027974 , 0.50236404,\n", - " 0.50230837, 0.50244445, 0.5022982 , 0.50169003, 0.50216776]],\n", + "{'ordered_ids': array([[343, 72, 248, 74, 91, 394, 194, 306, 333, 266]], dtype=int32),\n", + " 'ordered_scores': array([[0.49981913, 0.49877545, 0.49930254, 0.5005477 , 0.5007775 ,\n", + " 0.4999408 , 0.49992177, 0.50006884, 0.50042826, 0.4995823 ]],\n", " dtype=float32)}" ] }, - "execution_count": 23, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } From a996c7bafa0235fab9521f13fb00188ee84846b9 Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 27 Jun 2023 17:38:52 -0400 Subject: [PATCH 2/5] remove unnecessary raw_id calls --- .../01-Building-Recommender-Systems-with-Merlin.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index 990c568ed..3dcfd9d08 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -129,10 +129,10 @@ "outputs": [], "source": [ "# for running this example on GPU, install the following libraries\n", - "# %pip install \"feast<0.20\" faiss-gpu\n", + "# %pip install \"feast==0.31\" faiss-gpu\n", "\n", "# for running this example on CPU, uncomment the following lines\n", - "# %pip install tensorflow-cpu \"feast<0.20\" faiss-cpu\n", + "# %pip install tensorflow-cpu \"feast==0.31\" faiss-cpu\n", "# %pip uninstall cudf\n" ] }, @@ -930,7 +930,7 @@ "metadata": {}, "outputs": [], "source": [ - "schema = train_tt.schema.select_by_tag([Tags.ITEM_ID, Tags.USER_ID, Tags.ITEM, Tags.USER]).without(['user_id_raw', 'item_id_raw', 'click'])\n", + "schema = train_tt.schema.select_by_tag([Tags.ITEM_ID, Tags.USER_ID, Tags.ITEM, Tags.USER]).without(['click'])\n", "train_tt.schema = schema\n", "valid_tt.schema = schema" ] @@ -1068,7 +1068,7 @@ "valid = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"), part_size=\"500MB\")\n", "\n", "# define schema object\n", - "schema = train.schema.without(['user_id_raw', 'item_id_raw'])" + "schema = train.schema" ] }, { From f877254fc3d3bc40dc7dc0146295acd3a641d7de Mon Sep 17 00:00:00 2001 From: Julio Date: Thu, 29 Jun 2023 15:22:35 -0400 Subject: [PATCH 3/5] changes to multistage example to clean up embeddings and update faiss setup --- ...ding-Recommender-Systems-with-Merlin.ipynb | 611 ++++-------------- ...lti-stage-RecSys-with-Merlin-Systems.ipynb | 112 ++-- 2 files changed, 185 insertions(+), 538 deletions(-) diff --git 
a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index 3dcfd9d08..3dbe42dc5 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -146,33 +146,43 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-21 21:24:41.476144: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + "2023-06-29 19:20:02.816099: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n", + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-06-21 21:24:43.274327: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n", - "2023-06-21 21:24:43.274369: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 2dca7910ae98\n", - "2023-06-21 21:24:43.274380: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 2dca7910ae98\n", - "2023-06-21 21:24:43.274481: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program\n", - "2023-06-21 21:24:43.274508: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 515.65.1\n", - "2023-06-21 21:24:43.621683: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-29 19:20:07.245419: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-06-29 19:20:08.267091: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-06-29 19:20:08.267138: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:15:00.0, compute capability: 7.5\n", + "2023-06-29 19:20:08.268109: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-06-29 19:20:08.268137: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 24576 MB memory: -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:2d:00.0, compute capability: 7.5\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[SOK INFO] Initialize finished, communication tool: horovod\n" + ] } ], "source": [ @@ -225,7 +235,7 @@ "DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data/\")\n", "# set up the base dir for feature store\n", "BASE_DIR = os.environ.get(\n", - " \"BASE_DIR\", \"/Merlin/examples/Building-and-deploying-multi-stage-RecSys/\"\n", + " \"BASE_DIR\", \"/raid/workshared/merlin/examples/Building-and-deploying-multi-stage-RecSys/\"\n", ")" ] }, @@ -242,16 +252,7 @@ "execution_count": 6, "id": "b44b3378-7297-4946-a271-742a9239bc3e", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from merlin.datasets.synthetic import generate_data\n", "\n", @@ -341,16 +342,7 @@ "execution_count": 9, "id": "ea0b369c-2f01-42e3-9f3c-74c3ff4a6d64", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from merlin.models.utils.dataset import unique_rows_by_features\n", "\n", @@ -430,9 +422,9 @@ " \n", " \n", " \n", - " 38\n", + " 6\n", " 7\n", - " 658\n", + " 530\n", " 1\n", " 1\n", " 1\n", @@ -441,28 +433,28 @@ " 1\n", " 1\n", " 1\n", - " 191\n", - " 327\n", - " 35\n", - " 2023-06-21 21:24:49.082804\n", - " 2023-06-21 21:24:49.085539\n", + " 154\n", + " 264\n", + " 28\n", + " 2023-06-29 19:20:20.311986\n", + " 2023-06-29 19:20:20.314307\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user_id user_shops user_profile user_group user_gender user_age \\\n", - "38 7 658 1 1 1 1 \n", + " user_id user_shops user_profile user_group user_gender user_age \\\n", + "6 7 530 1 1 1 1 \n", "\n", - " user_consumption_1 user_consumption_2 user_is_occupied user_geography \\\n", - "38 1 1 1 1 \n", + " user_consumption_1 user_consumption_2 user_is_occupied user_geography \\\n", + "6 1 1 1 1 \n", "\n", - " user_intentions user_brands user_categories datetime \\\n", - "38 191 327 35 2023-06-21 21:24:49.082804 \n", + " user_intentions user_brands user_categories datetime \\\n", + "6 154 264 28 2023-06-29 19:20:20.311986 \n", "\n", - " created \n", - "38 2023-06-21 21:24:49.085539 " + " created \n", + "6 2023-06-29 19:20:20.314307 " ] }, "execution_count": 11, @@ -491,16 +483,7 @@ "execution_count": 13, "id": "0a33a668-8e2a-4546-8f54-0060d405ba91", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "item_features = (\n", " unique_rows_by_features(train_raw, Tags.ITEM, Tags.ITEM_ID)\n", @@ -561,53 +544,53 @@ " \n", " \n", " 0\n", - " 29\n", - " 138\n", - " 9672\n", - " 3331\n", - " 1541\n", - " 2023-06-21 21:24:49.145983\n", - " 2023-06-21 
21:24:49.147882\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 2023-06-29 19:20:20.413296\n", + " 2023-06-29 19:20:20.414521\n", " \n", " \n", " 1\n", - " 4\n", - " 15\n", - " 1037\n", - " 357\n", - " 166\n", - " 2023-06-21 21:24:49.145983\n", - " 2023-06-21 21:24:49.147882\n", + " 2\n", + " 7\n", + " 457\n", + " 158\n", + " 73\n", + " 2023-06-29 19:20:20.413296\n", + " 2023-06-29 19:20:20.414521\n", " \n", " \n", " 2\n", - " 17\n", - " 79\n", - " 5527\n", - " 1904\n", - " 881\n", - " 2023-06-21 21:24:49.145983\n", - " 2023-06-21 21:24:49.147882\n", + " 3\n", + " 13\n", + " 914\n", + " 315\n", + " 146\n", + " 2023-06-29 19:20:20.413296\n", + " 2023-06-29 19:20:20.414521\n", " \n", " \n", " 3\n", - " 155\n", - " 756\n", - " 53196\n", - " 18320\n", - " 8471\n", - " 2023-06-21 21:24:49.145983\n", - " 2023-06-21 21:24:49.147882\n", + " 4\n", + " 20\n", + " 1371\n", + " 473\n", + " 219\n", + " 2023-06-29 19:20:20.413296\n", + " 2023-06-29 19:20:20.414521\n", " \n", " \n", " 4\n", - " 19\n", - " 89\n", - " 6218\n", - " 2142\n", - " 991\n", - " 2023-06-21 21:24:49.145983\n", - " 2023-06-21 21:24:49.147882\n", + " 5\n", + " 26\n", + " 1828\n", + " 630\n", + " 292\n", + " 2023-06-29 19:20:20.413296\n", + " 2023-06-29 19:20:20.414521\n", " \n", " \n", "\n", @@ -615,18 +598,18 @@ ], "text/plain": [ " item_id item_category item_shop item_brand item_intention \\\n", - "0 29 138 9672 3331 1541 \n", - "1 4 15 1037 357 166 \n", - "2 17 79 5527 1904 881 \n", - "3 155 756 53196 18320 8471 \n", - "4 19 89 6218 2142 991 \n", + "0 1 1 1 1 1 \n", + "1 2 7 457 158 73 \n", + "2 3 13 914 315 146 \n", + "3 4 20 1371 473 219 \n", + "4 5 26 1828 630 292 \n", "\n", " datetime created \n", - "0 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", - "1 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", - "2 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", - "3 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 \n", - "4 2023-06-21 21:24:49.145983 2023-06-21 21:24:49.147882 " + "0 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", + "1 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", + "2 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", + "3 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", + "4 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 " ] }, "execution_count": 15, @@ -721,7 +704,8 @@ "outputs = subgraph_user + subgraph_item + targets\n", "\n", "# add dropna op to filter rows with nulls\n", - "outputs = outputs >> Dropna()" + "outputs = outputs >> Dropna()\n", + "nvt_wkflow = nvt.Workflow(outputs)" ] }, { @@ -737,35 +721,10 @@ "execution_count": 19, "id": "814e8438-642a-4f03-baaf-44dab8d1b5e5", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular 
Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "transform_aliccp(\n", - " (train_raw, valid_raw), output_path, nvt_workflow=outputs, workflow_name=\"workflow\"\n", + " (train_raw, valid_raw), output_path, nvt_workflow=nvt_wkflow, workflow_name=\"workflow\"\n", ")" ] }, @@ -824,18 +783,7 @@ "execution_count": 21, "id": "251d4697-8f9c-4c93-8de4-c3480a8378de", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "train_tt = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"))\n", "valid_tt = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"))" @@ -854,33 +802,18 @@ "execution_count": 22, "id": "7e085a6d-74ad-4c24-8e7c-4e449c15f471", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "inputs = train_tt.schema.column_names\n", "outputs = inputs >> Filter(f=lambda df: df[\"click\"] == 1)\n", "\n", - "workflow2 = nvt.Workflow(outputs)\n", - "\n", - "workflow2.fit(train_tt)\n", + "nvt_wkflow.fit(train_tt)\n", "\n", - "workflow2.transform(train_tt).to_parquet(\n", + "nvt_wkflow.transform(train_tt).to_parquet(\n", " output_path=os.path.join(output_path2, \"train\")\n", ")\n", 
"\n", - "workflow2.transform(valid_tt).to_parquet(\n", + "nvt_wkflow.transform(valid_tt).to_parquet(\n", " output_path=os.path.join(output_path2, \"valid\")\n", ")" ] @@ -906,18 +839,7 @@ "execution_count": 23, "id": "252a8e60-b447-46b5-ade6-3557cbafa797", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "train_tt = Dataset(os.path.join(output_path2, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", "valid_tt = Dataset(os.path.join(output_path2, \"valid\", \"*.parquet\"), part_size=\"500MB\")" @@ -968,13 +890,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 13s 1s/step - loss: 8.9092 - recall_at_10: 0.0076 - ndcg_at_10: 0.0058 - regularization_loss: 0.0000e+00 - loss_batch: 8.5704 - val_loss: 8.9050 - val_recall_at_10: 0.0121 - val_ndcg_at_10: 0.0097 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.7986\n" + "9/9 [==============================] - 10s 275ms/step - loss: 8.9538 - recall_at_10: 0.0055 - ndcg_at_10: 0.0038 - regularization_loss: 0.0000e+00 - loss_batch: 8.8710 - val_loss: 8.9181 - val_recall_at_10: 0.0165 - val_ndcg_at_10: 0.0109 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.5802\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 26, @@ -1050,18 +972,7 @@ "execution_count": 28, "id": "cb870461-6ac2-49b2-ba6a-2da6ecb57f1d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "# define train and valid dataset objects\n", "train = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", @@ -1127,13 +1038,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 5s 271ms/step - loss: 0.6932 - auc: 0.4989 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.4994 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6932\n" + "5/5 [==============================] - 5s 312ms/step - loss: 0.6931 - auc: 0.4991 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.4983 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6931\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 31, @@ -1156,7 +1067,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "00447c12-ea80-4d98-ab47-cc1a982a6958", "metadata": {}, "outputs": [], @@ -1182,287 +1093,31 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "e62f65f8-e8f1-447e-9500-5960807c36f2", 
"metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "nvt_wkflow = nvt.Workflow.load(output_path + \"/workflow\")\n", - "cat_wkflow = nvt_wkflow.get_subworkflow(\"items_cat\")\n", - "item_features_ds = Dataset(item_features, schema=schema)\n", - "item_feature_cat_ds = cat_wkflow.transform(item_features_ds).to_ddf().compute()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "6a4848a7-aa4f-4f8a-8b40-6c8458ac4fcd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:267: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "item_embs = model_tt.item_embeddings(\n", - " Dataset(item_feature_cat_ds, schema=schema), batch_size=1024\n", - ")\n", - "item_embs_df = item_embs.compute(scheduler=\"synchronous\")\n", - "item_embs_df[\"item_id\"] = item_features[\"item_id\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "cf8b82ea-6cce-4dab-ad17-114b5e7eabd4", - "metadata": {}, "outputs": [], "source": [ - "# select only item_id together with embedding columns\n", - "item_embeddings = item_embs_df.drop(\n", - " columns=[\"item_category\", \"item_shop\", \"item_brand\"]\n", - ")" + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "\n", + "workflow = nvt.Workflow([\"item_id\"] + (['item_id', 'item_brand', 'item_category', 'item_shop'] >> TransformWorkflow(nvt_wkflow.get_subworkflow(\"item\")) >> PredictTensorflow(model_tt.first.item_block())))\n", + "item_embeddings = workflow.fit_transform(Dataset(item_features)).to_ddf().compute()" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "e02f0957-6665-400a-80c0-60b307466caf", "metadata": {}, - 
"outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_id012345678...54555657585960616263
029-0.004295-0.032384-0.0449170.039122-0.016758-0.059473-0.0078110.004419-0.044857...0.0198390.0316140.066616-0.0234590.039532-0.0253000.0020400.010800-0.0318930.009897
140.007210-0.004949-0.0211680.039533-0.004339-0.0269790.018726-0.034300-0.010744...0.021441-0.0088660.0189150.0014280.0072870.003946-0.029646-0.023998-0.0219120.005516
2170.034115-0.007572-0.0457690.038766-0.018994-0.003735-0.0137480.0033970.018028...0.0024670.029187-0.0231140.0073150.0017960.0132470.011309-0.004574-0.0117220.004382
3155-0.014619-0.001738-0.0068290.019568-0.025870-0.0433510.007577-0.038977-0.015209...-0.0116590.011859-0.0047210.0024800.040565-0.023915-0.039050-0.013832-0.0288990.034076
4190.027284-0.039710-0.013016-0.021763-0.019920-0.0195730.0044360.005504-0.018312...0.0071240.0056880.0180350.0189190.020091-0.0171810.027977-0.032007-0.0059400.013642
\n", - "

5 rows × 65 columns

\n", - "
" - ], - "text/plain": [ - " item_id 0 1 2 3 4 5 \\\n", - "0 29 -0.004295 -0.032384 -0.044917 0.039122 -0.016758 -0.059473 \n", - "1 4 0.007210 -0.004949 -0.021168 0.039533 -0.004339 -0.026979 \n", - "2 17 0.034115 -0.007572 -0.045769 0.038766 -0.018994 -0.003735 \n", - "3 155 -0.014619 -0.001738 -0.006829 0.019568 -0.025870 -0.043351 \n", - "4 19 0.027284 -0.039710 -0.013016 -0.021763 -0.019920 -0.019573 \n", - "\n", - " 6 7 8 ... 54 55 56 57 \\\n", - "0 -0.007811 0.004419 -0.044857 ... 0.019839 0.031614 0.066616 -0.023459 \n", - "1 0.018726 -0.034300 -0.010744 ... 0.021441 -0.008866 0.018915 0.001428 \n", - "2 -0.013748 0.003397 0.018028 ... 0.002467 0.029187 -0.023114 0.007315 \n", - "3 0.007577 -0.038977 -0.015209 ... -0.011659 0.011859 -0.004721 0.002480 \n", - "4 0.004436 0.005504 -0.018312 ... 0.007124 0.005688 0.018035 0.018919 \n", - "\n", - " 58 59 60 61 62 63 \n", - "0 0.039532 -0.025300 0.002040 0.010800 -0.031893 0.009897 \n", - "1 0.007287 0.003946 -0.029646 -0.023998 -0.021912 0.005516 \n", - "2 0.001796 0.013247 0.011309 -0.004574 -0.011722 0.004382 \n", - "3 0.040565 -0.023915 -0.039050 -0.013832 -0.028899 0.034076 \n", - "4 0.020091 -0.017181 0.027977 -0.032007 -0.005940 0.013642 \n", - "\n", - "[5 rows x 65 columns]" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "item_embeddings.head()" + "item_embeddings.tail()" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "66d7271e-0ea6-4568-ac5a-04089735f542", "metadata": {}, "outputs": [], @@ -1489,7 +1144,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "4ee27d67-e35a-42c5-8025-ed73f35c8e13", "metadata": {}, "outputs": [], @@ -1540,7 +1195,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "48a5927c-840d-410c-8f5b-bebce4f79640", "metadata": {}, "outputs": [], @@ -1591,19 +1246,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "57133c1e-18d9-4ccb-9704-cdebd271985e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: seedir in /usr/local/lib/python3.8/dist-packages (0.4.2)\n", - "Requirement already satisfied: natsort in /usr/local/lib/python3.8/dist-packages (from seedir) (8.4.0)\n" - ] - } - ], + "outputs": [], "source": [ "# install seedir\n", "!pip install seedir" @@ -1611,33 +1257,10 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "986d53ea-c946-4046-a390-6d3b8801d280", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "feast_repo/\n", - "├─README.md\n", - "├─__init__.py\n", - "└─feature_repo/\n", - " ├─__init__.py\n", - " ├─__pycache__/\n", - " │ ├─__init__.cpython-38.pyc\n", - " │ ├─example_repo.cpython-38.pyc\n", - " │ └─test_workflow.cpython-38.pyc\n", - " ├─data/\n", - " │ ├─item_features.parquet\n", - " │ └─user_features.parquet\n", - " ├─feature_store.yaml\n", - " ├─item_features.py\n", - " ├─test_workflow.py\n", - " └─user_features.py\n" - ] - } - ], + "outputs": [], "source": [ "import seedir as sd\n", "\n", diff --git a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index 965509a13..3d1d417b0 100644 --- 
a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -93,7 +93,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-21 21:37:09.169418: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:37: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:30: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _INTEGERSTATISTICS = _descriptor.Descriptor(\n", + "2023-06-29 19:13:17.254704: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_shape_pb2.py:18: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", " DESCRIPTOR = _descriptor.FileDescriptor(\n", @@ -115,28 +121,10 @@ " DESCRIPTOR = _descriptor.FileDescriptor(\n", "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/resource_handle_pb2.py:39: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", " _descriptor.FieldDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/resource_handle_pb2.py:32: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _RESOURCEHANDLEPROTO_DTYPEANDSHAPE = _descriptor.Descriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_pb2.py:21: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " DESCRIPTOR = _descriptor.FileDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_pb2.py:40: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _descriptor.FieldDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_pb2.py:33: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _TENSORPROTO = _descriptor.Descriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/attr_value_pb2.py:21: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " DESCRIPTOR = _descriptor.FileDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/attr_value_pb2.py:40: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _descriptor.FieldDescriptor(\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", "/usr/local/lib/python3.8/dist-packages/nvtabular/loader/__init__.py:19: DeprecationWarning: The `nvtabular.loader` module has moved to a new repository, at https://github.com/NVIDIA-Merlin/dataloader . Support for importing from `nvtabular.loader` is deprecated, and will be removed in a future version. 
Please update your imports to refer to `merlinloader`.\n", - " warnings.warn(\n", - "2023-06-21 21:37:11.305888: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n", - "2023-06-21 21:37:11.305925: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 2dca7910ae98\n", - "2023-06-21 21:37:11.305933: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 2dca7910ae98\n", - "2023-06-21 21:37:11.306011: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 525.85.12\n", - "2023-06-21 21:37:11.306030: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 515.65.1\n", - "2023-06-21 21:37:11.306037: E tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 515.65.1 does not match DSO version 525.85.12 -- cannot find working devices in this configuration\n" + " warnings.warn(\n" ] } ], @@ -198,8 +186,14 @@ "output_type": "stream", "text": [ "/raid/workshared/merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo/feature_repo\n", - "\u001b[1m\u001b[94mNo changes to registry\n", - "\u001b[1m\u001b[94mNo changes to infrastructure\n" + "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", + "Created entity \u001b[1m\u001b[32muser_id\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32muser_features\u001b[0m\n", + "\n", + "Created sqlite table \u001b[1m\u001b[32mfeast_repo_item_features\u001b[0m\n", + "Created sqlite table \u001b[1m\u001b[32mfeast_repo_user_features\u001b[0m\n", + "\n" ] } ], @@ -234,10 +228,10 @@ "text": [ "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views from \u001b[1m\u001b[32m1995-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32muser_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 457/457 [00:00<00:00, 2914.62it/s]\n", "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 451/451 [00:00<00:00, 8542.45it/s]\n" + "100%|███████████████████████████████████████████████████████████| 450/450 [00:00<00:00, 5815.84it/s]\n", + "\u001b[1m\u001b[32muser_features\u001b[0m:\n", + "100%|███████████████████████████████████████████████████████████| 448/448 [00:00<00:00, 1758.64it/s]\n" ] } ], @@ -344,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "0b6cc5bf-d07c-4963-a748-6e2b4827ee36", "metadata": {}, "outputs": [ @@ -352,16 +346,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING clustering 451 points to 32 centroids: please provide at least 1248 training points\n" + "WARNING clustering 450 points to 32 centroids: please provide at least 1248 training points\n" ] } ], "source": [ "from merlin.systems.dag.ops.faiss import QueryFaiss, setup_faiss \n", "\n", - "item_embeddings = np.ascontiguousarray(\n", - " pd.read_parquet(os.path.join(BASE_DIR, \"item_embeddings.parquet\")).to_numpy()\n", - ")\n", + "item_embeddings = pd.read_parquet(os.path.join(BASE_DIR, \"item_embeddings.parquet\"))\n", "setup_faiss(item_embeddings, faiss_index_path)" ] }, @@ -375,7 +367,7 @@ }, { "cell_type": "code", - 
"execution_count": 10, + "execution_count": 11, "id": "3bc00e04-c70c-4882-9952-66f4dbb97bdc", "metadata": {}, "outputs": [], @@ -393,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "3decbe7b-03e3-4978-baac-03f6a0b078c9", "metadata": {}, "outputs": [ @@ -401,9 +393,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-21 21:37:18+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-29 19:14:10+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-21 21:37:18+00:00\u001b[0m:\n" + "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-29 19:14:10+00:00\u001b[0m:\n" ] }, { @@ -427,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "f11299b6-20d4-4687-bb0e-b855a9bcb9eb", "metadata": {}, "outputs": [], @@ -482,15 +474,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-21 21:37:19.332291: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + "2023-06-29 19:14:11.423802: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-06-29 19:14:14.615977: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:15:00.0, compute capability: 7.5\n", + "2023-06-29 19:14:14.616886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 24576 MB memory: -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:2d:00.0, compute capability: 7.5\n", + "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 52). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" + "INFO:tensorflow:Assets written to: /tmp/tmpqzazhnjq/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpqzazhnjq/assets\n" ] } ], @@ -523,9 +525,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-21 21:37:21+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-29 19:14:18+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-21 21:37:21+00:00\u001b[0m:\n" + "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-29 19:14:18+00:00\u001b[0m:\n" ] }, { @@ -605,7 +607,29 @@ "execution_count": 19, "id": "ce31723e-af4d-4827-bb60-3a9fafcd9da6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 98). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp6epm9p86/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp6epm9p86/assets\n" + ] + } + ], "source": [ "ranking = combined_features >> PredictTensorflow(ranking_model_path)" ] @@ -888,9 +912,9 @@ { "data": { "text/plain": [ - "{'ordered_ids': array([[343, 72, 248, 74, 91, 394, 194, 306, 333, 266]], dtype=int32),\n", - " 'ordered_scores': array([[0.49981913, 0.49877545, 0.49930254, 0.5005477 , 0.5007775 ,\n", - " 0.4999408 , 0.49992177, 0.50006884, 0.50042826, 0.4995823 ]],\n", + "{'ordered_ids': array([[ 52, 102, 42, 204, 312, 117, 414, 258, 14, 450]], dtype=int32),\n", + " 'ordered_scores': array([[0.5010059 , 0.5018582 , 0.5001918 , 0.50212526, 0.5004832 ,\n", + " 0.5006511 , 0.50049436, 0.5014268 , 0.5005215 , 0.5017036 ]],\n", " dtype=float32)}" ] }, From 68b66af9358d73ce2e6c90346a08318e805deea9 Mon Sep 17 00:00:00 2001 From: Julio Date: Thu, 29 Jun 2023 15:59:50 -0400 Subject: [PATCH 4/5] update notebooks with output --- ...ding-Recommender-Systems-with-Merlin.ipynb | 233 +++++++++++++----- ...lti-stage-RecSys-with-Merlin-Systems.ipynb | 82 +++--- 2 files changed, 208 insertions(+), 107 deletions(-) diff --git a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index 3dbe42dc5..9a0038917 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -146,7 +146,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-29 19:20:02.816099: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in 
performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-29 19:49:32.836544: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" @@ -167,12 +167,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-29 19:20:07.245419: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-29 19:49:37.094972: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-06-29 19:20:08.267091: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", - "2023-06-29 19:20:08.267138: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:15:00.0, compute capability: 7.5\n", - "2023-06-29 19:20:08.268109: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", - "2023-06-29 19:20:08.268137: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 24576 MB memory: -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:2d:00.0, compute capability: 7.5\n", + "2023-06-29 19:49:38.134481: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-06-29 19:49:38.134526: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:15:00.0, compute capability: 7.5\n", + "2023-06-29 19:49:38.135533: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-06-29 19:49:38.135562: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 24576 MB memory: -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:2d:00.0, compute capability: 7.5\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] @@ -235,7 +235,7 @@ "DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data/\")\n", "# set up the base dir for feature store\n", "BASE_DIR = os.environ.get(\n", - " \"BASE_DIR\", \"/raid/workshared/merlin/examples/Building-and-deploying-multi-stage-RecSys/\"\n", + " \"BASE_DIR\", \"/Merlin/examples/Building-and-deploying-multi-stage-RecSys/\"\n", ")" ] }, @@ -424,7 +424,7 @@ " \n", " 6\n", " 7\n", - " 530\n", + " 590\n", " 1\n", " 1\n", " 1\n", @@ -433,11 +433,11 @@ " 1\n", " 1\n", " 1\n", - " 154\n", - " 264\n", - " 28\n", - " 2023-06-29 19:20:20.311986\n", - " 2023-06-29 19:20:20.314307\n", + " 171\n", + " 293\n", + " 31\n", + " 2023-06-29 19:49:50.300270\n", + " 2023-06-29 19:49:50.303330\n", " \n", " \n", "\n", @@ -445,16 +445,16 @@ ], "text/plain": [ " user_id user_shops user_profile user_group user_gender user_age \\\n", - "6 7 530 1 1 1 1 \n", + "6 7 590 1 1 1 1 \n", "\n", " user_consumption_1 user_consumption_2 user_is_occupied user_geography \\\n", "6 1 1 1 1 \n", "\n", " user_intentions user_brands user_categories datetime \\\n", - "6 154 264 28 2023-06-29 19:20:20.311986 \n", + "6 171 293 31 2023-06-29 19:49:50.300270 \n", "\n", " created \n", - "6 2023-06-29 19:20:20.314307 " + "6 2023-06-29 19:49:50.303330 " ] }, "execution_count": 11, @@ -549,48 +549,48 @@ " 1\n", " 1\n", " 1\n", - " 2023-06-29 19:20:20.413296\n", - " 2023-06-29 19:20:20.414521\n", + " 2023-06-29 19:49:50.410715\n", + " 2023-06-29 19:49:50.412307\n", " \n", " \n", " 1\n", " 2\n", - " 7\n", - " 457\n", - " 158\n", - " 73\n", - " 2023-06-29 19:20:20.413296\n", - " 2023-06-29 19:20:20.414521\n", + " 6\n", + " 412\n", + " 142\n", + " 66\n", + " 2023-06-29 19:49:50.410715\n", + " 2023-06-29 19:49:50.412307\n", " \n", " \n", " 2\n", " 3\n", - " 13\n", - " 914\n", - " 315\n", - " 146\n", - " 2023-06-29 19:20:20.413296\n", - " 2023-06-29 19:20:20.414521\n", + " 12\n", + " 824\n", + " 284\n", + " 132\n", + " 2023-06-29 19:49:50.410715\n", + " 2023-06-29 19:49:50.412307\n", " \n", " \n", " 3\n", " 4\n", - " 20\n", - " 1371\n", - " 473\n", - " 219\n", - " 2023-06-29 19:20:20.413296\n", - " 2023-06-29 19:20:20.414521\n", + " 18\n", + " 1236\n", + " 426\n", + " 197\n", + " 2023-06-29 19:49:50.410715\n", + " 2023-06-29 19:49:50.412307\n", " \n", " \n", " 4\n", " 5\n", - " 26\n", - " 1828\n", - " 630\n", - " 292\n", - " 2023-06-29 19:20:20.413296\n", - " 2023-06-29 19:20:20.414521\n", + " 24\n", + " 1648\n", + " 568\n", + " 263\n", + " 2023-06-29 19:49:50.410715\n", + " 2023-06-29 19:49:50.412307\n", " \n", " \n", "\n", @@ -599,17 +599,17 @@ "text/plain": [ " item_id item_category item_shop item_brand item_intention \\\n", "0 1 1 1 1 1 \n", - "1 2 7 457 158 73 \n", - "2 3 13 914 315 146 \n", - "3 4 20 1371 473 219 \n", - "4 5 26 1828 630 292 \n", + "1 2 6 412 142 66 \n", + "2 3 12 824 284 132 \n", + "3 4 18 1236 426 197 \n", + "4 5 24 1648 568 263 \n", "\n", " datetime created \n", - "0 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", - "1 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", - "2 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", - "3 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 \n", - "4 2023-06-29 19:20:20.413296 2023-06-29 19:20:20.414521 " + "0 2023-06-29 19:49:50.410715 2023-06-29 19:49:50.412307 \n", + "1 2023-06-29 19:49:50.410715 2023-06-29 19:49:50.412307 \n", + "2 2023-06-29 19:49:50.410715 2023-06-29 19:49:50.412307 \n", + "3 
2023-06-29 19:49:50.410715 2023-06-29 19:49:50.412307 \n", + "4 2023-06-29 19:49:50.410715 2023-06-29 19:49:50.412307 " ] }, "execution_count": 15, @@ -890,13 +890,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "9/9 [==============================] - 10s 275ms/step - loss: 8.9538 - recall_at_10: 0.0055 - ndcg_at_10: 0.0038 - regularization_loss: 0.0000e+00 - loss_batch: 8.8710 - val_loss: 8.9181 - val_recall_at_10: 0.0165 - val_ndcg_at_10: 0.0109 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.5802\n" + "9/9 [==============================] - 11s 275ms/step - loss: 8.9538 - recall_at_10: 0.0101 - ndcg_at_10: 0.0067 - regularization_loss: 0.0000e+00 - loss_batch: 8.8711 - val_loss: 8.9179 - val_recall_at_10: 0.0212 - val_ndcg_at_10: 0.0155 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.5806\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 26, @@ -1038,13 +1038,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 5s 312ms/step - loss: 0.6931 - auc: 0.4991 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.4983 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6931\n" + "5/5 [==============================] - 5s 305ms/step - loss: 0.6932 - auc: 0.5005 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.5029 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6931\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 31, @@ -1067,7 +1067,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "00447c12-ea80-4d98-ab47-cc1a982a6958", "metadata": {}, "outputs": [], @@ -1093,7 +1093,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "e62f65f8-e8f1-447e-9500-5960807c36f2", "metadata": {}, "outputs": [], @@ -1107,17 +1107,86 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "e02f0957-6665-400a-80c0-60b307466caf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
item_idoutput_1
453945[0.012117806822061539, -0.02241620607674122, 0...
454948[0.012117806822061539, -0.02241620607674122, 0...
455956[0.012117806822061539, -0.02241620607674122, 0...
4561437[0.012117806822061539, -0.02241620607674122, 0...
4571469[0.012117806822061539, -0.02241620607674122, 0...
\n", + "
" + ], + "text/plain": [ + " item_id output_1\n", + "453 945 [0.012117806822061539, -0.02241620607674122, 0...\n", + "454 948 [0.012117806822061539, -0.02241620607674122, 0...\n", + "455 956 [0.012117806822061539, -0.02241620607674122, 0...\n", + "456 1437 [0.012117806822061539, -0.02241620607674122, 0...\n", + "457 1469 [0.012117806822061539, -0.02241620607674122, 0..." + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "item_embeddings.tail()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "66d7271e-0ea6-4568-ac5a-04089735f542", "metadata": {}, "outputs": [], @@ -1144,7 +1213,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "4ee27d67-e35a-42c5-8025-ed73f35c8e13", "metadata": {}, "outputs": [], @@ -1195,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "48a5927c-840d-410c-8f5b-bebce4f79640", "metadata": {}, "outputs": [], @@ -1246,10 +1315,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "57133c1e-18d9-4ccb-9704-cdebd271985e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: seedir in /usr/local/lib/python3.8/dist-packages (0.4.2)\n", + "Requirement already satisfied: natsort in /usr/local/lib/python3.8/dist-packages (from seedir) (8.4.0)\n" + ] + } + ], "source": [ "# install seedir\n", "!pip install seedir" @@ -1257,10 +1335,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "986d53ea-c946-4046-a390-6d3b8801d280", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "feast_repo/\n", + "├─README.md\n", + "├─__init__.py\n", + "└─feature_repo/\n", + " ├─__init__.py\n", + " ├─__pycache__/\n", + " │ ├─__init__.cpython-38.pyc\n", + " │ ├─example_repo.cpython-38.pyc\n", + " │ └─test_workflow.cpython-38.pyc\n", + " ├─data/\n", + " │ ├─item_features.parquet\n", + " │ └─user_features.parquet\n", + " ├─feature_store.yaml\n", + " ├─item_features.py\n", + " ├─test_workflow.py\n", + " └─user_features.py\n" + ] + } + ], "source": [ "import seedir as sd\n", "\n", diff --git a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index 3d1d417b0..15f0060d3 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -99,7 +99,7 @@ " _descriptor.FieldDescriptor(\n", "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:30: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. 
Please use get/find descriptors from generated code or query the descriptor_pool.\n", " _INTEGERSTATISTICS = _descriptor.Descriptor(\n", - "2023-06-29 19:13:17.254704: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-29 19:50:56.885234: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_shape_pb2.py:18: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", " DESCRIPTOR = _descriptor.FileDescriptor(\n", @@ -186,10 +186,10 @@ "output_type": "stream", "text": [ "/raid/workshared/merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo/feature_repo\n", - "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", "Created entity \u001b[1m\u001b[32muser_id\u001b[0m\n", - "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", + "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", "Created feature view \u001b[1m\u001b[32muser_features\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", "\n", "Created sqlite table \u001b[1m\u001b[32mfeast_repo_item_features\u001b[0m\n", "Created sqlite table \u001b[1m\u001b[32mfeast_repo_user_features\u001b[0m\n", @@ -228,10 +228,10 @@ "text": [ "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views from \u001b[1m\u001b[32m1995-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 450/450 [00:00<00:00, 5815.84it/s]\n", "\u001b[1m\u001b[32muser_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 448/448 [00:00<00:00, 1758.64it/s]\n" + "100%|███████████████████████████████████████████████████████████| 460/460 [00:00<00:00, 2521.27it/s]\n", + "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", + "100%|███████████████████████████████████████████████████████████| 458/458 [00:00<00:00, 3335.12it/s]\n" ] } ], @@ -338,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "0b6cc5bf-d07c-4963-a748-6e2b4827ee36", "metadata": {}, "outputs": [ @@ -346,7 +346,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING clustering 450 points to 32 centroids: please provide at least 1248 training points\n" + "WARNING clustering 458 points to 32 centroids: please provide at least 1248 training points\n" ] } ], @@ -354,7 +354,7 @@ "from merlin.systems.dag.ops.faiss import QueryFaiss, setup_faiss \n", "\n", "item_embeddings = pd.read_parquet(os.path.join(BASE_DIR, \"item_embeddings.parquet\"))\n", - "setup_faiss(item_embeddings, faiss_index_path)" + "setup_faiss(item_embeddings, faiss_index_path, embedding_column=\"output_1\")" ] }, { @@ -367,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 11, + 
"execution_count": 10, "id": "3bc00e04-c70c-4882-9952-66f4dbb97bdc", "metadata": {}, "outputs": [], @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "3decbe7b-03e3-4978-baac-03f6a0b078c9", "metadata": {}, "outputs": [ @@ -393,9 +393,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-29 19:14:10+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-29 19:51:06+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-29 19:14:10+00:00\u001b[0m:\n" + "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-29 19:51:06+00:00\u001b[0m:\n" ] }, { @@ -419,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "f11299b6-20d4-4687-bb0e-b855a9bcb9eb", "metadata": {}, "outputs": [], @@ -441,7 +441,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "21139caa-3a51-42e6-b006-21a92c95f1bc", "metadata": {}, "outputs": [ @@ -451,7 +451,7 @@ "" ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -466,7 +466,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "47c2d9b1-51dc-4549-977d-d7941ee6486c", "metadata": {}, "outputs": [ @@ -474,10 +474,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-06-29 19:14:11.423802: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-29 19:51:07.269579: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-06-29 19:14:14.615977: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:15:00.0, compute capability: 7.5\n", - "2023-06-29 19:14:14.616886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 24576 MB memory: -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:2d:00.0, compute capability: 7.5\n", + "2023-06-29 19:51:10.430459: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:15:00.0, compute capability: 7.5\n", + "2023-06-29 19:51:10.431356: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 24576 MB memory: -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:2d:00.0, compute capability: 7.5\n", "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, 
restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 52). These functions will not be directly callable after loading.\n" ] }, @@ -485,14 +485,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpqzazhnjq/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmpdalflmaz/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpqzazhnjq/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmpdalflmaz/assets\n" ] } ], @@ -517,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "b270f663-0ae1-4356-acd4-5f8c986abf4d", "metadata": {}, "outputs": [ @@ -525,9 +525,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-29 19:14:18+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-29 19:51:14+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-29 19:14:18+00:00\u001b[0m:\n" + "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-29 19:51:14+00:00\u001b[0m:\n" ] }, { @@ -550,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "0d0a4531-665c-48a1-98a9-216c955449b7", "metadata": {}, "outputs": [], @@ -569,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "eb0ef434-03a5-4a36-afb9-e19a43243c64", "metadata": {}, "outputs": [], @@ -604,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "ce31723e-af4d-4827-bb60-3a9fafcd9da6", "metadata": {}, "outputs": [ @@ -619,14 +619,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp6epm9p86/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmpqdd_jn5e/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp6epm9p86/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmpqdd_jn5e/assets\n" ] } ], @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "7f65598b-e3e7-4238-a73e-19d00c3deb26", "metadata": {}, "outputs": [], @@ -676,7 +676,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "b28c452f-543c-45a4-9995-130ca6919669", "metadata": {}, "outputs": [], @@ -695,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "9c8b7b94-5559-4587-a272-4d9de2d53dd1", "metadata": {}, "outputs": [], @@ -709,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "6c64d686-aed5-42f8-b517-482b4237c69f", "metadata": {}, "outputs": [ @@ -743,7 +743,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "89182219-40a6-458c-af0e-7a8e83f364aa", "metadata": {}, "outputs": [ @@ -872,7 +872,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "d08a8975-9c32-467b-99ec-df66319f854b", "metadata": {}, "outputs": [ @@ -905,20 +905,20 @@ }, { "cell_type": "code", - "execution_count": 26, + 
"execution_count": 25, "id": "74ec62f2-5935-45c6-8058-e1cdade6f80f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ordered_ids': array([[ 52, 102, 42, 204, 312, 117, 414, 258, 14, 450]], dtype=int32),\n", - " 'ordered_scores': array([[0.5010059 , 0.5018582 , 0.5001918 , 0.50212526, 0.5004832 ,\n", - " 0.5006511 , 0.50049436, 0.5014268 , 0.5005215 , 0.5017036 ]],\n", + "{'ordered_ids': array([[100, 168, 324, 79, 361, 294, 267, 289, 397, 189]], dtype=int32),\n", + " 'ordered_scores': array([[0.5016385 , 0.50176895, 0.5017176 , 0.5024097 , 0.5018236 ,\n", + " 0.5018286 , 0.50162375, 0.5015677 , 0.50175667, 0.5014358 ]],\n", " dtype=float32)}" ] }, - "execution_count": 26, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } From 3a70d016432c529170fbae437a8b52594a47a6d6 Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 4 Jul 2023 16:09:34 -0400 Subject: [PATCH 5/5] add data folder env for notebook 2 and fix unit test --- ...lti-stage-RecSys-with-Merlin-Systems.ipynb | 35 ++++++++++++++++++- ...t_building_deploying_multi_stage_RecSys.py | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index 15f0060d3..e97257251 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -27,6 +27,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "03166488-1651-4025-84ed-4e9e5db34933", "metadata": {}, @@ -43,6 +44,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "38d75184-cd24-4fe3-90f4-d76028626576", "metadata": {}, @@ -51,6 +53,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "da9dadb5-6eec-4a1b-99f9-929523f5cc07", "metadata": {}, @@ -59,6 +62,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "538677a3-acc6-48f6-acb6-d5bb5fe2e2d2", "metadata": {}, @@ -67,6 +71,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a27e18d7-b3e4-481c-b69e-23193b212c56", "metadata": {}, @@ -145,6 +150,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "55ead20e-c573-462e-9aa2-c3494bf0129f", "metadata": {}, @@ -153,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e2ac115e-4794-4a69-a962-8481f6e86df3", "metadata": {}, @@ -170,6 +177,8 @@ "outputs": [], "source": [ "BASE_DIR = os.environ.get(\"BASE_DIR\", \"/Merlin/examples/Building-and-deploying-multi-stage-RecSys/\")\n", + "DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data/\")\n", + "\n", "\n", "# define feature repo path\n", "feast_repo_path = os.path.join(BASE_DIR, \"feast_repo/feature_repo/\")" @@ -203,6 +212,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c641fcd2-bd11-4569-80d4-2ae5e01a5cad", "metadata": {}, @@ -240,6 +250,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8fcc26e6-f6f3-4e44-bf3c-3b8e66dc9fd6", "metadata": {}, @@ -280,6 +291,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e768637c-0a4d-404b-8b58-7182fef0ab0e", "metadata": {}, @@ -288,6 +300,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "efada1e1-2556-4a26-b0ba-9cb96b3b151f", "metadata": {}, @@ -307,6 +320,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": 
"2aa037c0-7dad-427c-98bb-3da413e8fd14", "metadata": {}, @@ -327,6 +341,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8b996019-bd2a-44e0-b004-4f412b300d63", "metadata": {}, @@ -358,6 +373,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "46697177-512a-473e-8cca-9fe51d3daa03", "metadata": {}, @@ -376,6 +392,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "5c45df06-0cbe-4b52-ac1f-786e763895d7", "metadata": {}, @@ -426,12 +443,13 @@ "source": [ "from nvtabular import Workflow\n", "\n", - "nvt_workflow = Workflow.load('/workspace/data/processed_nvt/workflow')\n", + "nvt_workflow = Workflow.load(os.path.join(DATA_FOLDER, 'processed_nvt/workflow'))\n", "user_subgraph = nvt_workflow.get_subworkflow(\"user\")\n", "user_features = user_attributes >> TransformWorkflow(user_subgraph)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "27e25be7-3ff0-49c2-a3fc-03ec4d615e77", "metadata": {}, @@ -508,6 +526,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8ce4429c-1fe1-4304-bcdf-badebe3b5485", "metadata": {}, @@ -560,6 +579,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "304a4d09-db05-4666-b520-75dbbbc7ab17", "metadata": {}, @@ -595,6 +615,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7fb0ce66-6b6c-43be-885e-a5435c3bbd9e", "metadata": {}, @@ -635,6 +656,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7f86fa47-de61-4007-ab55-9076e12ce963", "metadata": {}, @@ -656,6 +678,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f4e2e389-d884-44a1-8e32-4916a0eb43cf", "metadata": {}, @@ -667,6 +690,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "50bc2e4f-5e58-4ad4-8ae5-d79ad286978f", "metadata": {}, @@ -686,6 +710,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a061bd82-e553-4823-8d14-3ae88a458c14", "metadata": {}, @@ -734,6 +759,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "276eedd8-5dc0-4ad0-8725-c8da60fea693", "metadata": {}, @@ -823,6 +849,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fe7962cc-f26d-4a4a-b5a3-d214e0f37456", "metadata": { @@ -833,6 +860,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8c07c620-7d6c-4275-87fe-e5b94335bdb9", "metadata": {}, @@ -847,6 +875,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6c0a798f-6abf-4cbb-87f8-f60a6e757092", "metadata": {}, @@ -855,6 +884,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3b0794b1-b9e0-4508-bf6e-cc823ac5c693", "metadata": {}, @@ -863,6 +893,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "af9efbde-4dac-42f1-9ace-096f75bac2b5", "metadata": {}, @@ -896,6 +927,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "28e9e27f-6658-4302-b142-08b05215e48f", "metadata": {}, @@ -929,6 +961,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b4605dbe-5f97-4b31-8ee4-ce7c1cb69d97", "metadata": {}, diff --git a/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py b/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py index 435742499..138ea554f 100644 --- a/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py +++ b/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py @@ -74,7 +74,7 @@ def test_func(tmpdir): df_lib = get_lib() train = df_lib.read_parquet( os.path.join("{tmpdir / "data"}/processed_nvt/", "train", "part_0.parquet"), - columns=["user_id_raw"], + columns=["user_id"], ) batch = train[:1] from 
merlin.systems.triton.utils import run_ensemble_on_tritonserver
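Taken together, these hunks replace the manual item-embedding export with an NVTabular workflow that chains the fitted "item" subworkflow into the retrieval model's item tower, and they index the resulting output_1 column with FAISS. The following is a minimal sketch of that flow, not part of the patch itself; it assumes the objects defined earlier in the notebooks (nvt_wkflow, model_tt, item_features, BASE_DIR, faiss_index_path) and uses only the APIs that appear in the hunks above.

# Sketch only: consolidates the embedding-export and FAISS-indexing pattern from the
# patches above. Assumes nvt_wkflow (fitted workflow with an "item" subworkflow),
# model_tt (trained two-tower model), item_features, BASE_DIR and faiss_index_path
# are already defined as in the notebooks.
import os

import pandas as pd
import nvtabular as nvt
from merlin.io.dataset import Dataset
from merlin.systems.dag.ops.faiss import setup_faiss
from merlin.systems.dag.ops.tensorflow import PredictTensorflow
from merlin.systems.dag.ops.workflow import TransformWorkflow

# Categorify the raw item features with the "item" subworkflow, run them through the
# item tower, and keep the raw item_id next to the embedding column ("output_1").
export_graph = ["item_id"] + (
    ["item_id", "item_brand", "item_category", "item_shop"]
    >> TransformWorkflow(nvt_wkflow.get_subworkflow("item"))
    >> PredictTensorflow(model_tt.first.item_block())
)
item_embeddings = (
    nvt.Workflow(export_graph).fit_transform(Dataset(item_features)).to_ddf().compute()
)

# Persist the embeddings, then build the FAISS index from the named embedding column,
# as done in the deployment notebook.
item_embeddings.to_parquet(os.path.join(BASE_DIR, "item_embeddings.parquet"))
emb_df = pd.read_parquet(os.path.join(BASE_DIR, "item_embeddings.parquet"))
setup_faiss(emb_df, faiss_index_path, embedding_column="output_1")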