NVIDIA-Merlin · jperez999 · Jul 12, 2022 · May 13, 2022 · May 13, 2022 · May 18, 2022
diff --git a/ci/test_integration.sh b/ci/test_integration.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e  # abort at the first failing command
+# -rsx adds skipped and xfailed tests to pytest's short summary
+pytest -rsx ./tests/integration/
diff --git a/...ilding-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/...ilding-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb
@@ -126,7 +126,24 @@
    "outputs": [
     {
      "data": {
-      "application/javascript": "\n            setTimeout(function() {\n                var nbb_cell_id = 1;\n                var nbb_unformatted_code = \"%load_ext nb_black\\n# for running this example on GPU, install the following libraries\\n# %pip install tensorflow \\\"feast<0.20\\\" faiss-gpu\\n\\n# for running this example on CPU, uncomment the following lines\\n# %pip install tensorflow-cpu \\\"feast<0.20\\\" faiss-cpu\\n# %pip uninstall cudf\";\n                var nbb_formatted_code = \"%load_ext nb_black\\n# for running this example on GPU, install the following libraries\\n# %pip install tensorflow \\\"feast<0.20\\\" faiss-gpu\\n\\n# for running this example on CPU, uncomment the following lines\\n# %pip install tensorflow-cpu \\\"feast<0.20\\\" faiss-cpu\\n# %pip uninstall cudf\";\n                var nbb_cells = Jupyter.notebook.get_cells();\n                for (var i = 0; i < nbb_cells.length; ++i) {\n                    if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n                        if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n                             nbb_cells[i].set_text(nbb_formatted_code);\n                        }\n                        break;\n                    }\n                }\n            }, 500);\n            ",
+      "application/javascript": [
+       "\n",
+       "            setTimeout(function() {\n",
+       "                var nbb_cell_id = 1;\n",
+       "                var nbb_unformatted_code = \"%load_ext nb_black\\n# for running this example on GPU, install the following libraries\\n# %pip install tensorflow \\\"feast<0.20\\\" faiss-gpu\\n\\n# for running this example on CPU, uncomment the following lines\\n# %pip install tensorflow-cpu \\\"feast<0.20\\\" faiss-cpu\\n# %pip uninstall cudf\";\n",
+       "                var nbb_formatted_code = \"%load_ext nb_black\\n# for running this example on GPU, install the following libraries\\n# %pip install tensorflow \\\"feast<0.20\\\" faiss-gpu\\n\\n# for running this example on CPU, uncomment the following lines\\n# %pip install tensorflow-cpu \\\"feast<0.20\\\" faiss-cpu\\n# %pip uninstall cudf\";\n",
+       "                var nbb_cells = Jupyter.notebook.get_cells();\n",
+       "                for (var i = 0; i < nbb_cells.length; ++i) {\n",
+       "                    if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
+       "                        if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
+       "                             nbb_cells[i].set_text(nbb_formatted_code);\n",
+       "                        }\n",
+       "                        break;\n",
+       "                    }\n",
+       "                }\n",
+       "            }, 500);\n",
+       "            "
+      ],
       "text/plain": [
        "<IPython.core.display.Javascript object>"
       ]
@@ -234,7 +251,7 @@
    "source": [
     "from merlin.datasets.synthetic import generate_data\n",
     "\n",
-    "NUM_ROWS = 100000\n",
+    "NUM_ROWS = os.environ.get(\"NUM_ROWS\", 100_000)\n",
     "train, valid = generate_data(\"aliccp-raw\", int(NUM_ROWS), set_sizes=(0.7, 0.3))"
    ]
   },
@@ -677,6 +694,7 @@
     }
    ],
    "source": [
+    "!rm -rf $BASE_DIR/feature_repo\n",
     "!cd $BASE_DIR && feast init feature_repo"
    ]
   },
@@ -1712,7 +1730,7 @@
    "hash": "2758ff992bb32b90e83258e2e763c5fcee80c4002721441c6c0d17c649a641dd"
   },
   "kernelspec": {
-   "display_name": "Python 3.9.5 ('base')",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -1726,7 +1744,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.5"
+   "version": "3.8.10"
   },
   "merlin": {
    "containers": [

diff --git a/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py b/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py
@@ -0,0 +1,117 @@
+import os
+
+import pytest
+from testbook import testbook
+
+from tests.conftest import REPO_ROOT
+
+pytest.importorskip("tensorflow")
+pytest.importorskip("feast")
+pytest.importorskip("faiss")
+
+
+@testbook(
+    REPO_ROOT
+    / "examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb",
+    execute=False,
+    timeout=2400,
+)
+def test_func(tb2):
+    """Run both multi-stage RecSys example notebooks end to end.
+
+    Notebook 01 is executed inside this function after ``DATA_FOLDER`` and
+    ``BASE_DIR`` are set to folders under ``/tmp``. Its cells 16 and 22 are not
+    executed; before the remaining cells run, an injected snippet wraps the
+    first two (sorted) parquet files from ``/raid/data/aliccp/train`` and
+    ``/raid/data/aliccp/test`` in ``Dataset`` objects and passes them to
+    ``transform_aliccp``. The test then asserts that the expected folders and
+    files exist under ``/tmp/examples``.
+
+    Notebook 02 (``tb2``, supplied by the ``@testbook`` decorator) is executed
+    next. An injected snippet queries ``/tmp/examples/poc_ensemble`` through
+    ``run_ensemble_on_tritonserver`` and removes ``/tmp/examples/``; the test
+    finally checks that the flattened ``ordered_ids`` response has ``top_k``
+    entries.
+    """
+    with testbook(
+        REPO_ROOT
+        / "examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb",
+        execute=False,
+        timeout=450,
+    ) as tb1:
+        # Set the folder variables in the kernel's environment before any
+        # notebook cell runs. os.makedirs raises if the folder cannot be
+        # created, whereas a shelled-out `mkdir -p` would fail unnoticed.
+        tb1.inject(
+            """
+            import os
+            os.environ["DATA_FOLDER"] = "/tmp/data/"
+            os.makedirs("/tmp/examples", exist_ok=True)
+            os.environ["BASE_DIR"] = "/tmp/examples/"
+            """
+        )
+        # NOTE(review): cells 16 and 22 are skipped; presumably the snippet
+        # injected below stands in for the notebook's own data step - confirm.
+        tb1.execute_cell(list(range(0, 16)))
+        tb1.execute_cell(list(range(17, 22)))
+        tb1.inject(
+            """
+            from pathlib import Path
+            from merlin.datasets.ecommerce import transform_aliccp
+
+            import glob
+
+            #transform_aliccp(Path('/raid/data/aliccp'), output_path, nvt_workflow=outputs, workflow_name='workflow_ranking')
+            train = Dataset(sorted(glob.glob('/raid/data/aliccp/train/*.parquet'))[0:2])
+            valid = Dataset(sorted(glob.glob('/raid/data/aliccp/test/*.parquet'))[0:2])
+
+            transform_aliccp(
+                (train, valid), output_path, nvt_workflow=outputs, workflow_name="workflow_ranking"
+            )
+            """
+        )
+        tb1.execute_cell(list(range(23, len(tb1.cells))))
+
+        # Artifacts the notebook run is expected to have written.
+        assert os.path.isdir("/tmp/examples/dlrm")
+        assert os.path.isdir("/tmp/examples/feature_repo")
+        assert os.path.isdir("/tmp/examples/query_tower")
+        assert os.path.isfile("/tmp/examples/item_embeddings.parquet")
+        assert os.path.isfile("/tmp/examples/feature_repo/user_features.py")
+        assert os.path.isfile("/tmp/examples/feature_repo/item_features.py")
+
+    tb2.inject(
+        """
+        import os
+        os.environ["DATA_FOLDER"] = "/tmp/data/"
+        os.environ["BASE_DIR"] = "/tmp/examples/"
+        """
+    )
+    # The cell count is taken after the inject above; the offsets below are
+    # relative to that count.
+    num_cells = len(tb2.cells)
+    tb2.execute_cell(list(range(0, num_cells - 3)))
+    top_k = tb2.ref("top_k")
+    outputs = tb2.ref("outputs")
+    # The returned handle is unused here; the injected snippet below reads
+    # `request` directly inside the kernel.
+    tb2.ref("request")
+    assert outputs[0] == "ordered_ids"
+    # Query the ensemble at /tmp/examples/poc_ensemble, keep the first element
+    # of each returned `ordered_ids` row, then delete /tmp/examples/.
+    tb2.inject(
+        """
+        import shutil
+        from merlin.models.loader.tf_utils import configure_tensorflow
+        configure_tensorflow()
+        from merlin.systems.triton.utils import run_ensemble_on_tritonserver
+        response = run_ensemble_on_tritonserver(
+            "/tmp/examples/poc_ensemble", outputs, request, "ensemble_model"
+        )
+        response = [x.tolist()[0] for x in response["ordered_ids"]]
+        shutil.rmtree("/tmp/examples/", ignore_errors=True)
+        """
+    )
+    tb2.execute_cell(num_cells - 2)
+    response = tb2.ref("response")
+    assert len(response) == top_k
diff --git a/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py b/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py
@@ -10,7 +10,6 @@
 pytest.importorskip("feast")
 pytest.importorskip("faiss")
 
-
 def test_func():
     with testbook(
         REPO_ROOT