From cfd1434b00d4242ad025a1be08dc8aa81e95e532 Mon Sep 17 00:00:00 2001 From: qqiao Date: Tue, 13 Jun 2023 02:56:04 -0700 Subject: [PATCH 01/13] Update the dockerfile for new upstream --- docker/dockerfile.merlin | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/docker/dockerfile.merlin b/docker/dockerfile.merlin index 153f84f39..3606957b9 100644 --- a/docker/dockerfile.merlin +++ b/docker/dockerfile.merlin @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.2 -ARG TRITON_VERSION=23.03 -ARG DLFW_VERSION=23.03 +ARG TRITON_VERSION=23.05 +ARG DLFW_VERSION=23.05 ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-min @@ -40,10 +40,10 @@ ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin # Set up NVIDIA package repository RUN apt clean && apt update -y --fix-missing && \ apt install -y --no-install-recommends software-properties-common && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \ + mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \ apt install -y --no-install-recommends \ autoconf \ automake \ @@ -93,7 +93,7 @@ RUN ln -s /usr/bin/python3 /usr/bin/python RUN pip install --no-cache-dir --upgrade pip; pip install --no-cache-dir "cmake<3.25.0" ninja scikit-build pandas==1.5.2 \ fastrlock nvidia-pyindex pybind11 pytest \ - transformers==4.12 tensorflow-metadata betterproto \ + transformers tensorflow-metadata betterproto \ cachetools graphviz nvtx scipy "scikit-learn<1.2" \ tritonclient[all]==2.29.0 grpcio-channelz fiddle wandb npy-append-array \ git+https://github.com/rapidsai/asvdb.git@main \ @@ -101,7 +101,7 @@ RUN pip install --no-cache-dir --upgrade pip; pip install --no-cache-dir "cmake< lightfm implicit \ numba "cuda-python>=11.5,<12.0" fsspec==2022.5.0 llvmlite \ pynvml==11.4.1 -RUN pip install --no-cache-dir numpy==1.22.4 protobuf==3.20.3 onnx onnxruntime==1.11.1 pycuda +RUN pip install --no-cache-dir numpy==1.22.4 protobuf==3.20.3 onnx onnxruntime pycuda RUN pip install --no-cache-dir dask==${DASK_VER} distributed==${DASK_VER} dask[dataframe]==${DASK_VER} RUN pip install --no-cache-dir onnx_graphsurgeon --index-url https://pypi.ngc.nvidia.com @@ -134,9 +134,9 @@ RUN git clone --branch v1.7.2 https://github.com/facebookresearch/faiss.git buil rm -rf build-env # Install spdlog -RUN git clone --branch v1.9.2 https://github.com/gabime/spdlog.git build-env && \ +RUN git clone --branch v1.11.0 https://github.com/gabime/spdlog.git build-env && \ pushd build-env && \ - mkdir build && cd build && cmake .. && make -j && make install && \ + mkdir build && cd build && cmake -DSPDLOG_BUILD_SHARED=ON .. 
&& make -j && make install && \ popd && \ rm -rf build-env @@ -159,10 +159,10 @@ ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin # Set up NVIDIA package repository RUN apt update -y --fix-missing && \ apt install -y --no-install-recommends software-properties-common && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \ + mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \ apt install -y --no-install-recommends \ ca-certificates \ clang-format \ @@ -219,6 +219,8 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/lib:${JAVA_HOME}/lib/server # Includes COPY --chown=1000:1000 --from=build /usr/local/include/spdlog/ /usr/local/include/spdlog/ +COPY --chown=1000:1000 --from=build /usr/local/lib/libspdlog* /usr/local/lib/ +COPY --chown=1000:1000 --from=build /usr/local/lib/cmake/spdlog /usr/local/lib/cmake/spdlog/ # Binaries COPY --chown=1000:1000 --from=build /usr/local/bin/cmake /usr/local/bin/ @@ -245,9 +247,11 @@ COPY --chown=1000:1000 --from=triton /usr/local/cuda-12.1/targets/x86_64-linux/l ENV PATH=/opt/tritonserver/bin:${PATH}: ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/tritonserver/lib +ARG PYTHON_VERSION=3.10 + # Python Packages -COPY --chown=1000:1000 --from=build /usr/local/lib/python3.8/dist-packages /usr/local/lib/python3.8/dist-packages/ -ENV PYTHONPATH=$PYTHONPATH:/usr/local/lib/python3.8/dist-packages/ +COPY --chown=1000:1000 --from=build /usr/local/lib/python${PYTHON_VERSION}/dist-packages /usr/local/lib/python${PYTHON_VERSION}/dist-packages/ +ENV PYTHONPATH=$PYTHONPATH:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/ # rapids components from the DLFW image @@ -261,12 +265,13 @@ COPY --chown=1000:1000 --from=dlfw /usr/include/parquet /usr/include/parquet/ COPY --chown=1000:1000 --from=dlfw /usr/include/arrow /usr/include/arrow/ COPY --chown=1000:1000 --from=dlfw /usr/include/cudf /usr/include/cudf/ COPY --chown=1000:1000 --from=dlfw /usr/include/rmm /usr/include/rmm/ +COPY --chown=1000:1000 --from=dlfw /usr/include/fmt /usr/include/fmt/ + # ptx compiler required by cubinlinker COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h RUN git clone https://github.com/rapidsai/ptxcompiler.git /ptx && cd /ptx/ && python setup.py develop; -ARG PYTHON_VERSION=3.8 COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cuda /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cuda COPY 
--chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow @@ -383,6 +388,7 @@ RUN if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \ git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \ cd /hugectr && \ git submodule update --init --recursive && \ + cd third_party/librdkafka && ./configure && make -j$(nproc) && make install && cd ../.. && \ mkdir build && \ cd build && \ if [[ "${INSTALL_HDFS}" == "false" ]]; then \ From 8b0f75302f7f2ab9e4c6d3b3e0327a1528b19542 Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Wed, 14 Jun 2023 11:15:45 -0700 Subject: [PATCH 02/13] Updates to make build compatible with 23.05 base image. --- docker/dockerfile.merlin | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/docker/dockerfile.merlin b/docker/dockerfile.merlin index 3606957b9..ccf1075d6 100644 --- a/docker/dockerfile.merlin +++ b/docker/dockerfile.merlin @@ -125,7 +125,7 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/tritonserver/lib # don't include it https://github.com/kyamagu/faiss-wheels/issues/54) RUN git clone --branch v1.7.2 https://github.com/facebookresearch/faiss.git build-env && \ pushd build-env && \ - cmake -B build . -DFAISS_ENABLE_GPU=ON -DFAISS_ENABLE_PYTHON=ON -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES="60;70;80" && \ + cmake -B build . -DFAISS_ENABLE_GPU=ON -DFAISS_ENABLE_PYTHON=ON -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES="60;70;80;90" && \ make -C build -j $(nproc) faiss swigfaiss && \ pushd build/faiss/python && \ python setup.py install && \ @@ -133,13 +133,6 @@ RUN git clone --branch v1.7.2 https://github.com/facebookresearch/faiss.git buil popd && \ rm -rf build-env -# Install spdlog -RUN git clone --branch v1.11.0 https://github.com/gabime/spdlog.git build-env && \ - pushd build-env && \ - mkdir build && cd build && cmake -DSPDLOG_BUILD_SHARED=ON .. 
&& make -j && make install && \ - popd && \ - rm -rf build-env - # Clean up RUN rm -rf /repos @@ -217,11 +210,6 @@ RUN ln -s /usr/bin/python3 /usr/bin/python ENV JAVA_HOME=/usr/lib/jvm/default-java ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/lib:${JAVA_HOME}/lib/server -# Includes -COPY --chown=1000:1000 --from=build /usr/local/include/spdlog/ /usr/local/include/spdlog/ -COPY --chown=1000:1000 --from=build /usr/local/lib/libspdlog* /usr/local/lib/ -COPY --chown=1000:1000 --from=build /usr/local/lib/cmake/spdlog /usr/local/lib/cmake/spdlog/ - # Binaries COPY --chown=1000:1000 --from=build /usr/local/bin/cmake /usr/local/bin/ COPY --chown=1000:1000 --from=build /usr/local/bin/pytest /usr/local/bin/ @@ -247,7 +235,7 @@ COPY --chown=1000:1000 --from=triton /usr/local/cuda-12.1/targets/x86_64-linux/l ENV PATH=/opt/tritonserver/bin:${PATH}: ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/tritonserver/lib -ARG PYTHON_VERSION=3.10 +ENV PYTHON_VERSION=3.10 # Python Packages COPY --chown=1000:1000 --from=build /usr/local/lib/python${PYTHON_VERSION}/dist-packages /usr/local/lib/python${PYTHON_VERSION}/dist-packages/ @@ -261,11 +249,13 @@ COPY --chown=1000:1000 --from=dlfw /usr/lib/libparquet* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/Arrow /usr/lib/cmake/Arrow/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/Parquet /usr/lib/cmake/Parquet/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libnvcomp* /usr/lib/ + +COPY --chown=1000:1000 --from=dlfw /usr/include/fmt /usr/include/fmt/ +COPY --chown=1000:1000 --from=dlfw /usr/include/spdlog /usr/include/spdlog/ +COPY --chown=1000:1000 --from=dlfw /usr/include/rmm /usr/include/rmm/ COPY --chown=1000:1000 --from=dlfw /usr/include/parquet /usr/include/parquet/ COPY --chown=1000:1000 --from=dlfw /usr/include/arrow /usr/include/arrow/ COPY --chown=1000:1000 --from=dlfw /usr/include/cudf /usr/include/cudf/ -COPY --chown=1000:1000 --from=dlfw /usr/include/rmm /usr/include/rmm/ -COPY --chown=1000:1000 --from=dlfw /usr/include/fmt /usr/include/fmt/ # ptx compiler required by cubinlinker COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a @@ -388,7 +378,6 @@ RUN if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \ git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \ cd /hugectr && \ git submodule update --init --recursive && \ - cd third_party/librdkafka && ./configure && make -j$(nproc) && make install && cd ../.. && \ mkdir build && \ cd build && \ if [[ "${INSTALL_HDFS}" == "false" ]]; then \ From ba943fac0094426e2dd3b16476f83822a9415f6b Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Tue, 20 Jun 2023 06:02:04 -0700 Subject: [PATCH 03/13] Simplified build process for HugeCTR training image. 
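
The hard-coded `CUDA_SHORT_VERSION` is dropped below in favor of deriving the
compat path from the `CUDA_VERSION` variable that the base image already
exports. A worked example of that derivation, with a hypothetical version
string for illustration:

```shell
# Hypothetical CUDA_VERSION value; the awk expression keeps only major.minor.
CUDA_VERSION=12.1.105
echo "/usr/local/cuda-$(echo $CUDA_VERSION | awk -F'.' '{print $1"."$2}')/compat"
# prints: /usr/local/cuda-12.1/compat
```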
--- docker/dockerfile.ctr | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/docker/dockerfile.ctr b/docker/dockerfile.ctr index e054f0ad5..7f3c8c918 100644 --- a/docker/dockerfile.ctr +++ b/docker/dockerfile.ctr @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.2 -ARG MERLIN_VERSION=22.12 -ARG TRITON_VERSION=22.11 +ARG MERLIN_VERSION=23.05 +ARG TRITON_VERSION=23.05 ARG BASE_IMAGE=nvcr.io/nvstaging/merlin/merlin-base:${MERLIN_VERSION} @@ -9,16 +9,6 @@ FROM ${BASE_IMAGE} as base ARG HUGECTR_VER=main ARG HUGECTR_BACKEND_VER=main -# Envs -ENV CUDA_SHORT_VERSION=11.6 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib -ENV CUDA_HOME=/usr/local/cuda -ENV CUDA_PATH=$CUDA_HOME -ENV CUDA_CUDA_LIBRARY=${CUDA_HOME}/lib64/stubs -ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin -ENV PATH=$PATH:/usr/lib/x86_64-linux-gnu/ -RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 - RUN pip install --no-cache-dir --upgrade notebook ipython RUN pip install --no-cache-dir mpi4py @@ -29,12 +19,11 @@ RUN cd /opt/hpcx/ompi/include/openmpi/opal/mca/hwloc/hwloc201 && rm -rfv hwloc20 RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://download.open-mpi.org/release/hwloc/v2.4/hwloc-${HWLOC_VER}.tar.gz && \ mkdir -p /var/tmp && tar -x -f /var/tmp/hwloc-${HWLOC_VER}.tar.gz -C /var/tmp && \ cd /var/tmp/hwloc-${HWLOC_VER} && \ - ./configure CPPFLAGS="-I/usr/local/cuda/include/ -L/usr/local/cuda/lib64/" LDFLAGS="-L/usr/local/cuda/lib64" --enable-cuda && \ + ./configure CPPFLAGS="-I${CUDA_HOME}/include/ -L${CUDA_HOME}/lib64/" LDFLAGS="-L${CUDA_HOME}/lib64" --enable-cuda && \ make -j$(nproc) && make install && \ rm -rf /var/tmp/hwloc-${HWLOC_VER} /var/tmp/hwloc-${HWLOC_VER}.tar.gz - # ----------------------------------------------------------------------------- # HugeCTR + Dependencies @@ -62,21 +51,18 @@ RUN ln -s /usr/lib/libcudf.so /usr/lib/libcudf_base.so RUN ln -s /usr/lib/libcudf.so /usr/lib/libcudf_io.so RUN ln -s /usr/lib/x86_64-linux-gnu/libibverbs.so.1 /usr/lib/x86_64-linux-gnu/libibverbs.so -RUN rm -rf /usr/lib/x86_64-linux-gnu/libibverbs.so && \ - ln -s /usr/lib/x86_64-linux-gnu/libibverbs.so.1.14.36.0 /usr/lib/x86_64-linux-gnu/libibverbs.so - # Install HugeCTR ARG HUGECTR_HOME=/usr/local/hugectr RUN if [[ "${HUGECTR_DEV_MODE}" == "false" ]]; then \ - rm -rf /usr/local/hugectr/lib/libgmock* /usr/local/hugectr/lib/pkgconfig/gmock* /usr/local/hugectr/include/gmock && \ - rm -rf /usr/local/hugectr/lib/libgtest* /usr/local/hugectr/lib/pkgconfig/gtest* /usr/local/hugectr/include/gtest && \ + rm -rf ${HUGECTR_HOME}/lib/libgmock* ${HUGECTR_HOME}/lib/pkgconfig/gmock* ${HUGECTR_HOME}/include/gmock && \ + rm -rf ${HUGECTR_HOME}/lib/libgtest* ${HUGECTR_HOME}/lib/pkgconfig/gtest* ${HUGECTR_HOME}/include/gtest && \ git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \ cd /hugectr && \ git submodule update --init --recursive && \ mkdir build && \ cd build && \ LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH && \ - export PATH=$PATH:/usr/local/cuda-${CUDA_SHORT_VERSION}/compat && \ + export PATH=$PATH:/usr/local/cuda-$(echo $CUDA_VERSION | awk -F'.' '{print $1"."$2}')/compat && \ if [[ "${INSTALL_HDFS}" == "false" ]]; then \ cmake -DCMAKE_BUILD_TYPE=Release -DSM="60;61;70;75;80;90" -DENABLE_MULTINODES=ON .. 
\ ; else \ @@ -119,9 +105,6 @@ RUN if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \ ; fi RUN ln -s ${HUGECTR_HOME}/backends/hugectr /opt/tritonserver/backends/hugectr -# Remove fake lib -RUN rm /usr/local/cuda/lib64/stubs/libcuda.so.1 - # Clean up RUN rm -rf /usr/local/share/jupyter/lab/staging/node_modules/marked RUN rm -rf /usr/local/share/jupyter/lab/staging/node_modules/node-fetch From 1a9bbb4a5736a312310e797066028ca5abd9b878 Mon Sep 17 00:00:00 2001 From: qqiao Date: Mon, 3 Jul 2023 05:17:14 -0700 Subject: [PATCH 04/13] Update tf and torch dockerfile for new upstream image --- docker/dockerfile.tf | 23 ++++++++++++----------- docker/dockerfile.torch | 23 ++++++++++++----------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/docker/dockerfile.tf b/docker/dockerfile.tf index 988eb66bb..26f6a8ea8 100644 --- a/docker/dockerfile.tf +++ b/docker/dockerfile.tf @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1.2 -ARG MERLIN_VERSION=22.12 -ARG TRITON_VERSION=22.11 -ARG TENSORFLOW_VERSION=22.11 +ARG MERLIN_VERSION=23.05 +ARG TRITON_VERSION=23.05 +ARG TENSORFLOW_VERSION=23.05 ARG DLFW_IMAGE=nvcr.io/nvidia/tensorflow:${TENSORFLOW_VERSION}-tf2-py3 ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 @@ -20,15 +20,16 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backe RUN pip install --no-cache-dir tensorflow protobuf==3.20.3 wrapt==1.14.0 \ && pip uninstall tensorflow keras -y +ENV PYTHON_VERSION=3.10 # DLFW Tensorflow packages -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/tensorflow /usr/local/lib/python3.8/dist-packages/tensorflow/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/tensorflow-*.dist-info /usr/local/lib/python3.8/dist-packages/tensorflow.dist-info/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/keras /usr/local/lib/python3.8/dist-packages/keras/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/keras-*.dist-info /usr/local/lib/python3.8/dist-packages/keras.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/keras /usr/local/lib/python${PYTHON_VERSION}/dist-packages/keras/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/keras-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/keras.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/bin/saved_model_cli /usr/local/bin/saved_model_cli COPY --chown=1000:1000 --from=dlfw /usr/local/lib/tensorflow/ /usr/local/lib/tensorflow/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/horovod /usr/local/lib/python3.8/dist-packages/horovod/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/horovod-*.dist-info /usr/local/lib/python3.8/dist-packages/horovod.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/horovod /usr/local/lib/python${PYTHON_VERSION}/dist-packages/horovod/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/horovod-*.dist-info 
/usr/local/lib/python${PYTHON_VERSION}/dist-packages/horovod.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/bin/horovodrun /usr/local/bin/horovodrun # Need to install transformers after tensorflow has been pulled in, so it builds artifacts correctly. @@ -42,7 +43,7 @@ ARG _CI_JOB_TOKEN="" ARG HUGECTR_VER=main ENV CPATH=$CPATH:${HUGECTR_HOME}/include \ - LD_LIBRARY_PATH=${HUGECTR_HOME}/lib:/usr/local/lib/python3.8/dist-packages/tensorflow:$LD_LIBRARY_PATH \ + LD_LIBRARY_PATH=${HUGECTR_HOME}/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow:$LD_LIBRARY_PATH \ LIBRARY_PATH=${HUGECTR_HOME}/lib:$LIBRARY_PATH \ SOK_COMPILE_UNIT_TEST=ON @@ -69,7 +70,7 @@ RUN if [ "$HUGECTR_DEV_MODE" == "false" ]; then \ mv /hugectr/ci ~/hugectr-ci && mv /hugectr/sparse_operation_kit ~/hugectr-sparse_operation_kit && \ rm -rf /hugectr && mkdir -p /hugectr && \ mv ~/hugectr-ci /hugectr/ci && mv ~/hugectr-sparse_operation_kit /hugectr/sparse_operation_kit; \ - fi; \ + fi && \ if [ "$INSTALL_DISTRIBUTED_EMBEDDINGS" == "true" ]; then \ git clone --branch ${TFDE_VER} --depth 1 https://github.com/NVIDIA-Merlin/distributed-embeddings.git /distributed_embeddings/ && \ cd /distributed_embeddings && git submodule update --init --recursive && \ diff --git a/docker/dockerfile.torch b/docker/dockerfile.torch index e2f192972..676747233 100644 --- a/docker/dockerfile.torch +++ b/docker/dockerfile.torch @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1.2 -ARG MERLIN_VERSION=22.12 -ARG TRITON_VERSION=22.11 -ARG TORCH_VERSION=22.11 +ARG MERLIN_VERSION=23.05 +ARG TRITON_VERSION=23.05 +ARG TORCH_VERSION=23.05 ARG DLFW_IMAGE=nvcr.io/nvidia/pytorch:${TORCH_VERSION}-py3 ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 @@ -19,24 +19,25 @@ RUN apt update -y --fix-missing && \ apt clean && \ rm -rf /var/lib/apt/lists/* +ENV PYTHON_VERSION=3.10 # Torch Metrics and Lightning (without torch) RUN pip install --no-cache-dir --no-deps torch torchmetrics pytorch-lightning lightning-utilities \ && pip install --no-cache-dir --upgrade pip \ && pip install sympy \ - && rm -rf /usr/local/lib/python3.8/dist-packages/torch \ - && rm -rf /usr/local/lib/python3.8/dist-packages/caffe2 + && rm -rf /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch \ + && rm -rf /usr/local/lib/python${PYTHON_VERSION}/dist-packages/caffe2 # Triton Torch backend COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/pytorch backends/pytorch # DLFW Python packages -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/numba /usr/local/lib/python3.8/dist-packages/numba -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/numpy /usr/local/lib/python3.8/dist-packages/numpy -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/torch /usr/local/lib/python3.8/dist-packages/torch +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numpy /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numpy +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/numba-*.dist-info /usr/local/lib/python3.8/dist-packages/numba.dist-info/ -COPY --chown=1000:1000 --from=dlfw 
/usr/local/lib/python3.8/dist-packages/numpy-*.dist-info /usr/local/lib/python3.8/dist-packages/numpy.dist-info/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python3.8/dist-packages/torch-*.egg-info /usr/local/lib/python3.8/dist-packages/torch.egg-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numpy-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numpy.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-*.egg-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch.egg-info/ # Add all torch libraries to /usr/local From 0f9f78fa73ed4e044ca37244cf48af2af04e9741 Mon Sep 17 00:00:00 2001 From: qqiao Date: Mon, 3 Jul 2023 07:42:32 -0700 Subject: [PATCH 05/13] Remove env PYTHON_VERSION since it's already in base. --- docker/dockerfile.tf | 1 - docker/dockerfile.torch | 1 - 2 files changed, 2 deletions(-) diff --git a/docker/dockerfile.tf b/docker/dockerfile.tf index 26f6a8ea8..4c7a2dc8a 100644 --- a/docker/dockerfile.tf +++ b/docker/dockerfile.tf @@ -20,7 +20,6 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backe RUN pip install --no-cache-dir tensorflow protobuf==3.20.3 wrapt==1.14.0 \ && pip uninstall tensorflow keras -y -ENV PYTHON_VERSION=3.10 # DLFW Tensorflow packages COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow.dist-info/ diff --git a/docker/dockerfile.torch b/docker/dockerfile.torch index 676747233..d9fa27e3e 100644 --- a/docker/dockerfile.torch +++ b/docker/dockerfile.torch @@ -19,7 +19,6 @@ RUN apt update -y --fix-missing && \ apt clean && \ rm -rf /var/lib/apt/lists/* -ENV PYTHON_VERSION=3.10 # Torch Metrics and Lightning (without torch) RUN pip install --no-cache-dir --no-deps torch torchmetrics pytorch-lightning lightning-utilities \ && pip install --no-cache-dir --upgrade pip \ From d94148346d34126af9b63e15b705c2f8ab4649e5 Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Tue, 4 Jul 2023 03:01:48 -0700 Subject: [PATCH 06/13] Tick base image up to 23.06, fix `tritonclient` dependency. 
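
The `tritonclient[all]==2.29.0` pin is dropped below; the old pin only existed
to keep the `perf_*` binaries around. A purely illustrative sanity check (not
part of the build) for the unpinned client in the resulting image:

```shell
# Illustrative check only: the unpinned client should install and import
# cleanly on the 23.06 base.
pip install --no-cache-dir "tritonclient[all]" && \
    python -c "import tritonclient.grpc, tritonclient.http; print('tritonclient OK')"
```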
--- docker/dockerfile.merlin | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docker/dockerfile.merlin b/docker/dockerfile.merlin index ccf1075d6..07a50b7e5 100644 --- a/docker/dockerfile.merlin +++ b/docker/dockerfile.merlin @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.2 -ARG TRITON_VERSION=23.05 -ARG DLFW_VERSION=23.05 +ARG TRITON_VERSION=23.06 +ARG DLFW_VERSION=23.06 ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-min @@ -88,14 +88,12 @@ RUN ln -s /usr/bin/python3 /usr/bin/python # A fix has already been merged but not yet released: # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7859 # 2023-02-22: pynvml==11.5.0 is currently incompatible with our version of dask/distributed -# tritonclient[all]==2.29.0: latest tritonclient removes the perf_* binaries, so specified to version 2.29.0 -#cupy-cuda12x RUN pip install --no-cache-dir --upgrade pip; pip install --no-cache-dir "cmake<3.25.0" ninja scikit-build pandas==1.5.2 \ fastrlock nvidia-pyindex pybind11 pytest \ transformers tensorflow-metadata betterproto \ cachetools graphviz nvtx scipy "scikit-learn<1.2" \ - tritonclient[all]==2.29.0 grpcio-channelz fiddle wandb npy-append-array \ + tritonclient[all] grpcio-channelz fiddle wandb npy-append-array \ git+https://github.com/rapidsai/asvdb.git@main \ xgboost==1.6.2 lightgbm treelite==2.4.0 treelite_runtime==2.4.0 \ lightfm implicit \ From 3f178f5eba4e13e6e463e59feb7328afaee3ef09 Mon Sep 17 00:00:00 2001 From: qqiao Date: Tue, 4 Jul 2023 05:30:57 -0700 Subject: [PATCH 07/13] Update base to 23.06 --- docker/dockerfile.tf | 6 +++--- docker/dockerfile.torch | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docker/dockerfile.tf b/docker/dockerfile.tf index 4c7a2dc8a..0dafdff29 100644 --- a/docker/dockerfile.tf +++ b/docker/dockerfile.tf @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1.2 -ARG MERLIN_VERSION=23.05 -ARG TRITON_VERSION=23.05 -ARG TENSORFLOW_VERSION=23.05 +ARG MERLIN_VERSION=23.06 +ARG TRITON_VERSION=23.06 +ARG TENSORFLOW_VERSION=23.06 ARG DLFW_IMAGE=nvcr.io/nvidia/tensorflow:${TENSORFLOW_VERSION}-tf2-py3 ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 diff --git a/docker/dockerfile.torch b/docker/dockerfile.torch index d9fa27e3e..bbc87040f 100644 --- a/docker/dockerfile.torch +++ b/docker/dockerfile.torch @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1.2 -ARG MERLIN_VERSION=23.05 -ARG TRITON_VERSION=23.05 -ARG TORCH_VERSION=23.05 +ARG MERLIN_VERSION=23.06 +ARG TRITON_VERSION=23.06 +ARG TORCH_VERSION=23.06 ARG DLFW_IMAGE=nvcr.io/nvidia/pytorch:${TORCH_VERSION}-py3 ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 From 3ca7659df7cceebf185075e84875af45885cee30 Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Tue, 4 Jul 2023 05:41:19 -0700 Subject: [PATCH 08/13] Tick up base image version. 
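
With this bump the training image builds against the matching 23.06 base,
e.g. (the image tag is illustrative; the build args are the ones declared in
the Dockerfile):

```shell
docker build -f docker/dockerfile.ctr \
    --build-arg MERLIN_VERSION=23.06 \
    --build-arg TRITON_VERSION=23.06 \
    -t merlin-hugectr:23.06 .
```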
--- docker/dockerfile.ctr | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/dockerfile.ctr b/docker/dockerfile.ctr
index 7f3c8c918..f9e767438 100644
--- a/docker/dockerfile.ctr
+++ b/docker/dockerfile.ctr
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1.2
-ARG MERLIN_VERSION=23.05
-ARG TRITON_VERSION=23.05
+ARG MERLIN_VERSION=23.06
+ARG TRITON_VERSION=23.06

 ARG BASE_IMAGE=nvcr.io/nvstaging/merlin/merlin-base:${MERLIN_VERSION}

From d07f67422b5acf9adb6fd00ba01dc2b828385bd4 Mon Sep 17 00:00:00 2001
From: Matthias Langer
Date: Wed, 5 Jul 2023 08:39:19 +0000
Subject: [PATCH 09/13] Merge branch 'main' into fix-update_base_23.05

---
 CHANGELOG.md | 134 ++
 README.md | 2 +-
 ci/dockerfile.ci | 20 +-
 docs/data.json | 120 ++
 .../scripts/preproc/preprocessing.py | 45 +-
 examples/ranking/README.md | 47 +
 ...g-DLRM-model-with-Models-and-Systems.ipynb | 1775 +++++++++++++++++
 ...n-Implicit-Model-With-Merlin-Systems.ipynb | 488 +++++
 ...An-XGboost-Model-With-Merlin-Systems.ipynb | 545 +++++
 .../examples/quick_start/test_preproc.py | 292 +++
 .../examples/quick_start/test_ranking.py | 502 +++++
 ...i_building_deploying_multi_stage_RecSys.py | 1 -
 ...g_an_implicit_model_with_merlin_systems.py | 59 +
 ...ng_an_xgboost_model_with_merlin_systems.py | 50 +
 ...ving_ranking_models_with_merlin_systems.py | 47 +
 15 files changed, 4101 insertions(+), 26 deletions(-)
 create mode 100644 examples/ranking/README.md
 create mode 100644 examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb
 create mode 100644 examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb
 create mode 100644 examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb
 create mode 100644 tests/integration/examples/quick_start/test_preproc.py
 create mode 100644 tests/integration/examples/quick_start/test_ranking.py
 create mode 100644 tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py
 create mode 100644 tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py
 create mode 100644 tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ab073bdb..ec7b35b5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,139 @@
 # Merlin Changelog
+
+## [23.06]
+
+### NVTabular
+
+#### Major Changes
+
+* Moved some functionality from NVTabular to `merlin-core`, but left aliases in place for import backwards compatibility. Some examples are `LambdaOp`, `AddMetadataOp`, `StatOperator`, `WorkflowNode`, and others. [#1823](https://github.com/NVIDIA-Merlin/NVTabular/pull/1823), [#1825](https://github.com/NVIDIA-Merlin/NVTabular/pull/1825)
+* Updated `Categorify` to correctly handle nulls [#1836](https://github.com/NVIDIA-Merlin/NVTabular/pull/1836).
+
+#### Added
+
+* Added support for retrieving subworkflows using the get_subworkflow API, which returns a subgraph wrapped in a new workflow object. [#1842](https://github.com/NVIDIA-Merlin/NVTabular/pull/1842)
+
+#### Deprecated/Removed
+
+* Removed the `nvtabular.inference` module. This functionality now exists in `merlin-systems` [#1822](https://github.com/NVIDIA-Merlin/NVTabular/pull/1822)
+
+#### Fixed Bugs
+
+### Models
+
+#### Added
+
+* Add support for transformer-based retrieval models [#1128](https://github.com/NVIDIA-Merlin/models/pull/1128)
+
+### Merlin
+
+#### Added
+
+* Improvements in the Quick-start for ranking example [#1014](https://github.com/NVIDIA-Merlin/Merlin/pull/1014)
+  * In `preprocessing.py`, added support for target encoding features, configurable through these new CLI arguments: `--target_encoding_features`, `--target_encoding_targets`, `--target_encoding_kfold`, `--target_encoding_smoothing`.
+  * In `ranking.py`: added support to select columns to keep (`--keep_columns`) or remove (`--ignore_columns`) at dataloading / training / evaluation.
+
+#### Fixed Bugs
+
+* Fixes in the Quick-start for ranking example [#1017](https://github.com/NVIDIA-Merlin/Merlin/pull/1017):
+  * Fixed `preprocessing.py`, which was not standardizing and tagging continuous columns properly
+  * Fixed the Wide&Deep and DeepFM models to use the updated API
+
+### Transformers4Rec
+
+#### Added
+
+* Improved docstring coverage [#706](https://github.com/NVIDIA-Merlin/Transformers4Rec/pull/706)
+
+#### Fixed Bugs
+
+* Add support for providing a scalar cut-off in metrics, and fix recall@1, which sometimes scored higher than the upper cut-offs. [#720](https://github.com/NVIDIA-Merlin/Transformers4Rec/pull/720)
+* Fix the CLM performance mismatch between model evaluation and manual inference [#723](https://github.com/NVIDIA-Merlin/Transformers4Rec/pull/723)
+* Fixed OOM issues when evaluating/predicting [#721](https://github.com/NVIDIA-Merlin/Transformers4Rec/pull/721)
+  * API breaking notice: This fix changes the default output of the `trainer.predict()` API, which returns a `PredictionOutput` object with a predictions property. Before this change, when the `predict_top_k` option was not set (the default), the predictions property was a 2D tensor (batch size, item cardinality) with the scores for all the items. Now that we set `T4RecTrainingArguments.predict_top_k` by default, the predictions property returns a tuple with `(top-100 predicted item ids, top-100 prediction scores)`.
+
+### Core
+
+#### Major Changes
+
+* Merged the NVTabular Operator base class with the Base Operator in core.
+
+#### Added
+
+* Migrated some operators from NVTabular to core, allowing their use in `merlin-systems` (e.g., `LambdaOp`, changed to a user-defined function (UDF), and the add-metadata operator).
+* Created a subgraph operator to allow for the recall and use of parts of a graph
+
+### Systems
+
+#### Added
+
+* Added test cases to demonstrate functional support for core operators in Systems ensembles
+* Added an API to retrieve sub-ensembles.
+
 ## [23.05]

 ### NVTabular

diff --git a/README.md b/README.md
index 539d89926..93957a42f 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ real-world use cases.

 ## Merlin Is Built On

-**[cuDF](https://github.com/rapidsai/cudf)**
Merlin relies on cuDF for +**[RAPIDS cuDF](https://github.com/rapidsai/cudf)**
Merlin relies on cuDF for GPU-accelerated DataFrame operations used in feature engineering. **[Dask](https://www.dask.org/)**
Merlin relies on Dask to distribute and scale diff --git a/ci/dockerfile.ci b/ci/dockerfile.ci index fcc51b32b..7d9865d40 100644 --- a/ci/dockerfile.ci +++ b/ci/dockerfile.ci @@ -162,11 +162,11 @@ ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin # Set up NVIDIA package repository RUN apt update -y --fix-missing && \ - apt install -y --no-install-recommends software-properties-common && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" && \ + apt install -y --no-install-recommends software-properties-common +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \ + mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \ apt install -y --no-install-recommends \ ca-certificates \ clang-format \ @@ -266,9 +266,9 @@ COPY --chown=1000:1000 --from=dlfw /usr/include/arrow /usr/include/arrow/ COPY --chown=1000:1000 --from=dlfw /usr/include/cudf /usr/include/cudf/ COPY --chown=1000:1000 --from=dlfw /usr/include/rmm /usr/include/rmm/ # ptx compiler required by cubinlinker -# COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a -# COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h -# RUN git clone https://github.com/rapidsai/ptxcompiler.git /ptx && cd /ptx/ && python setup.py develop; +COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a +COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h +RUN git clone https://github.com/rapidsai/ptxcompiler.git /ptx && cd /ptx/ && pip install .; ARG PYTHON_VERSION=3.10 # Python Packages @@ -285,7 +285,7 @@ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-p COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupyx /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupyx COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_backends /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_backends COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba -# COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker COPY --chown=1000:1000 --from=dlfw 
/usr/local/lib/python${PYTHON_VERSION}/dist-packages/cudf-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cudf.dist-info/ @@ -295,7 +295,7 @@ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-p COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numba.dist-info/ -# COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cubinlinker.dist-info/ RUN pip install --no-cache-dir tensorflow && pip uninstall tensorflow keras -y diff --git a/docs/data.json b/docs/data.json index a5033b174..9e355dc04 100644 --- a/docs/data.json +++ b/docs/data.json @@ -359,6 +359,46 @@ "timestamp_utc": "2023-06-07T20:14:32.616950", "transformers4rec": "23.5.0", "triton": "2.31.0" + }, + "23.06": { + "base_container": "Triton version 23.04", + "compressedSize": "6.95 GB", + "cublas": "12.1.3.1", + "cuda": "12.1.0.023", + "cudf": "23.02.00", + "cudnn": "8.9.0.131", + "cufft": "11.0.2.4", + "curand": "10.3.2.56", + "cusolver": "11.4.4.55", + "cusparse": "12.0.2.55", + "cutensor": "1.7.0.1", + "dgx_system": "* DGX-1\n* DGX-2\n* DGX A100\n* DGX Station", + "distributed_embeddings": "Not applicable", + "gpu_model": "* `NVIDIA Ampere GPU Architecture `_\n* `Turing `_\n* `Volta `_\n* `Pascal `_", + "hugectr": "23.6.0", + "hugectr2onnx": "Not applicable", + "merlin.core": "23.6.0", + "merlin.dataloader": "23.6.0", + "merlin.models": "23.6.0", + "merlin.systems": "23.6.0", + "nvidia_driver": "NVIDIA Driver version 465.19.01\nor later is required. 
However,\nif you're running on Data Center\nGPUs (formerly Tesla) such as T4,\nyou can use any of the following\nNVIDIA Driver versions:\n\n* 418.40 (or later R418)\n* 440.33 (or later R440)\n* 450.51 (or later R450)\n* 460.27 (or later R460)\n\n**Note**: The CUDA Driver\nCompatibility Package does not\nsupport all drivers.",
+ "nvidia_pytorch": "Not applicable",
+ "nvidia_tensorflow": "Not applicable",
+ "nvtabular": "23.6.0",
+ "openmpi": "4.1.4",
+ "os": "Ubuntu 20.04.5 LTS",
+ "python_major": "3",
+ "pytorch": "Not applicable",
+ "release": "23.06",
+ "rmm": "23.02.00",
+ "size": "781.9 GB",
+ "sm": "60, 61, 70, 75, 80, 90",
+ "sparse_operation_kit": "Not applicable",
+ "tensorrt": "8.6.1.2+cuda12.0.1.011",
+ "tf": "Not applicable",
+ "timestamp_utc": "2023-06-29T07:26:25.583573",
+ "transformers4rec": "23.6.0",
+ "triton": "2.33.0"
 }
 },
"nvcr.io/nvidia/merlin/merlin-inference": { @@ -1035,6 +1075,46 @@ "timestamp_utc": "2023-06-07T20:13:59.481254", "transformers4rec": "23.5.0", "triton": "2.31.0" + }, + "23.06": { + "base_container": "Triton version 23.04", + "compressedSize": "8.39 GB", + "cublas": "12.1.3.1", + "cuda": "12.1.0.023", + "cudf": "23.02.00", + "cudnn": "8.9.0.131", + "cufft": "11.0.2.4", + "curand": "10.3.2.56", + "cusolver": "11.4.4.55", + "cusparse": "12.0.2.55", + "cutensor": "1.7.0.1", + "dgx_system": "* DGX-1\n* DGX-2\n* DGX A100\n* DGX Station", + "distributed_embeddings": "Not applicable", + "gpu_model": "* `NVIDIA Ampere GPU Architecture `_\n* `Turing `_\n* `Volta `_\n* `Pascal `_", + "hugectr": "Not applicable", + "hugectr2onnx": "Not applicable", + "merlin.core": "23.6.0", + "merlin.dataloader": "23.6.0", + "merlin.models": "23.6.0", + "merlin.systems": "23.6.0", + "nvidia_driver": "NVIDIA Driver version 465.19.01\nor later is required. However,\nif you're running on Data Center\nGPUs (formerly Tesla) such as T4,\nyou can use any of the following\nNVIDIA Driver versions:\n\n* 418.40 (or later R418)\n* 440.33 (or later R440)\n* 450.51 (or later R450)\n* 460.27 (or later R460)\n\n**Note**: The CUDA Driver\nCompatibility Package does not\nsupport all drivers.", + "nvidia_pytorch": "Not applicable", + "nvidia_tensorflow": "Not applicable", + "nvtabular": "23.6.0", + "openmpi": "4.1.4", + "os": "Ubuntu 20.04.5 LTS", + "python_major": "3", + "pytorch": "2.0.1", + "release": "23.06", + "rmm": "23.02.00", + "size": "784.75 GB", + "sm": "Not applicable", + "sparse_operation_kit": "Not applicable", + "tensorrt": "8.6.1.2+cuda12.0.1.011", + "tf": "Not applicable", + "timestamp_utc": "2023-06-29T07:25:52.907749", + "transformers4rec": "23.6.0", + "triton": "2.33.0" } }, "nvcr.io/nvidia/merlin/merlin-pytorch-inference": { @@ -1830,6 +1910,46 @@ "timestamp_utc": "2023-06-07T20:13:21.204966", "transformers4rec": "23.5.0", "triton": "2.31.0" + }, + "23.06": { + "base_container": "Triton version 23.04", + "compressedSize": "8.2 GB", + "cublas": "12.1.3.1", + "cuda": "12.1.0.023", + "cudf": "23.02.00", + "cudnn": "8.9.0.131", + "cufft": "11.0.2.4", + "curand": "10.3.2.56", + "cusolver": "11.4.4.55", + "cusparse": "12.0.2.55", + "cutensor": "1.7.0.1", + "dgx_system": "* DGX-1\n* DGX-2\n* DGX A100\n* DGX Station", + "distributed_embeddings": "Not applicable", + "gpu_model": "* `NVIDIA Ampere GPU Architecture `_\n* `Turing `_\n* `Volta `_\n* `Pascal `_", + "hugectr": "Not applicable", + "hugectr2onnx": "Not applicable", + "merlin.core": "23.6.0", + "merlin.dataloader": "23.6.0", + "merlin.models": "23.6.0", + "merlin.systems": "23.6.0", + "nvidia_driver": "NVIDIA Driver version 465.19.01\nor later is required. 
However,\nif you're running on Data Center\nGPUs (formerly Tesla) such as T4,\nyou can use any of the following\nNVIDIA Driver versions:\n\n* 418.40 (or later R418)\n* 440.33 (or later R440)\n* 450.51 (or later R450)\n* 460.27 (or later R460)\n\n**Note**: The CUDA Driver\nCompatibility Package does not\nsupport all drivers.",
+ "nvidia_pytorch": "Not applicable",
+ "nvidia_tensorflow": "Not applicable",
+ "nvtabular": "23.6.0",
+ "openmpi": "4.1.4",
+ "os": "Ubuntu 20.04.5 LTS",
+ "python_major": "3",
+ "pytorch": "Not applicable",
+ "release": "23.06",
+ "rmm": "23.02.00",
+ "size": "785.21 GB",
+ "sm": "Not applicable",
+ "sparse_operation_kit": "1.2.0",
+ "tensorrt": "8.6.1.2+cuda12.0.1.011",
+ "tf": "2.12.0",
+ "timestamp_utc": "2023-06-29T07:25:15.869683",
+ "transformers4rec": "23.6.0",
+ "triton": "2.33.0"
 }
 },
 "nvcr.io/nvidia/merlin/merlin-tensorflow-inference": {

diff --git a/examples/quick_start/scripts/preproc/preprocessing.py b/examples/quick_start/scripts/preproc/preprocessing.py
index b3c62ef3e..43843600f 100644
--- a/examples/quick_start/scripts/preproc/preprocessing.py
+++ b/examples/quick_start/scripts/preproc/preprocessing.py
@@ -1,6 +1,7 @@
 import gc
 import logging
 import os
+import shutil
 from functools import reduce
 from typing import Optional
@@ -12,6 +13,7 @@
 from .args_parsing import parse_arguments

 INDEX_TMP_COL = "__index"
+NVT_OUTPUT_FOLDER = "nvt_outputs"


 def filter_by_freq(df_to_filter, df_for_stats, column, min_freq=None, max_freq=None):
@@ -221,7 +223,8 @@ def generate_nvt_features(self):
         for col in args.categorical_features:
             feats[col] = [col] >> nvt_ops.Categorify(
-                freq_threshold=args.categ_min_freq_capping
+                freq_threshold=args.categ_min_freq_capping,
+                out_path=NVT_OUTPUT_FOLDER,
             )
         for col in args.continuous_features:
             feats[col] = [col]
@@ -244,14 +247,13 @@ def generate_nvt_features(self):
         if args.target_encoding_targets and args.target_encoding_features:
             for target_col in args.target_encoding_targets:
-                feats[f"{target_col}_te_features"] = (
-                    args.target_encoding_features
-                    >> nvt.ops.TargetEncoding(
-                        [target_col],
-                        kfold=args.target_encoding_kfold,
-                        p_smooth=args.target_encoding_smoothing,
-                        out_dtype="float32",
-                    )
+                feats[
+                    f"{target_col}_te_features"
+                ] = args.target_encoding_features >> nvt.ops.TargetEncoding(
+                    [target_col],
+                    kfold=args.target_encoding_kfold,
+                    p_smooth=args.target_encoding_smoothing,
+                    out_dtype="float32",
                 )

         for col in args.user_features:
@@ -322,7 +324,9 @@ def merge_dataset_features_values(
         ).excluding_by_name([INDEX_TMP_COL])

         dataset_joint = nvt.Dataset(
-            dataset_joint, schema=schema_joint, cpu=not self.gpu,
+            dataset_joint,
+            schema=schema_joint,
+            cpu=not self.gpu,
         )
         return dataset_joint
@@ -430,6 +434,16 @@ def run(self):
         output_dataset_path = args.output_path

+        nvt_outputs_folder = os.path.join(output_dataset_path, NVT_OUTPUT_FOLDER)
+
+        if os.path.exists(nvt_outputs_folder):
+            logging.info(
+                "The NVTabular output folder already exists and is "
+                f"being deleted: {nvt_outputs_folder}"
+            )
+            # Delete the stale folder so previous outputs are not reused
+            shutil.rmtree(nvt_outputs_folder)
+
         train_dataset = nvt.Dataset(ddf, cpu=not self.gpu)
         # Processing features and targets in separate workflows, because
         # targets might not be available for test/predict_dataset
@@ -442,7 +456,8 @@
             train_dataset_features, train_dataset_targets, "train", args
         )
         train_dataset_preproc.to_parquet(
-            output_train_dataset_path, output_files=args.output_num_partitions,
+            output_train_dataset_path,
+            output_files=args.output_num_partitions,
         )

         if args.eval_data_path or args.dataset_split_strategy:
@@ -459,7 +474,8 @@
             eval_dataset_features, eval_dataset_targets, "eval", args
         )
         eval_dataset_preproc.to_parquet(
-            output_eval_dataset_path, output_files=args.output_num_partitions,
+            output_eval_dataset_path,
+            output_files=args.output_num_partitions,
         )

         if args.predict_data_path:
@@ -484,9 +500,10 @@
         logging.info(f"Saving predict/test set: {output_predict_dataset_path}")
         new_predict_dataset.to_parquet(
-            output_predict_dataset_path, output_files=args.output_num_partitions,
+            output_predict_dataset_path,
+            output_files=args.output_num_partitions,
         )

-        nvt_save_path = os.path.join(output_dataset_path, "workflow")
+        nvt_save_path = os.path.join(nvt_outputs_folder, "workflow")
         logging.info(f"Saving nvtabular workflow to: {nvt_save_path}")
         nvt_workflow_features.save(nvt_save_path)

diff --git a/examples/ranking/README.md b/examples/ranking/README.md
new file mode 100644
index 000000000..ac1e58f26
--- /dev/null
+++ b/examples/ranking/README.md
@@ -0,0 +1,47 @@
# Training and Deploying Ranking Models with Merlin

Ranking models are probably the most common use case in recommender systems. The examples under this folder demonstrate how to build, train and evaluate a ranking model (e.g. DLRM) using Merlin Models and deploy it on [Triton Inference Server](https://github.com/triton-inference-server/server) with Merlin Systems. Currently we support models built with the TensorFlow framework, traditional ML models like XGBoost, and Python-based models built with the `implicit` library. Examples built with the PyTorch framework are being developed and will be added here soon.

To learn more about ranking models, please visit this documentation [page](https://nvidia-merlin.github.io/Merlin/stable/guide/recommender_models.html#).

## Running the Example Notebooks

Docker containers are available from the NVIDIA GPU Cloud.
We use the latest stable version of the [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) container to run the example notebooks. To run the example notebooks using Docker containers, perform the following steps:

1. Pull and start the container by running the following command:

   ```shell
   docker run --gpus all --rm -it \
     -p 8888:8888 -p 8797:8787 -p 8796:8786 --ipc=host \
     nvcr.io/nvidia/merlin/merlin-tensorflow:23.XX /bin/bash
   ```

   > You can find the release tags and more information on the [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow) container page.

   The container opens a shell when the run command execution is completed.
   Your shell prompt should look similar to the following example:

   ```shell
   root@2efa5b50b909:
   ```

2. Start the JupyterLab server by running the following command:

   ```shell
   jupyter-lab --allow-root --ip='0.0.0.0'
   ```

   View the messages in your terminal to identify the URL for JupyterLab.
   The messages in your terminal show similar lines to the following example:

   ```shell
   Or copy and paste one of these URLs:
   http://2efa5b50b909:8888/lab?token=9b537d1fda9e4e9cadc673ba2a472e247deee69a6229ff8d
   or http://127.0.0.1:8888/lab?token=9b537d1fda9e4e9cadc673ba2a472e247deee69a6229ff8d
   ```

3. Open a browser and use the `127.0.0.1` URL provided in the messages by JupyterLab.

4. After you log in to JupyterLab, navigate to the `/Merlin/examples/ranking` directory to try out the example notebooks.
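
As an optional sanity check after step 1, you can confirm from inside the container that the GPU and the Merlin stack are visible (the import list assumes the standard contents of the `merlin-tensorflow` image):

```shell
# Both commands should succeed inside a correctly started container.
nvidia-smi
python -c "import nvtabular, merlin.models; print('Merlin stack OK')"
```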
diff --git a/examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb b/examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb new file mode 100644 index 000000000..0f213f3dc --- /dev/null +++ b/examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb @@ -0,0 +1,1775 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "bc80cfdd", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ================================\n", + "\n", + "# Each user is responsible for checking the content of datasets and the\n", + "# applicable licenses and determining if suitable for the intended use." + ] + }, + { + "cell_type": "markdown", + "id": "51acf955", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "\n", + "\n", + "# Exporting Ranking Models\n", + "\n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) container. \n", + "\n", + "In this example notebook we demonstrate how to export (save) NVTabular `workflow` and a `ranking model` for model deployment with [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) library. \n", + "\n", + "Learning Objectives:\n", + "\n", + "- Export NVTabular workflow for model deployment\n", + "- Export TensorFlow DLRM model for model deployment\n", + "- Load saved NVTabular Workflow\n", + "- Load saved trained Merlin Models model\n", + "- Create Ensemble Graph\n", + "- Export Ensemble Graph\n", + "- Deploy model on Triton Inference Server\n", + "\n", + "We will follow the steps below:\n", + "- Prepare the data with NVTabular and export NVTabular workflow\n", + "- Train a DLRM model with Merlin Models and export the trained model\n", + "- Launch Triton server and deploy trained models on Triton\n", + "- Send request to Triton and receive back the response" + ] + }, + { + "cell_type": "markdown", + "id": "93e4fec3", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Importing Libraries" + ] + }, + { + "cell_type": "markdown", + "id": "eab14a7d", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Let's start with importing the libraries that we'll use in this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "37d5020c", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-28 21:03:00.600621: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n", + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Initialize finished, communication tool: horovod\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-28 21:03:07.070258: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-06-28 21:03:07.070303: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:226] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-06-28 21:03:07.070448: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1638] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16249 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "\n", + "import nvtabular as nvt\n", + "from nvtabular.ops import *\n", + "import numpy as np\n", + "\n", + "from merlin.models.utils.example_utils import workflow_fit_transform\n", + "from merlin.schema.tags import Tags\n", + "\n", + "import merlin.models.tf as mm\n", + "from merlin.io.dataset import Dataset\n", + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "id": "cbb650a7", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Feature Engineering with NVTabular" + ] + }, + { + "cell_type": "markdown", + "id": "0c715cd5", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "We use the synthetic train and test datasets generated by mimicking the real [Ali-CCP: Alibaba Click and Conversion Prediction](https://tianchi.aliyun.com/dataset/dataDetail?dataId=408#1) dataset to build our recommender system ranking models. \n", + "\n", + "If you would like to use real Ali-CCP dataset instead, you can download the training and test datasets on [tianchi.aliyun.com](https://tianchi.aliyun.com/dataset/dataDetail?dataId=408#1). You can then use [get_aliccp()](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/datasets/ecommerce/aliccp/dataset.py#L43) function to curate the raw csv files and save them as parquet files." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "44c7457b-08c4-4453-bacc-5c8eef7042d8", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.datasets.synthetic import generate_data\n", + "\n", + "DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data/\")\n", + "NUM_ROWS = os.environ.get(\"NUM_ROWS\", 1000000)\n", + "SYNTHETIC_DATA = eval(os.environ.get(\"SYNTHETIC_DATA\", \"True\"))\n", + "BATCH_SIZE = int(os.environ.get(\"BATCH_SIZE\", 512))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b6651cc8", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "if SYNTHETIC_DATA:\n", + " train, valid = generate_data(\"aliccp-raw\", int(NUM_ROWS), set_sizes=(0.8, 0.2))\n", + " # save the datasets as parquet files\n", + " train.to_ddf().to_parquet(os.path.join(DATA_FOLDER, \"train\"))\n", + " valid.to_ddf().to_parquet(os.path.join(DATA_FOLDER, \"valid\"))" + ] + }, + { + "cell_type": "markdown", + "id": "ecf0e794", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Let's define our input and output paths." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1124f2c1", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_path = os.path.join(DATA_FOLDER, \"train\", \"*.parquet\")\n", + "valid_path = os.path.join(DATA_FOLDER, \"valid\", \"*.parquet\")\n", + "output_path = os.path.join(DATA_FOLDER, \"processed\")" + ] + }, + { + "cell_type": "markdown", + "id": "2e1162c0", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "After we execute `fit()` and `transform()` functions on the raw dataset applying the operators defined in the NVTabular workflow pipeline below, the processed parquet files are saved to `output_path`." 
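+ "\n",
+ "(An aside on the real dataset: if you downloaded the raw Ali-CCP files mentioned above, the curation step might look like the sketch below. This is hypothetical usage of the linked `get_aliccp()` helper; check its source for the exact signature in your release.)\n",
+ "\n",
+ "```python\n",
+ "from merlin.datasets.ecommerce import get_aliccp\n",
+ "\n",
+ "# hypothetical: curate the raw Ali-CCP csv files under DATA_FOLDER and\n",
+ "# get back train/valid Merlin Datasets backed by parquet files\n",
+ "train, valid = get_aliccp(DATA_FOLDER)\n",
+ "```"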
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "89b3ddc6", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.61 s, sys: 1.09 s, total: 3.7 s\n", + "Wall time: 3.68 s\n" + ] + } + ], + "source": [ + "%%time\n", + "category_temp_directory = os.path.join(DATA_FOLDER, \"categories\")\n", + "user_id = [\"user_id\"] >> Categorify(out_path=category_temp_directory) >> TagAsUserID()\n", + "item_id = [\"item_id\"] >> Categorify(out_path=category_temp_directory) >> TagAsItemID()\n", + "targets = [\"click\"] >> AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, \"target\"])\n", + "\n", + "item_features = [\"item_category\", \"item_shop\", \"item_brand\"] >> Categorify(out_path=category_temp_directory) >> TagAsItemFeatures()\n", + "\n", + "user_features = (\n", + " [\n", + " \"user_shops\",\n", + " \"user_profile\",\n", + " \"user_group\",\n", + " \"user_gender\",\n", + " \"user_age\",\n", + " \"user_consumption_2\",\n", + " \"user_is_occupied\",\n", + " \"user_geography\",\n", + " \"user_intentions\",\n", + " \"user_brands\",\n", + " \"user_categories\",\n", + " ]\n", + " >> Categorify(out_path=category_temp_directory)\n", + " >> TagAsUserFeatures()\n", + ")\n", + "\n", + "outputs = user_id + item_id + item_features + user_features + targets\n", + "\n", + "workflow = nvt.Workflow(outputs)\n", + "\n", + "train_dataset = nvt.Dataset(train_path)\n", + "valid_dataset = nvt.Dataset(valid_path)\n", + "\n", + "workflow.fit(train_dataset)\n", + "workflow.transform(train_dataset).to_parquet(output_path=output_path + \"/train/\")\n", + "workflow.transform(valid_dataset).to_parquet(output_path=output_path + \"/valid/\")" + ] + }, + { + "cell_type": "markdown", + "id": "8afd8b10", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "We save NVTabular `workflow` model in the current working directory." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3e367206", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "workflow.save(os.path.join(DATA_FOLDER, \"workflow\"))" + ] + }, + { + "cell_type": "markdown", + "id": "be619646", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Let's check out our saved workflow model folder." 
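+ "\n",
+ "As a quick, optional sanity check (a sketch only; the deployment section below reloads the workflow for real), we can confirm that the saved workflow round-trips:\n",
+ "\n",
+ "```python\n",
+ "from nvtabular.workflow import Workflow\n",
+ "\n",
+ "# reload the workflow we just saved and compare its input columns\n",
+ "reloaded = Workflow.load(os.path.join(DATA_FOLDER, \"workflow\"))\n",
+ "assert reloaded.input_schema.column_names == workflow.input_schema.column_names\n",
+ "```"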
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5e03167a", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: seedir in /usr/local/lib/python3.8/dist-packages (0.4.2)\n", + "Requirement already satisfied: natsort in /usr/local/lib/python3.8/dist-packages (from seedir) (8.4.0)\n" + ] + } + ], + "source": [ + "!pip install seedir" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "aeafadbe", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data/\n", + "├─categories/\n", + "│ └─categories/\n", + "│ ├─meta.item_brand.parquet\n", + "│ ├─meta.item_category.parquet\n", + "│ ├─meta.item_id.parquet\n", + "│ ├─meta.item_shop.parquet\n", + "│ ├─meta.user_age.parquet\n", + "│ ├─meta.user_brands.parquet\n", + "│ ├─meta.user_categories.parquet\n", + "│ ├─meta.user_consumption_2.parquet\n", + "│ ├─meta.user_gender.parquet\n", + "│ └─meta.user_geography.parquet\n", + "├─dlrm/\n", + "│ ├─.merlin/\n", + "│ │ ├─input_schema.json\n", + "│ │ └─output_schema.json\n", + "│ ├─assets/\n", + "│ ├─fingerprint.pb\n", + "│ ├─keras_metadata.pb\n", + "│ ├─saved_model.pb\n", + "│ └─variables/\n", + "│ ├─variables.data-00000-of-00001\n", + "│ └─variables.index\n", + "├─processed/\n", + "│ ├─train/\n", + "│ │ ├─.merlin/\n", + "│ │ ├─_file_list.txt\n", + "│ │ ├─_metadata\n", + "│ │ ├─_metadata.json\n", + "│ │ ├─part_0.parquet\n", + "│ │ └─schema.pbtxt\n", + "│ └─valid/\n", + "│ ├─.merlin/\n", + "│ ├─_file_list.txt\n", + "│ ├─_metadata\n", + "│ ├─_metadata.json\n", + "│ ├─part_0.parquet\n", + "│ └─schema.pbtxt\n", + "├─train/\n", + "│ └─part.0.parquet\n", + "├─valid/\n", + "│ └─part.0.parquet\n", + "└─workflow/\n", + " ├─categories/\n", + " │ ├─unique.item_brand.parquet\n", + " │ ├─unique.item_category.parquet\n", + " │ ├─unique.item_id.parquet\n", + " │ ├─unique.item_shop.parquet\n", + " │ ├─unique.user_age.parquet\n", + " │ ├─unique.user_brands.parquet\n", + " │ ├─unique.user_categories.parquet\n", + " │ ├─unique.user_consumption_2.parquet\n", + " │ ├─unique.user_gender.parquet\n", + " │ └─unique.user_geography.parquet\n", + " ├─metadata.json\n", + " └─workflow.pkl\n" + ] + } + ], + "source": [ + "import seedir as sd\n", + "\n", + "sd.seedir(\n", + " DATA_FOLDER,\n", + " style=\"lines\",\n", + " itemlimit=10,\n", + " depthlimit=3,\n", + " exclude_folders=\".ipynb_checkpoints\",\n", + " sort=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "93f8e0ee", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Build and Train a DLRM model" + ] + }, + { + "cell_type": "markdown", + "id": "56f24b6b", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "In this example, we build, train, and export a Deep Learning Recommendation Model [(DLRM)](https://arxiv.org/abs/1906.00091) architecture. To learn more about how to train different deep learning models, how easily transition from one model to another and the seamless integration between data preparation and model training visit [03-Exploring-different-models.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/03-Exploring-different-models.ipynb) notebook." 
+ ] + }, + { + "cell_type": "markdown", + "id": "5ceb8dcc", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "NVTabular workflow above exports a schema file, schema.pbtxt, of our processed dataset. To learn more about the schema object, schema file and `tags`, you can explore [02-Merlin-Models-and-NVTabular-integration.ipynb](02-Merlin-Models-and-NVTabular-integration.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "be3a3421", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# define train and valid dataset objects\n", + "train = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"))\n", + "valid = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"))\n", + "\n", + "# define schema object\n", + "schema = train.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b164b7ff", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'click'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_column = schema.select_by_tag(Tags.TARGET).column_names[0]\n", + "target_column" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "71847bb9", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "model = mm.DLRMModel(\n", + " schema,\n", + " embedding_dim=64,\n", + " bottom_block=mm.MLPBlock([128, 64]),\n", + " top_block=mm.MLPBlock([128, 64, 32]),\n", + " prediction_tasks=mm.BinaryOutput(target_column),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d009deb7", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-28 21:03:36.828993: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n", + "\t [[{{node Placeholder/_0}}]]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1563/1563 [==============================] - ETA: 0s - loss: 0.6932 - auc: 0.4998 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-28 21:04:40.190967: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n", + "\t [[{{node Placeholder/_0}}]]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1563/1563 [==============================] - 69s 38ms/step - loss: 0.6932 - auc: 0.4998 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.5000 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6932\n", + "CPU times: user 1min 51s, sys: 14.1 s, total: 2min 5s\n", + "Wall time: 1min 11s\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + 
}, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "model.compile(\"adam\", run_eagerly=False, metrics=[tf.keras.metrics.AUC()])\n", + "model.fit(train, validation_data=valid, batch_size=BATCH_SIZE)" + ] + }, + { + "cell_type": "markdown", + "id": "adc7051d", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### Save model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f999a063", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, dense_9_layer_call_fn while saving (showing 5 of 96). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/dlrm/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/dlrm/assets\n" + ] + } + ], + "source": [ + "model.save(os.path.join(DATA_FOLDER, \"dlrm\"))" + ] + }, + { + "cell_type": "markdown", + "id": "2a9235b9", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "We have NVTabular wokflow and DLRM model exported, now it is time to move on to the next step: model deployment with [Merlin Systems](https://github.com/NVIDIA-Merlin/systems). " + ] + }, + { + "cell_type": "markdown", + "id": "c4f2667e", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Deploying the model with Merlin Systems" + ] + }, + { + "cell_type": "markdown", + "id": "ee302de0", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "The last step of machine learning (ML)/deep learning (DL) pipeline is to deploy the ETL workflow and saved model into production. In the production setting, we want to transform the input data as done during training (ETL). We need to apply the same mean/std for continuous features and use the same categorical mapping to convert the categories to continuous integer before we use the DL model for a prediction. Therefore, we deploy the NVTabular workflow with the Tensorflow model as an ensemble model to Triton Inference using [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) library very easily. The ensemble model guarantees that the same transformation is applied to the raw inputs.\n", + "\n", + "In the next steps, we will learn how to deploy NVTabular workflow and the trained DLRM model into [Triton Inference Server](https://github.com/triton-inference-server/server) with [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) library. NVIDIA Triton Inference Server (TIS) simplifies the deployment of AI models at scale in production. TIS provides a cloud and edge inferencing solution optimized for both CPUs and GPUs. It supports a number of different machine learning frameworks such as TensorFlow and PyTorch." + ] + }, + { + "cell_type": "markdown", + "id": "84002b14-5be5-4896-96ac-ea058bf8b7e3", + "metadata": {}, + "source": [ + "First, we load the `nvtabular.Workflow` that we created in with this [example](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/04-Exporting-ranking-models.ipynb). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3e6b6cf0-2867-4cce-ade6-d0d86e2f7de7", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular.workflow import Workflow\n", + "\n", + "workflow = Workflow.load(os.path.join(DATA_FOLDER, \"workflow\"))" + ] + }, + { + "cell_type": "markdown", + "id": "f206c105-0cd2-4710-af63-a8862307b67e", + "metadata": {}, + "source": [ + "After we load the workflow, we remove the label columns from it's inputs. This removes all columns with the TARGET tag from the workflow. We do this because we need to set the workflow to only require the features needed to predict, not train, when creating an inference pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "73729623-89af-442f-82ad-0ffad6e73fd3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from merlin.schema.tags import Tags\n", + "\n", + "label_columns = workflow.output_schema.select_by_tag(Tags.TARGET).column_names\n", + "workflow.remove_inputs(label_columns)" + ] + }, + { + "cell_type": "markdown", + "id": "09eddb76-971c-4a69-bfda-e1fe127b2582", + "metadata": {}, + "source": [ + "After loading the workflow, we load the model. This model was trained with the output of the workflow from the Exporting Ranking Models example from Merlin Models.\n", + "\n", + "First, we need to import the Merlin Models library. Loading a TensorFlow model, which is based on custom subclasses, requires to the subclass definition. Otherwise, TensorFlow cannot load correctly load the model." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ec339d34-3667-4fe2-a9f5-ecb770e5c9a3", + "metadata": {}, + "outputs": [], + "source": [ + "tf_model_path = os.path.join(DATA_FOLDER, \"dlrm\")\n", + "\n", + "model = tf.keras.models.load_model(tf_model_path)" + ] + }, + { + "cell_type": "markdown", + "id": "b93b911f-4c6b-427b-a1f9-888f0f8c7613", + "metadata": {}, + "source": [ + "### Create the Ensemble Graph" + ] + }, + { + "cell_type": "markdown", + "id": "98f74ec8-c4e7-49e7-ba86-eae2f8f0b744", + "metadata": {}, + "source": [ + "After we have both the model and the workflow loaded, we can create the ensemble graph. You create the graph. The goal is to illustrate the path of data through your full system. In this example we only serve a workflow with a model, but you can add other components that help you meet your business logic requirements.\n", + "\n", + "Because this example has two components—a model and a workflow—we require two operators. These operators, also known as inference operators, are meant to abstract away all the \"hard parts\" of loading a specific component, such as a workflow or model, into Triton Inference Server.\n", + "\n", + "The following code block shows how to use two inference operators:\n", + "\n", + "- **TransformWorkflow:**
\n", + " This operator ensures that the workflow is correctly saved and packaged with the required config so the server will know how to load it.\n", + "\n", + "- **PredictTensorflow:**
\n", + " This operator will do something similar with the model, loaded before.\n", + "\n", + "Let's give it a try." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "86a951b0-1d44-456e-9dc1-d77e98223248", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_2_layer_call_fn, model_context_2_layer_call_and_return_conditional_losses, prepare_list_features_2_layer_call_fn, prepare_list_features_2_layer_call_and_return_conditional_losses, dense_9_layer_call_fn while saving (showing 5 of 96). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpomjyo5xq/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpomjyo5xq/assets\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "\n", + "serving_operators = workflow.input_schema.column_names >> TransformWorkflow(workflow) >> PredictTensorflow(model)" + ] + }, + { + "cell_type": "markdown", + "id": "ef8324aa-756f-4df8-9c51-595e473b0ce5", + "metadata": {}, + "source": [ + "### Export Graph as Ensemble" + ] + }, + { + "cell_type": "markdown", + "id": "59a33309-4a82-41ac-8439-2a6aa95241af", + "metadata": {}, + "source": [ + "The last step is to create the ensemble artifacts that Triton Inference Server can consume. To make these artifacts, we import the Ensemble class. The class is responsible for interpreting the graph and exporting the correct files for the server.\n", + "\n", + "After you run the following cell, you'll see that we create a ColumnSchema for the expected inputs to the workflow. The workflow is a Schema.\n", + "\n", + "When you are creating an Ensemble object you supply the graph and a schema representing the starting input of the graph. the inputs to the ensemble graph are the inputs to the first operator of your graph.\n", + "\n", + "After you have created the Ensemble you export the graph, supplying an export path for the Ensemble.export function.\n", + "\n", + "This returns an ensemble config which represents the entire inference pipeline and a list of node-specific configs.\n", + "\n", + "Let's take a look below." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "4e305ed1-4c19-470c-82c0-0635f2d1d851", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.num_bucketsproperties.freq_thresholdproperties.max_sizeproperties.cat_pathproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.embedding_sizes.cardinalityproperties.embedding_sizes.dimension
0user_id(Tags.CATEGORICAL, Tags.ID, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_id77366
1item_id(Tags.CATEGORICAL, Tags.ITEM, Tags.ID)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_id79067
2item_category(Tags.CATEGORICAL, Tags.ITEM)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_category79067
3item_shop(Tags.CATEGORICAL, Tags.ITEM)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_shop79067
4item_brand(Tags.CATEGORICAL, Tags.ITEM)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_brand79067
5user_shops(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_shops77366
6user_profile(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...073user_profile7418
7user_group(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...013user_group1416
8user_gender(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...04user_gender516
9user_age(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...08user_age916
10user_consumption_2(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...05user_consumption_2616
11user_is_occupied(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...04user_is_occupied516
12user_geography(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...06user_geography716
13user_intentions(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_intentions77366
14user_brands(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_brands77366
15user_categories(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_categories77366
\n", + "
" + ], + "text/plain": [ + "[{'name': 'user_id', 'tags': {, , }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_id.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_id'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_id', 'tags': {, , }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_id.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_id'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_category', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_category.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_category'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_shop', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_shop.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_shop'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_brand', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_brand.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_brand'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_shops', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_shops.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_shops'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_profile', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_profile.parquet', 'domain': {'min': 0, 'max': 73, 'name': 'user_profile'}, 'embedding_sizes': {'cardinality': 74, 'dimension': 18}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_group', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': 
'/workspace/data/categories/categories/unique.user_group.parquet', 'domain': {'min': 0, 'max': 13, 'name': 'user_group'}, 'embedding_sizes': {'cardinality': 14, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_gender', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_gender.parquet', 'domain': {'min': 0, 'max': 4, 'name': 'user_gender'}, 'embedding_sizes': {'cardinality': 5, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_age', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_age.parquet', 'domain': {'min': 0, 'max': 8, 'name': 'user_age'}, 'embedding_sizes': {'cardinality': 9, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_consumption_2', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_consumption_2.parquet', 'domain': {'min': 0, 'max': 5, 'name': 'user_consumption_2'}, 'embedding_sizes': {'cardinality': 6, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_is_occupied', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_is_occupied.parquet', 'domain': {'min': 0, 'max': 4, 'name': 'user_is_occupied'}, 'embedding_sizes': {'cardinality': 5, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_geography', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_geography.parquet', 'domain': {'min': 0, 'max': 6, 'name': 'user_geography'}, 'embedding_sizes': {'cardinality': 7, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_intentions', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_intentions.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_intentions'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_brands', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_brands.parquet', 'domain': {'min': 0, 'max': 772, 'name': 
'user_brands'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_categories', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_categories.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_categories'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "workflow.output_schema" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "440ce7a2-9a6d-47aa-9506-b7027ab0ffa9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_2_layer_call_fn, model_context_2_layer_call_and_return_conditional_losses, prepare_list_features_2_layer_call_fn, prepare_list_features_2_layer_call_and_return_conditional_losses, dense_9_layer_call_fn while saving (showing 5 of 96). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, workflow.input_schema)\n", + "\n", + "export_path = os.path.join(DATA_FOLDER, \"ensemble\")\n", + "\n", + "ens_conf, node_confs = ensemble.export(export_path)" + ] + }, + { + "cell_type": "markdown", + "id": "987010e0-fae5-48dc-9781-f68e1e0035f3", + "metadata": {}, + "source": [ + "Display the path to the directory with the ensemble." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "cfa8da6c-1f81-4ee2-b8d2-3dca75aeaedc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/workspace/data/ensemble\n" + ] + } + ], + "source": [ + "print(export_path)" + ] + }, + { + "cell_type": "markdown", + "id": "8d47dd90-39df-4c0b-849c-c06f11977512", + "metadata": {}, + "source": [ + "### Verification of Ensemble Artifacts" + ] + }, + { + "cell_type": "markdown", + "id": "6d09fb14-92d8-44d4-b727-91ecf0e2dd71", + "metadata": {}, + "source": [ + "After we export the ensemble, we can check the export path for the graph's artifacts. 
The directory structure represents an ordering number followed by an operator identifier, such as `0_transformworkflowtriton`, `1_predicttensorflowtriton`, and so on.\n",
+ "\n",
+ "Inside each of those directories, the export method writes a config.pbtxt file and a directory with a number. The number indicates the version and begins at 1. The artifacts for each operator are found inside the version folder. These artifacts vary depending on the operator in use.\n",
+ "\n",
+ "We use the `seedir` python package, installed earlier, to view some of the directory contents."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "be28d294-f9f2-4086-9d79-5bd9d93c603a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ensemble/\n",
+ "├─0_transformworkflowtriton/\n",
+ "│ ├─1/\n",
+ "│ │ ├─model.py\n",
+ "│ │ └─workflow/\n",
+ "│ └─config.pbtxt\n",
+ "├─1_predicttensorflowtriton/\n",
+ "│ ├─1/\n",
+ "│ │ └─model.savedmodel/\n",
+ "│ └─config.pbtxt\n",
+ "└─executor_model/\n",
+ "  ├─1/\n",
+ "  │ ├─ensemble/\n",
+ "  │ └─model.py\n",
+ "  └─config.pbtxt\n"
+ ]
+ }
+ ],
+ "source": [
+ "sd.seedir(export_path, style='lines', itemlimit=10, depthlimit=3, exclude_folders='.ipynb_checkpoints', sort=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9d0bc00b-3d39-4093-90d6-6bc4c84507f9",
+ "metadata": {},
+ "source": [
+ "### Starting Triton Server"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d42fb70f-a9b3-4240-b190-b6e68b0c9b88",
+ "metadata": {},
+ "source": [
+ "After we export the ensemble, we are ready to start the Triton Inference Server. The server is installed in all the Merlin inference containers. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server documentation.\n",
+ "\n",
+ "You can start the server by running the following command:\n",
+ "```\n",
+ "tritonserver --model-repository=/workspace/data/ensemble\n",
+ "```\n",
+ "\n",
+ "For the `--model-repository` argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n",
+ "\n",
+ "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n",
+ "```\n",
+ "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001\n",
+ "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000\n",
+ "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0c838873-ee04-45bb-9725-01105c66956b",
+ "metadata": {},
+ "source": [
+ "### Retrieving Recommendations from Triton Inference Server\n",
+ "Now that our server is running, we can send requests to it. This request is composed of values that correspond to the request schema that was created when we exported the ensemble graph.\n",
+ "\n",
+ "In the code below we create a request to send to Triton and send it. We will then analyze the response to show the full experience.\n",
+ "\n",
+ "First, we need to ensure that we have a client connected to the server that we started. To do this, we use the Triton HTTP client library."
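+ "\n",
+ "Because the server can take a little while to load the ensemble, it can help to poll its readiness before sending requests. A minimal sketch, assuming the default HTTP port 8000 (the same check also exists on the GRPC client):\n",
+ "\n",
+ "```python\n",
+ "import time\n",
+ "import tritonclient.http as client\n",
+ "\n",
+ "# poll the server until it reports ready, or give up after ~60s\n",
+ "probe = client.InferenceServerClient(url=\"localhost:8000\")\n",
+ "for _ in range(60):\n",
+ "    try:\n",
+ "        if probe.is_server_ready():\n",
+ "            break\n",
+ "    except Exception:\n",
+ "        pass  # server not accepting connections yet\n",
+ "    time.sleep(1)\n",
+ "else:\n",
+ "    raise RuntimeError(\"Triton did not become ready in time\")\n",
+ "```"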
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "bc26afee-8853-4bb0-a027-68613248088c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "client created.\n" + ] + } + ], + "source": [ + "import tritonclient.http as client\n", + "\n", + "# Create a triton client\n", + "try:\n", + " triton_client = client.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", + " print(\"client created.\")\n", + "except Exception as e:\n", + " print(\"channel creation failed: \" + str(e))" + ] + }, + { + "cell_type": "markdown", + "id": "23fb2d2e-60f2-4406-82aa-f040403729a3", + "metadata": {}, + "source": [ + "After we create the client and verified it is connected to the server instance, we can communicate with the server and ensure all the models are loaded correctly." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b12afdf8-1e73-4eb3-9ea1-85dd4773a649", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GET /v2/health/live, headers None\n", + "\n", + "POST /v2/repository/index, headers None\n", + "\n", + "\n", + "bytearray(b'[{\"name\":\"0_transformworkflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"1_predicttensorflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"executor_model\",\"version\":\"1\",\"state\":\"READY\"}]')\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'name': '0_transformworkflowtriton', 'version': '1', 'state': 'READY'},\n", + " {'name': '1_predicttensorflowtriton', 'version': '1', 'state': 'READY'},\n", + " {'name': 'executor_model', 'version': '1', 'state': 'READY'}]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# ensure triton is in a good state\n", + "triton_client.is_server_live()\n", + "triton_client.get_model_repository_index()" + ] + }, + { + "cell_type": "markdown", + "id": "420d5603-568f-41f4-ac0d-c5852b9f33dd", + "metadata": {}, + "source": [ + "After verifying the models are correctly loaded by the server, we use some original, raw validation data and send it as an inference request to the server.\n", + "\n", + "The df_lib object is cudf if a GPU is available and pandas otherwise." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5bb6aec6-0a7e-41a7-b42d-0a378c530d28", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_iditem_iditem_categoryitem_shopitem_branduser_shopsuser_profileuser_groupuser_genderuser_ageuser_consumption_2user_is_occupieduser_geographyuser_intentionsuser_brandsuser_categories
__null_dask_index__
800000252685593620451670211111148483088
80000128134128509821879211111154493498
80000292423882557111111116227730
\n", + "
" + ], + "text/plain": [ + " user_id item_id item_category item_shop item_brand \\\n", + "__null_dask_index__ \n", + "800000 25 26 85 5936 2045 \n", + "800001 28 13 41 2850 982 \n", + "800002 9 2 4 238 82 \n", + "\n", + " user_shops user_profile user_group user_gender \\\n", + "__null_dask_index__ \n", + "800000 1670 2 1 1 \n", + "800001 1879 2 1 1 \n", + "800002 557 1 1 1 \n", + "\n", + " user_age user_consumption_2 user_is_occupied \\\n", + "__null_dask_index__ \n", + "800000 1 1 1 \n", + "800001 1 1 1 \n", + "800002 1 1 1 \n", + "\n", + " user_geography user_intentions user_brands \\\n", + "__null_dask_index__ \n", + "800000 1 484 830 \n", + "800001 1 544 934 \n", + "800002 1 162 277 \n", + "\n", + " user_categories \n", + "__null_dask_index__ \n", + "800000 88 \n", + "800001 98 \n", + "800002 30 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from merlin.core.dispatch import get_lib\n", + "\n", + "df_lib = get_lib()\n", + "\n", + "# read in data for request\n", + "batch = df_lib.read_parquet(\n", + " os.path.join(DATA_FOLDER,\"valid\", \"part.0.parquet\"), columns=workflow.input_schema.column_names\n", + ").head(3)\n", + "batch" + ] + }, + { + "cell_type": "markdown", + "id": "06b23899-dbb4-4701-bfa3-0d21da333159", + "metadata": {}, + "source": [ + "After we isolate our batch, we convert the dataframe representation into inputs for Triton. We also declare the outputs that we expect to receive from the model." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "080d84dc-9c09-4d94-8ced-8d160ca88f01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['click/binary_output']\n" + ] + } + ], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "import tritonclient.grpc as grpcclient\n", + "# create inputs and outputs\n", + "\n", + "inputs = convert_df_to_triton_input(workflow.input_schema, batch, grpcclient.InferInput)\n", + "\n", + "output_cols = ensemble.graph.output_schema.column_names\n", + "print(output_cols)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in output_cols\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "01208b4d-5478-48a3-9114-b904b2ca2167", + "metadata": {}, + "source": [ + "Now that our inputs and outputs are created, we can use the triton_client that we created earlier to send the inference request." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "95dea3b8-92aa-41f9-a1b4-2cb516c6b793", + "metadata": {}, + "outputs": [], + "source": [ + "# send request to tritonserver\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, request_id=\"1\", outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "id": "b921c299-df76-45ef-9acc-5b17bc52bd3a", + "metadata": {}, + "source": [ + "When the server completes the inference request, it returns a response, i.e. likelihood per request. This response is parsed to get the desired predictions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b766ef55-5661-4268-aed9-6f4096cce58d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.5002032]\n", + " [0.5001995]\n", + " [0.5001995]]\n" + ] + } + ], + "source": [ + "predictions = response.as_numpy('click/binary_output')\n", + "print(predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "24ee5636-600a-4422-8165-f70e8e847031", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This sample notebook started with data preprocessing and model training. We learned how to create an ensemble graph, verify the ensemble artifacts in the file system, and then put the ensemble into production with Triton Inference Server. Finally, we sent a simple inference request to the server and printed the response." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "merlin": { + "containers": [ + "nvcr.io/nvidia/merlin/merlin-tensorflow:latest" + ] + }, + "vscode": { + "interpreter": { + "hash": "a398807c5c2ed8e5ff9d9890488d007fa99cbabcec733962e21659a28c5da99b" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb b/examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb new file mode 100644 index 000000000..d645a172f --- /dev/null +++ b/examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb @@ -0,0 +1,488 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5cdba80f", + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2022 NVIDIA Corporation. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================\n", + "\n", + "# Each user is responsible for checking the content of datasets and the\n", + "# applicable licenses and determining if suitable for the intended use." + ] + }, + { + "cell_type": "markdown", + "id": "77acbcad", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Serving an Implicit Model with Merlin Systems\n", + "\n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow) container. 
This Jupyter notebook example demonstrates how to deploy an `Implicit` model to Triton Inference Server (TIS) and generate prediction results for a given query.\n",
+ "\n",
+ "## Overview\n",
+ "\n",
+ "NVIDIA Merlin is an open source framework that accelerates and scales end-to-end recommender system pipelines. The Merlin framework is broken up into several subcomponents, including Merlin-Core, Merlin-Models, NVTabular, and Merlin-Systems. Merlin Systems will be the focus of this example.\n",
+ "\n",
+ "The purpose of the Merlin Systems library is to make it easy for Merlin users to quickly deploy their recommender systems from development to [Triton Inference Server](https://github.com/triton-inference-server/server). It extends the same user-friendly API that users are accustomed to from NVTabular and leverages it to deploy recommender system components to TIS.\n",
+ "\n",
+ "### Learning objectives\n",
+ "\n",
+ "In this notebook, we learn how to deploy an NVTabular Workflow and a trained `Implicit` model from Merlin Models to Triton.\n",
+ "- Create Ensemble Graph\n",
+ "- Export Ensemble Graph\n",
+ "- Run Triton server\n",
+ "- Send request to Triton and verify results\n",
+ "\n",
+ "### Dataset\n",
+ "\n",
+ "We use the [MovieLens 100k Dataset](https://grouplens.org/datasets/movielens/100k/). It consists of ratings a user has given a movie along with some metadata for the user and the movie. We train an Implicit model to predict the rating based on user and item features and proceed to deploy it to the Triton Inference Server.\n",
+ "\n",
+ "It is important to note that the steps taken in this notebook are generalized and can be applied to any set of workflows and models.\n",
+ "\n",
+ "### Tools\n",
+ "\n",
+ "- NVTabular\n",
+ "- Merlin Models\n",
+ "- Merlin Systems\n",
+ "- Triton Inference Server"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6efad6b8",
+ "metadata": {},
+ "source": [
+ "## Prerequisite: Preparing the data and Training Implicit"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "356ef8c9",
+ "metadata": {},
+ "source": [
+ "In this tutorial, our objective is to demonstrate how to serve an `Implicit` model. In order to be able to do so, we begin by downloading data and training a model. We breeze through these activities below.\n",
+ "\n",
+ "If you would like to learn more about training an `Implicit` model using the Merlin Models library, please consult this [tutorial](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/07-Train-traditional-ML-models-using-the-Merlin-Models-API.ipynb)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edea28d0", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import nvtabular as nvt\n", + "import numpy as np\n", + "from merlin.schema.tags import Tags\n", + "from merlin.models.implicit import BayesianPersonalizedRanking\n", + "\n", + "from merlin.datasets.entertainment import get_movielens\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b756a12f", + "metadata": {}, + "outputs": [], + "source": [ + "ensemble_export_path = os.environ.get(\"OUTPUT_DATA_DIR\", \"ensemble\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c10a993", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "train, _ = get_movielens(variant='ml-100k')\n", + "\n", + "# the implicit model expects a `user_id` column hence the need to rename it\n", + "train = nvt.Dataset(train.compute().rename(columns = {'userId': 'user_id'}))\n", + "\n", + "user_id = ['user_id'] >> nvt.ops.Categorify() >> nvt.ops.TagAsUserID()\n", + "movieId = ['movieId'] >> nvt.ops.Categorify() >> nvt.ops.TagAsItemID()\n", + "\n", + "train_workflow = nvt.Workflow(user_id + movieId)\n", + "train_transformed = train_workflow.fit_transform(train)" + ] + }, + { + "cell_type": "markdown", + "id": "ff168b4a", + "metadata": {}, + "source": [ + "Having preprocessed our data, let's train our model." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d0b55be5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2022-09-05 09:32:07.681291: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2022-09-05 09:32:07.681740: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2022-09-05 09:32:07.681877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + " warnings.warn(\n", + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 777.52it/s, train_auc=85.42%, skipped=29.68%]\n" + ] + } + ], + "source": [ + "model = BayesianPersonalizedRanking()\n", + "model.fit(train_transformed)" + ] + }, + { + "cell_type": "markdown", + "id": "f4a3cf39", + "metadata": {}, + "source": [ + "## Create the Ensemble Graph" + ] + }, + { + "cell_type": "markdown", + "id": "dc40083e", + "metadata": {}, + "source": [ + "Let us now define an `Ensemble` that will be used for serving predictions on the Triton Inference Server.\n", + "\n", + "An `Ensemble` defines 
operations to be performed on incoming requests. It begins with specifying what fields the inference request will contain.\n", + "\n", + "Our model was trained on data that included the `movieId` column. However, in production, this information will not be available to us; it is what we will be trying to predict.\n", + "\n", + "In general, you want to define a preprocessing workflow once and apply it throughout the lifecycle of your model, from training all the way to serving in production. Redefining the workflows on the go, or using custom-written code for these operations, can be a source of subtle bugs.\n", + "\n", + "In order to ensure we process our data in the same way in production as we do in training, let us now modify the training preprocessing pipeline and use it to construct our inference workflow." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fa8dc34a", + "metadata": {}, + "outputs": [], + "source": [ + "inf_workflow = train_workflow.remove_inputs(['movieId'])" + ] + }, + { + "cell_type": "markdown", + "id": "d71c5636", + "metadata": {}, + "source": [ + "Equipped with the modified data preprocessing workflow, let us define the full set of inference operations we will want to run on the Triton Inference Server.\n", + "\n", + "We begin by stating what data the server can expect (`inf_workflow.input_schema.column_names`). We proceed to wrap our `inf_workflow` in `TransformWorkflow` -- an operator we can leverage for executing our NVTabular workflow during serving.\n", + "\n", + "Last but not least, having received and preprocessed the data, we instruct the Triton Inference Server to perform inference using the model that we trained. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "de9e2237", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.dag.ops.implicit import PredictImplicit\n", + "from merlin.systems.dag.ensemble import Ensemble\n", + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "\n", + "inf_ops = inf_workflow.input_schema.column_names >> TransformWorkflow(inf_workflow) \\\n", + " >> PredictImplicit(model.implicit_model)" + ] + }, + { + "cell_type": "markdown", + "id": "76dad9c3", + "metadata": {}, + "source": [ + "With inference operations defined, all that remains now is outputting the ensemble to disk so that it can be loaded up when Triton starts." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e23a7fc3", + "metadata": {}, + "outputs": [], + "source": [ + "ensemble = Ensemble(inf_ops, inf_workflow.input_schema)\n", + "ensemble.export(ensemble_export_path);" + ] + }, + { + "cell_type": "markdown", + "id": "c9165dfd", + "metadata": {}, + "source": [ + "## Starting the Triton Inference Server" + ] + }, + { + "cell_type": "markdown", + "id": "353e8602", + "metadata": {}, + "source": [ + "After we export the ensemble, we are ready to start the Triton Inference Server. The server is installed in the Merlin TensorFlow and Merlin PyTorch containers. If you are not using one of our containers, then ensure it is installed in your environment. 
For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation).\n", + "\n", + "You can start the server by running the following command:\n", + "\n", + "```shell\n", + "tritonserver --model-repository=ensemble\n", + "```\n", + "\n", + "For the `--model-repository` argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n", + "\n", + "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n", + "\n", + "```shell\n", + "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000\n", + "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "65b7e4e8", + "metadata": {}, + "source": [ + "## Retrieving Recommendations from Triton Inference Server\n", + "\n", + "Now that our server is running, we can send requests to it. Each request is composed of values that correspond to the request schema that was created when we exported the ensemble graph.\n", + "\n", + "In the code below, we create a request and send it to Triton. We then analyze the response to show the full round trip.\n", + "\n", + "We begin by obtaining 10 examples from our train data to include in the request." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2d61751b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id
06
115
270
386
496
5109
6143
7183
8609
9858
\n", + "
" + ], + "text/plain": [ + " user_id\n", + "0 6\n", + "1 15\n", + "2 70\n", + "3 86\n", + "4 96\n", + "5 109\n", + "6 143\n", + "7 183\n", + "8 609\n", + "9 858" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ten_examples = train.compute()['user_id'].unique().sample(10).sort_values().to_frame().reset_index(drop=True)\n", + "ten_examples" + ] + }, + { + "cell_type": "markdown", + "id": "7808bc12", + "metadata": {}, + "source": [ + "Let's now package the information up as inputs and send it to Triton for inference." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2fefd5b8", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "import tritonclient.grpc as grpcclient\n", + "\n", + "inputs = convert_df_to_triton_input(inf_workflow.input_schema, ten_examples, grpcclient.InferInput)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in inf_ops.output_schema.column_names\n", + "]\n", + "# send request to tritonserver\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "id": "3dc7909f", + "metadata": {}, + "source": [ + "We can now compare the predictions from the server to those from our local model." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6ddd35cc", + "metadata": {}, + "outputs": [], + "source": [ + "predictions_from_triton = response.as_numpy(outputs[0].name())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6f28fdfe", + "metadata": {}, + "outputs": [], + "source": [ + "local_predictions = model.predict(inf_workflow.transform(nvt.Dataset(ten_examples)))[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e946de27", + "metadata": {}, + "outputs": [], + "source": [ + "np.testing.assert_allclose(predictions_from_triton, local_predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "d8aa4456", + "metadata": {}, + "source": [ + "We managed to preprocess the data in the same way in serving as we did during training and obtain the same predictions!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb b/examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb new file mode 100644 index 000000000..88cc2be8c --- /dev/null +++ b/examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb @@ -0,0 +1,545 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5cdba80f", + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2022 NVIDIA Corporation. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================\n", + "\n", + "# Each user is responsible for checking the content of datasets and the\n", + "# applicable licenses and determining if suitable for the intended use." + ] + }, + { + "cell_type": "markdown", + "id": "77acbcad", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Serving an XGBoost Model with Merlin Systems\n", + "\n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow) container. This Jupyter notebook example demonstrates how to deploy an `XGBoost` model to Triton Inference Server (TIS) and generate prediction results for a given query.\n", + "\n", + "## Overview\n", + "\n", + "NVIDIA Merlin is an open source framework that accelerates and scales end-to-end recommender system pipelines. The Merlin framework is broken up into several subcomponents: Merlin-Core, Merlin-Models, NVTabular, and Merlin-Systems. Merlin Systems will be the focus of this example.\n", + "\n", + "The purpose of the Merlin Systems library is to make it easy for Merlin users to quickly deploy their recommender systems from development to [Triton Inference Server](https://github.com/triton-inference-server/server). We extended the same user-friendly API users are accustomed to in NVTabular and leveraged it to accommodate deploying recommender system components to TIS. \n", + "\n", + "### Learning objectives\n", + "\n", + "In this notebook, we learn how to deploy an NVTabular Workflow and a trained XGBoost model from Merlin Models to Triton.\n", + "- Create Ensemble Graph\n", + "- Export Ensemble Graph\n", + "- Run Triton server\n", + "- Send request to Triton and verify results\n", + "\n", + "### Dataset\n", + "\n", + "We use the [MovieLens 100k Dataset](https://grouplens.org/datasets/movielens/100k/). It consists of ratings users have given movies, along with some metadata for the user and the movie. We train an XGBoost model to predict the rating based on user and item features and proceed to deploy it to the Triton Inference Server.\n", + "\n", + "It is important to note that the steps taken in this notebook are generalized and can be applied to any set of workflows and models. \n", + "\n", + "### Tools\n", + "\n", + "- NVTabular\n", + "- Merlin Models\n", + "- Merlin Systems\n", + "- Triton Inference Server" + ] + }, + { + "cell_type": "markdown", + "id": "6efad6b8", + "metadata": {}, + "source": [ + "## Prerequisite: Preparing the data and Training XGBoost" + ] + }, + { + "cell_type": "markdown", + "id": "356ef8c9", + "metadata": {}, + "source": [ + "In this tutorial, our objective is to demonstrate how to serve an `XGBoost` model. In order to do so, we begin by downloading data and training a model. 
We breeze through these activities below.\n", + "\n", + "If you would like to learn more about training an `XGBoost` model using the Merlin Models library, please consult this [tutorial](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/07-Train-an-xgboost-model-using-the-Merlin-Models-API.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0385d38", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from merlin.core.utils import Distributed\n", + "from merlin.models.xgb import XGBoost\n", + "import nvtabular as nvt\n", + "import numpy as np\n", + "from merlin.schema.tags import Tags\n", + "\n", + "from merlin.datasets.entertainment import get_movielens" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f79d5736", + "metadata": {}, + "outputs": [], + "source": [ + "ensemble_export_path = os.environ.get(\"OUTPUT_DATA_DIR\", \"ensemble\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0a2d3208", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-08-05 22:27:29.446602: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2022-08-05 22:27:29.447091: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2022-08-05 22:27:29.447227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "downloading ml-100k.zip: 4.94MB [00:03, 1.45MB/s] \n", + "unzipping files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 262.32files/s]\n", + "INFO:merlin.datasets.entertainment.movielens.dataset:starting ETL..\n", + "/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + " warnings.warn(\n", + "2022-08-05 22:27:39,947 - distributed.diskutils - INFO - Found stale lock file and directory '/workspace/dask-worker-space/worker-oqemvhkv', purging\n", + "2022-08-05 22:27:39,947 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n", + "[22:27:41] task [xgboost.dask]:tcp://127.0.0.1:41809 got new rank 0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0]\ttrain-rmse:2.36952\n", + "[20]\ttrain-rmse:0.95316\n", + "[40]\ttrain-rmse:0.92447\n", + "[60]\ttrain-rmse:0.90741\n", + "[80]\ttrain-rmse:0.89437\n", + "[84]\ttrain-rmse:0.89138\n" + ] + } + ], + "source": [ + "\n", + "train, _ = get_movielens(variant='ml-100k')\n", + "\n", + "preprocess_categories = ['movieId', 'userId', 'genres'] >> nvt.ops.Categorify(freq_threshold=2, dtype=np.int32)\n", + "preprocess_rating = ['rating'] >> nvt.ops.AddTags(tags=[Tags.TARGET, Tags.REGRESSION])\n", + "\n", + "train_workflow = nvt.Workflow(preprocess_categories + preprocess_rating + train.schema.without(['rating_binary', 'title']).column_names)\n", + "train_transformed 
= train_workflow.fit_transform(train)\n", + "\n", + "with Distributed():\n", + " model = XGBoost(schema=train_transformed.schema)\n", + " model.fit(\n", + " train_transformed,\n", + " num_boost_round=85,\n", + " verbose_eval=20\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f4a3cf39", + "metadata": {}, + "source": [ + "## Create the Ensemble Graph" + ] + }, + { + "cell_type": "markdown", + "id": "dc40083e", + "metadata": {}, + "source": [ + "Let us now define an `Ensemble` that will be used for serving predictions on the Triton Inference Server.\n", + "\n", + "An `Ensemble` defines operations to be performed on incoming requests. It begins with specifying what fields the inference request will contain.\n", + "\n", + "Our model was trained on data that included the target column, `rating`. However, in production, this information will not be available to us.\n", + "\n", + "In general, you want to define a preprocessing workflow once and apply it throughout the lifecycle of your model, from training all the way to serving in production. Redefining the workflows on the go, or using custom-written code for these operations, can be a source of subtle bugs.\n", + "\n", + "In order to ensure we process our data in the same way in production as we do in training, let us now modify the training preprocessing pipeline and use it to construct our inference workflow." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fa8dc34a", + "metadata": {}, + "outputs": [], + "source": [ + "inf_workflow = train_workflow.remove_inputs(['rating'])" + ] + }, + { + "cell_type": "markdown", + "id": "d71c5636", + "metadata": {}, + "source": [ + "Equipped with the modified data preprocessing workflow, let us define the full set of inference operations we will want to run on the Triton Inference Server.\n", + "\n", + "We begin by stating what data the server can expect (`inf_workflow.input_schema.column_names`). We proceed to wrap our `inf_workflow` in `TransformWorkflow` -- an operator we can leverage for executing our NVTabular workflow during serving.\n", + "\n", + "Last but not least, having received and preprocessed the data, we instruct the Triton Inference Server to perform inference using the model that we trained. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "de9e2237", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.dag.ops.fil import PredictForest\n", + "from merlin.systems.dag.ensemble import Ensemble\n", + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "\n", + "inf_ops = inf_workflow.input_schema.column_names >> TransformWorkflow(inf_workflow) \\\n", + " >> PredictForest(model, inf_workflow.output_schema)" + ] + }, + { + "cell_type": "markdown", + "id": "76dad9c3", + "metadata": {}, + "source": [ + "With inference operations defined, all that remains now is outputting the ensemble to disk so that it can be loaded up when Triton starts." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e23a7fc3", + "metadata": {}, + "outputs": [], + "source": [ + "ensemble = Ensemble(inf_ops, inf_workflow.input_schema)\n", + "ensemble.export(ensemble_export_path);" + ] + }, + { + "cell_type": "markdown", + "id": "c9165dfd", + "metadata": {}, + "source": [ + "## Starting the Triton Inference Server" + ] + }, + { + "cell_type": "markdown", + "id": "353e8602", + "metadata": {}, + "source": [ + "After we export the ensemble, we are ready to start the Triton Inference Server. 
The server is installed in the Merlin TensorFlow and Merlin PyTorch containers. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation).\n", + "\n", + "You can start the server by running the following command:\n", + "\n", + "```shell\n", + "tritonserver --model-repository=ensemble\n", + "```\n", + "\n", + "For the `--model-repository` argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n", + "\n", + "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n", + "\n", + "```shell\n", + "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000\n", + "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "65b7e4e8", + "metadata": {}, + "source": [ + "## Retrieving Recommendations from Triton Inference Server\n", + "\n", + "Now that our server is running, we can send requests to it. Each request is composed of values that correspond to the request schema that was created when we exported the ensemble graph.\n", + "\n", + "In the code below, we create a request and send it to Triton. We then analyze the response to show the full round trip.\n", + "\n", + "We begin by obtaining 10 examples from our train data to include in the request." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2d61751b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movieIduserIdgenresTE_movieId_ratinguserId_countgenderzip_coderatingrating_binaryagetitle
0777430.7798765.572154177511Toy Story (1995)
12317713-0.8966195.572154177301GoldenEye (1995)
23667717-0.9546325.572154177411Four Rooms (1995)
3967789-0.0938095.572154177301Get Shorty (1995)
43837725-0.5393765.572154177301Copycat (1995)
\n", + "
" + ], + "text/plain": [ + " movieId userId genres TE_movieId_rating userId_count gender zip_code \\\n", + "0 7 77 43 0.779876 5.572154 1 77 \n", + "1 231 77 13 -0.896619 5.572154 1 77 \n", + "2 366 77 17 -0.954632 5.572154 1 77 \n", + "3 96 77 89 -0.093809 5.572154 1 77 \n", + "4 383 77 25 -0.539376 5.572154 1 77 \n", + "\n", + " rating rating_binary age title \n", + "0 5 1 1 Toy Story (1995) \n", + "1 3 0 1 GoldenEye (1995) \n", + "2 4 1 1 Four Rooms (1995) \n", + "3 3 0 1 Get Shorty (1995) \n", + "4 3 0 1 Copycat (1995) " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ten_examples = train.compute()\n", + "ten_examples.head()" + ] + }, + { + "cell_type": "markdown", + "id": "7808bc12", + "metadata": {}, + "source": [ + "Let's now package the information up as inputs and send it to Triton for inference." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2fefd5b8", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "import tritonclient.grpc as grpcclient\n", + "\n", + "ten_examples = train.compute().drop(columns=['rating', 'title', 'rating_binary'])[:10]\n", + "inputs = convert_df_to_triton_input(inf_workflow.input_schema, ten_examples, grpcclient.InferInput)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in inf_ops.output_schema.column_names\n", + "]\n", + "# send request to tritonserver\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "id": "3dc7909f", + "metadata": {}, + "source": [ + "We can now compare the predictions from the server to those from our local model." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6ddd35cc", + "metadata": {}, + "outputs": [], + "source": [ + "predictions_from_triton = response.as_numpy(outputs[0].name())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6f28fdfe", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/distributed/node.py:180: UserWarning: Port 8787 is already in use.\n", + "Perhaps you already have a cluster running?\n", + "Hosting the HTTP server on port 35647 instead\n", + " warnings.warn(\n", + "2022-08-05 22:28:22,197 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n" + ] + } + ], + "source": [ + "with Distributed():\n", + " local_predictions = model.predict(train_transformed)[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e946de27", + "metadata": {}, + "outputs": [], + "source": [ + "assert np.allclose(predictions_from_triton, local_predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "d8aa4456", + "metadata": {}, + "source": [ + "We managed to preprocess the data in the same way in serving as we did during training and obtain the same predictions!" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/integration/examples/quick_start/test_preproc.py b/tests/integration/examples/quick_start/test_preproc.py new file mode 100644 index 000000000..73204271d --- /dev/null +++ b/tests/integration/examples/quick_start/test_preproc.py @@ -0,0 +1,292 @@ +import os +import tempfile + +import cudf +import numpy as np +import pytest +from examples.quick_start.scripts.preproc.preprocessing import PreprocessingRunner +from merlin.schema import Tags +from merlin.schema.io.tensorflow_metadata import TensorflowMetadata + +STANDARD_CI_TENREC_DATA_PATH = "/raid/data/tenrec_ci/" + + +def kwargs_to_cli_ags(**kwargs): + cli_args = [] + for k, v in kwargs.items(): + cli_args.append(f"--{k}") + if v is not None: + cli_args.append(str(v)) + args = PreprocessingRunner.parse_cli_args(cli_args) + return args + + +@pytest.fixture +def tenrec_data_path(): + data_path = os.getenv("CI_TENREC_DATA_PATH", STANDARD_CI_TENREC_DATA_PATH) + return data_path + + +def get_schema_from_path(path): + tf_metadata = TensorflowMetadata.from_proto_text_file(str(path)) + schema = tf_metadata.to_merlin_schema() + return schema + + +def check_schema(path, categ_cols_max_values=None): + schema = get_schema_from_path(path) + assert set(schema.column_names) == set( + [ + "user_id", + "item_id", + "video_category", + "gender", + "age", + "click", + "follow", + "like", + "share", + "watching_times", + "TE_user_id_follow", + "TE_item_id_follow", + "TE_user_id_click", + "TE_item_id_click", + ] + ) + + assert set(schema.select_by_tag(Tags.USER_ID).column_names) == set(["user_id"]) + assert set(schema.select_by_tag(Tags.ITEM_ID).column_names) == set(["item_id"]) + assert set(schema.select_by_tag(Tags.CATEGORICAL).column_names) == set( + ["user_id", "item_id", "video_category", "gender"] + ) + target_encoding_feats = [ + "TE_user_id_click", + "TE_user_id_follow", + "TE_item_id_click", + "TE_item_id_follow", + ] + assert set(schema.select_by_tag(Tags.CONTINUOUS).column_names) == set( + ["age"] + target_encoding_feats + ) + assert set(schema.select_by_tag(Tags.BINARY_CLASSIFICATION).column_names) == set( + ["click", "follow", "like", "share"] + ) + assert set(schema.select_by_tag(Tags.REGRESSION).column_names) == set( + ["watching_times"] + ) + assert set(schema.select_by_tag(Tags.TARGET).column_names) == set( + ["click", "follow", "like", "share", "watching_times"] + ) + + if categ_cols_max_values: + categ_features = schema.select_by_tag(Tags.CATEGORICAL).column_names + for col in categ_features: + assert schema[col].int_domain.max == categ_cols_max_values[col] + + return schema + + +@pytest.mark.parametrize("use_dask_cluster", [True, False]) +def test_ranking_preprocessing(tenrec_data_path, use_dask_cluster): + with tempfile.TemporaryDirectory() as tmp_output_folder: + additional_kwargs = {} + if use_dask_cluster: + additional_kwargs["enable_dask_cuda_cluster"] = None + additional_kwargs["persist_intermediate_files"] = None + + args = kwargs_to_cli_ags( + data_path=os.path.join(tenrec_data_path, "raw/QK-video-10M.csv"), + input_data_format="csv", + csv_na_values="\\N", + 
output_path=tmp_output_folder, + categorical_features="user_id,item_id,video_category,gender", + continuous_features="age", + target_encoding_features="user_id,item_id", + target_encoding_targets="click,follow", + binary_classif_targets="click,follow,like,share", + regression_targets="watching_times", + to_int32="user_id,item_id", + to_int16="watching_times", + to_int8="gender,age,video_category,click,follow,like,share", + user_id_feature="user_id", + item_id_feature="item_id", + **additional_kwargs, + ) + runner = PreprocessingRunner(args) + runner.run() + + expected_max_values = { + "user_id": 296088, + "item_id": 617033, + "video_category": 2, + "gender": 3, + "click": 1, + "follow": 1, + "like": 1, + "share": 1, + "watching_times": 528, + } + + schema = check_schema( + os.path.join(tmp_output_folder, "train/"), expected_max_values + ) + + expected_dtypes = { + "user_id": np.dtype("int64"), + "item_id": np.dtype("int64"), + "video_category": np.dtype("int64"), + "gender": np.dtype("int64"), + "age": np.dtype("float64"), + "TE_user_id_click": np.dtype("float32"), + "TE_item_id_click": np.dtype("float32"), + "TE_user_id_follow": np.dtype("float32"), + "TE_item_id_follow": np.dtype("float32"), + "click": np.dtype("int8"), + "follow": np.dtype("int8"), + "like": np.dtype("int8"), + "share": np.dtype("int8"), + "watching_times": np.dtype("int16"), + } + + train_df = cudf.read_parquet(os.path.join(tmp_output_folder, "train/*.parquet")) + assert not train_df.isna().max().max() # Check if there are null values + assert len(train_df) == 10000000 # row count + + assert train_df.dtypes.to_dict() == expected_dtypes + + categ_features = schema.select_by_tag(Tags.CATEGORICAL).column_names + target_features = schema.select_by_tag(Tags.TARGET).column_names + assert ( + train_df[categ_features + target_features].max().to_dict() + == expected_max_values + ) + + # Checking age standardization + assert 0.0 == pytest.approx(train_df["age"].mean(), abs=1e-3) + assert 1.0 == pytest.approx(train_df["age"].std(), abs=1e-3) + + # Check target encoding features + te_features = [ + "TE_user_id_follow", + "TE_item_id_follow", + "TE_user_id_click", + "TE_item_id_click", + ] + assert ( + train_df[te_features].min().min() >= 0 + and train_df[te_features].max().max() <= 1 + ) + + +@pytest.mark.parametrize("split_strategy", ["random", "random_by_user", "temporal"]) +def test_ranking_preprocessing_split_strategies(tenrec_data_path, split_strategy): + with tempfile.TemporaryDirectory() as tmp_output_folder: + additional_kwargs = {} + if split_strategy in ["random", "random_by_user"]: + additional_kwargs["random_split_eval_perc"] = 0.2 + elif split_strategy == "temporal": + additional_kwargs["timestamp_feature"] = "item_id" + additional_kwargs["dataset_split_temporal_timestamp"] = 15000 + + args = kwargs_to_cli_ags( + data_path=os.path.join(tenrec_data_path, "raw/QK-video-10M.csv"), + input_data_format="csv", + csv_na_values="\\N", + output_path=tmp_output_folder, + categorical_features="user_id,item_id,video_category,gender,age", + binary_classif_targets="click,follow,like,share", + regression_targets="watching_times", + to_int32="user_id,item_id", + to_int16="watching_times", + to_int8="gender,age,video_category,click,follow,like,share", + user_id_feature="user_id", + item_id_feature="item_id", + dataset_split_strategy=split_strategy, + **additional_kwargs, + ) + runner = PreprocessingRunner(args) + runner.run() + + total_rows = 10000000 + + train_df = cudf.read_parquet(os.path.join(tmp_output_folder, "train/*.parquet")) 
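+        # Tally the rows in each split; the assertions below validate these counts against the requested strategy.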
+ rows_train = len(train_df) + + eval_df = cudf.read_parquet(os.path.join(tmp_output_folder, "eval/*.parquet")) + rows_eval = len(eval_df) + + assert rows_train + rows_eval == total_rows + + if split_strategy in ["random", "random_by_user"]: + assert 0.20 == pytest.approx(rows_eval / float(total_rows), abs=0.02) + + if split_strategy == "random_by_user": + assert train_df["user_id"].nunique() == pytest.approx( + eval_df["user_id"].nunique(), rel=0.05 + ) + + elif split_strategy == "temporal": + assert rows_train == 4636381 + assert rows_eval == 5363619 + + +def test_ranking_preprocessing_filter_strategies(tenrec_data_path): + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + data_path=os.path.join(tenrec_data_path, "raw/QK-video-10M.csv"), + input_data_format="csv", + csv_na_values="\\N", + output_path=tmp_output_folder, + categorical_features="user_id,item_id,video_category,gender,age", + binary_classif_targets="click,follow,like,share", + regression_targets="watching_times", + to_int32="user_id,item_id", + to_int16="watching_times", + to_int8="gender,age,video_category,click,follow,like,share", + user_id_feature="user_id", + item_id_feature="item_id", + filter_query="click==1 or (click==0 and follow==0 and like==0 and share==0)", + min_item_freq=5, + min_user_freq=5, + max_user_freq=200, + num_max_rounds_filtering=5, + ) + runner = PreprocessingRunner(args) + runner.run() + + total_rows = 9102904 + + train_df = cudf.read_parquet(os.path.join(tmp_output_folder, "train/*.parquet")) + assert len(train_df) == total_rows + + assert train_df.groupby("item_id").size().min() >= 5 + assert train_df.groupby("user_id").size().min() >= 5 + + +def test_ranking_preprocessing_freq_capping(tenrec_data_path): + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + data_path=os.path.join(tenrec_data_path, "raw/QK-video-10M.csv"), + input_data_format="csv", + csv_na_values="\\N", + output_path=tmp_output_folder, + categorical_features="user_id,item_id,video_category,gender,age", + binary_classif_targets="click,follow,like,share", + regression_targets="watching_times", + to_int32="user_id,item_id", + to_int16="watching_times", + to_int8="gender,age,video_category,click,follow,like,share", + user_id_feature="user_id", + item_id_feature="item_id", + categ_min_freq_capping=30, + ) + runner = PreprocessingRunner(args) + runner.run() + + total_rows = 10000000 + + train_df = cudf.read_parquet(os.path.join(tmp_output_folder, "train/*.parquet")) + assert len(train_df) == total_rows + + assert train_df.groupby("item_id").size().min() >= 30 + assert train_df.groupby("user_id").size().min() >= 30 diff --git a/tests/integration/examples/quick_start/test_ranking.py b/tests/integration/examples/quick_start/test_ranking.py new file mode 100644 index 000000000..7953e4254 --- /dev/null +++ b/tests/integration/examples/quick_start/test_ranking.py @@ -0,0 +1,502 @@ +import os +import tempfile +import time + +import pytest +from examples.quick_start.scripts.ranking.ranking import RankingTrainEvalRunner +from merlin.io.dataset import Dataset + +STANDARD_CI_TENREC_DATA_PATH = "/raid/data/tenrec_ci/" + + +@pytest.fixture +def tenrec_data_path(): + data_path = os.getenv("CI_TENREC_DATA_PATH", STANDARD_CI_TENREC_DATA_PATH) + return data_path + + +def kwargs_to_cli_ags(**kwargs): + cli_args = [] + for k, v in kwargs.items(): + cli_args.append(f"--{k}") + if v is not None: + cli_args.append(str(v)) + args = RankingTrainEvalRunner.parse_cli_args(cli_args) + return 
args + + +def get_datasets(path): + train_ds = Dataset(os.path.join(path, "preproc/train/*.parquet"), part_size="500MB") + eval_ds = Dataset(os.path.join(path, "preproc/eval/*.parquet"), part_size="500MB") + return train_ds, eval_ds + + +def test_ranking_single_task_mlp(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click", + stl_positive_class_weight=3, + model="mlp", + mlp_layers="64,32", + embedding_sizes_multiplier=6, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + ["loss", "auc", "prauc", "logloss", "regularization_loss", "loss_batch"] + ) + + assert metrics["loss"] < 0.7 + assert metrics["logloss"] < 0.7 + assert metrics["auc"] > 0.75 + assert metrics["prauc"] > 0.60 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] < 0.8 + # assert elapsed_time < 60 # 23s in a V100 + + +def test_ranking_single_task_dlrm(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click", + stl_positive_class_weight=3, + model="dlrm", + embeddings_dim=64, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + mlp_layers="64,32", + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + ["loss", "auc", "prauc", "logloss", "regularization_loss", "loss_batch"] + ) + + assert metrics["loss"] < 0.7 + assert metrics["logloss"] < 0.7 + assert metrics["auc"] > 0.75 + assert metrics["prauc"] > 0.60 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] < 0.8 + # assert elapsed_time < 60 + + +def test_ranking_single_task_dcn(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click", + stl_positive_class_weight=3, + model="dcn", + dcn_interacted_layer_num=5, + mlp_layers="64,32", + embedding_sizes_multiplier=6, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + ["loss", "auc", "prauc", "logloss", "regularization_loss", "loss_batch"] + ) + + assert metrics["loss"] < 0.7 + assert metrics["logloss"] < 0.7 + assert metrics["auc"] > 0.75 + assert metrics["prauc"] > 0.60 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] < 0.8 + # assert 
elapsed_time < 60 + + +def test_ranking_single_task_wide_n_deep(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click", + stl_positive_class_weight=3, + model="wide_n_deep", + wnd_hashed_cross_num_bins=5000, + wnd_ignore_combinations="item_id:video_category,user_id:gender,user_id:age", + wnd_wide_l2_reg=1e-5, + mlp_layers="64,32", + embedding_sizes_multiplier=6, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + ["loss", "auc", "prauc", "logloss", "regularization_loss", "loss_batch"] + ) + + assert metrics["loss"] < 0.7 + assert metrics["logloss"] < 0.7 + assert metrics["auc"] > 0.75 + assert metrics["prauc"] > 0.60 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] < 0.8 + # assert elapsed_time < 60 + + +def test_ranking_single_task_deepfm(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click", + stl_positive_class_weight=3, + model="deepfm", + mlp_layers="64,32", + embedding_sizes_multiplier=6, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + ["loss", "auc", "prauc", "logloss", "regularization_loss", "loss_batch"] + ) + + assert metrics["loss"] < 0.7 + assert metrics["logloss"] < 0.7 + assert metrics["auc"] > 0.75 + assert metrics["prauc"] > 0.60 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] < 0.8 + # assert elapsed_time < 120 + + +def test_ranking_multi_task_dlrm(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click,follow,watching_times", + mtl_pos_class_weight_click=1, + mtl_pos_class_weight_like=2, + mtl_loss_weight_click=1, + mtl_loss_weight_follow=2, + mtl_loss_weight_watching_times=5, + use_task_towers=True, + tower_layers=64, + model="dlrm", + embeddings_dim=64, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + mlp_layers="64,32", + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + [ + "loss", + "click/binary_output_loss", + "follow/binary_output_loss", + "click/binary_output/auc", + "click/binary_output/prauc", + "click/binary_output/logloss", + "follow/binary_output/auc", + 
"follow/binary_output/prauc", + "follow/binary_output/logloss", + "watching_times/regression_output_loss", + "watching_times/regression_output/root_mean_squared_error", + "regularization_loss", + "loss_batch", + ] + ) + + assert metrics["loss"] < 4 + assert metrics["click/binary_output_loss"] < 0.7 + assert metrics["follow/binary_output_loss"] < 0.1 + assert metrics["watching_times/regression_output_loss"] < 0.6 + assert metrics["click/binary_output/auc"] > 0.65 + assert metrics["click/binary_output/prauc"] > 0.5 + assert metrics["click/binary_output/logloss"] < 0.65 + assert metrics["follow/binary_output/auc"] > 0.35 + assert metrics["follow/binary_output/prauc"] > 0 + assert metrics["follow/binary_output/logloss"] < 0.1 + assert metrics["watching_times/regression_output/root_mean_squared_error"] < 0.8 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] > 0.0 + # assert elapsed_time < 60 + + +def test_ranking_multi_task_mmoe(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click,follow,watching_times", + mtl_pos_class_weight_click=1, + mtl_pos_class_weight_like=2, + mtl_loss_weight_click=1, + mtl_loss_weight_follow=2, + mtl_loss_weight_watching_times=5, + use_task_towers=True, + tower_layers=64, + model="mmoe", + mmoe_num_mlp_experts=4, + embedding_sizes_multiplier=5, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + mlp_layers="64,32", + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + [ + "loss", + "click/binary_output_loss", + "follow/binary_output_loss", + "click/binary_output/auc", + "click/binary_output/prauc", + "click/binary_output/logloss", + "follow/binary_output/auc", + "follow/binary_output/prauc", + "follow/binary_output/logloss", + "watching_times/regression_output_loss", + "watching_times/regression_output/root_mean_squared_error", + "regularization_loss", + "loss_batch", + "gate_click/binary_output_weight_0", + "gate_click/binary_output_weight_1", + "gate_click/binary_output_weight_2", + "gate_click/binary_output_weight_3", + "gate_follow/binary_output_weight_0", + "gate_follow/binary_output_weight_1", + "gate_follow/binary_output_weight_2", + "gate_follow/binary_output_weight_3", + "gate_watching_times/regression_output_weight_0", + "gate_watching_times/regression_output_weight_1", + "gate_watching_times/regression_output_weight_2", + "gate_watching_times/regression_output_weight_3", + ] + ) + + assert metrics["loss"] < 4 + assert metrics["click/binary_output_loss"] < 0.7 + assert metrics["follow/binary_output_loss"] < 0.1 + assert metrics["watching_times/regression_output_loss"] < 0.6 + assert metrics["click/binary_output/auc"] > 0.65 + assert metrics["click/binary_output/prauc"] > 0.5 + assert metrics["click/binary_output/logloss"] < 0.65 + assert metrics["follow/binary_output/auc"] > 0.35 + assert metrics["follow/binary_output/prauc"] > 0 + assert metrics["follow/binary_output/logloss"] < 0.1 + assert metrics["watching_times/regression_output/root_mean_squared_error"] < 0.8 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] > 0.0 + # assert 
elapsed_time < 60 + + +def test_ranking_multi_task_ple(tenrec_data_path): + args = kwargs_to_cli_ags() + train_ds, eval_ds = get_datasets(tenrec_data_path) + + with tempfile.TemporaryDirectory() as tmp_output_folder: + args = kwargs_to_cli_ags( + output_path=tmp_output_folder, + tasks="click,follow,watching_times", + mtl_pos_class_weight_click=1, + mtl_pos_class_weight_like=2, + mtl_loss_weight_click=1, + mtl_loss_weight_follow=2, + mtl_loss_weight_watching_times=5, + ple_num_layers=2, + use_task_towers=True, + tower_layers=64, + model="ple", + cgc_num_shared_experts=3, + cgc_num_task_experts=1, + embedding_sizes_multiplier=5, + l2_reg=1e-6, + embeddings_l2_reg=1e-8, + dropout=0.05, + mlp_layers="64,32", + lr=1e-4, + lr_decay_rate=0.99, + lr_decay_steps=100, + train_batch_size=8192, + eval_batch_size=8192, + epoch=3, + ) + + runner = RankingTrainEvalRunner(args, train_ds, train_ds, None, logger=None) + + current_time = time.time() + + metrics = runner.run() + + elapsed_time = time.time() - current_time + + assert set(metrics.keys()) == set( + [ + "loss", + "click/binary_output_loss", + "follow/binary_output_loss", + "click/binary_output/auc", + "click/binary_output/prauc", + "click/binary_output/logloss", + "follow/binary_output/auc", + "follow/binary_output/prauc", + "follow/binary_output/logloss", + "watching_times/regression_output_loss", + "watching_times/regression_output/root_mean_squared_error", + "regularization_loss", + "loss_batch", + "gate_click/binary_output_weight_0", + "gate_click/binary_output_weight_1", + "gate_click/binary_output_weight_2", + "gate_click/binary_output_weight_3", + "gate_follow/binary_output_weight_0", + "gate_follow/binary_output_weight_1", + "gate_follow/binary_output_weight_2", + "gate_follow/binary_output_weight_3", + "shared_gate_weight_0", + "shared_gate_weight_1", + "shared_gate_weight_2", + "shared_gate_weight_3", + "shared_gate_weight_4", + "shared_gate_weight_5", + "gate_watching_times/regression_output_weight_0", + "gate_watching_times/regression_output_weight_1", + "gate_watching_times/regression_output_weight_2", + "gate_watching_times/regression_output_weight_3", + ] + ) + + assert metrics["loss"] < 4 + assert metrics["click/binary_output_loss"] < 0.7 + assert metrics["follow/binary_output_loss"] < 0.1 + assert metrics["watching_times/regression_output_loss"] < 0.6 + assert metrics["click/binary_output/auc"] > 0.65 + assert metrics["click/binary_output/prauc"] > 0.5 + assert metrics["click/binary_output/logloss"] < 0.65 + assert metrics["follow/binary_output/auc"] > 0.35 + assert metrics["follow/binary_output/prauc"] > 0 + assert metrics["follow/binary_output/logloss"] < 0.1 + assert metrics["watching_times/regression_output/root_mean_squared_error"] < 0.8 + assert metrics["regularization_loss"] > 0.0 + assert metrics["loss_batch"] > 0.0 + # assert elapsed_time < 60 diff --git a/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py b/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py index be647c654..8e3733811 100644 --- a/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py +++ b/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py @@ -2,7 +2,6 @@ import pytest from testbook import testbook - from tests.conftest import REPO_ROOT pytest.importorskip("tensorflow") diff --git a/tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py b/tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py new file mode 
100644 index 000000000..540c79c93 --- /dev/null +++ b/tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py @@ -0,0 +1,59 @@ +import shutil + +import pytest +from testbook import testbook + +from merlin.systems.triton.utils import run_triton_server +from merlin.core.compat import cudf +from tests.conftest import REPO_ROOT + +pytest.importorskip("implicit") +pytest.importorskip("merlin.models") + + +if cudf: + + _TRAIN_ON_GPU = [True, False] +else: + _TRAIN_ON_GPU = [False] + +TRITON_SERVER_PATH = shutil.which("tritonserver") + + +@pytest.mark.notebook +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") +@pytest.mark.parametrize("gpu", _TRAIN_ON_GPU) +def test_example_serving_implicit(gpu, tmpdir): + with testbook( + REPO_ROOT / "examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb", + execute=False, + timeout=180, + ) as tb: + tb.inject( + f""" + import os + os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}/ensemble" + os.environ["USE_GPU"] = "{int(gpu)}" + from unittest.mock import patch + from merlin.datasets.synthetic import generate_data + mock_train, mock_valid = generate_data( + input="movielens-100k", + num_rows=1000, + set_sizes=(0.8, 0.2) + ) + p1 = patch( + "merlin.datasets.entertainment.get_movielens", + return_value=[mock_train, mock_valid] + ) + p1.start() + """, + pop=True, + ) + + tb.execute_cell(list(range(0, 18))) + + with run_triton_server(f"{tmpdir}/ensemble", grpc_port=8001): + tb.execute_cell(list(range(18, len(tb.cells) - 2))) + pft = tb.ref("predictions_from_triton") + lp = tb.ref("local_predictions") + assert pft.shape == lp.shape diff --git a/tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py b/tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py new file mode 100644 index 000000000..684a6f8a3 --- /dev/null +++ b/tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py @@ -0,0 +1,50 @@ +import shutil + +import pytest +from testbook import testbook + +from merlin.systems.triton.utils import run_triton_server +from tests.conftest import REPO_ROOT + +pytest.importorskip("tensorflow") +pytest.importorskip("merlin.models") +pytest.importorskip("xgboost") + +TRITON_SERVER_PATH = shutil.which("tritonserver") + + +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") +@pytest.mark.notebook +def test_example_serving_xgboost(tmpdir): + with testbook( + REPO_ROOT / "examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb", + execute=False, + timeout=180, + ) as tb: + tb.inject( + f""" + import os + os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}/ensemble" + from unittest.mock import patch + from merlin.datasets.synthetic import generate_data + mock_train, mock_valid = generate_data( + input="movielens-100k", + num_rows=1000, + set_sizes=(0.8, 0.2) + ) + p1 = patch( + "merlin.datasets.entertainment.get_movielens", + return_value=[mock_train, mock_valid] + ) + p1.start() + """ + ) + NUM_OF_CELLS = len(tb.cells) + + tb.execute_cell(list(range(0, 14))) + + with run_triton_server(f"{tmpdir}/ensemble", grpc_port=8001): + tb.execute_cell(list(range(14, NUM_OF_CELLS - 1))) + pft = tb.ref("predictions_from_triton") + lp = tb.ref("local_predictions") + assert pft.shape == lp.shape diff --git a/tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py b/tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py new file mode 100644 index 000000000..086d6c80c --- 
/dev/null +++ b/tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py @@ -0,0 +1,47 @@ +import os +import shutil + +import pytest +from testbook import testbook + +from merlin.systems.triton.utils import run_triton_server + +from tests.conftest import REPO_ROOT + +pytest.importorskip("cudf") +pytest.importorskip("tensorflow") +pytest.importorskip("merlin.models") + +TRITON_SERVER_PATH = shutil.which("tritonserver") + + +@pytest.mark.notebook +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") +def test_serving_ranking_models(tmp_path): + with testbook( + REPO_ROOT / "examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb", + execute=False, + timeout=180, + ) as tb: + tb.inject( + f""" + import os + os.environ["DATA_FOLDER"] = "{tmp_path}" + os.environ["NUM_ROWS"] = "2000" + """ + ) + NUM_OF_CELLS = len(tb.cells) + print("num_cells:", NUM_OF_CELLS) + tb.execute_cell(list(range(0, NUM_OF_CELLS - 12))) + assert os.path.isdir(f"{tmp_path}/dlrm") + assert os.path.isdir(f"{tmp_path}/ensemble") + assert os.listdir(f"{tmp_path}/ensemble") + assert os.path.isdir(f"{tmp_path}/workflow") + + with run_triton_server(f"{tmp_path}/ensemble", grpc_port=8001): + tb.execute_cell(list(range(50, NUM_OF_CELLS - 1))) + + preds = tb.ref("predictions") + assert len(preds) == 3 + + From edbd126359fa914d100624280aa5e141af3ca587 Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Wed, 5 Jul 2023 15:21:00 +0000 Subject: [PATCH 10/13] Allow cross-compiling on x86 + NVIDIA Grace (ARM64). --- docker/dockerfile.ctr | 9 +------ docker/dockerfile.merlin | 56 ++++++++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/docker/dockerfile.ctr b/docker/dockerfile.ctr index f9e767438..f79b5ed85 100644 --- a/docker/dockerfile.ctr +++ b/docker/dockerfile.ctr @@ -49,7 +49,7 @@ ENV HCOLL_ENABLE_MCAST=0 # link sub modules expected by hugectr cmake RUN ln -s /usr/lib/libcudf.so /usr/lib/libcudf_base.so RUN ln -s /usr/lib/libcudf.so /usr/lib/libcudf_io.so -RUN ln -s /usr/lib/x86_64-linux-gnu/libibverbs.so.1 /usr/lib/x86_64-linux-gnu/libibverbs.so +RUN ln -s libibverbs.so.1 $(find /usr/lib/*-linux-gnu/libibverbs.so.1 | sed -e 's/\.1$//g') # Install HugeCTR ARG HUGECTR_HOME=/usr/local/hugectr @@ -77,13 +77,6 @@ RUN if [[ "${HUGECTR_DEV_MODE}" == "false" ]]; then \ mv /hugectr/ci ~/hugectr-ci && rm -rf /hugectr && mkdir -p /hugectr && mv ~/hugectr-ci /hugectr/ci \ ; fi - -ENV PATH=$PATH:${HUGECTR_HOME}/bin \ - CPATH=$CPATH:${HUGECTR_HOME}/include \ - LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HUGECTR_HOME}/lib \ - PYTHONPATH=${PYTHONPATH}:${HUGECTR_HOME}/lib - - ARG _HUGECTR_BACKEND_REPO="github.com/triton-inference-server/hugectr_backend.git" ARG TRITON_VERSION # Install Triton inference backend. 
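For readers puzzling over the `find | sed` one-liner in the dockerfile.ctr hunk above: it locates the versioned `libibverbs.so.1` under whichever multiarch lib directory the target platform uses (`x86_64-linux-gnu` or `aarch64-linux-gnu`) and creates the unversioned `.so` symlink beside it. A rough Python equivalent of that shell logic, for illustration only:

```python
# Sketch of the arch-agnostic symlink step: find libibverbs.so.1 in any
# multiarch lib dir and point an unversioned libibverbs.so at it.
import glob
import os

for versioned in glob.glob("/usr/lib/*-linux-gnu/libibverbs.so.1"):
    unversioned = versioned[: -len(".1")]            # .../libibverbs.so
    if not os.path.lexists(unversioned):
        os.symlink("libibverbs.so.1", unversioned)   # relative target, like `ln -s`
```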
diff --git a/docker/dockerfile.merlin b/docker/dockerfile.merlin index 07a50b7e5..150ee9c0f 100644 --- a/docker/dockerfile.merlin +++ b/docker/dockerfile.merlin @@ -11,6 +11,9 @@ FROM ${DLFW_IMAGE} as dlfw FROM ${BASE_IMAGE} as build # Args +ARG TARGETOS +ARG TARGETARCH + ARG DASK_VER=2023.1.1 ARG MERLIN_VER=main ARG CORE_VER=main @@ -38,12 +41,13 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extra ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin # Set up NVIDIA package repository -RUN apt clean && apt update -y --fix-missing && \ +RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \ + apt clean && apt update -y --fix-missing && \ apt install -y --no-install-recommends software-properties-common && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/cuda-ubuntu2204.pin && \ mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/3bf863cc.pub && \ + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/ /" && \ apt install -y --no-install-recommends \ autoconf \ automake \ @@ -95,10 +99,11 @@ RUN pip install --no-cache-dir --upgrade pip; pip install --no-cache-dir "cmake< cachetools graphviz nvtx scipy "scikit-learn<1.2" \ tritonclient[all] grpcio-channelz fiddle wandb npy-append-array \ git+https://github.com/rapidsai/asvdb.git@main \ - xgboost==1.6.2 lightgbm treelite==2.4.0 treelite_runtime==2.4.0 \ + xgboost==1.6.2 lightgbm \ lightfm implicit \ numba "cuda-python>=11.5,<12.0" fsspec==2022.5.0 llvmlite \ pynvml==11.4.1 +RUN pip install --no-cache-dir treelite==2.4.0 treelite_runtime==2.4.0 RUN pip install --no-cache-dir numpy==1.22.4 protobuf==3.20.3 onnx onnxruntime pycuda RUN pip install --no-cache-dir dask==${DASK_VER} distributed==${DASK_VER} dask[dataframe]==${DASK_VER} RUN pip install --no-cache-dir onnx_graphsurgeon --index-url https://pypi.ngc.nvidia.com @@ -113,7 +118,8 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/ -COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil backends/fil/ +# NOTE 2023-07: fil-backend is not available on ARM. +COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/ COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/. 
ENV PATH=/opt/tritonserver/bin:${PATH}: @@ -139,6 +145,10 @@ CMD ["/bin/bash"] FROM ${BASE_IMAGE} as base +# Args +ARG TARGETOS +ARG TARGETARCH + # Envs ENV CUDA_HOME=/usr/local/cuda ENV CUDA_PATH=$CUDA_HOME @@ -148,12 +158,13 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extra ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin # Set up NVIDIA package repository -RUN apt update -y --fix-missing && \ +RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \ + apt update -y --fix-missing && \ apt install -y --no-install-recommends software-properties-common && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/cuda-ubuntu2204.pin && \ mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/3bf863cc.pub && \ + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/ /" && \ apt install -y --no-install-recommends \ ca-certificates \ clang-format \ @@ -196,9 +207,12 @@ RUN apt update -y --fix-missing && \ # Required to run Hadoop. openssh-server \ # [ HugeCTR ] - libaio-dev \ + libaio-dev && \ + # NOTE: libnvinfer is installed either way; only its Python bindings are missing on ARM. + if [[ "$TARGETARCH" != "arm64" ]]; then \ # TensorRT dependencies - python3-libnvinfer && \ + apt install -y --no-install-recommends python3-libnvinfer \ + ; fi && \ apt autoremove -y && \ apt clean && \ rm -rf /var/lib/apt/lists/* @@ -222,24 +236,28 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/bin bin/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/ -COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/python/ -COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil backends/fil/ -COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/tensorrt/ +COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/ +# NOTE 2023-07: fil-backend is not available on ARM. +COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/ +COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/ COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.
-COPY --chown=1000:1000 --from=triton /usr/lib/x86_64-linux-gnu/libdcgm.so.2 /usr/lib/x86_64-linux-gnu/libdcgm.so.2 -COPY --chown=1000:1000 --from=triton /usr/local/cuda-12.1/targets/x86_64-linux/lib/libcupti.so.12 /usr/local/cuda-12.1/targets/x86_64-linux/lib/libcupti.so.12 +COPY --chown=1000:1000 --from=triton /usr/lib/*-linux-gnu/libdcgm.so.2 /tmp +RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "aarch64" || echo "x86_64") && \ + mv /tmp/libdcgm.so.2 /usr/lib/${ARCH}-linux-gnu/libdcgm.so.2 && \ + chmod 644 /usr/lib/${ARCH}-linux-gnu/libdcgm.so.2 && \ + ln -s libdcgm.so.2 /usr/lib/${ARCH}-linux-gnu/libdcgm.so ENV PATH=/opt/tritonserver/bin:${PATH}: ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/tritonserver/lib +# python --version | sed -e 's/[A-Za-z ]*//g' | awk -F'.' '{print $1"."$2}' ENV PYTHON_VERSION=3.10 # Python Packages COPY --chown=1000:1000 --from=build /usr/local/lib/python${PYTHON_VERSION}/dist-packages /usr/local/lib/python${PYTHON_VERSION}/dist-packages/ ENV PYTHONPATH=$PYTHONPATH:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/ - # rapids components from the DLFW image COPY --chown=1000:1000 --from=dlfw /usr/lib/libcudf* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libarrow* /usr/lib/ @@ -256,8 +274,6 @@ COPY --chown=1000:1000 --from=dlfw /usr/include/arrow /usr/include/arrow/ COPY --chown=1000:1000 --from=dlfw /usr/include/cudf /usr/include/cudf/ # ptx compiler required by cubinlinker -COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a /usr/local/cuda-12.1/targets/x86_64-linux/lib/libnvptxcompiler_static.a -COPY --chown=1000:1000 --from=dlfw /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h /usr/local/cuda-12.1/targets/x86_64-linux/include/nvPTXCompiler.h RUN git clone https://github.com/rapidsai/ptxcompiler.git /ptx && cd /ptx/ && python setup.py develop; COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm From 3b0b3757bc027690374beb3bcd85f5af10058867 Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Fri, 7 Jul 2023 13:46:35 +0000 Subject: [PATCH 11/13] Revert two incorrect changes. --- docker/dockerfile.merlin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/dockerfile.merlin b/docker/dockerfile.merlin index 150ee9c0f..0d1669735 100644 --- a/docker/dockerfile.merlin +++ b/docker/dockerfile.merlin @@ -236,10 +236,10 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/bin bin/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/ COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/ -COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/ +COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/python/ # NOTE 2023-07: fil-backend is not available on ARM. COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/ -COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/ +COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/tensorrt/ COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.
COPY --chown=1000:1000 --from=triton /usr/lib/*-linux-gnu/libdcgm.so.2 /tmp RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "aarch64" || echo "x86_64") && \ From 838248f9e66ebc3e956795ce806041df4cf8f0ca Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Fri, 7 Jul 2023 13:47:07 +0000 Subject: [PATCH 12/13] Update TF dockerfile for x86 + Grace/ARM64 cross compile. --- docker/dockerfile.tf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docker/dockerfile.tf b/docker/dockerfile.tf index 0dafdff29..d69576691 100644 --- a/docker/dockerfile.tf +++ b/docker/dockerfile.tf @@ -41,19 +41,19 @@ ARG _HUGECTR_REPO="github.com/NVIDIA-Merlin/HugeCTR.git" ARG _CI_JOB_TOKEN="" ARG HUGECTR_VER=main -ENV CPATH=$CPATH:${HUGECTR_HOME}/include \ - LD_LIBRARY_PATH=${HUGECTR_HOME}/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow:$LD_LIBRARY_PATH \ +ENV LD_LIBRARY_PATH=/usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow:$LD_LIBRARY_PATH \ LIBRARY_PATH=${HUGECTR_HOME}/lib:$LIBRARY_PATH \ SOK_COMPILE_UNIT_TEST=ON RUN mkdir -p /usr/local/nvidia/lib64 && \ - ln -s /usr/local/cuda/lib64/libcusolver.so /usr/local/nvidia/lib64/libcusolver.so.10 + ln -s /usr/local/cuda/lib64/libcusolver.so /usr/local/nvidia/lib64/libcusolver.so -RUN ln -s /usr/lib/x86_64-linux-gnu/libibverbs.so.1 /usr/lib/x86_64-linux-gnu/libibverbs.so +RUN ln -s libibverbs.so.1 $(find /usr/lib/*-linux-gnu/libibverbs.so.1 | sed -e 's/\.1$//g') # Install distributed-embeddings and sok ARG INSTALL_DISTRIBUTED_EMBEDDINGS=false -ARG TFDE_VER=v0.3 +ARG TFDE_VER=v23.03.00 + RUN if [ "$HUGECTR_DEV_MODE" == "false" ]; then \ git clone --branch ${HUGECTR_VER} --depth 1 --recurse-submodules --shallow-submodules https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \ pushd /hugectr && \ @@ -65,14 +65,14 @@ RUN if [ "$HUGECTR_DEV_MODE" == "false" ]; then \ # Install HPS TF plugin cd ../hps_tf && \ python setup.py install && \ - popd &&\ - mv /hugectr/ci ~/hugectr-ci && mv /hugectr/sparse_operation_kit ~/hugectr-sparse_operation_kit && \ + popd && \ + mv /hugectr/ci ~/hugectr-ci && mv /hugectr/sparse_operation_kit ~/hugectr-sparse_operation_kit && \ rm -rf /hugectr && mkdir -p /hugectr && \ - mv ~/hugectr-ci /hugectr/ci && mv ~/hugectr-sparse_operation_kit /hugectr/sparse_operation_kit; \ - fi && \ + mv ~/hugectr-ci /hugectr/ci && mv ~/hugectr-sparse_operation_kit /hugectr/sparse_operation_kit \ + ; fi && \ if [ "$INSTALL_DISTRIBUTED_EMBEDDINGS" == "true" ]; then \ git clone --branch ${TFDE_VER} --depth 1 https://github.com/NVIDIA-Merlin/distributed-embeddings.git /distributed_embeddings/ && \ cd /distributed_embeddings && git submodule update --init --recursive && \ make pip_pkg && pip install --no-cache-dir artifacts/*.whl && make clean \ ; fi From 6d4c453cc00a7254de20cc95728343b7243d8265 Mon Sep 17 00:00:00 2001 From: Matthias Langer Date: Thu, 13 Jul 2023 00:48:58 -0700 Subject: [PATCH 13/13] Just add an empty line for symmetry reasons.
--- docker/dockerfile.merlin | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/dockerfile.merlin b/docker/dockerfile.merlin index 0d1669735..f76c8528d 100644 --- a/docker/dockerfile.merlin +++ b/docker/dockerfile.merlin @@ -258,6 +258,7 @@ ENV PYTHON_VERSION=3.10 COPY --chown=1000:1000 --from=build /usr/local/lib/python${PYTHON_VERSION}/dist-packages /usr/local/lib/python${PYTHON_VERSION}/dist-packages/ ENV PYTHONPATH=$PYTHONPATH:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/ + # rapids components from the DLFW image COPY --chown=1000:1000 --from=dlfw /usr/lib/libcudf* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libarrow* /usr/lib/
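A few usage notes on the cross-compilation changes in this series. The TARGETOS/TARGETARCH args declared in the patches above are populated automatically by BuildKit, so building for Grace from an x86 host only needs a platform switch; the image tags below are hypothetical:

    docker buildx build --platform linux/arm64 -f docker/dockerfile.merlin -t merlin-base:arm64 .
    docker buildx build --platform linux/amd64 -f docker/dockerfile.merlin -t merlin-base:amd64 .

Inside the Dockerfiles, TARGETARCH is mapped to two different spellings of the ARM architecture: the CUDA apt repository uses "sbsa" for ARM server parts, while the Ubuntu multiarch library directory uses "aarch64":

    ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64")     # CUDA repo path
    ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "aarch64" || echo "x86_64")  # /usr/lib/${ARCH}-linux-gnu

The pipeline left as a comment above ENV PYTHON_VERSION=3.10 derives the major.minor version from the interpreter, in case the pinned value ever drifts from the base image:

    python --version | sed -e 's/[A-Za-z ]*//g' | awk -F'.' '{print $1"."$2}'
    # "Python 3.10.12" -> "3.10.12" -> "3.10"

The notebook integration test added at the start of this series can also be run on its own; a hypothetical invocation, assuming a GPU container that provides tritonserver, cudf, tensorflow, and merlin.models:

    pytest -m notebook tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py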