feat: Add OpenAI-Compatible Server to NGC Container #7894

Closed · wants to merge 10 commits
39 changes: 28 additions & 11 deletions build.py
@@ -565,7 +565,7 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
elif be == "tensorflow":
args = tensorflow_cmake_args(images, library_paths)
elif be == "python":
args = []
args = python_cmake_args()
elif be == "dali":
args = dali_cmake_args()
elif be == "pytorch":
@@ -631,6 +631,18 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
return cargs


def python_cmake_args():
cargs = []
if target_platform() == "rhel":
cargs.append(
cmake_backend_arg(
"python", "PYBIND11_PYTHON_VERSION", "STRING", FLAGS.rhel_py_version
)
)

return cargs
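Reviewer note: on RHEL this pins the interpreter version that pybind11 resolves when the python backend is configured. Assuming cmake_backend_arg emits CMake's usual -D<NAME>:<TYPE>=<VALUE> form, the backend configure line gains one extra flag; a sketch, with an illustrative version value:

    # Sketch of the extra python-backend configure flag on RHEL (3.12.3 is
    # an example value; the real one comes from FLAGS.rhel_py_version):
    cmake -DTRITON_ENABLE_GPU=ON -DPYBIND11_PYTHON_VERSION:STRING=3.12.3 ..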


def pytorch_cmake_args(images):
if "pytorch" in images:
image = images["pytorch"]
@@ -957,6 +969,10 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
pkg-config \\
unzip \\
wget \\
ncurses-devel \\
readline-devel \\
xz-devel \\
bzip2-devel \\
zlib-devel \\
libarchive-devel \\
libxml2-devel \\
@@ -1216,6 +1232,8 @@ def create_dockerfile_linux(
find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[all]

RUN pip3 install -r python/openai/requirements.txt

"""
if not FLAGS.no_core_build:
# Add feature labels for SageMaker endpoint
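Reviewer note: with the OpenAI application's requirements baked into the image, the frontend can be smoke-tested straight from the container. A minimal check, assuming the frontend listens on port 9000 and that llama-3-8b-instruct (the model used in qa/L0_openai below) is loaded; both are deployment-specific assumptions:

    # Hypothetical smoke test of the bundled OpenAI-compatible endpoint;
    # adjust the port and model name to match your deployment.
    curl -s http://localhost:9000/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "llama-3-8b-instruct", "messages": [{"role": "user", "content": "Hello!"}]}'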
@@ -1542,7 +1560,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):


def change_default_python_version_rhel(version):
df = """
df = f"""
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
@@ -1551,21 +1569,17 @@ def change_default_python_version_rhel(version):
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {} \\
&& cp ${{PYENV_ROOT}}/versions/{}/lib/libpython3* /usr/lib64/""".format(
version, version
)
df += """
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
&& cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/

# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={} PYTHONPATH=/opt/python/v
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
""".format(
version
)
"""
return df
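Reviewer note: as a sanity check on the f-string refactor, the fragment returned for version "3.12" now interpolates the version into both pyenv lines and PYVER, e.g. (abridged):

    # Rendered Dockerfile fragment for change_default_python_version_rhel("3.12"):
    RUN CONFIGURE_OPTS="--with-openssl=/usr/lib64" && pyenv install 3.12 \
        && cp ${PYENV_ROOT}/versions/3.12/lib/libpython3* /usr/lib64/
    ENV PYVER=3.12 PYTHONPATH=/opt/python/v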


@@ -1918,6 +1932,9 @@ def core_build(
os.path.join(install_dir, "include", "triton", "core"),
)

cmake_script.cpdir(
os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python")
)
cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir)
cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir)

16 changes: 14 additions & 2 deletions qa/L0_backend_python/common.sh
@@ -42,7 +42,7 @@ install_conda() {
eval "$(./miniconda/bin/conda shell.bash hook)"
}

install_build_deps() {
install_build_deps_apt() {
apt update && apt install software-properties-common rapidjson-dev -y
# Using CMake installation instructions from: https://apt.kitware.com/
apt update -q=2 \
@@ -54,6 +54,18 @@
&& apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3*
}

install_build_deps_yum() {
yum install rapidjson-devel -y
}
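Reviewer note: unlike the apt path, the yum path does not pin CMake; presumably the RHEL build image already ships a recent enough version. A quick pre-flight check (the 3.28 floor mirrors the apt pin above and is an assumption here):

    # Verify the preinstalled CMake before building the stub on RHEL:
    cmake --version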

install_build_deps() {
if [[ "${TRITON_RHEL:-0}" -eq 1 ]]; then
install_build_deps_yum
else
install_build_deps_apt
fi
}
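Usage sketch for the new dispatcher: RHEL-based QA runs opt in by exporting TRITON_RHEL before sourcing this file (the invocation below is illustrative):

    # Select the yum dependency path inside a RHEL-based image:
    export TRITON_RHEL=1
    source qa/L0_backend_python/common.sh
    install_build_deps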

create_conda_env() {
local python_version=$1
local env_name=$2
@@ -74,6 +86,6 @@ create_python_backend_stub() {
rm -rf python_backend
git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
(cd python_backend/ && mkdir builddir && cd builddir && \
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG ../ && \
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG -DPYBIND11_PYTHON_VERSION=$PY_VERSION ../ && \
make -j18 triton-python-backend-stub)
}
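Reviewer note: the stub is now configured against the exact interpreter under test via -DPYBIND11_PYTHON_VERSION, so callers must export PY_VERSION first, as the env test below does. For example:

    # Build the stub for the Python 3.12 environment being tested:
    export PY_VERSION="3.12"
    create_python_backend_stub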
37 changes: 25 additions & 12 deletions qa/L0_backend_python/env/test.sh
@@ -44,6 +44,7 @@ install_conda
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
# Create a model with python 3.7 version
export PY_VERSION="3.7"
create_conda_env "3.7" "python-3-7"
conda install numpy=1.20.1 -y
conda install tensorflow=2.1.0 -y
@@ -67,6 +68,7 @@ conda deactivate
# previous test.
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
export PY_VERSION="3.7.1"
path_to_conda_pack="$PWD/python-3-7-1"
create_conda_env_with_specified_path "3.7" $path_to_conda_pack
conda install numpy=1.20.3 -y
@@ -89,6 +91,7 @@
# Create a model with python 3.6 version
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
export PY_VERSION="3.6"
create_conda_env "3.6" "python-3-6"
conda install -c conda-forge libstdcxx-ng=14 -y
conda install numpy=1.18.1 -y
@@ -116,9 +119,13 @@ conda deactivate
path_to_conda_pack='$$TRITON_MODEL_DIRECTORY/python_3_12_environment.tar.gz'
create_conda_env "3.12" "python-3-12"
conda install -c conda-forge libstdcxx-ng=14 -y
TF_VERSION="2.16.2"
conda install numpy=1.26.4 -y
conda install tensorflow=2.16.2 -y
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is 2.16.2"
if [ "${TRITON_RHEL:-0}" -eq 1 ]; then
TF_VERSION="2.17.0"
fi
conda install tensorflow=${TF_VERSION} -y
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is ${TF_VERSION}"
conda pack -o python3.12.tar.gz
mkdir -p models/python_3_12/1/
cp ../../python_models/python_version/config.pbtxt ./models/python_3_12
@@ -137,8 +144,7 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server
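Reviewer note: kill_server replaces the repeated kill/wait pairs throughout this script. Presumably it is a shared helper roughly equivalent to the following (a sketch; the real utility may add error handling):

    # Sketch of the helper this diff switches to:
    kill_server() {
        kill $SERVER_PID
        wait $SERVER_PID
    }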

set +e
for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY37_VERSION_STRING" "$PY37_1_VERSION_STRING" "$PY312_VERSION_STRING"; do
@@ -154,6 +160,15 @@ done
# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to
# stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260.
export LC_ALL=INVALID
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

kill_server

grep "Locale is (None, None)" $SERVER_LOG
if [ $? -ne 0 ]; then
cat $SERVER_LOG
@@ -175,8 +190,7 @@
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
grep "Locale is ('en_US', 'UTF-8')" $SERVER_LOG
@@ -207,8 +221,7 @@ touch -m models/python_3_12/python_3_12_environment.tar.gz
# The environment should be re-extracted
curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e

@@ -248,6 +261,8 @@ rm -rf models/python_3_7
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

rm $SERVER_LOG
# Loading models from the S3 bucket occasionally needs more time
SERVER_TIMEOUT=420

SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1"
run_server
@@ -258,8 +273,7 @@
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
grep "$PY36_VERSION_STRING" $SERVER_LOG
@@ -292,8 +306,7 @@
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do
11 changes: 7 additions & 4 deletions qa/L0_openai/test.sh
@@ -29,11 +29,10 @@

function install_deps() {
# Install python bindings for tritonserver and tritonfrontend
pip install /opt/tritonserver/python/triton*.whl
# pip install /opt/tritonserver/python/triton*.whl

# Install application/testing requirements
pushd openai/
pip install -r requirements.txt
pip install -r requirements-test.txt

if [ "${IMAGE_KIND}" == "TRTLLM" ]; then
@@ -49,13 +48,17 @@ function prepare_vllm() {
}

function prepare_tensorrtllm() {
# FIXME: Remove this when pre-installing deps in TRTLLM container
pip install -r requirements.txt

MODEL="llama-3-8b-instruct"
MODEL_REPO="tests/tensorrtllm_models"
rm -rf ${MODEL_REPO}

# FIXME: This will require an upgrade each release to match the TRT-LLM version
# FIXME: This may require an upgrade each release to match the TRT-LLM version,
# so it's likely easier to use trtllm-build directly.
# Use Triton CLI to prepare model repository for testing
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.10
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.1
# NOTE: Could use ENGINE_DEST_PATH set to NFS mount for pre-built engines in future
triton import \
--model ${MODEL} \