Not using TRTLLM script for postbuild installation
krishung5 committed Oct 11, 2023
1 parent 3df0787 commit 8d26937
Showing 1 changed file with 17 additions and 14 deletions.
build.py: 31 changes (17 additions & 14 deletions)
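For context, build.py emits the container's Dockerfile by appending string fragments to a `df` variable inside `dockerfile_prepare_container_linux` and writing the result to disk once all fragments are assembled; the hunks below edit one of those fragments. The snippet that follows is a minimal, hypothetical sketch of that pattern (the helper name and the write-out step are illustrative, not the actual build.py API):

# Minimal sketch of the fragment-concatenation pattern edited by this commit:
# Dockerfile text is accumulated in a plain string, optionally interpolated
# with .format(), and written out once all fragments are appended.
def tensorrt_install_fragment(trt_version="9.1.0.3", cuda_version="12.2"):
    df = ""
    df += """
# Remove previous TRT installation
RUN apt-get remove --purge -y tensorrt* libnvinfer*
RUN pip uninstall -y tensorrt
"""
    df += """
ARG NEW_TRT_VERSION="{}"
ARG NEW_CUDA_VERSION="{}"
""".format(
        trt_version, cuda_version
    )
    return df


if __name__ == "__main__":
    # Hypothetical write-out step; build.py assembles the full Dockerfile itself.
    with open("Dockerfile.snippet", "w") as out:
        out.write(tensorrt_install_fragment())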
@@ -1327,21 +1327,24 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 RUN apt-get remove --purge -y tensorrt* libnvinfer*
 RUN pip uninstall -y tensorrt
-# Install new version of TRT using the script from TRT-LLM
-RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
-RUN cd tensorrtllm_backend && git submodule update --init --recursive
-RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
-RUN rm -fr tensorrtllm_backend
-""".format(
-    backends[be],
-    os.environ["REMOVE_ME_TRTLLM_USERNAME"],
-    os.environ["REMOVE_ME_TRTLLM_TOKEN"],
-)
+# Download & install internal TRT release
+ARG NEW_TRT_VERSION="9.1.0.3"
+ARG NEW_CUDA_VERSION="12.2"
+RUN ARCH="$(uname -i)" && \
+    if [ "$ARCH" = "arm64" ];then ARCH="aarch64";fi && \
+    if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi && \
+    if [ "$ARCH" = "x86_64" ];then DIR_NAME="x64-agnostic"; else DIR_NAME=${ARCH};fi && \
+    if [ "$ARCH" = "aarch64" ];then OS1="Ubuntu22_04" && OS2="Ubuntu-22.04"; else OS1="Linux" && OS2="Linux";fi && \
+    RELEASE_URL_TRT=http://cuda-repo.nvidia.com/release-candidates/Libraries/TensorRT/v9.1/${NEW_TRT_VERSION}-04908a66/${NEW_CUDA_VERSION}-r535/${OS1}-${DIR_NAME}/tar/TensorRT-${NEW_TRT_VERSION}.${OS2}.${ARCH}-gnu.cuda-${NEW_CUDA_VERSION}.tar.gz && \
+    wget ${RELEASE_URL_TRT} -O /workspace/TensorRT.tar && \
+    tar -xf /workspace/TensorRT.tar -C /usr/local/ && \
+    mv /usr/local/TensorRT-${NEW_TRT_VERSION} /usr/local/tensorrt && \
+    pip install /usr/local/tensorrt/python/tensorrt-*-cp310-*.whl && \
+    rm -rf /workspace/TensorRT.tar
         df += """
-RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
 ENV TRT_ROOT=/usr/local/tensorrt
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:$LD_LIBRARY_PATH
 # Remove TRT contents that are not needed in runtime
 RUN ARCH="$(uname -i)" && \
@@ -1355,7 +1358,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     fi
 RUN pip cache purge
-ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """

     if "vllm" in backends:
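The nested shell conditionals in the new RUN step compress the whole platform-to-URL mapping into a single line. As a reading aid only, here is a small Python sketch (not part of build.py; the function name is made up) that mirrors the same arch/OS mapping so the resolved TensorRT tarball URL is easy to inspect:

# Illustrative sketch only: mirrors the arch/OS mapping encoded in the new
# RUN step so the resolved TensorRT tarball URL can be printed directly.
NEW_TRT_VERSION = "9.1.0.3"
NEW_CUDA_VERSION = "12.2"


def trt_release_url(arch: str) -> str:
    """Resolve the internal TRT tarball URL for a machine architecture string."""
    # Normalize architecture names the same way the shell conditionals do.
    if arch == "arm64":
        arch = "aarch64"
    if arch == "amd64":
        arch = "x86_64"
    dir_name = "x64-agnostic" if arch == "x86_64" else arch
    if arch == "aarch64":
        os1, os2 = "Ubuntu22_04", "Ubuntu-22.04"
    else:
        os1, os2 = "Linux", "Linux"
    return (
        "http://cuda-repo.nvidia.com/release-candidates/Libraries/TensorRT/"
        f"v9.1/{NEW_TRT_VERSION}-04908a66/{NEW_CUDA_VERSION}-r535/"
        f"{os1}-{dir_name}/tar/TensorRT-{NEW_TRT_VERSION}.{os2}."
        f"{arch}-gnu.cuda-{NEW_CUDA_VERSION}.tar.gz"
    )


if __name__ == "__main__":
    for machine in ("x86_64", "aarch64"):
        print(machine, "->", trt_release_url(machine))

Running it shows that x86_64 resolves to the Linux/x64-agnostic tarball while aarch64 resolves to the Ubuntu-22.04 build, which is the OS1/OS2 split encoded in the RUN step.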

0 comments on commit 8d26937

Please sign in to comment.