Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TRT-LLM backend build to Triton #6365

Merged
merged 11 commits into from
Oct 4, 2023
80 changes: 78 additions & 2 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@
"2023.0.0", # ORT OpenVINO
"2023.0.0", # Standalone OpenVINO
"2.4.7", # DCGM version
"py310_23.1.0-1", # Conda version.
"py310_23.1.0-1", # Conda version
"9.1.0.1", # TRT version for building TRT-LLM backend
"12.2", # CUDA version for building TRT-LLM backend
)
}

Expand Down Expand Up @@ -564,6 +566,8 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
args = fastertransformer_cmake_args()
elif be == "tensorrt":
args = tensorrt_cmake_args()
elif be == "tensorrtllm":
args = tensorrtllm_cmake_args(images)
else:
args = []

Expand Down Expand Up @@ -859,6 +863,39 @@ def fastertransformer_cmake_args():
return cargs


def tensorrtllm_cmake_args(images):
cargs = [
cmake_backend_arg(
"tensorrtllm",
"TRT_LIB_DIR",
None,
"${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib",
),
cmake_backend_arg(
"tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include"
),
cmake_backend_arg(
"tensorrtllm",
"TRTLLM_BUILD_CONTAINER",
None,
images["base"],
),
cmake_backend_arg(
"tensorrtllm",
"TENSORRT_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][7],
),
cmake_backend_arg(
"tensorrtllm",
"CUDA_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][8],
),
]
return cargs


def install_dcgm_libraries(dcgm_version, target_machine):
if dcgm_version == "":
fail(
Expand Down Expand Up @@ -1278,6 +1315,27 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
pip3 install --upgrade numpy && \
rm -rf /var/lib/apt/lists/*
"""
# Add dependencies needed for tensorrtllm backend
if "tensorrtllm" in backends:
be = "tensorrtllm"
import importlib.util

import requests

# FIXME: Update the url
url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
backends[be]
)

response = requests.get(url)
spec = importlib.util.spec_from_loader(
"trtllm_buildscript", loader=None, origin=url
)
trtllm_buildscript = importlib.util.module_from_spec(spec)
exec(response.content, trtllm_buildscript.__dict__)
df += trtllm_buildscript.create_postbuild(
argmap["TRT_LLM_TRT_VERSION"], argmap["TRT_LLM_CUDA_VERSION"]
)

df += """
WORKDIR /opt/tritonserver
Expand Down Expand Up @@ -1441,6 +1499,8 @@ def create_build_dockerfiles(
if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP
else TRITON_VERSION_MAP[FLAGS.version][6],
}
dockerfileargmap["TRT_LLM_TRT_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][7]
dockerfileargmap["TRT_LLM_CUDA_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][8]

# For CPU-only image we need to copy some cuda libraries and dependencies
# since we are using PyTorch and TensorFlow containers that
Expand Down Expand Up @@ -1726,6 +1786,12 @@ def core_build(
cmake_script.blankln()


def tensorrtllm_prebuild(cmake_script):
# Export the TRT_ROOT environment variable
cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
krishung5 marked this conversation as resolved.
Show resolved Hide resolved
cmake_script.cmd("export ARCH=$(uname -m)")


def backend_build(
be,
cmake_script,
Expand All @@ -1746,7 +1812,16 @@ def backend_build(
cmake_script.comment()
cmake_script.mkdir(build_dir)
cmake_script.cwd(build_dir)
cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
# FIXME: Use GitHub repo
if be == "tensorrtllm":
cmake_script.gitclone(
tanmayv25 marked this conversation as resolved.
Show resolved Hide resolved
backend_repo(be), tag, be, "https://gitlab-master.nvidia.com/krish"
)
else:
cmake_script.gitclone(backend_repo(be), tag, be, github_organization)

if be == "tensorrtllm":
tensorrtllm_prebuild(cmake_script)

cmake_script.mkdir(repo_build_dir)
cmake_script.cwd(repo_build_dir)
Expand All @@ -1757,6 +1832,7 @@ def backend_build(

cmake_script.mkdir(os.path.join(install_dir, "backends"))
cmake_script.rmdir(os.path.join(install_dir, "backends", be))

cmake_script.cpdir(
os.path.join(repo_install_dir, "backends", be),
os.path.join(install_dir, "backends"),
Expand Down
Loading