feat: Add OpenAI-Compatible Server to NGC Container #7894

Closed · wants to merge 10 commits
39 changes: 28 additions & 11 deletions build.py
@@ -565,7 +565,7 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
elif be == "tensorflow":
args = tensorflow_cmake_args(images, library_paths)
elif be == "python":
args = []
args = python_cmake_args()
elif be == "dali":
args = dali_cmake_args()
elif be == "pytorch":
@@ -631,6 +631,18 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
return cargs


def python_cmake_args():
cargs = []
if target_platform() == "rhel":
cargs.append(
cmake_backend_arg(
"python", "PYBIND11_PYTHON_VERSION", "STRING", FLAGS.rhel_py_version
)
)

return cargs
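Reviewer note: on RHEL this pins the interpreter version that pybind11 resolves when the python backend is configured. Assuming cmake_backend_arg emits CMake's usual -D<NAME>:<TYPE>=<VALUE> form, the backend configure line gains one extra flag; a sketch, with an illustrative version value:

    # Sketch of the extra python-backend configure flag on RHEL (3.12.3 is
    # an example value; the real one comes from FLAGS.rhel_py_version):
    cmake -DTRITON_ENABLE_GPU=ON -DPYBIND11_PYTHON_VERSION:STRING=3.12.3 ..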


def pytorch_cmake_args(images):
if "pytorch" in images:
image = images["pytorch"]
@@ -957,6 +969,10 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
pkg-config \\
unzip \\
wget \\
ncurses-devel \\
readline-devel \\
xz-devel \\
bzip2-devel \\
zlib-devel \\
libarchive-devel \\
libxml2-devel \\
@@ -1216,6 +1232,8 @@ def create_dockerfile_linux(
find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[all]

RUN pip3 install -r python/openai/requirements.txt

"""
if not FLAGS.no_core_build:
# Add feature labels for SageMaker endpoint
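Reviewer note: with the OpenAI application's requirements baked into the image, the frontend can be smoke-tested straight from the container. A minimal check, assuming the frontend listens on port 9000 and that llama-3-8b-instruct (the model used in qa/L0_openai below) is loaded; both are deployment-specific assumptions:

    # Hypothetical smoke test of the bundled OpenAI-compatible endpoint;
    # adjust the port and model name to match your deployment.
    curl -s http://localhost:9000/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "llama-3-8b-instruct", "messages": [{"role": "user", "content": "Hello!"}]}'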
@@ -1542,7 +1560,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):


def change_default_python_version_rhel(version):
df = """
df = f"""
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
@@ -1551,21 +1569,17 @@ def change_default_python_version_rhel(version):
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {} \\
&& cp ${{PYENV_ROOT}}/versions/{}/lib/libpython3* /usr/lib64/""".format(
version, version
)
df += """
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
&& cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/

# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={} PYTHONPATH=/opt/python/v
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
""".format(
version
)
"""
return df
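Reviewer note: as a sanity check on the f-string refactor, the fragment returned for version "3.12" now interpolates the version into both pyenv lines and PYVER, e.g. (abridged):

    # Rendered Dockerfile fragment for change_default_python_version_rhel("3.12"):
    RUN CONFIGURE_OPTS="--with-openssl=/usr/lib64" && pyenv install 3.12 \
        && cp ${PYENV_ROOT}/versions/3.12/lib/libpython3* /usr/lib64/
    ENV PYVER=3.12 PYTHONPATH=/opt/python/v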


@@ -1918,6 +1932,9 @@ def core_build(
os.path.join(install_dir, "include", "triton", "core"),
)

cmake_script.cpdir(
os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python")
)
cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir)
cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir)

16 changes: 14 additions & 2 deletions qa/L0_backend_python/common.sh
@@ -42,7 +42,7 @@ install_conda() {
eval "$(./miniconda/bin/conda shell.bash hook)"
}

install_build_deps() {
install_build_deps_apt() {
apt update && apt install software-properties-common rapidjson-dev -y
# Using CMake installation instructions from: https://apt.kitware.com/
apt update -q=2 \
@@ -54,6 +54,18 @@
&& apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3*
}

install_build_deps_yum() {
yum install rapidjson-devel -y
}
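Reviewer note: unlike the apt path, the yum path does not pin CMake; presumably the RHEL build image already ships a recent enough version. A quick pre-flight check (the 3.28 floor mirrors the apt pin above and is an assumption here):

    # Verify the preinstalled CMake before building the stub on RHEL:
    cmake --version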

install_build_deps() {
if [[ "${TRITON_RHEL:-0}" -eq 1 ]]; then
install_build_deps_yum
else
install_build_deps_apt
fi
}
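Usage sketch for the new dispatcher: RHEL-based QA runs opt in by exporting TRITON_RHEL before sourcing this file (the invocation below is illustrative):

    # Select the yum dependency path inside a RHEL-based image:
    export TRITON_RHEL=1
    source qa/L0_backend_python/common.sh
    install_build_deps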

create_conda_env() {
local python_version=$1
local env_name=$2
@@ -74,6 +86,6 @@ create_python_backend_stub() {
rm -rf python_backend
git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
(cd python_backend/ && mkdir builddir && cd builddir && \
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG ../ && \
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG -DPYBIND11_PYTHON_VERSION=$PY_VERSION ../ && \
make -j18 triton-python-backend-stub)
}
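Reviewer note: the stub is now configured against the exact interpreter under test via -DPYBIND11_PYTHON_VERSION, so callers must export PY_VERSION first, as the env test below does. For example:

    # Build the stub for the Python 3.12 environment being tested:
    export PY_VERSION="3.12"
    create_python_backend_stub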
37 changes: 25 additions & 12 deletions qa/L0_backend_python/env/test.sh
@@ -44,6 +44,7 @@ install_conda
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
# Create a model with python 3.7 version
export PY_VERSION="3.7"
create_conda_env "3.7" "python-3-7"
conda install numpy=1.20.1 -y
conda install tensorflow=2.1.0 -y
@@ -67,6 +68,7 @@ conda deactivate
# previous test.
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
export PY_VERSION="3.7.1"
path_to_conda_pack="$PWD/python-3-7-1"
create_conda_env_with_specified_path "3.7" $path_to_conda_pack
conda install numpy=1.20.3 -y
@@ -89,6 +91,7 @@
# Create a model with python 3.6 version
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
export PY_VERSION="3.6"
create_conda_env "3.6" "python-3-6"
conda install -c conda-forge libstdcxx-ng=14 -y
conda install numpy=1.18.1 -y
@@ -116,9 +119,13 @@ conda deactivate
path_to_conda_pack='$$TRITON_MODEL_DIRECTORY/python_3_12_environment.tar.gz'
create_conda_env "3.12" "python-3-12"
conda install -c conda-forge libstdcxx-ng=14 -y
TF_VERSION="2.16.2"
conda install numpy=1.26.4 -y
conda install tensorflow=2.16.2 -y
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is 2.16.2"
if [ "${TRITON_RHEL:-0}" -eq 1 ]; then
TF_VERSION="2.17.0"
fi
conda install tensorflow=${TF_VERSION} -y
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is ${TF_VERSION}"
conda pack -o python3.12.tar.gz
mkdir -p models/python_3_12/1/
cp ../../python_models/python_version/config.pbtxt ./models/python_3_12
@@ -137,8 +144,7 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server
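Reviewer note: kill_server replaces the repeated kill/wait pairs throughout this script. Presumably it is a shared helper roughly equivalent to the following (a sketch; the real utility may add error handling):

    # Sketch of the helper this diff switches to:
    kill_server() {
        kill $SERVER_PID
        wait $SERVER_PID
    }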

set +e
for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY37_VERSION_STRING" "$PY37_1_VERSION_STRING" "$PY312_VERSION_STRING"; do
@@ -154,6 +160,15 @@ done
# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to
# stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260.
export LC_ALL=INVALID
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

kill_server

grep "Locale is (None, None)" $SERVER_LOG
if [ $? -ne 0 ]; then
cat $SERVER_LOG
@@ -175,8 +190,7 @@
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
grep "Locale is ('en_US', 'UTF-8')" $SERVER_LOG
@@ -207,8 +221,7 @@ touch -m models/python_3_12/python_3_12_environment.tar.gz
# The environment should be re-extracted
curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e

@@ -248,6 +261,8 @@ rm -rf models/python_3_7
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

rm $SERVER_LOG
# Loading models from the S3 bucket occasionally needs more time
SERVER_TIMEOUT=420

SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1"
run_server
@@ -258,8 +273,7 @@
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
grep "$PY36_VERSION_STRING" $SERVER_LOG
@@ -292,8 +306,7 @@
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do
11 changes: 7 additions & 4 deletions qa/L0_openai/test.sh
@@ -29,11 +29,10 @@

function install_deps() {
# Install python bindings for tritonserver and tritonfrontend
pip install /opt/tritonserver/python/triton*.whl
# pip install /opt/tritonserver/python/triton*.whl

# Install application/testing requirements
pushd openai/
pip install -r requirements.txt
pip install -r requirements-test.txt

if [ "${IMAGE_KIND}" == "TRTLLM" ]; then
@@ -49,13 +48,17 @@ function prepare_vllm() {
}

function prepare_tensorrtllm() {
# FIXME: Remove this when pre-installing deps in TRTLLM container
pip install -r requirements.txt

MODEL="llama-3-8b-instruct"
MODEL_REPO="tests/tensorrtllm_models"
rm -rf ${MODEL_REPO}

# FIXME: This will require an upgrade each release to match the TRT-LLM version
# FIXME: This may require an upgrade each release to match the TRT-LLM version,
# so it's likely easier to use trtllm-build directly.
# Use Triton CLI to prepare model repository for testing
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.10
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.1
# NOTE: Could use ENGINE_DEST_PATH set to NFS mount for pre-built engines in future
triton import \
--model ${MODEL} \