Merge branch 'main' into kprashanth-tritonfrontend-rfeatures

KrishnanPrash authored Dec 13, 2024
2 parents c415fdb + bcff3da commit dae9159
Showing 21 changed files with 546 additions and 112 deletions.
42 changes: 28 additions & 14 deletions build.py
@@ -565,7 +565,7 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
elif be == "tensorflow":
args = tensorflow_cmake_args(images, library_paths)
elif be == "python":
args = []
args = python_cmake_args()
elif be == "dali":
args = dali_cmake_args()
elif be == "pytorch":
@@ -631,6 +631,18 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
return cargs


def python_cmake_args():
cargs = []
if target_platform() == "rhel":
cargs.append(
cmake_backend_arg(
"python", "PYBIND11_PYTHON_VERSION", "STRING", FLAGS.rhel_py_version
)
)

return cargs


def pytorch_cmake_args(images):
if "pytorch" in images:
image = images["pytorch"]
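The new `python_cmake_args()` helper emits a flag only for RHEL builds. As a hedged illustration of what that flag likely looks like on the cmake command line: assuming `cmake_backend_arg()` renders the usual CMake cache-entry syntax (its real implementation lives elsewhere in build.py), the call turns `--rhel-py-version` into a `-D` define:

```python
# Hypothetical rendering of cmake_backend_arg(); the actual helper in
# build.py may differ in detail.
def cmake_backend_arg(backend, name, arg_type, value):
    # Standard CMake cache-entry syntax: -D<NAME>:<TYPE>=<VALUE>
    return f"-D{name}:{arg_type}={value}"

# With --rhel-py-version 3.12, the python backend build would receive:
print(cmake_backend_arg("python", "PYBIND11_PYTHON_VERSION", "STRING", "3.12"))
# -> -DPYBIND11_PYTHON_VERSION:STRING=3.12
```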
@@ -924,6 +936,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""
df += """
# Install docker docker buildx
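(`PIP_BREAK_SYSTEM_PACKAGES=1` is the environment-variable form of pip's `--break-system-packages` flag, which bypasses the PEP 668 "externally managed environment" guard; the commit hoists it out of the python-backend-specific blocks further down so that every pip invocation in the buildbase, cibase, and production images is covered.)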
@@ -957,6 +970,10 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
pkg-config \\
unzip \\
wget \\
ncurses-devel \\
readline-devel \\
xz-devel \\
bzip2-devel \\
zlib-devel \\
libarchive-devel \\
libxml2-devel \\
@@ -1025,6 +1042,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""
# Install the windows- or linux-specific buildbase dependencies
if target_platform() == "windows":
@@ -1035,7 +1053,6 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
df += """
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# Install docker docker buildx
RUN apt-get update \\
@@ -1159,6 +1176,7 @@ def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
@@ -1198,6 +1216,8 @@ def create_dockerfile_linux(
## Production stage: Create container with just inference server executable
############################################################################
FROM ${BASE_IMAGE}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

df += dockerfile_prepare_container_linux(
@@ -1399,7 +1419,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
if "python" in backends:
if target_platform() == "rhel":
df += """
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# python3, python3-pip and some pip installs required for the python backend
RUN yum install -y \\
libarchive-devel \\
@@ -1418,7 +1437,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
"""
else:
df += """
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# python3, python3-pip and some pip installs required for the python backend
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
@@ -1542,7 +1560,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):


def change_default_python_version_rhel(version):
df = """
df = f"""
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
@@ -1551,21 +1569,17 @@ def change_default_python_version_rhel(version):
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {} \\
&& cp ${{PYENV_ROOT}}/versions/{}/lib/libpython3* /usr/lib64/""".format(
version, version
)
df += """
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
&& cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/
# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={} PYTHONPATH=/opt/python/v
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
""".format(
version
)
"""
return df
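The rewrite above folds two `.format()` calls into one f-string. Worth noting for readers editing these templates: doubled braces inside an f-string escape to literal braces, so shell-style `${PYENV_ROOT}` references survive interpolation while `{version}` is substituted. A minimal standalone check:

```python
# {version} interpolates; doubled braces emit literal ${...} references
# into the generated Dockerfile text.
version = "3.12"
print(f"ENV PYVER={version} PYTHONPATH=/opt/python/v")
# -> ENV PYVER=3.12 PYTHONPATH=/opt/python/v
print(f"RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}")
# -> RUN ln -sf ${PYENV_ROOT}/versions/${PYVER}* ${PYTHONPATH}
```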


4 changes: 2 additions & 2 deletions python/openai/openai_frontend/main.py
@@ -65,11 +65,11 @@ def start_kserve_frontends(server, args):
from tritonfrontend import KServeGrpc, KServeHttp

http_options = KServeHttp.Options(address=args.host, port=args.kserve_http_port)
http_service = KServeHttp.Server(server, http_options)
http_service = KServeHttp(server, http_options)
http_service.start()

grpc_options = KServeGrpc.Options(address=args.host, port=args.kserve_grpc_port)
grpc_service = KServeGrpc.Server(server, grpc_options)
grpc_service = KServeGrpc(server, grpc_options)
grpc_service.start()

except ModuleNotFoundError:
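For context: this branch's tritonfrontend rework makes the frontend classes directly constructible, so `KServeHttp.Server(...)` and `KServeGrpc.Server(...)` become `KServeHttp(...)` and `KServeGrpc(...)`. A rough standalone sketch under that assumption (the `tritonserver` startup line is illustrative, not taken from this diff):

```python
import tritonserver
from tritonfrontend import KServeGrpc, KServeHttp

# Illustrative in-process server; main.py builds `server` elsewhere.
server = tritonserver.Server(model_repository="/path/to/models").start()

http_service = KServeHttp(server, KServeHttp.Options(address="0.0.0.0", port=8000))
http_service.start()

grpc_service = KServeGrpc(server, KServeGrpc.Options(address="0.0.0.0", port=8001))
grpc_service.start()

# ... serve traffic, then shut down ...
http_service.stop()
grpc_service.stop()
```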
3 changes: 3 additions & 0 deletions python/openai/requirements.txt
@@ -26,4 +26,7 @@

# FastAPI Application
fastapi==0.111.1
# Fix httpx version to avoid bug in openai library:
# https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332/3
httpx==0.27.2
openai==1.40.6
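(The pin works around httpx 0.28.0 removing the long-deprecated `proxies` constructor argument that the pinned openai client still passes; 0.27.2 is the newest release that accepts it.)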
16 changes: 14 additions & 2 deletions qa/L0_backend_python/common.sh
@@ -42,7 +42,7 @@ install_conda() {
eval "$(./miniconda/bin/conda shell.bash hook)"
}

install_build_deps() {
install_build_deps_apt() {
apt update && apt install software-properties-common rapidjson-dev -y
# Using CMAKE installation instruction from:: https://apt.kitware.com/
apt update -q=2 \
@@ -54,6 +54,18 @@
&& apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3*
}

install_build_deps_yum() {
yum install rapidjson-devel -y
}

install_build_deps() {
if [[ ${TRITON_RHEL} -eq "1" ]]; then
install_build_deps_yum
else
install_build_deps_apt
fi
}
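(Because `[[ ... -eq ... ]]` compares arithmetically, an unset `TRITON_RHEL` evaluates as 0 and `install_build_deps` falls through to the apt path, preserving the old default behavior on Debian-based images.)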

create_conda_env() {
local python_version=$1
local env_name=$2
@@ -74,6 +86,6 @@
rm -rf python_backend
git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
(cd python_backend/ && mkdir builddir && cd builddir && \
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG ../ && \
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG -DPYBIND11_PYTHON_VERSION=$PY_VERSION ../ && \
make -j18 triton-python-backend-stub)
}
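(The new `-DPYBIND11_PYTHON_VERSION=$PY_VERSION` flag pins which Python interpreter pybind11 resolves when building the stub, so the stub links against the same Python version as the conda environment under test; the `PY_VERSION` exports added to env/test.sh below feed this flag.)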
37 changes: 25 additions & 12 deletions qa/L0_backend_python/env/test.sh
@@ -44,6 +44,7 @@ install_conda
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
# Create a model with python 3.7 version
export PY_VERSION="3.7"
create_conda_env "3.7" "python-3-7"
conda install numpy=1.20.1 -y
conda install tensorflow=2.1.0 -y
@@ -67,6 +68,7 @@ conda deactivate
# previous test.
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
export PY_VERSION="3.7.1"
path_to_conda_pack="$PWD/python-3-7-1"
create_conda_env_with_specified_path "3.7" $path_to_conda_pack
conda install numpy=1.20.3 -y
@@ -89,6 +91,7 @@
# Create a model with python 3.6 version
# Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of
# the Python model indicates that the environment has been setup correctly.
export PY_VERSION="3.6"
create_conda_env "3.6" "python-3-6"
conda install -c conda-forge libstdcxx-ng=14 -y
conda install numpy=1.18.1 -y
@@ -116,9 +119,13 @@ conda deactivate
path_to_conda_pack='$$TRITON_MODEL_DIRECTORY/python_3_12_environment.tar.gz'
create_conda_env "3.12" "python-3-12"
conda install -c conda-forge libstdcxx-ng=14 -y
TF_VERSION="2.16.2"
conda install numpy=1.26.4 -y
conda install tensorflow=2.16.2 -y
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is 2.16.2"
if [ $TRITON_RHEL -eq 1 ]; then
TF_VERSION="2.17.0"
fi
conda install tensorflow=${TF_VERSION} -y
PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is ${TF_VERSION}"
conda pack -o python3.12.tar.gz
mkdir -p models/python_3_12/1/
cp ../../python_models/python_version/config.pbtxt ./models/python_3_12
@@ -137,8 +144,7 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server
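(`kill_server` presumably wraps the former `kill $SERVER_PID; wait $SERVER_PID` pairs in a shared helper; its definition is outside this diff.)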

set +e
for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY37_VERSION_STRING" "$PY37_1_VERSION_STRING" "$PY312_VERSION_STRING"; do
@@ -154,6 +160,15 @@ done
# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to
# stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260.
export LC_ALL=INVALID
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

kill_server

grep "Locale is (None, None)" $SERVER_LOG
if [ $? -ne 0 ]; then
cat $SERVER_LOG
@@ -175,8 +190,7 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
grep "Locale is ('en_US', 'UTF-8')" $SERVER_LOG
@@ -207,8 +221,7 @@ touch -m models/python_3_12/python_3_12_environment.tar.gz
# The environment should be re-extracted
curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e

@@ -248,6 +261,8 @@ rm -rf models/python_3_7
aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"

rm $SERVER_LOG
# Occasionally needs more time to load
SERVER_TIMEOUT=420

SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1"
run_server
@@ -258,8 +273,7 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
grep "$PY36_VERSION_STRING" $SERVER_LOG
@@ -292,8 +306,7 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

kill $SERVER_PID
wait $SERVER_PID
kill_server

set +e
for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do
27 changes: 27 additions & 0 deletions qa/L0_backend_python/io/io_test.py
@@ -259,6 +259,33 @@ def test_requested_output_decoupled(self):
self.assertTrue(np.allclose(gpu_output_data[1:], next_gpu_output_data))
self.assertTrue(user_data._completed_requests.empty())

# Assert a prior crash is fixed regarding requested output on a decoupled model.
def test_requested_output_decoupled_prior_crash(self):
model_name = "llm"
prompt = "test"

text_input_data = np.array([[prompt]]).astype(object)
inputs = [grpcclient.InferInput("text_input", text_input_data.shape, "BYTES")]
inputs[-1].set_data_from_numpy(text_input_data)

requested_outputs = [grpcclient.InferRequestedOutput("text_output")]

user_data = UserData()
with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
client.start_stream(callback=partial(callback, user_data))
client.async_stream_infer(
model_name=model_name, inputs=inputs, outputs=requested_outputs
)
client.stop_stream()

outputs = ""
while not user_data._completed_requests.empty():
result = user_data._completed_requests.get(block=False)
if isinstance(result, InferenceServerException):
raise result
outputs += str(result.as_numpy("text_output")[0], encoding="utf-8")
self.assertGreater(len(outputs), 0, "text_output is empty")


if __name__ == "__main__":
unittest.main()
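The new test leans on the `UserData`/`callback` streaming helpers defined earlier in io_test.py. For readers outside the test suite, a minimal sketch of that pattern (names match the diff; the bodies are assumptions):

```python
import queue

class UserData:
    def __init__(self):
        # Streamed responses (or errors) accumulate here for later draining.
        self._completed_requests = queue.Queue()

def callback(user_data, result, error):
    # The gRPC stream invokes this once per response; errors arrive as
    # InferenceServerException instances and are queued alongside results.
    user_data._completed_requests.put(error if error is not None else result)

# The test binds the queue with functools.partial(callback, user_data) and
# drains user_data._completed_requests after stop_stream().
```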
(The remaining 15 changed files are not shown.)
