-
Notifications
You must be signed in to change notification settings - Fork 187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update Triton to v2.11 #7142
Merged
smuzaffar
merged 8 commits into
cms-sw:IB/CMSSW_12_0_X/master
from
kpedro88:triton_v2.11
Jul 19, 2021
Merged
Update Triton to v2.11 #7142
Changes from 6 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
2f757ee
first attempt to install separate client repo; still ExternalProject …
kpedro88 2fb1145
bump cmake version
kpedro88 a52fab3
install common repo separately; some issues with TARGET_OBJECTS
kpedro88 e0f94d2
modify common repo when importing in client
kpedro88 7feb0e0
fix 'relocation cannot be used' error: compiles successfully
kpedro88 196d001
add missing functions to build
kpedro88 7ac09dc
model_config files as sources
kpedro88 43e05c3
update tool conf
kpedro88 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
### RPM external triton-inference-client-toolfile 1.0 | ||
Requires: triton-inference-client | ||
 |
||
# nothing to unpack or compile: this package only generates a SCRAM toolfile
%prep | ||
 |
||
%build | ||
 |
||
# write the SCRAM tool definition that exposes the grpcclient library, its
# include/lib paths, and the protobuf/grpc/cuda dependencies to CMSSW builds;
# @TOOL_VERSION@/@TOOL_ROOT@ are substituted later by the packaging machinery
%install | ||
mkdir -p %i/etc/scram.d | ||
cat << \EOF_TOOLFILE >%i/etc/scram.d/triton-inference-client.xml | ||
<tool name="triton-inference-client" version="@TOOL_VERSION@"> | ||
<info url="https://github.com/triton-inference-server/client"/> | ||
<lib name="grpcclient"/> | ||
<client> | ||
<environment name="TRITON_INFERENCE_CLIENT_BASE" default="@TOOL_ROOT@"/> | ||
<environment name="INCLUDE" default="$TRITON_INFERENCE_CLIENT_BASE/include"/> | ||
<environment name="LIBDIR" default="$TRITON_INFERENCE_CLIENT_BASE/lib"/> | ||
</client> | ||
<use name="protobuf"/> | ||
<use name="grpc"/> | ||
<use name="cuda"/> | ||
</tool> | ||
EOF_TOOLFILE | ||
 |
||
## IMPORT scram-tools-post |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
### RPM external triton-inference-client 2.11.0 | ||
# pin the client sources to an exact commit on the main branch (v2.11.0)
%define branch main | ||
%define github_user triton-inference-server | ||
%define tag_2_11_0 36cd3b3c839288c85b15e4df82cfe8fca3fff21b | ||
 |
||
Source: git+https://github.com/%{github_user}/client.git?obj=%{branch}/%{tag_2_11_0}&export=%{n}-%{realversion}&output=/%{n}-%{realversion}.tgz | ||
BuildRequires: cmake git | ||
Requires: protobuf grpc cuda | ||
 |
||
%prep | ||
 |
||
%setup -n %{n}-%{realversion} | ||
 |
||
%build | ||
 |
||
# locations of CMakeLists.txt | ||
PROJ_DIR=../%{n}-%{realversion}/src/c++ | ||
CML_CPP=${PROJ_DIR}/CMakeLists.txt | ||
CML_LIB=${PROJ_DIR}/library/CMakeLists.txt | ||
 |
||
# NOTE(review): the sed edits below delete fixed line ranges (",+13d", "+5d");
# they are tied to the exact v2.11 CMakeLists layout and will silently break
# on a version bump — re-verify on every update
# remove rapidjson dependence | ||
sed -i '/RapidJSON CONFIG REQUIRED/,+13d;' ${CML_LIB} | ||
sed -i '/triton-common-json/d' ${CML_LIB} | ||
# core repo not needed for grpc-client-only install | ||
sed -i '/FetchContent_MakeAvailable(repo-core)/d' ${CML_CPP} | ||
# remove attempts to install external libs | ||
sed -i '\~/../../_deps/repo-third-party-build/~d' ${CML_LIB} | ||
sed -i '\~/../../third-party/~d' ${CML_LIB} | ||
# keep typeinfo in .so by removing ldscript from properties | ||
sed -i '/set_target_properties/,+5d' ${CML_LIB} | ||
# change flag due to bug in gcc10 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95148 | ||
# NOTE(review): the probe squashes "X.Y.Z" into digits (10.3.0 -> 1030) and
# assumes the version sits in field 3 of `gcc --version` — confirm on the
# build platforms in use
if [[ `gcc --version | head -1 | cut -d' ' -f3 | cut -d. -f1,2,3 | tr -d .` -gt 1000 ]] ; then | ||
sed -i -e "s|Werror|Wtype-limits|g" ${CML_LIB} | ||
fi | ||
|
||
# extracted from https://github.com/triton-inference-server/server/blob/v2.11.0/src/core/model_config.h | ||
cat << 'EOF' > ${PROJ_DIR}/library/model_config.h | ||
#pragma once | ||
|
||
#include <stdint.h> | ||
#include "model_config.pb.h" | ||
|
||
namespace nvidia { namespace inferenceserver { | ||
|
||
size_t GetDataTypeByteSize(const inference::DataType dtype); | ||
|
||
inference::DataType ProtocolStringToDataType(const std::string& dtype); | ||
|
||
inference::DataType ProtocolStringToDataType(const char* dtype, size_t len); | ||
|
||
}} // namespace nvidia::inferenceserver | ||
EOF | ||
|
||
# extracted from https://github.com/triton-inference-server/server/blob/v2.11.0/src/core/model_config.cc | ||
cat << 'EOF' > ${PROJ_DIR}/library/model_config.cc | ||
#include "model_config.h" | ||
|
||
namespace nvidia { namespace inferenceserver { | ||
|
||
size_t | ||
GetDataTypeByteSize(const inference::DataType dtype) | ||
{ | ||
switch (dtype) { | ||
case inference::DataType::TYPE_BOOL: | ||
return 1; | ||
case inference::DataType::TYPE_UINT8: | ||
return 1; | ||
case inference::DataType::TYPE_UINT16: | ||
return 2; | ||
case inference::DataType::TYPE_UINT32: | ||
return 4; | ||
case inference::DataType::TYPE_UINT64: | ||
return 8; | ||
case inference::DataType::TYPE_INT8: | ||
return 1; | ||
case inference::DataType::TYPE_INT16: | ||
return 2; | ||
case inference::DataType::TYPE_INT32: | ||
return 4; | ||
case inference::DataType::TYPE_INT64: | ||
return 8; | ||
case inference::DataType::TYPE_FP16: | ||
return 2; | ||
case inference::DataType::TYPE_FP32: | ||
return 4; | ||
case inference::DataType::TYPE_FP64: | ||
return 8; | ||
case inference::DataType::TYPE_STRING: | ||
return 0; | ||
default: | ||
break; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
// Convenience overload: delegates to the length-bounded (const char*, size_t)
// parser using the string's own buffer and size.
inference::DataType
ProtocolStringToDataType(const std::string& dtype)
{
  const char* name = dtype.c_str();
  const size_t name_len = dtype.size();
  return ProtocolStringToDataType(name, name_len);
}
|
||
// Maps a protocol type-name string (e.g. "FP32", "UINT8", "BYTES") to the
// corresponding inference::DataType; returns TYPE_INVALID for unknown names.
// All reads are bounded by 'len', so the buffer need NOT be null-terminated.
// (The upstream version used strcmp on the "B*" branch and indexed dtype[4]
// and dtype[5] without length checks, reading past 'len' for inputs such as
// "INT1" with len == 4 unless a terminator happened to follow.)
inference::DataType
ProtocolStringToDataType(const char* dtype, size_t len)
{
  // Every recognized name is 4 to 6 characters long.
  if (len < 4 || len > 6) {
    return inference::DataType::TYPE_INVALID;
  }

  if ((*dtype == 'I') && (len != 6)) {
    // "INT8" / "INT16" / "INT32" / "INT64"
    if ((dtype[1] == 'N') && (dtype[2] == 'T')) {
      if ((len == 4) && (dtype[3] == '8')) {
        return inference::DataType::TYPE_INT8;
      } else if (len == 5) {  // guard keeps dtype[4] in bounds
        if ((dtype[3] == '1') && (dtype[4] == '6')) {
          return inference::DataType::TYPE_INT16;
        } else if ((dtype[3] == '3') && (dtype[4] == '2')) {
          return inference::DataType::TYPE_INT32;
        } else if ((dtype[3] == '6') && (dtype[4] == '4')) {
          return inference::DataType::TYPE_INT64;
        }
      }
    }
  } else if ((*dtype == 'U') && (len != 4)) {
    // "UINT8" / "UINT16" / "UINT32" / "UINT64"
    if ((dtype[1] == 'I') && (dtype[2] == 'N') && (dtype[3] == 'T')) {
      if ((len == 5) && (dtype[4] == '8')) {
        return inference::DataType::TYPE_UINT8;
      } else if (len == 6) {  // guard keeps dtype[5] in bounds
        if ((dtype[4] == '1') && (dtype[5] == '6')) {
          return inference::DataType::TYPE_UINT16;
        } else if ((dtype[4] == '3') && (dtype[5] == '2')) {
          return inference::DataType::TYPE_UINT32;
        } else if ((dtype[4] == '6') && (dtype[5] == '4')) {
          return inference::DataType::TYPE_UINT64;
        }
      }
    }
  } else if ((*dtype == 'F') && (dtype[1] == 'P') && (len == 4)) {
    // "FP16" / "FP32" / "FP64"
    if ((dtype[2] == '1') && (dtype[3] == '6')) {
      return inference::DataType::TYPE_FP16;
    } else if ((dtype[2] == '3') && (dtype[3] == '2')) {
      return inference::DataType::TYPE_FP32;
    } else if ((dtype[2] == '6') && (dtype[3] == '4')) {
      return inference::DataType::TYPE_FP64;
    }
  } else if (*dtype == 'B') {
    // "BYTES" / "BOOL" — compared character by character (instead of strcmp)
    // so no null terminator is required past 'len'
    if ((len == 5) && (dtype[1] == 'Y') && (dtype[2] == 'T') &&
        (dtype[3] == 'E') && (dtype[4] == 'S')) {
      return inference::DataType::TYPE_STRING;
    } else if ((len == 4) && (dtype[1] == 'O') && (dtype[2] == 'O') &&
               (dtype[3] == 'L')) {
      return inference::DataType::TYPE_BOOL;
    }
  }

  return inference::DataType::TYPE_INVALID;
}
|
||
}} // namespace nvidia::inferenceserver | ||
EOF | ||
|
||
# add custom header to cmake build | ||
sed -i 's/grpc_client.cc common.cc/& model_config.cc/' ${CML_LIB} | ||
sed -i 's/grpc_client.h common.h/& model_config.h/' ${CML_LIB} | ||
sed -i '\~${CMAKE_CURRENT_SOURCE_DIR}/common.h~a ${CMAKE_CURRENT_SOURCE_DIR}/model_config.h' ${CML_LIB} | ||
 |
||
# out-of-source build directory, recreated fresh each run
rm -rf ../build | ||
mkdir ../build | ||
cd ../build | ||
 |
||
# fetch the pinned triton "common" repo ourselves instead of letting cmake's
# FetchContent clone it (it is injected below via FETCHCONTENT_SOURCE_DIR)
# NOTE(review): `curl -k` disables TLS certificate verification; integrity
# rests solely on the pinned commit hash in the URL — confirm acceptable
common_tag_2_11_0=249232758855cc764c78a12964c2a5c09c388d87 | ||
mkdir repo-common && pushd repo-common && curl -k -L https://github.com/%{github_user}/common/archive/${common_tag_2_11_0}.tar.gz | tar -xz --strip=1 && popd | ||
 |
||
# modifications to common repo (loaded by cmake through FetchContent_MakeAvailable) | ||
COMMON_DIR=$PWD/repo-common | ||
CML_TOP=${COMMON_DIR}/CMakeLists.txt | ||
CML_PRB=${COMMON_DIR}/protobuf/CMakeLists.txt | ||
 |
||
# NOTE(review): fixed line-range deletes (",+1d", ",+17d", ",+10d") are tied
# to the pinned common-repo revision — re-verify on version bumps
# remove rapidjson dependence | ||
sed -i '/RapidJSON CONFIG REQUIRED/,+1d;' ${CML_TOP} | ||
sed -i '/JSON utilities/,+17d' ${CML_TOP} | ||
sed -i '/triton-common-json/d' ${CML_TOP} | ||
# remove python dependence | ||
sed -i '/Python REQUIRED COMPONENTS Interpreter/,+10d;' ${CML_PRB} | ||
# change flag due to bug in gcc10 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95148 | ||
if [[ `gcc --version | head -1 | cut -d' ' -f3 | cut -d. -f1,2,3 | tr -d .` -gt 1000 ]] ; then | ||
sed -i -e "s|Werror|Wtype-limits|g" ${CML_PRB} | ||
fi | ||
 |
||
# enable GPU support only when the cmsdist cuda/gcc combination allows it
if [ $(%{cuda_gcc_support}) = true ]; then | ||
TRITON_ENABLE_GPU_VALUE=ON | ||
else | ||
TRITON_ENABLE_GPU_VALUE=OFF | ||
fi | ||
 |
||
# grpc-client-only configure; FETCHCONTENT_SOURCE_DIR_REPO-COMMON points
# cmake at the locally patched common repo instead of cloning it
# NOTE(review): the final -D line ends with a continuation backslash; the
# blank line that follows terminates the command today, but the stray
# backslash is fragile if lines are appended later
cmake ${PROJ_DIR} \ | ||
-DCMAKE_INSTALL_PREFIX="%{i}" \ | ||
-DCMAKE_INSTALL_LIBDIR=lib \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DTRITON_ENABLE_CC_HTTP=OFF \ | ||
-DTRITON_ENABLE_CC_GRPC=ON \ | ||
-DTRITON_ENABLE_PYTHON_HTTP=OFF \ | ||
-DTRITON_ENABLE_PYTHON_GRPC=OFF \ | ||
-DTRITON_ENABLE_PERF_ANALYZER=OFF \ | ||
-DTRITON_ENABLE_EXAMPLES=OFF \ | ||
-DTRITON_ENABLE_TESTS=OFF \ | ||
-DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU_VALUE} \ | ||
-DTRITON_VERSION=%{realversion} \ | ||
-DCMAKE_CXX_FLAGS="-Wno-error -fPIC" \ | ||
-DFETCHCONTENT_SOURCE_DIR_REPO-COMMON=${COMMON_DIR} \ | ||
 |
||
make %{makeprocesses} | ||
 |
||
%install | ||
cd ../build | ||
make install | ||
 |
||
# bake the GPU-support macro into the installed header so all consumers see
# the same TRITON_ENABLE_GPU definition the library was built with
if [ $(%{cuda_gcc_support}) = true ] ; then | ||
# modify header for consistent definition of GPU support | ||
sed -i '/^#ifdef TRITON_ENABLE_GPU/i #define TRITON_ENABLE_GPU' %{i}/include/ipc.h | ||
fi | ||
 |
||
# remove unneeded | ||
rm %{i}/include/triton/common/triton_json.h |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kpedro88 , instead of generating these files here, can you please add these generated files as
cmsdist/triton-inference-client/model_config.[h,cc].file
and then add the following sources and then use