AOTInductor BERT CPP example #2931

Merged Feb 27, 2024 · 48 commits

Changes from 34 commits

Commits
ab41f43
fix compile error on mac x86
lxning Feb 5, 2024
f833d48
Merge branch 'master' into cpp_mac_x86_compile
lxning Feb 5, 2024
9f85b38
update install libtorch
lxning Feb 5, 2024
b1fadca
fmt
lxning Feb 7, 2024
b430116
merge master
lxning Feb 7, 2024
0addfff
fmt
lxning Feb 8, 2024
e64ef7d
fmt
lxning Feb 8, 2024
ce0e65a
Set return type of bert model and dynamic shapes
mreso Feb 9, 2024
ceacda0
Merge branch 'master' into feat/bertcpp
lxning Feb 9, 2024
e827f2f
fix json value
lxning Feb 9, 2024
d0315a6
fix build on linux
lxning Feb 9, 2024
2692dd2
add linux dependency
lxning Feb 10, 2024
92a6238
replace sentenepice with tokenizers-cpp
lxning Feb 10, 2024
94d4309
update dependency
lxning Feb 11, 2024
fd5e145
add attention mask
lxning Feb 12, 2024
203f19e
fix compile error
lxning Feb 12, 2024
0d8f505
fix compile error
lxning Feb 12, 2024
45ae6b2
fmt
lxning Feb 12, 2024
558df11
Fmt
lxning Feb 12, 2024
9c2cdf3
tockenizer-cpp git submodule
lxning Feb 12, 2024
a7a551f
update handler
lxning Feb 12, 2024
748b734
fmt
lxning Feb 13, 2024
0bbfc18
fmt
lxning Feb 14, 2024
472df28
Merge branch 'master' into feat/bertcpp
lxning Feb 14, 2024
4b2a1ce
fmt
lxning Feb 14, 2024
0604533
unset env
lxning Feb 14, 2024
9922a99
fix path
lxning Feb 15, 2024
0e81e4f
Fix type error in bert aot example
mreso Feb 15, 2024
ac08078
fmt
lxning Feb 17, 2024
869b9a3
fmt
lxning Feb 18, 2024
294175e
Merge branch 'master' into feat/bertcpp
lxning Feb 18, 2024
d6fa808
update max setting
lxning Feb 18, 2024
c33f66c
fix lint
lxning Feb 18, 2024
caa5042
add limitation
lxning Feb 18, 2024
d39ba51
pinned folly to v2024.02.19.00
lxning Feb 23, 2024
0e1d773
pinned yam-cpp with tags/0.8.0
lxning Feb 23, 2024
f8c71d4
pinned yaml-cpp 0.8.0
lxning Feb 25, 2024
be81439
update build.sh
lxning Feb 25, 2024
71deb70
pinned yaml-cpp v0.8.0
lxning Feb 25, 2024
ebbf119
fmt
lxning Feb 25, 2024
df861f1
Merge branch 'master' into feat/bertcpp
lxning Feb 25, 2024
a0c710b
fix typo
lxning Feb 25, 2024
20d8799
add submodule kineto
lxning Feb 26, 2024
6accaf4
fmt
lxning Feb 26, 2024
ee74ad9
fix workflow
lxning Feb 26, 2024
9b67364
fix workflow
lxning Feb 26, 2024
9c1a33a
fix ubuntu version
lxning Feb 26, 2024
9ab5336
update readme
lxning Feb 27, 2024
4 changes: 2 additions & 2 deletions cpp/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
project(torchserve_cpp VERSION 0.1)

set(CMAKE_CXX_STANDARD 17)
@@ -30,7 +30,7 @@ find_package(folly REQUIRED)
find_package(fmt REQUIRED)
find_package(gflags REQUIRED)
find_package(Torch REQUIRED)
+find_package(yaml-cpp REQUIRED NO_CMAKE_PATH)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

include_directories(${TORCH_INCLUDE_DIRS})
1 change: 1 addition & 0 deletions cpp/README.md
@@ -2,6 +2,7 @@
## Requirements
* C++17
* GCC version: gcc-9
+* cmake version: 3.26.4+
## Installation and Running TorchServe CPP

### Install dependencies
62 changes: 29 additions & 33 deletions cpp/build.sh
@@ -155,32 +155,14 @@ function install_yaml_cpp() {
cd "$BWD" || exit
}

-function install_sentencepiece() {
-  SENTENCEPIECE_SRC_DIR=$BASE_DIR/third-party/sentencepiece
-  SENTENCEPIECE_BUILD_DIR=$DEPS_DIR/sentencepiece-build
-
-  if [ ! -d "$SENTENCEPIECE_SRC_DIR" ] ; then
-    echo -e "${COLOR_GREEN}[ INFO ] Cloning sentencepiece repo ${COLOR_OFF}"
-    git clone https://github.com/google/sentencepiece.git "$SENTENCEPIECE_SRC_DIR"
-    cd $SENTENCEPIECE_SRC_DIR
-    git checkout tags/v0.1.99
-  fi
-
-  if [ ! -d "$SENTENCEPIECE_BUILD_DIR" ] ; then
-    echo -e "${COLOR_GREEN}[ INFO ] Building sentencepiece ${COLOR_OFF}"
-
-    mkdir $SENTENCEPIECE_BUILD_DIR
-    cd $SENTENCEPIECE_BUILD_DIR
-    cmake $SENTENCEPIECE_SRC_DIR
-    make -i $(nproc)
-    if [ "$PLATFORM" = "Linux" ]; then
-      sudo make install
-      sudo ldconfig -v
-    elif [ "$PLATFORM" = "Mac" ]; then
-      make install
-    fi
-
-    echo -e "${COLOR_GREEN}[ INFO ] sentencepiece is installed ${COLOR_OFF}"
+function install_tokenizer_cpp() {
+  TOKENIZERS_CPP_SRC_DIR=$BASE_DIR/third-party/tokenizers-cpp
+
+  if [ ! -d "$TOKENIZERS_CPP_SRC_DIR" ] ; then
+    echo -e "${COLOR_GREEN}[ INFO ] Cloning tokenizers-cpp repo ${COLOR_OFF}"
+    git clone https://github.com/mlc-ai/tokenizers-cpp.git "$TOKENIZERS_CPP_SRC_DIR"
+    cd $TOKENIZERS_CPP_SRC_DIR
+    git submodule update --init --recursive
Reviewer comment (Collaborator):

Better to create a submodule in third-party directly instead of cloning it manually. That way we freeze a specific commit and nothing breaks if the tokenizer-cpp repo gets updated. See llama2.so for example. git submodule update --init --recursive is executed in build.sh for all our submodules.
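A minimal sketch of that suggestion (the commit hash placeholder is hypothetical; run from the repository root):

```bash
# Pin tokenizers-cpp as a git submodule instead of cloning it in build.sh
git submodule add https://github.com/mlc-ai/tokenizers-cpp.git cpp/third-party/tokenizers-cpp
git -C cpp/third-party/tokenizers-cpp checkout <commit-to-pin>
git add .gitmodules cpp/third-party/tokenizers-cpp
```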

  fi

  cd "$BWD" || exit
@@ -208,14 +190,28 @@ function prepare_test_files() {
  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
    wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
  fi
-  if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
-    local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
-    if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
-      wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
+  # PT2.2 torch.export does not support Mac
+  if [ "$PLATFORM" = "Linux" ]; then
+    if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
+      local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
+      if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
+        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
+      fi
+      local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
+      PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
+    fi
+    if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
+      pip install transformers
Reviewer comment (Collaborator):

This will better fit into the cpp section of ts_scripts/install_dependencies.py

+      local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
+      export TOKENIZERS_PARALLELISM=false
+      cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
+      python aot_compile_export.py
+      mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
+      mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
+      export TOKENIZERS_PARALLELISM=""
+    fi
-    local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
-    PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
fi
cd "$BWD" || exit
}

function build() {
@@ -401,7 +397,7 @@ install_folly
install_kineto
install_libtorch
install_yaml_cpp
-install_sentencepiece
+install_tokenizer_cpp
build_llama_cpp
prepare_test_files
build
9 changes: 7 additions & 2 deletions cpp/src/examples/CMakeLists.txt
@@ -1,8 +1,13 @@

add_subdirectory("../../../examples/cpp/babyllama/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")

-add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")

add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/llamacpp/llamacpp_handler/")

add_subdirectory("../../../examples/cpp/mnist/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/mnist/mnist_handler/")

+# PT2.2 torch.export does not support Mac
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+  add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")
+
+  add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
+endif()
7 changes: 6 additions & 1 deletion cpp/src/utils/CMakeLists.txt
@@ -12,7 +12,12 @@ list(APPEND TS_UTILS_SOURCE_FILES ${TS_UTILS_SRC_DIR}/metrics/registry.cc)
add_library(ts_utils SHARED ${TS_UTILS_SOURCE_FILES})
target_include_directories(ts_utils PUBLIC ${TS_UTILS_SRC_DIR})
target_include_directories(ts_utils PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(ts_utils ${FOLLY_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} yaml-cpp::yaml-cpp)
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  target_link_libraries(ts_utils ${FOLLY_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} yaml-cpp::yaml-cpp)
+else()
+  target_link_libraries(ts_utils ${FOLLY_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} yaml-cpp)
+endif()

install(TARGETS ts_utils DESTINATION ${torchserve_cpp_SOURCE_DIR}/_build/libs)

list(APPEND FOO_SOURCE_FILES ${TS_UTILS_SRC_DIR}/ifoo.hh)
23 changes: 23 additions & 0 deletions cpp/test/examples/examples_test.cc
@@ -59,3 +59,26 @@ TEST_F(ModelPredictTest, TestLoadPredictLlamaCppHandler) {
          base_dir + "llamacpp_handler", "llamacpp", -1, "", "", 1, false),
      base_dir + "llamacpp_handler", base_dir + "prompt.txt", "llm_ts", 200);
}

TEST_F(ModelPredictTest, TestLoadPredictAotInductorBertHandler) {
  std::string base_dir = "_build/test/resources/examples/aot_inductor/";
  std::string file1 = base_dir + "bert_handler/bert-seq.so";
  std::string file2 = base_dir + "bert_handler/tokenizer.json";

  std::ifstream f1(file1);
  std::ifstream f2(file2);

  if (!f1.good() || !f2.good())
    GTEST_SKIP() << "Skipping TestLoadPredictAotInductorBertHandler because "
                    "of missing files: "
                 << file1 << " or " << file2;

  this->LoadPredict(
      std::make_shared<torchserve::LoadModelRequest>(
          base_dir + "bert_handler", "bert_aot",
          torch::cuda::is_available() ? 0 : -1, "", "", 1, false),
      base_dir + "bert_handler",
      base_dir + "bert_handler/sample_text.txt",
      "bert_ts",
      200);
}
11 changes: 11 additions & 0 deletions cpp/test/resources/examples/aot_inductor/bert_handler/MAR-INF/MANIFEST.json
@@ -0,0 +1,11 @@
{
  "createdOn": "12/02/2024 21:09:26",
  "runtime": "LSP",
  "model": {
    "modelName": "bertcppaot",
    "handler": "libbert_handler:BertCppHandler",
    "modelVersion": "1.0",
    "configFile": "model-config.yaml"
  },
  "archiverVersion": "0.9.0"
}
4 changes: 4 additions & 0 deletions cpp/test/resources/examples/aot_inductor/bert_handler/index_to_name.json
@@ -0,0 +1,4 @@
{
  "0": "Not Accepted",
  "1": "Accepted"
}
13 changes: 13 additions & 0 deletions cpp/test/resources/examples/aot_inductor/bert_handler/model-config.yaml
@@ -0,0 +1,13 @@
minWorkers: 1
maxWorkers: 1
batchSize: 2

handler:
  model_so_path: "bert-seq.so"
  tokenizer_path: "tokenizer.json"
  mapping: "index_to_name.json"
  model_name: "bert-base-uncased"
  mode: "sequence_classification"
  do_lower_case: true
  num_labels: 2
  max_length: 150
1 change: 1 addition & 0 deletions cpp/test/resources/examples/aot_inductor/bert_handler/sample_text.txt
@@ -0,0 +1 @@
Bloomberg has decided to publish a new report on the global economy.
5 changes: 5 additions & 0 deletions examples/cpp/aot_inductor/bert/CMakeLists.txt
@@ -0,0 +1,5 @@
set(TOKENZIER_CPP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/tokenizers-cpp)
add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)
add_library(bert_handler SHARED src/bert_handler.cc)
target_include_directories(bert_handler PRIVATE ${TOKENZIER_CPP_PATH}/include)
target_link_libraries(bert_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} tokenizers_cpp)
60 changes: 60 additions & 0 deletions examples/cpp/aot_inductor/bert/README.md
@@ -0,0 +1,60 @@
This example uses AOTInductor to compile [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) into a shared object (.so) file, which is then executed using libtorch.
The C++ source code of the handler for this example can be found [here](src).
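For orientation, here is a minimal sketch of how such an AOTInductor-compiled .so can be loaded and run from libtorch. The actual handler logic lives in [src](src); the runner class and header path below follow the PyTorch 2.2 AOTI API and are an assumption that may differ across versions:

```cpp
#include <iostream>
#include <vector>
#include <torch/torch.h>
// PyTorch 2.2 header location; later releases move this under torch/csrc/inductor/aoti_runner/
#include <torch/csrc/inductor/aoti_model_container_runner.h>

int main() {
  // Load the AOTInductor-compiled BERT model from its shared object
  torch::inductor::AOTIModelContainerRunnerCpu runner("bert-seq.so");

  // Token ids and attention mask; shapes must stay inside the dynamic ranges
  // baked in at export time (batch <= 15, seq_len <= 511)
  auto input_ids = torch::ones({2, 128}, torch::kLong);
  auto attention_mask = torch::ones({2, 128}, torch::kLong);
  std::vector<torch::Tensor> inputs{input_ids, attention_mask};

  // For sequence classification the first output holds logits of shape [batch, num_labels]
  std::vector<torch::Tensor> outputs = runner.run(inputs);
  std::cout << outputs[0] << std::endl;
  return 0;
}
```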

### Setup
1. Follow the instructions in [README.md](../../../../cpp/README.md) to build the TorchServe C++ backend.

```
cd serve/cpp
./build.sh
```

The build script will create the necessary artifacts for this example.
To recreate them by hand, you can follow the prepare_test_files function of the [build.sh](../../../../cpp/build.sh) script; a condensed sketch of those steps follows below.
We will need the handler .so file as well as bert-seq.so and tokenizer.json.
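A condensed sketch of the manual steps, mirroring prepare_test_files in [build.sh](../../../../cpp/build.sh) (paths assume you run from the repository root and built into `cpp/_build`):

```bash
# Export bert-base-uncased with AOTInductor and stage the artifacts for the handler
pip install transformers
cd examples/cpp/aot_inductor/bert/
export TOKENIZERS_PARALLELISM=false
python aot_compile_export.py   # writes bert-seq.so and Transformer_model/tokenizer.json
mv bert-seq.so ../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/bert-seq.so
mv Transformer_model/tokenizer.json ../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/tokenizer.json
unset TOKENIZERS_PARALLELISM
```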

2. Create a [model-config.yaml](model-config.yaml)

```yaml
minWorkers: 1
maxWorkers: 1
batchSize: 2

handler:
model_so_path: "bert-seq.so"
tokenizer_path: "tokenizer.json"
mapping: "index_to_name.json"
model_name: "bert-base-uncased"
mode: "sequence_classification"
do_lower_case: true
num_labels: 2
max_length: 150
```

### Generate Model Artifact Folder

```bash
torch-model-archiver --model-name bertcppaot --version 1.0 --handler ../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/libbert_handler:BertCppHandler --runtime LSP --extra-files index_to_name.json,../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/bert-seq.so,../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/tokenizer.json --config-file model-config.yaml --archive-format no-archive
```

Create a model store directory and move the `bertcppaot` folder into it:

```
mkdir model_store
mv bertcppaot model_store/
```

### Inference

Start TorchServe using the following command:

```
torchserve --ncs --model-store model_store/ --models bertcppaot
```

Run inference using the following command; the expected response is shown below it:

```
curl http://localhost:8080/predictions/bertcppaot -T ../../../../cpp/test/resources/examples/aot_inductor/bert_handler/sample_text.txt
Not Accepted
```
121 changes: 121 additions & 0 deletions examples/cpp/aot_inductor/bert/aot_compile_export.py
@@ -0,0 +1,121 @@
import os
import sys

import torch
import yaml
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    set_seed,
)

set_seed(1)
# PT2.2 has a limitation on the max value of dynamic shape dimensions
Reviewer comment (Collaborator):

Can you link the issue or PR here that describes the problem in 2.2?

MAX_BATCH_SIZE = 15
MAX_LENGTH = 511


def transformers_model_dowloader(
    mode,
    pretrained_model_name,
    num_labels,
    do_lower_case,
    max_length,
    batch_size,
):
print("Download model and tokenizer", pretrained_model_name)
# loading pre-trained model and tokenizer
if mode == "sequence_classification":
config = AutoConfig.from_pretrained(
pretrained_model_name,
num_labels=num_labels,
torchscript=False,
return_dict=False,
)
model = AutoModelForSequenceClassification.from_pretrained(
pretrained_model_name, config=config
)
tokenizer = AutoTokenizer.from_pretrained(
pretrained_model_name, do_lower_case=do_lower_case
)
else:
sys.exit(f"mode={mode} has not been implemented in this cpp example yet.")

NEW_DIR = "./Transformer_model"
try:
os.mkdir(NEW_DIR)
except OSError:
print("Creation of directory %s failed" % NEW_DIR)
else:
print("Successfully created directory %s " % NEW_DIR)

print(
"Save model and tokenizer model based on the setting from setup_config",
pretrained_model_name,
"in directory",
NEW_DIR,
)

model.save_pretrained(NEW_DIR)
Reviewer comment (Collaborator):

Why are we saving the model here? Its already in the hub cache so no need to save it again.

    tokenizer.save_pretrained(NEW_DIR)

    with torch.no_grad():
        model.eval()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = model.to(device=device)
        dummy_input = "This is a dummy input for torch jit trace"
Reviewer comment (Collaborator):

To not be misleading we should change this into "... for torch export"

        inputs = tokenizer.encode_plus(
            dummy_input,
            max_length=max_length,
            padding=True,
            add_special_tokens=True,
            return_tensors="pt",
        )
        input_ids = torch.cat([inputs["input_ids"]] * batch_size, 0).to(device)
        attention_mask = torch.cat([inputs["attention_mask"]] * batch_size, 0).to(
            device
        )
        batch_dim = torch.export.Dim("batch", min=1, max=MAX_BATCH_SIZE)
        seq_len_dim = torch.export.Dim("seq_len", min=1, max=MAX_LENGTH)
        torch._C._GLIBCXX_USE_CXX11_ABI = True
        model_so_path = torch._export.aot_compile(
            model,
            (input_ids, attention_mask),
            dynamic_shapes={
                "input_ids": (batch_dim, seq_len_dim),
                "attention_mask": (batch_dim, seq_len_dim),
            },
            options={
                "aot_inductor.output_path": os.path.join(os.getcwd(), "bert-seq.so"),
                "max_autotune": True,
            },
        )

    return


if __name__ == "__main__":
    dirname = os.path.dirname(__file__)
    if len(sys.argv) > 1:
        filename = os.path.join(dirname, sys.argv[1])
    else:
        filename = os.path.join(dirname, "model-config.yaml")
    with open(filename, "r") as f:
        settings = yaml.safe_load(f)

    mode = settings["handler"]["mode"]
    model_name = settings["handler"]["model_name"]
    num_labels = int(settings["handler"]["num_labels"])
    do_lower_case = bool(settings["handler"]["do_lower_case"])
    max_length = int(settings["handler"]["max_length"])
    batch_size = int(settings["batchSize"])
    transformers_model_dowloader(
        mode,
        model_name,
        num_labels,
        do_lower_case,
        max_length,
        batch_size,
    )
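Usage follows from the `__main__` block above: run the script from examples/cpp/aot_inductor/bert/, optionally passing an alternative config file (the name below is hypothetical; the path is resolved relative to the script):

```bash
python aot_compile_export.py                 # reads model-config.yaml next to the script
python aot_compile_export.py my-config.yaml  # or use a different config
```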
4 changes: 4 additions & 0 deletions examples/cpp/aot_inductor/bert/index_to_name.json
@@ -0,0 +1,4 @@
{
  "0": "Not Accepted",
  "1": "Accepted"
}