feat(//examples/int8): Implement Makefile based execution for ptq and qat

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
peri044 committed Aug 10, 2021
1 parent 436c239 commit b7f6d8a
Showing 5 changed files with 119 additions and 12 deletions.
15 changes: 15 additions & 0 deletions examples/int8/ptq/Makefile
@@ -0,0 +1,15 @@
CXX=g++
DEP_DIR=$(PWD)/deps
ROOT_DIR?="../../../" # path to the TRTorch repository root (i.e. the directory named TRTorch)
INCLUDE_DIRS=-I$(DEP_DIR)/libtorch/include -I$(DEP_DIR)/libtorch/include/torch/csrc/api/include/ -I$(DEP_DIR)/TensorRT-8.0.1.6/include/ -I$(ROOT_DIR) -I /usr/local/cuda-11.1/include -I$(DEP_DIR)/trtorch/include
LIB_DIRS=-L$(DEP_DIR)/trtorch/lib -L$(DEP_DIR)/libtorch/lib -L /usr/local/cuda-11.1/lib64
LIBS=-Wl,--no-as-needed -ltrtorch -Wl,--as-needed -ltorch -ltorch_cuda -ltorch_cpu -ltorch_global_deps -lbackend_with_compiler -lc10 -lc10_cuda -lpthread -lcudart
SRCS=../datasets/cifar10.cpp ../benchmark/benchmark.cpp main.cpp

TARGET=ptq

$(TARGET):
	$(CXX) $(SRCS) $(INCLUDE_DIRS) $(LIB_DIRS) $(LIBS) -o $(TARGET)

clean:
	$(RM) $(TARGET)
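The `?=` assignment above gives `ROOT_DIR` a default only when the variable is not already set, which means it can be overridden straight from the `make` command line. A minimal, self-contained sketch of that behavior (it builds a throwaway Makefile in a temp directory; `/opt/TRTorch` is a made-up path):

```sh
# Build a throwaway Makefile that mirrors the ROOT_DIR?=... pattern above.
tmp=$(mktemp -d)
printf 'ROOT_DIR?=../../../\nshow:\n\t@echo ROOT_DIR=$(ROOT_DIR)\n' > "$tmp/Makefile"

# Without an override, the ?= default is used.
default=$(make -s -C "$tmp" show)
# A command-line assignment wins over ?= (and over = as well).
override=$(make -s -C "$tmp" show ROOT_DIR=/opt/TRTorch)

echo "$default"    # ROOT_DIR=../../../
echo "$override"   # ROOT_DIR=/opt/TRTorch
rm -rf "$tmp"
```

With the real Makefile the same pattern applies, e.g. `make ROOT_DIR=<path_to_TRTORCH>`.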
51 changes: 44 additions & 7 deletions examples/int8/ptq/README.md
@@ -120,25 +120,62 @@ This is a short example application that shows how to use TRTorch to perform pos
## Prerequisites
1. Download CIFAR10 Dataset Binary version ([https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz](https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz))
2. Train a network on CIFAR10 (see `training/` for a VGG16 recipe)
3. Export model to torchscript
## Compilation using bazel
``` shell
bazel build //cpp/ptq --compilation_mode=opt
bazel run //cpp/ptq --compilation_mode=opt <path-to-module> <path-to-cifar10>
```

If you want insight into what is going on under the hood or need debug symbols:

``` shell
bazel build //cpp/ptq --compilation_mode=dbg
bazel run //cpp/ptq --compilation_mode=dbg <path-to-module> <path-to-cifar10>
```

## Usage
This will build a binary named `ptq` in the `bazel-out/k8-<opt|dbg>/bin/cpp/ptq/` directory. Optionally, you can add this directory to your `$PATH` environment variable to run `ptq` from anywhere on your system.

``` shell
ptq <path-to-module> <path-to-cifar10>
```

## Compilation using Makefile

1) Download releases of <a href="https://pytorch.org">LibTorch</a>, <a href="https://github.com/NVIDIA/TRTorch/releases">TRTorch</a> and <a href="https://developer.nvidia.com/nvidia-tensorrt-download">TensorRT</a> and unpack them in the `deps` directory. Ensure CUDA is installed at `/usr/local/cuda`; if not, modify the CUDA include and lib paths in the Makefile.

```sh
cd examples/int8/ptq/deps
# Download the latest TRTorch release tar file (libtrtorch.tar.gz) from https://github.com/NVIDIA/TRTorch/releases
tar -xvzf libtrtorch.tar.gz
# Unzip LibTorch downloaded from pytorch.org
unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cu111.zip
# Extract TensorRT downloaded from developer.nvidia.com
tar -xvzf TensorRT-8.0.1.6.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
```

> If cuDNN is not installed on your system or on your `LD_LIBRARY_PATH`, then do the following as well:
```sh
cd deps
mkdir cudnn && tar -xvzf <cuDNN TARBALL> --directory cudnn --strip-components=1
cd ..
```

Set your `LD_LIBRARY_PATH` accordingly:

```sh
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/deps/trtorch/lib:$(pwd)/deps/libtorch/lib:$(pwd)/deps/TensorRT-8.0.1.6/lib:$(pwd)/deps/cudnn/lib64:/usr/local/cuda/lib64/
```
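A missing directory on `LD_LIBRARY_PATH` typically surfaces later as a confusing loader error, so it can be worth checking each entry up front. A small sketch (the `deps` layout below is created only for the demonstration, with one entry deliberately absent):

```sh
tmp=$(mktemp -d)
mkdir -p "$tmp/deps/trtorch/lib" "$tmp/deps/libtorch/lib"

# One entry deliberately points at a directory that was never created.
LIB_PATH="$tmp/deps/trtorch/lib:$tmp/deps/libtorch/lib:$tmp/deps/TensorRT-8.0.1.6/lib"

missing=0
old_ifs=$IFS; IFS=:
for dir in $LIB_PATH; do
  if [ ! -d "$dir" ]; then
    echo "missing: $dir"
    missing=$((missing + 1))
  fi
done
IFS=$old_ifs
echo "$missing missing entries"    # 1 missing entries
```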

2) Build and run `ptq`

We include the header files `cifar10.h` and `benchmark.h` from `ROOT_DIR`. `ROOT_DIR` should point to the directory where TRTorch is located (`<path_to_TRTORCH>`).

By default it is set to `../../../`. If your TRTorch directory structure is different, set `ROOT_DIR` accordingly.

```sh
cd examples/int8/ptq
# This will generate a ptq binary
make
./ptq <path-to-module> <path-to-cifar10>
```

## Example Output
15 changes: 15 additions & 0 deletions examples/int8/qat/Makefile
@@ -0,0 +1,15 @@
CXX=g++
DEP_DIR=$(PWD)/deps
ROOT_DIR?="../../../" # path to the TRTorch repository root (i.e. the directory named TRTorch)
INCLUDE_DIRS=-I$(DEP_DIR)/libtorch/include -I$(DEP_DIR)/libtorch/include/torch/csrc/api/include/ -I$(DEP_DIR)/TensorRT-8.0.1.6/include/ -I$(ROOT_DIR) -I /usr/local/cuda-11.1/include -I$(DEP_DIR)/trtorch/include
LIB_DIRS=-L$(DEP_DIR)/trtorch/lib -L$(DEP_DIR)/libtorch/lib -L /usr/local/cuda-11.1/lib64
LIBS=-Wl,--no-as-needed -ltrtorch -Wl,--as-needed -ltorch -ltorch_cuda -ltorch_cpu -ltorch_global_deps -lbackend_with_compiler -lc10 -lc10_cuda -lpthread -lcudart
SRCS=../datasets/cifar10.cpp ../benchmark/benchmark.cpp main.cpp

TARGET=qat

$(TARGET):
	$(CXX) $(SRCS) $(INCLUDE_DIRS) $(LIB_DIRS) $(LIBS) -o $(TARGET)

clean:
	$(RM) $(TARGET)
49 changes: 45 additions & 4 deletions examples/int8/qat/README.md
@@ -14,23 +14,64 @@ This is a short example application that shows how to use TRTorch to perform inf
1. Download CIFAR10 Dataset Binary version ([https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz](https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz))
2. Train a network on CIFAR10 and perform quantization aware training on it. Refer to `cpp/int8/training/vgg16/README.md` for detailed instructions.
Export the QAT model to Torchscript.
3. Install NVIDIA's <a href="https://github.com/NVIDIA/TensorRT/tree/master/tools/pytorch-quantization">pytorch quantization toolkit</a>
4. TensorRT 8.0.1.6 or above

## Compilation using bazel

``` shell
bazel build //cpp/qat --compilation_mode=opt
bazel run //cpp/qat --compilation_mode=opt <path-to-module> <path-to-cifar10>
```

If you want insight into what is going on under the hood or need debug symbols:

``` shell
bazel build //cpp/qat --compilation_mode=dbg
bazel run //cpp/qat --compilation_mode=dbg <path-to-module> <path-to-cifar10>
```

This will build a binary named `qat` in the `bazel-out/k8-<opt|dbg>/bin/cpp/qat/` directory. Optionally, you can add this directory to your `$PATH` environment variable to run `qat` from anywhere on your system.

## Compilation using Makefile

1) Download releases of <a href="https://pytorch.org">LibTorch</a>, <a href="https://github.com/NVIDIA/TRTorch/releases">TRTorch</a> and <a href="https://developer.nvidia.com/nvidia-tensorrt-download">TensorRT</a> and unpack them in the `deps` directory. Ensure CUDA is installed at `/usr/local/cuda`; if not, modify the CUDA include and lib paths in the Makefile.

```sh
cd examples/int8/qat/deps
# Download the latest TRTorch release tar file (libtrtorch.tar.gz) from https://github.com/NVIDIA/TRTorch/releases
tar -xvzf libtrtorch.tar.gz
# Unzip LibTorch downloaded from pytorch.org
unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cu111.zip
# Extract TensorRT downloaded from developer.nvidia.com
tar -xvzf TensorRT-8.0.1.6.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
```

> If cuDNN is not installed on your system or on your `LD_LIBRARY_PATH`, then do the following as well:
```sh
cd deps
mkdir cudnn && tar -xvzf <cuDNN TARBALL> --directory cudnn --strip-components=1
cd ..
```

Set your `LD_LIBRARY_PATH` accordingly:

```sh
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/deps/trtorch/lib:$(pwd)/deps/libtorch/lib:$(pwd)/deps/TensorRT-8.0.1.6/lib:$(pwd)/deps/cudnn/lib64:/usr/local/cuda/lib64/
```

2) Build and run `qat`

We include the header files `cifar10.h` and `benchmark.h` from `ROOT_DIR`. `ROOT_DIR` should point to the directory where TRTorch is located (`<path_to_TRTORCH>`).

By default it is set to `../../../`. If your TRTorch directory structure is different, set `ROOT_DIR` accordingly.

```sh
cd examples/int8/qat
# This will generate a qat binary
make
./qat <path-to-module> <path-to-cifar10>
```

## Usage

1 change: 0 additions & 1 deletion examples/int8/qat/main.cpp
@@ -34,7 +34,6 @@ torch::jit::Module compile_int8_qat_model(const std::string& data_dir, torch::ji
/// Configure settings for compilation
auto compile_spec = trtorch::CompileSpec(inputs);
/// Set operating precision to INT8
// compile_spec.enabled_precisions.insert(torch::kF16);
compile_spec.enabled_precisions.insert(torch::kI8);
/// Set max batch size for the engine
compile_spec.max_batch_size = 32;
