fix: Address issues in PR
Signed-off-by: Naren Dasan <naren@narendasan.com>
Signed-off-by: Naren Dasan <narens@nvidia.com>
narendasan committed Apr 25, 2020
1 parent 46bb485 commit cd24f26
Showing 4 changed files with 105 additions and 32 deletions.
2 changes: 1 addition & 1 deletion core/conversion/conversion.cpp
@@ -235,7 +235,7 @@ bool VerifyConverterSupportForBlock(const torch::jit::Block* b) {
if (!OpSupported(n)) {
auto schema = n->maybeSchema();
TRTORCH_CHECK(schema, "Unable to get schema for Node " << util::node_info(n) \
<< " (conversion.VerifyCoverterSupportForBloxk");
<< " (conversion.VerifyCoverterSupportForBlock");
std::stringstream ss;
ss << *schema;
unsupported_ops.insert(ss.str());
123 changes: 96 additions & 27 deletions cpp/api/README.md
@@ -1,10 +1,10 @@
# C++ API

Targets in this module create the user-facing C++ library for the TRTorch core.

## Building libtrtorch.so

### Debug build
``` shell
bazel build //cpp/api:libtrtorch.so --compilation_mode=dbg
```
@@ -26,12 +26,19 @@ bazel build //cpp/api:libtrtorch.so --cxxopt="-DNDEBUG"
> Temporary; real documentation is coming soon
```c++
namespace trtorch {
/**
* Settings data structure for TRTorch compilation
*
*/
struct TRTORCH_API ExtraInfo {
-//struct TRTORCH_API InputRangesArray {
/**
* @brief A struct to hold an input range (used by TensorRT Optimization profile)
*
* This struct can either hold a single vector representing an input shape, signifying a
* static input shape, or a set of three input shapes representing the min, optimal, and max
* input shapes allowed for the engine.
*/
struct TRTORCH_API InputRange {
std::vector<int64_t> min;
std::vector<int64_t> opt;
@@ -46,7 +53,7 @@ struct TRTORCH_API ExtraInfo {
* Supported Data Types that can be used with TensorRT engines
*
* This class is compatible with c10::DataTypes (but will check for TRT support)
* so there should not be a reason that you need to use this type explicitly.
*/
class DataType {
public:
@@ -59,14 +66,14 @@ struct TRTORCH_API ExtraInfo {
* ex. trtorch::DataType type = DataType::kFloat;
*/
enum Value : int8_t {
/// FP32
kFloat,
/// FP16
kHalf,
/// INT8
/*kChar, char or int8? */
kChar,
};

DataType() = default;
constexpr DataType(Value t) : value(t) {}
DataType(c10::ScalarType t);
@@ -83,7 +90,7 @@ struct TRTORCH_API ExtraInfo {
*
* This class is compatible with c10::DeviceTypes (but will check for TRT support)
* but the only applicable value is at::kCUDA, which maps to DeviceType::kGPU
*
* To use the DeviceType class itself, interface using the enum vs. normal instantiation
*
* ex. trtorch::DeviceType type = DeviceType::kGPU;
@@ -117,7 +124,7 @@ struct TRTORCH_API ExtraInfo {
};

/**
* Enum for selecting engine capability
*/
enum class EngineCapability : int8_t {
kDEFAULT,
@@ -129,24 +136,24 @@ struct TRTORCH_API ExtraInfo {
: input_ranges(std::move(input_ranges)) {}
ExtraInfo(std::vector<std::vector<int64_t>> fixed_sizes);
ExtraInfo(std::vector<c10::ArrayRef<int64_t>> fixed_sizes);

// Defaults should reflect TensorRT defaults for BuilderConfig

/**
* Sizes for inputs to engine, can either be a single size or a range
* defined by Min, Optimal, Max sizes
*
* Order should match call order
*/
std::vector<InputRange> input_ranges;

/**
* Default operating precision for the engine
*/
DataType op_precision = DataType::kFloat;

/**
* Build a refittable engine
*/
bool refit = false;

Expand All @@ -158,10 +165,10 @@ struct TRTORCH_API ExtraInfo {
/**
* Restrict operating types to only the set default operation precision (op_precision)
*/
-bool strict_type = false;
+bool strict_types = false;

/**
* (Only used when targeting DLA (device))
* Lets engine run layers on GPU if they are not supported on DLA
*/
bool allow_gpu_fallback = true;
@@ -189,6 +196,16 @@ struct TRTORCH_API ExtraInfo {
* Maximum size of workspace given to TensorRT
*/
uint64_t workspace_size = 0;

/**
* Maximum batch size (must be >= 1 to be set, 0 means not set)
*/
uint64_t max_batch_size = 0;

/**
* Calibration dataloaders for each input for post training quantization
*/
nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr;
};
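
// A minimal usage sketch of the ExtraInfo struct above, using the fixed_sizes
// constructor it declares; the shape, precision, and workspace values below are
// placeholder assumptions, not library defaults.
inline ExtraInfo make_example_extra_info() {
  std::vector<std::vector<int64_t>> fixed_sizes = {{32, 3, 32, 32}};  // one input, fixed NCHW shape
  ExtraInfo info(fixed_sizes);
  info.op_precision = ExtraInfo::DataType::kHalf;  // build an FP16 engine
  info.workspace_size = 1 << 30;                   // give TensorRT up to 1 GiB of workspace
  return info;
}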

/**
@@ -198,37 +215,89 @@ TRTORCH_API std::string get_build_info();

/**
* Dump the version information for TRTorch including base libtorch and TensorRT versions
* to stdout
*/
TRTORCH_API void dump_build_info();

/**
* @brief Check to see if a module is fully supported by the compiler
*
* @param module: torch::jit::script::Module - Existing TorchScript module
* @param method_name: std::string - Name of method to compile
*
* Takes a module and a method name and checks if the method graph contains purely
* convertible operators
*
* Will print out a list of unsupported operators if the graph is unsupported
*/
TRTORCH_API bool CheckMethodOperatorSupport(const torch::jit::script::Module& module, std::string method_name);

/**
* @brief Compile a TorchScript module for NVIDIA GPUs using TensorRT
*
* @param module: torch::jit::script::Module - Existing TorchScript module
* @param info: trtorch::ExtraInfo - Compilation settings
*
* Takes an existing TorchScript module and a set of settings to configure the compiler
* and will convert methods to JIT Graphs which call equivalent TensorRT engines
*
* Converts specifically the forward method of a TorchScript Module
*/
TRTORCH_API torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo info);

/**
* @brief Compile a TorchScript method for NVIDIA GPUs using TensorRT
*
* @param module: torch::jit::script::Module - Existing TorchScript module
* @param method_name: std::string - Name of method to compile
* @param info: trtorch::ExtraInfo - Compilation settings
*
* Takes an existing TorchScript module and a set of settings to configure the compiler
* and will convert the selected method to a serialized TensorRT engine which can be run with
* TensorRT
*/
-TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::string method_name, ExtraInfo info);
+TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& module, std::string method_name, ExtraInfo info);
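
// A minimal usage sketch for ConvertGraphToTRTEngine, assuming <fstream> is
// included and `mod` was loaded with torch::jit::load; the file name is a
// placeholder. The returned string is a serialized TensorRT engine.
inline void save_engine_sketch(const torch::jit::script::Module& mod, ExtraInfo info) {
  std::string engine = ConvertGraphToTRTEngine(mod, "forward", std::move(info));
  std::ofstream out("forward.engine", std::ios::binary);
  out << engine;
}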

namespace ptq {
/**
* @brief A factory to build a post training quantization calibrator from a torch dataloader
*
* Creates a calibrator to use for post training quantization
* If there are multiple inputs, the dataset should produce an example which is a vector (or similar container) of tensors rather than a single tensor
*
* By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed-forward networks.
* You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_calibrator with
* the calibrator class as a template parameter.
*
* e.g. trtorch::ptq::make_int8_calibrator<nvinfer1::IInt8MinMaxCalibrator>(std::move(calibration_dataloader), calibration_cache_file, use_cache);
*/
template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2, typename DataLoader>
TRTORCH_API inline Int8Calibrator<Algorithm, DataLoader> make_int8_calibrator(DataLoader dataloader, const std::string& cache_file_path, bool use_cache) {
return Int8Calibrator<Algorithm, DataLoader>(std::move(dataloader), cache_file_path, use_cache);
}
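
// A minimal usage sketch for make_int8_calibrator, assuming <torch/data.h> is
// available, an MNIST copy sits in ./data (dataset, paths, and batch size are
// placeholders), and the returned calibrator converts to the
// nvinfer1::IInt8Calibrator* that ExtraInfo::ptq_calibrator expects.
inline void attach_calibrator_sketch(ExtraInfo& info) {
  auto dataset = torch::data::datasets::MNIST("./data")
                     .map(torch::data::transforms::Stack<>());
  auto dataloader = torch::data::make_data_loader(
      std::move(dataset), torch::data::DataLoaderOptions().batch_size(32));
  // The calibrator must outlive engine building, hence static in this sketch
  static auto calibrator = make_int8_calibrator(
      std::move(dataloader), "/tmp/calibration.cache", /*use_cache=*/false);
  info.op_precision = ExtraInfo::DataType::kChar;  // INT8
  info.ptq_calibrator = calibrator;
}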

/**
* @brief A factory to build a post training quantization calibrator from a torch dataloader that only uses the calibration cache
*
* Creates a calibrator to use for post training quantization which reads from a previously created calibration cache. This lets you
* generate the cache in one program that has the dataloader and dataset dependencies, save it, and reuse it later in a program that
* does not. However, the network should be recalibrated if its structure or the input data set changes, and it is the responsibility
* of the application to ensure this.
*
* By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed-forward networks.
* You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_cache_calibrator with
* the calibrator class as a template parameter.
*
* e.g. trtorch::ptq::make_int8_cache_calibrator<nvinfer1::IInt8MinMaxCalibrator>(calibration_cache_file);
*/
template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2>
TRTORCH_API inline Int8CacheCalibrator<Algorithm> make_int8_cache_calibrator(const std::string& cache_file_path) {
return Int8CacheCalibrator<Algorithm>(cache_file_path);
}
} // namespace ptq
} // namespace trtorch


```
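
A minimal end-to-end sketch of how the pieces above fit together, assuming a traced module saved as `trained_vgg16.jit.pt` (the path, input shape, and settings are placeholders):

```c++
#include <torch/script.h>
#include "trtorch/trtorch.h"

int main() {
  // Load a TorchScript module, e.g. the traced VGG16 from cpp/ptq/training
  auto mod = torch::jit::load("trained_vgg16.jit.pt");
  mod.to(torch::kCUDA);

  // Bail out if the forward graph contains unsupported operators
  if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
    return 1;
  }

  // One fixed input shape; op_precision stays at the kFloat default
  std::vector<std::vector<int64_t>> fixed_sizes = {{32, 3, 32, 32}};
  trtorch::ExtraInfo info(fixed_sizes);

  auto trt_mod = trtorch::CompileGraph(mod, info);
  auto in = torch::randn({32, 3, 32, 32}, torch::kCUDA);
  auto out = trt_mod.forward({in});
  return 0;
}
```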
2 changes: 1 addition & 1 deletion cpp/ptq/training/vgg16/README.md
@@ -12,7 +12,7 @@ pip3 install -r requirements.txt --user

The following recipe should reach somewhere between 89-92% accuracy on the CIFAR10 test set
```
-python3 main.py --lr 0.01 --batch-size 256 --drop-ratio 0.15 --ckpt-dir $(pwd)/vgg16_ckpts --epochs 100
+python3 main.py --lr 0.01 --batch-size 128 --drop-ratio 0.15 --ckpt-dir $(pwd)/vgg16_ckpts --epochs 100
```

> 545 was the seed used in testing
10 changes: 7 additions & 3 deletions cpp/ptq/training/vgg16/export_ckpt.py
@@ -19,6 +19,7 @@ def test(model, dataloader, crit):
class_probs = []
class_preds = []
model.eval()

with torch.no_grad():
for data, labels in dataloader:
data, labels = data.cuda(), labels.cuda(non_blocking=True)
@@ -53,21 +54,24 @@ def test(model, dataloader, crit):
weights = new_state_dict

model.load_state_dict(weights)
model.eval()

jit_model = torch.jit.trace(model, torch.rand([32, 3, 32, 32]).to("cuda"))

testing_dataset = datasets.CIFAR10(root='./data', train=False, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465),
(0.2023, 0.1994, 0.2010))]))

testing_dataloader = torch.utils.data.DataLoader(testing_dataset, batch_size=32,
shuffle=False, num_workers=2)

crit = torch.nn.CrossEntropyLoss()
test_loss, test_acc = test(model, testing_dataloader, crit)
print("[PTH] Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc))
print("[JIT] Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc))

+torch.jit.save(jit_model, "trained_vgg16.jit.pt")
+jit_model = torch.jit.load("trained_vgg16.jit.pt")
+test_loss, test_acc = test(jit_model, testing_dataloader, crit)
+print("[JIT] Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc))
