[Paddle-TRT] Full support for ops with persistable input #45545

Merged
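A rough before/after sketch of what this PR means for an op converter, condensed from the diffs below. The loop is excerpted for illustration and is not compilable on its own.

// Before: every converter had to detect persistable inputs and convert them
// to ITensors itself before asking the engine for the tensor.
for (auto &arg_name : op_desc.Input(param_name)) {
  framework::Variable *X_v = scope.FindVar(arg_name);
  if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
    ConvertWeight2ITensor(scope, arg_name);  // OpConverter-side helper, removed below
  }
  inputs.push_back(engine_->GetITensor(arg_name));
}

// After: TensorRTEngine::GetITensor falls back to an engine-side
// ConvertWeight2ITensor for names it has not seen yet, so a converter only needs:
for (auto &arg_name : op_desc.Input(param_name)) {
  inputs.push_back(engine_->GetITensor(arg_name));
}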
@@ -51,13 +51,6 @@ class CustomPluginCreater : public OpConverter {
auto &op_input_names = framework::OpMetaInfoHelper::GetInputs(op_info);
for (auto &param_name : op_input_names) {
for (auto &arg_name : op_desc.Input(param_name)) {
framework::Variable *X_v = nullptr;
X_v = scope.FindVar(arg_name);
// If this weight is not shared between ops, it needs to be converted to
// an ITensor
if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
ConvertWeight2ITensor(scope, arg_name);
}
inputs.push_back(engine_->GetITensor(arg_name));
}
}
@@ -193,14 +186,6 @@ class GenericPluginCreater : public OpConverter {

for (auto &param_name : phi_kernel_signature.input_names) {
for (auto &arg_name : op_desc.Input(param_name)) {
framework::Variable *X_v = nullptr;
X_v = scope.FindVar(arg_name);
// If this weight is not shared between ops, it needs to be converted to
// an ITensor
if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
ConvertWeight2ITensor(scope, arg_name);
}

inputs.push_back(engine_->GetITensor(arg_name));
auto *var = block_desc.FindVar(arg_name);
PADDLE_ENFORCE_NOT_NULL(
55 changes: 1 addition & 54 deletions paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -178,6 +178,7 @@ class OpConverter {
op_desc.Type()));

it->SetEngine(engine);
engine->SetScope(scope);
it->SetBlockDesc(block);
(*it)(op, scope, test_mode);

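The new engine->SetScope(scope) call is what makes the lazy conversion possible: the weight-to-ITensor logic now lives inside TensorRTEngine (see engine.cc below) and needs the scope to resolve persistable variables by name. A minimal sketch of the ordering requirement; RunConverterSketch is an illustrative name, not code from this PR.

// Sketch: the scope must be attached to the engine before any converter runs,
// because TensorRTEngine::ConvertWeight2ITensor calls scope_->FindVar(name).
void RunConverterSketch(OpConverter *it,
                        const framework::proto::OpDesc &op,
                        const framework::Scope &scope,
                        TensorRTEngine *engine,
                        bool test_mode) {
  it->SetEngine(engine);
  engine->SetScope(scope);  // otherwise lazy weight conversion has no scope to search
  (*it)(op, scope, test_mode);
}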
@@ -255,31 +256,6 @@ class OpConverter {
const framework::Scope& scope,
TensorRTEngine* engine) {
std::unique_lock<std::mutex> lk(mut_);
for (int i = 0; i < block.ops_size(); i++) {
SetEngine(engine);
const auto& op = block.ops(i);
framework::OpDesc op_desc(op, nullptr);
framework::Variable* X_v = nullptr;
std::string X_name;
// inputs : string -> std::vector<string>
auto inputs = op_desc.Inputs();
if (inputs.count("X")) {
X_name = op_desc.Input("X")[0];
} else if (inputs.count("Input")) {
X_name = op_desc.Input("Input")[0];
} else if (inputs.count("Y")) {
X_name = op_desc.Input("Y")[0];
}
X_v = scope.FindVar(X_name);
// If this weight is shared between ops, it does not need to be converted to
// an ITensor again
if (engine->GetITensorMap()->count(X_name)) {
continue;
}
if (X_v) {
ConvertWeight2ITensor(scope, X_name);
}
}
for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i);
ConvertOp(op, parameters, scope, engine, false, &block);
@@ -596,35 +572,6 @@ class OpConverter {
return Add1DConstantLayer(input_data, weight_name, scalar);
}

// For cases when the input is not an intermediate tensor but a persistable
// tensor, call this.
nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
const std::string& name) {
auto* var_v = scope.FindVar(name);
auto* var_t = var_v->GetMutable<framework::LoDTensor>();
auto weight = engine_->GetTrtWeight(name, *var_t);

// Now that the weights have been created, we need to create an ITensor
auto var_dims = var_t->dims();
nvinfer1::Dims trt_in_shape;
trt_in_shape.nbDims = var_t->dims().size();
for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = var_dims[i];
}
// In fact, this is not always correct, because we cannot determine whether the
// 0th dimension is the batch dimension. Added just to run chenqu's model.
if (!engine_->with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = trt_in_shape.d[i + 1];
}
}
nvinfer1::ILayer* layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
engine_->SetITensor(name, layer->getOutput(0));
return layer->getOutput(0);
}

void RreplenishLayerAndOutput(
nvinfer1::ILayer* layer,
const std::string& layer_type,
46 changes: 41 additions & 5 deletions paddle/fluid/inference/tensorrt/engine.cc
@@ -369,11 +369,47 @@ void TensorRTEngine::SetITensor(const std::string &name,
}

nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
PADDLE_ENFORCE_EQ(itensor_map_.count(name),
true,
platform::errors::NotFound(
"Tensor named %s is not found in TRT engine", name));
return itensor_map_[name];
if (itensor_map_.count(name)) {
return itensor_map_[name];
} else {
ConvertWeight2ITensor(name);
return itensor_map_[name];
}
}

// For cases when the input is not an intermediate tensor but a persistable
// tensor, call this.
nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
const std::string &name) {
auto *var_v = scope_->FindVar(name);
PADDLE_ENFORCE_NOT_NULL(
var_v,
platform::errors::NotFound("You are converting a persistable weight to a "
"tensor, but there is no "
"persistable variable called %s in scope.",
name));
auto *var_t = var_v->GetMutable<framework::LoDTensor>();
auto weight = this->GetTrtWeight(name, *var_t);

// Now that the weights have been created, we need to create an ITensor
auto var_dims = var_t->dims();
nvinfer1::Dims trt_in_shape;
trt_in_shape.nbDims = var_t->dims().size();
for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = var_dims[i];
}
// In fact, this is not always correct, because we cannot determine whether the
// 0th dimension is the batch dimension. Added just to run chenqu's model.
if (!this->with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = trt_in_shape.d[i + 1];
}
}
nvinfer1::ILayer *layer =
TRT_ENGINE_ADD_LAYER(this, Constant, trt_in_shape, weight.get());
this->SetITensor(name, layer->getOutput(0));
return layer->getOutput(0);
}

std::unordered_map<std::string, nvinfer1::ITensor *>
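The static-shape branch in ConvertWeight2ITensor above drops the leading dimension on the assumption that it is the batch dimension (the in-code comment concedes this is not always correct). A self-contained sketch of that trimming, using std::vector in place of nvinfer1::Dims purely for illustration:

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the shape handling in TensorRTEngine::ConvertWeight2ITensor: in
// static-shape mode the leading dimension is assumed to be the batch dimension
// and is dropped before the constant layer is created.
std::vector<int64_t> TrtConstantShape(const std::vector<int64_t> &var_dims,
                                      bool with_dynamic_shape) {
  std::vector<int64_t> shape = var_dims;
  if (!with_dynamic_shape && !shape.empty()) {
    shape.erase(shape.begin());  // drop the assumed batch dimension
  }
  return shape;
}

int main() {
  // A persistable weight of shape [1, 64, 7]:
  //  - dynamic shape: the TRT constant keeps shape (1, 64, 7)
  //  - static shape:  the TRT constant becomes shape (64, 7)
  for (bool dynamic : {true, false}) {
    for (int64_t d : TrtConstantShape({1, 64, 7}, dynamic)) std::cout << d << ' ';
    std::cout << '\n';
  }
  return 0;
}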
6 changes: 5 additions & 1 deletion paddle/fluid/inference/tensorrt/engine.h
@@ -24,9 +24,9 @@ limitations under the License. */
#include <unordered_set>
#include <utility>
#include <vector>

#include "NvInferRuntimeCommon.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
@@ -283,6 +283,7 @@ class TensorRTEngine {
void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
// Get an ITensor called name.
nvinfer1::ITensor* GetITensor(const std::string& name);
nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name);
std::unordered_map<std::string, nvinfer1::ITensor*>* GetITensorMap();

nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); }
@@ -691,12 +692,15 @@ class TensorRTEngine {
void GetEngineInfo();

void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }
void SetScope(const framework::Scope& scope) { scope_ = &scope; }

private:
// Each ICudaEngine object is bound to a specific GPU when it is instantiated,
// ensure that the thread is associated with the correct device by calling
// freshDeviceId().
void freshDeviceId();
// Used to convert weights into ITensors
const framework::Scope* scope_;

// the max batch size
int max_batch_;