Skip to content

Commit 1c0fe59

Browse files
committed
Merge branch 'develop' into LapackEigh
2 parents 7b8aa10 + b91e8ee commit 1c0fe59

22 files changed

+1610
-136
lines changed

paddle/fluid/inference/tensorrt/convert/pool2d_op.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ class Pool2dOpConverter : public OpConverter {
8787
bool adaptive = false;
8888
if (op_desc.HasAttr("adaptive"))
8989
adaptive = BOOST_GET_CONST(bool, op_desc.GetAttr("adaptive"));
90+
std::string padding_algorithm = "EXPLICIT";
91+
if (op_desc.HasAttr("padding_algorithm"))
92+
padding_algorithm =
93+
BOOST_GET_CONST(std::string, op_desc.GetAttr("padding_algorithm"));
9094

9195
nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
9296
nvinfer1::ReduceOperation reduce_operation =
@@ -124,6 +128,9 @@ class Pool2dOpConverter : public OpConverter {
124128
pool_layer->setStride(nv_strides);
125129
pool_layer->setPadding(nv_paddings);
126130
pool_layer->setAverageCountExcludesPadding(exclusive);
131+
if (padding_algorithm == "SAME") {
132+
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
133+
}
127134
layer = pool_layer;
128135
} else if (global_pooling) {
129136
auto *reduce_layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *input1,
@@ -159,6 +166,9 @@ class Pool2dOpConverter : public OpConverter {
159166
auto output_name = op_desc.Output("Out")[0];
160167
pool_layer->setStride(nv_strides);
161168
pool_layer->setPadding(nv_paddings);
169+
if (padding_algorithm == "SAME") {
170+
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
171+
}
162172
pool_layer->setAverageCountExcludesPadding(exclusive);
163173
pool_layer->setName(("pool2d (Output: " + output_name + ")").c_str());
164174
pool_layer->getOutput(0)->setName(output_name.c_str());
@@ -198,6 +208,9 @@ class Pool2dOpConverter : public OpConverter {
198208
"trt pool layer in converter could not be created."));
199209
pool_layer->setStride(nv_strides);
200210
pool_layer->setPadding(nv_paddings);
211+
if (padding_algorithm == "SAME") {
212+
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
213+
}
201214
pool_layer->setAverageCountExcludesPadding(exclusive);
202215
layer = pool_layer;
203216
} else {

paddle/fluid/inference/tensorrt/op_teller.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,22 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
172172
std::vector<int> paddings =
173173
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
174174
if (paddings.size() > 2) return false;
175+
if (desc.HasAttr("exclusive")) {
176+
if (BOOST_GET_CONST(bool, desc.GetAttr("exclusive"))) {
177+
std::vector<int> ksize =
178+
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("ksize"));
179+
for (size_t i = 0; i < ksize.size(); i++) {
180+
if (ksize[i] <= paddings[i]) {
181+
VLOG(3) << "the padding size should be less than the filter size "
182+
"for exclusive-counting pooling.";
183+
return false;
184+
}
185+
}
186+
}
187+
}
188+
if (desc.HasAttr("ceil_mode")) {
189+
if (BOOST_GET_CONST(bool, desc.GetAttr("ceil_mode"))) return false;
190+
}
175191
if (desc.Input("X").size() != 1) {
176192
VLOG(3) << "TRT Pool2d expect 1 input, but got "
177193
<< desc.Input("X").size();
@@ -440,6 +456,10 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
440456
}
441457
}
442458

459+
if (op_type == "anchor_generator") {
460+
if (!with_dynamic_shape) return false;
461+
}
462+
443463
if (op_type == "yolo_box") {
444464
if (with_dynamic_shape) return false;
445465
bool has_attrs =

paddle/fluid/operators/determinant_op.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ class DeterminantGradOp : public framework::OperatorWithKernel {
4848
OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input",
4949
"DeterminantGradOp");
5050
OP_INOUT_CHECK(ctx->HasInput("Out"), "Input", "Out", "DeterminantGradOp");
51+
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input",
52+
framework::GradVarName("Out"), "DeterminantGradOp");
5153
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Input")), "Output",
5254
framework::GradVarName("Input"), "DeterminantGradOp");
5355

@@ -117,7 +119,8 @@ class SlogDeterminantGradOp : public framework::OperatorWithKernel {
117119
"SlogDeterminantGradOp");
118120
OP_INOUT_CHECK(ctx->HasInput("Out"), "Input", "Out",
119121
"SlogDeterminantGradOp");
120-
122+
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input",
123+
framework::GradVarName("Out"), "SlogDeterminantGradOp");
121124
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Input")), "Output",
122125
framework::GradVarName("Input"), "SlogDeterminantGradOp");
123126

@@ -179,13 +182,13 @@ REGISTER_OPERATOR(slogdeterminant, ops::SlogDeterminantOp,
179182
ops::SlogDeterminantGradOpMaker<paddle::imperative::OpBase>);
180183

181184
REGISTER_OPERATOR(slogdeterminant_grad,
182-
ops::DeterminantGradOp) // reuse det grad op
185+
ops::SlogDeterminantGradOp) // dedicated slogdet grad op (no longer reuses det grad op)
183186

184187
REGISTER_OP_CPU_KERNEL(
185188
slogdeterminant, ops::SlogDeterminantKernel<plat::CPUDeviceContext, float>,
186189
ops::SlogDeterminantKernel<plat::CPUDeviceContext, double>);
187190

188191
REGISTER_OP_CPU_KERNEL(
189192
slogdeterminant_grad,
190-
ops::DeterminantGradKernel<plat::CPUDeviceContext, float>,
191-
ops::DeterminantGradKernel<plat::CPUDeviceContext, double>);
193+
ops::SlogDeterminantGradKernel<plat::CPUDeviceContext, float>,
194+
ops::SlogDeterminantGradKernel<plat::CPUDeviceContext, double>);

paddle/fluid/operators/determinant_op.cu

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,42 +14,6 @@ limitations under the License. */
1414

1515
#include "paddle/fluid/framework/op_registry.h"
1616
#include "paddle/fluid/operators/determinant_op.h"
17-
#include "paddle/fluid/platform/cuda_primitives.h"
18-
19-
namespace paddle {
20-
namespace operators {
21-
22-
using platform::PADDLE_CUDA_NUM_THREADS;
23-
using Tensor = framework::Tensor;
24-
25-
template <typename T>
26-
__global__ void DeterminantGrad(const size_t numel, T* out) {
27-
int tid = threadIdx.x + blockIdx.x * blockDim.x;
28-
if (tid < numel) {
29-
out[tid] = static_cast<T>(1);
30-
}
31-
}
32-
33-
template <typename T>
34-
class DeterminantGradCUDAKernel : public framework::OpKernel<T> {
35-
public:
36-
void Compute(const framework::ExecutionContext& context) const override {
37-
const auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
38-
const T* dout_data = dout->data<T>();
39-
auto dout_dim = vectorize(dout->dims());
40-
41-
auto* dx = context.Output<Tensor>(framework::GradVarName("Input"));
42-
T* dx_data = dx->mutable_data<T>(context.GetPlace());
43-
44-
int64_t numel = dx->numel();
45-
for (int64_t idx = 0; idx < numel; idx++) {
46-
dx_data[idx] = static_cast<T>(1);
47-
}
48-
}
49-
};
50-
51-
} // namespace operators
52-
} // namespace paddle
5317

5418
namespace ops = paddle::operators;
5519
namespace plat = paddle::platform;

0 commit comments

Comments
 (0)