Fix quantize model deploy bugs when using MKLDNN #45920

Merged on Oct 13, 2022 (34 commits)

Commits
786798e  fix immutable op quantize bugs (yeliang2258, Sep 9, 2022)
7ef2d11  fix (yeliang2258, Sep 9, 2022)
ad8dbcd  fix build bug (yeliang2258, Sep 9, 2022)
19858d6  fix test (yeliang2258, Sep 13, 2022)
f64fa14  Merge remote-tracking branch 'upstream/develop' into fix_immutable_op… (yeliang2258, Sep 13, 2022)
253bd84  notest,test=inference (yeliang2258, Sep 13, 2022)
b5dc071  fix ppyoloe acc drop bugs (yeliang2258, Sep 14, 2022)
b280449  fix test (yeliang2258, Sep 14, 2022)
059b7f0  fix test (yeliang2258, Sep 14, 2022)
695c1d4  add test (yeliang2258, Sep 15, 2022)
535ce7b  fix (yeliang2258, Sep 15, 2022)
5015e5e  fix (yeliang2258, Sep 15, 2022)
80e69d4  fix test (yeliang2258, Sep 15, 2022)
17cc8bd  fix refined name bug (yeliang2258, Sep 15, 2022)
26797da  fix test (yeliang2258, Sep 15, 2022)
9986a32  bias fix (yeliang2258, Sep 15, 2022)
b9d27c5  fix matmul weight dequant bug (yeliang2258, Sep 15, 2022)
1cca64c  re-ci (yeliang2258, Sep 20, 2022)
8d6bdcf  fix tester (yeliang2258, Sep 20, 2022)
ed37165  fix test (yeliang2258, Sep 20, 2022)
ef4496c  fix tester (yeliang2258, Sep 21, 2022)
c8cfd32  Merge remote-tracking branch 'upstream/develop' into fix_immutable_op… (yeliang2258, Sep 21, 2022)
635ee03  update weight dequantize func (yeliang2258, Sep 23, 2022)
a065837  update code (yeliang2258, Oct 8, 2022)
9302c3c  Merge remote-tracking branch 'upstream/develop' into fix_immutable_op… (yeliang2258, Oct 8, 2022)
a44b576  update test for converage (yeliang2258, Oct 8, 2022)
1aa87f4  Merge remote-tracking branch 'upstream/develop' into fix_immutable_op… (yeliang2258, Oct 9, 2022)
1698a60  update test (yeliang2258, Oct 10, 2022)
a628a9a  update cmake (yeliang2258, Oct 10, 2022)
582f2dc  update cmakelist (yeliang2258, Oct 11, 2022)
0967fc2  update code (yeliang2258, Oct 11, 2022)
d40df92  rerun ci (yeliang2258, Oct 11, 2022)
bbdacfe  Merge remote-tracking branch 'upstream/develop' into fix_immutable_op… (yeliang2258, Oct 12, 2022)
1fc1f68  remove useless code (yeliang2258, Oct 13, 2022)
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
100644 → 100755
@@ -68,7 +68,7 @@ std::vector<float> ComputePropagateScalesMkldnnPass::GetScales(Tensor* tensor,
for (int i = 0; i < columns; i++) {
float max_value = FLT_MIN;
for (int j = 0; j < rows; j++) {
- max_value = std::max(max_value, std::abs(data[i + j * columns]));
+ max_value = std::max(max_value, std::abs(data[j + i * rows]));
}
max_value = 1.0 / max_value;
if (std::isinf(max_value) || std::isnan(max_value)) {
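The corrected index reads a contiguous block of rows elements per channel instead of striding across channels, suggesting the weight buffer stores each channel contiguously. A minimal self-contained sketch of the fixed computation; the function name and signature are illustrative, not the Paddle API:

#include <algorithm>
#include <cmath>
#include <vector>

// Per-channel quantization scales: scale[i] = 1 / max|w| over channel i.
// Assumes each of the `columns` channels is stored as a contiguous block
// of `rows` elements, i.e. element (j, i) lives at data[j + i * rows],
// which is the layout the corrected index expression implies.
std::vector<float> GetPerChannelScales(const float* data, int rows, int columns) {
  std::vector<float> scales(columns);
  for (int i = 0; i < columns; ++i) {
    float max_value = 0.0f;
    for (int j = 0; j < rows; ++j) {
      max_value = std::max(max_value, std::abs(data[j + i * rows]));
    }
    // An all-zero channel would otherwise produce an infinite scale, which
    // is what the isinf/isnan check in the real pass guards against.
    scales[i] = (max_value == 0.0f) ? 1.0f : 1.0f / max_value;
  }
  return scales;
}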
20 changes: 18 additions & 2 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -422,7 +422,16 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
auto filter_scale_tensor = GetScaleTensorForNode(conv_filter);
EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data<double>(),
filter_scale_tensor.numel()};
- eigen_tensor *= static_cast<double>(S8_MAX);
+
+ // If the scale value of a weight is already multiplied by S8_MAX, it does
+ // not need to be multiplied again
+ if (std::find(change_weight_->begin(),
+               change_weight_->end(),
+               conv_filter->Name()) == change_weight_->end()) {
+   eigen_tensor *= static_cast<double>(S8_MAX);
+   change_weight_->push_back(conv_filter->Name());
+ }
+
std::vector<float> filter_scale{
filter_scale_tensor.data<double>(),
filter_scale_tensor.data<double>() + filter_scale_tensor.numel()};
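The std::find guard keeps the scale tensor of a filter shared by several convolutions from being multiplied by S8_MAX once per consumer. Since std::find over a vector is linear per lookup, the same bookkeeping can be sketched with an std::unordered_set, whose insert() reports first-time insertion directly (hypothetical helper, not the PR's code):

#include <string>
#include <unordered_set>

// Records each weight name the first time its scale tensor is scaled, so a
// filter shared by several convs is only multiplied by S8_MAX once.
class ScaledWeightRegistry {
 public:
  // Returns true exactly once per distinct weight name.
  bool MarkScaled(const std::string& weight_name) {
    return scaled_.insert(weight_name).second;
  }

 private:
  std::unordered_set<std::string> scaled_;
};

At the call site, if (registry.MarkScaled(conv_filter->Name())) would guard the multiplication in place of the find-then-push_back pair.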
@@ -699,6 +708,14 @@ void CPUQuantizePass::QuantizeImmutable(Graph* graph,
return;
}

+ // Skip if the dtype of immutable_in is not float32
+ auto dtype = immutable_in->Var()->GetDataType();
+ if (dtype != proto::VarType::FP32) {
+   VLOG(4) << "dtype: " << dtype;
+   MarkAndLogCannotQuantizeOp(immutable_op, "The input dtype is not float.");
+   return;
+ }
+
if (!AreScalesPresentForNodes({immutable_out})) {
MarkAndLogCannotQuantizeOp(immutable_op,
"No scale available for the operator");
@@ -1170,7 +1187,6 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeImmutable(graph, "reshape2", "X");
QuantizeImmutable(graph, "transpose2", "X");
QuantizeImmutable(graph, "slice", "Input");
yeliang2258 marked this conversation as resolved.
Show resolved Hide resolved
QuantizeImmutable(graph, "shape", "Input");
QuantizeImmutable(graph, "nearest_interp", "X");
QuantizeImmutable(graph, "nearest_interp_v2", "X");
QuantizeElementwise(graph, "elementwise_add");
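Removing shape from the quantizable immutable ops complements the dtype guard added above: shape emits an integer tensor, and there is no scale under which an integer tensor can be meaningfully wrapped in quantize/dequantize pairs. A sketch of why the INT8 mapping presupposes float data, assuming symmetric per-tensor quantization (illustrative helpers, not Paddle code):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric INT8 quantization of a single value: meaningful only when x is
// real-valued data whose range the scale was calibrated against.
int8_t Quantize(float x, float scale) {
  float q = std::round(x * scale);
  return static_cast<int8_t>(std::max(-128.0f, std::min(127.0f, q)));
}

float Dequantize(int8_t q, float scale) {
  return static_cast<float>(q) / scale;
}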
5 changes: 5 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
@@ -110,6 +110,11 @@ class CPUQuantizePass : public FusePassBase {
VarQuantScale string_pair_map = {};
VarQuantScale* const var_quant_scales_ = &string_pair_map;

+ // Names of weights whose scale tensors have already been multiplied by
+ // S8_MAX, kept to avoid scaling the same weight twice
+ std::vector<std::string> change_weight = {};
+ std::vector<std::string>* const change_weight_ = &change_weight;

void GetQuantInfo(Graph* graph) const;
};

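The change_weight / change_weight_ pair follows the string_pair_map / var_quant_scales_ idiom already in this class: a member object plus a const pointer to it, so the pass's const methods can still update the bookkeeping. For comparison, a sketch of the same first-visit check expressed with C++'s mutable keyword, which targets exactly this case (illustrative, not the PR's code):

#include <string>
#include <unordered_set>

class ExamplePass {
 public:
  void ApplyImpl() const {
    // insert().second is true only on the first visit; mutating seen_
    // inside a const method is legal because the member is mutable.
    if (seen_.insert("conv2d_filter_0").second) {
      // ... scale the weight tensor exactly once ...
    }
  }

 private:
  mutable std::unordered_set<std::string> seen_;
};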
14 changes: 5 additions & 9 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
100644 → 100755
@@ -66,7 +66,7 @@ void SetOp(ProgramDesc* prog,
type == "nearest_interp" || type == "nearest_interp_v2") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
} else if (type == "slice" || type == "shape") {
} else if (type == "slice") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
} else if (type == "dropout") {
@@ -467,7 +467,7 @@ static const std::initializer_list<std::string> variable_names_immutable_ops = {
void TestImmutableOp(const std::string tested_op) {
ProgramDesc prog;
for (auto& v : variable_names_immutable_ops) {
- prog.MutableBlock(0)->Var(v);
+ prog.MutableBlock(0)->Var(v)->SetDataType(proto::VarType::FP32);
}
SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
SetOp(&prog, tested_op, tested_op, {"b"}, {"c"}, true, "int8");
@@ -520,7 +520,7 @@ void TestImmutableOpBetweenNonQuantizedOp(const std::string tested_op) {
void TestImmutableOpWithManyOutputs(const std::string tested_op) {
ProgramDesc prog;
for (auto& v : variable_names_immutable_ops) {
- prog.MutableBlock(0)->Var(v);
+ prog.MutableBlock(0)->Var(v)->SetDataType(proto::VarType::FP32);
}

SetOp(&prog, "dropout", "Dropout1", {"a"}, {"b"}, true, "float32");
@@ -556,12 +556,8 @@ void TestImmutableOpWithManyOutputs(const std::string tested_op) {
SCALE * S8_MAX);
}

const std::vector<std::string> immutables = {"reshape2",
"transpose2",
"slice",
"shape",
"nearest_interp",
"nearest_interp_v2"};
const std::vector<std::string> immutables = {
"reshape2", "transpose2", "slice", "nearest_interp", "nearest_interp_v2"};

class TestImmutables : public testing::TestWithParam<std::string> {};

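Both tester changes track the pass: each test variable now carries an explicit FP32 dtype so the new input-dtype guard in QuantizeImmutable does not reject the ops under test, and shape leaves the immutables list because the pass no longer quantizes it.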
45 changes: 23 additions & 22 deletions paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc
@@ -52,23 +52,6 @@ bool HasBias(ir::Node* conv_op) {
conv_op->Op()->Input("Bias").size() > 0;
}

- bool ShouldSkipConv(ir::Node* conv_op, Scope* scope, ir::Node* conv_filter) {
-   if (!platform::HasOpINT8DataType(conv_op->Op())) {
-     VLOG(4) << "Skipping non-int8 convolution (id: " << conv_op->id() << ").";
-     return true;
-   }
-
-   auto filter_var = scope->GetVar(conv_filter->Name());
-   if (filter_var->Get<LoDTensor>().dtype() != phi::DataType::FLOAT32) {
-     VLOG(4) << "Skipping convolution (id: " << conv_op->id()
-             << ") because it's a bug that it is detected again.";
-     return true;
-   }
-
-   VLOG(4) << "Not skipping convolution (id: " << conv_op->id() << ")";
-   return false;
- }

template <typename T>
void QuantizeConvInput(Scope* scope,
ir::Graph* g,
@@ -151,16 +134,34 @@ void ParamsQuantizationMkldnnPass::QuantizeConv(ir::Graph* graph,
PADDLE_ENFORCE_NOT_NULL(
scope, platform::errors::InvalidArgument("Scope cannot be nullptr."));

- if (ShouldSkipConv(conv_op, scope, conv_filter)) {
+ // Skip ops that are not INT8-quantized
+ if (!platform::HasOpINT8DataType(conv_op->Op())) {
return;
}

- QuantizeConvInput<int8_t>(
-     scope, g, conv_op, conv_filter->Name(), "Scale_weights");
+ auto filter_var = scope->GetVar(conv_filter->Name());
+ if (filter_var->Get<LoDTensor>().dtype() != phi::DataType::FLOAT32) {
+   VLOG(4) << "Skipping convolution filter: " << conv_filter->Name()
+           << " because it is already quantized (shared with another conv).";
+   conv_op->Op()->SetAttr("Scale_weights", std::vector<float>(1, 1));
+ } else {
+   QuantizeConvInput<int8_t>(
+       scope, g, conv_op, conv_filter->Name(), "Scale_weights");
+ }

if (HasBias(conv_op)) {
-   QuantizeConvInput<int32_t>(
-       scope, g, conv_op, conv_op->Op()->Input("Bias")[0], "Bias_scales");
+   auto bias_var = scope->GetVar(conv_op->Op()->Input("Bias")[0]);
+   if (bias_var->Get<LoDTensor>().dtype() != phi::DataType::FLOAT32) {
+     VLOG(4) << "Skipping convolution bias: "
+             << conv_op->Op()->Input("Bias")[0]
+             << " because it is already quantized (shared with another conv).";
+     conv_op->Op()->SetAttr("Bias_scales", std::vector<float>(1, 1));
+   } else {
+     QuantizeConvInput<int32_t>(
+         scope, g, conv_op, conv_op->Op()->Input("Bias")[0], "Bias_scales");
+   }
}
params_to_int8_conv_found++;
};
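The rewrite replaces the deleted all-or-nothing ShouldSkipConv with per-input handling: a filter or bias that is already non-float, because another conv sharing it was processed first, gets an identity scale attribute instead of causing the whole conv to be skipped or being quantized a second time. A compilable toy of that policy, with every name hypothetical:

#include <vector>

enum class DType { kFP32, kINT8 };

struct Tensor {
  DType dtype = DType::kFP32;
};

// Quantize a conv input at most once, even when several convs share it.
void QuantizeInputOnce(Tensor* input, std::vector<float>* scale_attr,
                       const std::vector<float>& computed_scales) {
  if (input->dtype != DType::kFP32) {
    *scale_attr = {1.0f};  // already quantized by an earlier conv: identity scale
    return;
  }
  input->dtype = DType::kINT8;  // stand-in for the real weight quantization
  *scale_attr = computed_scales;
}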
59 changes: 50 additions & 9 deletions paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc
100644 → 100755
@@ -89,17 +89,29 @@ struct ProgramStrategy {

virtual void CheckOp(const OpDesc& op) const = 0;

- VarDesc* AddInput(OpDesc* op, std::string input_name, const Data& data) {
-   const std::string var_name = input_name + "_var";
+ VarDesc* AddInput(OpDesc* op,
+                   std::string input_name,
+                   const Data& data,
+                   const std::string user_var_name = "") {
+   std::string var_name = user_var_name;
+   if (var_name.empty()) {
+     var_name = input_name + "_var";
+   }
op->SetInput(input_name, {var_name});
auto var = program.MutableBlock(0)->Var(var_name);
var->SetShape(data.getShape());
test_scope.CreateTensor(var_name, data);
return var;
}

- void AddOutput(OpDesc* op, std::string output_name, const Data& data) {
-   const std::string var_name = output_name + "_var";
+ void AddOutput(OpDesc* op,
+                std::string output_name,
+                const Data& data,
+                const std::string user_var_name = "") {
+   std::string var_name = user_var_name;
+   if (var_name.empty()) {
+     var_name = output_name + "_var";
+   }
op->SetOutput(output_name, {var_name});
program.MutableBlock(0)->Var(var_name);
test_scope.CreateTensor(var_name, data);
@@ -117,21 +129,23 @@ struct ConvProgramStrategy : public ProgramStrategy {
std::vector<float>&& scale_weights,
int groups = 1,
Data&& bias = Data(),
-               std::vector<float>&& scale_bias = {})
+               std::vector<float>&& scale_bias = {},
+               bool share_weight = false)
: input(std::move(input)),
filter(std::move(filter)),
output(std::move(output)),
scale_weights(std::move(scale_weights)),
groups(std::move(groups)),
bias(std::move(bias)),
-       scale_bias(std::move(scale_bias)) {}
+       scale_bias(std::move(scale_bias)),
+       share_weight(std::move(share_weight)) {}

protected:
OpDesc* CreateBasicConvOp() {
OpDesc* CreateBasicConvOp(const std::string conv_name = "Conv1") {
auto op = program.MutableBlock(0)->AppendOp();
op->SetType("conv2d");
op->SetAttr("use_mkldnn", true);
op->SetAttr("name", std::string{"Conv1"});
op->SetAttr("name", conv_name);
op->SetAttr("mkldnn_data_type", std::string{"int8"});
op->SetAttr("data_format", std::string{"NCHW"});
op->SetAttr("dilations", std::vector<int>({1, 1}));
@@ -155,6 +169,20 @@ struct ConvProgramStrategy : public ProgramStrategy {
AddInput(op, "Bias", bias);
op->SetAttr("Bias_scales", scale_bias);
}

+ if (share_weight) {
+   OpDesc* op2 = CreateBasicConvOp("Conv2");
+   AddInput(op2, "Input", input);
+   AddInput(op2, "Filter", filter)->SetPersistable(true);
+   AddOutput(op2, "Output", output, "output2");
+   op2->SetAttr("Scale_weights", scale_weights);
+   op2->SetAttr("Scale_in", 1.0f);
+   op2->SetAttr("groups", groups);
+   if (HasBias()) {
+     AddInput(op2, "Bias", bias, "Bias2");
+     op2->SetAttr("Bias_scales", scale_bias);
+   }
+ }
}

void CheckOp(const OpDesc& op) const override {
@@ -210,9 +238,9 @@ struct ConvProgramStrategy : public ProgramStrategy {
const Data output;
const std::vector<float> scale_weights;
const int groups;
-
const Data bias;
const std::vector<float> scale_bias;
+ const bool share_weight;
};

struct ParamsQuantizationMkldnnPassTestFixture : public ::testing::Test {
@@ -340,6 +368,19 @@ TEST_F(ParamsQuantizationMkldnnPassTestFixture, conv_with_bias_2g2o2i1h1w) {
RunPassTest(std::move(program));
}

+ TEST_F(ParamsQuantizationMkldnnPassTestFixture, conv_with_bias_2g2o2i1h1ws) {
+   auto program = std::make_unique<ConvProgramStrategy>(
+       GenericInput(),
+       Data({2, 2, 2, 1, 1}, {1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f}),
+       GenericOutput(),
+       std::vector<float>{2.f, 2.f, 4.f, 4.f},
+       2,
+       Data({2, 2, 1, 1, 1}, {1.5f, 1.5f, 1.5f, 1.5f}),
+       std::vector<float>{2.f, 2.f, 4.f, 4.f},
+       true);
+   RunPassTest(std::move(program));
+ }

} // namespace
} // namespace ir
} // namespace framework
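The added conv_with_bias_2g2o2i1h1ws case exercises the shared-weight path directly: with share_weight set to true, Conv2 reuses Conv1's Filter variable, so the pass encounters an already-quantized filter on the second conv and must fall back to the identity Scale_weights rather than quantizing it again.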