New format quant model support for MKLDNN #45416

Merged
18 commits merged on Sep 5, 2022
303 changes: 250 additions & 53 deletions paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc

Large diffs are not rendered by default.

51 changes: 47 additions & 4 deletions paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h
@@ -43,13 +43,34 @@ class QuantDequantMkldnnPass : public FusePassBase {
std::unordered_map<std::string, std::vector<float>>* weight_thresholds)
const;

void CollectInputScalesFromFake(
///
/// \brief Collect weight scale info from onnx_format dequantize_linear ops
/// onnx_format_dequantize_types: the onnx_format dequantize op types
/// weight_thresholds: scale info for weights
/// var_quant_scales: scale info for activations
/// onnx_format_quantize_model: records whether the quantized model is an
/// onnx_format quantized model
///
void CollectWeightScalesInfoFromONNXFormatDequantize(
ir::Graph* graph,
Scope* scope,
std::unordered_map<std::string, std::vector<float>>* weight_thresholds,
std::unordered_map<std::string, std::vector<float>>* var_quant_scales,
bool* onnx_format_quantize_model) const;

void CollectInputScalesFromQuantize(
ir::Graph* graph,
Scope* scope,
const std::unordered_set<std::string>& fake_quantize_types,
std::unordered_map<std::string, std::vector<float>>* var_quant_scales)
const;

void ConvertFromINT8ToFP32(const std::vector<float>& scales,
Tensor* weight_tensor,
int8_t* int8_weight_data,
float* fp32_weight_data,
const std::string& weight_var_name) const;

void CollectOutputScalesFromAttr(
ir::Graph* graph,
std::unordered_map<std::string, std::vector<float>>* var_quant_scales)
@@ -64,12 +85,22 @@ class QuantDequantMkldnnPass : public FusePassBase {
Node* op_node,
std::unordered_set<const Node*>* nodes2rm) const;

///
/// \brief Collect all the onnx_format quantize-related ops to remove
/// nodes2rm: records all quantize-related ops to remove
///
void CollectQuantizeDequantizeOpsFromONNXFormat(
ir::Graph* graph,
Node* op_node,
std::unordered_set<const Node*>* nodes2rm) const;

void RemoveFakeOps(
ir::Graph* graph,
const std::unordered_set<std::string>& fake_quantize_types,
const std::unordered_set<std::string>& fake_dequantize_types,
const std::unordered_set<std::string>& fake_quantize_dequantize_types)
const;
const std::unordered_set<std::string>& fake_quantize_dequantize_types,
const std::unordered_set<std::string>&
onnx_format_quantize_dequantize_types) const;

bool IsInt8Weight(Node* op_node,
Scope* scope,
@@ -85,11 +116,23 @@ class QuantDequantMkldnnPass : public FusePassBase {
const std::unordered_map<std::string, std::vector<float>>&
weight_thresholds) const;

///
/// \brief Dequantize the weights of conv or matmul ops
/// weight_thresholds: recorded scale info for weights
///
void DequantizeOpWeightsFromONNXFormat(
Node* op_node,
Scope* scope,
const std::string& weight_name,
const std::unordered_map<std::string, std::vector<float>>&
weight_thresholds) const;

void DequantizeWeights(
ir::Graph* graph,
Scope* scope,
const std::unordered_map<std::string, std::vector<float>>&
weight_thresholds) const;
weight_thresholds,
const bool& onnx_format_quantize_model) const;

void UpdateActivations(ir::Graph* graph) const;

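The header above only declares the weight-dequantization helpers; as a rough illustration of what ConvertFromINT8ToFP32 is declared to do, here is a minimal standalone sketch that expands int8 weight data back to fp32 using recorded scales. The helper name, the per-output-channel scale layout, and the multiply convention (fp32 = int8 * scale) are assumptions for illustration, not the pass's actual implementation:

// Minimal sketch, not the actual pass code. Assumes one scale per output
// channel with channels stored contiguously; scales.size() == 1 degenerates
// to per-tensor dequantization. Whether the recorded scale is multiplied or
// divided depends on how it was stored; multiplication is assumed here.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> Int8WeightToFp32(const std::vector<int8_t>& int8_weight,
                                    const std::vector<float>& scales) {
  const size_t per_channel = int8_weight.size() / scales.size();
  std::vector<float> fp32_weight(int8_weight.size());
  for (size_t i = 0; i < int8_weight.size(); ++i) {
    fp32_weight[i] =
        static_cast<float>(int8_weight[i]) * scales[i / per_channel];
  }
  return fp32_weight;
}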
3 changes: 3 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
100644 → 100755
@@ -177,6 +177,9 @@ struct Argument {
DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);

#ifdef PADDLE_WITH_MKLDNN
// Calibration file path of the quantized model
DECL_ARGUMENT_FIELD(calibration_file_path, CalibrationFilePath, std::string);

// A set of op types to enable their quantized kernels
DECL_ARGUMENT_FIELD(quantize_enabled_op_types,
QuantizeEnabledOpTypes,
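DECL_ARGUMENT_FIELD is the macro Argument uses to declare a field together with its accessors. Judging purely from the call sites added in this PR, the new calibration_file_path field appears to expose roughly the following surface; this is a sketch of the implied interface, not the macro's real expansion:

// Implied accessor surface (assumption based on call sites in this PR:
// argument->calibration_file_path(), argument_.SetCalibrationFilePath(...),
// argument->Has("calibration_file_path")). Not the real macro expansion.
#include <string>

struct ArgumentSketch {
  std::string calibration_file_path() const;             // getter
  void SetCalibrationFilePath(const std::string& path);  // setter
  bool Has(const std::string& field) const;              // presence check
};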
51 changes: 51 additions & 0 deletions paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
@@ -20,6 +20,10 @@
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
#endif

namespace paddle {
namespace inference {
namespace analysis {
@@ -32,6 +36,19 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
auto* the_graph = argument->ReleaseMainGraph();
auto graph = std::unique_ptr<Graph>(the_graph);

#ifdef PADDLE_WITH_MKLDNN
if (argument->Has("calibration_file_path")) {
VLOG(5) << "Calibration file path of quantize model: "
<< argument->calibration_file_path();
std::unordered_map<std::string, std::vector<float>> var_quant_scales{};
ReadCalibrationInfo(argument, &var_quant_scales);
// save var_quant_scales in the first op's attr
// for quant_dequant_mkldnn_pass
SaveInfoInTheFirstOp(
the_graph, "has_quant_info", "var_quant_scales", var_quant_scales);
}
#endif

// Apply passes.
IRPassManager the_ir_manager(argument);
graph = the_ir_manager.Apply(std::move(graph));
@@ -44,6 +61,40 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
CollectFusionStatis(argument);
}

void IrAnalysisPass::ReadCalibrationInfo(
Argument* argument,
std::unordered_map<std::string, std::vector<float>>* var_quant_scales) {
std::string calibration_file_path;
#ifdef PADDLE_WITH_MKLDNN
if (argument->Has("calibration_file_path")) {
calibration_file_path = argument->calibration_file_path();
}
#endif
if (calibration_file_path.empty()) {
LOG(INFO) << "argument has no calibration_file_path";
return;
}
std::ifstream calibration_file(calibration_file_path);
std::string one_line;
while (getline(calibration_file, one_line)) {
if (one_line.find(" ") != one_line.npos) {
auto pos = one_line.find(" ");
std::string pre_str = one_line.substr(0, pos);
std::string pos_str = one_line.substr(pos);
if (pre_str.size() && pos_str.size()) {
std::string tensor_name = pre_str;
float scale = std::stod(pos_str);
scale = 1.0 / scale;
if (std::isinf(scale) || std::isnan(scale)) {
continue;
}
std::vector<float> scales = {scale};
(*var_quant_scales)[tensor_name] = scales;
}
}
}
}

void IrAnalysisPass::CollectFusionStatis(Argument* argument) {
if (!argument->main_graph().Has(framework::ir::kFuseStatisAttr)) {
LOG(INFO) << "argument has no fuse statis";
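ReadCalibrationInfo implies a simple line-oriented calibration file: each line is split at its first space into a tensor name and a calibration value, and the stored scale is the reciprocal of that value (lines whose reciprocal comes out inf or nan are skipped). A hypothetical file, with made-up tensor names, could look like:

conv2d_0.tmp_0 0.0156862
pool2d_1.tmp_0 0.0392157
fc_0.tmp_0 0.0078431

For the first line the pass would record var_quant_scales["conv2d_0.tmp_0"] = {1 / 0.0156862 ≈ 63.75}.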
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
@@ -33,6 +33,10 @@ class IrAnalysisPass : public AnalysisPass {

void CollectFusionStatis(Argument* argument);

void ReadCalibrationInfo(
Argument* argument,
std::unordered_map<std::string, std::vector<float>>* var_quant_scales);

std::string repr() const override;
};

15 changes: 15 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -246,6 +246,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(opt_cache_dir_);
CP_MEMBER(prog_file_);
CP_MEMBER(params_file_);
CP_MEMBER(calibration_file_path_);

CP_MEMBER(use_fc_padding_);
// GPU related.
@@ -509,6 +510,14 @@ void AnalysisConfig::EnableMkldnnInt8(
Update();
}

void AnalysisConfig::SetCalibrationFilePath(
const std::string &calibration_file_path) {
calibration_file_path_ = calibration_file_path;
VLOG(1) << "Set calibration file path of quantize model: " +
calibration_file_path_;
Update();
}

MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
platform::errors::PreconditionNotMet(
@@ -805,6 +814,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << prog_file_;
ss << params_file_;

ss << calibration_file_path_;

ss << use_gpu_;
ss << use_external_stream_;
ss << exec_stream_;
@@ -987,6 +998,10 @@ std::string AnalysisConfig::Summary() {
os.InsertRow({"model_file", prog_file_});
os.InsertRow({"params_file", params_file_});
}
if (!(calibration_file_path_.empty())) {
os.InsertRow({"calibration_file_path", calibration_file_path_});
}

if (model_from_memory_) {
os.InsertRow({"model_from_memory", params_file_});
}
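Taken together with PrepareArgument below, a hypothetical C++ caller would set the calibration path on the config before building the predictor. This is a usage sketch under assumptions: the model paths and calibration file name are illustrative, and the predictor-creation entry point shown is the paddle_infer one.

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;  // alias of AnalysisConfig
  config.SetModel("model_dir/model.pdmodel", "model_dir/model.pdiparams");
  config.EnableMKLDNN();
  // New in this PR: supply the calibration ranges file that
  // IrAnalysisPass::ReadCalibrationInfo parses into var_quant_scales.
  config.SetCalibrationFilePath("calibration_table.txt");
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor != nullptr ? 0 : 1;
}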
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
100644 → 100755
@@ -1186,6 +1186,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetQuantizeEnabledOpTypes(config_.quantize_enabled_op_types_);
argument_.SetQuantizeExcludedOpIds(config_.quantize_excluded_op_ids_);
argument_.SetQuantVarScales({});
argument_.SetCalibrationFilePath(config_.calibration_file_path_);
}
#endif

13 changes: 13 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
100644 → 100755
@@ -755,6 +755,18 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableMkldnnQuantizer();

///
/// \brief Set the calibration ranges file path of the quantized model.
///
void SetCalibrationFilePath(const std::string& calibration_file_path = "");

///
/// \brief Return the calibration ranges file path of the quantized model.
///
std::string CalibrationFilePath() { return calibration_file_path_; }

///
/// \brief Turn on MKLDNN int8.
///
@@ -933,6 +945,7 @@ struct PD_INFER_DECL AnalysisConfig {
std::string model_dir_;
mutable std::string prog_file_;
mutable std::string params_file_;
mutable std::string calibration_file_path_;

// Mixed precision.
std::unordered_set<std::string> mixed_black_list_;
3 changes: 3 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
100644 → 100755
@@ -750,6 +750,9 @@ void BindAnalysisConfig(py::module *m) {
.def("to_native_config", &AnalysisConfig::ToNativeConfig)
.def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
.def("set_calibration_file_path",
&AnalysisConfig::SetCalibrationFilePath,
py::arg("calibration_file_path") = std::string(""))
#ifdef PADDLE_WITH_MKLDNN
.def("quantizer_config",
&AnalysisConfig::mkldnn_quantizer_config,
Empty file modified python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
100644 → 100755
10 changes: 10 additions & 0 deletions python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt
100644 → 100755
@@ -4,9 +4,19 @@ file(
"test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

list(REMOVE_ITEM TEST_OPS "test_onnx_format_quantization_mobilenetv1")

if(WITH_MKLDNN AND NOT WIN32)
list(APPEND TEST_OPS "test_onnx_format_quantization_mobilenetv1")
endif()

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach()
set_tests_properties(test_concat_mkldnn_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_conv3d_mkldnn_op PROPERTIES TIMEOUT 120)
if(WITH_MKLDNN AND NOT WIN32)
set_tests_properties(test_onnx_format_quantization_mobilenetv1
PROPERTIES TIMEOUT 300)
endif()
set_tests_properties(test_flags_mkldnn_ops_on_off PROPERTIES TIMEOUT 120)