From 4e5ce770a26ea15b1f259ad989f1e1feafae4e00 Mon Sep 17 00:00:00 2001 From: Lily Orth-Smith Date: Sat, 24 Oct 2020 00:23:50 -0700 Subject: [PATCH] [RELAY] Refactor FoldConstant to skip TNonComputationalOps (#6720) * add TNonComputational to qnn ops and change FoldConstant * remove comments * check if op in nonComputational map * forgot to mark device_copy op as TNonComputational * hacky fix to fuseops pass * fix typo * manually skip device_copy in fold_constant * Update src/relay/transforms/fold_constant.cc Co-authored-by: Junru Shao Co-authored-by: Junru Shao --- src/relay/qnn/op/concatenate.cc | 1 + src/relay/qnn/op/convolution.cc | 1 + src/relay/qnn/op/dense.cc | 1 + src/relay/qnn/op/dequantize.cc | 1 + src/relay/qnn/op/op_common.h | 1 + src/relay/qnn/op/quantize.cc | 1 + src/relay/qnn/op/requantize.cc | 1 + src/relay/transforms/fold_constant.cc | 9 ++++++--- 8 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/relay/qnn/op/concatenate.cc b/src/relay/qnn/op/concatenate.cc index 29ecf451767e..88d2ecc9b45b 100644 --- a/src/relay/qnn/op/concatenate.cc +++ b/src/relay/qnn/op/concatenate.cc @@ -207,6 +207,7 @@ RELAY_REGISTER_OP("qnn.concatenate") "The quantization zero_point of the output tensor.") .set_support_level(11) .add_type_rel("QnnConcatenate", QnnConcatenateRel) + .set_attr("TNonComputational", true) .set_attr("FTVMQnnCanonicalize", ConcatenateQnnCanonicalize) .set_attr("FInferCorrectLayout", QnnConcatenateLayout); diff --git a/src/relay/qnn/op/convolution.cc b/src/relay/qnn/op/convolution.cc index b2b6b092fd62..73ee4561907d 100644 --- a/src/relay/qnn/op/convolution.cc +++ b/src/relay/qnn/op/convolution.cc @@ -733,6 +733,7 @@ operator to understand how to scale back the int32 output to (u)int8. "The quantization zero_point of the weight tensor.") .set_support_level(11) .add_type_rel("QnnConv2D", QnnConv2DRel) + .set_attr("TNonComputational", true) .set_attr("FTVMQnnCanonicalize", QnnConv2DCanonicalize) .set_attr("FInferCorrectLayout", QnnConvInferCorrectLayout); diff --git a/src/relay/qnn/op/dense.cc b/src/relay/qnn/op/dense.cc index 3cfc418868ea..e1cbfaf98df1 100644 --- a/src/relay/qnn/op/dense.cc +++ b/src/relay/qnn/op/dense.cc @@ -189,6 +189,7 @@ RELAY_REGISTER_OP("qnn.dense") "The quantization zero_point of the weight tensor.") .set_support_level(11) .add_type_rel("QDense", QnnDenseRel) + .set_attr("TNonComputational", true) .set_attr("FTVMQnnCanonicalize", QnnDenseCanonicalize); TVM_REGISTER_GLOBAL("relay.qnn.op._make.dense").set_body_typed(MakeQuantizedDense); diff --git a/src/relay/qnn/op/dequantize.cc b/src/relay/qnn/op/dequantize.cc index f0c139c2a5e3..0a81f3fe4fdb 100644 --- a/src/relay/qnn/op/dequantize.cc +++ b/src/relay/qnn/op/dequantize.cc @@ -136,6 +136,7 @@ The input is always quantized (int8, uint8) and will be converted to float32 giv .add_argument("input_zero_point", "Tensor", "The quantization zero_point of the input tensor.") .set_support_level(11) .add_type_rel("Dequantize", DequantizeRel) + .set_attr("TNonComputational", true) .set_attr("FTVMQnnCanonicalize", DequantizeQnnCanonicalize); TVM_REGISTER_GLOBAL("relay.qnn.op._make.dequantize").set_body_typed(MakeDequantize); diff --git a/src/relay/qnn/op/op_common.h b/src/relay/qnn/op/op_common.h index e99c11b6f02b..3ca8f64ac9d9 100644 --- a/src/relay/qnn/op/op_common.h +++ b/src/relay/qnn/op/op_common.h @@ -215,6 +215,7 @@ static inline bool QnnBroadcastRel(const Array& types, int num_inputs, con .add_argument("output_scale", "Tensor", "The scale of the output tensor.") \ .add_argument("output_zero_point", "Tensor", "The zero_point of the output tensor.") \ .add_type_rel("QnnBroadcast", QnnBroadcastRel) \ + .set_attr("TNonComputational", true) \ .set_attr("FInferCorrectLayout", QnnBinaryBroadcastLayout) } // namespace qnn diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc index 1b5cb5e2b55b..07847916fae7 100644 --- a/src/relay/qnn/op/quantize.cc +++ b/src/relay/qnn/op/quantize.cc @@ -150,6 +150,7 @@ scale and zero point. "The quantization zero_point of the output tensor.") .set_support_level(11) .add_type_rel("Quantize", QuantizeRel) + .set_attr("TNonComputational", true) .set_attr("FTVMQnnCanonicalize", QuantizeQnnCanonicalize); TVM_REGISTER_GLOBAL("relay.qnn.op._make.quantize").set_body_typed(MakeQuantize); diff --git a/src/relay/qnn/op/requantize.cc b/src/relay/qnn/op/requantize.cc index ea878557d98e..3572a3980ced 100644 --- a/src/relay/qnn/op/requantize.cc +++ b/src/relay/qnn/op/requantize.cc @@ -324,6 +324,7 @@ Q_output = zp_output + (scale_input)/(scale_output) * (Q_input - zp_input) "The quantization zero_point of the output tensor.") .set_support_level(11) .add_type_rel("Requantize", RequantizeRel) + .set_attr("TNonComputational", true) .set_attr("FTVMQnnCanonicalize", RequantizeQnnCanonicalize) .set_attr("FInferCorrectLayout", RequantizeInferCorrectLayout); diff --git a/src/relay/transforms/fold_constant.cc b/src/relay/transforms/fold_constant.cc index 1de690d91036..4a739ddba40f 100644 --- a/src/relay/transforms/fold_constant.cc +++ b/src/relay/transforms/fold_constant.cc @@ -151,9 +151,12 @@ class ConstantFolder : public MixedModeMutator { } // We should think about potentially constant evaluation over these ops too. - if (call->op == invoke_tvm_op_ || call->op == shape_func_op_ || call->op == alloc_tensor_op_ || - call->op == alloc_storage_op_ || call->op == device_copy_op_) { - return GetRef(call); + static auto fnoncomputational = Op::GetAttrMap("TNonComputational"); + if (const auto* call_node = call->op.as()) { + Op op = GetRef(call_node); + if ((fnoncomputational.count(op) && fnoncomputational[op]) || (call->op == device_copy_op_)) { + return GetRef(call); + } } bool all_const_args = true;