diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py index cf0e9156e65f8..7931a3c3a51cc 100644 --- a/python/tvm/relay/op/contrib/cmsisnn.py +++ b/python/tvm/relay/op/contrib/cmsisnn.py @@ -47,16 +47,19 @@ def partition_for_cmsisnn(mod, params=None, **opts): if params: mod["main"] = bind_params_by_name(mod["main"], params) + tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__) + seq = tvm.transform.Sequential( [ transform.InferType(), transform.MergeComposite(pattern_table()), transform.AnnotateTarget("cmsisnn"), - transform.MergeCompilerRegions(), transform.PartitionGraph(), + GenerateCMSISNNConstants(), + ExtractConstantsFromPartitionedFunction(), + transform.InferType(), ] ) - return seq(mod) @@ -64,25 +67,73 @@ def partition_for_cmsisnn(mod, params=None, **opts): def pattern_table(): """Get the cmsisnn compiler pattern table.""" - def softmax_pattern(): + def qnn_softmax_pattern(): + """Create pattern for quantized softmax""" pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant()) pattern = is_op("nn.softmax")(pattern) pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant()) return pattern - def check_quantized_softmax(extract): + def check_qnn_softmax(pattern): """Check if softmax is supported by CMSIS-NN.""" - dequantize_call = extract.args[0].args[0] - scale = extract.args[1].data.numpy().item(0) - zero_point = extract.args[2].data.numpy().item(0) + dequantize_call = pattern.args[0].args[0] + scale = pattern.args[1].data.numpy().item(0) + zero_point = pattern.args[2].data.numpy().item(0) # check for dtypes of quantize and dequantize return ( (scale == 1.0 / 256 and zero_point == -128) - and extract.attrs.out_dtype == "int8" + and pattern.attrs.out_dtype == "int8" and dequantize_call.args[0].checked_type.dtype == "int8" ) + def qnn_conv2d_pattern(): + """Create pattern for qnn.conv2d with an optional fused ReLU.""" + qnn_conv2d = is_op("qnn.conv2d")( + wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() + ).has_attr({"kernel_layout": "HWIO"}) + bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant()) + req = is_op("qnn.requantize")( + qnn_conv2d | bias_add, is_constant(), is_constant(), is_constant(), is_constant() + ) + clip_or_req = req.optional(is_op("clip")) + return clip_or_req + + def check_qnn_conv2d(pattern): + """Check if the Conv2D is supported by CMSIS-NN.""" + if str(pattern.op.name) == "clip": + relu = pattern + requantize = relu.args[0] + else: + requantize = pattern + requantize_input = requantize.args[0] + bias_add = None + bias_dtype = "int32" + if str(requantize_input.op.name) == "nn.bias_add": + bias_add = requantize_input + conv2d = bias_add.args[0] + bias_dtype = bias_add.args[1].checked_type.dtype + else: + conv2d = requantize_input + conv2d_input = conv2d.args[0] + conv2d_weight = conv2d.args[1] + + # kernel zero_point should be 0 + kernel_zp = conv2d.args[3].data.numpy() + kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp + + return ( + conv2d.attrs.kernel_layout == "HWIO" + and conv2d.attrs.out_dtype == "int32" + and conv2d.attrs.padding[2] == 0 + and conv2d.attrs.padding[3] == 0 + and conv2d_input.checked_type.dtype == "int8" + and conv2d_weight.checked_type.dtype == "int8" + and pattern.checked_type.dtype == "int8" + and bias_dtype == "int32" + and all(zp == 0 for zp in kernel_zp) + ) + def binary_op_pattern(op): """Matches QNN binary operation""" return is_op(f"qnn.{op}")( @@ -96,7 +147,7 @@ def binary_op_pattern(op):
is_constant(), ) - def check_quantized_binary_op(extract): + def check_qnn_binary_op(extract): """Check if the elementwise binary op (mul/add) is supported by CMSIS-NN.""" return ( extract.args[0].checked_type.dtype == "int8" @@ -104,15 +155,8 @@ def check_quantized_binary_op(extract): ) return [ - ("cmsisnn.quantized_softmax", softmax_pattern(), check_quantized_softmax), - ( - "cmsisnn.quantized_mul", - binary_op_pattern("mul"), - check_quantized_binary_op, - ), - ( - "cmsisnn.quantized_add", - binary_op_pattern("add"), - check_quantized_binary_op, - ), + ("cmsisnn.qnn_softmax", qnn_softmax_pattern(), check_qnn_softmax), + ("cmsisnn.qnn_conv2d", qnn_conv2d_pattern(), check_qnn_conv2d), + ("cmsisnn.qnn_mul", binary_op_pattern("mul"), check_qnn_binary_op), + ("cmsisnn.qnn_add", binary_op_pattern("add"), check_qnn_binary_op), ] diff --git a/src/relay/backend/contrib/cmsisnn/codegen_cmsisnn.cc b/src/relay/backend/contrib/cmsisnn/codegen_cmsisnn.cc index c8094109771be..9856d626b77b3 100644 --- a/src/relay/backend/contrib/cmsisnn/codegen_cmsisnn.cc +++ b/src/relay/backend/contrib/cmsisnn/codegen_cmsisnn.cc @@ -21,6 +21,9 @@ #include namespace tvm { +namespace codegen { +runtime::Module CMSISNNModuleNodeCreate(IRModule mod); +} // namespace codegen namespace relay { namespace contrib { namespace cmsisnn { @@ -33,14 +36,12 @@ runtime::Module CompileCMSISNN(const ObjectRef& ref) { auto func_name = relay_func->GetAttr<String>(tvm::attr::kGlobalSymbol); GlobalVar var = GlobalVar(func_name.value()); relay_mod->Add(var, relay_func); - relay_mod = transform::InferType()(relay_mod); - Array<transform::Pass> pass_seqs{transform::InferType(), RelayToTIR()}; + Array<transform::Pass> pass_seqs{RelayToTIR()}; transform::Sequential seq(pass_seqs); IRModule tir_mod = seq(relay_mod); - const auto* pf = runtime::Registry::Get("runtime.CMSISNNModuleNodeCreate"); - return (*pf)(tir_mod); + return tvm::codegen::CMSISNNModuleNodeCreate(tir_mod); } TVM_REGISTER_GLOBAL("relay.ext.cmsisnn").set_body_typed(CompileCMSISNN); diff --git a/src/relay/backend/contrib/cmsisnn/extract_constants.cc b/src/relay/backend/contrib/cmsisnn/extract_constants.cc new file mode 100644 index 0000000000000..8442e370c7f9e --- /dev/null +++ b/src/relay/backend/contrib/cmsisnn/extract_constants.cc @@ -0,0 +1,158 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +#include +#include +#include +#include + +#include "../../../qnn/utils.h" +#include "../../../transforms/pattern_utils.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace cmsisnn { + +class ExtractConstantsMutator : public MixedModeMutator { + public: + explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {} + + private: + String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); } + + Expr VisitExpr_(const FunctionNode* func) final { + Function final_func = GetRef(func); + ++func_nesting_level_; + auto new_body = VisitExpr(func->body); + --func_nesting_level_; + if (!new_body.same_as(func->body)) { + final_func = Function(FreeVars(new_body), new_body, func->ret_type, + FreeTypeVars(new_body, mod_), func->attrs); + function_to_constants_.Set(GetRef(func), constants_within_function_); + constants_within_function_.clear(); + } + return final_func; + } + + Expr Rewrite_(const CallNode* call, const Expr& post) final { + Expr final_call = post; + auto* post_call = post.as(); + if (post_call == nullptr) { + return final_call; + } + + // Replace Constant arguments with Vars for ML Operators + // Perform this for non-main Call Nodes only + if (func_nesting_level_ && call->op.as()) { + Array new_args; + for (auto& arg : post_call->args) { + auto* const_arg = arg.as(); + if (const_arg && !const_arg->is_scalar()) { + Var var_arg = Var(gen_var_name(), const_arg->tensor_type()); + new_args.push_back(var_arg); + constants_within_function_.push_back(GetRef(const_arg)); + } else { + new_args.push_back(arg); + } + } + final_call = Call(call->op, new_args, call->attrs, {}); + } + + // Since the constants are kicked out of partitioned functions + // a new call to global function is needed + if (auto* glob_var_node = post_call->op.as()) { + auto glob_var = GetRef(glob_var_node); + auto glob_func = Downcast(mod_->Lookup(glob_var)); + auto new_glob_func = VisitExpr(glob_func); + if (!new_glob_func.same_as(glob_func)) { + mod_->Update(glob_var, Downcast(new_glob_func)); + Array new_args = post_call->args; + ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end()); + for (auto constant : function_to_constants_.at(glob_func)) { + new_args.push_back(constant); + } + final_call = Call(glob_var, new_args); + } + } + + // Since the constants are kicked out of the local partitioned functions + // a new call to local function is needed + if (auto* func_node = call->op.as()) { + Function func = GetRef(func_node); + auto new_func = VisitExpr(func); + if (!new_func.same_as(func)) { + Array new_args = post_call->args; + ICHECK(function_to_constants_.find(func) != function_to_constants_.end()); + for (auto constant : function_to_constants_.at(func)) { + constants_within_function_.push_back(constant); + Var var_arg = Var(gen_var_name(), constant->tensor_type()); + new_args.push_back(var_arg); + } + final_call = Call(new_func, new_args); + } + } + + return final_call; + } + + private: + /* \brief Updated module where all calls have replaced constants with new variables */ + IRModule mod_; + /* \brief Maintains mapping of original function to the replaced constants */ + Map> function_to_constants_; + /* \brief Constants being kicked out of a function during the function visit */ + Array constants_within_function_; + /* \brief Keeps track of variables being created */ + int var_count_ = 0; + /* \brief Keeps track of function scope */ + int func_nesting_level_ = 0; +}; + +/*! 
* \brief Kicks out all constants out of the partitioned function into main() */ +IRModule ExtractConstants(IRModule mod) { + String func_name; + Function func; + + auto extract_constants = ExtractConstantsMutator(mod); + Function main_func = Downcast(mod->Lookup("main")); + auto new_main_body = extract_constants.VisitExpr(main_func->body); + if (!new_main_body.same_as(main_func->body)) { + auto main_var = mod->GetGlobalVar("main"); + auto new_main_func = Function(main_func->params, new_main_body, main_func->ret_type, + main_func->type_params, main_func->attrs); + mod->Update(main_var, new_main_func); + } + return mod; +} + +transform::Pass ExtractConstantsFromPartitionedFunction() { + runtime::TypedPackedFunc pass_func = + [=](IRModule m, transform::PassContext pc) { return ExtractConstants(m); }; + return tvm::transform::CreateModulePass(pass_func, 0, "ExtractConstantsFromPartitionedFunction", + {}); +} + +TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.ExtractConstantsFromPartitionedFunction") + .set_body_typed([]() { return ExtractConstantsFromPartitionedFunction(); }); + +} // namespace cmsisnn +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/generate_constants.cc b/src/relay/backend/contrib/cmsisnn/generate_constants.cc new file mode 100644 index 0000000000000..1b3e9771aa4b9 --- /dev/null +++ b/src/relay/backend/contrib/cmsisnn/generate_constants.cc @@ -0,0 +1,230 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include +#include +#include +#include +#include + +#include "../../../qnn/utils.h" +#include "../../../transforms/pattern_utils.h" + +namespace tvm { +namespace relay { +Expr MakeTranspose(Expr data, Array axes); +namespace contrib { +namespace cmsisnn { + +class GenerateConstantsMutator : public MixedModeMutator { + public: + explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {} + + private: + /*! 
* \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */ + Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) { + auto attrs = make_object(); + attrs->strides = std::move(conv2d_attrs->strides); + attrs->padding = std::move(conv2d_attrs->padding); + attrs->dilation = std::move(conv2d_attrs->dilation); + attrs->groups = conv2d_attrs->groups; + attrs->channels = std::move(conv2d_attrs->channels); + attrs->kernel_size = std::move(conv2d_attrs->kernel_size); + attrs->data_layout = std::move(conv2d_attrs->data_layout); + attrs->kernel_layout = runtime::String("OHWI"); + attrs->out_layout = std::move(conv2d_attrs->out_layout); + attrs->out_dtype = std::move(conv2d_attrs->out_dtype); + *new_attrs = tvm::Attrs{attrs}; + + IRModule kernel_module; + auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)}); + auto kernel_func = + Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module)); + GlobalVar kernel_var("main"); + kernel_module->Add(kernel_var, kernel_func); + kernel_module = relay::transform::FoldConstant()(kernel_module); + kernel_func = Downcast(kernel_module->Lookup("main")); + return kernel_func->body; + } + + /*! * \brief Performs weight transpose and substitutes existing constants in the composite + * function for Conv2D with CMSIS-NN Requantize constants */ + Expr GenerateConv2dRequantConstants(const Expr& expr) { + const CallNode* clip_call = nullptr; + const CallNode* requantize_call = nullptr; + const CallNode* bias_add_call = nullptr; + const CallNode* conv2d_call = nullptr; + auto* final_call = expr.as(); + auto* final_op = final_call->op.as(); + if (final_op->name == "clip") { + clip_call = final_call; + requantize_call = clip_call->args[0].as(); + } else { + requantize_call = final_call; + } + auto* requantize_input = requantize_call->args[0].as(); + auto* requantize_input_op = requantize_input->op.as(); + if (requantize_input_op->name == "nn.bias_add") { + bias_add_call = requantize_input; + conv2d_call = bias_add_call->args[0].as(); + } else { + conv2d_call = requantize_input; + } + + // Transpose weights: HWIO -> OHWI + auto* conv2d_attrs = conv2d_call->attrs.as(); + tvm::Attrs new_conv2d_attrs; + Expr transposed_kernel = + ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs); + + // Obtain input and output scales from Relay's Requantization + int64_t out_channels = conv2d_attrs->channels.as()->value; + float output_scale = GetScalarFromConstant(requantize_call->args[3]); + auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]); + ICHECK(input_scales.size() == static_cast(out_channels)); + + // Calculate requantization multiplier and shift + Device dev{DLDeviceType::kDLCPU, 0}; + runtime::NDArray multiplier_nda = + runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev); + runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev); + int32_t* multiplier = static_cast(multiplier_nda->data); + int32_t* shift = static_cast(shift_nda->data); + for (int i = 0; i < out_channels; ++i) { + double effective_output_scale = + static_cast(input_scales[i]) / static_cast(output_scale); + std::tie(*(multiplier + i), *(shift + i)) = + tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale); + } + + // Create constants from requantization multiplier and shift + Constant multiplier_const(multiplier_nda); + Constant shift_const(shift_nda); + + 
// Convert scale scalars into Constants + // Scales are expected as Constants by following passes + Expr weight_scale = conv2d_call->args[5]; + Expr req_inp_scale = requantize_call->args[1]; + if (out_channels == 1) { + runtime::NDArray weight_scale_nda = + runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev); + float* weight_scale_p = static_cast(weight_scale_nda->data); + *weight_scale_p = GetScalarFromConstant(weight_scale); + weight_scale = Constant(weight_scale_nda); + + runtime::NDArray req_inp_scale_nda = + runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev); + float* req_inp_scale_p = static_cast(req_inp_scale_nda->data); + *req_inp_scale_p = GetScalarFromConstant(req_inp_scale); + req_inp_scale = Constant(req_inp_scale_nda); + } + + // Replace existing weights (HWIO) with the transposed ones (OHWI) + // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier + // Substitute Requantize input_zero_point with CMSIS-NN shift + // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc + Array conv2d_args = {conv2d_call->args[0], transposed_kernel, conv2d_call->args[2], + multiplier_const, conv2d_call->args[4], weight_scale}; + Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {}); + if (bias_add_call) { + ret_call = + Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {}); + } + Array requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3], + requantize_call->args[4]}; + ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {}); + if (clip_call) { + ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {}); + } + return ret_call; + } + + Expr Rewrite_(const CallNode* call, const Expr& post) final { + Expr final_call = post; + auto* post_call = post.as(); + if (post_call == nullptr) { + return final_call; + } + + auto* global_var = call->op.as(); + if (global_var) { + // Update to global function call needed because the body changes while + // generating new constants + Function func = Downcast(mod_->Lookup(global_var->name_hint)); + Expr new_body = VisitExpr(func->body); + if (!new_body.same_as(func->body)) { + Function new_func = Function(FreeVars(new_body), new_body, func->ret_type, + FreeTypeVars(new_body, mod_), func->attrs); + mod_->Update(GetRef(global_var), new_func); + final_call = Call(GetRef(global_var), post_call->args); + } + } + + // Recreate composite function and corresponding call + // Updated composite function contains CMSIS-NN quantized multiplier and shift constants + if (call->op.as()) { + auto* func = call->op.as(); + auto func_name = func->GetAttr(attr::kComposite); + if (func_name.defined() && func_name == "cmsisnn.qnn_conv2d") { + Expr new_body = GenerateConv2dRequantConstants(func->body); + Function new_func = Function(FreeVars(new_body), new_body, func->ret_type, + FreeTypeVars(new_body, mod_), func->attrs); + final_call = Call(new_func, post_call->args); + } + } + + return final_call; + } + + private: + IRModule mod_; +}; + +IRModule GenerateConstants(IRModule mod) { + String func_name; + Function func; + + // Introduces CMSIS-NN constants before the call to the external Relay function + auto generate_constants = GenerateConstantsMutator(mod); + Function main_func = Downcast(mod->Lookup("main")); + auto new_main_body = generate_constants.VisitExpr(main_func->body); + if (!new_main_body.same_as(main_func->body)) { + auto main_var = mod->GetGlobalVar("main"); + auto new_main_func = 
Function(main_func->params, new_main_body, main_func->ret_type, + main_func->type_params, main_func->attrs); + mod->Update(main_var, new_main_func); + } + + return mod; +} + +transform::Pass GenerateCMSISNNConstants() { + runtime::TypedPackedFunc pass_func = + [=](IRModule m, transform::PassContext pc) { return GenerateConstants(m); }; + return tvm::transform::CreateModulePass(pass_func, 0, "GenerateCMSISNNConstants", {}); +} + +TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.GenerateCMSISNNConstants").set_body_typed([]() { + return GenerateCMSISNNConstants(); +}); + +} // namespace cmsisnn +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc index 3c3346340f044..c307657f8062b 100644 --- a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc +++ b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc @@ -17,6 +17,7 @@ * specific language governing permissions and limitations * under the License. */ +#include #include #include #include @@ -43,7 +44,7 @@ class RelayToTIRVisitor : public MixedModeVisitor { inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); } void CreatePrimFuncForExtern(Array func_signature, - tvm::Array call_extern_args) { + tvm::Array call_extern_args, int context_buffer_size = 0) { Map dict_attrs; dict_attrs.Set("global_symbol", func_name_); dict_attrs.Set("tir.noalias", Bool(true)); @@ -51,15 +52,145 @@ class RelayToTIRVisitor : public MixedModeVisitor { tir::Stmt body = tir::Evaluate( tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args)); + if (context_buffer_size) { + // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW + tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global")); + body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(), + body); + } + primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map(), DictAttrs(dict_attrs)); } + void EmitConv2D(const Expr& expr) { + const CallNode* clip_call = nullptr; + const CallNode* requantize_call = nullptr; + const CallNode* bias_add_call = nullptr; + const CallNode* conv2d_call = nullptr; + auto* final_call = expr.as(); + auto* final_op = final_call->op.as(); + if (final_op->name == "clip") { + clip_call = final_call; + requantize_call = clip_call->args[0].as(); + } else { + requantize_call = final_call; + } + auto* requantize_input = requantize_call->args[0].as(); + auto* requantize_input_op = requantize_input->op.as(); + if (requantize_input_op->name == "nn.bias_add") { + bias_add_call = requantize_input; + conv2d_call = bias_add_call->args[0].as(); + } else { + conv2d_call = requantize_input; + } + + // TIR variables are created in the order they appear in the Relay partitioned function + // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar, + // %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2) + // %2 = nn.bias_add(%1, %bias_const_3, axis=3) + // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5, + // %output_scale_scalar, %output_zero_point_scalar) + // clip(%3, a_min=%min_scalar, a_max=%max_scalar) + auto in_var = tir::Var("input", DataType::Handle(8)); + auto const_var0 = tir::Var("filter", DataType::Handle(8)); // weight + auto const_var1 = tir::Var("multiplier", DataType::Handle(32)); // quant multiplier + auto const_var2 = tir::Var("filter_scale", DataType::Handle(32)); // 
weight scale + auto const_var3 = tir::Var("bias", DataType::Handle(32)); // bias + auto const_var4 = tir::Var("input_scale", DataType::Handle(32)); // input_scale * weight_scale + auto const_var5 = tir::Var("shift", DataType::Handle(32)); // quant shift + auto out_var = tir::Var("output", DataType::Handle(8)); + + // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern + // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50 + + // prepare cmsis_nn_conv_params + auto* conv2d_attrs = conv2d_call->attrs.as(); + int32_t input_offset = -GetScalarFromConstant(conv2d_call->args[2]); + int32_t output_offset = GetScalarFromConstant(requantize_call->args[4]); + int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]); + int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]); + int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]); + int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]); + int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]); + int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]); + int32_t clip_min, clip_max; + if (clip_call) { + auto* clip_attrs = clip_call->attrs.as(); + clip_min = clip_attrs->a_min; + clip_max = clip_attrs->a_max; + } else { + clip_min = -128; + clip_max = 127; + } + + // cmsis_nn_dims *input_dims + auto input_shape = conv2d_call->args[0]->type_as()->shape; + int32_t input_n = qnn::get_const_int(input_shape[0]); + int32_t input_h = qnn::get_const_int(input_shape[1]); + int32_t input_w = qnn::get_const_int(input_shape[2]); + int32_t input_c = qnn::get_const_int(input_shape[3]); + + // cmsis_nn_dims *filter_dims (OHWI) + auto filter_shape = conv2d_call->args[1]->type_as()->shape; + int32_t filter_n = qnn::get_const_int(filter_shape[0]); + int32_t filter_h = qnn::get_const_int(filter_shape[1]); + int32_t filter_w = qnn::get_const_int(filter_shape[2]); + int32_t filter_c = qnn::get_const_int(filter_shape[3]); + + // cmsis_nn_dims *bias_dims + int32_t bias_n = 1; + int32_t bias_h = 1; + int32_t bias_w = 1; + int32_t bias_c = qnn::get_const_int(filter_shape[0]); + + // cmsis_nn_dims *output_dims + auto output_shape = conv2d_call->type_as()->shape; + int32_t output_n = qnn::get_const_int(output_shape[0]); + int32_t output_h = qnn::get_const_int(output_shape[1]); + int32_t output_w = qnn::get_const_int(output_shape[2]); + int32_t output_c = qnn::get_const_int(output_shape[3]); + + tvm::Array call_ext_args = {tir::StringImm("arm_convolve_wrapper_s8"), in_var, + const_var0, const_var1}; + if (bias_add_call) { + call_ext_args.push_back(const_var3); + } + call_ext_args.push_back(const_var5); + call_ext_args.push_back(out_var); + + tvm::Array scalar_args = { + ToArg(input_offset), ToArg(output_offset), ToArg(stride_w), ToArg(stride_h), + ToArg(padding_w), ToArg(padding_h), ToArg(dilation_w), ToArg(dilation_h), + ToArg(clip_min), ToArg(clip_max), ToArg(input_n), ToArg(input_h), + ToArg(input_w), ToArg(input_c), ToArg(filter_n), ToArg(filter_h), + ToArg(filter_w), ToArg(filter_c), ToArg(bias_n), ToArg(bias_h), + ToArg(bias_w), ToArg(bias_c), ToArg(output_n), ToArg(output_h), + ToArg(output_w), ToArg(output_c), + }; + + call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args); + + Array func_signature{in_var, const_var0, const_var1, const_var2}; + if (bias_add_call) { + func_signature.push_back(const_var3); + } + func_signature.push_back(const_var4); + 
func_signature.push_back(const_var5); + func_signature.push_back(out_var); + + // https://github.com/ARM-software/CMSIS_5/blob/d788fd583984388553391de18afd8b4d2a146868/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c#L367 + size_t context_buffer_size = (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); + + CreatePrimFuncForExtern(func_signature, call_ext_args, context_buffer_size); + } + void EmitSoftMax(const Expr& expr) { auto* quantize_call = expr.as(); auto* softmax_call = quantize_call->args[0].as(); auto* dequant_call = softmax_call->args[0].as(); - const float quant_scale = GetScalarFromConstant(dequant_call->args[1]); + auto* scale_const = dequant_call->args[1].as(); + const float quant_scale = static_cast(scale_const->data->data)[0]; // assuming layout as NHWC auto shape = quantize_call->type_as()->shape; @@ -73,16 +204,15 @@ class RelayToTIRVisitor : public MixedModeVisitor { // calculate multiplier and shift for CMSIS-NN softmax API // Note: TensorFlow Lite Micro assumptions // Output zero point and scale are fixed to -128 and 1 / 256 + // kScaledDiffIntegerBits, kInputBits, kBeta are described on the following github page // https://github.com/tensorflow/tflite-micro/blob/d97cd0908d8cf5021e9d86f05a49888bee28c2a4/tensorflow/lite/micro/kernels/softmax_common.cc#L47 - double beta = 1.0; - int32_t input_bits = 5; - double beta_multiplier = (beta * quant_scale * (1 << (31 - input_bits))); + double beta_multiplier = (kBeta * quant_scale * (1 << (31 - kInputBits))); beta_multiplier = std::min(beta_multiplier, (1ll << 31) - 1.0); auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(beta_multiplier); int32_t mult = std::get<0>(mult_shift_pair); int32_t shift = std::get<1>(mult_shift_pair); - int32_t diff_min = (1 << 5) - 1; - diff_min <<= (31 - 5); + int32_t diff_min = (1 << kScaledDiffIntegerBits) - 1; + diff_min <<= (31 - kScaledDiffIntegerBits); diff_min >>= shift; diff_min *= -1; @@ -223,19 +353,25 @@ class RelayToTIRVisitor : public MixedModeVisitor { auto comp_name = func->GetAttr(attr::kComposite); if (comp_name.defined()) { - if (comp_name == "cmsisnn.quantized_softmax") { + if (comp_name == "cmsisnn.qnn_conv2d") { + EmitConv2D(func->body); + } + if (comp_name == "cmsisnn.qnn_softmax") { EmitSoftMax(func->body); } - if (comp_name == "cmsisnn.quantized_mul") { + if (comp_name == "cmsisnn.qnn_mul") { EmitMul(func->body); } - if (comp_name == "cmsisnn.quantized_add") { + if (comp_name == "cmsisnn.qnn_add") { EmitAdd(func->body); } } } public: + int32_t kScaledDiffIntegerBits = 5; + int32_t kInputBits = 5; + double kBeta = 1.0; String func_name_; tir::PrimFunc primfunc_; }; diff --git a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc index fb612e70311b5..72edb2f2ef8ee 100644 --- a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc +++ b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc @@ -37,8 +37,8 @@ class CodeGenCMSISNN : public CodeGenC { decl_stream << "#include \n"; decl_stream << "#include \n"; decl_stream << "#include \n"; - decl_stream << "#include \n"; decl_stream << "#include \n"; + decl_stream << "#include \n"; CodeGenC::Init(output_ssa); } @@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC { } private: + /*! * \brief Emit the CMSIS-NN context buffer */ + void VisitStmt_(const AllocateNode* op) { + context_buffer_name_ = op->buffer_var->name_hint; + context_buffer_size_ = op->constant_allocation_size(); + CodeGenC::VisitStmt_(op); + } + + /*! 
* \brief Emits CMSIS-NN APIs for every call_extern */ + void VisitExpr_(const CallNode* op, std::ostream& os) { // NOLINT(*) + if (!op->op.same_as(builtin::call_extern())) { + return; + } + std::string cmsis_func_name = op->args[0].as()->value; + if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" || + cmsis_func_name == "arm_elementwise_add_s8") { + CodeGenC::VisitExpr_(op, os); + } else if (cmsis_func_name == "arm_convolve_wrapper_s8") { + EmitConv2D(op); + } + return; + } + + /*! * \brief Emits cmsis_nn_context struct */ + std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) { + std::string struct_name = "context"; + PrintIndent(); + os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n"; + return struct_name; + } + + /*! * \brief Emits cmsis_nn_conv_params struct */ + std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset, + int32_t stride_w, int32_t stride_h, int32_t padding_w, + int32_t padding_h, int32_t dilation_w, int32_t dilation_h, + int32_t clip_min, int32_t clip_max) { + std::string struct_name = "conv_params"; + PrintIndent(); + os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n"; + PrintIndent(); + os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n"; + PrintIndent(); + os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n"; + PrintIndent(); + os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n"; + PrintIndent(); + os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset + << ", stride, padding, dilation, activation};\n"; + return struct_name; + } + + /*! * \brief Emits cmsis_nn_per_channel_quant_params struct */ + std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier, + std::string shift) { + std::string struct_name = "quant_params"; + PrintIndent(); + os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", " + << shift << "};\n"; + return struct_name; + } + + /*! * \brief Emits cmsis_nn_dims struct */ + std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h, + int32_t w, int32_t c) { + std::string struct_name = tensor_type + "_dims"; + PrintIndent(); + os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c + << "};\n"; + return struct_name; + } + + /*! 
* \brief Emits CMSIS-NN APIs for every call_extern */ + void EmitConv2D(const CallNode* op) { + static const int max_num_args = 33; + std::string cmsis_func_name = op->args[0].as()->value; + + bool bias_enabled = false; + if (op->args.size() == max_num_args) { + bias_enabled = true; + } + + auto get_var_name = [](const CallNode* op, int id) { + return op->args[id].as()->name_hint.c_str(); + }; + auto get_arg_value = [](const CallNode* op, int id) { + return op->args[id].as()->value; + }; + int arg_id = 0; + std::string input_data = get_var_name(op, ++arg_id); + std::string filter_data = get_var_name(op, ++arg_id); + std::string multiplier = get_var_name(op, ++arg_id); + std::string bias_data("0x0"); + if (bias_enabled) { + bias_data = get_var_name(op, ++arg_id); + } + std::string shift = get_var_name(op, ++arg_id); + std::string output_data = get_var_name(op, ++arg_id); + + int input_offset = get_arg_value(op, ++arg_id); + int output_offset = get_arg_value(op, ++arg_id); + int stride_w = get_arg_value(op, ++arg_id); + int stride_h = get_arg_value(op, ++arg_id); + int padding_w = get_arg_value(op, ++arg_id); + int padding_h = get_arg_value(op, ++arg_id); + int dilation_w = get_arg_value(op, ++arg_id); + int dilation_h = get_arg_value(op, ++arg_id); + int clip_min = get_arg_value(op, ++arg_id); + int clip_max = get_arg_value(op, ++arg_id); + int input_n = get_arg_value(op, ++arg_id); + int input_h = get_arg_value(op, ++arg_id); + int input_w = get_arg_value(op, ++arg_id); + int input_c = get_arg_value(op, ++arg_id); + int filter_n = get_arg_value(op, ++arg_id); + int filter_h = get_arg_value(op, ++arg_id); + int filter_w = get_arg_value(op, ++arg_id); + int filter_c = get_arg_value(op, ++arg_id); + int bias_n = get_arg_value(op, ++arg_id); + int bias_h = get_arg_value(op, ++arg_id); + int bias_w = get_arg_value(op, ++arg_id); + int bias_c = get_arg_value(op, ++arg_id); + int output_n = get_arg_value(op, ++arg_id); + int output_h = get_arg_value(op, ++arg_id); + int output_w = get_arg_value(op, ++arg_id); + int output_c = get_arg_value(op, ++arg_id); + + // TODO(ashutosh-arm) for mve code, need to look for tir allocate + std::string context = EmitCMSISNNContext(stream, context_buffer_name_, context_buffer_size_); + std::string conv_params = + EmitCMSISNNConvParams(stream, input_offset, output_offset, stride_w, stride_h, padding_w, + padding_h, dilation_w, dilation_h, clip_min, clip_max); + std::string quant_params = EmitCMSISNNPerChannelQuantParams(stream, multiplier, shift); + std::string input_dim = EmitCMSISNNDims(stream, "input", input_n, input_h, input_w, input_c); + std::string filter_dim = + EmitCMSISNNDims(stream, "filter", filter_n, filter_h, filter_w, filter_c); + std::string bias_dim = EmitCMSISNNDims(stream, "bias", bias_n, bias_h, bias_w, bias_c); + std::string output_dim = + EmitCMSISNNDims(stream, "output", output_n, output_h, output_w, output_c); + + PrintIndent(); + stream << "arm_status status = "; + stream << cmsis_func_name << "("; + stream << "&" << context << ", "; + stream << "&" << conv_params << ", "; + stream << "&" << quant_params << ", "; + stream << "&" << input_dim << ", " << input_data << ", "; + stream << "&" << filter_dim << ", " << filter_data << ", "; + stream << "&" << bias_dim << ", " << bias_data << ", "; + stream << "&" << output_dim << ", " << output_data << ");\n"; + PrintIndent(); + stream << "if (status != ARM_MATH_SUCCESS) {\n"; + PrintIndent(); + PrintIndent(); + stream << "printf(\"Failed during execution of " << cmsis_func_name << "().\");\n"; + 
PrintIndent(); + stream << "}\n"; + } + /*! * \brief Creates a cplusplus guard prefix for extern "C" printing */ void PrintExternCPrefix(std::ostringstream& ss) { PrintIndent(); @@ -69,6 +225,10 @@ class CodeGenCMSISNN : public CodeGenC { ss << "}\n"; ss << "#endif\n"; } + + private: + std::string context_buffer_name_ = "Empty"; + int context_buffer_size_ = 0; }; class CMSISNNModuleNode : public runtime::ModuleNode { @@ -105,12 +265,15 @@ class CMSISNNModuleNode : public runtime::ModuleNode { } protected: + /* \brief generated c code */ std::string code_; + /* \brief format in which code is generated */ std::string fmt_; + /* \brief names of tir and c functions */ Array func_names_; }; -static runtime::Module CMSISNNModuleNodeCreate(IRModule mod) { +runtime::Module CMSISNNModuleNodeCreate(IRModule mod) { bool output_ssa = false; CodeGenCMSISNN cg; Array function_names; diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py new file mode 100644 index 0000000000000..3a9b761a85d17 --- /dev/null +++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py @@ -0,0 +1,303 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
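+#
+# (Editorial aside, not part of the upstream patch.) A worked example of the SAME
+# padding helper used by the tests below: utils.get_same_padding() mimics CMSIS-NN by
+# placing the odd pad pixel at the top/left. For a 28x28 input, 3x3 kernel, stride 2,
+# dilation 1:
+#
+#     out = ceil(28 / 2) = 14
+#     pad = max(0, (out - 1) * 2 + 3 - 28) = 1
+#     [pad_top, pad_left, pad_bottom, pad_right] = [1, 1, 0, 0]
+#
+# Only padding[0]/padding[1] end up non-zero, which is what check_qnn_conv2d() in
+# cmsisnn.py requires (padding[2] == 0 and padding[3] == 0).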
+ +"""CMSIS-NN integration tests: Conv2D""" +import itertools +import numpy as np +import pytest +import tvm +from tvm import relay +from tvm.relay.op.contrib import cmsisnn + + +from tests.python.relay.aot.aot_test_utils import ( + AOTTestModel, + AOT_CORSTONE300_RUNNER, + AOT_DEFAULT_RUNNER, + generate_ref_data, + compile_and_run, +) +from utils import ( + skip_if_no_reference_system, + make_module, + count_num_calls, + get_range_for_dtype_str, + get_same_padding, + get_conv2d_qnn_params, + make_qnn_relu, +) + + +def make_model( + shape, + kernel_shape, + input_zp, + input_sc, + kernel_zp, + kernel_sc, + output_zp, + output_sc, + padding, + strides, + dilation, + groups, + dtype, + kernel_dtype, + out_channels, + weight_format, + enable_bias, + relu_type, +): + """Return a model and any parameters it may have""" + h_index = weight_format.index("H") + w_index = weight_format.index("W") + kernel_h = kernel_shape[h_index] + kernel_w = kernel_shape[w_index] + a = relay.var("in0", shape=shape, dtype=dtype) + p = (0, 0, 0, 0) + if padding == "SAME": + p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides) + a = relay.nn.pad( + a, + pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)], + pad_value=input_zp, + pad_mode="constant", + ) + shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3]) + + weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels) + w = tvm.nd.array( + np.random.randint( + np.iinfo(kernel_dtype).min, + high=np.iinfo(kernel_dtype).max, + size=weight_shape, + dtype=kernel_dtype, + ) + ) + weights = relay.const(w, kernel_dtype) + conv = relay.qnn.op.conv2d( + a, + weights, + input_zero_point=relay.const(input_zp, "int32"), + kernel_zero_point=relay.const(kernel_zp, "int32"), + input_scale=relay.const(input_sc, "float32"), + kernel_scale=relay.const(kernel_sc, "float32"), + kernel_size=(kernel_h, kernel_w), + data_layout="NHWC", + kernel_layout=weight_format, + dilation=dilation, + strides=strides, + groups=groups, + channels=out_channels, + padding=p, + out_dtype="int32", + ) + b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32")) + bc = relay.const(b, "int32") + bias = conv + if enable_bias: + bias = relay.nn.bias_add(conv, bc, axis=3) + requant_input_sc = [sc * input_sc for sc in kernel_sc] + req = relay.qnn.op.requantize( + bias, + relay.const(requant_input_sc, "float32"), + relay.const(0, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + out_dtype=dtype, + ) + relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype) + params = {"w": w, "b": b} + return relu, params + + +@tvm.testing.requires_cmsisnn +@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)]) +@pytest.mark.parametrize("kernel_size", [(3, 3)]) +@pytest.mark.parametrize("padding", ["SAME", "VALID"]) +@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))]) +@pytest.mark.parametrize("enable_bias", [True, False]) +@pytest.mark.parametrize("relu_type", ["NONE", "RELU"]) +@pytest.mark.parametrize( + "in_zp, in_sc, k_sc, out_channels", + [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)], +) +def test_op_int8( + ifm_shape, + kernel_size, + padding, + strides, + dilation, + enable_bias, + relu_type, + in_zp, + in_sc, + k_sc, + out_channels, +): + interface_api = "c" + use_unpacked_api = True + test_runner = AOT_CORSTONE300_RUNNER + + k_zp = 0 + groups = 1 + weight_format = "HWIO" + kernel_h = kernel_size[0] + kernel_w = 
kernel_size[1] + dtype = "int8" + in_min, in_max = get_range_for_dtype_str(dtype) + + weight_shape = None + if weight_format == "HWIO": + weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) + else: + weight_shape = (kernel_h, kernel_w, ifm_shape[3], out_channels) + + out_sc, out_zp = get_conv2d_qnn_params( + weight_shape, in_sc, in_zp, k_sc, k_zp, dtype, dtype, dtype, False + ) + + model, params = make_model( + ifm_shape, + weight_shape, + in_zp, + in_sc, + k_zp, + k_sc, + out_zp, + out_sc, + padding, + strides, + dilation, + groups, + dtype, + dtype, + out_channels, + weight_format, + enable_bias, + relu_type, + ) + orig_mod = make_module(model) + cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) + + # validate pattern matching + attrs = [ + cmsisnn_mod[var.name_hint].attrs + for var in cmsisnn_mod.get_global_vars() + if cmsisnn_mod[var.name_hint].attrs + ] + assert any(attrs), "At least one function with external attributes was expected." + + compilers = [ + key == "Compiler" and value == "cmsisnn" for attr in attrs for key, value in attr.items() + ] + assert any(compilers), "Module does not contain function for cmsisnn target." + + assert count_num_calls(orig_mod) == count_num_calls( + cmsisnn_mod + ), "Number of calls changed during partitioning" + + # validate the output + np.random.seed(0) + inputs = { + "in0": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8"), + } + output_list = generate_ref_data(orig_mod["main"], inputs, params) + compile_and_run( + AOTTestModel( + module=cmsisnn_mod, + inputs=inputs, + outputs=output_list, + params=params, + output_tolerance=1, + ), + test_runner, + interface_api, + use_unpacked_api, + ) + + +def parameterize_for_invalid_model(test): + in_dtype = ["uint8", "int8"] + kernel_dtype = ["uint8", "int8"] + kernel_zero_point = [-33, 10, 0] + all_combinations = itertools.product(in_dtype, kernel_dtype, kernel_zero_point) + all_combinations = filter( + lambda parameters: not ( + parameters[0] == "int8" and parameters[1] == "int8" and parameters[2] == 0 + ), + all_combinations, + ) + return pytest.mark.parametrize( + ["in_dtype", "kernel_dtype", "kernel_zero_point"], + all_combinations, + )(test) + + +@parameterize_for_invalid_model +def test_invalid_parameters( + in_dtype, + kernel_dtype, + kernel_zero_point, +): + ifm_shape = (1, 28, 28, 12) + out_channels = 2 + in_sc = 1 + in_zp = 24 + k_sc = [0.11, 0.0237] + in_min, in_max = get_range_for_dtype_str(in_dtype) + + kernel_layout = "HWIO" + kernel_shape = [3, 3, ifm_shape[3], out_channels] + out_sc, out_zp = get_conv2d_qnn_params( + kernel_shape, in_sc, in_zp, k_sc, kernel_zero_point, in_dtype, kernel_dtype, in_dtype, False + ) + model, params = make_model( + shape=ifm_shape, + kernel_shape=kernel_shape, + input_zp=in_zp, + input_sc=in_sc, + kernel_zp=kernel_zero_point, + kernel_sc=k_sc, + output_zp=out_zp, + output_sc=out_sc, + padding="SAME", + strides=(1, 1), + dilation=(1, 1), + groups=1, + dtype=in_dtype, + kernel_dtype=kernel_dtype, + out_channels=out_channels, + weight_format=kernel_layout, + enable_bias=True, + relu_type="NONE", + ) + orig_mod = make_module(model) + cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) + + # validate pattern matching + attrs = [ + cmsisnn_mod[var.name_hint].attrs + for var in cmsisnn_mod.get_global_vars() + if cmsisnn_mod[var.name_hint].attrs + ] + assert not any(attrs), "No function should have an external attribute."
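+
+# (Editorial aside, not part of the upstream patch.) The per-channel requantization
+# constants that GenerateCMSISNNConstants() substitutes into the partitioned function
+# come from tvm::relay::qnn::GetFixedPointMultiplierShift(), which decomposes each
+# effective scale (input_scale * kernel_scale / output_scale) into a Q31 multiplier
+# and a shift such that scale ~= multiplier * 2 ** (shift - 31). A rough pure-Python
+# sketch of that decomposition, assuming 0 < scale < 1:
+#
+#     import math
+#
+#     def fixed_point_multiplier_shift(scale):
+#         significand, shift = math.frexp(scale)       # scale = significand * 2**shift
+#         multiplier = round(significand * (1 << 31))  # Q0.31 fixed point
+#         if multiplier == 1 << 31:                    # rounding overflowed [0.5, 1)
+#             multiplier //= 2
+#             shift += 1
+#         return multiplier, shift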
+ + +if __name__ == "__main__": + sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/python/contrib/test_cmsisnn/test_networks.py b/tests/python/contrib/test_cmsisnn/test_networks.py index e78b06cbab84f..a42bba0e5dca1 100644 --- a/tests/python/contrib/test_cmsisnn/test_networks.py +++ b/tests/python/contrib/test_cmsisnn/test_networks.py @@ -92,7 +92,6 @@ def test_cnn_small(): orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input") cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) - # validate CMSIS-NN output against CPU output interface_api = "c" use_unpacked_api = True diff --git a/tests/python/contrib/test_cmsisnn/test_softmax.py b/tests/python/contrib/test_cmsisnn/test_softmax.py index b030437252dc5..dcd5b5de0a6f8 100644 --- a/tests/python/contrib/test_cmsisnn/test_softmax.py +++ b/tests/python/contrib/test_cmsisnn/test_softmax.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -"""CMSIS-NN integration tests: softmax""" +"""CMSIS-NN integration tests: Softmax""" import sys import itertools @@ -64,7 +64,7 @@ def make_model( @skip_if_no_reference_system @pytest.mark.parametrize(["zero_point", "scale"], [[33, 0.256], [-64, 0.0128]]) @tvm.testing.requires_cmsisnn -def test_softmax_int8(zero_point, scale): +def test_op_int8(zero_point, scale): interface_api = "c" use_unpacked_api = True test_runner = AOT_CORSTONE300_RUNNER @@ -135,7 +135,7 @@ def parameterize_for_invalid_model(test): @parameterize_for_invalid_model @tvm.testing.requires_cmsisnn -def test_invalid_softmax(in_dtype, out_dtype, zero_point, scale, out_zero_point, out_scale): +def test_invalid_parameters(in_dtype, out_dtype, zero_point, scale, out_zero_point, out_scale): model = make_model( [1, 16, 16, 3], in_dtype, out_dtype, zero_point, scale, out_zero_point, out_scale ) diff --git a/tests/python/contrib/test_cmsisnn/utils.py b/tests/python/contrib/test_cmsisnn/utils.py index 3fd12efd3367a..145dbf4b499c0 100644 --- a/tests/python/contrib/test_cmsisnn/utils.py +++ b/tests/python/contrib/test_cmsisnn/utils.py @@ -19,8 +19,10 @@ import platform +import math import numpy as np import pytest +from typing import List, Dict, Optional, Any, Union, Tuple import tvm from tvm import relay @@ -33,7 +35,7 @@ def skip_if_no_reference_system(func): def count_num_calls(mod): - """Count number of CallNode in the IRModule""" + """Counts number of CallNode(s) in the IRModule""" class CallCounter(relay.ExprVisitor): def __init__(self): @@ -54,7 +56,7 @@ def visit_call(self, call): def get_range_for_dtype_str(dtype): """ - Produce the min,max for a give data type. + Produces the min, max for a given data type.
Parameters ---------- @@ -77,7 +79,124 @@ def make_module(func): - """Create IRModule from Function""" + """Creates IRModule from Function""" func = relay.Function(relay.analysis.free_vars(func), func) mod = tvm.IRModule.from_expr(func) - return relay.transform.InferType()(mod) + mod = relay.transform.InferType()(mod) + return mod + + +def get_same_padding(data, kernel, dilation, stride, cmsisnn_padding=True): + """Provides CMSIS-NN padding when output dim == input dim""" + dilated_kernel_h = dilation[0] * (kernel[0] - 1) + 1 + dilated_kernel_w = dilation[1] * (kernel[1] - 1) + 1 + out = int(math.ceil(float(data[0]) / float(stride[0]))) + pad = max(0, (out - 1) * stride[0] + dilated_kernel_h - data[0]) + pad_top, pad_bottom = (pad, 0) if cmsisnn_padding else (0, pad) + + out = int(math.ceil(float(data[1]) / float(stride[1]))) + pad = max(0, (out - 1) * stride[1] + dilated_kernel_w - data[1]) + pad_left, pad_right = (pad, 0) if cmsisnn_padding else (0, pad) + return [pad_top, pad_left, pad_bottom, pad_right] + + +def get_conv2d_qnn_params( + weight_shape: List[int], + input_scale: float, + input_zp: int, + weights_scale: Union[float, List[float]], + weights_zp: int, + input_dtype: str = "int8", + weights_dtype: str = "int8", + output_dtype: str = "int8", + is_depthwise: bool = False, +) -> Tuple[float, int]: + """ + Calculates the output quantization parameters for convolution based on the input and + weights quantization parameters and the data types. + + Parameters + ---------- + weight_shape : List[int] + shape of the weights + input_scale : float + scale of the input tensor + input_zp : int + zero point of the input tensor + weights_scale : Union[float, List[float]] + scale(s) of the weights tensor + weights_zp : int + zero point of the weights tensor + input_dtype : str + data type of the input tensor + weights_dtype : str + data type of the weights tensor + output_dtype : str + data type of the output tensor + is_depthwise : bool + whether it is a depthwise convolution + + Returns + ------- + output_scale : float + scale of the output tensor + output_zp : int + zero point of the output tensor + """ + input_dtype_min, input_dtype_max = get_range_for_dtype_str(input_dtype) + input_max = input_scale * (input_dtype_max - input_zp) + input_min = input_scale * (input_dtype_min - input_zp) + + weights_dtype_min, weights_dtype_max = get_range_for_dtype_str(weights_dtype) + weights_sc_max = np.max(weights_scale) + weights_max = weights_sc_max * (weights_dtype_max - weights_zp) + + weights_sc_min = np.min(weights_scale) + weights_min = weights_sc_min * (weights_dtype_min - weights_zp) + + weights_h = weight_shape[1] + weights_w = weight_shape[2] + channels = weight_shape[3] + num_elements = weights_h * weights_w * channels + # Adjust the result if it is a depthwise convolution + if is_depthwise: + num_elements = num_elements / channels + + # The smallest and largest possible values in the unquantized output tensor + output_limits = [ + weights_max * input_max * num_elements, + weights_min * input_max * num_elements, + weights_min * input_min * num_elements, + weights_max * input_min * num_elements, + ] + + output_max = max(output_limits) + output_min = min(output_limits) + output_dtype_min, output_dtype_max = get_range_for_dtype_str(output_dtype) + + output_scale = (output_max - output_min) / (output_dtype_max - output_dtype_min) + output_zp = int(output_dtype_min - (output_min / output_scale)) + + return output_scale, output_zp + + +def make_qnn_relu(expr,
fused_activation_fn, scale, zero_point, dtype): + """Mimics convert_qnn_fused_activation_function from TFLite frontend""" + quantize = lambda x: float(int(round(x / scale)) + zero_point) + + # Get min/max of the output dtype. This will be used to ensure that clip a_min/a_max are not + # beyond the dtype range. + qmin, qmax = get_range_for_dtype_str(dtype) + + # The input expr is a quantized tensor with its scale and zero point. We calculate the + # suitable clip off points based on these scale and zero point. + if fused_activation_fn == "NONE": + return expr + if fused_activation_fn == "RELU6": + return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0)), a_max=min(qmax, quantize(6.0))) + if fused_activation_fn == "RELU_N1_TO_1": + return tvm.relay.op.clip( + expr, a_min=max(qmin, quantize(-1.0)), a_max=min(qmax, quantize(1.0)) + ) + if fused_activation_fn == "RELU": + return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0.0)), a_max=qmax) diff --git a/tests/python/driver/tvmc/test_compiler.py b/tests/python/driver/tvmc/test_compiler.py index 9d44d8f22f411..1adbec5fe4ddb 100644 --- a/tests/python/driver/tvmc/test_compiler.py +++ b/tests/python/driver/tvmc/test_compiler.py @@ -413,7 +413,7 @@ def test_compile_tflite_module_with_external_codegen_cmsisnn( for name in mlf_package.getnames() if re.match(r"\./codegen/host/src/\D+\d+\.c", name) ] - assert len(c_source_files) == 3 + assert len(c_source_files) == 5 @pytest.mark.skipif(