From 28bc3dd1d721a0920c3487c0b82cd952bb7ced26 Mon Sep 17 00:00:00 2001 From: Ashutosh Parkhi Date: Mon, 29 Nov 2021 11:33:53 +0000 Subject: [PATCH] Code generation for Pooling layers via CMSIS-NN Change-Id: Ibf22250d961a683208faee362d1960ea266347e8 --- python/tvm/relay/op/contrib/cmsisnn.py | 28 ++- .../backend/contrib/cmsisnn/relay_to_tir.cc | 94 ++++++++++ .../backend/contrib/cmsisnn/tir_to_runtime.cc | 93 ++++++++++ .../contrib/test_cmsisnn/test_pooling.py | 172 ++++++++++++++++++ 4 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 tests/python/contrib/test_cmsisnn/test_pooling.py diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py index 7bc6267bc6ea6..515cf23638e47 100644 --- a/python/tvm/relay/op/contrib/cmsisnn.py +++ b/python/tvm/relay/op/contrib/cmsisnn.py @@ -146,7 +146,7 @@ def check_qnn_conv2d(pattern): ) def qnn_fully_connected_pattern(): - """Create pattern for qnn.dense with optional relu.""" + """Create pattern for qnn.dense with optional Relu.""" qnn_fc = is_op("qnn.dense")( wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() ) @@ -188,6 +188,30 @@ def check_qnn_fully_connected(pattern): and kernel_zp == 0 ) + def qnn_avg_pool2d_pattern(): + """Matches average pooling with optional Relu""" + pattern = is_op("cast")(wildcard()) + pattern = is_op("nn.avg_pool2d")(pattern) + pattern = is_op("cast")(pattern) + pattern = pattern.optional(is_op("clip")) + return pattern + + def check_qnn_avg_pool2d(pattern): + """Check if avg pool2d is supported by CMSIS-NN.""" + in_cast = pattern + out_cast = in_cast.args[0].args[0] + return in_cast.checked_type.dtype == "int8" and out_cast.checked_type.dtype == "int32" + + def qnn_max_pool2d_pattern(): + """Matches max pooling with optional Relu""" + pattern = is_op("nn.max_pool2d")(wildcard()) + pattern = pattern.optional(is_op("clip")) + return pattern + + def check_qnn_max_pool2d(pattern): + """Check if max pool2d is supported 
by CMSIS-NN.""" + return True + def binary_op_pattern(op): """Matches QNN binary operation""" return is_op(f"qnn.{op}")( @@ -211,6 +235,8 @@ def check_qnn_binary_op(extract): return [ ("cmsis-nn.qnn_conv2d", qnn_conv2d_pattern(), check_qnn_conv2d), ("cmsis-nn.qnn_fully_connected", qnn_fully_connected_pattern(), check_qnn_fully_connected), + ("cmsis-nn.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_qnn_avg_pool2d), + ("cmsis-nn.qnn_max_pool2d", qnn_max_pool2d_pattern(), check_qnn_max_pool2d), ("cmsis-nn.qnn_mul", binary_op_pattern("mul"), check_qnn_binary_op), ("cmsis-nn.qnn_add", binary_op_pattern("add"), check_qnn_binary_op), ("cmsis-nn.qnn_softmax", qnn_softmax_pattern(), check_qnn_softmax), diff --git a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc index 09a773a143007..f374d6b3857cd 100644 --- a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc +++ b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc @@ -330,6 +330,97 @@ class RelayToTIRVisitor : public MixedModeMutator { context_buffer_size); } + void EmitPool2D(const GlobalVar& global_var, const Expr& expr, const String pool_name) { + Call clip, pool; + Call final_call = GetRef(expr.as()); + Op final_op = GetRef(final_call->op.as()); + if (final_op->name == "clip") { + clip = final_call; + Call clip_input = GetRef(clip->args[0].as()); + Op clip_input_op = GetRef(clip_input->op.as()); + if (clip_input_op->name == "cast") { + pool = GetRef(clip_input->args[0].as()); + } else { // max_pool2d + pool = clip_input; + } + } else if (final_op->name == "cast") { + pool = GetRef(final_call->args[0].as()); + } else { // max_pool2d + pool = final_call; + } + + // prepare cmsis_nn_pool_params + int32_t stride_h, stride_w, padding_h, padding_w, pool_size_h, pool_size_w; + int32_t clip_min, clip_max; + std::string cmsisnn_api; + if (pool_name == "cmsis-nn.qnn_avg_pool2d") { + cmsisnn_api = "arm_avgpool_s8"; + const AvgPool2DAttrs* attrs = pool->attrs.as(); + stride_h = 
qnn::get_const_int(attrs->strides[0]);
+      stride_w = qnn::get_const_int(attrs->strides[1]);
+      padding_h = qnn::get_const_int(attrs->padding[0]);
+      padding_w = qnn::get_const_int(attrs->padding[1]);
+      pool_size_h = qnn::get_const_int(attrs->pool_size[0]);
+      pool_size_w = qnn::get_const_int(attrs->pool_size[1]);
+    } else {
+      cmsisnn_api = "arm_max_pool_s8";
+      const MaxPool2DAttrs* attrs = pool->attrs.as<MaxPool2DAttrs>();
+      stride_h = qnn::get_const_int(attrs->strides[0]);
+      stride_w = qnn::get_const_int(attrs->strides[1]);
+      padding_h = qnn::get_const_int(attrs->padding[0]);
+      padding_w = qnn::get_const_int(attrs->padding[1]);
+      pool_size_h = qnn::get_const_int(attrs->pool_size[0]);
+      pool_size_w = qnn::get_const_int(attrs->pool_size[1]);
+    }
+    if (clip.defined()) {
+      const ClipAttrs* clip_attrs = clip->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    tvm::Array<PrimExpr> scalar_args = {ToArg(stride_h), ToArg(stride_w), ToArg(padding_h),
+                                        ToArg(padding_w), ToArg(clip_min), ToArg(clip_max)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = pool->args[0]->type_as<TensorTypeNode>()->shape;
+    Array<PrimExpr> cmsisnn_input_shape{1, input_shape[1], input_shape[2], input_shape[3]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{1, pool_size_h, pool_size_w, 1};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> output_shape = pool->type_as<TensorTypeNode>()->shape;
+    Array<PrimExpr> cmsisnn_output_shape{1, output_shape[1], output_shape[2], output_shape[3]};
+
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var output("output", DataType::Handle(8));
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, output};
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    if (pool_name == "cmsis-nn.qnn_avg_pool2d") {
+      // TODO(@Mousius): Need to move this into buffer_size calculations
+      context_buffer_size = qnn::get_const_int(input_shape[3]) * sizeof(int32_t);
+      context_buffer_name = "context_buffer_" + 
std::to_string(context_buffer_id_++); + } + tvm::Array context_buffer_args = {tir::StringImm(context_buffer_name), + ToArg(context_buffer_size)}; + + scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args); + scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape); + scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape); + scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape); + call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args); + + Array func_signature{input, output}; + + CreatePrimFuncForExtern(global_var, func_signature, call_ext_args, context_buffer_name, + context_buffer_size); + } + void EmitSoftMax(const GlobalVar& global_var, const Expr& expr) { const CallNode* quantize_call = expr.as(); const CallNode* softmax_call = quantize_call->args[0].as(); @@ -521,6 +612,9 @@ class RelayToTIRVisitor : public MixedModeMutator { if (comp_name == "cmsis-nn.qnn_fully_connected") { EmitFullyConnected(new_global_var, composite_func->body); } + if (comp_name == "cmsis-nn.qnn_avg_pool2d" || comp_name == "cmsis-nn.qnn_max_pool2d") { + EmitPool2D(new_global_var, composite_func->body, comp_name.value()); + } Array args; for (const auto& arg : call->args) { diff --git a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc index 7dff7346528db..2a7d0ae21769b 100644 --- a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc +++ b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc @@ -92,6 +92,15 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost { int shift; }; + struct PoolParams { + int stride_h; + int stride_w; + int padding_h; + int padding_w; + int clip_min; + int clip_max; + }; + /*! 
* \brief Emits CMSIS-NN APIs for every call_extern */ void VisitExpr_(const CallNode* op, std::ostream& os) { // NOLINT(*) if (!op->op.same_as(builtin::call_extern())) { @@ -107,6 +116,8 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost { EmitConv2D(op); } else if (cmsis_func_name == "arm_fully_connected_s8") { EmitFullyConnected(op); + } else if (cmsis_func_name == "arm_avgpool_s8" || cmsis_func_name == "arm_max_pool_s8") { + EmitPool2D(op); } return; } @@ -160,6 +171,22 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost { return instance_name; } + /*! * \brief Emits cmsis_nn_pool_params struct */ + std::string EmitCMSISNNPoolParams(std::ostream& os, PoolParams params) { + std::string struct_name = "cmsis_nn_pool_params"; + std::string instance_name = "pool_params"; + PrintIndent(); + os << "cmsis_nn_tile stride = {" << params.stride_w << "," << params.stride_h << "};\n"; + PrintIndent(); + os << "cmsis_nn_tile padding = {" << params.padding_w << "," << params.padding_h << "};\n"; + PrintIndent(); + os << "cmsis_nn_activation activation = {" << params.clip_min << "," << params.clip_max + << "};\n"; + PrintIndent(); + os << struct_name << " " << instance_name << " = {stride, padding, activation};\n"; + return instance_name; + } + /*! * \brief Emits cmsis_nn_per_channel_quant_params struct */ std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier, std::string shift) { @@ -234,6 +261,18 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost { return fc_params; } + /*! 
* \brief extracts CMSIS-NN Pooling parameters from call_extern */ + PoolParams extract_pool_params(const CallNode* op, int base_pos) { + PoolParams pool_params; + pool_params.stride_h = ValueFromArg(op, base_pos); + pool_params.stride_w = ValueFromArg(op, ++base_pos); + pool_params.padding_h = ValueFromArg(op, ++base_pos); + pool_params.padding_w = ValueFromArg(op, ++base_pos); + pool_params.clip_min = ValueFromArg(op, ++base_pos); + pool_params.clip_max = ValueFromArg(op, ++base_pos); + return pool_params; + } + /*! * \brief extracts CMSIS-NN buffer dimensions from call_extern */ CMSISNNDims extract_buffer_dims(const CallNode* op, int base_pos) { CMSISNNDims dims; @@ -383,6 +422,60 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost { PrintIndent(); stream << "}\n"; } + + /*! * \brief Emits CMSIS-NN APIs for every call_extern comprising pooling ops */ + void EmitPool2D(const CallNode* op) { + // Position of various arguments relative to buffers in the call_extern + enum CallExternArgPos { + CONTEXT_BUFFER_POS = 1, + POOL_PARAMS_POS = 3, + INPUT_DIM_POS = 9, + FILTER_DIM_POS = 13, + OUTPUT_DIM_POS = 17, + MAX_NUM_ARGS = 23 + }; + std::string cmsis_func_name = op->args[0].as()->value; + + // extract buffer names from call_extern + int arg_id = 0; + std::string input_data = VarNameFromArg(op, ++arg_id); + std::string output_data = VarNameFromArg(op, ++arg_id); + + // extract CMSIS-NN API parameters + int context_buffer_pos = arg_id + CallExternArgPos::CONTEXT_BUFFER_POS; + int pool_params_pos = arg_id + CallExternArgPos::POOL_PARAMS_POS; + int input_dim_pos = arg_id + CallExternArgPos::INPUT_DIM_POS; + int filter_dim_pos = arg_id + CallExternArgPos::FILTER_DIM_POS; + int output_dim_pos = arg_id + CallExternArgPos::OUTPUT_DIM_POS; + + CMSISNNContextBuffer context_buffer = extract_context_buffer_info(op, context_buffer_pos); + PoolParams pool_params = extract_pool_params(op, pool_params_pos); + CMSISNNDims input_dims = extract_buffer_dims(op, input_dim_pos); + 
CMSISNNDims filter_dims = extract_buffer_dims(op, filter_dim_pos); + CMSISNNDims output_dims = extract_buffer_dims(op, output_dim_pos); + + std::string context = EmitCMSISNNContext(stream, context_buffer); + std::string cmsisnn_pool_params = EmitCMSISNNPoolParams(stream, pool_params); + std::string input_dim = EmitCMSISNNDims(stream, "input", input_dims); + std::string filter_dim = EmitCMSISNNDims(stream, "filter", filter_dims); + std::string output_dim = EmitCMSISNNDims(stream, "output", output_dims); + + PrintIndent(); + stream << "arm_status status = "; + stream << cmsis_func_name << "("; + stream << "&" << context << ", "; + stream << "&" << cmsisnn_pool_params << ", "; + stream << "&" << input_dim << ", " << input_data << ", "; + stream << "&" << filter_dim << ", "; + stream << "&" << output_dim << ", " << output_data << ");\n"; + PrintIndent(); + stream << "if (status != ARM_MATH_SUCCESS) {\n"; + PrintIndent(); + PrintIndent(); + stream << "return -1;\n"; + PrintIndent(); + stream << "}\n"; + } }; runtime::Module TIRToRuntime(IRModule mod, Target target) { diff --git a/tests/python/contrib/test_cmsisnn/test_pooling.py b/tests/python/contrib/test_cmsisnn/test_pooling.py new file mode 100644 index 0000000000000..7343bfae3dea9 --- /dev/null +++ b/tests/python/contrib/test_cmsisnn/test_pooling.py @@ -0,0 +1,172 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Pooling"""
+import sys
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(pool_op, shape, pool_size, strides, padding, dtype, scale, zero_point, relu_type):
+    """Return a model and any parameters it may have"""
+    op = relay.var("input", shape=shape, dtype=dtype)
+    pad_ = (0, 0, 0, 0)
+    if padding == "SAME":
+        dilation = (1, 1)
+        pad_ = get_same_padding((shape[1], shape[2]), pool_size, dilation, strides)
+        op = relay.nn.pad(
+            op,
+            pad_width=[(0, 0), (pad_[0], pad_[2]), (pad_[1], pad_[3]), (0, 0)],
+            pad_value=zero_point,
+            pad_mode="constant",
+        )
+    if pool_op == relay.nn.avg_pool2d:
+        op = relay.cast(op, "int32")
+    op = pool_op(
+        op, pool_size=pool_size, strides=strides, padding=pad_, ceil_mode=True, layout="NHWC"
+    )
+    if pool_op == relay.nn.avg_pool2d:
+        op = relay.cast(op, dtype)
+    op = make_qnn_relu(op, relu_type, scale, zero_point, dtype)
+    return op
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("in_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize(
+    "pool_size, strides, padding", [((3, 3), (2, 2), "SAME"), ((2, 2), (1, 1), "VALID")]
+)
+@pytest.mark.parametrize("relu_type", ["RELU"]) +@pytest.mark.parametrize("pool_type", [relay.nn.max_pool2d, relay.nn.avg_pool2d]) +@pytest.mark.parametrize("zero_point, scale", [(-34, 0.0256)]) +def test_op_int8( + in_shape, + pool_size, + strides, + padding, + relu_type, + pool_type, + zero_point, + scale, +): + interface_api = "c" + use_unpacked_api = True + test_runner = AOT_CORSTONE300_RUNNER + + dtype = "int8" + + model = make_model( + pool_type, + in_shape, + pool_size, + strides, + padding, + dtype, + scale, + zero_point, + relu_type, + ) + orig_mod = make_module(model) + + cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) + + # validate pattern matching + attrs = [ + cmsisnn_mod[var.name_hint].attrs + for var in cmsisnn_mod.get_global_vars() + if cmsisnn_mod[var.name_hint].attrs + ] + assert any(attrs), "At least one function with external attributes was expected." + + compilers = [ + key == "Compiler" and value == "cmsis-nn" for attr in attrs for key, value in attr.items() + ] + assert any(compilers), "Module does not contain function for cmsisnn target." 
+ + assert count_num_calls(orig_mod) == count_num_calls( + cmsisnn_mod + ), "Number of calls changed during partitioning" + + # validate the output + in_min, in_max = get_range_for_dtype_str(dtype) + np.random.seed(0) + inputs = { + "input": np.random.randint(in_min, high=in_max, size=in_shape, dtype="int8"), + } + output_list = generate_ref_data(orig_mod["main"], inputs) + compile_and_run( + AOTTestModel( + module=cmsisnn_mod, + inputs=inputs, + outputs=output_list, + params=None, + output_tolerance=1, + ), + test_runner, + interface_api, + use_unpacked_api, + ) + + +def test_invalid_parameters(): + model = make_model( + pool_op=relay.nn.avg_pool2d, + shape=(1, 28, 28, 12), + pool_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype="uint8", + scale=1, + zero_point=-33, + relu_type="RELU", + ) + + orig_mod = make_module(model) + cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) + + # validate pattern matching + attrs = [ + cmsisnn_mod[var.name_hint].attrs + for var in cmsisnn_mod.get_global_vars() + if cmsisnn_mod[var.name_hint].attrs + ] + assert not any(attrs), "No function should have an external attribute." + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__] + sys.argv[1:]))