From c124db2dd996f3f0be85ede1daa6df2335a82917 Mon Sep 17 00:00:00 2001 From: stoa Date: Wed, 6 Oct 2021 19:50:37 +0200 Subject: [PATCH] Contributing the STM32 port (#7742) * Contribute apps/stm32 application. * Removed a useless file. * STM32: Use Model Library Format in demo. Added test. * STM32: Added quantized mnist test. * STM32: Fixed apps/stm32/Makefile. * STM32: Added quantized models test. * STM32: Added tests to tests/scripts/task_python_microtvm.sh * STM32: Listed specific files with lint. * STM32: removed external copyright notices. * STM32: Added missing ASF copyright notice. * STM32: style fixes. * STM32: more style fixes. * STM32: fixed liny for C files. * STM32: Does extern C help with cpplint. * STM32: Fixed wrong LINT_C_FILE spelling. * STM32: Still some lint fixes. * STM32: more style. * STM32: More fixes lint+formatting. * STM32: cleanup. * STM32: style cleanup. * STM32: Moved ai_runner to the apps/stm32. * Alignment with PR 7742 * lint cleanup. * STM32: Use crt_backend_api.c with standalone build. * STM32: Fixed the CI test. * STM32: style fixes. * STM32: Removed unused files. * STM32: Moved to crt_backend_api * STM32: style fix. * STM32: style fix. * Revert "STM32: Removed unused files." This reverts commit d72f8e5be3bae3c36da1758b6e61cd39ef7036c7. Undo changes to c_backend_api/c_runtime_api. * Revert "STM32: Moved to crt_backend_api" This reverts commit 6c0e66672cb636d47244e99e8efd893004acc382. Undo changes to the c_backend-api/c_runtime_api. * stm32: aligned to micro TVM structure. * stm32: improved the python style. * stm32: cpplint fixes. * stm32: Fixed the test * stm32: style fixes. * stm32: style fixes. * stm32: style fixes. * stm32: style fixes. --- python/tvm/micro/contrib/stm32/__init__.py | 20 + python/tvm/micro/contrib/stm32/emitter.py | 1366 +++++++++++++++++ .../crt/contrib/stm32/ai_runtime_api.c | 356 +++++ .../crt/contrib/stm32/ai_runtime_api.h | 244 +++ src/runtime/crt/contrib/stm32/crt_config.h | 32 + src/runtime/crt/contrib/stm32/runtime.c | 94 ++ tests/crt/contrib/stm32/Makefile | 88 ++ tests/crt/contrib/stm32/src/main.c | 461 ++++++ tests/micro/stm32/.clang-format | 2 + tests/micro/stm32/conftest.py | 19 + tests/micro/stm32/test_code_emitter.py | 395 +++++ tests/scripts/task_python_microtvm.sh | 2 + 12 files changed, 3079 insertions(+) create mode 100755 python/tvm/micro/contrib/stm32/__init__.py create mode 100644 python/tvm/micro/contrib/stm32/emitter.py create mode 100644 src/runtime/crt/contrib/stm32/ai_runtime_api.c create mode 100644 src/runtime/crt/contrib/stm32/ai_runtime_api.h create mode 100644 src/runtime/crt/contrib/stm32/crt_config.h create mode 100644 src/runtime/crt/contrib/stm32/runtime.c create mode 100644 tests/crt/contrib/stm32/Makefile create mode 100644 tests/crt/contrib/stm32/src/main.c create mode 100644 tests/micro/stm32/.clang-format create mode 100644 tests/micro/stm32/conftest.py create mode 100644 tests/micro/stm32/test_code_emitter.py diff --git a/python/tvm/micro/contrib/stm32/__init__.py b/python/tvm/micro/contrib/stm32/__init__.py new file mode 100755 index 000000000000..80e57f2d3912 --- /dev/null +++ b/python/tvm/micro/contrib/stm32/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module container of STM32 code generator.""" + +from .emitter import CodeEmitter, get_input_tensor_name, get_output_tensor_name diff --git a/python/tvm/micro/contrib/stm32/emitter.py b/python/tvm/micro/contrib/stm32/emitter.py new file mode 100644 index 000000000000..8453ea78e012 --- /dev/null +++ b/python/tvm/micro/contrib/stm32/emitter.py @@ -0,0 +1,1366 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=line-too-long + +"""Code emission for the STM32 targets.""" + +import contextlib +import json +import os +import re +import shutil +import tarfile +import textwrap + +from datetime import datetime + +import numpy as np + +import tvm +from tvm.contrib import utils + +AI_API_VERSION_MAJOR = 1 +AI_API_VERSION_MINOR = 0 +AI_API_VERSION_MICRO = 0 + +AI_TOOLS_REVISION = "v1" + +DBAR = "=" * 60 + + +def _fix_name(node_name): + """ Replace ':' with '_' in names like 'InputImg:0' """ + return node_name.replace(":", "_") + + +def get_input_tensor_name(node_name): + return _fix_name(node_name) + + +def get_output_tensor_name(node_name, idx): + return _fix_name(node_name) + "_" + str(idx) + + +def _get_node_args_name(node_name): + return _fix_name(node_name) + "_args" + + +def _get_node_arg_types_name(node_name): + return _fix_name(node_name) + "_arg_type_ids" + + +def _get_type_size(dltype): + if dltype in ("uint64", "int64"): + return 8 + if dltype in ("uint32", "int32", "float32"): + return 4 + if dltype in ("uint16", "int16"): + return 2 + if dltype in ("uint8", "int8"): + return 1 + raise ValueError(f"Data type {dltype} is not supported") + + +C_TYPE_TO_DLTYPE = { + "uint64": "kDLUInt, 64, 1", + "int64": "kDLInt, 64, 1", + "float32": "kDLFloat, 32, 1", + "uint32": "kDLUInt, 32, 1", + "int32": "kDLInt, 32, 1", + "uint16": "kDLUInt, 16, 1", + "int16": "kDLInt, 16, 1", + "uint8": "kDLUInt, 8, 1", + "int8": "kDLInt, 8, 1", +} + + +def _get_type_data(dltype): + try: + return C_TYPE_TO_DLTYPE[dltype] + except KeyError: + raise ValueError(f"Data type {dltype} is not supported") + + +def _get_aligned_offset(offset, dltype): + align = _get_type_size(dltype) + if offset % align != 0: + offset = offset + (align - offset % align) + return offset + + +def 
_get_num_tensor_elts(shape):
+    size = 1
+    for dim in shape:
+        size = size * dim
+    return size
+
+
+def _get_tensor_size_bytes(dims, dltype):
+    size = _get_num_tensor_elts(dims)
+    return size * _get_type_size(dltype)
+
+
+def _preprocess_code(src):
+    """ Hack the C code implementing the model. """
+    dst = "#include <stdio.h>\n" "#include <math.h>\n\n"
+    dst = dst + src
+    return dst
+
+
+class CodeEmitter(object):
+    """Code emitter class."""
+
+    DATA_ALIGNMENT_BYTES = 8
+
+    def __init__(self, include_activations=True, include_inputs=True, include_outputs=True):
+        """Initialize the Emitter instance.
+
+        Parameters
+        ----------
+        include_activations:
+            The Emitter allocates the storage for the activations data
+            and places it in a specific data section. If False, the
+            main application is responsible for allocating the activations
+            storage. Default: True.
+
+        include_inputs/include_outputs:
+            The Emitter allocates the storage for the input/output data.
+            This storage is shared with the activations and placed in the
+            specific activations data section. If False, the main
+            application is responsible for allocating the input/output
+            data storage. Default: True.
+
+        Returns
+        -------
+        CodeEmitter object.
+
+        """
+
+        # Static model: activations placed into a nn_data_act section
+        # Dynamic model: activations need to be malloc'ed by the
+        # applications.
+        self.activations_static = include_activations
+
+        # Inputs/outputs may be allocated within the activations or
+        # separately.
+        # TODO: Separate the inputs from activations inside TVM.
+        if include_inputs:
+            assert (
+                self.activations_static == True
+            ), "###Error: Static inputs are not allowed without activations."
+        self.inputs_static = include_inputs
+
+        if include_outputs:
+            assert (
+                self.activations_static == True
+            ), "###Error: Static outputs are not allowed without activations."
+        self.outputs_static = include_outputs
+
+        # Parsed graph
+        self._nodes = []
+        self._arg_nodes = []
+        self._outputs = []
+        self._attrs = {}
+        self._node_row_ptr = []
+
+        # Parameters
+        self._params = {}
+
+        # Filled by _compute_data_placement()
+        self._weights = {}
+        self._activations = {}
+        self._input_data = {}
+        self._output_data = {}
+        self._nodes_size = 0
+        self._weights_size = 0
+        self._activations_size = 0
+
+        self._quantization = {}
+
+    def _extract_quantization_info(self, quantization):
+        """ Build dictionary with quantization info."""
+
+        for dl_tensor_name in self._input_data:
+            if dl_tensor_name in quantization:
+                self._quantization[dl_tensor_name] = quantization[dl_tensor_name]
+
+        # Matching outputs is more difficult because TVM does not preserve
+        # output tensor names.
+        # We only support models with a single output now.
+        assert len(self._output_data) == 1, "Multiple outputs models are not yet supported."
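+        # A note on the matching below: any entry in 'quantization' whose
+        # name is not a model input is assumed to describe the single model
+        # output. It is stored under the generic "output" key and matched
+        # back to the output tensor in _emit_tensor_quant().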
+ + for dl_tensor_name in self._output_data: + for name in quantization: + if name not in self._input_data: + self._quantization["output"] = quantization[name] + break + + def _get_node_arg_name(self, arg): + arg_nid = arg[0] + arg_idx = arg[1] + arg_node = self._nodes[arg_nid] + arg_name = self._nodes[arg_nid]["name"] + if arg_node["op"] == "null": + # parameter + dl_tensor_name = get_input_tensor_name(arg_name) + elif arg_node["name"] == "reshape_nop": + # Handle __nop + src = arg_node["inputs"][0] + dl_tensor_name = self._get_node_arg_name(src) + else: + # activation + dl_tensor_name = get_output_tensor_name(arg_name, arg_idx) + return dl_tensor_name + + def _tensor_is_output(self, nid, idx): + for out in self._outputs: + out_nid = out[0] + out_idx = out[1] + if out_nid == nid and out_idx == idx: + return True + return False + + def _get_tensor_from_node(self, nid, idx): + # 'eid' is index into the dltype', 'shape', etc. + eid = self._node_row_ptr[nid] + idx + dltype = self._attrs["dltype"][1][eid] + dims = self._attrs["shape"][1][eid] + storage_id = self._attrs["storage_id"][1][eid] + ndim = len(dims) + size = _get_tensor_size_bytes(dims, dltype) + + tensor = { + "dltype": dltype, + "ndim": ndim, + "dims": dims, + "strides": None, + "storage_id": storage_id, + "byte_offset": 0, + "offset": 0, + "size": size, + } + + return tensor + + def _compute_data_placement(self): + """ Compute inputs, outputs, weight, activation sizes""" + + self._inputs = self._arg_nodes.copy() + + # weights: + offset = 0 + + for key in self._params: + + # First, find the node in graph + nid = 0 + for node in self._nodes: + if node["name"] == key: + break + nid += 1 + + dl_tensor_name = get_input_tensor_name(key) + tensor = self._get_tensor_from_node(nid, 0) + + # Compute the offset + dltype = tensor["dltype"] + aligned_offset = _get_aligned_offset(offset, dltype) + tensor["offset"] = aligned_offset + + for idx in self._arg_nodes: + node = self._nodes[idx] + node_name = node["name"] + if node_name == key: + self._inputs.remove(idx) + + self._weights[dl_tensor_name] = tensor + + # Next offset + offset = aligned_offset + tensor["size"] + + self._weights_size = offset + + # activations: + buffer_list_ = {} + + nid = 0 + for node in self._nodes: + + if node["op"] == "null": + nid += 1 + continue + + if node["op"] != "tvm_op": + raise ValueError(f"Only TVM ops are supported") + + node_name = node["name"] + node_attrs = node["attrs"] + func_name = node_attrs["func_name"] + num_outputs = int(node_attrs["num_outputs"]) + + if func_name == "__nop": + assert node_name == "reshape_nop", f"Unsupported __nop operator {node_name}." 
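+                # __nop (reshape_nop) nodes emit no code and their output
+                # aliases the input buffer (see _get_node_arg_name), so no
+                # activation storage is reserved for them.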
+ assert num_outputs == 1 + assert not self._tensor_is_output(nid, 0) + nid += 1 + continue + + for idx in range(num_outputs): + + # Do not count the '_outputs' + if self._tensor_is_output(nid, idx): + continue + + dl_tensor_name = get_output_tensor_name(node_name, idx) + tensor = self._get_tensor_from_node(nid, idx) + + # Remember this tensor with the storage id + storage_id = tensor["storage_id"] + if storage_id not in buffer_list_: + buffer_list_[storage_id] = [] + buffer_entry = buffer_list_[storage_id] + buffer_entry.append(tensor) + + self._activations[dl_tensor_name] = tensor + + self._nodes_size = self._nodes_size + 1 + + nid += 1 + + # Compute '_input_data' + offset = 0 + for nid in self._inputs: + node = self._nodes[nid] + node_name = node["name"] + + # Arthur: I suppose that input nodes only have a single + # output dependency + dl_tensor_name = get_input_tensor_name(node_name) + + # This tensor is at some index inside '_input_data' dictionary + # depending on the '_inputs' list order. We refer to this position + # when generating the XXX.h file. + tensor = self._get_tensor_from_node(nid, 0) + + if self.inputs_static: + + # Remember this tensor with the storage id + storage_id = tensor["storage_id"] + if storage_id not in buffer_list_: + buffer_list_[storage_id] = [] + buffer_entry = buffer_list_[storage_id] + buffer_entry.append(tensor) + else: + + # Compute the offset + dltype = tensor["dltype"] + aligned_offset = _get_aligned_offset(offset, dltype) + tensor["offset"] = aligned_offset + + self._input_data[dl_tensor_name] = tensor + + # Next offset + offset = aligned_offset + tensor["size"] + + # Compute '_output_data' + offset = 0 + for output in self._outputs: + nid = output[0] + idx = output[1] + + node = self._nodes[nid] + node_name = node["name"] + + dl_tensor_name = get_output_tensor_name(node_name, idx) + + tensor = self._get_tensor_from_node(nid, idx) + + if self.outputs_static: + + # Remember this tensor with the storage id + storage_id = tensor["storage_id"] + if storage_id not in buffer_list_: + buffer_list_[storage_id] = [] + buffer_entry = buffer_list_[storage_id] + buffer_entry.append(tensor) + else: + + # Compute the offset + dltype = tensor["dltype"] + aligned_offset = _get_aligned_offset(offset, dltype) + tensor["offset"] = aligned_offset + + self._output_data[dl_tensor_name] = tensor + + # Next offset + offset = aligned_offset + tensor["size"] + + # Go over all storage IDs and compute offsets and _activations_size + offset = 0 + for storage_id in buffer_list_: + buffer_entry = buffer_list_[storage_id] + + new_offset = offset + for tensor in buffer_entry: + assert tensor["storage_id"] == storage_id + dltype = tensor["dltype"] + aligned_offset = _get_aligned_offset(offset, dltype) + tensor["offset"] = aligned_offset + size = tensor["size"] + if (aligned_offset + size) > new_offset: + new_offset = aligned_offset + size + offset = new_offset + + self._activations_size = offset + + def _parse_model(self, quantization=None): + """Parse the module. Build internal data structures. + + Parameters + ---------- + module : TVM module or ModuleLibraryFormat object + The module to parse + + quantization: Dictionary + The quantization information for model inputs/outputs. 
+        """
+
+        for key in self._graph:
+            if key == "nodes":
+                self._nodes = self._graph["nodes"]
+            elif key == "arg_nodes":
+                self._arg_nodes = self._graph["arg_nodes"]
+            elif key == "node_row_ptr":
+                self._node_row_ptr = self._graph["node_row_ptr"]
+            elif key == "heads":
+                self._outputs = self._graph["heads"]
+            elif key == "attrs":
+                self._attrs = self._graph["attrs"]
+            elif key == "metadata":
+                continue
+            else:
+                print("### Error: JSON key {} not supported".format(key))
+                assert False
+
+        # Build all tensor lists
+        self._compute_data_placement()
+
+        # Extract quantization info for inputs/outputs
+        if quantization is not None:
+            self._extract_quantization_info(quantization)
+
+    def parse_library_format(self, model_library_format_path, quantization=None):
+        """Parse the module. Build internal data structures.
+
+        Parameters
+        ----------
+        model_library_format_path :
+            Path to the Model Library Format archive to parse.
+
+        quantization: Dictionary
+            The quantization information for model inputs/outputs.
+        """
+
+        temp_dir = utils.tempdir()
+        extract_path = temp_dir.relpath("extract")
+        os.mkdir(extract_path)
+        with tarfile.TarFile(model_library_format_path) as f:
+            f.extractall(extract_path)
+
+        # Extract information from the Model Library Format
+        graph_file = os.path.join(extract_path, "executor-config", "graph", "graph.json")
+        with open(graph_file, "r") as f:
+            # returns JSON object as a dictionary
+            graph_dict = json.load(f)
+
+        params_dict = {}
+        param_file = os.path.join(extract_path, "parameters", "default.params")
+        with open(param_file, "rb") as f:
+            params = tvm.runtime.load_param_dict(f.read())
+
+            # Convert the parameters map to a Python dict
+            tmp_dict = {}
+            for (k, v) in params.items():
+                tmp_dict[k] = v
+
+            # Sort params for debugging
+            for k in sorted(tmp_dict.keys()):
+                params_dict[k] = tmp_dict[k]
+
+        src_dir = os.path.join(extract_path, "codegen", "host", "src")
+        # List of strings from Model Library Format C files
+        src_files = []
+        for filename in os.listdir(src_dir):
+            with open(os.path.join(src_dir, filename), "r") as fin:
+                src = fin.read()
+                src_files.append(src)
+
+        self._graph = graph_dict
+        self._params = params_dict
+        self._lib = src_files
+
+        self._parse_model(quantization)
+
+    def parse_module(self, module, quantization=None):
+        """Parse the module. Build internal data structures.
+
+        Parameters
+        ----------
+        module : TVM Runtime Module
+            The module to parse.
+
+        quantization: Dictionary
+            The quantization information for model inputs/outputs.
+ """ + + graph = module.get_json() + if not isinstance(graph, (str,)): + try: + graph = graph._tvm_graph_json() + except AttributeError: + raise ValueError("Type %s is not supported" % type(graph)) + + # Sort params for debugging + params_dict = {} + tmp_params = module.get_params() + for k in sorted(tmp_params.keys()): + params_dict[k] = tmp_params[k] + + self._graph = json.loads(graph) + self._params = params_dict + self._lib = module.get_lib() + + self._parse_model(quantization) + + def _emit_params_data(self, name, out_h, out_c): + """ Emits the network_data[c,h] files with parameters.""" + + name_upper = name.upper() + + # XXX_data.h + + out_h.write( + textwrap.dedent( + f"""\ + #ifndef __{name_upper}_DATA_H_ + #define __{name_upper}_DATA_H_ + + #include \"ai_runtime_api.h\" + + AI_API_ENTRY + const ai_ptr ai_{name}_data_weights_get (void); + + #endif /* __{name_upper}_DATA_H_ */ + """ + ) + ) + + # XXX_data.cc + + out_c.write( + textwrap.dedent( + f""" + #include \"{name}_data.h\" + + const ai_ptr ai_{name}_data_weights_get (void) + {{ + AI_ALIGNED({self.DATA_ALIGNMENT_BYTES}) static const __attribute__ ((section(\".nn_weights\"))) uint8_t s_{name}_weights[] = {{ + """ + ) + ) + + # Weights are arranged in the order of 'params_' + offset = 0 + + for key in self._params: + data = self._params[key] # ND Array + npdata = data.asnumpy() + blob = npdata.tobytes() + + out_c.write(f'// "{key}": \n') + out_c.write(f"\t") + + count = 0 + + # Align by emitting garbage between un-aligned data + dl_tensor_name = get_input_tensor_name(key) + tensor = self._weights[dl_tensor_name] + tensor_offset = tensor["offset"] + tensor_size = tensor["size"] + + while offset < tensor_offset: + count += 1 + out_c.write("0x{:02X}, ".format(0)) + if count == 12: + out_c.write("\n\t") + count = 0 + offset += 1 + + for val in blob: + count += 1 + out_c.write("0x{:02X}, ".format(val)) + if count == 12: + out_c.write("\n\t") + count = 0 + + offset += tensor_size + + out_c.write(f"\n") + + out_c.write( + textwrap.dedent( + f"""\ + }}; + return (const ai_ptr)s_{name}_weights; + }} + """ + ) + ) + + def _emit_open(self, name, out_h, out_c): + """Emits the network.h file with a few network defines and + writes the header part of the network.c file.""" + + name_upper = name.upper() + + input_size = len(self._input_data) + output_size = len(self._output_data) + + # XXX.h + + out_h.write( + textwrap.dedent( + f"""\ + #ifndef __AI_{name_upper}_H__ + #define __AI_{name_upper}_H__ + + #include \"ai_runtime_api.h\" + + #define _{name_upper}_INPUTS_COUNT_ ({input_size}) + #define _{name_upper}_OUTPUTS_COUNT_ ({output_size}) + #define _{name_upper}_ACTIVATION_BYTES_ ({self._activations_size}) + """ + ) + ) + + # XXX.c + + out_c.write( + textwrap.dedent( + f"""\ + #include + + #include \"dlpack/dlpack.h\" + #include \"tvm/runtime/c_runtime_api.h\" + #include \"{name}.h\" + #include \"{name}_data.h\" + """ + ) + ) + + def _emit_close(self, name, out_h, out_c): + """ Emits the ai_model_info structure. 
""" + + name_upper = name.upper() + + # datetime object containing current date and time + now = datetime.now() + # dd/mm/YY H:M:S + dt_string = now.strftime("%d/%m/%Y %H:%M:%S") + + # XXX.h + + out_h.write(f"#endif /*__AI_{name_upper}_H__*/ \n") + + # XXX.c + + if self.activations_static: + out_c.write( + f'AI_ALIGNED({self.DATA_ALIGNMENT_BYTES}) __attribute__ ((section(".{name}.nn_data_act"))) uint8_t {name}_activations[{self._activations_size}];\n' + ) + else: + out_c.write(f"AI_STATIC ai_ptr {name}_activations = NULL;") + + # Emit network structure + num_inputs = len(self._input_data) + num_outputs = len(self._output_data) + + tool_version = tvm.__version__ + api_version = f"{AI_API_VERSION_MAJOR}.{AI_API_VERSION_MINOR}.{AI_API_VERSION_MICRO}.0" + + out_c.write( + textwrap.dedent( + f""" + AI_API_ENTRY __attribute__ ((section(".nn_models"))) ai_model_info {name}_network = {{ + .name = \"{name}\", + .datetime = \"{dt_string}\", + .revision = \"{AI_TOOLS_REVISION}\", + .tool_version = \"{tool_version}\", + .api_version = \"{api_version}\", + .n_nodes = {self._nodes_size}, + .n_inputs = {num_inputs}, + .n_outputs = {num_outputs}, + .activations_size = {self._activations_size}, + .params_size = {self._weights_size}, + .activations = {name}_activations, + .inputs = _InputsList, + .outputs = _OutputsList, + .ai_get_params = &ai_{name}_data_weights_get, + .ai_create = &ai_{name}_create, + .ai_destroy = &ai_{name}_destroy, + .ai_run = &ai_{name}_run + }}; + """ + ) + ) + + def _emit_tensor_shape(self, dl_tensor_name, ndim, shape, strides, out_c): + out_c.write(f"AI_STATIC int64_t {dl_tensor_name}_shape[{ndim}] = {{{shape[1:-1]}}}; \n") + assert strides is None, f"###Error: non-compact tensors are not handled yet." + out_c.write(f"AI_STATIC int64_t {dl_tensor_name}_strides[{ndim}] = {{}}; \n") + + def _emit_tensor_quant(self, dl_tensor_name, out_c): + + if dl_tensor_name in self._quantization: + quantization = self._quantization[dl_tensor_name] + + # At this time, TVM only supports quantization info with + # single output models. + elif dl_tensor_name in self._output_data and "output" in self._quantization.keys(): + quantization = self._quantization["output"] + else: + quantization = None + + if quantization is not None: + scale = quantization["scale"] + zero_point = quantization["zero_point"] + + # Sometimes we get a scalar with ScaleAsNumpy. + # This seem to mean not quantized ? + if not isinstance(scale, np.ndarray): + assert scale == 0.0, f"Non-quantized tensor with scale != 0.0" + assert ( + not isinstance(zero_point, np.ndarray) and zero_point == 0 + ), f"Non-quantized tensor with zero_point != 0" + return None + + scale_size = len(scale) + zero_point_size = len(zero_point) + + assert len(scale) == len( + zero_point + ), f"Inconsistent quantizations scale:{scale} vs zero-point:{zero_point}" + + if len(scale) == 1: + quant_name = dl_tensor_name + "_quant" + + out_c.write(f"AI_STATIC float {quant_name}_scale[{scale_size}] = {{ ") + for val in scale: + out_c.write(f"{val}, ") + out_c.write(f"}};\n") + out_c.write(f"AI_STATIC int32_t {quant_name}_zero_point[{zero_point_size}] = {{ ") + for val in zero_point: + out_c.write(f"{val}, ") + out_c.write(f"}};") + out_c.write( + textwrap.dedent( + f""" + AI_STATIC ai_quantization_info {quant_name} = {{ + .scale = {quant_name}_scale, + .zero_point = {quant_name}_zero_point, + .dim = -1 + }}; + """ + ) + ) + + return quant_name + + return None + + def _emit_tensor_init(self, dl_tensor_name, tensor, out_c): + """ Emits the tensor instantiation code. 
""" + + dltype = tensor["dltype"] + dims = tensor["dims"] + strides = tensor["strides"] + byte_offset = tensor["byte_offset"] + dtype = _get_type_data(dltype) + ndim = len(dims) + shape = str(dims) + self._emit_tensor_shape(dl_tensor_name, ndim, shape, strides, out_c) + + # Quantization + quant_name = self._emit_tensor_quant(dl_tensor_name, out_c) + + # Contents + # + # TODO: use the 'storage_id': + # " .ctx = {{ {} }}, \n".format(str(storage_id)[1:-1]) + out_c.write( + textwrap.dedent( + f""" + AI_ALIGNED({self.DATA_ALIGNMENT_BYTES}) AI_STATIC ai_tensor {dl_tensor_name} = {{ + .dltensor = {{ + .data = (ai_ptr)(NULL), + .device = {{kDLCPU,0}}, + .ndim = {ndim}, + .dtype = {{{dtype}}}, + .shape = {dl_tensor_name}_shape, + .strides = {dl_tensor_name}_strides, + .byte_offset = {byte_offset} + }}, + """ + ) + ) + + # Figure out quantization, if exists + if quant_name is not None: + out_c.write(f" .quant = &{quant_name} \n") + else: + out_c.write(f" .quant = NULL \n") + out_c.write(f"}}; \n") + + def _emit_activation_buffers(self, name, out_c): + # pylint: disable=unused-argument + """ Emits activation tensors, including inputs/outputs.""" + + out_c.write( + textwrap.dedent( + f"""\ + // + // Inputs: + // + """ + ) + ) + + # shape/buffer + for dl_tensor_name in self._input_data: + tensor = self._input_data[dl_tensor_name] + self._emit_tensor_init(dl_tensor_name, tensor, out_c) + out_c.write(f"\n") + out_c.write(f"\n") + + # tensor + idx = 0 + out_c.write(f"AI_STATIC ai_tensor * _InputsList[] = {{ \n") + for dl_tensor_name in self._input_data: + out_c.write(f" &{dl_tensor_name}, // [{idx}]\n") + idx = idx + 1 + out_c.write(f"}}; \n") + out_c.write(f"\n") + + out_c.write( + textwrap.dedent( + f"""\ + // + // Activations: + // + """ + ) + ) + for dl_tensor_name in self._activations: + tensor = self._activations[dl_tensor_name] + self._emit_tensor_init(dl_tensor_name, tensor, out_c) + out_c.write(f"\n") + + # Outputs: + out_c.write( + textwrap.dedent( + f"""\ + // + // Outputs: + // + """ + ) + ) + for dl_tensor_name in self._output_data: + tensor = self._output_data[dl_tensor_name] + self._emit_tensor_init(dl_tensor_name, tensor, out_c) + out_c.write(f"\n") + out_c.write(f"\n") + + idx = 0 + out_c.write(f"AI_STATIC ai_tensor * _OutputsList[] = {{ \n") + for dl_tensor_name in self._output_data: + out_c.write(f" &{dl_tensor_name}, // [{idx}]\n") + idx = idx + 1 + out_c.write(f"}}; \n") + out_c.write(f"\n") + + def _emit_params_buffers(self, name, out_c): + """ Emits all parameter tensors.""" + + out_c.write( + textwrap.dedent( + f""" + // + // Weights: {name} + // + """ + ) + ) + for dl_tensor_name in self._weights: + tensor = self._weights[dl_tensor_name] + self._emit_tensor_init(dl_tensor_name, tensor, out_c) + out_c.write(f"\n") + + def _emit_network(self, name, out_c): + """ Emits prototypes for the network operator functions.""" + + out_c.write( + textwrap.dedent( + f""" + // + // Network: {name} + // + """ + ) + ) + for node in self._nodes: + if node["op"] == "null": + continue + assert node["op"] == "tvm_op", f"###Error: Only TVM ops are supported." 
+ node_attrs = node["attrs"] + func_name = node_attrs["func_name"] + + if func_name == "__nop": + continue + + out_c.write( + f"TVM_DLL int32_t {func_name}(void * args, void * arg_type_ids, int32_t num_args); \n" + ) + out_c.write(f"\n") + + def _emit_tensor_activation(self, dl_tensor_name, tensor, out_c): + + storage_id = tensor["storage_id"] + offset = tensor["offset"] + out_c.write( + textwrap.indent( + textwrap.dedent( + f""" + // + // {dl_tensor_name}: storage_id:{storage_id} + // + {dl_tensor_name}.dltensor.data = (ai_ptr)(activations + {offset}); + """ + ), + " ", + ) + ) + + def _emit_activation_init(self, name, out_c): + """ Emits buffer initialization code for activation tensors.""" + + out_c.write( + textwrap.dedent( + f""" + // {DBAR} + // {name}_configure_activations + // {DBAR} + AI_STATIC AI_INLINE + ai_status {name}_configure_activations ( + const ai_ptr activations + ) + {{ + if (activations == NULL) {{ + TVMAPISetLastError (\"Non-null activations arena is required for this model.\"); + return AI_STATUS_ERROR; + }} + """ + ) + ) + + # Allocate inputs with the static model + if self.inputs_static: + for dl_tensor_name in self._input_data: + tensor = self._input_data[dl_tensor_name] + self._emit_tensor_activation(dl_tensor_name, tensor, out_c) + + # Prepare activation buffers + for dl_tensor_name in self._activations: + tensor = self._activations[dl_tensor_name] + self._emit_tensor_activation(dl_tensor_name, tensor, out_c) + + # Allocate outputs with the static model + if self.outputs_static: + for dl_tensor_name in self._output_data: + tensor = self._output_data[dl_tensor_name] + self._emit_tensor_activation(dl_tensor_name, tensor, out_c) + + out_c.write( + textwrap.dedent( + f""" + return AI_STATUS_OK; + }} + """ + ) + ) + + def _emit_params_init(self, name, out_c): + """ Emits buffer initialization code for params tensors.""" + + out_c.write( + textwrap.dedent( + f""" + // {DBAR} + // {name}_configure_weights + // {DBAR} + AI_STATIC AI_INLINE + ai_status {name}_configure_weights ( + const ai_ptr weights + ) + {{ + if (weights == NULL) {{ + TVMAPISetLastError(\"Non-null weights arena is required for this model.\"); + return AI_STATUS_ERROR; + }} + """ + ) + ) + + for dl_tensor_name in self._weights: + tensor = self._weights[dl_tensor_name] + offset = tensor["offset"] + out_c.write( + textwrap.indent( + textwrap.dedent( + f"""\ + // + // {dl_tensor_name} + // + {dl_tensor_name}.dltensor.data = (ai_ptr)(weights + {offset}); + """ + ), + " ", + ) + ) + + out_c.write( + textwrap.dedent( + f""" + return AI_STATUS_OK; + }} + """ + ) + ) + + def _emit_init(self, name, out_c): + """ Emits buffer initialization code.""" + + self._emit_activation_init(name, out_c) + self._emit_params_init(name, out_c) + + def _emit_run(self, name, out_h, out_c): + """ Emits the run function code.""" + + out_h.write( + textwrap.dedent( + f""" + AI_API_ENTRY + ai_status ai_{name}_run ( + ai_tensor *inputs[], + ai_tensor *outputs[] + ); + """ + ) + ) + + out_c.write( + textwrap.dedent( + f""" + // {DBAR} + // ai_{name}_run + // {DBAR} + AI_API_ENTRY + ai_status ai_{name}_run ( + ai_tensor *inputs[], + ai_tensor *outputs[] + ) + {{ + """ + ) + ) + + # Execute nodes one by one + nid = 0 + + for node in self._nodes: + node_name = node["name"] + node_name_upper = node_name.upper() + + nid += 1 + + if node["op"] == "null": + continue + + assert node["op"] == "tvm_op", f"###Error: Only TVM ops are supported." 
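+            # For each node, the emitted code fills a TVMValue argument
+            # vector (inputs first, then outputs), each entry a DLTensor
+            # handle tagged with the kTVMNDArrayHandle type code.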
+ node_attrs = node["attrs"] + func_name = node_attrs["func_name"] + + if func_name == "__nop": + continue + + out_c.write(f" // \n") + out_c.write(f" // {func_name}\n") + out_c.write(f" // \n") + + # Prepare TVM packed function - this is the one called + if name == "__nop": + print(" exec: __nop") + continue + + if name == "__copy": + print(" exec: __copy") + continue + + # Get function from the TVM module + # + # void * args : arg_values.data() + # void * arg_type_ids : arg_tcodes.data() + # int32_t num_args : arg_values.size() + + dl_args_name = _get_node_args_name(node_name) + dl_arg_types_name = _get_node_arg_types_name(node_name) + + num_inputs = len(node["inputs"]) + num_outputs = int(node_attrs["num_outputs"]) + num_args = num_inputs + num_outputs + + out_c.write(f" TVMValue {dl_args_name}[{num_args}]; \n") + out_c.write(f" int32_t {dl_arg_types_name}[{num_args}]; \n") + + curr_idx = 0 + + for arg in node["inputs"]: + dl_tensor_name = self._get_node_arg_name(arg) + # + # If this input is not an activation or a parameter => find the input + # + if dl_tensor_name not in self._weights and dl_tensor_name not in self._activations: + + assert dl_tensor_name in self._input_data, "Tensor {} not registered ?".format( + dl_tensor_name + ) + + input_idx = 0 + for dl_entry_name in self._input_data: + if dl_entry_name == dl_tensor_name: + break + input_idx += 1 + out_c.write( + f" {dl_args_name}[{curr_idx}].v_handle = &inputs[{input_idx}]->dltensor; \n" + ) + else: + out_c.write( + f" {dl_args_name}[{curr_idx}].v_handle = &{dl_tensor_name}.dltensor; \n" + ) + out_c.write(f" {dl_arg_types_name}[{curr_idx}] = kTVMNDArrayHandle; \n") + + curr_idx += 1 + + for idx in range(num_outputs): + dl_tensor_name = get_output_tensor_name(node_name, idx) + + # If this output is not an activation => find the output + if dl_tensor_name not in self._activations: + + assert dl_tensor_name in self._output_data + + output_idx = 0 + for dl_exit_name in self._output_data: + if dl_exit_name == dl_tensor_name: + break + output_idx += 1 + out_c.write( + f" {dl_args_name}[{curr_idx}].v_handle = &outputs[{output_idx}]->dltensor; \n" + ) + else: + out_c.write( + f" {dl_args_name}[{curr_idx}].v_handle = &{dl_tensor_name}.dltensor; \n" + ) + out_c.write(f" {dl_arg_types_name}[{curr_idx}] = kTVMNDArrayHandle; \n") + out_c.write(f"\n") + + curr_idx += 1 + + # call this function + out_c.write( + textwrap.dedent( + f""" + #if (_VERBOSE_ > 0) + printf (\" {func_name} ... 
\\r\\n\"); + #endif + if ({func_name} ({dl_args_name}, {dl_arg_types_name}, {num_args})) {{ + TVMAPISetLastError("Invalid handle"); + return AI_STATUS_ERROR; + }} + #if (_VERBOSE_ > 0) + printf (\" {func_name} Done.\\r\\n\"); + #endif + """ + ) + ) + out_c.write(f"\n") + out_c.write( + textwrap.dedent( + f""" + return AI_STATUS_OK; + }} + """ + ) + ) + out_c.write(f"\n") + + def _emit_create_destroy(self, name, out_h, out_c): + """ Emits the create/destroy functions.""" + + out_h.write( + textwrap.dedent( + f""" + AI_API_ENTRY + ai_status ai_{name}_create ( + const ai_ptr weights, + const ai_ptr activations + ); + """ + ) + ) + + out_h.write( + textwrap.dedent( + f""" + AI_API_ENTRY + ai_status ai_{name}_destroy (); + """ + ) + ) + + out_c.write( + textwrap.dedent( + f""" + // {DBAR} + // ai_{name}_create + // {DBAR} + AI_API_ENTRY + ai_status ai_{name}_create( + const ai_ptr weights, + const ai_ptr activations + ) + {{ + ai_status status = AI_STATUS_OK; + status = {name}_configure_weights (weights); + if (status != AI_STATUS_OK) {{ + return status; + }} + status = {name}_configure_activations (activations); + if (status != AI_STATUS_OK) {{ + return status; + }} + return AI_STATUS_OK; + }} + """ + ) + ) + + out_c.write( + textwrap.dedent( + f""" + // {DBAR} + // ai_{name}_destroy + // {DBAR} + AI_API_ENTRY + ai_status ai_{name}_destroy () + {{ + return AI_STATUS_OK; + }} + """ + ) + ) + + def emit_code(self, dest_dir, model_name): + """ Emits the C code implementing the model. """ + + # Build the directory structure + if os.path.exists(dest_dir): + raise ValueError(f"emit_code.Error: {dest_dir} exists.") + + # Make a new one + os.makedirs(dest_dir) + + # Fix the model name + model_name = re.sub("[^0-9a-zA-Z_]+", "_", model_name) + model_name = model_name.lower() + + # Write the C code: we can parse the string + if isinstance(self._lib, list): + # List of strings from Model Library Format C files + for idx, src in enumerate(self._lib): + code = _preprocess_code(src) + filename = os.path.join(dest_dir, f"{model_name}_lib{idx}.c") + with open(filename, "w") as fout: + fout.write(code) + else: + # a TVM RuntimeGraphFactory + src = self._lib.get_source(fmt="c") + code = _preprocess_code(src) + filename = os.path.join(dest_dir, f"{model_name}_lib.c") + with open(filename, "w") as fout: + fout.write(code) + + # Save params as binary data + saved_params = tvm.runtime.save_param_dict(self._params) + params_name = os.path.join(dest_dir, model_name + ".params") + with open(params_name, "wb") as f: + f.write(saved_params) + + # Write the .json + graph_name = os.path.join(dest_dir, model_name + ".json") + json_string = json.dumps(self._graph, indent=4) + with open(graph_name, "w") as f: + print(json_string, file=f) + + # emit X_data[c,h] + data_h_name = os.path.join(dest_dir, model_name + "_data.h") + data_c_name = os.path.join(dest_dir, model_name + "_data.c") + model_h_name = os.path.join(dest_dir, model_name + ".h") + model_c_name = os.path.join(dest_dir, model_name + ".c") + + with contextlib.ExitStack() as exit_stack: + + # emit X[c,h] + + data_h = exit_stack.enter_context(open(data_h_name, "w")) + data_c = exit_stack.enter_context(open(data_c_name, "w")) + out_h = exit_stack.enter_context(open(model_h_name, "w")) + out_c = exit_stack.enter_context(open(model_c_name, "w")) + + self._emit_params_data(model_name, data_h, data_c) + + self._emit_open(model_name, out_h, out_c) + self._emit_params_buffers(model_name, out_c) + self._emit_activation_buffers(model_name, out_c) + 
self._emit_network(model_name, out_c) + + self._emit_init(model_name, out_c) + self._emit_create_destroy(model_name, out_h, out_c) + self._emit_run(model_name, out_h, out_c) + + self._emit_close(model_name, out_h, out_c) diff --git a/src/runtime/crt/contrib/stm32/ai_runtime_api.c b/src/runtime/crt/contrib/stm32/ai_runtime_api.c new file mode 100644 index 000000000000..7d3cdfe12c26 --- /dev/null +++ b/src/runtime/crt/contrib/stm32/ai_runtime_api.c @@ -0,0 +1,356 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ai_runtime_api.c + * \brief The runtime API for the TVM generated C code. + */ + +// LINT_C_FILE + +#include "ai_runtime_api.h" + +#include +#include +#include + +// ======================================================= +// ai_network_t +// ======================================================= + +typedef struct { + ai_model_info* info; + ai_tensor** inputs; + ai_tensor** outputs; + ai_ptr activations; + const char* error; +} ai_network_t; + +// +// .nn_models_info section +// +extern uintptr_t __models_section_start__; +extern uintptr_t __models_section_end__; + +uint32_t _modelsSection_start = (uint32_t)(&__models_section_start__); +uint32_t _modelsSection_end = (uint32_t)(&__models_section_end__); + +// ======================================================= +// Iterator +// ======================================================= +ai_model_iterator ai_model_iterator_begin() { + return _modelsSection_start; // begin() +} + +ai_model_iterator ai_model_iterator_end() { return _modelsSection_end; } + +ai_model_iterator ai_model_iterator_next(ai_model_iterator idx) { + return (idx + sizeof(ai_model_info)); +} + +ai_model_info* ai_model_iterator_value(ai_model_iterator idx) { return (ai_model_info*)idx; } + +// ======================================================= +// ai_create +// ======================================================= +AI_API_ENTRY ai_status ai_create(ai_model_info* nn, ai_ptr activations, ai_handle* handle) { + uint32_t n_inputs = AI_MODEL_n_inputs(nn); + uint32_t n_outputs = AI_MODEL_n_outputs(nn); + + ai_status status = AI_STATUS_OK; + + // + // Create internal network representation + // + ai_network_t* network = (ai_network_t*)malloc(sizeof(ai_network_t)); + + network->info = nn; + + for (int i = 0; i < n_inputs; i++) { + network->inputs = AI_MODEL_inputs(nn); + } + for (int i = 0; i < n_outputs; i++) { + network->outputs = AI_MODEL_outputs(nn); + } + + network->activations = activations; + + network->error = NULL; + + const ai_ptr params = nn->ai_get_params(); + status = nn->ai_create(params, activations); + if (status != AI_STATUS_OK) { + network->error = TVMGetLastError(); + } + + // + // Setup weights and activations + // + *handle = network; + + return status; +} + +// 
======================================================= +// ai_destroy +// ======================================================= +AI_API_ENTRY ai_status ai_destroy(ai_handle handle) { + if (handle == NULL) { + return AI_STATUS_ERROR; + } + + ai_network_t* network = (ai_network_t*)handle; + + free(network); + + return AI_STATUS_OK; +} + +// ======================================================= +// ai_get_error +// ======================================================= +AI_API_ENTRY +const char* ai_get_error(ai_handle handle) { + if (handle == NULL) { + return "Network handle is NULL"; + } + ai_network_t* network = (ai_network_t*)handle; + if (network->error == NULL) { + return ""; + } + return network->error; +} + +// ======================================================= +// ai_get_input_size +// ======================================================= +AI_API_ENTRY int32_t ai_get_input_size(ai_handle handle) { + if (handle == NULL) { + return 0; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_n_inputs(network->info); +} + +// ======================================================= +// ai_get_output_size +// ======================================================= +AI_API_ENTRY int32_t ai_get_output_size(ai_handle handle) { + if (handle == NULL) { + return 0; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_n_outputs(network->info); +} + +// ======================================================= +// ai_get_input +// ======================================================= +AI_API_ENTRY ai_tensor* ai_get_input(ai_handle handle, int32_t index) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + if (index >= AI_MODEL_n_inputs(network->info)) { + network->error = "Input index out of range"; + return NULL; + } + return (network->inputs)[index]; +} + +// ======================================================= +// ai_get_output +// ======================================================= +AI_API_ENTRY ai_tensor* ai_get_output(ai_handle handle, int32_t index) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + if (index >= AI_MODEL_n_outputs(network->info)) { + network->error = "Output index out of range"; + return NULL; + } + return (network->outputs)[index]; +} + +// ======================================================= +// ai_run +// ======================================================= +AI_API_ENTRY ai_status ai_run(ai_handle handle) { + if (handle == NULL) { + return AI_STATUS_ERROR; + } + ai_network_t* network = (ai_network_t*)handle; + + ai_model_info* nn = network->info; + + uint32_t n_inputs = AI_MODEL_n_inputs(nn); + uint32_t n_outputs = AI_MODEL_n_outputs(nn); + ai_status status = AI_STATUS_OK; + + // + // Check that input tensors have been specified + // + uint32_t i; + for (i = 0; i < n_inputs; i++) { + ai_tensor* input_tensor = network->inputs[i]; + DLTensor* input = &input_tensor->dltensor; + if (input->data == NULL) { + network->error = "Network input NULL"; + return AI_STATUS_ERROR; + } + } + for (i = 0; i < n_outputs; i++) { + ai_tensor* output_tensor = network->outputs[i]; + DLTensor* output = &output_tensor->dltensor; + if (output->data == NULL) { + network->error = "Network output NULL"; + return AI_STATUS_ERROR; + } + } + + status = nn->ai_run(network->inputs, network->outputs); + + if (status != AI_STATUS_OK) { + const char* err = TVMGetLastError(); + network->error = err; + } + + return status; +} + +// 
======================================================= +// ai_get_name +// ======================================================= +const char* ai_get_name(ai_handle handle) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_name(network->info); +} + +// ======================================================= +// ai_get_datetime +// ======================================================= +const char* ai_get_datetime(ai_handle handle) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_datetime(network->info); +} + +// ======================================================= +// ai_get_revision +// ======================================================= +const char* ai_get_revision(ai_handle handle) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_revision(network->info); +} + +// ======================================================= +// ai_get_tool_version +// ======================================================= +const char* ai_get_tool_version(ai_handle handle) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_tool_version(network->info); +} + +// ======================================================= +// ai_get_api_version +// ======================================================= +const char* ai_get_api_version(ai_handle handle) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_api_version(network->info); +} + +// ======================================================= +// ai_get_node_size +// ======================================================= +uint32_t ai_get_node_size(ai_handle handle) { + if (handle == NULL) { + return 0; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_n_nodes(network->info); +} + +// ======================================================= +// ai_get_activations_size +// ======================================================= +uint32_t ai_get_activations_size(ai_handle handle) { + if (handle == NULL) { + return 0; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_activations_size(network->info); +} + +// ======================================================= +// ai_get_params_size +// ======================================================= +uint32_t ai_get_params_size(ai_handle handle) { + if (handle == NULL) { + return 0; + } + ai_network_t* network = (ai_network_t*)handle; + return AI_MODEL_params_size(network->info); +} + +// ======================================================= +// ai_get_activations +// ======================================================= +ai_ptr ai_get_activations(ai_handle handle) { + if (handle == NULL) { + return 0; + } + ai_network_t* network = (ai_network_t*)handle; + return network->activations; +} + +// ======================================================= +// ai_get_params +// ======================================================= +const ai_ptr ai_get_params(ai_handle handle) { + if (handle == NULL) { + return NULL; + } + ai_network_t* network = (ai_network_t*)handle; + return network->info->ai_get_params(); +} + +// ======================================================= +// ai_get_quantization +// ======================================================= +const ai_quantization_info* ai_get_quantization(ai_tensor* tensor) { + if (tensor == NULL) { + return NULL; + } + 
return tensor->quant; +} diff --git a/src/runtime/crt/contrib/stm32/ai_runtime_api.h b/src/runtime/crt/contrib/stm32/ai_runtime_api.h new file mode 100644 index 000000000000..10056fde7c10 --- /dev/null +++ b/src/runtime/crt/contrib/stm32/ai_runtime_api.h @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ai_runtime_api.h + * \brief The runtime API for the TVM generated C code. + */ + +#ifndef TVM_RUNTIME_CRT_CONTRIB_STM32_AI_RUNTIME_API_H_ +#define TVM_RUNTIME_CRT_CONTRIB_STM32_AI_RUNTIME_API_H_ + +#include +#include +#include + +#include "dlpack/dlpack.h" // From TVM +#include "tvm/runtime/c_runtime_api.h" // From TVM + +// +// This describes current ai_runtime version +// +#define AI_PLATFORM_RUNTIME_MAJOR 1 +#define AI_PLATFORM_RUNTIME_MINOR 0 +#define AI_PLATFORM_RUNTIME_MICRO 0 + +#define AI_STATIC static + +#if defined(_MSC_VER) +#define AI_INLINE __inline +#define AI_API_ENTRY __declspec(dllexport) +#define AI_ALIGNED(x) /* AI_ALIGNED(x) */ +#elif defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) +#define AI_INLINE inline +#define AI_API_ENTRY /* AI_API_ENTRY */ +#define AI_ALIGNED(x) AI_CONCAT(AI_ALIGNED_, x) +#elif defined(__CC_ARM) +#define AI_INLINE __inline +#define AI_API_ENTRY __attribute__((visibility("default"))) +#define AI_ALIGNED(x) __attribute__((aligned(x))) +/* Keil disallows anonymous union initialization by default */ +#pragma anon_unions +#elif defined(__GNUC__) +#define AI_INLINE __inline +#define AI_API_ENTRY __attribute__((visibility("default"))) +#define AI_ALIGNED(x) __attribute__((aligned(x))) +#else +/* Dynamic libraries are not supported by the compiler */ +#define AI_API_ENTRY /* AI_API_ENTRY */ +#define AI_ALIGNED(x) /* AI_ALIGNED(x) */ +#endif + +/*********************************************************/ + +typedef void* ai_handle; + +#define AI_HANDLE_PTR(ptr_) ((ai_handle)(ptr_)) +#define AI_HANDLE_NULL AI_HANDLE_PTR(NULL) + +typedef uint8_t* ai_ptr; + +typedef enum { AI_STATUS_OK = 0, AI_STATUS_ERROR = 1, AI_STATUS_DELEGATE_ERROR = 2 } ai_status; + +// ======================================================= +// ai_quantization_info +// +// Parameters for asymmetric quantization across a dimension (i.e +// per output channel quantization). +// quantized_dimension specifies which dimension the scales and +// zero_points correspond to. +// For a particular value in quantized_dimension, quantized values +// can be converted back to float using: +// real_value = scale * (quantized_value - zero_point) +// ======================================================= + +typedef struct { + /*! 
+ * \brief The quantization info, if quantized + */ + float* scale; + int32_t* zero_point; + int32_t dim; +} ai_quantization_info; + +// ======================================================= +// ai_tensor +// ======================================================= + +typedef struct { + /*! + * \brief The TVM tensor. + */ + DLTensor dltensor; + /*! + * \brief The quantization info, if quantized + */ + ai_quantization_info* quant; +} ai_tensor; + +// ======================================================= +// get_dltensor +// ======================================================= +AI_STATIC AI_INLINE DLTensor* get_dltensor(ai_tensor* tensor) { return &tensor->dltensor; } + +// ======================================================= +// get_tensor_elts +// ======================================================= +AI_STATIC AI_INLINE uint32_t get_tensor_elts(const ai_tensor* tensor) { + const DLTensor* t = &tensor->dltensor; + uint32_t elts = 1; + for (int i = 0; i < t->ndim; ++i) { + elts *= t->shape[i]; + } + return elts; +} + +// ======================================================= +// get_tensor_size +// ======================================================= +AI_STATIC AI_INLINE uint32_t get_tensor_size(const ai_tensor* tensor) { + const DLTensor* t = &tensor->dltensor; + uint32_t size = 1; + for (int i = 0; i < t->ndim; ++i) { + size *= t->shape[i]; + } + size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + return size; +} + +// ======================================================= +// ai_network_info +// ======================================================= + +typedef struct { + const char* name; + const char* datetime; + const char* revision; + const char* tool_version; + const char* api_version; + uint16_t n_nodes; + uint8_t n_inputs; + uint8_t n_outputs; + uint32_t activations_size; + uint32_t params_size; + ai_ptr activations; + ai_tensor** inputs; + ai_tensor** outputs; + const ai_ptr (*ai_get_params)(void); + ai_status (*ai_create)(const ai_ptr weights, const ai_ptr activations); + ai_status (*ai_destroy)(); + ai_status (*ai_run)(ai_tensor* input[], ai_tensor* output[]); +} ai_model_info; + +#define AI_MODEL_name(x) (x->name) +#define AI_MODEL_datetime(x) (x->datetime) +#define AI_MODEL_revision(x) (x->revision) +#define AI_MODEL_tool_version(x) (x->tool_version) +#define AI_MODEL_api_version(x) (x->api_version) +#define AI_MODEL_n_nodes(x) (x->n_nodes) +#define AI_MODEL_n_inputs(x) (x->n_inputs) +#define AI_MODEL_n_outputs(x) (x->n_outputs) +#define AI_MODEL_activations_size(x) (x->activations_size) +#define AI_MODEL_params_size(x) (x->params_size) +#define AI_MODEL_inputs(x) (x->inputs) +#define AI_MODEL_outputs(x) (x->outputs) +#define AI_MODEL_activations(x) (x->activations) + +// ======================================================= +// Iterator +// +// Usage: +// +// for (ai_models_iterator it = ai_models_iterator_begin(); +// it != ai_models_iterator_end(); +// it = ai_models_iterator_next(it)) { +// const char * name = ai_models_iterator_value(it); +// } +// +// ======================================================= + +typedef uint32_t ai_model_iterator; + +ai_model_iterator ai_model_iterator_begin(); +ai_model_iterator ai_model_iterator_next(ai_model_iterator it); +ai_model_iterator ai_model_iterator_end(); +ai_model_info* ai_model_iterator_value(ai_model_iterator it); + +// ======================================================= +// External Interface +// ======================================================= + +ai_status ai_create(ai_model_info* nn, ai_ptr 
activations, ai_handle* handle); + +ai_status ai_destroy(ai_handle handle); + +const char* ai_get_error(ai_handle handle); + +int32_t ai_get_input_size(ai_handle handle); + +int32_t ai_get_output_size(ai_handle handle); + +ai_tensor* ai_get_input(ai_handle handle, int32_t index); + +ai_tensor* ai_get_output(ai_handle handle, int32_t index); + +ai_status ai_run(ai_handle handle); + +// +// Additional methods +// +const char* ai_get_name(ai_handle handle); +const char* ai_get_datetime(ai_handle handle); +const char* ai_get_revision(ai_handle handle); +const char* ai_get_tool_version(ai_handle handle); +const char* ai_get_api_version(ai_handle handle); + +uint32_t ai_get_node_size(ai_handle handle); +uint32_t ai_get_activations_size(ai_handle handle); +uint32_t ai_get_params_size(ai_handle handle); + +ai_ptr ai_get_activations(ai_handle handle); +const ai_ptr ai_get_params(ai_handle handle); + +// +// Quantization +// +const ai_quantization_info* ai_get_quantization(ai_tensor* tensor); + +#endif // TVM_RUNTIME_CRT_CONTRIB_STM32_AI_RUNTIME_API_H_ diff --git a/src/runtime/crt/contrib/stm32/crt_config.h b/src/runtime/crt/contrib/stm32/crt_config.h new file mode 100644 index 000000000000..dc583a6fee53 --- /dev/null +++ b/src/runtime/crt/contrib/stm32/crt_config.h @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/runtime/crt_config.h.template + * \brief Template for CRT configuration, to be modified on each target. + */ +#ifndef TVM_RUNTIME_CRT_CONTRIB_STM32_CRT_CONFIG_H_ +#define TVM_RUNTIME_CRT_CONTRIB_STM32_CRT_CONFIG_H_ + +#include + +/*! Log level of the CRT runtime */ +#define TVM_CRT_LOG_LEVEL TVM_CRT_LOG_LEVEL_DEBUG + +#endif // TVM_RUNTIME_CRT_CONTRIB_STM32_CRT_CONFIG_H_ diff --git a/src/runtime/crt/contrib/stm32/runtime.c b/src/runtime/crt/contrib/stm32/runtime.c new file mode 100644 index 000000000000..4583eb3c8eca --- /dev/null +++ b/src/runtime/crt/contrib/stm32/runtime.c @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+/*!
+ * \file runtime.c
+ * \brief Minimal C runtime support required by the TVM-generated
+ * C code. The declarations live in "runtime/c_backend_api.h"
+ * and "runtime/c_runtime_api.h".
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tvm/runtime/c_backend_api.h>
+#include <tvm/runtime/c_runtime_api.h>
+
+static char* g_last_error = NULL;
+
+// ====================================================
+// TVMPlatformMemoryAllocate
+// ====================================================
+tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
+  // The same C-heap allocator serves both the Arm target and the x86 host build.
+  *out_ptr = malloc(num_bytes);
+  return (*out_ptr == NULL) ? kTvmErrorPlatformNoMemory : kTvmErrorNoError;
+}
+
+// ====================================================
+// TVMPlatformMemoryFree
+// ====================================================
+tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
+  free(ptr);
+  return kTvmErrorNoError;
+}
+
+// ====================================================
+// TVMFuncRegisterGlobal
+// ====================================================
+int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return -1; }
+
+// ====================================================
+// TVMPlatformAbort
+// ====================================================
+void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code) {
+  for (;;) {
+  }
+}
+
+// ====================================================
+// TVMLogf
+// ====================================================
+void TVMLogf(const char* msg, ...) { return; }
+
+// ====================================================
+// TVMAPISetLastError
+// ====================================================
+void TVMAPISetLastError(const char* msg) {
+  if (g_last_error) {
+    free(g_last_error);
+  }
+  size_t nbytes = strlen(msg) + 1;
+  g_last_error = malloc(nbytes);
+  snprintf(g_last_error, nbytes, "%s", msg);
+}
+
+// ====================================================
+// TVMGetLastError
+// ====================================================
+const char* TVMGetLastError(void) {
+  assert(g_last_error);
+  return g_last_error;
+}
diff --git a/tests/crt/contrib/stm32/Makefile b/tests/crt/contrib/stm32/Makefile
new file mode 100644
index 000000000000..293e0e7c7793
--- /dev/null
+++ b/tests/crt/contrib/stm32/Makefile
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
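+
+# Example invocation (the paths are illustrative):
+#
+#   make TVM_PATH=$HOME/tvm MODEL_PATH=./build/network \
+#        BUILD_PATH=./build IMAGE_PATH=./images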
+
+ifndef TVM_PATH
+$(error TVM_PATH must be set and point at your TVM installation)
+endif
+
+ifndef MODEL_PATH
+$(error MODEL_PATH must be set and point at your model implementation)
+endif
+
+ifndef BUILD_PATH
+$(error BUILD_PATH must be set and point at where your models are built)
+endif
+
+ifndef IMAGE_PATH
+$(error IMAGE_PATH must be set and point at where your images are stored)
+endif
+
+SRC_PATH = $(TVM_PATH)/tests/crt/contrib/stm32/src
+TVM_CRT_PATH = $(TVM_PATH)/src/runtime/crt/common
+STM32_RUNTIME_PATH = $(TVM_PATH)/src/runtime/crt/contrib/stm32
+
+#
+# Model sources
+#
+C_SOURCES := $(wildcard ${MODEL_PATH}/*.c)
+
+#
+# TVM sources
+#
+C_SOURCES += $(TVM_CRT_PATH)/crt_backend_api.c
+C_SOURCES += $(STM32_RUNTIME_PATH)/runtime.c
+C_SOURCES += $(STM32_RUNTIME_PATH)/ai_runtime_api.c
+
+#
+# Application sources
+#
+C_SOURCES += $(SRC_PATH)/main.c
+
+vpath %.c $(sort $(dir $(C_SOURCES)))
+
+#
+# Build
+#
+
+BUILD_DIR = $(MODEL_PATH)
+
+TARGET = network.exe
+
+OBJECTS = $(addprefix $(BUILD_DIR)/,$(notdir $(C_SOURCES:.c=.o)))
+
+CC = gcc -m32 -g
+
+DEFINES =
+INCLUDES = -I$(TVM_PATH)/3rdparty/dlpack/include -I$(TVM_PATH)/include -I$(STM32_RUNTIME_PATH)
+
+CFLAGS = $(DEFINES) $(INCLUDES)
+LDFLAGS = -lm
+
+all: $(BUILD_DIR)/$(TARGET)
+
+$(BUILD_DIR)/$(TARGET): $(OBJECTS)
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+$(BUILD_DIR)/main.o: main.c
+	$(CC) -DBUILD_PATH=\"$(BUILD_PATH)\" -DIMAGE_PATH=\"$(IMAGE_PATH)\" $(CFLAGS) -I$(MODEL_PATH) -c $< -o $@
+
+$(BUILD_DIR)/%.o: %.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+clean:
+	rm -f $(BUILD_DIR)/*.o
+	rm -f $(BUILD_DIR)/$(TARGET)
diff --git a/tests/crt/contrib/stm32/src/main.c b/tests/crt/contrib/stm32/src/main.c
new file mode 100644
index 000000000000..a124ab4441ea
--- /dev/null
+++ b/tests/crt/contrib/stm32/src/main.c
@@ -0,0 +1,461 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ai_runtime_api.h"
+#include "network.h"
+#include "network_data.h"
+
+//
+// Network that we are testing
+//
+extern ai_model_info network_network;
+
+//
+// Dummy: for the runtime
+//
+uint32_t __models_section_start__ = (uint32_t)&network_network;
+uint32_t __models_section_end__ = (uint32_t)&network_network + sizeof(ai_model_info);
+
+static ai_model_info* _model_p = &network_network;
+
+//
+// Global handle to reference the instantiated NN
+//
+static ai_handle _network = AI_HANDLE_NULL;
+
+static uint8_t LoadInputImg(const char* filename, ai_tensor* input);
+static int32_t quantize_val(float val, const ai_quantization_info* quant);
+static float dequantize_val(int32_t val, const ai_quantization_info* quant);
+
+// =================================================================
+// Convert_Fixed_To_Float
+// =================================================================
+static float Convert_Fixed_To_Float(uint8_t data, int8_t fl) {
+  uint8_t val = data;
+  float x;
+  if (fl >= 0) {
+    x = ((float)val) / (float)(1 << fl);  // NOLINT
+  } else {
+    x = ((float)val) * (float)(1 << -fl);  // NOLINT
+  }
+  return x;
+}
+
+// =======================================================
+// error
+// =======================================================
+static void error(const char* fmt, ...) {
+  va_list vp;
+  char emsg[512];
+  int32_t loc = 0;
+
+  //
+  // Prepare main error message:
+  //
+  va_start(vp, fmt);
+  loc += vsnprintf(&emsg[loc], sizeof(emsg) - loc, fmt, vp);
+  va_end(vp);
+
+  fprintf(stderr, " #### Error: %s.\n", emsg);
+
+  exit(-1);
+}
+
+// ==================================================
+// aiLogErr
+// ==================================================
+static void aiLogErr(const char* fct, const char* msg) {
+  if (fct) {
+    printf("E: AI error: %s - %s\r\n", fct, msg);
+  } else {
+    printf("E: AI error - %s\r\n", msg);
+  }
+}
+
+// ==================================================
+// aiPrintLayoutBuffer
+// ==================================================
+static void aiPrintLayoutBuffer(const char* msg, int idx, ai_tensor* tensor) {
+  DLTensor* dltensor = get_dltensor(tensor);
+  DLDataType dtype = dltensor->dtype;
+
+  printf("%s[%d] ", msg, idx);
+  printf(" (%u, %u, %u)", dtype.code, dtype.bits, dtype.lanes);
+  //
+  // Quantization info exists for input/output tensors
+  //
+  const ai_quantization_info* quant = ai_get_quantization(tensor);
+  if (quant != NULL) {
+    printf(" -- TODO: quantization info \n");
+  }
+
+  int32_t size = get_tensor_size(tensor);
+  printf(" %d bytes, shape=(", size);
+  for (int i = 0; i < dltensor->ndim; ++i) {
+    printf("%d,", (int32_t)dltensor->shape[i]);
+  }
+  printf("), address = 0x%08x\r\n", (unsigned int)dltensor->data);
+}
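+// For a uint8 28x28 MNIST input tensor, the line printed above might
+// read as follows (the address is illustrative):
+//
+//   I[0]  (1, 8, 1) 784 bytes, shape=(1,28,28,1,), address = 0x20001000
+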
printf("Network configuration...\r\n"); + printf(" Model name : %s\r\n", name); + printf(" Compile datetime : %s\r\n", datetime); + printf(" Tool revision : %s (%s)\r\n", revision, tool_version); + printf(" API version : %s\r\n", api_version); + printf("Network info...\r\n"); + printf(" nodes : %d\r\n", n_nodes); + printf(" activation : %d bytes\r\n", activations_size); + printf(" params : %d bytes\r\n", params_size); + printf(" inputs/outputs : %u/%u\r\n", n_inputs, n_outputs); +} + +// ====================================================== +// aiInit +// ====================================================== +static int aiInit(void) { + ai_status err = AI_STATUS_OK; + + const char* nn_name = AI_MODEL_name(_model_p); + ai_ptr built_in_activations = AI_MODEL_activations(_model_p); + + // + // Creating the network + // + printf("Creating the network \"%s\"..\r\n", nn_name); + + err = ai_create(_model_p, built_in_activations, &_network); + if (err != AI_STATUS_OK) { + const char* msg = ai_get_error(_network); + aiLogErr("ai_create", msg); + return -1; + } + + // + // Query the created network to get relevant info from it + // + aiPrintNetworkInfo(_network); + + uint32_t n_inputs = ai_get_input_size(_network); + uint32_t n_outputs = ai_get_output_size(_network); + uint32_t activations_size = ai_get_activations_size(_network); + uint32_t params_size = ai_get_params_size(_network); + + const ai_ptr params = ai_get_params(_network); + ai_ptr activations = ai_get_activations(_network); + + printf("Weights buffer : 0x%08x %d bytes)\r\n", (unsigned int)params, + (unsigned int)params_size); + printf("Activation buffer : 0x%08x (%d bytes) %s\r\n", (unsigned int)activations, + (unsigned int)activations_size, + ((uint32_t)activations & (uint32_t)0xFF000000) ? "internal" : "external"); + + printf("Inputs:\r\n"); + for (int i = 0; i < n_inputs; i++) { + ai_tensor* input = ai_get_input(_network, i); + aiPrintLayoutBuffer(" I", i, input); + } + + printf("Outputs:\r\n"); + for (int i = 0; i < n_outputs; i++) { + ai_tensor* output = ai_get_output(_network, i); + aiPrintLayoutBuffer(" O", i, output); + } + + return 0; +} + +// ====================================================== +// aiDeInit +// ====================================================== +static void aiDeInit(void) { + ai_status err = AI_STATUS_OK; + + printf("Releasing the network(s)...\r\n"); + + if (ai_destroy(_network) != AI_STATUS_OK) { + const char* err = ai_get_error(_network); + aiLogErr("ai_destroy", err); + } + _network = AI_HANDLE_NULL; + return; +} + +// ================================================================= +// argmax +// +// Description : return argument of table maximum value +// Argument : Vector_db *vec: table +// Return Value : int: index of max value +// ================================================================= +static uint8_t argmax(int8_t* vec, uint32_t num) { + uint32_t i; + uint8_t arg = 0; + int8_t imax = vec[0]; + for (i = 1; i < num; i++) { + imax = (imax > vec[i]) ? 
+
+// ======================================================
+// aiRun
+// ======================================================
+static int aiRun(void) {
+  //
+  // Inputs
+  //
+  ai_tensor* input = ai_get_input(_network, 0);
+  if (input == NULL) {
+    const char* err = ai_get_error(_network);
+    aiLogErr("ai_run", err);
+    return -1;
+  }
+
+  //
+  // Outputs
+  //
+  ai_tensor* output = ai_get_output(_network, 0);
+  if (output == NULL) {
+    const char* err = ai_get_error(_network);
+    aiLogErr("ai_run", err);
+    return -1;
+  }
+
+  DLDataType out_dtype = output->dltensor.dtype;
+  if (out_dtype.lanes > 1) {
+    printf("E: vector outputs are not supported ...\r\n");
+    return -1;
+  }
+
+  uint32_t elts = get_tensor_elts(output);
+
+  char outfile_name[128];
+  sprintf(outfile_name, "%s/tvm_results.txt", BUILD_PATH);  // NOLINT
+  FILE* outfile = fopen(outfile_name, "w");
+  if (outfile == NULL) {
+    error("Cannot open %s for writing\n", outfile_name);
+  }
+
+  for (int i = 0; i <= 9; i++) {
+    char image[128];
+
+    sprintf(image, "%s/0%d.raw", IMAGE_PATH, i);  // NOLINT
+    printf("Loading input image %s ... \n", image);
+    if (LoadInputImg(image, input) != 0) {
+      error("Loading image %s\n", image);
+    }
+
+    //
+    // Run the inference
+    //
+    printf("Running the network\r\n");
+
+    if (ai_run(_network) != AI_STATUS_OK) {
+      const char* err = ai_get_error(_network);
+      aiLogErr("ai_run", err);
+      return -1;
+    }
+
+    const ai_quantization_info* output_quant = ai_get_quantization(output);
+    if (output_quant == NULL) {
+      //
+      // Floating point model
+      //
+      float* probabilities = (float*)output->dltensor.data;  // NOLINT
+      for (uint32_t j = 0; j < elts; j++) {
+        float val = probabilities[j];
+        fprintf(outfile, "%g ", val);
+      }
+    } else {
+      //
+      // Quantized model
+      //
+      if (out_dtype.code == kDLInt) {
+        int8_t* probabilities = (int8_t*)output->dltensor.data;  // NOLINT
+        for (uint32_t j = 0; j < elts; j++) {
+          int8_t qval = probabilities[j];
+          float val = dequantize_val(qval, output_quant);
+          fprintf(outfile, "%g ", val);
+        }
+      } else {
+        uint8_t* probabilities = (uint8_t*)output->dltensor.data;  // NOLINT
+        for (uint32_t j = 0; j < elts; j++) {
+          uint8_t qval = probabilities[j];
+          float val = dequantize_val(qval, output_quant);
+          fprintf(outfile, "%g ", val);
+        }
+      }
+    }
+    fprintf(outfile, "\n");
+  }
+  fclose(outfile);
+
+  return 0;
+}
+
+// =================================================================
+// quantize_val
+// =================================================================
+static int32_t quantize_val(float val, const ai_quantization_info* quant) {
+  float new_val;
+  float input_scale = quant->scale[0];
+  int32_t input_zero_point = quant->zero_point[0];
+  new_val = val / input_scale + input_zero_point;
+  return (int32_t)new_val;
+}
+
+// =================================================================
+// dequantize_val
+// =================================================================
+static float dequantize_val(int32_t val, const ai_quantization_info* quant) {
+  float new_val;
+  float output_scale = quant->scale[0];
+  int32_t output_zero_point = quant->zero_point[0];
+  new_val = (val - output_zero_point) * output_scale;
+  return new_val;
+}
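+
+// Worked example (the values are illustrative): with scale[0] = 1.0f/255
+// and zero_point[0] = -128, quantize_val(1.0f, quant) yields
+// 1.0 / (1.0/255) + (-128) = 127, and dequantize_val(127, quant) maps it
+// back: (127 - (-128)) * (1.0/255) = 1.0f.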
+
+// =================================================================
+// LoadInputImg
+// =================================================================
+static uint8_t LoadInputImg(const char* filename, ai_tensor* input) {
+  DLDataType dtype = input->dltensor.dtype;
+
+  const ai_quantization_info* input_quant = ai_get_quantization(input);
+
+  if (dtype.lanes > 1) {
+    printf("E: vector inputs are not supported ...\r\n");
+    return 1;
+  }
+
+  if (dtype.code == kDLBfloat) {
+    printf("E: bfloat16 inputs are not supported ...\r\n");
+    return 1;
+  }
+
+  FILE* file = fopen(filename, "r");
+  if (file == NULL) {
+    printf("== File %s not found\n", filename);
+    return 1;
+  }
+
+  //
+  // Find file size
+  //
+  fseek(file, 0L, SEEK_END);
+  size_t img_size = ftell(file);
+  (void)fseek(file, 0L, SEEK_SET);
+
+  uint8_t* image = (uint8_t*)malloc(img_size);  // NOLINT
+  size_t size = fread(image, 1, img_size, file);
+  if (size != img_size) {
+    perror("fread");
+    printf("== Problem reading %s\n", filename);
+    fclose(file);
+    free(image);
+    return 1;
+  }
+
+  fclose(file);
+
+  uint32_t x;
+  uint8_t* p = image;
+  uint8_t* pg = (uint8_t*)input->dltensor.data;  // NOLINT
+
+  for (x = 0; x < img_size; x++) {
+    uint8_t val = p[x];
+    //
+    // Input image needs to be normalized into the [0..1] interval
+    //
+    float nval = ((float)val) / 255.0;  // NOLINT
+    if (input_quant != NULL) {
+      if (dtype.code == kDLInt) {
+        int8_t qval = quantize_val(nval, input_quant);
+        *pg = qval;
+        pg += sizeof(int8_t);
+      } else {
+        uint8_t qval = quantize_val(nval, input_quant);
+        *pg = qval;
+        pg += sizeof(uint8_t);
+      }
+    } else {
+      *(float*)pg = nval;  // NOLINT
+      pg += sizeof(float);
+    }
+  }
+
+  free(image);
+
+  return 0;
+}
+
+// ======================================================
+// main
+// ======================================================
+int main(int argc, char* argv[]) {
+  int status;
+
+  status = aiInit();
+  if (status != 0) {
+    printf("Error initializing.\n");
+    return 1;
+  }
+
+  status = aiRun();
+  if (status != 0) {
+    printf("Error running.\n");
+  }
+
+  aiDeInit();
+
+  return (status != 0) ? 1 : 0;
+}
diff --git a/tests/micro/stm32/.clang-format b/tests/micro/stm32/.clang-format
new file mode 100644
index 000000000000..9d159247d518
--- /dev/null
+++ b/tests/micro/stm32/.clang-format
@@ -0,0 +1,2 @@
+DisableFormat: true
+SortIncludes: false
diff --git a/tests/micro/stm32/conftest.py b/tests/micro/stm32/conftest.py
new file mode 100644
index 000000000000..66a53625fbe7
--- /dev/null
+++ b/tests/micro/stm32/conftest.py
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+import tvm.target.target
diff --git a/tests/micro/stm32/test_code_emitter.py b/tests/micro/stm32/test_code_emitter.py
new file mode 100644
index 000000000000..01bfaefc18cf
--- /dev/null
+++ b/tests/micro/stm32/test_code_emitter.py
@@ -0,0 +1,395 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shutil
+import struct
+import sys
+from collections import OrderedDict
+
+import numpy as np
+
+import tensorflow as tf
+
+import tvm
+import tvm.relay as relay
+from tvm.micro.contrib import stm32
+from tvm.contrib.download import download
+from tvm import testing
+
+import conftest
+
+NUM_ITERATIONS = 10
+
+# =========================================================
+# get_data
+# =========================================================
+def get_data(in_data_shapes, in_data_dtypes):
+    """Generate random input data, keyed by input tensor name."""
+    assert len(in_data_shapes) == 1, "Only single input models are supported."
+    in_data = OrderedDict()
+    for shape_name, shape in in_data_shapes.items():
+        for dtype_name, dtype in in_data_dtypes.items():
+            if dtype_name == shape_name:
+                in_data[shape_name] = np.random.uniform(size=shape).astype(dtype)
+                break
+        if shape_name not in in_data.keys():
+            raise ValueError("Shape and dtype dictionaries do not fit.")
+
+    return in_data
+
+
+# ==================================================================
+# dump_image
+# ==================================================================
+def dump_image(filename, image):
+    # Flatten image
+    image_data = image.flatten()
+    outputRaw = []
+    # Raw binary format
+    for i in range(0, len(image_data)):
+        outputRaw.append(struct.pack("