def run_cmd(cmd):
    """Runs `cmd` in a subprocess and awaits its completion.

    Standard error is merged into standard output, so the single captured
    stream is what gets returned on success and embedded in the error message
    on failure.

    Parameters
    ----------
    cmd : List[str]
        list of command-line arguments

    Returns
    -------
    output : str
        resulting stdout capture from the subprocess

    Raises
    ------
    RuntimeError
        if the subprocess exits with a nonzero return code
    """
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    (output, _) = proc.communicate()
    # Tool output is not guaranteed to be valid UTF-8; substitute undecodable
    # bytes rather than letting a UnicodeDecodeError hide the command's
    # actual result (or its actual error message).
    output = output.decode("utf-8", errors="replace")
    if proc.returncode != 0:
        cmd_str = " ".join(cmd)
        msg = f"error while running command \"{cmd_str}\":\n{output}"
        raise RuntimeError(msg)
    return output
- if "riscv" in toolchain_prefix and section_name == 'bss': + if "riscv" in toolchain_prefix and section_name == "bss": # TODO(weberlo): Figure out why 32 is the minimum constant that works. # # The current hypothesis is that the last symbols in the ".bss" and @@ -97,7 +164,14 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): @register_func("tvm_callback_relocate_binary") def tvm_callback_relocate_binary( - binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix): + binary_path, + word_size, + text_start, + rodata_start, + data_start, + bss_start, + stack_end, + toolchain_prefix): """Relocates sections in the binary to new addresses Parameters @@ -105,17 +179,23 @@ def tvm_callback_relocate_binary( binary_path : str path of the binary file - text_addr : str - text section absolute address + word_size : int + word size on the target machine + + text_start : int + text section address + + rodata_start : int + rodata section address - rodata_addr : str - rodata section absolute address + data_start : int + data section address - data_addr : str - data section absolute address + bss_start : int + bss section address - bss_addr : str - bss section absolute address + stack_end : int + stack section end address toolchain_prefix : str prefix for binary names in target compiler toolchain @@ -125,68 +205,29 @@ def tvm_callback_relocate_binary( rel_bin : bytearray the relocated binary """ - tmp_dir = util.tempdir() - rel_obj_path = tmp_dir.relpath("relocated.o") + stack_pointer_init = stack_end - word_size ld_script_contents = "" # TODO(weberlo): There should be a better way to configure this for different archs. if "riscv" in toolchain_prefix: ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n" - # TODO(weberlo): Generate the script in a more procedural manner. - ld_script_contents += """ -SECTIONS -{ - . = %s; - . = ALIGN(8); - .text : - { - *(.text) - . = ALIGN(8); - *(.text*) - } - . = %s; - . 
= ALIGN(8); - .rodata : - { - *(.rodata) - . = ALIGN(8); - *(.rodata*) - } - . = %s; - . = ALIGN(8); - .data : - { - *(.data) - . = ALIGN(8); - *(.data*) - . = ALIGN(8); - *(.sdata) - } - . = %s; - . = ALIGN(8); - .bss : - { - *(.bss) - . = ALIGN(8); - *(.bss*) - . = ALIGN(8); - *(.sbss) - } -} - """ % (text_addr, rodata_addr, data_addr, bss_addr) + ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format( + word_size=word_size, + text_start=text_start, + rodata_start=rodata_start, + data_start=data_start, + bss_start=bss_start, + stack_pointer_init=stack_pointer_init) + + tmp_dir = util.tempdir() + rel_obj_path = tmp_dir.relpath("relocated.obj") rel_ld_script_path = tmp_dir.relpath("relocated.lds") with open(rel_ld_script_path, "w") as f: f.write(ld_script_contents) - ld_proc = subprocess.Popen(["{}ld".format(toolchain_prefix), binary_path, - "-T", rel_ld_script_path, - "-o", rel_obj_path], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - (out, _) = ld_proc.communicate() - if ld_proc.returncode != 0: - msg = "linking error using ld:\n" - msg += py_str(out) - raise RuntimeError(msg) - + run_cmd([ + "{}ld".format(toolchain_prefix), + binary_path, + "-T", rel_ld_script_path, + "-o", rel_obj_path]) with open(rel_obj_path, "rb") as f: rel_bin = bytearray(f.read()) return rel_bin @@ -217,16 +258,11 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix): tmp_section = tmp_dir.relpath("tmp_section.bin") with open(tmp_bin, "wb") as out_file: out_file.write(bytes(binary)) - objcopy_proc = subprocess.Popen(["{}objcopy".format(toolchain_prefix), "--dump-section", - ".{}={}".format(section, tmp_section), - tmp_bin], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - (out, _) = objcopy_proc.communicate() - if objcopy_proc.returncode != 0: - msg = "error in using objcopy:\n" - msg += py_str(out) - raise RuntimeError(msg) + run_cmd([ + "{}objcopy".format(toolchain_prefix), + "--dump-section", + ".{}={}".format(section, tmp_section), + tmp_bin]) 
if os.path.isfile(tmp_section): # Get section content if it exists. with open(tmp_section, "rb") as f: @@ -259,15 +295,12 @@ def tvm_callback_get_symbol_map(binary, toolchain_prefix): tmp_obj = tmp_dir.relpath("tmp_obj.bin") with open(tmp_obj, "wb") as out_file: out_file.write(bytes(binary)) - nm_proc = subprocess.Popen(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - (nm_output, _) = nm_proc.communicate() - if nm_proc.returncode != 0: - msg = "error in using nm:\n" - msg += py_str(nm_output) - raise RuntimeError(msg) - nm_output = nm_output.decode("utf8").splitlines() + nm_output = run_cmd([ + "{}nm".format(toolchain_prefix), + "-C", + "--defined-only", + tmp_obj]) + nm_output = nm_output.splitlines() map_str = "" for line in nm_output: line = line.split() diff --git a/python/tvm/exec/rpc_server.py b/python/tvm/exec/rpc_server.py index 397238e0eef1..dbb690267e2a 100644 --- a/python/tvm/exec/rpc_server.py +++ b/python/tvm/exec/rpc_server.py @@ -19,14 +19,22 @@ from __future__ import absolute_import import argparse +import ast import multiprocessing import sys import logging +import tvm +from tvm import micro from .. 
def init_utvm(args):
    """MicroTVM-specific RPC initialization

    Builds a device config, either by loading the JSON file given with
    `--utvm-dev-config` or by calling the `default_config` function registered
    for `--utvm-dev-id`, and then overrides the `tvm.rpc.server.start` and
    `tvm.rpc.server.shutdown` functions so that a MicroTVM session is entered
    on start and exited on shutdown.

    Parameters
    ----------
    args : argparse.Namespace
        parsed args from command-line invocation

    Raises
    ------
    RuntimeError
        if both, or neither, of `--utvm-dev-config` and `--utvm-dev-id` are set
    """
    # `json` is not part of the import block visible at the top of this file,
    # so it is imported here to keep this function self-contained.
    import json

    if args.utvm_dev_config and args.utvm_dev_id:
        raise RuntimeError('only one of --utvm-dev-config and --utvm-dev-id allowed')
    if not (args.utvm_dev_config or args.utvm_dev_id):
        raise RuntimeError('one of --utvm-dev-config and --utvm-dev-id is required')

    if args.utvm_dev_config:
        with open(args.utvm_dev_config, 'r') as dev_conf_file:
            dev_config = json.load(dev_conf_file)
    else:
        # `--utvm-dev-config-args` is optional; without it the default config
        # function is called with no arguments instead of failing inside
        # `ast.literal_eval(None)`.
        if args.utvm_dev_config_args:
            dev_config_args = ast.literal_eval(args.utvm_dev_config_args)
        else:
            dev_config_args = []
        default_config_func = micro.device.get_device_funcs(args.utvm_dev_id)['default_config']
        dev_config = default_config_func(*dev_config_args)

    # Shared between the two hooks below. Previously `session` was a local of
    # `server_start`, so `server_shutdown` could not see it and raised
    # NameError when invoked.
    state = {'session': None}

    # add MicroTVM overrides
    @tvm.register_func('tvm.rpc.server.start', override=True)
    def server_start():
        # pylint: disable=unused-variable
        state['session'] = micro.Session(dev_config)
        state['session']._enter()

    @tvm.register_func('tvm.rpc.server.shutdown', override=True)
    def server_shutdown():
        # pylint: disable=unused-variable
        # nothing to exit if shutdown is requested before start ever ran
        if state['session'] is not None:
            state['session']._exit()
parser.add_argument('--utvm-dev-config-args', type=str, + help=('Python list of literals required to generate a default' + ' MicroTVM config (if --utvm-dev-id is specified)')) parser.set_defaults(fork=True) args = parser.parse_args() diff --git a/python/tvm/micro/__init__.py b/python/tvm/micro/__init__.py index 2f3981903cab..9e984c08fe2c 100644 --- a/python/tvm/micro/__init__.py +++ b/python/tvm/micro/__init__.py @@ -14,13 +14,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -"""uTVM module for bare-metal backends. - -uTVM (or the micro backend) enables provides support for bare-metal devices. -Its targets currently include a host-emulated device which is used for testing, -and JTAG-based openocd device which allows actual interfacing with microdevices. -""" +"""MicroTVM module for bare-metal backends""" from ..contrib import binutil -from .base import Session, cross_compiler, create_micro_lib +from .base import Session, create_micro_mod, cross_compiler +from .base import LibType, get_micro_host_driven_dir, get_micro_device_dir +from . import device diff --git a/python/tvm/micro/base.py b/python/tvm/micro/base.py index cab6f78363b5..e2e1329cb36c 100644 --- a/python/tvm/micro/base.py +++ b/python/tvm/micro/base.py @@ -14,71 +14,100 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- -"""Base definitions for micro.""" +"""Base definitions for MicroTVM""" from __future__ import absolute_import -import logging import os import sys +from enum import Enum +import tvm from tvm.contrib import util as _util from tvm.contrib import cc as _cc - from .._ffi.function import _init_api -from .._ffi.libinfo import find_include_path -SUPPORTED_DEVICE_TYPES = ["host", "openocd"] +class LibType(Enum): + """Enumeration of library types that can be compiled and loaded onto a device""" + # library to be used as a MicroTVM runtime + RUNTIME = 0 + # library to be used as an operator + OPERATOR = 1 + class Session: """MicroTVM Device Session Parameters ---------- - device_type : str - type of low-level device - - toolchain_prefix : str - toolchain prefix to be used. For example, a prefix of - "riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as - the compiler and "riscv64-unknown-elf-ld" is used as the linker, - etc. + config : dict + configuration for this session (as generated by + `tvm.micro.device.host.default_config()`, for example) Example -------- .. code-block:: python c_mod = ... 
def __init__(self, config):
    """Create a MicroTVM session from a device config dict.

    Reads the toolchain, memory layout, word size, thumb mode and
    communication method from `config`, compiles `utvm_runtime.c` with the
    `create_micro_lib` function registered for `config["device_id"]`, and
    creates the underlying session module.

    Parameters
    ----------
    config : dict
        configuration for this session (as generated by
        `tvm.micro.device.host.default_config()`, for example)

    Raises
    ------
    RuntimeError
        if `config["comms_method"]` is neither "openocd" nor "host"
    """
    self._check_system()
    # TODO(weberlo): add config validation

    # grab the device-specific compilation function from the ID in the config
    dev_funcs = tvm.micro.device.get_device_funcs(config["device_id"])
    self.create_micro_lib = dev_funcs["create_micro_lib"]
    self.toolchain_prefix = config["toolchain_prefix"]
    self.mem_layout = config["mem_layout"]
    self.word_size = config["word_size"]
    self.thumb_mode = config["thumb_mode"]
    self.comms_method = config["comms_method"]

    # Resolve the server endpoint before compiling anything, so an
    # unsupported communication method is rejected without first paying for
    # a runtime build.
    if self.comms_method == "openocd":
        server_addr = config["server_addr"]
        server_port = config["server_port"]
    elif self.comms_method == "host":
        server_addr = ""
        server_port = 0
    else:
        raise RuntimeError(f"unknown communication method: {self.comms_method}")

    # First, find and compile runtime library.
    runtime_src_path = os.path.join(get_micro_host_driven_dir(), "utvm_runtime.c")
    tmp_dir = _util.tempdir()
    runtime_obj_path = tmp_dir.relpath("utvm_runtime.obj")
    self.create_micro_lib(runtime_obj_path, runtime_src_path, LibType.RUNTIME)

    # Flatten the layout into (start, size) pairs, in the positional order
    # `_CreateSession` is called with. A missing "start" defaults to 0.
    section_args = []
    for section_name in ("text", "rodata", "data", "bss",
                         "args", "heap", "workspace", "stack"):
        section = self.mem_layout[section_name]
        section_args += [section.get("start", 0), section["size"]]

    self.module = _CreateSession(
        self.comms_method,
        runtime_obj_path,
        self.toolchain_prefix,
        *section_args,
        self.word_size,
        self.thumb_mode,
        server_addr,
        server_port)
    self._enter = self.module["enter"]
    self._exit = self.module["exit"]
""" if not sys.platform.startswith("linux"): - raise RuntimeError("microTVM is currently only supported on Linux") + raise RuntimeError("MicroTVM is currently only supported on Linux hosts") # TODO(weberlo): Add 32-bit support. # It's primarily the compilation pipeline that isn't compatible. if sys.maxsize <= 2**32: - raise RuntimeError("microTVM is currently only supported on 64-bit platforms") - - def _check_args(self, device_type, args): - """Check if the given configuration is valid.""" - if device_type == "host": - pass - elif device_type == "openocd": - assert "base_addr" in args - assert "server_addr" in args - assert "port" in args + raise RuntimeError("MicroTVM is currently only supported on 64-bit host platforms") def __enter__(self): self._enter() + return self def __exit__(self, exc_type, exc_value, exc_traceback): self._exit() -def _get_micro_device_dir(): - """Get directory path for uTVM runtime source files. +def create_micro_mod(c_mod, dev_config): + """Produces a micro module from a given module. + + Parameters + ---------- + c_mod : tvm.module.Module + module with "c" as its target backend + + dev_config : Dict[str, Any] + MicroTVM config dict for the target device Return ------ - micro_device_dir : str - directory path + micro_mod : tvm.module.Module + micro module for the target device """ - micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) - micro_device_dir = os.path.join(micro_dir, "..", "..", "..", - "src", "runtime", "micro", "device") - return micro_device_dir + temp_dir = _util.tempdir() + lib_obj_path = temp_dir.relpath("dev_lib.obj") + c_mod.export_library( + lib_obj_path, + fcompile=cross_compiler(dev_config, LibType.OPERATOR)) + micro_mod = tvm.module.load(lib_obj_path) + return micro_mod -def cross_compiler(toolchain_prefix, include_dev_lib_header=True): - """Creates a cross compile function that wraps `create_micro_lib`. 
def cross_compiler(dev_config, lib_type):
    """Create a cross-compile function for the device named in `dev_config`.

    The returned function wraps the `create_micro_lib` function registered
    for `dev_config['device_id']`.

    For use in `tvm.module.Module.export_library`.

    Parameters
    ----------
    dev_config : Dict[str, Any]
        MicroTVM config dict for the target device

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    Return
    ------
    func : Callable
        the result of `_cc.cross_compiler` applied to the wrapper, with
        "obj" as the output format

    Example
    --------
    .. code-block:: python

      c_mod = ...  # some module generated with "c" as the target
      fcompile = tvm.micro.cross_compiler(dev_config, LibType.OPERATOR)
      c_mod.export_library("dev_lib.obj", fcompile=fcompile)
    """
    dev_funcs = tvm.micro.device.get_device_funcs(dev_config['device_id'])
    create_micro_lib = dev_funcs['create_micro_lib']
    def compile_func(obj_path, src_path, **kwargs):
        # when lists are passed, only their first entry is compiled
        if isinstance(obj_path, list):
            obj_path = obj_path[0]
        if isinstance(src_path, list):
            src_path = src_path[0]
        create_micro_lib(obj_path, src_path, lib_type, kwargs.get("options", None))
    return _cc.cross_compiler(compile_func, output_format="obj")


def _get_micro_runtime_subdir(subdir):
    """Join `src/runtime/micro/<subdir>` onto the path three levels above this file's directory."""
    micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
    return os.path.join(micro_dir, "..", "..", "..",
                        "src", "runtime", "micro", subdir)


def get_micro_host_driven_dir():
    """Get directory path for uTVM host-driven runtime source files.

    Return
    ------
    micro_host_driven_dir : str
        directory path
    """
    return _get_micro_runtime_subdir("host_driven")


def get_micro_device_dir():
    """Get directory path for parent directory of device-specific source files

    Return
    ------
    micro_device_dir : str
        directory path
    """
    return _get_micro_runtime_subdir("device")
import host +from . import arm +from . import riscv_spike diff --git a/python/tvm/micro/device/arm/__init__.py b/python/tvm/micro/device/arm/__init__.py new file mode 100644 index 000000000000..be323b9e0a2b --- /dev/null +++ b/python/tvm/micro/device/arm/__init__.py @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Base module for ARM device configurations""" + +from . import stm32f746xx diff --git a/python/tvm/micro/device/arm/stm32f746xx.py b/python/tvm/micro/device/arm/stm32f746xx.py new file mode 100644 index 000000000000..31b44cf9d36b --- /dev/null +++ b/python/tvm/micro/device/arm/stm32f746xx.py @@ -0,0 +1,123 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
DEVICE_ID = "arm.stm32f746xx"
TOOLCHAIN_PREFIX = "arm-none-eabi-"

def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC; the list is not modified
    """
    # Copy instead of extending in place: `options` belongs to the caller,
    # and appending to it would make the device flags pile up every time the
    # same list is passed in again.
    options = list(options) if options is not None else []
    options += [
        "-mcpu=cortex-m7",
        "-mlittle-endian",
        "-mfloat-abi=hard",
        "-mfpu=fpv5-sp-d16",
        "-mthumb",
        "-gdwarf-5",
    ]
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=options)


def default_config(server_addr, server_port):
    """Generates a default configuration for ARM STM32F746XX devices

    Parameters
    ----------
    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    return {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        #
        # [Device Memory Layout]
        #   RAM   (rwx) : START = 0x20000000, LENGTH = 320K
        #   FLASH (rx)  : START = 0x8000000,  LENGTH = 1024K
        #
        # Each section below starts where the previous one ends.
        "mem_layout": {
            "text": {
                "start": 0x20000180,
                "size": 20480,
            },
            "rodata": {
                "start": 0x20005180,
                "size": 20480,
            },
            "data": {
                "start": 0x2000a180,
                "size": 768,
            },
            "bss": {
                "start": 0x2000a480,
                "size": 768,
            },
            "args": {
                "start": 0x2000a780,
                "size": 1280,
            },
            "heap": {
                "start": 0x2000ac80,
                "size": 262144,
            },
            "workspace": {
                "start": 0x2004ac80,
                "size": 20480,
            },
            "stack": {
                "start": 0x2004fc80,
                "size": 80,
            },
        },
        "word_size": 4,
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
_DEVICE_REGISTRY = {}

def register_device(device_id, device_funcs):
    """Register a device and associated compilation/config functions

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has already been registered
    """
    if device_id in _DEVICE_REGISTRY:
        raise RuntimeError(f"\"{device_id}\" already exists in the device registry")
    _DEVICE_REGISTRY[device_id] = device_funcs


def get_device_funcs(device_id):
    """Get compilation and config generation functions for device

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    Return
    ------
    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has not been registered
    """
    if device_id not in _DEVICE_REGISTRY:
        raise RuntimeError(f"\"{device_id}\" does not exist in the device registry")
    return _DEVICE_REGISTRY[device_id]


def create_micro_lib_base(
        out_obj_path,
        in_src_path,
        toolchain_prefix,
        device_id,
        lib_type,
        options=None):
    """Compiles code into a binary for the target micro device.

    Every source file is compiled to its own object file inside a temporary
    directory, and the objects are then combined into `out_obj_path` with a
    relocatable link.

    Parameters
    ----------
    out_obj_path : str
        path to generated object file

    in_src_path : str
        path to source file

    toolchain_prefix : str
        toolchain prefix to be used. For example, a prefix of
        "riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as
        the compiler and "riscv64-unknown-elf-ld" is used as the linker,
        etc.

    device_id : str
        unique identifier for the target device

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : List[str]
        additional options to pass to GCC

    Raises
    ------
    RuntimeError
        if `lib_type` is unknown, if the device source directory has no
        `utvm_timer.c`, if two sources would produce the same object file
        name, or if a compiler/linker invocation fails
    """
    base_compile_cmd = [
        f"{toolchain_prefix}gcc",
        "-std=c11",
        "-Wall",
        "-Wextra",
        "--pedantic",
        "-c",
        "-O0",
        "-g",
        "-nostartfiles",
        "-nodefaultlibs",
        "-nostdlib",
        "-fdata-sections",
        "-ffunction-sections",
    ]
    if options is not None:
        base_compile_cmd += options

    include_paths = find_include_path() + [get_micro_host_driven_dir()]
    for path in include_paths:
        base_compile_cmd += ["-I", path]

    tmp_dir = _util.tempdir()
    src_paths = []
    if lib_type == LibType.RUNTIME:
        dev_dir = _get_device_source_dir(device_id)
        dev_src_paths = glob.glob(f"{dev_dir}/*.[csS]")
        # there needs to at least be a utvm_timer.c file
        if "utvm_timer.c" not in map(os.path.basename, dev_src_paths):
            raise RuntimeError(
                f"no \"utvm_timer.c\" found in device source directory \"{dev_dir}\"")
        src_paths += dev_src_paths
        src_paths.append(in_src_path)
    elif lib_type == LibType.OPERATOR:
        # create a temporary copy of the source, so we can inject the dev lib
        # header without modifying the original.
        temp_src_path = tmp_dir.relpath("temp.c")
        with open(in_src_path, "r") as f:
            src_lines = f.read().splitlines()
        src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"")
        with open(temp_src_path, "w") as f:
            f.write("\n".join(src_lines))
        # "-c" is already part of the base command, so it is not added again
        src_paths.append(temp_src_path)
    else:
        raise RuntimeError("unknown lib type")

    prereq_obj_paths = []
    for src_path in src_paths:
        # Keep intermediate objects in the temp dir; a bare file name would
        # be written to whatever the current working directory happens to be.
        curr_obj_path = tmp_dir.relpath(Path(src_path).with_suffix(".o").name)
        if curr_obj_path in prereq_obj_paths:
            raise RuntimeError(
                f"multiple sources map to the same object file \"{curr_obj_path}\"")
        prereq_obj_paths.append(curr_obj_path)
        run_cmd(base_compile_cmd + [src_path, "-o", curr_obj_path])

    ld_cmd = [f"{toolchain_prefix}ld", "-relocatable"]
    ld_cmd += prereq_obj_paths
    ld_cmd += ["-o", out_obj_path]
    run_cmd(ld_cmd)


def _get_device_source_dir(device_id):
    """Grabs the source directory for device-specific uTVM files"""
    # each dot-separated component of the device ID is one directory level
    dev_subdir = "/".join(device_id.split("."))
    return get_micro_device_dir() + "/" + dev_subdir
"""Compilation and config definitions for the host emulated device"""
import sys

from . import create_micro_lib_base, register_device

DEVICE_ID = "host"
TOOLCHAIN_PREFIX = ""


def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    On 64-bit Linux hosts, "-mcmodel=large" is appended to the GCC options.
    The caller's `options` list is never modified.

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC
    """
    # Work on a copy: extending the argument in place would leak the extra
    # flag into the caller's list and accumulate it across repeated calls.
    options = [] if options is None else list(options)
    if sys.maxsize > 2**32 and sys.platform.startswith("linux"):
        options.append("-mcmodel=large")
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=options)


def default_config():
    """Generates a default configuration for the host emulated device

    Only section sizes are given in the memory layout; no start addresses
    are set here.

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    return {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": {
            "text": {
                "size": 20480,
            },
            "rodata": {
                "size": 20480,
            },
            "data": {
                "size": 768,
            },
            "bss": {
                "size": 768,
            },
            "args": {
                "size": 1280,
            },
            "heap": {
                "size": 262144,
            },
            "workspace": {
                "size": 20480,
            },
            "stack": {
                "size": 80,
            },
        },
        # word size follows the interpreter's pointer width (8 on 64-bit hosts)
        "word_size": 8 if sys.maxsize > 2**32 else 4,
        "thumb_mode": False,
        "comms_method": "host",
    }


register_device(DEVICE_ID, {
    "create_micro_lib": create_micro_lib,
    "default_config": default_config,
})
under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Compilation and config definitions for Spike, a RISC-V functional ISA simulator""" +from collections import OrderedDict + +from . import create_micro_lib_base, register_device + +DEVICE_ID = "riscv_spike" +TOOLCHAIN_PREFIX = "riscv64-unknown-elf-" + +def create_micro_lib(obj_path, src_path, lib_type, options=None): + """Wrapper over `create_micro_lib_base` to add device-specific options + + Parameters + ---------- + obj_path : str + path to generated object file + + src_path : str + path to source file + + lib_type : micro.LibType + whether to compile a MicroTVM runtime or operator library + + options : Optional[List[str]] + additional options to pass to GCC + """ + create_micro_lib_base( + obj_path, + src_path, + TOOLCHAIN_PREFIX, + DEVICE_ID, + lib_type, + options=options) + + +def default_config(base_addr, server_addr, server_port): + """Generates a default configuration for Spike + + Parameters + ---------- + base_addr : int + base address of the simulator (for calculating the memory layout) + + server_addr : str + address of OpenOCD server to connect to + + server_port : int + port of OpenOCD server to connect to + + Return + ------ + config : Dict[str, Any] + MicroTVM config dict for this device + """ + res = { + "device_id": 
DEVICE_ID, + "toolchain_prefix": TOOLCHAIN_PREFIX, + "mem_layout": OrderedDict([ + ("text", { + "size": 20480, + }), + ("rodata", { + "size": 20480, + }), + ("data", { + "size": 768, + }), + ("bss", { + "size": 768, + }), + ("args", { + "size": 1280, + }), + ("heap", { + "size": 262144, + }), + ("workspace", { + "size": 20480, + }), + ("stack", { + "size": 80, + }), + ]), + "word_size": 4, + "thumb_mode": True, + "comms_method": "openocd", + "server_addr": server_addr, + "server_port": server_port, + } + # generate section start addresses from the given `base_addr` + curr_offset = 0 + mem_layout = res["mem_layout"] + for region_dict in mem_layout.values(): + region_dict["start"] = base_addr + curr_offset + curr_offset += region_dict["size"] + return res + + +register_device(DEVICE_ID, { + "create_micro_lib": create_micro_lib, + "default_config": default_config, +}) diff --git a/python/tvm/module.py b/python/tvm/module.py index 2790227f32c7..0cd522ed1edd 100644 --- a/python/tvm/module.py +++ b/python/tvm/module.py @@ -265,6 +265,9 @@ def load(path, fmt=""): files = [tar_temp.relpath(x) for x in tar_temp.listdir()] _cc.create_shared(path + ".so", files) path += ".so" + # TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files + elif path.endswith(".obj"): + fmt = "micro_dev" # Redirect to the load API return _LoadFromFile(path, fmt) diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index ac991d4bfea3..80fd57af66f9 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -85,7 +85,9 @@ Target CreateTarget(const std::string& target_name, } t->device_type = kDLCPU; t->thread_warp_size = 1; - if (target_name == "c" || target_name == "llvm") { + if (target_name == "c" && t->device_name == "micro_dev") { + t->device_type = kDLMicroDev; + } else if (target_name == "c" || target_name == "llvm") { t->keys_array.push_back(ir::StringImm::make("cpu")); } else if (target_name == "cuda" || target_name == 
"nvptx") { t->device_type = kDLGPU; diff --git a/src/codegen/codegen_c_host.cc b/src/codegen/codegen_c_host.cc index 8e8c6633c22c..9c099a425fd6 100644 --- a/src/codegen/codegen_c_host.cc +++ b/src/codegen/codegen_c_host.cc @@ -33,7 +33,8 @@ CodeGenCHost::CodeGenCHost() { module_name_ = GetUniqueName("__tvm_module_ctx"); } -void CodeGenCHost::Init(bool output_ssa) { +void CodeGenCHost::Init(bool output_ssa, bool emit_asserts) { + emit_asserts_ = emit_asserts; decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n"; decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n"; decl_stream << "extern void* " << module_name_ << " = NULL;\n"; @@ -237,17 +238,19 @@ void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) } void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*) - std::string cond = PrintExpr(op->condition); - PrintIndent(); - stream << "if (!(" << cond << ")) {\n"; - int assert_if_scope = this->BeginScope(); - PrintIndent(); - stream << "TVMAPISetLastError(\"" << op->message.as()->value << "\");\n"; - PrintIndent(); - stream << "return -1;\n"; - this->EndScope(assert_if_scope); - PrintIndent(); - stream << "}\n"; + if (emit_asserts_) { + std::string cond = PrintExpr(op->condition); + PrintIndent(); + stream << "if (!(" << cond << ")) {\n"; + int assert_if_scope = this->BeginScope(); + PrintIndent(); + stream << "TVMAPISetLastError(\"" << op->message.as()->value << "\");\n"; + PrintIndent(); + stream << "return -1;\n"; + this->EndScope(assert_if_scope); + PrintIndent(); + stream << "}\n"; + } this->PrintStmt(op->body); } @@ -277,8 +280,9 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op, runtime::Module BuildCHost(Array funcs) { using tvm::runtime::Registry; bool output_ssa = false; + bool emit_asserts = false; CodeGenCHost cg; - cg.Init(output_ssa); + cg.Init(output_ssa, emit_asserts); for (LoweredFunc f : funcs) { cg.AddFunction(f); } diff --git a/src/codegen/codegen_c_host.h b/src/codegen/codegen_c_host.h 
index 1b3271fd2c6b..80e359c33ce0 100644 --- a/src/codegen/codegen_c_host.h +++ b/src/codegen/codegen_c_host.h @@ -35,7 +35,7 @@ namespace codegen { class CodeGenCHost final : public CodeGenC { public: CodeGenCHost(); - void Init(bool output_ssa); + void Init(bool output_ssa, bool emit_asserts); void AddFunction(LoweredFunc f); std::string Finish(); @@ -53,6 +53,8 @@ class CodeGenCHost final : public CodeGenC { private: std::string module_name_; + /*! \brief whether to emit asserts in the resulting C code */ + bool emit_asserts_; void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name); void PrintFuncCall(const std::string& packed_func_name, int num_args); diff --git a/src/runtime/micro/device/arm/stm32f746xx/utvm_init.s b/src/runtime/micro/device/arm/stm32f746xx/utvm_init.s new file mode 100644 index 000000000000..300deb8079a0 --- /dev/null +++ b/src/runtime/micro/device/arm/stm32f746xx/utvm_init.s @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file utvm_init.s + * \brief uTVM init definition for STM32F746XX-series boards + */ + +.syntax unified +.cpu cortex-m7 +.fpu softvfp +.thumb + +.section .text.UTVMInit +.type UTVMInit, %function +UTVMInit: + /* enable fpu */ + ldr r0, =0xE000ED88 + ldr r1, [r0] + ldr r2, =0xF00000 + orr r1, r2 + str r1, [r0] + dsb + isb + /* set stack pointer */ + ldr sp, =_utvm_stack_pointer_init + bl UTVMMain +.size UTVMInit, .-UTVMInit diff --git a/src/runtime/micro/device/arm/stm32f746xx/utvm_timer.c b/src/runtime/micro/device/arm/stm32f746xx/utvm_timer.c new file mode 100644 index 000000000000..1b8376150fce --- /dev/null +++ b/src/runtime/micro/device/arm/stm32f746xx/utvm_timer.c @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file utvm_timer.c + * \brief uTVM timer API definitions for STM32F746XX-series boards + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "utvm_runtime.h" + +// There are two implementations of cycle counters on the STM32F7X: SysTick and +// CYCCNT. SysTick is preferred, as it gives better error handling, but the +// counter is only 24 bits wide. If a larger timer is needed, use the CYCCNT +// implementation, which has a 32-bit counter. 
#define USE_SYSTICK

#ifdef USE_SYSTICK

// SysTick registers, accessed through fixed memory-mapped addresses.
#define SYST_CSR    (*((volatile uint32_t *) 0xE000E010))
#define SYST_RVR    (*((volatile uint32_t *) 0xE000E014))
#define SYST_CVR    (*((volatile uint32_t *) 0xE000E018))
#define SYST_CALIB  (*((volatile uint32_t *) 0xE000E01C))

// NOTE: the constants below are bit *positions*, not masks. Shift them
// (`1 << X`) before combining them with a register value.
#define SYST_CSR_ENABLE     0
#define SYST_CSR_TICKINT    1
#define SYST_CSR_CLKSOURCE  2
#define SYST_COUNTFLAG      16

#define SYST_CALIB_NOREF    31
#define SYST_CALIB_SKEW     30

uint32_t start_time = 0;
uint32_t stop_time = 0;

/*!
 * \brief enable the SysTick counter and record its starting value
 * \return 0 (this implementation has no failure path)
 */
int32_t UTVMTimerStart() {
  SYST_CSR = (1 << SYST_CSR_ENABLE) | (1 << SYST_CSR_CLKSOURCE);
  // wait until timer starts
  while (SYST_CVR == 0) {}
  start_time = SYST_CVR;
  return 0;
}

/*!
 * \brief disable the SysTick counter and record its final value
 */
void UTVMTimerStop() {
  SYST_CSR = 0;
  stop_time = SYST_CVR;
}

/*!
 * \brief disable the counter, then reset its reload and current values
 */
void UTVMTimerReset() {
  SYST_CSR = 0;
  // maximum reload value (24-bit)
  SYST_RVR = (~((uint32_t) 0)) >> 8;
  SYST_CVR = 0;
}

/*!
 * \brief get the number of ticks between the last start and stop
 * \return elapsed ticks, or `(uint32_t) -1` (with the last error set) if the
 *         overflow flag is set
 */
uint32_t UTVMTimerRead() {
  // SYST_COUNTFLAG is a bit position; testing `SYST_CSR & SYST_COUNTFLAG`
  // would inspect bit 4 rather than bit 16, so build the mask first.
  // NOTE(review): COUNTFLAG presumably means the counter wrapped since the
  // last read -- confirm against the ARMv7-M reference manual.
  if (SYST_CSR & (1 << SYST_COUNTFLAG)) {
    TVMAPISetLastError("timer overflowed");
    return (uint32_t) -1;
  } else {
    // the start value is the larger one here (start minus stop is elapsed)
    return start_time - stop_time;
  }
}

#else  // !USE_SYSTICK

// DWT registers, accessed through fixed memory-mapped addresses.
#define DWT_CTRL    (*((volatile uint32_t *) 0xE0001000))
#define DWT_CYCCNT  (*((volatile uint32_t *) 0xE0001004))

// Bit positions (not masks) within DWT_CTRL.
#define DWT_CTRL_NOCYCCNT   25
#define DWT_CTRL_CYCCNTENA  0

uint32_t start_time = 0;
uint32_t stop_time = 0;

/*!
 * \brief zero the cycle counter
 */
void UTVMTimerReset() {
  DWT_CYCCNT = 0;
}

/*!
 * \brief record the starting cycle count and enable the cycle counter
 * \return 0 on success, -1 (with the last error set) if the NOCYCCNT bit is
 *         set in DWT_CTRL
 */
int32_t UTVMTimerStart() {
  // DWT_CTRL_NOCYCCNT is a bit position, so shift it into a mask.
  if (DWT_CTRL & (1 << DWT_CTRL_NOCYCCNT)) {
    TVMAPISetLastError("cycle counter not implemented on device");
    return -1;
  }
  start_time = DWT_CYCCNT;
  DWT_CTRL |= (1 << DWT_CTRL_CYCCNTENA);
  // the success path previously fell off the end without returning a value
  return 0;
}

/*!
 * \brief record the final cycle count and disable the cycle counter
 */
void UTVMTimerStop() {
  stop_time = DWT_CYCCNT;
  DWT_CTRL &= ~(1 << DWT_CTRL_CYCCNTENA);
}

/*!
 * \brief get the number of cycles between the last start and stop
 * \return elapsed cycles
 */
uint32_t UTVMTimerRead() {
  // Unsigned subtraction is modulo 2^32, so this is correct both when the
  // counter did not wrap and when it wrapped once. (The previous code
  // compared `stop_time > stop_time`, which is never true, and its
  // wraparound formula was one cycle short.) The return type matches the
  // `uint32_t UTVMTimerRead()` prototype declared in utvm_runtime.h.
  return stop_time - start_time;
}

#endif  // USE_SYSTICK

#ifdef __cplusplus
}  // TVM_EXTERN_C
#endif
b/src/runtime/micro/device/host/utvm_init.c new file mode 100644 index 000000000000..4fb43c11d20e --- /dev/null +++ b/src/runtime/micro/device/host/utvm_init.c @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file utvm_init.c + * \brief uTVM init definition for the host emulated device + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "utvm_runtime.h" + +void UTVMInit() { + // no init required for the host + UTVMMain(); +} + +#ifdef __cplusplus +} // TVM_EXTERN_C +#endif diff --git a/src/runtime/micro/device/host/utvm_timer.c b/src/runtime/micro/device/host/utvm_timer.c new file mode 100644 index 000000000000..56a36ebae86d --- /dev/null +++ b/src/runtime/micro/device/host/utvm_timer.c @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file utvm_timer.c + * \brief uTVM timer API stubs for the host emulated device + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "utvm_runtime.h" + +// TODO(weberlo): use this? https://stackoverflow.com/questions/5141960/get-the-current-time-in-c + +int32_t UTVMTimerStart() { + return 0; +} + +void UTVMTimerStop() { } + +void UTVMTimerReset() { } + +uint32_t UTVMTimerRead() { + return 1; +} + +#ifdef __cplusplus +} // TVM_EXTERN_C +#endif diff --git a/src/runtime/micro/device/utvm_device_dylib_redirect.c b/src/runtime/micro/host_driven/utvm_device_dylib_redirect.c similarity index 100% rename from src/runtime/micro/device/utvm_device_dylib_redirect.c rename to src/runtime/micro/host_driven/utvm_device_dylib_redirect.c diff --git a/src/runtime/micro/device/utvm_runtime.c b/src/runtime/micro/host_driven/utvm_runtime.c similarity index 66% rename from src/runtime/micro/device/utvm_runtime.c rename to src/runtime/micro/host_driven/utvm_runtime.c index beb749a01f1e..a4de495a185c 100644 --- a/src/runtime/micro/device/utvm_runtime.c +++ b/src/runtime/micro/host_driven/utvm_runtime.c @@ -21,9 +21,9 @@ * \file utvm_runtime.cc * \brief uTVM runtime * - * All function calls go through `UTVMMain`, which reads from the current - * `UTVMTask` and calls the appropriate function with the arguments from the - * task. + * All function calls go through the externally defined `UTVMInit`, which + * performs device-specific setup, then calls `UTVMMain`. `UTVMMain` then + * calls the function in `utvm_task` with the arguments from the task. 
* * Additionally included in this file are definitions for some of the most * common functions used in the C runtime API. @@ -35,36 +35,58 @@ extern "C" { #include "utvm_runtime.h" // Task pointers must be patched before calling a function. -UTVMTask task; +UTVMTask utvm_task = { + .func = NULL, + .arg_values = NULL, + .arg_type_codes = NULL, + .num_args = 0, +}; + +size_t utvm_word_size = 0; // NOLINT(*) // These pointers are patched at load time to point to the workspace section. -char* utvm_workspace_begin = NULL; // NOLINT(*) -char* utvm_workspace_end = NULL; // NOLINT(*) -char* utvm_workspace_curr = NULL; // NOLINT(*) +char* utvm_workspace_start = NULL; // NOLINT(*) +char* utvm_workspace_end = NULL; // NOLINT(*) +char* utvm_workspace_curr = NULL; // NOLINT(*) // Keep track of how many active allocations there are on the workspace. size_t utvm_num_active_allocs = 0; const char* utvm_last_error = NULL; // NOLINT(*) -int32_t utvm_return_code = 0; // NOLINT(*) +int32_t utvm_return_code = 0; // NOLINT(*) -// We use a dummy function to signal execution is finished for device -// backends which require breakpoints. -void UTVMDone() { } +uint32_t utvm_task_time = 0; +// Gets called by UTVMInit, after device-specific initialization is finished. 
void UTVMMain() { - utvm_workspace_curr = utvm_workspace_begin; + utvm_workspace_curr = utvm_workspace_start; utvm_num_active_allocs = 0; utvm_last_error = NULL; // NOLINT(*) utvm_return_code = 0; - utvm_return_code = task.func((void*) task.arg_values, (void*) task.arg_type_codes, // NOLINT(*) - task.num_args); + utvm_task_time = 0; + UTVMTimerReset(); + int32_t err = UTVMTimerStart(); + if (err < 0) { + utvm_return_code = err; + UTVMDone(); + } + utvm_return_code = utvm_task.func( + (void*) utvm_task.arg_values, // NOLINT(*) + (void*) utvm_task.arg_type_codes, // NOLINT(*) + utvm_task.num_args); + UTVMTimerStop(); + utvm_task_time = UTVMTimerRead(); UTVMDone(); } +// We use a dummy function to signal execution is finished for device +// backends which require breakpoints. +void UTVMDone() { } + void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size, int dtype_code_hint, int dtype_bits_hint) { // Align up to 8 bytes. - utvm_workspace_curr += (8 - ((uintptr_t) utvm_workspace_curr % 8)) % 8; // NOLINT(*) + utvm_workspace_curr += + (utvm_word_size - ((uintptr_t) utvm_workspace_curr % utvm_word_size)) % utvm_word_size; // NOLINT(*) if (utvm_workspace_curr + size > utvm_workspace_end) { // Out of space in workspace. return NULL; @@ -81,11 +103,11 @@ int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { TVMAPISetLastError("free called with no active workspace allocations"); // Reset allocations and workspace (for future task executions). utvm_num_active_allocs = 0; - utvm_workspace_curr = utvm_workspace_begin; + utvm_workspace_curr = utvm_workspace_start; return -1; } else if (utvm_num_active_allocs == 0) { // No more allocations. Reset workspace. 
- utvm_workspace_curr = utvm_workspace_begin; + utvm_workspace_curr = utvm_workspace_start; return 0; } else { return 0; diff --git a/src/runtime/micro/device/utvm_runtime.h b/src/runtime/micro/host_driven/utvm_runtime.h similarity index 77% rename from src/runtime/micro/device/utvm_runtime.h rename to src/runtime/micro/host_driven/utvm_runtime.h index 3aa171223e01..c364ecf40792 100644 --- a/src/runtime/micro/device/utvm_runtime.h +++ b/src/runtime/micro/host_driven/utvm_runtime.h @@ -21,8 +21,8 @@ * \file utvm_runtime.h * \brief uTVM runtime headers */ -#ifndef TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ -#define TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ +#ifndef TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_ +#define TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_ #ifdef __cplusplus extern "C" { @@ -30,6 +30,7 @@ extern "C" { #include #include +#include /*! * \brief Task structure for uTVM @@ -45,8 +46,22 @@ typedef struct { int32_t num_args; } UTVMTask; +extern void UTVMInit(); + +extern void UTVMTimerReset(); + +extern int32_t UTVMTimerStart(); + +extern void UTVMTimerStop(); + +extern uint32_t UTVMTimerRead(); + +void UTVMMain(); + +void UTVMDone(); + #ifdef __cplusplus } // TVM_EXTERN_C #endif -#endif // TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ +#endif // TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_ diff --git a/src/runtime/micro/host_low_level_device.cc b/src/runtime/micro/host_low_level_device.cc index 4eea88aaaffd..a24994a2a0e5 100644 --- a/src/runtime/micro/host_low_level_device.cc +++ b/src/runtime/micro/host_low_level_device.cc @@ -31,6 +31,9 @@ namespace tvm { namespace runtime { +/*! \brief number of bytes in each page */ +constexpr int kPageSize = 4096; + /*! 
* \brief emulated low-level device on host machine */ @@ -40,40 +43,33 @@ class HostLowLevelDevice final : public LowLevelDevice { * \brief constructor to initialize on-host memory region to act as device * \param num_bytes size of the emulated on-device memory region */ - explicit HostLowLevelDevice(size_t num_bytes) : size_(num_bytes) { + explicit HostLowLevelDevice(size_t num_bytes, void** base_addr) : size_(num_bytes) { size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize; // TODO(weberlo): Set permissions per section (e.g., read-write perms for // the heap, execute perms for text, etc.). int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC; int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE; - base_addr_ = reinterpret_cast( - mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0)); + base_addr_ = mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0); + *base_addr = base_addr_; } /*! * \brief destructor to deallocate on-host device region */ virtual ~HostLowLevelDevice() { - munmap(reinterpret_cast(base_addr_), size_); - } - - void Read(DevBaseOffset offset, void* buf, size_t num_bytes) { - void* addr = ToDevPtr(offset).cast_to(); - std::memcpy(buf, addr, num_bytes); + munmap(base_addr_, size_); } - void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { - void* addr = ToDevPtr(offset).cast_to(); - std::memcpy(addr, buf, num_bytes); + void Read(DevPtr addr, void* buf, size_t num_bytes) { + std::memcpy(buf, addr.cast_to(), num_bytes); } - void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { - DevPtr func_addr = ToDevPtr(func_offset); - reinterpret_cast(func_addr.value())(); + void Write(DevPtr addr, const void* buf, size_t num_bytes) { + std::memcpy(addr.cast_to(), buf, num_bytes); } - std::uintptr_t base_addr() const final { - return base_addr_; + void Execute(DevPtr func_addr, DevPtr breakpoint_addr) { + reinterpret_cast(func_addr.value().val64)(); } const char* device_type() const final { @@ 
-82,14 +78,14 @@ class HostLowLevelDevice final : public LowLevelDevice { private: /*! \brief base address of the micro device memory region */ - std::uintptr_t base_addr_; + void* base_addr_; /*! \brief size of memory region */ size_t size_; }; -const std::shared_ptr HostLowLevelDeviceCreate(size_t num_bytes) { +const std::shared_ptr HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr) { std::shared_ptr lld = - std::make_shared(num_bytes); + std::make_shared(num_bytes, base_addr); return lld; } diff --git a/src/runtime/micro/low_level_device.h b/src/runtime/micro/low_level_device.h index 3cdfa77d30bc..3158e2fe20de 100644 --- a/src/runtime/micro/low_level_device.h +++ b/src/runtime/micro/low_level_device.h @@ -40,87 +40,52 @@ class LowLevelDevice { virtual ~LowLevelDevice() {} /*! - * \brief reads num_bytes from device memory at base_addr + offset into buffer - * \param offset on-device memory offset pointer to be read from + * \brief reads num_bytes from device memory at addr into buffer + * \param addr on-device memory address to read from * \param buffer on-host buffer to be read into - * \param num_bytes number of bytes to be read + * \param num_bytes number of bytes to read */ - virtual void Read(DevBaseOffset offset, + virtual void Read(DevPtr addr, void* buffer, size_t num_bytes) = 0; /*! - * \brief writes num_bytes from buffer to device memory at base_addr + offset - * \param offset on-device memory offset pointer to be written to - * \param buffer on-host buffer to be written - * \param num_bytes number of bytes to be written + * \brief writes num_bytes from buffer to device memory at addr + * \param addr on-device memory address to write into + * \param buffer host buffer to write from + * \param num_bytes number of bytes to write */ - virtual void Write(DevBaseOffset offset, + virtual void Write(DevPtr addr, const void* buffer, size_t num_bytes) = 0; /*! 
- * \brief starts execution of device at offset + * \brief starts execution of device at func_addr * \param func_addr offset of the init stub function - * \param breakpoint breakpoint at which to stop function execution + * \param breakpoint_addr address at which to stop function execution */ - virtual void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) = 0; - - // TODO(weberlo): Should we just give the device the *entire* memory layout - // decided by the session? - - /*! - * \brief sets the offset of the top of the stack section - * \param stack_top offset of the stack top - */ - virtual void SetStackTop(DevBaseOffset stack_top) { - LOG(FATAL) << "unimplemented"; - } - - /*! - * \brief convert from base offset to absolute address - * \param offset base offset - * \return absolute address - */ - DevPtr ToDevPtr(DevBaseOffset offset) { - return DevPtr(base_addr() + offset.value()); - } - - /*! - * \brief convert from absolute address to base offset - * \param ptr absolute address - * \return base offset - */ - DevBaseOffset ToDevOffset(DevPtr ptr) { - return DevBaseOffset(ptr.value() - base_addr()); - } + virtual void Execute(DevPtr func_addr, DevPtr breakpoint_addr) = 0; /*! * \brief getter function for low-level device type * \return string containing device type */ virtual const char* device_type() const = 0; - - protected: - /*! - * \brief getter function for base_addr - * \return the base address of the device memory region - */ - virtual std::uintptr_t base_addr() const = 0; }; /*! * \brief create a host low-level device * \param num_bytes size of the memory region + * \param base_addr pointer to write the host device's resulting base address into */ -const std::shared_ptr HostLowLevelDeviceCreate(size_t num_bytes); +const std::shared_ptr HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr); /*! 
* \brief connect to OpenOCD and create an OpenOCD low-level device + * \param addr address of the OpenOCD server to connect to * \param port port of the OpenOCD server to connect to */ -const std::shared_ptr OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr, - const std::string& addr, +const std::shared_ptr OpenOCDLowLevelDeviceCreate(const std::string& addr, int port); } // namespace runtime diff --git a/src/runtime/micro/micro_common.cc b/src/runtime/micro/micro_common.cc index 72eb40961fbc..632b6048b182 100644 --- a/src/runtime/micro/micro_common.cc +++ b/src/runtime/micro/micro_common.cc @@ -35,30 +35,6 @@ namespace tvm { namespace runtime { -size_t GetDefaultSectionSize(SectionKind kind) { - switch (kind) { - case SectionKind::kText: - return 0xF000; - case SectionKind::kRodata: - return 0xF000; - case SectionKind::kData: - return 0xF00; - case SectionKind::kBss: - return 0xF00; - case SectionKind::kArgs: - return 0xF0000; - case SectionKind::kStack: - return 0xF000; - case SectionKind::kHeap: - return 0xF00000; - case SectionKind::kWorkspace: - return 0xF0000; - default: - LOG(FATAL) << "invalid section " << static_cast(kind); - return 0; - } -} - const char* SectionToString(SectionKind section) { switch (section) { case SectionKind::kText: return "text"; @@ -66,37 +42,32 @@ const char* SectionToString(SectionKind section) { case SectionKind::kData: return "data"; case SectionKind::kBss: return "bss"; case SectionKind::kArgs: return "args"; - case SectionKind::kStack: return "stack"; case SectionKind::kHeap: return "heap"; case SectionKind::kWorkspace: return "workspace"; + case SectionKind::kStack: return "stack"; default: return ""; } } -static std::string AddrToString(void* addr) { - std::stringstream stream; - if (addr != nullptr) - stream << addr; - else - stream << "0x0"; - std::string string_addr = stream.str(); - return string_addr; -} - -std::string RelocateBinarySections(const std::string& binary_path, - DevPtr text, - DevPtr rodata, - DevPtr 
data, - DevPtr bss, - const std::string& toolchain_prefix) { +std::string RelocateBinarySections( + const std::string& binary_path, + size_t word_size, + DevPtr text_start, + DevPtr rodata_start, + DevPtr data_start, + DevPtr bss_start, + DevPtr stack_end, + const std::string& toolchain_prefix) { const auto* f = Registry::Get("tvm_callback_relocate_binary"); CHECK(f != nullptr) << "Require tvm_callback_relocate_binary to exist in registry"; std::string relocated_bin = (*f)(binary_path, - AddrToString(text.cast_to()), - AddrToString(rodata.cast_to()), - AddrToString(data.cast_to()), - AddrToString(bss.cast_to()), + word_size, + text_start.cast_to(), + rodata_start.cast_to(), + data_start.cast_to(), + bss_start.cast_to(), + stack_end.cast_to(), toolchain_prefix); return relocated_bin; } diff --git a/src/runtime/micro/micro_common.h b/src/runtime/micro/micro_common.h index 10ae9d50b8bc..4a0189b3e89e 100644 --- a/src/runtime/micro/micro_common.h +++ b/src/runtime/micro/micro_common.h @@ -46,122 +46,79 @@ enum class SectionKind : size_t { kData, kBss, kArgs, - kStack, kHeap, kWorkspace, + kStack, kNumKinds, }; -/*! \brief default size alignment */ -constexpr int kDefaultSizeAlignment = 8; +/*! \brief union for storing values on varying target word sizes */ +union TargetVal { + /*! \brief 32-bit pointer */ + uint32_t val32; + /*! \brief 64-bit pointer */ + uint64_t val64; +}; -/*! \brief Base class for interfacing with device locations (pointers/offsets) */ -class DeviceLocation { +/*! \brief absolute device address */ +class DevPtr { public: - /*! \brief construct a location with value `value` */ - explicit DeviceLocation(std::uintptr_t value) : value_(value) {} + /*! \brief construct a device address with value `value` */ + explicit DevPtr(std::uintptr_t value) : value_(TargetVal { .val64 = value }) {} /*! \brief default constructor */ - DeviceLocation() : value_(0) {} + DevPtr() : value_(TargetVal { .val64 = 0 }) {} - /*! 
\brief construct a null location */ - explicit DeviceLocation(std::nullptr_t value) : value_(0) {} + /*! \brief construct a null address */ + explicit DevPtr(std::nullptr_t value) : value_(TargetVal { .val64 = 0 }) {} /*! \brief destructor */ - virtual ~DeviceLocation() {} + ~DevPtr() {} /*! - * \brief get value of location - * \return value of location + * \brief get value of pointer + * \return value of pointer */ - std::uintptr_t value() const { return value_; } + TargetVal value() const { return value_; } /*! * \brief cast location to type `T` * \return casted result */ template - T cast_to() const { return reinterpret_cast(value_); } + T cast_to() const { return reinterpret_cast(value_.val64); } /*! \brief check if location is null */ - bool operator==(std::nullptr_t) const { return value_ == 0; } + bool operator==(std::nullptr_t) const { return value_.val64 == 0; } /*! \brief check if location is not null */ - bool operator!=(std::nullptr_t) const { return value_ != 0; } - - protected: - /*! \brief raw value storing the location */ - std::uintptr_t value_; -}; - -/*! \brief absolute device address */ -class DevPtr : public DeviceLocation { - public: - /*! \brief construct an absolute address with value `value` */ - explicit DevPtr(std::uintptr_t val) : DeviceLocation(val) {} - - /*! \brief default constructor */ - DevPtr() : DeviceLocation() {} - - /*! \brief construct a null absolute address */ - explicit DevPtr(std::nullptr_t val) : DeviceLocation(val) {} + bool operator!=(std::nullptr_t) const { return value_.val64 != 0; } /*! \brief add an integer to this absolute address to get a larger absolute address */ DevPtr operator+(size_t n) const { - return DevPtr(value_ + n); + return DevPtr(value_.val64 + n); } /*! \brief mutably add an integer to this absolute address */ DevPtr& operator+=(size_t n) { - value_ += n; + value_.val64 += n; return *this; } /*! 
\brief subtract an integer from this absolute address to get a smaller absolute address */ DevPtr operator-(size_t n) const { - return DevPtr(value_ - n); + return DevPtr(value_.val64 - n); } /*! \brief mutably subtract an integer from this absolute address */ DevPtr& operator-=(size_t n) { - value_ -= n; + value_.val64 -= n; return *this; } -}; - -/*! \brief offset from device base address */ -class DevBaseOffset : public DeviceLocation { - public: - /*! \brief construct a base offset with value `value` */ - explicit DevBaseOffset(std::uintptr_t value) : DeviceLocation(value) {} - - /*! \brief default constructor */ - DevBaseOffset() : DeviceLocation() {} - - /*! \brief construct a null base offset */ - explicit DevBaseOffset(std::nullptr_t value) : DeviceLocation(value) {} - /*! \brief add an integer to this base offset to get a larger base offset */ - DevBaseOffset operator+(size_t n) const { - return DevBaseOffset(value_ + n); - } - - /*! \brief mutably add an integer to this base offset */ - DevBaseOffset& operator+=(size_t n) { - value_ += n; - return *this; - } - - /*! \brief subtract an integer from this base offset to get a smaller base offset */ - DevBaseOffset operator-(size_t n) const { - return DevBaseOffset(value_ - n); - } - - /*! \brief mutably subtract an integer from this base offset */ - DevBaseOffset& operator-=(size_t n) { - value_ -= n; - return *this; - } + private: + /*! \brief raw value storing the pointer */ + TargetVal value_; }; /*! @@ -212,6 +169,10 @@ class SymbolMap { return result->second; } + bool HasSymbol(const std::string& name) const { + return map_.find(name) != map_.end(); + } + private: /*! \brief backing map */ std::unordered_map map_; @@ -220,7 +181,7 @@ class SymbolMap { /*! \brief struct containing start and size of a device memory region */ struct DevMemRegion { /*! \brief section start offset */ - DevBaseOffset start; + DevPtr start; /*! 
\brief size of section */ size_t size; }; @@ -239,16 +200,13 @@ struct BinaryInfo { SymbolMap symbol_map; }; -// TODO(weberlo): should this be here? -/*! \brief number of bytes in each page */ -constexpr int kPageSize = 4096; - -const DevBaseOffset kDeviceStart = DevBaseOffset(64); - -/*! - * \brief return default size of given section kind in bytes - */ -size_t GetDefaultSectionSize(SectionKind kind); +struct BinaryContents { + BinaryInfo binary_info; + std::string text_contents; + std::string rodata_contents; + std::string data_contents; + std::string bss_contents; +}; /*! * \brief upper-aligns value according to specified alignment @@ -270,19 +228,24 @@ const char* SectionToString(SectionKind section); /*! * \brief links binary by repositioning section addresses * \param binary_name input binary filename - * \param text new text section address - * \param rodata new rodata section address - * \param data new data section address - * \param bss new bss section address + * \param word_size word size on the target machine + * \param text_start text section address + * \param rodata_start rodata section address + * \param data_start data section address + * \param bss_start bss section address + * \param stack_end stack section end address * \param toolchain_prefix prefix of compiler toolchain to use * \return relocated binary file contents */ -std::string RelocateBinarySections(const std::string& binary_name, - DevPtr text, - DevPtr rodata, - DevPtr data, - DevPtr bss, - const std::string& toolchain_prefix); +std::string RelocateBinarySections( + const std::string& binary_path, + size_t word_size, + DevPtr text_start, + DevPtr rodata_start, + DevPtr data_start, + DevPtr bss_start, + DevPtr stack_end, + const std::string& toolchain_prefix); /*! 
* \brief reads section from binary @@ -306,7 +269,7 @@ std::string ReadSection(const std::string& binary, size_t GetSectionSize(const std::string& binary_name, SectionKind section, const std::string& toolchain_prefix, - size_t align = kDefaultSizeAlignment); + size_t align); } // namespace runtime } // namespace tvm diff --git a/src/runtime/micro/micro_device_api.cc b/src/runtime/micro/micro_device_api.cc index d1df67f00d9b..de2e03155a04 100644 --- a/src/runtime/micro/micro_device_api.cc +++ b/src/runtime/micro/micro_device_api.cc @@ -61,7 +61,7 @@ class MicroDeviceAPI final : public DeviceAPI { void FreeDataSpace(TVMContext ctx, void* ptr) final { MicroDevSpace* dev_space = static_cast(ptr); dev_space->session->FreeInSection( - SectionKind::kHeap, DevBaseOffset(reinterpret_cast(dev_space->data))); + SectionKind::kHeap, DevPtr(reinterpret_cast(dev_space->data))); delete dev_space; } @@ -89,12 +89,12 @@ class MicroDeviceAPI final : public DeviceAPI { ObjectPtr& session = from_space->session; const std::shared_ptr& lld = session->low_level_device(); - DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); - DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); + DevPtr from_dev_addr = GetDevLoc(from_space, from_offset); + DevPtr to_dev_addr = GetDevLoc(to_space, to_offset); std::vector buffer(size); - lld->Read(from_dev_offset, static_cast(buffer.data()), size); - lld->Write(to_dev_offset, static_cast(buffer.data()), size); + lld->Read(from_dev_addr, static_cast(buffer.data()), size); + lld->Write(to_dev_addr, static_cast(buffer.data()), size); } else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) { // Reading from the device. 
@@ -102,9 +102,9 @@ class MicroDeviceAPI final : public DeviceAPI { ObjectPtr& session = from_space->session; const std::shared_ptr& lld = session->low_level_device(); - DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); + DevPtr from_dev_addr = GetDevLoc(from_space, from_offset); void* to_host_ptr = GetHostLoc(to, to_offset); - lld->Read(from_dev_offset, to_host_ptr, size); + lld->Read(from_dev_addr, to_host_ptr, size); } else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) { // Writing to the device. @@ -113,8 +113,8 @@ class MicroDeviceAPI final : public DeviceAPI { const std::shared_ptr& lld = session->low_level_device(); void* from_host_ptr = GetHostLoc(from, from_offset); - DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); - lld->Write(to_dev_offset, from_host_ptr, size); + DevPtr to_dev_addr = GetDevLoc(to_space, to_offset); + lld->Write(to_dev_addr, from_host_ptr, size); } else { LOG(FATAL) << "Expect copy from/to micro device or between micro device\n"; } @@ -138,7 +138,7 @@ class MicroDeviceAPI final : public DeviceAPI { MicroDevSpace* dev_space = static_cast(data); ObjectPtr& session = dev_space->session; session->FreeInSection(SectionKind::kWorkspace, - DevBaseOffset(reinterpret_cast(dev_space->data))); + DevPtr(reinterpret_cast(dev_space->data))); delete dev_space; } @@ -152,10 +152,8 @@ class MicroDeviceAPI final : public DeviceAPI { } private: - DevBaseOffset GetDevLoc(MicroDevSpace* dev_space, size_t offset) { - DevBaseOffset dev_offset = - DevBaseOffset(reinterpret_cast(dev_space->data) + offset); - return dev_offset; + DevPtr GetDevLoc(MicroDevSpace* dev_space, size_t offset) { + return DevPtr(reinterpret_cast(dev_space->data) + offset); } void* GetHostLoc(const void* ptr, size_t offset) { diff --git a/src/runtime/micro/micro_module.cc b/src/runtime/micro/micro_module.cc index e66c45b3f063..4a41d4bca28f 100644 --- a/src/runtime/micro/micro_module.cc +++ b/src/runtime/micro/micro_module.cc @@ -55,62 +55,48 @@ 
class MicroModuleNode final : public ModuleNode { */ void InitMicroModule(const std::string& binary_path) { session_ = MicroSession::Current(); - binary_path_ = binary_path; - binary_info_ = session_->LoadBinary(binary_path_); - } - - /*! - * \brief runs selected function on the micro device - * \param func_name name of the function to be run - * \param func_offset offset of the function to be run - * \param args type-erased arguments passed to the function - */ - void RunFunction(const std::string& func_name, DevBaseOffset func_offset, const TVMArgs& args) { - session_->PushToExecQueue(func_offset, args); + symbol_map_ = session_->LoadBinary(binary_path, true).symbol_map; } private: - /*! \brief module binary info */ - BinaryInfo binary_info_; - /*! \brief path to module binary */ - std::string binary_path_; + SymbolMap symbol_map_; /*! \brief global session pointer */ ObjectPtr session_; }; class MicroWrappedFunc { public: - MicroWrappedFunc(MicroModuleNode* m, - ObjectPtr session, - const std::string& func_name, - DevBaseOffset func_offset) { - m_ = m; + MicroWrappedFunc(ObjectPtr session, + DevPtr func_ptr) { session_ = session; - func_name_ = func_name; - func_offset_ = func_offset; + func_ptr_ = func_ptr; } void operator()(TVMArgs args, TVMRetValue* rv) const { - m_->RunFunction(func_name_, func_offset_, args); + *rv = session_->PushToExecQueue(func_ptr_, args); } private: - /*! \brief internal module */ - MicroModuleNode* m_; /*! \brief reference to the session for this function (to keep the session alive) */ ObjectPtr session_; - /*! \brief name of the function */ - std::string func_name_; /*! 
\brief offset of the function to be called */ - DevBaseOffset func_offset_; + DevPtr func_ptr_; }; PackedFunc MicroModuleNode::GetFunction( const std::string& name, const ObjectPtr& sptr_to_self) { - DevBaseOffset func_offset = - session_->low_level_device()->ToDevOffset(binary_info_.symbol_map[name]); - MicroWrappedFunc f(this, session_, name, func_offset); + DevPtr func_ptr; + if (name == tvm::runtime::symbol::tvm_module_main) { + if (symbol_map_.HasSymbol(tvm::runtime::symbol::tvm_module_main)) { + func_ptr = symbol_map_[tvm::runtime::symbol::tvm_module_main]; + } else { + func_ptr = symbol_map_["default_function"]; + } + } else { + func_ptr = symbol_map_[name]; + } + MicroWrappedFunc f(session_, func_ptr); return PackedFunc(f); } diff --git a/src/runtime/micro/micro_section_allocator.h b/src/runtime/micro/micro_section_allocator.h index c950075c179d..5c75f92737ab 100644 --- a/src/runtime/micro/micro_section_allocator.h +++ b/src/runtime/micro/micro_section_allocator.h @@ -38,11 +38,15 @@ class MicroSectionAllocator { * \brief constructor that specifies section boundaries * \param region location and size of the section on the device */ - explicit MicroSectionAllocator(DevMemRegion region) - : start_offset_(region.start), + explicit MicroSectionAllocator(DevMemRegion region, size_t word_size) + : start_addr_(region.start), size_(0), - capacity_(region.size) { - CHECK_EQ(start_offset_.value() % 8, 0) << "micro section not aligned to 8 bytes"; + capacity_(region.size), + word_size_(word_size) { + CHECK_EQ(start_addr_.value().val64 % word_size, 0) + << "micro section start not aligned to " << word_size << " bytes"; + CHECK_EQ(capacity_ % word_size, 0) + << "micro section end not aligned to " << word_size << " bytes"; } /*! 
@@ -55,15 +59,15 @@ class MicroSectionAllocator { * \param size size of allocated memory in bytes * \return pointer to allocated memory region in section, nullptr if out of space */ - DevBaseOffset Allocate(size_t size) { - size_ = UpperAlignValue(size_, 8); + DevPtr Allocate(size_t size) { + size_ = UpperAlignValue(size_, word_size_); CHECK(size_ + size < capacity_) << "cannot alloc " << size << " bytes in section with start_addr " << - start_offset_.value(); - DevBaseOffset alloc_ptr = start_offset_ + size_; + start_addr_.cast_to(); + DevPtr alloc_addr = start_addr_ + size_; size_ += size; - alloc_map_[alloc_ptr.value()] = size; - return alloc_ptr; + alloc_map_[alloc_addr.value().val64] = size; + return alloc_addr; } /*! @@ -71,10 +75,10 @@ class MicroSectionAllocator { * \param offs offset to allocated memory * \note simple allocator scheme, more complex versions will be implemented later */ - void Free(DevBaseOffset offs) { - std::uintptr_t ptr = offs.value(); - CHECK(alloc_map_.find(ptr) != alloc_map_.end()) << "freed pointer was never allocated"; - alloc_map_.erase(ptr); + void Free(DevPtr addr) { + CHECK(alloc_map_.find(addr.value().val64) != alloc_map_.end()) + << "freed pointer was never allocated"; + alloc_map_.erase(addr.value().val64); if (alloc_map_.empty()) { size_ = 0; } @@ -83,17 +87,17 @@ class MicroSectionAllocator { /*! * \brief start offset of the memory region managed by this allocator */ - DevBaseOffset start_offset() const { return start_offset_; } + DevPtr start_addr() const { return start_addr_; } /*! - * \brief current end offset of the space being used in this memory region + * \brief current end addr of the space being used in this memory region */ - DevBaseOffset curr_end_offset() const { return start_offset_ + size_; } + DevPtr curr_end_addr() const { return start_addr_ + size_; } /*! 
- * \brief end offset of the memory region managed by this allocator + * \brief end addr of the memory region managed by this allocator */ - DevBaseOffset max_end_offset() const { return start_offset_ + capacity_; } + DevPtr max_addr() const { return start_addr_ + capacity_; } /*! * \brief size of the section @@ -107,13 +111,15 @@ class MicroSectionAllocator { private: /*! \brief start address of the section */ - DevBaseOffset start_offset_; + DevPtr start_addr_; /*! \brief current size of the section */ size_t size_; /*! \brief total storage capacity of the section */ size_t capacity_; + /*! \brief number of bytes in a word on the target device */ + size_t word_size_; /*! \brief allocation map for allocation sizes */ - std::unordered_map alloc_map_; + std::unordered_map alloc_map_; }; } // namespace runtime diff --git a/src/runtime/micro/micro_session.cc b/src/runtime/micro/micro_session.cc index febf726184d9..06ffa73e79df 100644 --- a/src/runtime/micro/micro_session.cc +++ b/src/runtime/micro/micro_session.cc @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -56,17 +57,149 @@ void MicroSession::ExitWithScope() { entry->session_stack.pop(); } -MicroSession::MicroSession() { - DevBaseOffset curr_start_offset = kDeviceStart; - for (size_t i = 0; i < static_cast(SectionKind::kNumKinds); i++) { - size_t section_size = GetDefaultSectionSize(static_cast(i)); - section_allocators_[i] = std::make_shared(DevMemRegion { - .start = curr_start_offset, - .size = section_size, - }); - curr_start_offset += section_size; +MicroSession::MicroSession( + const std::string& comms_method, + const std::string& binary_path, + const std::string& toolchain_prefix, + uint64_t text_start, + size_t text_size, + uint64_t rodata_start, + size_t rodata_size, + uint64_t data_start, + size_t data_size, + uint64_t bss_start, + size_t bss_size, + uint64_t args_start, + size_t args_size, + uint64_t heap_start, + size_t heap_size, + uint64_t workspace_start, + size_t 
workspace_size, + uint64_t stack_start, + size_t stack_size, + size_t word_size, + bool thumb_mode, + const std::string& server_addr, + int port) + : toolchain_prefix_(toolchain_prefix) + , word_size_(word_size) + , thumb_mode_(thumb_mode) { + CHECK(word_size_ == 4 || word_size_ == 8) << "unsupported word size " << word_size_; + if (comms_method == "host") { + // TODO(weberlo): move checks to python + CHECK( + text_start == 0 && + rodata_start == 0 && + data_start == 0 && + bss_start == 0 && + args_start == 0 && + heap_start == 0 && + workspace_start == 0 && + stack_start == 0) << "unable to specify section addresses for host device"; + size_t memory_size = + text_size + rodata_size + data_size + bss_size + + args_size + heap_size + workspace_size + stack_size; + void* base_addr; + low_level_device_ = HostLowLevelDeviceCreate(memory_size, &base_addr); + CHECK_EQ(reinterpret_cast(base_addr) % word_size_, 0) + << "base address not aligned to " << word_size_ << " bytes"; + DevPtr curr_addr = DevPtr(reinterpret_cast(base_addr)); + + section_allocators_[0] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = text_size, + }, word_size_); + curr_addr += text_size; + section_allocators_[1] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = rodata_size, + }, word_size_); + curr_addr += rodata_size; + section_allocators_[2] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = data_size, + }, word_size_); + curr_addr += data_size; + section_allocators_[3] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = bss_size, + }, word_size_); + curr_addr += bss_size; + section_allocators_[4] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = args_size, + }, word_size_); + curr_addr += args_size; + section_allocators_[5] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = heap_size, + }, word_size_); + curr_addr += heap_size; + section_allocators_[6] = std::make_shared(DevMemRegion { + .start = 
curr_addr, + .size = workspace_size, + }, word_size_); + curr_addr += workspace_size; + section_allocators_[7] = std::make_shared(DevMemRegion { + .start = curr_addr, + .size = stack_size, + }, word_size_); + curr_addr += stack_size; + } else if (comms_method == "openocd") { + low_level_device_ = OpenOCDLowLevelDeviceCreate(server_addr, port); + section_allocators_[0] = std::make_shared(DevMemRegion { + .start = DevPtr(text_start), + .size = text_size, + }, word_size_); + section_allocators_[1] = std::make_shared(DevMemRegion { + .start = DevPtr(rodata_start), + .size = rodata_size, + }, word_size_); + section_allocators_[2] = std::make_shared(DevMemRegion { + .start = DevPtr(data_start), + .size = data_size, + }, word_size_); + section_allocators_[3] = std::make_shared(DevMemRegion { + .start = DevPtr(bss_start), + .size = bss_size, + }, word_size_); + section_allocators_[4] = std::make_shared(DevMemRegion { + .start = DevPtr(args_start), + .size = args_size, + }, word_size_); + section_allocators_[5] = std::make_shared(DevMemRegion { + .start = DevPtr(heap_start), + .size = heap_size, + }, word_size_); + section_allocators_[6] = std::make_shared(DevMemRegion { + .start = DevPtr(workspace_start), + .size = workspace_size, + }, word_size_); + section_allocators_[7] = std::make_shared(DevMemRegion { + .start = DevPtr(stack_start), + .size = stack_size, + }, word_size_); + } else { + LOG(FATAL) << "unsupported micro low-level device"; + } + + runtime_symbol_map_ = LoadBinary(binary_path, false).symbol_map; + + // Patch pointers to define the bounds of the workspace section and the word + // size (for allocation alignment). 
+ std::shared_ptr ws_allocator = GetAllocator(SectionKind::kWorkspace); + TargetVal ws_start = ws_allocator->start_addr().value(); + TargetVal ws_end = ws_allocator->max_addr().value(); + TargetVal target_word_size { .val64 = word_size_ }; + if (word_size_ == 4) { + DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_start", ws_start.val32); + DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_end", ws_end.val32); + DevSymbolWrite(runtime_symbol_map_, "utvm_word_size", target_word_size.val32); + } else if (word_size_ == 8) { + DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_start", ws_start.val64); + DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_end", ws_end.val64); + DevSymbolWrite(runtime_symbol_map_, "utvm_word_size", target_word_size.val64); } - memory_size_ = curr_start_offset.cast_to(); } MicroSession::~MicroSession() { @@ -76,79 +209,118 @@ MicroSession::~MicroSession() { low_level_device_ = nullptr; } -void MicroSession::CreateSession(const std::string& device_type, - const std::string& binary_path, - const std::string& toolchain_prefix, - std::uintptr_t base_addr, - const std::string& server_addr, - int port) { - // TODO(weberlo): make device type enum - toolchain_prefix_ = toolchain_prefix; - if (device_type == "host") { - low_level_device_ = HostLowLevelDeviceCreate(memory_size_); - } else if (device_type == "openocd") { - // TODO(weberlo): We need a better way of configuring devices. 
- low_level_device_ = OpenOCDLowLevelDeviceCreate(base_addr, server_addr, port); - } else { - LOG(FATAL) << "unsupported micro low-level device"; +double MicroSession::PushToExecQueue(DevPtr func_ptr, const TVMArgs& args) { + if (thumb_mode_) { + func_ptr += 1; } - SetRuntimeBinaryPath(binary_path); - CHECK(!runtime_binary_path_.empty()) << "uTVM runtime not initialized"; - runtime_bin_info_ = LoadBinary(runtime_binary_path_, /* patch_dylib_pointers */ false); - utvm_main_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMMain"]); - utvm_done_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMDone"]); - - if (device_type == "openocd") { - // Set OpenOCD device's stack pointer. - auto stack_section = GetAllocator(SectionKind::kStack); - low_level_device_->SetStackTop(stack_section->max_end_offset()); - } - - // Patch workspace pointers to the start of the workspace section. - DevBaseOffset workspace_start_offset = GetAllocator(SectionKind::kWorkspace)->start_offset(); - DevBaseOffset workspace_end_offset = GetAllocator(SectionKind::kWorkspace)->max_end_offset(); - void* workspace_start_addr = - low_level_device_->ToDevPtr(workspace_start_offset).cast_to(); - void* workspace_end_addr = - low_level_device_->ToDevPtr(workspace_end_offset).cast_to(); - DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_begin", workspace_start_addr); - DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_end", workspace_end_addr); -} - -void MicroSession::PushToExecQueue(DevBaseOffset func, const TVMArgs& args) { - int32_t (*func_dev_addr)(void*, void*, int32_t) = - reinterpret_cast( - low_level_device()->ToDevPtr(func).value()); - // Create an allocator stream for the memory region after the most recent // allocation in the args section. 
- DevPtr args_addr = - low_level_device()->ToDevPtr(GetAllocator(SectionKind::kArgs)->curr_end_offset()); - TargetDataLayoutEncoder encoder(args_addr); + DevPtr args_addr = GetAllocator(SectionKind::kArgs)->curr_end_addr(); + TargetDataLayoutEncoder encoder(args_addr, word_size_); std::tuple arg_field_addrs = EncoderAppend(&encoder, args); + // Flush `stream` to device memory. - DevBaseOffset stream_dev_offset = + DevPtr stream_dev_addr = GetAllocator(SectionKind::kArgs)->Allocate(encoder.buf_size()); - low_level_device()->Write(stream_dev_offset, + low_level_device()->Write(stream_dev_addr, reinterpret_cast(encoder.data()), encoder.buf_size()); - UTVMTask task = { - .func = func_dev_addr, - .arg_values = std::get<0>(arg_field_addrs).cast_to(), - .arg_type_codes = std::get<1>(arg_field_addrs).cast_to(), + TargetVal arg_values_dev_addr = std::get<0>(arg_field_addrs).value(); + TargetVal arg_type_codes_dev_addr = std::get<1>(arg_field_addrs).value(); + if (word_size_ == 4) { + UTVMTask32 task = { + .func = func_ptr.value().val32, + .arg_values = arg_values_dev_addr.val32, + .arg_type_codes = arg_type_codes_dev_addr.val32, .num_args = args.num_args, - }; - // Write the task. - DevSymbolWrite(runtime_symbol_map(), "task", task); + }; + // Write the task. + DevSymbolWrite(runtime_symbol_map_, "utvm_task", task); + } else if (word_size_ == 8) { + UTVMTask64 task = { + .func = func_ptr.value().val64, + .arg_values = arg_values_dev_addr.val64, + .arg_type_codes = arg_type_codes_dev_addr.val64, + .num_args = args.num_args, + }; + // Write the task. + DevSymbolWrite(runtime_symbol_map_, "utvm_task", task); + } - low_level_device()->Execute(utvm_main_symbol_, utvm_done_symbol_); + DevPtr utvm_init_addr = runtime_symbol_map_["UTVMInit"]; + DevPtr utvm_done_addr = runtime_symbol_map_["UTVMDone"]; + if (thumb_mode_) { + utvm_init_addr += 1; + } + + low_level_device()->Execute(utvm_init_addr, utvm_done_addr); // Check if there was an error during execution. If so, log it. 
CheckDeviceError(); + uint32_t task_time = DevSymbolRead(runtime_symbol_map_, "utvm_task_time"); + GetAllocator(SectionKind::kArgs)->Free(stream_dev_addr); + return static_cast(task_time); +} + +BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) { + DevMemRegion text_section; + DevMemRegion rodata_section; + DevMemRegion data_section; + DevMemRegion bss_section; + + text_section.size = GetSectionSize( + binary_path, SectionKind::kText, toolchain_prefix_, word_size_); + rodata_section.size = GetSectionSize( + binary_path, SectionKind::kRodata, toolchain_prefix_, word_size_); + data_section.size = GetSectionSize( + binary_path, SectionKind::kData, toolchain_prefix_, word_size_); + bss_section.size = GetSectionSize( + binary_path, SectionKind::kBss, toolchain_prefix_, word_size_); + + text_section.start = AllocateInSection(SectionKind::kText, text_section.size); + rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size); + data_section.start = AllocateInSection(SectionKind::kData, data_section.size); + bss_section.start = AllocateInSection(SectionKind::kBss, bss_section.size); + CHECK(text_section.start != nullptr && rodata_section.start != nullptr && + data_section.start != nullptr && bss_section.start != nullptr) + << "not enough space to load module on device"; + + std::string relocated_bin = RelocateBinarySections( + binary_path, + word_size_, + text_section.start, + rodata_section.start, + data_section.start, + bss_section.start, + GetAllocator(SectionKind::kStack)->max_addr(), + toolchain_prefix_); + std::string text_contents = ReadSection(relocated_bin, SectionKind::kText, toolchain_prefix_); + std::string rodata_contents = ReadSection(relocated_bin, SectionKind::kRodata, toolchain_prefix_); + std::string data_contents = ReadSection(relocated_bin, SectionKind::kData, toolchain_prefix_); + std::string bss_contents = ReadSection(relocated_bin, SectionKind::kBss, toolchain_prefix_); + + 
low_level_device_->Write(text_section.start, &text_contents[0], text_section.size); + low_level_device_->Write(rodata_section.start, &rodata_contents[0], rodata_section.size); + low_level_device_->Write(data_section.start, &data_contents[0], data_section.size); + low_level_device_->Write(bss_section.start, &bss_contents[0], bss_section.size); + SymbolMap symbol_map {relocated_bin, toolchain_prefix_}; + + if (patch_dylib_pointers) { + // Patch device lib pointers. + PatchImplHole(symbol_map, "TVMBackendAllocWorkspace"); + PatchImplHole(symbol_map, "TVMBackendFreeWorkspace"); + PatchImplHole(symbol_map, "TVMAPISetLastError"); + } - GetAllocator(SectionKind::kArgs)->Free(stream_dev_offset); + return BinaryInfo { + .text_section = text_section, + .rodata_section = rodata_section, + .data_section = data_section, + .bss_section = bss_section, + .symbol_map = symbol_map, + }; } std::tuple MicroSession::EncoderAppend( @@ -171,7 +343,12 @@ std::tuple MicroSession::EncoderAppend( // Mutate the array to unwrap the `data` field. base_arr_handle->data = reinterpret_cast(old_data)->data; // Now, encode the unwrapped version. - void* arr_ptr = EncoderAppend(encoder, *base_arr_handle).cast_to(); + void* arr_ptr = nullptr; + if (word_size_ == 4) { + arr_ptr = EncoderAppend(encoder, *base_arr_handle).cast_to(); + } else if (word_size_ == 8) { + arr_ptr = EncoderAppend(encoder, *base_arr_handle).cast_to(); + } // And restore the original wrapped version. 
base_arr_handle->data = old_data; @@ -190,54 +367,53 @@ std::tuple MicroSession::EncoderAppend( } } type_codes_slot.WriteArray(type_codes, num_args); - return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr()); } +template DevPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr) { - auto tvm_arr_slot = encoder->Alloc(); + auto tvm_arr_slot = encoder->Alloc(); auto shape_slot = encoder->Alloc(arr.ndim); // `shape` and `strides` are stored on the host, so we need to write them to // the device first. The `data` field is already allocated on the device and // is a device pointer, so we don't need to write it. shape_slot.WriteArray(arr.shape, arr.ndim); - DevPtr shape_addr = shape_slot.start_addr(); - DevPtr strides_addr = DevPtr(nullptr); + DevPtr shape_dev_addr = shape_slot.start_addr(); + DevPtr strides_dev_addr = DevPtr(nullptr); if (arr.strides != nullptr) { auto stride_slot = encoder->Alloc(arr.ndim); stride_slot.WriteArray(arr.strides, arr.ndim); - strides_addr = stride_slot.start_addr(); + strides_dev_addr = stride_slot.start_addr(); } - // Copy `arr`, update the copy's pointers to be device pointers, then - // write the copy to `tvm_arr_slot`. - TVMArray dev_arr = arr; - // Update the device type to look like a host, because codegen generates - // checks that it is a host array. + T dev_arr( + TargetVal { .val64 = reinterpret_cast(arr.data) }, + arr.ctx, + arr.ndim, + arr.dtype, + shape_dev_addr.value(), + strides_dev_addr.value(), + TargetVal { .val64 = arr.byte_offset }); CHECK(dev_arr.ctx.device_type == static_cast(kDLMicroDev)) << "attempt to write TVMArray with non-micro device type"; + // Update the device type to CPU, because from the microcontroller's + // perspective, it is. dev_arr.ctx.device_type = DLDeviceType::kDLCPU; - // Add the base address of the device to the array's data's device offset to - // get a device address. 
- DevBaseOffset arr_offset(reinterpret_cast(arr.data)); - dev_arr.data = low_level_device()->ToDevPtr(arr_offset).cast_to(); - dev_arr.shape = shape_addr.cast_to(); - dev_arr.strides = strides_addr.cast_to(); tvm_arr_slot.WriteValue(dev_arr); return tvm_arr_slot.start_addr(); } void MicroSession::CheckDeviceError() { - int32_t return_code = DevSymbolRead(runtime_symbol_map(), "utvm_return_code"); + int32_t return_code = DevSymbolRead(runtime_symbol_map_, "utvm_return_code"); if (return_code) { std::uintptr_t last_error = - DevSymbolRead(runtime_symbol_map(), "utvm_last_error"); + DevSymbolRead(runtime_symbol_map_, "utvm_last_error"); std::string last_error_str; if (last_error) { - DevBaseOffset last_err_offset = low_level_device()->ToDevOffset(DevPtr(last_error)); - last_error_str = ReadString(last_err_offset); + DevPtr last_err_addr = DevPtr(last_error); + last_error_str = ReadString(last_err_addr); } LOG(FATAL) << "error during micro function execution:\n" << " return code: " << std::dec << return_code << "\n" @@ -246,100 +422,51 @@ void MicroSession::CheckDeviceError() { } } -BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) { - DevMemRegion text_section; - DevMemRegion rodata_section; - DevMemRegion data_section; - DevMemRegion bss_section; - - text_section.size = GetSectionSize(binary_path, SectionKind::kText, toolchain_prefix_); - rodata_section.size = GetSectionSize(binary_path, SectionKind::kRodata, toolchain_prefix_); - data_section.size = GetSectionSize(binary_path, SectionKind::kData, toolchain_prefix_); - bss_section.size = GetSectionSize(binary_path, SectionKind::kBss, toolchain_prefix_); - - text_section.start = AllocateInSection(SectionKind::kText, text_section.size); - rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size); - data_section.start = AllocateInSection(SectionKind::kData, data_section.size); - bss_section.start = AllocateInSection(SectionKind::kBss, 
bss_section.size); - CHECK(text_section.start != nullptr && rodata_section.start != nullptr && - data_section.start != nullptr && bss_section.start != nullptr) - << "not enough space to load module on device"; - - std::string relocated_bin = RelocateBinarySections( - binary_path, - low_level_device_->ToDevPtr(text_section.start), - low_level_device_->ToDevPtr(rodata_section.start), - low_level_device_->ToDevPtr(data_section.start), - low_level_device_->ToDevPtr(bss_section.start), - toolchain_prefix_); - std::string text_contents = ReadSection(relocated_bin, SectionKind::kText, toolchain_prefix_); - std::string rodata_contents = ReadSection(relocated_bin, SectionKind::kRodata, toolchain_prefix_); - std::string data_contents = ReadSection(relocated_bin, SectionKind::kData, toolchain_prefix_); - std::string bss_contents = ReadSection(relocated_bin, SectionKind::kBss, toolchain_prefix_); - low_level_device_->Write(text_section.start, &text_contents[0], text_section.size); - low_level_device_->Write(rodata_section.start, &rodata_contents[0], rodata_section.size); - low_level_device_->Write(data_section.start, &data_contents[0], data_section.size); - low_level_device_->Write(bss_section.start, &bss_contents[0], bss_section.size); - SymbolMap symbol_map {relocated_bin, toolchain_prefix_}; - - if (patch_dylib_pointers) { - // Patch device lib pointers. 
- PatchImplHole(symbol_map, "TVMBackendAllocWorkspace"); - PatchImplHole(symbol_map, "TVMBackendFreeWorkspace"); - PatchImplHole(symbol_map, "TVMAPISetLastError"); - } - - return BinaryInfo { - .text_section = text_section, - .rodata_section = rodata_section, - .data_section = data_section, - .bss_section = bss_section, - .symbol_map = symbol_map, - }; -} - void MicroSession::PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name) { - void* runtime_impl_addr = runtime_symbol_map()[func_name].cast_to(); + DevPtr runtime_impl_addr = runtime_symbol_map_[func_name]; + if (thumb_mode_) { + runtime_impl_addr += 1; + } std::ostringstream func_name_underscore; func_name_underscore << func_name << "_"; - DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr); -} - -void MicroSession::SetRuntimeBinaryPath(std::string path) { - runtime_binary_path_ = path; + if (word_size_ == 4) { + DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr.value().val32); + } else if (word_size_ == 8) { + DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr.value().val64); + } } -std::string MicroSession::ReadString(DevBaseOffset str_offset) { +std::string MicroSession::ReadString(DevPtr str_addr) { std::ostringstream result; const size_t buf_size = 256; std::vector buf(buf_size, 0); size_t i = buf_size; while (i == buf_size) { - low_level_device()->Read(str_offset, buf.data(), buf_size); + low_level_device()->Read(str_addr, buf.data(), buf_size); i = 0; while (i < buf_size) { if (buf[i] == 0) break; result << buf[i]; i++; } - str_offset = str_offset + i; + str_addr = str_addr + i; } return result.str(); } -DevBaseOffset MicroSession::AllocateInSection(SectionKind type, size_t size) { +DevPtr MicroSession::AllocateInSection(SectionKind type, size_t size) { return GetAllocator(type)->Allocate(size); } -void MicroSession::FreeInSection(SectionKind type, DevBaseOffset ptr) { - return GetAllocator(type)->Free(ptr); +void 
MicroSession::FreeInSection(SectionKind type, DevPtr addr) { + return GetAllocator(type)->Free(addr); } template T MicroSession::DevSymbolRead(const SymbolMap& symbol_map, const std::string& symbol) { - DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]); + DevPtr sym_addr = symbol_map[symbol]; T result; - low_level_device()->Read(sym_offset, &result, sizeof(T)); + low_level_device()->Read(sym_addr, &result, sizeof(T)); return result; } @@ -347,8 +474,8 @@ template void MicroSession::DevSymbolWrite(const SymbolMap& symbol_map, const std::string& symbol, const T& value) { - DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]); - low_level_device()->Write(sym_offset, &value, sizeof(T)); + DevPtr sym_addr = symbol_map[symbol]; + low_level_device()->Write(sym_addr, &value, sizeof(T)); } PackedFunc MicroSession::GetFunction( @@ -370,15 +497,53 @@ PackedFunc MicroSession::GetFunction( // create micro session and low-level device from Python frontend TVM_REGISTER_GLOBAL("micro._CreateSession") .set_body([](TVMArgs args, TVMRetValue* rv) { - const std::string& device_type = args[0]; + const std::string& comms_method = args[0]; const std::string& binary_path = args[1]; const std::string& toolchain_prefix = args[2]; - uint64_t base_addr = args[3]; - const std::string& server_addr = args[4]; - int port = args[5]; - ObjectPtr session = make_object(); - session->CreateSession( - device_type, binary_path, toolchain_prefix, base_addr, server_addr, port); + uint64_t text_start = args[3]; + size_t text_size = args[4]; + uint64_t rodata_start = args[5]; + size_t rodata_size = args[6]; + uint64_t data_start = args[7]; + size_t data_size = args[8]; + uint64_t bss_start = args[9]; + size_t bss_size = args[10]; + uint64_t args_start = args[11]; + size_t args_size = args[12]; + uint64_t heap_start = args[13]; + size_t heap_size = args[14]; + uint64_t workspace_start = args[15]; + size_t workspace_size = args[16]; + uint64_t stack_start 
= args[17]; + size_t stack_size = args[18]; + size_t word_size = args[19]; + bool thumb_mode = args[20]; + const std::string& server_addr = args[21]; + int port = args[22]; + ObjectPtr session = make_object( + comms_method, + binary_path, + toolchain_prefix, + text_start, + text_size, + rodata_start, + rodata_size, + data_start, + data_size, + bss_start, + bss_size, + args_start, + args_size, + heap_start, + heap_size, + workspace_start, + workspace_size, + stack_start, + stack_size, + word_size, + thumb_mode, + server_addr, + port); *rv = Module(session); }); diff --git a/src/runtime/micro/micro_session.h b/src/runtime/micro/micro_session.h index 65b64218313b..495fc74aa177 100644 --- a/src/runtime/micro/micro_session.h +++ b/src/runtime/micro/micro_session.h @@ -47,7 +47,6 @@ #include #include "low_level_device.h" -#include "device/utvm_runtime.h" #include "target_data_layout_encoder.h" namespace tvm { @@ -75,9 +74,55 @@ class MicroSession : public ModuleNode { } /*! - * \brief constructor + * \brief creates session by setting up a low-level device and initting allocators for it + * \param comms_method method of communication with the device (e.g., "openocd") + * \param binary_path file system path to the runtime binary + * \param toolchain_prefix GCC toolchain prefix + * \param text_start text section start address + * \param text_size text section size + * \param rodata_start rodata section start address + * \param rodata_size rodata section size + * \param data_start data section start address + * \param data_size data section size + * \param bss_start bss section start address + * \param bss_size bss section size + * \param args_start args section start address + * \param args_size args section size + * \param heap_start heap section start address + * \param heap_size heap section size + * \param workspace_start workspace section start address + * \param workspace_size workspace section size + * \param stack_start stack section start address + * \param
stack_size stack section size + * \param word_size number of bytes in a word on the target device + * \param thumb_mode whether the target device requires a thumb-mode bit on function addresses + * \param server_addr address of the OpenOCD server to connect to (if `comms_method == "openocd"`) + * \param port port of the OpenOCD server to connect to (if `comms_method == "openocd"`) */ - MicroSession(); + MicroSession( + const std::string& comms_method, + const std::string& binary_path, + const std::string& toolchain_prefix, + uint64_t text_start, + size_t text_size, + uint64_t rodata_start, + size_t rodata_size, + uint64_t data_start, + size_t data_size, + uint64_t bss_start, + size_t bss_size, + uint64_t args_start, + size_t args_size, + uint64_t heap_start, + size_t heap_size, + uint64_t workspace_start, + size_t workspace_size, + uint64_t stack_start, + size_t stack_size, + size_t word_size, + bool thumb_mode, + const std::string& server_addr, + int port); /*! * \brief destructor @@ -87,20 +132,20 @@ class MicroSession : public ModuleNode { static ObjectPtr& Current(); /*! - * \brief creates session by setting up a low-level device and initting allocators for it - * \param args TVMArgs passed into the micro.init packedfunc + * \brief sets up runtime metadata for `func` and copies arguments for on-device execution + * \param func address of the function to be executed + * \param args args to the packed function + * \return elapsed time during function execution on the device */ - void CreateSession(const std::string& device_type, - const std::string& binary_path, - const std::string& toolchain_prefix, - std::uintptr_t base_addr, - const std::string& server_addr, - int port); + double PushToExecQueue(DevPtr func, const TVMArgs& args); /*! 
- * \brief ends the session by destructing the low-level device and its allocators + * \brief loads binary onto device + * \param binary_path path to binary object file + * \param patch_dylib_pointers whether to patch runtime API function pointers + * \return info about loaded binary */ - void EndSession(); + BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers); /*! * \brief allocate memory in section @@ -108,36 +153,21 @@ class MicroSession : public ModuleNode { * \param size size of allocated memory in bytes * \return pointer to allocated memory region in section, nullptr if out of space */ - DevBaseOffset AllocateInSection(SectionKind type, size_t size); + DevPtr AllocateInSection(SectionKind type, size_t size); /*! * \brief free prior allocation from section * \param type type of section to allocate in - * \param ptr pointer to allocated memory + * \param addr device address of allocated memory */ - void FreeInSection(SectionKind type, DevBaseOffset ptr); + void FreeInSection(SectionKind type, DevPtr addr); /*! * \brief read string from device to host - * \param str_offset device offset of first character of string + * \param str_addr device address of first character of string * \return host copy of device string that was read */ - std::string ReadString(DevBaseOffset str_offset); - - /*! - * \brief sets up runtime metadata for `func` and copies arguments for on-device execution - * \param func address of the function to be executed - * \param args args to the packed function - */ - void PushToExecQueue(DevBaseOffset func, const TVMArgs& args); - - /*! - * \brief loads binary onto device - * \param binary_path path to binary object file - * \param patch_dylib_pointers whether runtime API function pointer patching is needed - * \return info about loaded binary - */ - BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers = true); + std::string ReadString(DevPtr str_addr); /*! 
* \brief read value of symbol from device memory @@ -174,16 +204,17 @@ class MicroSession : public ModuleNode { /*! \brief array of memory allocators for each on-device section */ std::shared_ptr section_allocators_[static_cast(SectionKind::kNumKinds)]; - /*! \brief total number of bytes of usable device memory for this session */ - size_t memory_size_; - /*! \brief uTVM runtime binary info */ - BinaryInfo runtime_bin_info_; - /*! \brief path to uTVM runtime source code */ - std::string runtime_binary_path_; - /*! \brief offset of the runtime entry function */ - DevBaseOffset utvm_main_symbol_; - /*! \brief offset of the runtime exit breakpoint */ - DevBaseOffset utvm_done_symbol_; + /*! \brief number of bytes in a word on the target device */ + size_t word_size_; + /*! \brief whether the target device requires a thumb-mode bit on function addresses + * + * ARM and other manufacturers use the lowest bit of a function address to determine + * whether it's a "thumb mode" function. The Thumb ISA is more restricted, but + * results in more compact binaries. + */ + bool thumb_mode_; + /*! \brief symbol map for the device runtime */ + SymbolMap runtime_symbol_map_; /*! * \brief patches a function pointer in this module to an implementation @@ -191,12 +222,6 @@ class MicroSession : public ModuleNode { */ void PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name); - /*! - * \brief sets the runtime binary path - * \param path to runtime binary - */ - void SetRuntimeBinaryPath(std::string path); - /*! * \brief appends arguments to the host-side buffer of `encoder` * \param encoder encoder being used to append `args` @@ -211,6 +236,7 @@ class MicroSession : public ModuleNode { * \param arr TVMArray to be appended * \return device address of the allocated `TVMArray` */ + template DevPtr EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr); /*! 
@@ -227,19 +253,12 @@ class MicroSession : public ModuleNode { return section_allocators_[static_cast(kind)]; } - /*! - * \brief returns the symbol map for the uTVM runtime - * \return reference to symbol map - */ - const SymbolMap& runtime_symbol_map() { - return runtime_bin_info_.symbol_map; - } - /*! * \brief Push a new session context onto the thread-local stack. * The session on top of the stack is used as the current global session. */ static void EnterWithScope(ObjectPtr session); + /*! * \brief Pop a session off the thread-local context stack, * restoring the previous session as the current context. @@ -260,6 +279,118 @@ struct MicroDevSpace { ObjectPtr session; }; +// TODO(weberlo): maybe templatize serialization to reduce redundancy + +/*! \brief TVM array for serialization to 32-bit devices */ +struct TVMArray32 { + TVMArray32( + TargetVal data, + DLContext ctx, + int32_t ndim, + DLDataType dtype, + TargetVal shape, + TargetVal strides, + TargetVal byte_offset) + : data(data.val32), + ctx(ctx), + ndim(ndim), + pad0(0), + dtype(dtype), + shape(shape.val32), + strides(strides.val32), + pad1(0), + byte_offset(byte_offset.val32), + pad2(0) { } + + /*! \brief opaque pointer to the allocated data */ + uint32_t data; + /*! \brief The device context of the tensor */ + DLContext ctx; + /*! \brief Number of dimensions */ + int32_t ndim; + /*! \brief Padding to enforce struct alignment */ + uint32_t pad0; + /*! \brief The data type of the pointer */ + DLDataType dtype; + /*! \brief The shape of the tensor */ + uint32_t shape; + /*! + * \brief strides of the tensor, + * can be NULL, indicating tensor is compact. + */ + uint32_t strides; + /*! \brief Padding to enforce struct alignment */ + uint32_t pad1; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint32_t byte_offset; + /*! \brief Padding to enforce struct alignment */ + uint32_t pad2; +}; + +/*! 
\brief TVM array for serialization to 64-bit devices */ +struct TVMArray64 { + TVMArray64( + TargetVal data, + DLContext ctx, + int32_t ndim, + DLDataType dtype, + TargetVal shape, + TargetVal strides, + TargetVal byte_offset) + : data(data.val64), + ctx(ctx), + ndim(ndim), + pad0(0), + dtype(dtype), + shape(shape.val64), + strides(strides.val64), + byte_offset(byte_offset.val64) { } + + /*! \brief opaque pointer to the allocated data */ + uint64_t data; + /*! \brief The device context of the tensor */ + DLContext ctx; + /*! \brief Number of dimensions */ + int32_t ndim; + /*! \brief Padding to enforce struct alignment */ + uint32_t pad0; + /*! \brief The data type of the pointer */ + DLDataType dtype; + /*! \brief The shape of the tensor */ + uint64_t shape; + /*! + * \brief strides of the tensor, + * can be NULL, indicating tensor is compact. + */ + uint64_t strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +}; + +/*! \brief MicroTVM task for serialization to 32-bit devices */ +typedef struct StructUTVMTask32 { + /*! \brief Pointer to function to call for this task */ + uint32_t func; + /*! \brief Array of argument values */ + uint32_t arg_values; + /*! \brief Array of type codes for each argument value */ + uint32_t arg_type_codes; + /*! \brief Number of arguments */ + int32_t num_args; +} UTVMTask32; + +/*! \brief MicroTVM task for serialization to 64-bit devices */ +typedef struct StructUTVMTask64 { + /*! \brief Pointer to function to call for this task */ + uint64_t func; + /*! \brief Array of argument values */ + uint64_t arg_values; + /*! \brief Array of type codes for each argument value */ + uint64_t arg_type_codes; + /*! 
\brief Number of arguments */ + int32_t num_args; +} UTVMTask64; + } // namespace runtime } // namespace tvm #endif // TVM_RUNTIME_MICRO_MICRO_SESSION_H_ diff --git a/src/runtime/micro/openocd_low_level_device.cc b/src/runtime/micro/openocd_low_level_device.cc index 97730efef4ff..5e6685a3a09b 100644 --- a/src/runtime/micro/openocd_low_level_device.cc +++ b/src/runtime/micro/openocd_low_level_device.cc @@ -37,21 +37,20 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { public: /*! * \brief constructor to initialize connection to openocd device - * \param base_addr base address of the device * \param server_addr address of the OpenOCD server to connect to * \param port port of the OpenOCD server to connect to */ - explicit OpenOCDLowLevelDevice(std::uintptr_t base_addr, - const std::string& server_addr, + explicit OpenOCDLowLevelDevice(const std::string& server_addr, int port) : socket_() { - socket_.Connect(tvm::common::SockAddr(server_addr.c_str(), port)); - socket_.cmd_builder() << "reset halt"; - socket_.SendCommand(); - base_addr_ = base_addr; - CHECK(base_addr_ % 8 == 0) << "base address not aligned to 8 bytes"; + server_addr_ = server_addr; + port_ = port; + + socket_.Connect(tvm::common::SockAddr(server_addr_.c_str(), port_)); + socket_.cmd_builder() << "halt 0"; + socket_.SendCommand(); } - void Read(DevBaseOffset offset, void* buf, size_t num_bytes) { + void Read(DevPtr addr, void* buf, size_t num_bytes) { if (num_bytes == 0) { return; } @@ -59,7 +58,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { // TODO(weberlo): Refactor between read and write. // Check if we need to chunk this write request. 
if (num_bytes > kMemTransferLimit) { - DevBaseOffset curr_offset = offset; char* curr_buf_ptr = reinterpret_cast(buf); while (num_bytes != 0) { size_t amount_to_read; @@ -68,8 +66,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { } else { amount_to_read = num_bytes; } - Read(offset, reinterpret_cast(curr_buf_ptr), amount_to_read); - offset += amount_to_read; + Read(addr, reinterpret_cast(curr_buf_ptr), amount_to_read); + addr += amount_to_read; curr_buf_ptr += amount_to_read; num_bytes -= amount_to_read; } @@ -79,7 +77,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { socket_.cmd_builder() << "array unset output"; socket_.SendCommand(); - DevPtr addr = DevPtr(base_addr_ + offset.value()); socket_.cmd_builder() << "mem2array output" << " " << std::dec << kWordSize @@ -122,14 +119,13 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { } } - void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { + void Write(DevPtr addr, const void* buf, size_t num_bytes) { if (num_bytes == 0) { return; } // Check if we need to chunk this write request. 
if (num_bytes > kMemTransferLimit) { - DevBaseOffset curr_offset = offset; const char* curr_buf_ptr = reinterpret_cast(buf); while (num_bytes != 0) { size_t amount_to_write; @@ -138,8 +134,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { } else { amount_to_write = num_bytes; } - Write(offset, reinterpret_cast(curr_buf_ptr), amount_to_write); - offset += amount_to_write; + Write(addr, reinterpret_cast(curr_buf_ptr), amount_to_write); + addr += amount_to_write; curr_buf_ptr += amount_to_write; num_bytes -= amount_to_write; } @@ -166,7 +162,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { socket_.SendCommand(); } { - DevPtr addr = DevPtr(base_addr_ + offset.value()); socket_.cmd_builder() << "array2mem input" << " " << std::dec << kWordSize @@ -176,20 +171,14 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { } } - void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { + void Execute(DevPtr func_addr, DevPtr breakpoint_addr) { socket_.cmd_builder() << "halt 0"; socket_.SendCommand(); - // Set up the stack pointer. - DevPtr stack_end = stack_top() - 8; - socket_.cmd_builder() << "reg sp " << stack_end.cast_to(); - socket_.SendCommand(); - // Set a breakpoint at the beginning of `UTVMDone`. - socket_.cmd_builder() << "bp " << ToDevPtr(breakpoint).cast_to() << " 2"; + socket_.cmd_builder() << "bp " << breakpoint_addr.cast_to() << " 2"; socket_.SendCommand(); - DevPtr func_addr = DevPtr(base_addr_ + func_offset.value()); socket_.cmd_builder() << "resume " << func_addr.cast_to(); socket_.SendCommand(); @@ -200,34 +189,21 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { socket_.SendCommand(); // Remove the breakpoint. 
- socket_.cmd_builder() << "rbp " << ToDevPtr(breakpoint).cast_to(); + socket_.cmd_builder() << "rbp " << breakpoint_addr.cast_to(); socket_.SendCommand(); } - void SetStackTop(DevBaseOffset stack_top) { - stack_top_ = DevPtr(base_addr_ + stack_top.value()); - } - - std::uintptr_t base_addr() const final { - return base_addr_; - } - - DevPtr stack_top() const { - CHECK(stack_top_ != nullptr) << "stack top was never initialized"; - return stack_top_; - } - const char* device_type() const final { return "openocd"; } private: - /*! \brief base address of the micro device memory region */ - std::uintptr_t base_addr_; - /*! \brief top of the stack section */ - DevPtr stack_top_; /*! \brief socket used to communicate with the device through Tcl */ TclSocket socket_; + /*! \brief address of OpenOCD server */ + std::string server_addr_; + /*! \brief port of OpenOCD server */ + int port_; /*! \brief number of bytes in a word on the target device (64-bit) */ static const constexpr ssize_t kWordSize = 8; @@ -239,11 +215,10 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { static const constexpr int kWaitTime = 10000; }; -const std::shared_ptr OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr, - const std::string& server_addr, +const std::shared_ptr OpenOCDLowLevelDeviceCreate(const std::string& server_addr, int port) { std::shared_ptr lld = - std::make_shared(base_addr, server_addr, port); + std::make_shared(server_addr, port); return lld; } diff --git a/src/runtime/micro/target_data_layout_encoder.h b/src/runtime/micro/target_data_layout_encoder.h index a5b649bd131f..e0275165e774 100644 --- a/src/runtime/micro/target_data_layout_encoder.h +++ b/src/runtime/micro/target_data_layout_encoder.h @@ -25,7 +25,7 @@ #define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ #include -#include "device/utvm_runtime.h" +#include "host_driven/utvm_runtime.h" namespace tvm { namespace runtime { @@ -96,9 +96,9 @@ class TargetDataLayoutEncoder { * \brief constructor * \param 
start_addr start address of the encoder in device memory */ - explicit TargetDataLayoutEncoder(DevPtr start_addr) - : buf_(std::vector()), curr_offset_(0) { - start_addr_ = DevPtr(UpperAlignValue(start_addr.value(), 8)); + explicit TargetDataLayoutEncoder(DevPtr start_addr, size_t word_size) + : buf_(std::vector()), curr_offset_(0), word_size_(word_size) { + start_addr_ = DevPtr(UpperAlignValue(start_addr.value().val64, word_size_)); } /*! @@ -108,7 +108,7 @@ class TargetDataLayoutEncoder { */ template Slot Alloc(size_t num_elems = 1) { - curr_offset_ = UpperAlignValue(curr_offset_, 8); + curr_offset_ = UpperAlignValue(curr_offset_, word_size_); size_t size = sizeof(T) * num_elems; if (curr_offset_ + size > buf_.size()) { buf_.resize(curr_offset_ + size); @@ -141,6 +141,8 @@ class TargetDataLayoutEncoder { size_t curr_offset_; /*! \brief start address of the encoder in device memory */ DevPtr start_addr_; + /*! \brief number of bytes in a word on the target device */ + size_t word_size_; }; template diff --git a/src/runtime/rpc/rpc_session.cc b/src/runtime/rpc/rpc_session.cc index 3500533157f5..c7e524d2f295 100644 --- a/src/runtime/rpc/rpc_session.cc +++ b/src/runtime/rpc/rpc_session.cc @@ -21,6 +21,7 @@ * \file rpc_session.cc * \brief RPC session for remote function call. */ +#include #include #include #include @@ -40,6 +41,7 @@ namespace tvm { namespace runtime { + // Temp buffer for data array struct RPCByteArrayBuffer { TVMByteArray arr; @@ -1215,11 +1217,45 @@ void RPCSession::EventHandler::HandlePackedCall() { CHECK_EQ(state_, kRecvCode); } +PackedFunc MicroTimeEvaluator( + PackedFunc pf, + TVMContext ctx, + int number, + int repeat) { + auto ftimer = [pf, ctx, number, repeat](TVMArgs args, TVMRetValue *rv) mutable { + TVMRetValue temp; + std::ostringstream os; + // skip first time call, to activate lazy compilation components. 
+ pf.CallPacked(args, &temp); + DeviceAPI::Get(ctx)->StreamSync(ctx, nullptr); + for (int i = 0; i < repeat; ++i) { + double speed = 0.0; + for (int j = 0; j < number; ++j) { + pf.CallPacked(args, &temp); + DeviceAPI::Get(ctx)->StreamSync(ctx, nullptr); + speed += (temp.operator double()) / number; + } + os.write(reinterpret_cast(&speed), sizeof(speed)); + } + std::string blob = os.str(); + TVMByteArray arr; + arr.size = blob.length(); + arr.data = blob.data(); + // return the time. + *rv = arr; + }; + return PackedFunc(ftimer); +} + PackedFunc WrapTimeEvaluator(PackedFunc pf, TVMContext ctx, int number, int repeat, int min_repeat_ms) { + if (static_cast(ctx.device_type) == static_cast(kDLMicroDev)) { + return MicroTimeEvaluator(pf, ctx, number, repeat); + } + auto ftimer = [pf, ctx, number, repeat, min_repeat_ms](TVMArgs args, TVMRetValue *rv) mutable { TVMRetValue temp; std::ostringstream os; diff --git a/tests/lint/check_file_type.py b/tests/lint/check_file_type.py index 0998c789332c..5b83641eb8e1 100644 --- a/tests/lint/check_file_type.py +++ b/tests/lint/check_file_type.py @@ -25,6 +25,7 @@ "cc", "c", "h", + "s", "rs", "m", "mm", diff --git a/tests/python/contrib/test_binutil.py b/tests/python/contrib/test_binutil.py index e4ebd74a1a9c..44739bbda3cb 100644 --- a/tests/python/contrib/test_binutil.py +++ b/tests/python/contrib/test_binutil.py @@ -73,12 +73,21 @@ def test_tvm_callback_relocate_binary(): with open(tmp_bin, "wb") as f: f.write(binary) def verify(): - text_loc_str = "0x0" - rodata_loc_str = "0x10000" - data_loc_str = "0x20000" - bss_loc_str = "0x30000" + word_size = 8 + text_loc = 0x0 + rodata_loc = 0x10000 + data_loc = 0x20000 + bss_loc = 0x30000 + stack_end = 0x50000 rel_bin = tvm_callback_relocate_binary( - tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX) + tmp_bin, + word_size, + text_loc, + rodata_loc, + data_loc, + bss_loc, + stack_end, + TOOLCHAIN_PREFIX) print("Relocated binary section sizes") 
test_tvm_callback_get_section_size(binary=rel_bin) relf = tmp_dir.relpath("rel.bin") @@ -88,10 +97,6 @@ def verify(): stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (out, _) = nm_proc.communicate() - # Ensure the relocated symbols are within the ranges we specified. - text_loc = int(text_loc_str, 16) - data_loc = int(data_loc_str, 16) - bss_loc = int(bss_loc_str, 16) symbol_entries = out.decode("utf-8").split("\n") for entry in symbol_entries: if len(entry) == 0: @@ -127,12 +132,21 @@ def test_tvm_callback_get_symbol_map(): with open(tmp_bin, "wb") as f: f.write(binary) def verify(): - text_loc_str = "0x0" - rodata_loc_str = "0x10000" - data_loc_str = "0x20000" - bss_loc_str = "0x30000" + word_size = 8 + text_loc = 0x0 + rodata_loc = 0x10000 + data_loc = 0x20000 + bss_loc = 0x30000 + stack_end = 0x50000 rel_bin = tvm_callback_relocate_binary( - tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX) + tmp_bin, + word_size, + text_loc, + rodata_loc, + data_loc, + bss_loc, + stack_end, + TOOLCHAIN_PREFIX) symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX) symbols = set() for i, line in enumerate(symbol_map.split('\n')): diff --git a/tests/python/unittest/test_runtime_micro.py b/tests/python/unittest/test_runtime_micro.py index 82c279712e20..e94c09991b35 100644 --- a/tests/python/unittest/test_runtime_micro.py +++ b/tests/python/unittest/test_runtime_micro.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - import os import numpy as np @@ -22,38 +21,13 @@ from tvm.contrib import graph_runtime, util from tvm import relay import tvm.micro as micro +from tvm.micro import create_micro_mod from tvm.relay.testing import resnet # Use the host emulated micro device. -DEVICE_TYPE = "host" -TOOLCHAIN_PREFIX = "" - -def create_micro_mod(c_mod, toolchain_prefix): - """Produces a micro module from a given module. 
- - Parameters - ---------- - c_mod : tvm.module.Module - module with "c" as its target backend - - toolchain_prefix : str - toolchain prefix to be used (see `tvm.micro.Session` docs) - - Return - ------ - micro_mod : tvm.module.Module - micro module for the target device - """ - temp_dir = util.tempdir() - lib_obj_path = temp_dir.relpath("dev_lib.obj") - c_mod.export_library( - lib_obj_path, - fcompile=tvm.micro.cross_compiler(toolchain_prefix=toolchain_prefix)) - micro_mod = tvm.module.load(lib_obj_path, "micro_dev") - return micro_mod - +DEV_CONFIG = micro.device.host.default_config() -def relay_micro_build(func, toolchain_prefix, params=None): +def relay_micro_build(func, dev_config, params=None): """Create a graph runtime module with a micro device context from a Relay function. Parameters @@ -61,6 +35,9 @@ def relay_micro_build(func, toolchain_prefix, params=None): func : relay.Function function to compile + dev_config : Dict[str, Any] + MicroTVM config dict for the target device + params : dict input parameters that do not change during inference @@ -71,24 +48,20 @@ def relay_micro_build(func, toolchain_prefix, params=None): """ with tvm.build_config(disable_vectorize=True): graph, c_mod, params = relay.build(func, target="c", params=params) - micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) + micro_mod = create_micro_mod(c_mod, dev_config) ctx = tvm.micro_dev(0) mod = graph_runtime.create(graph, micro_mod, ctx) mod.set_input(**params) return mod -# TODO(weberlo): Add example program to test scalar double/int TVMValue serialization. -# TODO(weberlo): How can we test the OpenOCD device? The CI would need to have OpenOCD -# and Spike installed. 
- def test_alloc(): """Test tensor allocation on the device.""" if not tvm.module.enabled("micro_dev"): return shape = (1024,) dtype = "float32" - with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): + with micro.Session(DEV_CONFIG): ctx = tvm.micro_dev(0) np_tensor = np.random.uniform(size=shape).astype(dtype) micro_tensor = tvm.nd.array(np_tensor, ctx) @@ -112,15 +85,14 @@ def test_add(): func_name = "fadd" c_mod = tvm.build(s, [A, B, C], target="c", name=func_name) - with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): - micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) + with micro.Session(DEV_CONFIG): + micro_mod = create_micro_mod(c_mod, DEV_CONFIG) micro_func = micro_mod[func_name] ctx = tvm.micro_dev(0) a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx) micro_func(a, b, c) - tvm.testing.assert_allclose( c.asnumpy(), a.asnumpy() + b.asnumpy()) @@ -143,8 +115,8 @@ def test_workspace_add(): func_name = "fadd_two_workspace" c_mod = tvm.build(s, [A, C], target="c", name=func_name) - with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): - micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) + with micro.Session(DEV_CONFIG): + micro_mod = create_micro_mod(c_mod, DEV_CONFIG) micro_func = micro_mod[func_name] ctx = tvm.micro_dev(0) a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) @@ -168,8 +140,8 @@ def test_graph_runtime(): z = relay.add(xx, relay.const(1.0)) func = relay.Function([x], z) - with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): - mod = relay_micro_build(func, TOOLCHAIN_PREFIX) + with micro.Session(DEV_CONFIG): + mod = relay_micro_build(func, DEV_CONFIG) x_in = np.random.uniform(size=shape[0]).astype(dtype) mod.run(x=x_in) @@ -195,9 +167,9 @@ def test_multiple_modules(): ret = relay.subtract(x, relay.const(1.0)) sub_const_func = relay.Function([x], ret) - with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): 
- add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) - sub_const_mod = relay_micro_build(sub_const_func, TOOLCHAIN_PREFIX) + with micro.Session(DEV_CONFIG): + add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG) + sub_const_mod = relay_micro_build(sub_const_func, DEV_CONFIG) x_in = np.random.uniform(size=shape[0]).astype(dtype) add_const_mod.run(x=x_in) @@ -223,8 +195,8 @@ def test_interleave_sessions(): ret = relay.add(x, relay.const(1.0)) add_const_func = relay.Function([x], ret) - sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) - sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) + sess_a = micro.Session(DEV_CONFIG) + sess_b = micro.Session(DEV_CONFIG) with sess_a: np_tensor_a = np.random.uniform(size=shape).astype(dtype) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) @@ -232,13 +204,13 @@ def test_interleave_sessions(): np_tensor_b = np.random.uniform(size=shape).astype(dtype) micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0)) with sess_a: - add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) + add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG) add_const_mod.run(x=micro_tensor_a) add_result = add_const_mod.get_output(0).asnumpy() tvm.testing.assert_allclose( add_result, np_tensor_a + 1.0) with sess_b: - add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) + add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG) add_const_mod.run(x=micro_tensor_b) add_result = add_const_mod.get_output(0).asnumpy() tvm.testing.assert_allclose( @@ -257,15 +229,15 @@ def test_nested_sessions(): ret = relay.add(x, relay.const(1.0)) add_const_func = relay.Function([x], ret) - sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) - sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) + sess_a = micro.Session(DEV_CONFIG) + sess_b = micro.Session(DEV_CONFIG) with sess_a: np_tensor_a = np.random.uniform(size=shape).astype(dtype) micro_tensor_a = tvm.nd.array(np_tensor_a, 
tvm.micro_dev(0)) with sess_b: np_tensor_b = np.random.uniform(size=shape).astype(dtype) micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0)) - add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) + add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG) add_const_mod.run(x=micro_tensor_a) add_result = add_const_mod.get_output(0).asnumpy() tvm.testing.assert_allclose( @@ -284,12 +256,12 @@ def test_inactive_session_use(): ret = relay.add(x, relay.const(1.0)) add_const_func = relay.Function([x], ret) - sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) - sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) + sess_a = micro.Session(DEV_CONFIG) + sess_b = micro.Session(DEV_CONFIG) with sess_a: np_tensor_a = np.random.uniform(size=shape).astype(dtype) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) - add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) + add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG) with sess_b: # These objects belong to `sess_a`.