Implementation of uTVM (apache#3227)

* uTVM interfaces (apache#14) * some minor interface changes * implemented HostLowLevelDevice * added MicroDeviceAPI * implemented micro_common and added Python interfaces * current status, semi implemented micro session * added micro_common implementation and python interfaces (apache#18) * added micro_common implementation and python interfaces (apache#18) * current status, semi implemented * host test working * updated interfaces for MicroSession arguments allocation * make somewhat lint compatible * fix based on comments * added rounding macro * fix minor bug * improvements based on comments * Clean up `binutil.py` and make Python-3-compatible * Change argument allocation design * Address feedback and lint errors * Improve binutil tests * Simplify allocator (per @tqchen's suggestions) * Doc/style fixes * farts * mcgee * rodata section werks (and so does `test_runtime_micro_workspace.py`) * simple graph runtime werk * TEMP * ResNet works, yo * First round of cleanup * More cleanup * runs a dyson over the code * Another pass * Fix `make lint` issues * ready to pr... probably * final * Undo change * Fix rebase resolution * Minor fixes * Undo changes to C codegen tests * Add `obj_path` in `create_micro_lib` * TEMP * Address feedback * Add missing TODO * Partially address feedback * Fix headers * Switch to enum class for `SectionKind` * Add missing ASF header * Fix lint * Fix lint again * Fix lint * Kill lint warnings * Address feedback * Change Python interface to MicroTVM All interaction with the device is now through `Session` objects, which are used through Python's `with` blocks. 
* Reorder LowLevelDevice interface * Store shared ptr to session in all alloced objects * Move helper functions out of `tvm.micro` * Switch static char arr to vector * Improve general infra and code quality Does not yet address all of tqchen's feedback * Forgot a rename * Fix lint * Add ASF header * Fix lint * Partially address MarisaKirisame's feedback * Lint * Expose `MicroSession` as a node to Python * Revert to using `Session` constructor * Fix compiler error * (Maybe) fix CI error * Debugging * Remove * Quell lint * Switch to stack-based session contexts * Make uTVM less intrusive to host codegen And use SSA for operands of generated ternary operators * Inline UTVMArgs into UTVMTask struct * Remove `HostLowLevelDevice` header * Remove `BaseAddr` class * Address feedback * Add "utvm" prefix to global vars in runtime * Fix lint * Fix CI * Fix `test_binutil.py` * Fix submodules * Remove ResNet tests * Make `test_binutil.py` work with nose * Fix CI * I swear this actually fixes the binutil tests * lint * lint * Add fcompile-compatible cross-compile func * Add docs for uTVM runtime files * Move pointer patching into `MicroSession` * Fix lint * First attempt at unifying cross-compile APIs * Fix lint * Rename `cross_compile` back to `cc` * Address feedback * Remove commented code * Lint * Figure out failing function * Remove debugging code * Change "micro_dev" target to "micro" * Add checks in tests for whether uTVM is enabled * Add TODO for 32-bit support * Rename more "micro_dev" to "micro" * Undo rename We already have `tvm.micro` as a namespace. Can't have it as a method as well. * Fix failing CI Thanks to @tqchen for finding this bug. Emitting ternary operators for `min` and `max` causes concurrency bugs in CUDA, so we're moving the ternary op emissions from `CodeGenC` to `CodeGenCHost`. * Address feedback * Fix lint
wweic · Sep 6, 2019 · 2d66f23 · 2d66f23
1 parent bf14d62
commit 2d66f23
Show file tree

Hide file tree

Showing 37 changed files with 3,321 additions and 73 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -36,6 +36,7 @@ tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
 tvm_option(USE_SGX "Build with SGX" OFF)
 tvm_option(USE_RTTI "Build with RTTI" ON)
 tvm_option(USE_MSVC_MT "Build with MT" OFF)
+tvm_option(USE_MICRO "Build with Micro" OFF)
 tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF)
 tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." OFF)
 
@@ -208,6 +209,7 @@ include(cmake/modules/Metal.cmake)
 include(cmake/modules/ROCM.cmake)
 include(cmake/modules/SGX.cmake)
 include(cmake/modules/LLVM.cmake)
+include(cmake/modules/Micro.cmake)
 include(cmake/modules/ANTLR.cmake)
 include(cmake/modules/contrib/BLAS.cmake)
 include(cmake/modules/contrib/Random.cmake)

diff --git a/cmake/config.cmake b/cmake/config.cmake
@@ -62,6 +62,9 @@ set(USE_VULKAN OFF)
 # Whether enable OpenGL runtime
 set(USE_OPENGL OFF)
 
+# Whether to enable MicroTVM runtime
+set(USE_MICRO OFF)
+
 # Whether to enable SGX runtime
 #
 # Possible values for USE_SGX:

diff --git a/cmake/modules/Micro.cmake b/cmake/modules/Micro.cmake
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(USE_MICRO)  # Everything below applies only when the USE_MICRO option is enabled.
+  message(STATUS "Build with Micro support")
+  file(GLOB RUNTIME_MICRO_SRCS src/runtime/micro/*.cc)  # Sources matching src/runtime/micro/*.cc.
+  list(APPEND RUNTIME_SRCS ${RUNTIME_MICRO_SRCS})  # Append them to the RUNTIME_SRCS list.
+endif(USE_MICRO)
diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
@@ -81,6 +81,7 @@ typedef enum {
   kDLAOCL = 5,
   kDLSDAccel = 6,
   kOpenGL = 11,
+  kDLMicroDev = 13,
   // AddExtraTVMType which is not in DLPack here
 } TVMDeviceExtType;
 

diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h
@@ -215,6 +215,7 @@ inline const char* DeviceName(int type) {
     case kDLROCM: return "rocm";
     case kOpenGL: return "opengl";
     case kDLExtDev: return "ext_dev";
+    case kDLMicroDev: return "micro_dev";
     default: LOG(FATAL) << "unknown type =" << type; return "Unknown";
   }
 }

diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py
@@ -42,7 +42,7 @@
 
 from . import ndarray as nd
 from .ndarray import context, cpu, gpu, opencl, cl, vulkan, metal, mtl
-from .ndarray import vpi, rocm, opengl, ext_dev
+from .ndarray import vpi, rocm, opengl, ext_dev, micro_dev
 
 from ._ffi.runtime_ctypes import TypeCode, TVMType
 from ._ffi.ndarray import TVMContext

diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
@@ -143,6 +143,7 @@ class TVMContext(ctypes.Structure):
         10: 'rocm',
         11: 'opengl',
         12: 'ext_dev',
+        13: 'micro_dev',
     }
     STR2MASK = {
         'llvm': 1,
@@ -163,6 +164,7 @@ class TVMContext(ctypes.Structure):
         'rocm': 10,
         'opengl': 11,
         'ext_dev': 12,
+        'micro_dev': 13,
     }
     def __init__(self, device_type, device_id):
         super(TVMContext, self).__init__()

diff --git a/python/tvm/contrib/binutil.py b/python/tvm/contrib/binutil.py
@@ -0,0 +1,258 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Utilities for binary file manipulation"""
+import os
+import subprocess
+from . import util
+from .._ffi.base import py_str
+from ..api import register_func
+
+@register_func("tvm_callback_get_section_size")
+def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
+    """Finds size of the section in the binary.
+    Assumes `size` shell command exists (typically works only on Linux machines)
+
+    Parameters
+    ----------
+    binary_path : str
+        path of the binary file (a RuntimeError is raised if it does not exist)
+
+    section_name : str
+        name of section without its leading dot ("text", "rodata", "data" or "bss")
+
+    toolchain_prefix : str
+        prefix for binary names in target compiler toolchain
+
+    Returns
+    -------
+    size : integer
+        size in bytes (".sdata" counts toward "data" and ".sbss" toward "bss")
+    """
+    if not os.path.isfile(binary_path):
+        raise RuntimeError("no such file \"{}\"".format(binary_path))
+    # We use the "-A" flag here to get the ".rodata" section's size, which is
+    # not included by default.
+    size_proc = subprocess.Popen(
+        ["{}size".format(toolchain_prefix), "-A", binary_path], stdout=subprocess.PIPE)
+    (size_output, _) = size_proc.communicate()
+    size_output = size_output.decode("utf-8")
+    if size_proc.returncode != 0:
+        # `size_output` is already a decoded string, so it is appended as-is.
+        msg = "error in finding section size:\n" + size_output
+        raise RuntimeError(msg)
+
+    # TODO(weberlo): Refactor this method and `*relocate_binary` so they are
+    # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss".
+    section_mapping = {
+        ".text": [".text"],
+        ".rodata": [".rodata"],
+        ".data": [".data", ".sdata"],
+        ".bss": [".bss", ".sbss"],
+    }
+    sections_to_sum = section_mapping["." + section_name]
+    section_size = 0
+    # Skip the first two header lines in the `size` output.
+    for line in size_output.split("\n")[2:]:
+        tokens = line.split()
+        if len(tokens) != 3:
+            continue
+        entry_name = tokens[0]
+        entry_size = int(tokens[1])
+        if entry_name in sections_to_sum:
+            section_size += entry_size
+    return section_size
+
+
+@register_func("tvm_callback_relocate_binary")
+def tvm_callback_relocate_binary(
+        binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix):
+    """Links the binary against a generated script that pins each section's address
+
+    Parameters
+    ----------
+    binary_path : str
+        location of the binary file to relocate
+
+    text_addr : str
+        absolute address at which the text section is placed
+
+    rodata_addr : str
+        absolute address at which the rodata section is placed
+
+    data_addr : str
+        absolute address at which the data section is placed
+
+    bss_addr : str
+        absolute address at which the bss section is placed
+
+    toolchain_prefix : str
+        string prepended to "ld" to name the target toolchain's linker
+
+    Returns
+    -------
+    rel_bin : bytearray
+        bytes of the relocated binary produced by the linker
+    """
+    workdir = util.tempdir()
+    linked_obj = workdir.relpath("relocated.o")
+    # TODO(weberlo): There should be a better way to configure this for different archs.
+    # Toolchains whose prefix mentions "riscv" get an explicit OUTPUT_ARCH
+    # directive ahead of the SECTIONS block.
+    script_header = "OUTPUT_ARCH( \"riscv\" )\n\n" if "riscv" in toolchain_prefix else ""
+    # TODO(weberlo): Generate the script in a more procedural manner.
+    script_body = """
+SECTIONS
+{
+  . = %s;
+  . = ALIGN(8);
+  .text :
+  {
+    *(.text)
+    . = ALIGN(8);
+    *(.text*)
+  }
+  . = %s;
+  . = ALIGN(8);
+  .rodata :
+  {
+    *(.rodata)
+    . = ALIGN(8);
+    *(.rodata*)
+  }
+  . = %s;
+  . = ALIGN(8);
+  .data :
+  {
+    *(.data)
+    . = ALIGN(8);
+    *(.data*)
+    . = ALIGN(8);
+    *(.sdata)
+  }
+  . = %s;
+  . = ALIGN(8);
+  .bss :
+  {
+    *(.bss)
+    . = ALIGN(8);
+    *(.bss*)
+    . = ALIGN(8);
+    *(.sbss)
+  }
+}
+    """ % (text_addr, rodata_addr, data_addr, bss_addr)
+    # Write the generated linker script into the same temporary directory.
+    script_path = workdir.relpath("relocated.lds")
+    with open(script_path, "w") as script_file:
+        script_file.write(script_header + script_body)
+    # Link the input against the script; stderr is folded into stdout so a
+    # failure message carries everything the linker printed.
+    ld_cmd = ["{}ld".format(toolchain_prefix), binary_path,
+              "-T", script_path,
+              "-o", linked_obj]
+    linker = subprocess.Popen(ld_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    (ld_log, _) = linker.communicate()
+    if linker.returncode != 0:
+        raise RuntimeError("linking error using ld:\n" + py_str(ld_log))
+    # Hand the linked object back as raw bytes.
+    with open(linked_obj, "rb") as obj_file:
+        return bytearray(obj_file.read())
+
+
+@register_func("tvm_callback_read_binary_section")
+def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
+    """Extracts one section's bytes from a binary held in memory
+
+    Parameters
+    ----------
+    binary : bytearray
+        raw bytes of the binary to inspect
+
+    section : str
+        section name without its leading dot (a "." is prepended for objcopy)
+
+    toolchain_prefix : str
+        string prepended to "objcopy" to name the target toolchain's tool
+
+    Returns
+    -------
+    section_bin : bytearray
+        bytes dumped for the section, or an empty bytearray if no dump was written
+    """
+    scratch = util.tempdir()
+    image_path = scratch.relpath("temp.bin")
+    dump_path = scratch.relpath("tmp_section.bin")
+    # objcopy works on files, so spill the in-memory image to disk first.
+    with open(image_path, "wb") as image_file:
+        image_file.write(bytes(binary))
+    # `--dump-section .name=file` asks objcopy to write that section to `file`.
+    dump_cmd = [
+        "{}objcopy".format(toolchain_prefix),
+        "--dump-section", ".{}={}".format(section, dump_path),
+        image_path,
+    ]
+    dumper = subprocess.Popen(dump_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    # stderr is merged into stdout above, so `dump_log` holds all diagnostics.
+    (dump_log, _) = dumper.communicate()
+    if dumper.returncode != 0:
+        raise RuntimeError("error in using objcopy:\n" + py_str(dump_log))
+    if not os.path.isfile(dump_path):
+        # No dump file was produced: report the section as empty.
+        return bytearray("", "utf-8")
+    # The dump file holds the bytes written out for the requested section.
+    with open(dump_path, "rb") as dump_file:
+        return bytearray(dump_file.read())
+
+
+@register_func("tvm_callback_get_symbol_map")
+def tvm_callback_get_symbol_map(binary, toolchain_prefix):
+    """Lists the defined symbols of a binary together with their addresses
+
+    Parameters
+    ----------
+    binary : bytearray
+        raw bytes of the binary to inspect
+
+    toolchain_prefix : str
+        string prepended to "nm" to name the target toolchain's tool
+
+    Returns
+    -------
+    map_str : str
+        newline-terminated entries that alternate between a symbol name and
+        the address nm reported for it (name first, then address)
+    """
+    scratch = util.tempdir()
+    obj_path = scratch.relpath("tmp_obj.bin")
+    # nm reads from a file, so write the in-memory binary out first.
+    with open(obj_path, "wb") as obj_file:
+        obj_file.write(bytes(binary))
+    # "-C" demangles symbol names; "--defined-only" omits undefined symbols.
+    nm_cmd = ["{}nm".format(toolchain_prefix), "-C", "--defined-only", obj_path]
+    lister = subprocess.Popen(nm_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    (raw_listing, _) = lister.communicate()
+    if lister.returncode != 0:
+        raise RuntimeError("error in using nm:\n" + py_str(raw_listing))
+    # Each nm line is split on whitespace; field 2 is emitted as the key and
+    # field 0 as the value, each terminated by a newline (key first).
+    entries = []
+    for record in raw_listing.decode("utf8").splitlines():
+        fields = record.split()
+        entries.append(fields[2] + "\n")
+        entries.append(fields[0] + "\n")
+    return "".join(entries)