Heterogeneous Runtime (apache#1695)
zhiics authored and tqchen committed Sep 22, 2018
1 parent ad394a5 commit 6d44dbd
Showing 4 changed files with 626 additions and 101 deletions.
32 changes: 26 additions & 6 deletions python/tvm/build_module.py
@@ -384,8 +384,14 @@ def build(sch,
           target=None,
           target_host=None,
           name="default_function",
-          binds=None):
-    """Build a function with arguments as signiture.
+          binds=None,
+          postpone_host_codegen=False):
+    """Build a function with arguments as signature. Code will be generated
+    for a device specified by the target. For homogeneous execution, a module
+    that contains both host and device code is returned. For heterogeneous
+    execution, a list of lowered functions for the host and a module
+    containing device code are returned; actual code generation for the host
+    module is postponed until code generation is finished for all devices.
 
     Parameters
     ----------
@@ -414,10 +420,18 @@ def build(sch,
         Dictionary that maps the binding of symbolic buffer to Tensor.
         By default, a new buffer is created for each tensor in the argument.
 
+    postpone_host_codegen : bool, optional
+        A bool value that indicates whether code generation for the host
+        module should be postponed. It is set to True for heterogeneous
+        execution and defaults to False.
+
     Returns
     -------
-    f : Function, or pair of functions
-        The result function.
+    ret : tvm.module, or (list of LoweredFunc, tvm.module) tuple
+        A module that combines both host and device code is returned when
+        postpone_host_codegen is not set. Otherwise, a list of lowered
+        functions for the host and a module containing only device code
+        are returned.
 
     Note
     ----
@@ -498,9 +512,15 @@ def build(sch,
     fdevice = [ir_pass.LowerIntrin(x, target_device.target_name) for x in fdevice]
     fhost = [ir_pass.LowerIntrin(x, target_host.target_name) for x in fhost]
     fhost = [ir_pass.CombineContextCall(x) for x in fhost]
-    mhost = codegen.build_module(fhost, str(target_host))
+
+    # When host code generation is postponed, return the lowered host
+    # functions together with the device module. All device modules will be
+    # imported into the host module after all of them are collected.
+    mdev = codegen.build_module(fdevice, str(target_device)) if fdevice else None
+    if postpone_host_codegen:
+        return fhost, mdev
+
+    mhost = codegen.build_module(fhost, str(target_host))
     if fdevice:
-        mdev = codegen.build_module(fdevice, str(target_device))
         mhost.import_module(mdev)
     return mhost
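As a usage illustration (not part of the diff): a minimal sketch of how a caller might drive the postponed path. The schedules s1, s2 and argument lists args1, args2 are hypothetical, and the final combine step mirrors what the non-postponed branch above does internally.

import tvm
from tvm import codegen

fhost_all = []
device_modules = []
# Lower each schedule for its own device target, deferring host codegen.
for sch, args, target in [(s1, args1, "cuda"), (s2, args2, "llvm")]:
    fhost, mdev = tvm.build(sch, args, target=target, target_host="llvm",
                            postpone_host_codegen=True)
    fhost_all += fhost
    if mdev is not None:
        device_modules.append(mdev)

# Generate host code once, then import every collected device module.
mhost = codegen.build_module(fhost_all, "llvm")
for mdev in device_modules:
    mhost.import_module(mdev)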
74 changes: 46 additions & 28 deletions python/tvm/contrib/graph_runtime.py
@@ -3,26 +3,24 @@

 from .._ffi.base import string_types
 from .._ffi.function import get_global_func
+from .._ffi.runtime_ctypes import TVMContext
 from ..rpc import base as rpc_base
 from .. import ndarray as nd
 
 
 def create(graph_json_str, libmod, ctx):
     """Create a runtime executor module given a graph and module.
 
     Parameters
     ----------
     graph_json_str : str or graph class
         The graph to be deployed in json format output by nnvm graph.
         The graph can only contain one operator(tvm_op) that
         points to the name of PackedFunc in the libmod.
 
     libmod : tvm.Module
         The module of the corresponding function
 
-    ctx : TVMContext
-        The context to deploy the module, can be local or remote.
+    ctx : TVMContext or list of TVMContext
+        The context to deploy the module. It can be local or remote when
+        there is only one TVMContext. Otherwise, the first context in the
+        list is used as the primary (fallback) context, and all contexts
+        should be given for heterogeneous execution.
 
     Returns
     -------
     graph_module : GraphModule
@@ -33,17 +31,42 @@ def create(graph_json_str, libmod, ctx):
         graph_json_str = graph_json_str._tvm_graph_json()
     except AttributeError:
         raise ValueError("Type %s is not supported" % type(graph_json_str))
-    device_type = ctx.device_type
-    device_id = ctx.device_id
-    if device_type >= rpc_base.RPC_SESS_MASK:
-        assert libmod.type_key == "rpc"
-        assert rpc_base._SessTableIndex(libmod) == ctx._rpc_sess._tbl_index
+
+    if isinstance(ctx, TVMContext):
+        ctx = [ctx]
+    elif not isinstance(ctx, (list, tuple)):
+        raise ValueError("ctx has to be the type of TVMContext or a list of "
+                         "TVMContext")
+    for cur_ctx in ctx:
+        if not isinstance(cur_ctx, TVMContext):
+            raise ValueError("ctx has to be the type of TVMContext or a list "
+                             "of TVMContext")
+
+    # device_type_id[0] and device_type_id[1] are used as the primary/fallback
+    # context type and id. All other ones are used as device contexts for
+    # heterogeneous execution.
+    num_rpc_ctx = 0
+    device_type_id = []
+    for cur_ctx in ctx:
+        device_type = cur_ctx.device_type
+        if device_type >= rpc_base.RPC_SESS_MASK:
+            assert libmod.type_key == "rpc"
+            assert rpc_base._SessTableIndex(
+                libmod) == cur_ctx._rpc_sess._tbl_index
+            num_rpc_ctx += 1
+            device_type = cur_ctx.device_type % rpc_base.RPC_SESS_MASK
+        device_type_id.append(device_type)
+        device_type_id.append(cur_ctx.device_id)
+
+    if 0 < num_rpc_ctx < len(ctx):
+        raise ValueError("Either all or none of the contexts should be rpc.")
+
+    if num_rpc_ctx == len(ctx):
         hmod = rpc_base._ModuleHandle(libmod)
-        fcreate = ctx._rpc_sess.get_function("tvm.graph_runtime.remote_create")
-        device_type = device_type % rpc_base.RPC_SESS_MASK
-        return GraphModule(fcreate(graph_json_str, hmod, device_type, device_id), ctx)
+        fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime.remote_create")
+        return GraphModule(fcreate(graph_json_str, hmod, *device_type_id))
 
     fcreate = get_global_func("tvm.graph_runtime.create")
-    return GraphModule(fcreate(graph_json_str, libmod, device_type, device_id), ctx)
+    return GraphModule(fcreate(graph_json_str, libmod, *device_type_id))
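A short end-to-end usage sketch (not part of the diff; graph_json, lib, the input name, and the shapes are hypothetical, and a GPU-enabled build is assumed): the first context acts as the primary/fallback device, and the remaining contexts serve the other device types assigned in the graph.

import numpy as np
import tvm
from tvm.contrib import graph_runtime

# graph_json and lib are assumed to come from a heterogeneous build.
module = graph_runtime.create(graph_json, lib, [tvm.cpu(0), tvm.gpu(0)])
module.set_input("data", np.random.rand(1, 3, 224, 224).astype("float32"))
module.run()
out = module.get_output(0, tvm.nd.empty((1, 1000), "float32"))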


class GraphModule(object):
@@ -58,18 +81,13 @@ class GraphModule(object):
     module : Module
         The internal tvm module that holds the actual graph functions.
 
-    ctx : TVMContext
-        The context this module is under
-
     Attributes
     ----------
     module : Module
         The internal tvm module that holds the actual graph functions.
-
-    ctx : TVMContext
-        The context this module is under
     """
 
-    def __init__(self, module, ctx):
+    def __init__(self, module):
         self.module = module
         self._set_input = module["set_input"]
         self._run = module["run"]
@@ -81,7 +99,6 @@ def __init__(self, module, ctx):
         except AttributeError:
             pass
         self._load_params = module["load_params"]
-        self.ctx = ctx
 
     def set_input(self, key=None, value=None, **params):
         """Set inputs to the module via kwargs
@@ -98,14 +115,14 @@ def set_input(self, key=None, value=None, **params):
             Additional arguments
         """
         if key:
-            self._set_input(key, nd.array(value, ctx=self.ctx))
+            self._get_input(key).copyfrom(value)
 
         if params:
             # upload big arrays first to avoid memory issue in rpc mode
             keys = list(params.keys())
             keys.sort(key=lambda x: -np.prod(params[x].shape))
             for k in keys:
-                self._set_input(k, nd.array(params[k], ctx=self.ctx))
+                self._get_input(k).copyfrom(params[k])
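The switch from nd.array(value, ctx=self.ctx) to self._get_input(key).copyfrom(value) works because the runtime has already allocated each input tensor on its proper device, so set_input only needs to copy host data into that existing tensor instead of guessing a single module-wide context. A standalone sketch of the same copy (shape, dtype, and context are illustrative):

import numpy as np
import tvm

# The runtime would hand back a device-resident NDArray like this one.
dev_arr = tvm.nd.empty((2, 2), "float32", tvm.gpu(0))
dev_arr.copyfrom(np.ones((2, 2), dtype="float32"))  # host-to-device copy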

     def run(self, **input_dict):
         """Run forward execution of the graph
@@ -177,7 +194,8 @@ def debug_get_output(self, node, out):
         if hasattr(self, '_debug_get_output'):
             self._debug_get_output(node, out)
         else:
-            raise RuntimeError("Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0")
+            raise RuntimeError(
+                "Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0")
         return out
 
     def load_params(self, params_bytes):
