Commit e6a934c: fix tutorial
comaniac committed Nov 28, 2019 (1 parent: 817f09b)
Showing 1 changed file with 55 additions and 53 deletions: tutorials/dev/custom_relay_backend.py
@@ -36,10 +36,9 @@
In this tutorial, we demonstrate how a hardware vendor can easily implement
a Relay backend to support a specialized hardware device/library. It mainly
takes two steps: 1) define whether an operator is supported under a given
template, and 2) specify how to compile and serialize the supported operators so
that they can ingest TVM-specific data formats, e.g. NDArray. We will demonstrate how
to add a new backend that uses open source compilers (e.g. GCC, LLVM, etc.) or any
proprietary compiler to execute a subgraph of a model without exposing the
IP of the customer's codegen tool chain. Note that you will need to add the
@@ -205,67 +204,42 @@ def visit_call(self, call):
######################################################################
# Implement The Codegen
# ---------------------
# The second step is implemented in C++ instead of Python.
# Specifically, we create src/relay/backend/contrib/gcc/codegen.cc and
# implement the codegen here. We need to implement two classes: `GccBuilder`
# and `GccCodegen`. `GccBuilder` derives from TVM's `ExprVisitor` and
# `ExternSourcePrinter` and generates the code that runs on your
# device from a given Relay subgraph. Typically, we use the expression visitor to
# traverse the Relay subgraph and collect the required information, such as
# operator shapes, attributes, and topology. In this example, we collect
# all of this information in `GccBuilder` class members, and generate the compilable
# C++ code in the `JIT` function, which is a unified interface for builders
# to output the generated code.
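######################################################################
# The visit-then-emit pattern described above can be sketched in plain
# Python. This is only a toy illustration of the idea: `MiniBuilder`,
# `Call`, and `Var` below are invented for this sketch and are not the
# actual TVM or C++ classes.

```python
class Var:
    """A toy leaf node standing in for a Relay variable."""
    def __init__(self, name):
        self.name = name

class Call:
    """A toy operator call standing in for a Relay CallNode."""
    def __init__(self, op, args):
        self.op = op
        self.args = args

class MiniBuilder:
    """Collect statements while visiting; emit everything in one place."""
    def __init__(self):
        self.stmts = []   # statements collected during traversal
        self.cnt = 0      # counter for fresh buffer names

    def visit(self, node):
        if isinstance(node, Var):
            return node.name
        # Post-order traversal: emit code for the arguments first.
        args = [self.visit(a) for a in node.args]
        out = "buf_%d" % self.cnt
        self.cnt += 1
        self.stmts.append("%s = %s(%s);" % (out, node.op, ", ".join(args)))
        return out

    def jit(self, root):
        """Unified exit point: traverse the tree, then print the result."""
        ret = self.visit(root)
        return "\n".join(self.stmts + ["return %s;" % ret])

# (a + b) * c
tree = Call("mul", [Call("add", [Var("a"), Var("b")]), Var("c")])
print(MiniBuilder().jit(tree))
```

# The real `GccBuilder` follows the same shape: `visit_call` records one
# statement per operator during the traversal, and `JIT` assembles the
# collected pieces into a compilable C++ function.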

######################################################################
# In addition, `GccCodegen` derives from `ExternCodegenBase`. You will need to implement
# a function called `CreateExternModule`, which accepts a subgraph as a Relay
# module or a Relay function and returns an extern module.
# After the codegen has been implemented, we register it as a Relay external backend.
# In this example, we use `TVM_REGISTER_API("relay.ext.gcc")` to register `gcc`
# as an external backend. The registration lets Relay make use of the codegen we just
# implemented when it sees an external function with the `External=gcc` attribute that we
# annotated in the previous step.

######################################################################
# Add Codegen to TVM Building Process
# -----------------------------------
# Finally, we include the implemented codegen in the CMake configuration so that
# it will be built along with TVM. In cmake/modules/contrib/Extern.cmake:
#
# list(FIND USE_EXTERN "gcc" _gcc_idx)
# if(_gcc_idx GREATER -1)
#   file(GLOB GCC_RELAY_CONTRIB_SRC src/relay/backend/contrib/gcc/codegen.cc)
#   list(APPEND COMPILER_SRCS ${GCC_RELAY_CONTRIB_SRC})
#   message(STATUS "Use extern library: GCC")
# endif()


######################################################################
# In order to enable the external codegen when building TVM, make sure you
# specify it in `config.cmake`:
# 1. cd build
# 2. set(USE_EXTERN gcc) in config.cmake
# 3. cmake ..; make -j
@@ -277,15 +251,43 @@ def visit_call(self, call):
# Multiple external backends can be enabled simultaneously by separating them with ";".
# For example: set(USE_EXTERN gcc;dnnl)

######################################################################
# We can then build the entire Relay program with the unified TVM build
# commands. Note that 1) the "llvm" target here means we use LLVM to compile the
# ops that our GCC codegen does not support; 2) the extra compilation flags required
# by your external codegen can be passed via `kwargs` when exporting the library.

from tvm.contrib import util

with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
json, lib, _ = relay.build(mod, "llvm")
kwargs = {"options": ["-O2", "-std=c++11"]}
tmp_path = util.tempdir()
lib_name = 'lib.so'
lib_path = tmp_path.relpath(lib_name)
lib.export_library(lib_path, fcompile=False, **kwargs)

######################################################################
# Finally, we can load the library back and verify its correctness.

lib = tvm.module.load(lib_path)

import numpy as np

a_data = np.random.rand(10, 10).astype('float32')
b_data = np.random.rand(10, 10).astype('float32')
c_data = np.random.rand(10, 10).astype('float32')
d_data = np.random.rand(10, 10).astype('float32')

from tvm.contrib import graph_runtime

ctx = tvm.cpu()
rt_mod = graph_runtime.create(json, lib, ctx)
rt_mod.set_input('a', a_data)
rt_mod.set_input('b', b_data)
rt_mod.set_input('c', c_data)
rt_mod.set_input('d', d_data)
rt_mod.run()
out = tvm.nd.empty((10, 10), ctx=ctx)
out = rt_mod.get_output(0, out)
tvm.testing.assert_allclose(out.asnumpy(), (a_data + b_data - c_data) * d_data)

print('Results are correct!')
