From 4b0116f295dcd7df6ac1e1c55e85fd634ce77698 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 6 Apr 2020 01:01:02 +0000 Subject: [PATCH 01/30] json runtime --- CMakeLists.txt | 2 + cmake/modules/JSON.cmake | 22 ++ cmake/modules/contrib/DNNL.cmake | 3 +- python/tvm/relay/analysis/analysis.py | 4 + .../contrib/codegen_json/codegen_json.cc | 352 +++++++++++++++++ src/relay/backend/graph_runtime_codegen.cc | 65 +--- src/relay/backend/utils.h | 17 + src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 65 ++++ src/runtime/json/json_node.h | 368 ++++++++++++++++++ src/runtime/json/json_runtime.h | 170 ++++++++ src/runtime/json/json_runtime_driver.cc | 210 ++++++++++ 11 files changed, 1215 insertions(+), 63 deletions(-) create mode 100644 cmake/modules/JSON.cmake create mode 100644 src/relay/backend/contrib/codegen_json/codegen_json.cc create mode 100644 src/runtime/contrib/dnnl/dnnl_json_runtime.cc create mode 100644 src/runtime/json/json_node.h create mode 100644 src/runtime/json/json_runtime.h create mode 100644 src/runtime/json/json_runtime_driver.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index aaddebdfe3c5..c0fa1cbf4230 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) +tvm_option(USE_JSON_RUNTIME "Build with JSON runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -305,6 +306,7 @@ if(USE_EXAMPLE_EXT_RUNTIME) endif(USE_EXAMPLE_EXT_RUNTIME) # Module rules +include(cmake/modules/JSON.cmake) include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) include(cmake/modules/Hexagon.cmake) diff --git a/cmake/modules/JSON.cmake b/cmake/modules/JSON.cmake new file mode 100644 index 000000000000..bfc07bb1b13a --- /dev/null +++ b/cmake/modules/JSON.cmake @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(USE_JSON_RUNTIME) + message(STATUS "Build with JSON runtime support") + file(GLOB RUNTIME_JSON_SRCS src/runtime/json/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_JSON_SRCS}) +endif(USE_JSON_RUNTIME) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index 3fd3f7cbc887..0b56bc9a9241 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -16,7 +16,8 @@ # under the License. 
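 # A sketch of how this path is enabled (assuming the usual config.cmake
 # workflow): set(USE_JSON_RUNTIME ON) together with
 # set(USE_DNNL_CODEGEN ON), then rebuild TVM.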
if(USE_DNNL_CODEGEN STREQUAL "ON") - file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/codegen.cc) + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.cc) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) diff --git a/python/tvm/relay/analysis/analysis.py b/python/tvm/relay/analysis/analysis.py index c237859eb987..adbc2e6222b1 100644 --- a/python/tvm/relay/analysis/analysis.py +++ b/python/tvm/relay/analysis/analysis.py @@ -313,6 +313,10 @@ def detect_feature(a, b=None): return {Feature(int(x)) for x in _ffi_api.detect_feature(a, b)} +def to_json(expr): + return _ffi_api.ToJSON(expr) + + def extract_fused_functions(mod): """Pass to extract IRModule of only fused primitive functions. diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc new file mode 100644 index 000000000000..9fc880ad3521 --- /dev/null +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file relay/backend/contrib/codegen_json.cc + * \brief Utilities for json codegen and runtime + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../../../runtime/json/json_node.h" +#include "../../../../runtime/json/json_runtime.h" +#include "../../utils.h" + +namespace tvm { +namespace relay { +namespace backend { +namespace contrib { + +using namespace tvm::runtime::json; + +using ShapeVector = std::vector >; +using TypeVector = std::vector; +using JSONGraphObjectPtr = std::shared_ptr; + +/*! \brief The artifacts that needs to be serialized. */ +struct JSONOutput { + std::string graph_json; + std::unordered_map params; +}; + +/*! + * \brief Helper class to extract all attributes of a certain op and save them + * into text format. 
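+ * Attribute values are flattened into vectors of strings; e.g. conv2d
+ * strides of (1, 1) are saved as ["1", "1"].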
+ */ +class OpAttrExtractor : public AttrVisitor { + public: + explicit OpAttrExtractor(JSONGraphObjectPtr node) : node_(node) {} + + template ::value>> + std::string Fp2String(const T value, int n = 16) { + std::ostringstream out; + out.precision(n); + out << value; + return out.str(); + } + + void SetNodeAttr(const char* key, const std::vector& value) { + std::vector attr; + attr.emplace_back(value); + node_->SetAttr(key, attr); + } + + void Visit(const char* key, double* value) final { + SetNodeAttr(key, {Fp2String(*value)}); + } + + void Visit(const char* key, int64_t* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, uint64_t* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, int* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, bool* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, std::string* value) final { + SetNodeAttr(key, {*value}); + } + + void Visit(const char* key, DataType* value) final { + if (!value->is_void()) { + SetNodeAttr(key, {runtime::DLDataType2String(*value)}); + } else { + SetNodeAttr(key, {""}); + } + } + + void Visit(const char* key, runtime::ObjectRef* value) final { + if (const auto* an = (*value).as()) { + std::vector attr; + for (size_t i = 0; i < an->data.size(); ++i) { + if (const auto* im = an->data[i].as()) { + attr.push_back(std::to_string(im->value)); + } else if (const auto* fm = an->data[i].as()) { + attr.push_back(Fp2String(fm->value)); + } else if (const auto* str = an->data[i].as()) { + String s = GetRef(str); + attr.push_back(s.operator std::string()); + } else { + LOG(FATAL) << "Not supported type: " << an->data[i]->GetTypeKey(); + } + } + SetNodeAttr(key, attr); + } else if (!(*value).defined()) { // Skip NullValue + SetNodeAttr(key, std::vector{""}); + } else { + LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey(); + } + } + + void Visit(const char* key, runtime::NDArray* value) final { + LOG(FATAL) << "NDArray is not allowed in op attribute"; + } + + void Visit(const char* key, void** value) final { + LOG(FATAL) << "void pointer is not allowed in op attribute"; + } + + void Extract(Object* node) { + if (node) { + reflection_->VisitAttrs(node, this); + } + } + + private: + JSONGraphObjectPtr node_; + ReflectionVTable* reflection_ = ReflectionVTable::Global(); +}; + +/*! \brief Serialize a Relay expression to JSON. */ +class JSONSerializer : public MemoizedExprTranslator> { + public: + void Serialize(const Expr& expr) { + relay::Function func = Downcast(expr); + // First we convert all the parameters into input nodes. + for (const auto& param : func->params) { + auto node_ptr = std::make_shared(param->name_hint(), "input" /* op_type_ */); + memo_[param] = AddNode(node_ptr, param); + } + heads_ = VisitExpr(func->body); + } + + /*! 
+   * \brief Save to JSON graph
+   *
+   * \param writer A json writer
+   */
+  void Save(dmlc::JSONWriter* writer) {
+    std::vector<size_t> arg_nodes;
+    for (size_t i = 0; i < nodes_.size(); ++i) {
+      auto node = nodes_[i];
+      if (node->IsLeaf()) {
+        arg_nodes.push_back(i);
+      }
+    }
+    size_t num_entry = 0;
+    std::vector<size_t> node_row_ptr{0};
+    for (auto node : nodes_) {
+      num_entry += node->GetNumOutput();
+      node_row_ptr.push_back(num_entry);
+    }
+    writer->BeginObject();
+    writer->WriteObjectKeyValue("nodes", nodes_);
+    writer->WriteObjectKeyValue("arg_nodes", arg_nodes);
+    writer->WriteObjectKeyValue("heads", heads_);
+    writer->WriteObjectKeyValue("node_row_ptr", node_row_ptr);
+    writer->EndObject();
+  }
+
+  std::unordered_map<std::string, runtime::NDArray> GetParams() const {
+    return params_;
+  }
+
+ protected:
+  /*!
+   * \brief Add a node to graph.
+   *
+   * \param node A graph node. It is a shared pointer. Some attributes of it
+   * will be added, i.e. shape and type. These attributes are attached to
+   * the JSON graph in the end.
+   * \param expr The relay expression.
+   * \return A list of graph entry nodes. If the relay expr is a tuple type, we
+   * will flatten it.
+   */
+  std::vector<JSONGraphNodeEntry> AddNode(JSONGraphObjectPtr node, const Expr& expr) {
+    auto checked_type = expr->checked_type();
+    auto node_id = nodes_.size();
+    nodes_.push_back(node);
+    std::vector<JSONGraphNodeEntry> ret;
+    ShapeVector shape;
+    TypeVector dtype;
+    // Flatten tuple node.
+    if (const auto* tuple_type = checked_type.as<TupleTypeNode>()) {
+      for (size_t i = 0; i < tuple_type->fields.size(); ++i) {
+        const auto* tensor_type = tuple_type->fields[i].as<TensorTypeNode>();
+        CHECK(tensor_type) << "Expect TensorType, but received: "
+                           << tuple_type->fields[i]->GetTypeKey();
+        ret.push_back(JSONGraphNodeEntry(node_id, i));
+        shape.emplace_back(GetIntShape(tensor_type->shape));
+        dtype.emplace_back(DType2String(tensor_type->dtype));
+      }
+      node->SetNumOutput(tuple_type->fields.size());
+    } else {
+      const auto* tensor_type = checked_type.as<TensorTypeNode>();
+      CHECK(tensor_type) << "Expect TensorType, but received: "
<< checked_type->GetTypeKey(); + shape.emplace_back(GetIntShape(tensor_type->shape)); + dtype.emplace_back(DType2String(tensor_type->dtype)); + ret.push_back(JSONGraphNodeEntry(node_id, 0)); + } + std::vector shape_attrs; + shape_attrs.emplace_back(shape); + node->SetAttr("shape", shape_attrs); + + std::vector type_attrs; + type_attrs.emplace_back(dtype); + node->SetAttr("dtype", type_attrs); + return ret; + } + + void SetCallNodeAttribute(JSONGraphObjectPtr node, const CallNode* cn) { + OpAttrExtractor extractor(node); + extractor.Extract(const_cast(cn->attrs.get())); + } + + std::vector VisitExprDefault_(const Object* op) final { + LOG(FATAL) << "JSON runtime currently doesn't support " << op->GetTypeKey(); + return {}; + } + + std::vector VisitExpr_(const VarNode* vn) final { + CHECK(memo_.count(GetRef(vn))); + return memo_[GetRef(vn)]; + } + + std::vector VisitExpr_(const ConstantNode* cn) final { + std::string name = "const_" + std::to_string(params_.size()); + params_[name] = cn->data; + auto node = std::make_shared(name, "const" /* op_type_ */); + return AddNode(node, GetRef(cn)); + } + + std::vector VisitExpr_(const TupleNode* tn) final { + std::vector fields; + for (const auto& field : tn->fields) { + auto ref = VisitExpr(field); + fields.insert(fields.end(), ref.begin(), ref.end()); + } + return fields; + } + + std::vector VisitExpr_(const CallNode* cn) final { + Expr expr = GetRef(cn); + std::string name; + if (const auto* op_node = cn->op.as()) { + name = op_node->name; + } else if (const auto* fn = cn->op.as()) { + auto comp = fn->GetAttr(attr::kComposite); + CHECK(comp.defined()) << "JSON runtime only supports composite functions."; + name = comp.value().operator std::string(); + // TODO(zhiics) Handle composite function here. + LOG(FATAL) << "Composite function is not handled yet."; + } else { + LOG(FATAL) << "JSON runtime does not support calls to " << cn->op->GetTypeKey(); + } + + std::vector inputs; + for (const auto& arg : cn->args) { + auto res = VisitExpr(arg); + inputs.insert(inputs.end(), res.begin(), res.end()); + } + auto node = std::make_shared(name, /* name_ */ + "kernel", /* op_type_ */ + inputs, + 1 /* num_outputs_ */); + SetCallNodeAttribute(node, cn); + return AddNode(node, GetRef(cn)); + } + + std::vector VisitExpr_(const LetNode* ln) final { + CHECK_EQ(memo_.count(ln->var), 0); + memo_[ln->var] = VisitExpr(ln->value); + return VisitExpr(ln->body); + } + + std::vector VisitExpr_(const TupleGetItemNode* gtn) final { + auto vtuple = VisitExpr(gtn->tuple); + return {vtuple[gtn->index]}; + } + + std::vector VisitExpr_(const FunctionNode* fn) final { + CHECK(fn->GetAttr(attr::kComposite).defined()) + << "JSON runtime only supports composite functions"; + // FunctionNode should be handled by the caller. + return {}; + } + + private: + /*! \brief JSON graph nodes. */ + std::vector nodes_; + /*! \brief Output of the JSON graph. */ + std::vector heads_; + /*! \brief Constants. 
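+   * Names are generated as "const_<i>" in visit order and map to the
+   * NDArray values captured from each ConstantNode.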
*/ + std::unordered_map params_; +}; + +} // namespace contrib +} // namespace backend + +std::string ToJSON(const Expr& expr) { + backend::contrib::JSONSerializer converter; + converter.Serialize(expr); + + std::ostringstream os; + dmlc::JSONWriter writer(&os); + converter.Save(&writer); + backend::contrib::JSONOutput ret; + ret.graph_json = os.str(); + ret.params = converter.GetParams(); + + backend::contrib::JSONRuntimeBase jr(ret.graph_json); + return ret.graph_json; +} + +TVM_REGISTER_GLOBAL("relay.analysis.ToJSON") +.set_body_typed(ToJSON); + +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 19e67703946f..16f95a1b79df 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -28,12 +28,15 @@ #include #include +#include +#include #include #include #include #include "compile_engine.h" #include "utils.h" +#include "../../runtime/json/json_node.h" namespace tvm { namespace relay { @@ -625,12 +628,6 @@ TVM_REGISTER_GLOBAL("relay.build_module._GraphRuntimeCodegen") namespace dmlc { namespace json { -// JSON utils -template -inline bool SameType(const dmlc::any& data) { - return std::type_index(data.type()) == std::type_index(typeid(T)); -} - template <> struct Handler> { inline static void Write(dmlc::JSONWriter* writer, @@ -642,61 +639,5 @@ struct Handler> { LOG(FATAL) << "Not implemented."; } }; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, - const std::unordered_map& data) { - writer->BeginObject(); - for (const auto& kv : data) { - auto k = kv.first; - const dmlc::any& v = kv.second; - if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndObject(); - } - inline static void Read(dmlc::JSONReader* reader, - std::unordered_map* data) { - LOG(FATAL) << "Not implemented."; - } -}; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, const std::vector& data) { - writer->BeginArray(); - for (const auto& v : data) { - if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteArrayItem(dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndArray(); - } - inline static void Read(dmlc::JSONReader* reader, std::vector* data) { - LOG(FATAL) << "Not implemented."; - } -}; } // namespace json } // namespace dmlc diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h index cac6f55329c8..1fe14b8fedf8 100644 --- a/src/relay/backend/utils.h +++ b/src/relay/backend/utils.h @@ -118,6 +118,23 @@ inline const runtime::TypedPackedFunc GetTypedPackedFunc(const std:: CHECK(pf != nullptr) << "can not find packed function"; return runtime::TypedPackedFunc(*pf); } + +/*! 
+ * \brief Extract shape from an IndexExpr array to std::vector + * + * \param shape The shape in Array + * \return The converted shape in std::vector + */ +inline std::vector GetIntShape(const Array& shape) { + std::vector ret; + for (const auto& dim : shape) { + const int64_t* pval = tir::as_const_int(dim); + CHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey(); + ret.push_back(*pval); + } + return ret; +} + /*! * \brief Convert type to string * diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc new file mode 100644 index 000000000000..f9e6affc0196 --- /dev/null +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/dnnl/dnnl_json_runtime.cc + * \brief A simple JSON runtime for DNNL. + */ + +#include + +#include +#include + +#include "../../json/json_node.h" +#include "../../json/json_runtime.h" + +namespace tvm { +namespace runtime { +namespace contrib { + +using namespace tvm::runtime; +using namespace tvm::runtime::json; + +class DNNLJSONRuntime : public JSONRuntimeBase { + public: + explicit DNNLJSONRuntime(const std::string& graph_json) : JSONRuntimeBase(graph_json) {} + ~DNNLJSONRuntime() = default; + + void Run() override { + // Invoke the engine and return the result + } + + void Init() override { + // Create a engine here + } + + private: + // Engine +}; + +TVM_REGISTER_GLOBAL("runtime.ext.dnnl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + auto n = tvm::runtime::make_object(args[0].operator std::string()); + *rv = Module(n); +}); + +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/json/json_node.h b/src/runtime/json/json_node.h new file mode 100644 index 000000000000..3516cd220fbf --- /dev/null +++ b/src/runtime/json/json_node.h @@ -0,0 +1,368 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file src/runtime/json/json_node.h + * \brief The graph nodes used by JSON runtime. + */ + +#ifndef TVM_RUNTIME_JSON_JSON_NODE_H_ +#define TVM_RUNTIME_JSON_JSON_NODE_H_ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace json { + +using namespace tvm::runtime; +using JSONGraphAttrs = std::unordered_map; + +/*! + * \brief The node entry in the serialized json graph. + */ +class JSONGraphNodeEntry { + public: + // Constructors. + JSONGraphNodeEntry() = default; + JSONGraphNodeEntry(int id, int index, int version = 0) + : id_(id), index_(index), version_(version) {} + + /*! + * \brief Serialize a node entry. + * \param writer The json writer. + */ + void Save(dmlc::JSONWriter* writer) const { + writer->BeginArray(); + writer->WriteArrayItem(id_); + writer->WriteArrayItem(index_); + writer->WriteArrayItem(version_); + writer->EndArray(); + } + + /*! + * \brief Deserialize the json string into a node entry. + * \param reader The json reader. + */ + void Load(dmlc::JSONReader* reader) { + reader->BeginArray(); + CHECK(reader->NextArrayItem()) << "invalid json format"; + reader->Read(&id_); + CHECK(reader->NextArrayItem()) << "invalid json format"; + reader->Read(&index_); + if (reader->NextArrayItem()) { + reader->Read(&version_); + CHECK(!reader->NextArrayItem()) << "invalid json format"; + } else { + version_ = 0; + } + } + + uint32_t id_; + uint32_t index_; + uint32_t version_; +}; + +/*! + * \brief The node of the serialized json graph. It includes an array of + * entries. + */ +class JSONGraphNode { + public: + // Constructors. + JSONGraphNode() = default; + JSONGraphNode(const std::string& name, + const std::string& op_type, + const std::vector& inputs = {}, + size_t num_outputs = 1) { + name_ = name; + op_type_ = op_type; + num_inputs_ = inputs.size(); + inputs_ = inputs; + num_outputs_ = num_outputs; + } + + /*! + * \brief Serialize a node so that it can be saved to disk. + * \param writer The json writer. + */ + void Save(dmlc::JSONWriter* writer) { + writer->BeginObject(); + writer->WriteObjectKeyValue("op", op_type_); + writer->WriteObjectKeyValue("name", name_); + if (!inputs_.empty()) { + SetAttr("num_inputs", std::to_string(inputs_.size())); + SetAttr("num_outputs", std::to_string(num_outputs_)); + writer->WriteObjectKeyValue("inputs", this->inputs_); + } + if (!attrs_.empty()) { + writer->WriteObjectKeyValue("attrs", attrs_); + } + writer->EndObject(); + } + + /*! + * \brief Load the attribute of a node in the json string. + * \param reader The json reader. 
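+   * Recognized keys are num_inputs, num_outputs, shape and dtype; any
+   * other key is stored into attrs_ as a vector of strings.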
+ */ + void LoadAttrs(dmlc::JSONReader* reader) { + std::string key, value; + reader->BeginObject(); + while (reader->NextObjectItem(&key)) { + if (key == "num_inputs") { + reader->Read(&value); + num_inputs_ = strtoul(value.c_str(), nullptr, 10); + } else if (key == "num_outputs") { + reader->Read(&value); + num_outputs_ = strtoul(value.c_str(), nullptr, 10); + } else if (key == "dtype") { + std::vector tmp; + reader->BeginArray(); + CHECK(reader->NextArrayItem()); + reader->Read(&tmp); + CHECK(!reader->NextArrayItem()); + for (const auto& it : tmp) { + dtype_.push_back(tvm::runtime::String2DLDataType(it)); + } + } else if (key == "shape") { + reader->BeginArray(); + CHECK(reader->NextArrayItem()); + reader->Read(&shape_); + CHECK(!reader->NextArrayItem()); + } else { + reader->BeginArray(); + CHECK(reader->NextArrayItem()); + std::vector tmp; + reader->Read(&tmp); + attrs_[key] = tmp; + CHECK(!reader->NextArrayItem()); + } + } + CHECK_EQ(shape_.size(), dtype_.size()); + } + + /*! + * \brief Load a node in the json string. + * \param reader The json reader. + */ + void Load(dmlc::JSONReader* reader) { + reader->BeginObject(); + std::string key; + while (reader->NextObjectItem(&key)) { + if (key == "op") { + reader->Read(&op_type_); + } else if (key == "name") { + reader->Read(&name_); + } else if (key == "inputs") { + reader->Read(&inputs_); + } else if (key == "attr" || key == "attrs") { + this->LoadAttrs(reader); + } else { + LOG(FATAL) << "Unknown key: " << key; + } + } + } + + /*! + * \brief Check if a node is a leaf node, i.e. input to the graph. + * + * \return True if the node has no input, otherwise, false. + */ + bool IsLeaf() const { return inputs_.empty(); } + + /*! + * \brief Return the number of outputs of the node. + * + * \return The number of the output. + */ + uint32_t GetNumOutput() const { return num_outputs_; } + + /*! + * \brief Set the number of outputs of the node. + * + * \param num_outputs The number of output. + */ + void SetNumOutput(uint32_t num_outputs) { + num_outputs_ = num_outputs; + } + + /*! + * \brief Get the value of an attribute in the node. + * + * \tparam T The return type. + * \param key The key for lookup. + * + * \return The value. + */ + template + T GetAttr(const std::string& key) const { + CHECK_GT(attrs_.count(key), 0U) << "Key: " << key << "is not found"; + return dmlc::get(attrs_.at(key)); + } + + /*! + * \brief Set an attribute for the node. + * + * \tparam ValueT The type of the value being stored. + * \param key The key of the attribute. + * \param value The value of the attribute. + */ + template + void SetAttr(const std::string& key, const ValueT& value) { + attrs_[key] = value; + } + + virtual ~JSONGraphNode() {} + + private: + /*! \brief The number of input. */ + uint32_t num_inputs_{0}; + /*! \brief The number of output. */ + uint32_t num_outputs_{1}; + /*! \brief The name of the op. It is the symbol that used for runtime lookup. */ + std::string name_; + /*! \brief The operator type, i.e. input is "null". */ + std::string op_type_; + /*! \brief The shape of the node. */ + std::vector> shape_; + /*! \brief The type of the node. */ + std::vector dtype_; + /*! \brief The inputs of the node. */ + std::vector inputs_; + /*! + * \brief Attribute of the node. For simplicity, we store all attribute as + * a list of std::string. It's the developer's resposibility to check the + * required attribute of a certain op and convert it into the needed type. 
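+   * For instance, an integer attribute can be recovered with
+   *   std::stoi(node.GetAttr<std::vector<std::string>>("strides")[0]);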
+ * + * For example, for conv2d, this map could contain: + * attrs_["strides"] = ["1", "1"] + * attrs_["padding"] = ["0", "0", "0", "0"] + * attrs_["data_layout"] = ["NCHW"] + * + * when creating an execution engine, developers may need to use these + * attributes and they can convert it into the needed type, i.e. padding to + * int + */ + JSONGraphAttrs attrs_; + + friend class JSONRuntimeBase; +}; + +} // namespace json +} // namespace runtime +} // namespace tvm + +namespace dmlc { +namespace json { +// JSON utils +template +inline bool SameType(const dmlc::any& data) { + return std::type_index(data.type()) == std::type_index(typeid(T)); +} + +template <> +struct Handler> { + inline static void Write( + dmlc::JSONWriter* writer, + const std::shared_ptr& data) { + data->Save(writer); + } + + inline static void Read(dmlc::JSONReader* reader, + std::shared_ptr* data) { + (*data)->Load(reader); + } +}; + +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, + const std::vector& data) { + writer->BeginArray(); + for (const auto& v : data) { + if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteArrayItem(dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndArray(); + } + + inline static void Read(dmlc::JSONReader* reader, + std::vector* data) { + LOG(FATAL) << "Not implemented."; + } +}; + +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, + const std::unordered_map& data) { + writer->BeginObject(); + for (const auto& kv : data) { + auto k = kv.first; + const dmlc::any& v = kv.second; + if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndObject(); + } + + inline static void Read(dmlc::JSONReader* reader, + std::unordered_map* data) { + LOG(FATAL) << "Not implemented."; + } +}; + +} // namespace json +} // namespace dmlc + +#endif // TVM_RUNTIME_JSON_JSON_NODE_H_ diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h new file mode 100644 index 000000000000..6765fa49e0e1 --- /dev/null +++ b/src/runtime/json/json_runtime.h @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/json/json_runtime.h
+ * \brief Utilities for json runtime.
+ */
+
+#ifndef TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+#define TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "json_node.h"
+
+namespace tvm {
+namespace runtime {
+namespace json {
+
+/*!
+ * \brief A json runtime that executes the serialized JSON format. This runtime
+ * can be extended by user defined runtimes for execution.
+ */
+class JSONRuntimeBase : public ModuleNode {
+ public:
+  explicit JSONRuntimeBase(const std::string& graph_json) {
+    LoadGraph(graph_json);
+  }
+
+  // The type key of each subclass can be saved to the json file and then
+  // used to create the specific runtime during deserialization.
+  // virtual const char* type_key() const = 0;
+  const char* type_key() const { return ""; }
+
+  virtual void Init() { LOG(FATAL) << "NYI"; }
+
+  /*!
+   * \brief Get a packed function.
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  virtual PackedFunc GetFunction(const std::string& name,
+                                 const ObjectPtr<Object>& sptr_to_self) {
+    return PackedFunc();
+  }
+
+  // Run(TVMValue* value, int* type_code, int nargs), or
+  // Run(TVMArgs args, TVMRetValue rv)?
+  virtual void Run() { LOG(FATAL) << "NYI"; }
+
+  void SetInput(const std::string& name, const NDArray& data) {
+    auto it = input_map_.find(name);
+    CHECK(it != input_map_.end()) << "Not found input: " << name;
+    SetInput(it->second, data);
+  }
+
+  void SetInput(uint32_t index, const NDArray& data) {
+    CHECK_LT(static_cast<size_t>(index), input_nodes_.size());
+    uint32_t eid = EntryID(input_nodes_[index], 0);
+    data_entry_[eid] = data;
+  }
+
+  size_t NumOutputs() const { return outputs_.size(); }
+
+  ObjectRef GetOutput() {
+    // Return the NDArray directly if there is only one output.
+    if (NumOutputs() == 1) {
+      uint32_t eid = EntryID(outputs_[0]);
+      return data_entry_[eid];
+    }
+
+    // We need to return an ADTObj if there are multiple outputs.
+    std::vector<ObjectRef> outs;
+    for (size_t i = 0; i < NumOutputs(); i++) {
+      uint32_t eid = EntryID(outputs_[i]);
+      outs.push_back(data_entry_[eid]);
+    }
+    return ADT::Tuple(outs);
+  }
+
+ protected:
+  void LoadGraph(const std::string& graph_json) {
+    std::istringstream is(graph_json);
+    dmlc::JSONReader reader(&is);
+    this->Load(&reader);
+
+    for (size_t i = 0; i < input_nodes_.size(); i++) {
+      uint32_t nid = input_nodes_[i];
+      std::string& name = nodes_[nid].name_;
+      input_map_[name] = i;
+    }
+  }
+
+  void Load(dmlc::JSONReader* reader) {
+    reader->BeginObject();
+    std::string key;
+    while (reader->NextObjectItem(&key)) {
+      if (key == "nodes") {
+        reader->Read(&nodes_);
+      } else if (key == "arg_nodes") {
+        reader->Read(&input_nodes_);
+      } else if (key == "node_row_ptr") {
+        reader->Read(&node_row_ptr_);
+      } else if (key == "heads") {
+        reader->Read(&outputs_);
+      } else {
+        LOG(FATAL) << "Unknown key: " << key;
+      }
+    }
+  }
+
+  // Get the node entry index.
+  uint32_t EntryID(uint32_t nid, uint32_t index) const {
+    return node_row_ptr_[nid] + index;
+  }
+
+  // Get the node entry index.
+  uint32_t EntryID(const JSONGraphNodeEntry& e) const {
+    return EntryID(e.id_, e.index_);
+  }
+
+  // Number of node entries.
+  uint32_t NumEntries() const {
+    return node_row_ptr_.back();
+  }
+
+ protected:
+  /*! \brief The json graph nodes. */
+  std::vector<JSONGraphNode> nodes_;
+  /*! \brief The input nodes, including variables and constants. */
+  std::vector<uint32_t> input_nodes_;
+  /*! \brief Used for quick entry indexing. */
+  std::vector<uint32_t> node_row_ptr_;
+  /*! \brief Output entries. */
+  std::vector<JSONGraphNodeEntry> outputs_;
+  /*! \brief Data of that entry. */
+  std::vector<NDArray> data_entry_;
+  /*! \brief Map the input name to index. */
+  std::unordered_map<std::string, uint32_t> input_map_;
+};
+
+}  // namespace json
+}  // namespace runtime
+}  // namespace tvm
+#endif  // TVM_RUNTIME_JSON_JSON_RUNTIME_H_
diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc
new file mode 100644
index 000000000000..072a184d2f06
--- /dev/null
+++ b/src/runtime/json/json_runtime_driver.cc
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/json/json_runtime_driver.cc
+ * \brief The driver for json runtime.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "json_node.h"
+#include "json_runtime.h"
+
+namespace tvm {
+namespace runtime {
+namespace json {
+
+/*!
+ * \brief The class represents a json runtime driver. It is mainly responsible
+ * for 1) serializing and deserializing the json runtime artifacts, and 2)
+ * dispatching and invoking the actual runtime that interprets the json
+ * artifacts.
+ */
+class JSONRuntimeDriver : public ModuleNode {
+ public:
+  struct Subgraph {
+    std::string symbol_name;
+    std::string graph_json;
+    std::unordered_map<std::string, NDArray> weights;
+  };
+
+  explicit JSONRuntimeDriver(const std::string& graph_json) {
+    this->graph_json_ = graph_json;
+    Deserialize();
+  }
+
+  const char* type_key() const { return "JSONRuntime"; }
+
+  /*!
+   * \brief Get a packed function.
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  PackedFunc GetFunction(const std::string& name,
+                         const ObjectPtr<Object>& sptr_to_self) {
+    if (this->subgraphs_.count(name)) {
+      return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) {
+        auto json_rt = this->subgraphs_[name];
+        auto* json_rt_node = static_cast<JSONRuntimeBase*>(json_rt.operator->());
+        CHECK(json_rt_node);
+        // Set input, how to make sure it is only invoked once? Likely we don't
+        // really need this as we could directly set input when creating the
+        // engine, but what if the input for each inference varies.
+        // json_rt_node->SetInput();
+        //
+        // Execute the engine
+        json_rt_node->Run();
+
+        // Get the output, set rv or fill directly to args?
+        *rv = json_rt_node->GetOutput();
+      });
+    } else {
+      // Issue a warning when we don't find the symbol from the module. Note
+      // we don't kill the execution here as the symbol may exist in other
+      // runtime modules.
+      LOG(WARNING) << "Cannot find " << name << " from json runtime";
+      return PackedFunc();
+    }
+  }
+
+  void Deserialize() {
+    std::vector<Subgraph> subgraphs;
+    dmlc::MemoryStringStream memstrm(&graph_json_);
+    dmlc::Stream* strm = &memstrm;
+    // Header
+    uint64_t header;
+    CHECK(strm->Read(&header)) << "Invalid serialized file format";
+
+    // Compiler name
+    std::string compiler_name;
+    CHECK(strm->Read(&compiler_name)) << "Invalid serialized file format";
+
+    uint64_t num_subgraphs;
+    CHECK(strm->Read(&num_subgraphs)) << "Invalid serialized file format";
+    // CHECK(header == kTVMJSONRuntimeMagic) << "Invalid serialized file format";
+
+    for (uint64_t i = 0; i < num_subgraphs; i++) {
+      Subgraph g;
+      // Load the symbol for runtime lookup.
+      std::string symbol_name;
+      CHECK(strm->Read(&symbol_name)) << "Invalid serialized file format";
+      g.symbol_name = symbol_name;
+
+      // Load the graph representation.
+      std::string json_graph;
+      CHECK(strm->Read(&json_graph)) << "Invalid serialized file format";
+      g.graph_json = json_graph;
+
+      // Load the weights for the graph.
+      uint64_t num_params;
+      CHECK(strm->Read(&num_params)) << "Invalid serialized file format";
+
+      std::vector<std::string> names;
+      CHECK(strm->Read(&names)) << "Invalid serialized file format";
+      CHECK_EQ(names.size(), num_params) << "Invalid serialized file format";
+
+      for (size_t j = 0; j < static_cast<size_t>(num_params); j++) {
+        NDArray tmp;
+        tmp.Load(strm);
+        g.weights[names[j]] = tmp;
+      }
+      subgraphs.push_back(g);
+    }
+    CreateSubgraphs(subgraphs, compiler_name);
+  }
+
+  // Create the subgraphs for a specific runtime and cache them, so that we
+  // can invoke them without repeatedly creating them at runtime.
+  void CreateSubgraphs(const std::vector<Subgraph>& subgraphs,
+                       const std::string& compiler_name) {
+    // How do we know which runtime to create? Should we bake something in the
+    // json to indicate this? i.e. we can register a runtime "runtime.ext.dnnl"
+    // and save dnnl. Now we can just get it from the registry using dnnl. This
+    // requires us to have a single place to invoke different external codegens
+    // and serialize them.
+    std::string ext_runtime_name = "runtime.ext." + compiler_name;
+    auto pf = tvm::runtime::Registry::Get(ext_runtime_name);
+    CHECK(pf) << "Failed to find the extern runtime for " << ext_runtime_name;
+    for (const auto& sg : subgraphs) {
+      CHECK_EQ(subgraphs_.count(sg.symbol_name), 0U)
+          << "Found duplicated symbol: " << sg.symbol_name;
+
+      Module ext_mod = (*pf)(sg.graph_json);
+      const auto* json_rt_node = ext_mod.as<JSONRuntimeBase>();
+      CHECK(json_rt_node);
+      // Set up the params that are constants.
+      for (const auto& it : sg.weights) {
+        CallPackedFunc(ext_mod, "set_input", it.first, it.second);
+      }
+      // Init the engine
+      CallPackedFunc(ext_mod, "init");
+
+      subgraphs_[sg.symbol_name] = ext_mod;
+    }
+  }
+
+  static Module LoadFromBinary(void* strm) {
+    dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
+    std::string graph;
+    stream->Read(&graph);
+    auto n = make_object<JSONRuntimeDriver>(graph);
+    return Module(n);
+  }
+
+  void SaveToBinary(dmlc::Stream* stream) override {
+    stream->Write(this->graph_json_);
+  }
+
+ private:
+  template <typename... Args>
+  void CallPackedFunc(Module mod, const std::string& name, Args... args) {
+    auto pf = mod.GetFunction(name);
+    pf(std::forward<Args>(args)...);
+  }
+
+  /*! \brief The graph json. Weights are also baked in. */
+  std::string graph_json_;
+  /*!
+   * \brief Cache the created runtime modules so they can be directly invoked.
+   *
+   * The runtime could be a csource runtime or any user defined runtime that
+   * extends the JSONRuntimeBase class.
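+   * The map is keyed by each subgraph's symbol name so that GetFunction
+   * can dispatch an incoming call directly to its cached module.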
+ */ + std::unordered_map subgraphs_; +}; + +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_jsonruntime") +.set_body_typed(JSONRuntimeDriver::LoadFromBinary); + +} // namespace json +} // namespace runtime +} // namespace tvm + From 96bcc0c8492f3725efc9c7bf302dd6c5fe00f13a Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 4 May 2020 06:21:46 +0000 Subject: [PATCH 02/30] json dnnl WIP --- .../contrib/codegen_json/codegen_json.cc | 9 +++- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 47 ++++++++++++++++++- src/runtime/json/json_node.h | 35 ++++++++++++++ src/runtime/json/json_runtime_driver.cc | 2 +- 4 files changed, 90 insertions(+), 3 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc index 9fc880ad3521..bed5206aac67 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc @@ -127,8 +127,15 @@ class OpAttrExtractor : public AttrVisitor { SetNodeAttr(key, attr); } else if (!(*value).defined()) { // Skip NullValue SetNodeAttr(key, std::vector{""}); + } else if (const auto* im = (*value).as()) { + SetNodeAttr(key, std::vector{std::to_string(im->value)}); + } else if (const auto* fm = (*value).as()) { + SetNodeAttr(key, std::vector{Fp2String(fm->value)}); + } else if (const auto* str = (*value).as()) { + String s = GetRef(str); + SetNodeAttr(key, std::vector{s.operator std::string()}); } else { - LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey(); + LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey() << ": " << *value; } } diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index f9e6affc0196..503ade7c08a7 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -24,9 +24,11 @@ #include +#include #include #include +#include "dnnl.hpp" #include "../../json/json_node.h" #include "../../json/json_runtime.h" @@ -38,6 +40,9 @@ using namespace tvm::runtime; using namespace tvm::runtime::json; class DNNLJSONRuntime : public JSONRuntimeBase { + using tag = dnnl::memory::format_tag; + using dt = dnnl::memory::data_type; + public: explicit DNNLJSONRuntime(const std::string& graph_json) : JSONRuntimeBase(graph_json) {} ~DNNLJSONRuntime() = default; @@ -50,8 +55,48 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Create a engine here } + void BuildEngine() { + for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { + const auto& node = nodes_[nid]; + if (node.GetOpType() == "input") { + // Handle inputs + } else { + CHECK_EQ(node.GetOpType(), "kernel"); + auto op_name = node.GetOpName(); + // Handle kernel + for (const auto& e : node.GetInputs()) { + // uint32_t eid = this->EntryID(e); + // shape/type for the i-th input + // std::vector shape = node.GetShape()[e.index_]; + // DLDataType dltype = node.GetDataType()[e.index_]; + } + } + } + } + + void Conv2d() { + } + + void Dense() { + } + + void BatchNorm() { + } + + void Relu() { + } + + // Macro for add, subtract, multiply... + private: - // Engine + /* The dnnl engine. */ + dnnl::engine engine_; + /* The dnnl stream. */ + dnnl::stream stream_; + /* The network layers that are represented in dnnl primitives. */ + std::vector net_; + /* The memory that is consumed by arguments. 
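+     Each element maps DNNL_ARG_* slots (e.g. DNNL_ARG_SRC, DNNL_ARG_DST)
+     to the dnnl::memory objects consumed by one primitive in net_.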
*/ + std::vector> net_args_; }; TVM_REGISTER_GLOBAL("runtime.ext.dnnl") diff --git a/src/runtime/json/json_node.h b/src/runtime/json/json_node.h index 3516cd220fbf..dec2d3b8228d 100644 --- a/src/runtime/json/json_node.h +++ b/src/runtime/json/json_node.h @@ -203,6 +203,41 @@ class JSONGraphNode { */ uint32_t GetNumOutput() const { return num_outputs_; } + /*! + * \brief Return the input entries. + * + * \return The input entries. + */ + std::vector GetInputs() const { return inputs_; } + + /*! + * \brief Return the op type. + * + * \return The op type. + */ + std::string GetOpType() const { return op_type_; } + + /*! + * \brief Return the op name. + * + * \return The op name. + */ + std::string GetOpName() const { return name_; } + + /*! + * \brief Return the op output shapes. + * + * \return The shapes. + */ + std::vector> GetOpShape() const { return shape_; } + + /*! + * \brief Return the op types. + * + * \return The types. + */ + std::vector GetOpDataType() const { return dtype_; } + /*! * \brief Set the number of outputs of the node. * diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc index 072a184d2f06..3ed34bbac44c 100644 --- a/src/runtime/json/json_runtime_driver.cc +++ b/src/runtime/json/json_runtime_driver.cc @@ -59,7 +59,7 @@ class JSONRuntimeDriver : public ModuleNode { Deserialize(); } - const char* type_key() const { return "JSONRuntime"; } + const char* type_key() const { return "jsonruntime"; } /*! * \brief Get a packed function. From e273c1068be42c37130ddddb0083e3d2f55860c3 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 10 Jun 2020 18:58:55 +0000 Subject: [PATCH 03/30] fix ArrayNode usages --- src/relay/backend/contrib/codegen_json/codegen_json.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc index bed5206aac67..acdd0d5d8a14 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc @@ -112,16 +112,16 @@ class OpAttrExtractor : public AttrVisitor { void Visit(const char* key, runtime::ObjectRef* value) final { if (const auto* an = (*value).as()) { std::vector attr; - for (size_t i = 0; i < an->data.size(); ++i) { - if (const auto* im = an->data[i].as()) { + for (size_t i = 0; i < an->size(); ++i) { + if (const auto* im = (*an)[i].as()) { attr.push_back(std::to_string(im->value)); - } else if (const auto* fm = an->data[i].as()) { + } else if (const auto* fm = (*an)[i].as()) { attr.push_back(Fp2String(fm->value)); - } else if (const auto* str = an->data[i].as()) { + } else if (const auto* str = (*an)[i].as()) { String s = GetRef(str); attr.push_back(s.operator std::string()); } else { - LOG(FATAL) << "Not supported type: " << an->data[i]->GetTypeKey(); + LOG(FATAL) << "Not supported type: " << (*an)[i]->GetTypeKey(); } } SetNodeAttr(key, attr); From cae8b59167d53d77513fd78ceaae7baf792f2ad2 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 10 Jun 2020 23:21:21 +0000 Subject: [PATCH 04/30] Support composite functions --- .../backend/contrib/codegen_json/codegen_json.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc index acdd0d5d8a14..bd42ed568b94 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc 
@@ -249,8 +249,18 @@ class JSONSerializer : public MemoizedExprTranslator(cn->attrs.get())); + if (cn->op.as()) { + OpAttrExtractor extractor(node); + extractor.Extract(const_cast(cn->attrs.get())); + } else if (const auto* fn = cn->op.as()) { + auto pattern = fn->GetAttr(attr::kPartitionedFromPattern); + CHECK(pattern.defined()); + std::vector values; + values.push_back(pattern.value().operator std::string()); + std::vector attr; + attr.emplace_back(values); + node->SetAttr("PartitionedFromPattern", attr); + } } std::vector VisitExprDefault_(const Object* op) final { @@ -288,8 +298,6 @@ class JSONSerializer : public MemoizedExprTranslatorGetAttr(attr::kComposite); CHECK(comp.defined()) << "JSON runtime only supports composite functions."; name = comp.value().operator std::string(); - // TODO(zhiics) Handle composite function here. - LOG(FATAL) << "Composite function is not handled yet."; } else { LOG(FATAL) << "JSON runtime does not support calls to " << cn->op->GetTypeKey(); } From 9594047d73d446b0f09d1c262476ad0b9e23a64c Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 12 Jun 2020 01:25:18 +0000 Subject: [PATCH 05/30] DNNL json runtime: conv2d/add/relu/dense/bn --- cmake/modules/contrib/DNNL.cmake | 3 +- .../{codegen_json.cc => codegen_json.h} | 5 +- src/relay/backend/contrib/dnnl/codegen.cc | 38 +- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 443 +++++++++++++++++- src/runtime/json/json_node.h | 2 + src/runtime/json/json_runtime.h | 2 +- src/runtime/json/json_runtime_driver.cc | 8 + tests/python/relay/test_external_runtime.py | 2 +- tests/python/relay/test_json_runtime.py | 403 ++++++++++++++++ 9 files changed, 881 insertions(+), 25 deletions(-) rename src/relay/backend/contrib/codegen_json/{codegen_json.cc => codegen_json.h} (98%) create mode 100644 tests/python/relay/test_json_runtime.py diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index 0b56bc9a9241..ac03fbcc8dd1 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -17,8 +17,9 @@ if(USE_DNNL_CODEGEN STREQUAL "ON") file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) - file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.cc) + file(GLOB JSON_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.h) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) + list(APPEND COMPILER_SRCS ${JSON_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.h similarity index 98% rename from src/relay/backend/contrib/codegen_json/codegen_json.cc rename to src/relay/backend/contrib/codegen_json/codegen_json.h index bd42ed568b94..093d076c43b7 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -18,9 +18,11 @@ */ /*! 
- * \file relay/backend/contrib/codegen_json.cc + * \file relay/backend/contrib/codegen_json.h * \brief Utilities for json codegen and runtime */ +#ifndef TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ +#define TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ #include #include @@ -365,3 +367,4 @@ TVM_REGISTER_GLOBAL("relay.analysis.ToJSON") } // namespace relay } // namespace tvm +#endif // TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 60138ae99b3e..be37899b7180 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -35,6 +35,7 @@ #include "../../utils.h" #include "../codegen_c/codegen_c.h" +#include "../codegen_json/codegen_json.h" namespace tvm { namespace relay { @@ -417,13 +418,46 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { std::ostringstream code_stream_; }; +/*! + * \brief Get the external symbol of the Relay function name. + * + * \param func The provided function. + * + * \return An external symbol. + */ +std::string GetExtSymbol(const Function& func) { + const auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node.defined()) << "Fail to retrieve external symbol."; + return std::string(name_node.value()); +} + /*! * \brief The external compiler/codegen tool. It takes a Relay expression/module and * compile it into a runtime module. */ runtime::Module DNNLCompiler(const ObjectRef& ref) { - DNNLModuleCodegen dnnl; - return dnnl.CreateCSourceModule(ref); + std::string func_name; + std::string graph_json; + if (ref->IsInstance()) { + auto func = Downcast(ref); + func_name = GetExtSymbol(func); + graph_json = ToJSON(func); + } else if (ref->IsInstance()) { + IRModule mod = Downcast(ref); + CHECK_EQ(mod->functions.size(), 1U) << "Only support single subgraph"; + for (const auto& it : mod->functions) { + auto func = Downcast(it.second); + func_name = GetExtSymbol(func); + graph_json = ToJSON(func); + } + } else { + LOG(FATAL) << "The input ref is expected to be a Relay function or module\n"; + } + + const auto* pf = runtime::Registry::Get("runtime.DNNLJSONRuntimeCreate"); + CHECK(pf != nullptr) << "Cannot find JSON runtime driver module to create"; + auto mod = (*pf)(func_name, graph_json); + return mod; } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 503ade7c08a7..05d308245a45 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -44,67 +44,472 @@ class DNNLJSONRuntime : public JSONRuntimeBase { using dt = dnnl::memory::data_type; public: - explicit DNNLJSONRuntime(const std::string& graph_json) : JSONRuntimeBase(graph_json) {} + explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json) + : JSONRuntimeBase(graph_json), func_name_(func_name) {} ~DNNLJSONRuntime() = default; + const char* type_key() const { return "dnnljsonruntime"; } + + PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { + if (!this->is_init_) { + Init(); + BuildEngine(); + } + this->is_init_ = true; + + if (this->func_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + for (auto i = 0; i < args.size(); ++i) { + // Setup data entries. 
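+          // Calling convention assumed here: positional args carry all
+          // graph inputs first, followed by one buffer per output.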
+ CHECK(args[i].type_code() == kTVMNDArrayHandle || + args[i].type_code() == kTVMDLTensorHandle) + << "Expect NDArray or DLTensor as inputs\n"; + if (args[i].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[i]; + this->data_entry_[i].CopyFrom(arg); + } else { + NDArray arg = args[i]; + this->data_entry_[i].CopyFrom(arg); + } + } + + // Execute the subgraph. + this->Run(); + + // Get result. + auto offset = this->input_nodes_.size(); + for (size_t i = 0; i < this->outputs_.size(); ++i) { + size_t idx = i + offset; + if (args[idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[idx]; + this->data_entry_[idx].CopyTo(arg); + } else { + NDArray arg = args[idx]; + this->data_entry_[idx].CopyTo(arg); + } + } + + // FIXME: Multiple outputs. + //*rv = data_entry_.back(); + }); + } else { + LOG(WARNING) << "Unknown DNNL symbol " << name; + return PackedFunc(); + } + } + void Run() override { - // Invoke the engine and return the result + // Fill in the input buffers. + for (size_t i = 0; i < this->input_nodes_.size(); ++i) { + auto nid = this->input_nodes_[i]; + // TODO: Support other data lengths. + size_t offset_in_bytes = this->node_out_mem_[nid][0].second * 4; + write_to_dnnl_memory(this->data_entry_[i]->data, this->node_out_mem_[nid][0].first, + GetNDArraySize(this->data_entry_[i]), offset_in_bytes); + } + + // Invoke the engine. + for (size_t i = 0; i < net_.size(); ++i) { + net_.at(i).execute(stream_, net_args_.at(i)); + } + stream_.wait(); + + // Read output buffers. + auto offset = this->input_nodes_.size(); + for (size_t i = 0; i < this->outputs_.size(); ++i) { + auto out_entry = this->outputs_[i]; + auto nid = out_entry.id_; + auto idx = out_entry.index_; + size_t offset_in_bytes = this->node_out_mem_[nid][idx].second * 4; + read_from_dnnl_memory(this->data_entry_[offset + i]->data, + this->node_out_mem_[nid][idx].first, + GetNDArraySize(this->data_entry_[offset + i]), offset_in_bytes); + } } void Init() override { - // Create a engine here + engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); + stream_ = dnnl::stream(engine_); } void BuildEngine() { + // Build subgraph engine. for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { const auto& node = nodes_[nid]; - if (node.GetOpType() == "input") { - // Handle inputs - } else { + if (node.GetOpType() == "kernel") { CHECK_EQ(node.GetOpType(), "kernel"); auto op_name = node.GetOpName(); - // Handle kernel - for (const auto& e : node.GetInputs()) { - // uint32_t eid = this->EntryID(e); - // shape/type for the i-th input - // std::vector shape = node.GetShape()[e.index_]; - // DLDataType dltype = node.GetDataType()[e.index_]; + if ("nn.conv2d" == op_name) { + Conv2d(nid); + } else if ("nn.dense" == op_name) { + Dense(nid); + } else if ("nn.batch_norm" == op_name) { + BatchNorm(nid); + } else if ("nn.relu" == op_name) { + Relu(nid); + } else if ("add" == op_name) { + Add(nid); + } else { + LOG(FATAL) << "Unsupported op: " << op_name; } } } + + // Initialize input/output entries. 
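+    // Note: entries are allocated as float32 tensors on CPU below; other
+    // dtypes and devices would need the TODOs above to be resolved first.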
+    DLContext ctx;
+    ctx.device_type = static_cast(1);
+    ctx.device_id = 0;
+    for (size_t i = 0; i < this->input_nodes_.size(); ++i) {
+      auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0];
+      this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx));
+    }
+    for (size_t i = 0; i < this->outputs_.size(); ++i) {
+      auto entry = this->outputs_[i];
+      auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_];
+      this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx));
+    }
+  }

-private:
+  // Bind a JSON graph node entry to a DNNL memory.
+  dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc,
+                              size_t offset = 0) {
+    if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) {
+      return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset);
+    }
+    return node_out_mem_[entry.id_][entry.index_].first;
+  }
+
+  // Bind a JSON graph node entry to a given DNNL memory.
+  dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem,
+                              size_t offset = 0) {
+    // Since the DNNL memory has been created before calling this function, we assume the entry
+    // has not yet been bound to another DNNL memory; otherwise memory would be leaked.
+    CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0);
+
+    // TODO: Support other data types (e.g., int8).
+    auto data_node = nodes_[entry.id_];
+    auto dltype = data_node.GetOpDataType()[entry.index_];
+    CHECK_EQ(dltype.bits, 32);
+
+    node_out_mem_[entry.id_][entry.index_] = {mem, offset};
+    return node_out_mem_[entry.id_][entry.index_].first;
+  }
+
+  void Conv2d(const size_t& nid) {
+    auto node = this->nodes_[nid];
+
+    // Setup attributes.
+    auto data_entry = node.GetInputs()[0];
+    auto weight_entry = node.GetInputs()[1];
+    dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_];
+    dnnl::memory::dims weight_shape =
+        this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_];
+    std::vector str_strides = node.GetAttr>("strides");
+    std::vector str_padding = node.GetAttr>("padding");
+    int groups = std::stoi(node.GetAttr>("groups")[0]);
+
+    dnnl::memory::dim N = input_shape[0],        // batch size
+        IC = input_shape[1],                     // input channels
+        IH = input_shape[2],                     // input height
+        IW = input_shape[3],                     // input width
+        OC = weight_shape[0],                    // output channels
+        KH = weight_shape[2],                    // kernel height
+        KW = weight_shape[3],                    // kernel width
+        PH_L = std::stoi(str_padding[1]),        // height padding: left
+        PH_R = std::stoi(str_padding[3]),        // height padding: right
+        PW_L = std::stoi(str_padding[0]),        // width padding: left
+        PW_R = std::stoi(str_padding[2]),        // width padding: right
+        SH = std::stoi(str_strides[0]),          // height-wise stride
+        SW = std::stoi(str_strides[1]),          // width-wise stride
+        OH = (IH - KH + PH_L + PH_R) / SH + 1,   // output height
+        OW = (IW - KW + PW_L + PW_R) / SW + 1;   // output width
+
+    // Memory shapes.
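+    // (Worked example, assuming the 14x14 input, 3x3 kernel, padding 1 and
+    //  stride 1 used by the unit tests: OH = (14 - 3 + 1 + 1) / 1 + 1 = 14,
+    //  so dst_dims below stays {N, OC, 14, 14}.)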
+    dnnl::memory::dims src_dims = {N, IC, IH, IW};
+    dnnl::memory::dims weights_dims = {OC, IC, KH, KW};
+    if (groups > 1) {
+      weights_dims = {groups, 1, IC / groups, KH, KW};
+    }
+    dnnl::memory::dims bias_dims = {OC};
+    dnnl::memory::dims dst_dims = {N, OC, OH, OW};
+    dnnl::memory::dims strides_dims = {SH, SW};
+    dnnl::memory::dims padding_dims_l = {PH_L, PW_L};
+    dnnl::memory::dims padding_dims_r = {PH_R, PW_R};
+
+    // Memory descriptions.
+    auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, tag::any);
+    auto conv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, tag::any);
+    auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, tag::nchw);
+    auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::a);
+
+    // Conv2d description.
+    auto conv_desc = dnnl::convolution_forward::desc(
+        dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md,
+        conv_weights_md, conv_bias_md, conv_dst_md, strides_dims, padding_dims_l, padding_dims_r);
+    dnnl::primitive_attr attr;
+    auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_);
+
+    // Push to the network.
+    auto conv = dnnl::convolution_forward(conv2d_prim_desc);
+    net_.push_back(conv);
+
+    // Data memory.
+    CHECK_EQ(node.GetAttr>("data_layout")[0], "NCHW");
+    auto conv2d_src_memory = BindDNNLMemory(data_entry, {src_dims, dt::f32, tag::nchw});
+
+    // Weight memory.
+    CHECK_EQ(node.GetAttr>("kernel_layout")[0], "OIHW");
+    auto conv2d_weights_memory = BindDNNLMemory(
+        weight_entry, {weights_dims, dt::f32, (groups > 1) ? tag::goihw : tag::oihw});
+
+    // Bias memory (useless for now as TVM conv2d op has no bias).
+    std::vector bias(OC, 0);
+    auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_, bias.data());
+
+    // Output memory.
+    JSONGraphNodeEntry out_entry(nid, 0);
+    auto conv2d_dst_memory = BindDNNLMemory(out_entry, conv2d_prim_desc.dst_desc());
+
+    // Bind memory buffers.
+    net_args_.push_back({{DNNL_ARG_SRC, conv2d_src_memory},
+                         {DNNL_ARG_WEIGHTS, conv2d_weights_memory},
+                         {DNNL_ARG_BIAS, conv2d_bias_memory},
+                         {DNNL_ARG_DST, conv2d_dst_memory}});
+  }

-  void Dense() {
+  void Dense(const size_t& nid) {
+    auto node = this->nodes_[nid];
+
+    // Setup attributes.
+    auto data_entry = node.GetInputs()[0];
+    auto weight_entry = node.GetInputs()[1];
+    dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_];
+    dnnl::memory::dims weight_shape =
+        this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_];
+
+    dnnl::memory::dim B = input_shape[0],  // batch size
+        IC = input_shape[1],               // input channels
+        OC = weight_shape[0];              // output channels
+
+    // Memory shapes.
+    dnnl::memory::dims data_dims = {B, IC};
+    dnnl::memory::dims weight_dims = {OC, IC};
+    dnnl::memory::dims bias_dims = {OC};
+    dnnl::memory::dims out_dims = {B, OC};
+
+    // Memory descriptions.
+    auto data_md = dnnl::memory::desc({data_dims, dt::f32, tag::nc});
+    auto weight_md = dnnl::memory::desc({weight_dims, dt::f32, tag::nc});
+    auto bias_md = dnnl::memory::desc({bias_dims, dt::f32, tag::x});
+    auto dst_md = dnnl::memory::desc({out_dims, dt::f32, tag::nc});
+
+    // Dense description.
+    auto dense_desc = dnnl::inner_product_forward::desc(dnnl::prop_kind::forward_inference, data_md,
+                                                        weight_md, bias_md, dst_md);
+    auto dense_prim_desc = dnnl::inner_product_forward::primitive_desc(dense_desc, engine_);
+
+    auto dense = dnnl::inner_product_forward(dense_prim_desc);
+    net_.push_back(dense);
+
+    // Memories.
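+    // (nn.dense in TVM carries no bias operand, so the zero-filled vector
+    //  below exists only to satisfy DNNL's inner_product interface, the same
+    //  trick the conv2d path above uses.)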
+ std::vector bias(OC, 0); + auto data_memory = BindDNNLMemory(data_entry, data_md); + auto weight_memory = BindDNNLMemory(weight_entry, weight_md); + auto bias_memory = dnnl::memory(bias_md, engine_, bias.data()); + JSONGraphNodeEntry out_entry(nid, 0); + auto dst_memory = BindDNNLMemory(out_entry, dense_prim_desc.dst_desc()); + + net_args_.push_back({{DNNL_ARG_SRC, data_memory}, + {DNNL_ARG_WEIGHTS, weight_memory}, + {DNNL_ARG_BIAS, bias_memory}, + {DNNL_ARG_DST, dst_memory}}); } - void BatchNorm() { + void BatchNorm(const size_t& nid) { + auto node = this->nodes_[nid]; + + auto data_entry = node.GetInputs()[0]; + auto gamma_entry = node.GetInputs()[1]; + auto beta_entry = node.GetInputs()[2]; + auto mean_entry = node.GetInputs()[3]; + auto variance_entry = node.GetInputs()[4]; + dnnl::memory::dims data_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dim IC = data_shape[1]; + float epsilon = std::stof(node.GetAttr>("epsilon")[0]); + + // Memory description. + dnnl::memory::desc data_md = GenDNNLMemDescByShape(data_shape, dt::f32); + + // BN description. + auto bn_desc = dnnl::batch_normalization_forward::desc( + dnnl::prop_kind::forward_inference, data_md, epsilon, + dnnl::normalization_flags::use_global_stats | dnnl::normalization_flags::use_scale_shift); + auto bn_prim_desc = dnnl::batch_normalization_forward::primitive_desc(bn_desc, engine_); + auto bn = dnnl::batch_normalization_forward(bn_prim_desc); + net_.push_back(bn); + + // Memories. + auto data_memory = BindDNNLMemory(data_entry, data_md); + JSONGraphNodeEntry out_entry(nid, 0); + auto out_memory = BindDNNLMemory(out_entry, data_md); + auto mean_memory = BindDNNLMemory(mean_entry, bn_prim_desc.mean_desc()); + auto variance_memory = BindDNNLMemory(variance_entry, bn_prim_desc.variance_desc()); + + // In DNNL, weight is composed of gamma+beta, so we point them to the same DNNL memory but + // assign an offset to beta data for runtime serialization. + auto weight_memory = BindDNNLMemory(gamma_entry, bn_prim_desc.weights_desc(), 0); + BindDNNLMemory(beta_entry, weight_memory, IC); + + net_args_.push_back({{DNNL_ARG_SRC, data_memory}, + {DNNL_ARG_DST, out_memory}, + {DNNL_ARG_SCALE_SHIFT, weight_memory}, + {DNNL_ARG_MEAN, mean_memory}, + {DNNL_ARG_VARIANCE, variance_memory}}); } - void Relu() { + void Relu(const size_t& nid) { + auto node = this->nodes_[nid]; + + auto data_entry = node.GetInputs()[0]; + dnnl::memory::dims shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + auto data_md = dnnl::memory::desc{{shape}, dt::f32, tag::abcd}; + + auto relu_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, + dnnl::algorithm::eltwise_relu, data_md, 0); + auto relu_prim_desc = dnnl::eltwise_forward::primitive_desc(relu_desc, engine_); + CHECK(data_md == relu_prim_desc.dst_desc()); + + auto relu = dnnl::eltwise_forward(relu_prim_desc); + net_.push_back(relu); + + auto data_memory = BindDNNLMemory(data_entry, data_md); + auto out_md = dnnl::memory::desc(shape, dt::f32, tag::abcd); + JSONGraphNodeEntry out_entry(nid, 0); + auto out_memory = BindDNNLMemory(out_entry, out_md); + + net_args_.push_back({{DNNL_ARG_SRC, data_memory}, {DNNL_ARG_DST, out_memory}}); } - // Macro for add, subtract, multiply... + void Add(const size_t& nid) { + auto node = this->nodes_[nid]; + + // Memory and compute description. 
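+    // (binary_add here assumes the two operands share an identical shape; no
+    //  broadcasting is attempted, and the CHECK below enforces this.)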
+ std::vector data_dims; + std::vector data_mds; + std::vector data_memories; + + CHECK_EQ(node.GetInputs().size(), 2U); + for (auto entry : node.GetInputs()) { + auto data_shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; + dnnl::memory::desc data_md = GenDNNLMemDescByShape(data_shape, dt::f32); + + data_dims.push_back(data_shape); + data_mds.push_back(data_md); + data_memories.push_back(BindDNNLMemory(entry, data_md)); + } + CHECK(data_dims[0] == data_dims[1]); + auto out_md = data_mds[0]; + JSONGraphNodeEntry out_entry(nid, 0); + auto out_memory = BindDNNLMemory(out_entry, out_md); + + auto add_desc = + dnnl::binary::desc(dnnl::algorithm::binary_add, data_mds[0], data_mds[1], out_md); + auto add_prim_desc = dnnl::binary::primitive_desc(add_desc, engine_); + auto add = dnnl::binary(add_prim_desc); + net_.push_back(add); + + net_args_.push_back({{DNNL_ARG_SRC_0, data_memories[0]}, + {DNNL_ARG_SRC_1, data_memories[1]}, + {DNNL_ARG_DST, out_memory}}); + } + + // Read from DNNL memory (+offset) and write to the handle. + inline void read_from_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size, + size_t offset = 0) { + uint8_t* src = static_cast(mem.get_data_handle()); + std::copy(src + offset, src + size, reinterpret_cast(handle)); + } + + // Read from the handle and write to DNNL memory (+offset). + inline void write_to_dnnl_memory(void* handle, dnnl::memory& mem, size_t size, + size_t offset = 0) { + uint8_t* dst = static_cast(mem.get_data_handle()); + std::copy((uint8_t*)handle, (uint8_t*)handle + size, dst + offset); + } + +// Generate DNNL memory description and infer the data layout by the given shape. +inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape, dt dtype) { + dnnl::memory::desc data_md; + switch (shape.size()) { + case 2: + data_md = dnnl::memory::desc({shape, dtype, tag::ab}); + break; + case 3: + data_md = dnnl::memory::desc({shape, dtype, tag::abc}); + break; + case 4: + data_md = dnnl::memory::desc({shape, dtype, tag::abcd}); + break; + case 5: + data_md = dnnl::memory::desc({shape, dtype, tag::abcde}); + break; + default: + LOG(FATAL) << "Unsupported data shape dimension: " << shape.size(); + break; + } + return data_md; +} + +// Calculate the size of a given NDArray in bytes. +inline size_t GetNDArraySize(const NDArray& arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= static_cast(arr->shape[i]); + } + size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8; + return size; +} - private: /* The dnnl engine. */ dnnl::engine engine_; /* The dnnl stream. */ dnnl::stream stream_; + /* \brief A simple pool to contain the tensor for each node in the graph. */ + std::vector data_entry_; /* The network layers that are represented in dnnl primitives. */ std::vector net_; /* The memory that is consumed by arguments. */ std::vector> net_args_; + /* The node ID to its corresponding output memory. */ + std::unordered_map < uint32_t, + std::unordered_map>> node_out_mem_; + /* Indicate if the DNNL engine has been initialized. */ + bool is_init_ = false; + /* The only subgraph name for this module. 
*/ + std::string func_name_; }; TVM_REGISTER_GLOBAL("runtime.ext.dnnl") .set_body([](TVMArgs args, TVMRetValue* rv) { - auto n = tvm::runtime::make_object(args[0].operator std::string()); + auto n = tvm::runtime::make_object( + args[0].operator std::string(), args[1].operator std::string()); *rv = Module(n); }); +runtime::Module DNNLJSONRuntimeCreate(std::string func_name, std::string graph_json) { + auto n = make_object(func_name, graph_json); + return runtime::Module(n); +} + +TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate") +.set_body_typed(DNNLJSONRuntimeCreate); + } // namespace contrib } // namespace runtime } // namespace tvm diff --git a/src/runtime/json/json_node.h b/src/runtime/json/json_node.h index dec2d3b8228d..fd45ab226e19 100644 --- a/src/runtime/json/json_node.h +++ b/src/runtime/json/json_node.h @@ -84,7 +84,9 @@ class JSONGraphNodeEntry { } } + /*! \brief The json graph node ID. */ uint32_t id_; + /*! \brief The entry index. */ uint32_t index_; uint32_t version_; }; diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index 6765fa49e0e1..a8182a205eae 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -54,7 +54,7 @@ class JSONRuntimeBase : public ModuleNode { // The type key of each subclass can be saved to the json file and them // used to create the specific runtime during deserialization. // virtual const char* type_key() const = 0; - const char* type_key() const { return ""; } + const char* type_key() const { return "jsonruntime"; } virtual void Init() { LOG(FATAL) << "NYI"; } diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc index 3ed34bbac44c..8436a477e914 100644 --- a/src/runtime/json/json_runtime_driver.cc +++ b/src/runtime/json/json_runtime_driver.cc @@ -204,6 +204,14 @@ class JSONRuntimeDriver : public ModuleNode { TVM_REGISTER_GLOBAL("runtime.module.loadbinary_jsonruntime") .set_body_typed(JSONRuntimeDriver::LoadFromBinary); +runtime::Module JSONRuntimeDriverCreate(std::string graph_json) { + auto n = make_object(graph_json); + return runtime::Module(n); +} + +TVM_REGISTER_GLOBAL("runtime.JSONRuntimeDriverCreate") +.set_body_typed(JSONRuntimeDriverCreate); + } // namespace json } // namespace runtime } // namespace tvm diff --git a/tests/python/relay/test_external_runtime.py b/tests/python/relay/test_external_runtime.py index 7928e4d61b37..7c6199a87bfe 100644 --- a/tests/python/relay/test_external_runtime.py +++ b/tests/python/relay/test_external_runtime.py @@ -479,7 +479,7 @@ def test_engine_extern(): options=["-O2", "-std=c++14", "-I" + tmp_path.relpath("")]) def test_json_extern(): - if not tvm.get_global_func("module.loadfile_examplejson", True): + if not tvm.get_global_func("runtime.module.loadfile_examplejson", True): print("Skip because JSON example runtime is not enabled.") return diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py new file mode 100644 index 000000000000..dd318f1a47fa --- /dev/null +++ b/tests/python/relay/test_json_runtime.py @@ -0,0 +1,403 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for JSON codegen and runtime.""" +import os +import sys + +import numpy as np + +import tvm +import tvm.relay.op as reg +import tvm.relay.testing +from tvm import relay, runtime +from tvm.contrib import util +from tvm.relay import transform +from tvm.relay.analysis.analysis import to_json +from tvm.relay.backend import compile_engine +from tvm.relay.build_module import bind_params_by_name + + +def set_func_attr(func, compile_name, symbol_name): + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", compile_name) + func = func.with_attr("global_symbol", symbol_name) + return func + + +def check_result(mod, + ref_mod, + map_inputs, + out_shape, + tol=1e-5, + target="llvm", + ctx=tvm.cpu(), + params=None): + if sys.platform == "win32": + print("Skip test on Windows for now") + return + + # Run the reference result + compile_engine.get().clear() + with relay.build_config(opt_level=3): + json, lib, param = relay.build(ref_mod, target=target, params=params) + rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) + + for name, data in map_inputs.items(): + rt_mod.set_input(name, data) + rt_mod.set_input(**param) + rt_mod.run() + out = tvm.nd.empty(out_shape, ctx=ctx) + out = rt_mod.get_output(0, out) + ref_result = out.asnumpy() + + def check_vm_result(): + compile_engine.get().clear() + with relay.build_config(opt_level=3): + exe = relay.vm.compile(mod, target=target, params=params) + code, lib = exe.save() + exe = runtime.vm.Executable.load_exec(code, lib) + vm = runtime.vm.VirtualMachine(exe) + vm.init(ctx) + out = vm.run(**map_inputs) + tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) + + def check_graph_runtime_result(): + compile_engine.get().clear() + with relay.build_config(opt_level=3): + json, lib, param = relay.build(mod, target=target, params=params) + rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) + + for name, data in map_inputs.items(): + rt_mod.set_input(name, data) + rt_mod.set_input(**param) + rt_mod.run() + out = tvm.nd.empty(out_shape, ctx=ctx) + out = rt_mod.get_output(0, out) + tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) + + check_vm_result() + check_graph_runtime_result() + + +def test_conv2d(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + def conv2d_direct(): + dtype = 'float32' + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + + data0 = relay.var("data", shape=ishape, dtype=dtype) + weight0 = relay.var("weight", shape=w1shape, dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1)) + + func = relay.Function([data0, weight0], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data = relay.var("data", shape=(ishape), dtype=dtype) + weight = relay.var("weight", shape=(w1shape), dtype=dtype) + main_f = relay.Function([data, weight], glb_var(data, weight)) + mod["main"] = 
main_f + + data0 = relay.var("data", shape=ishape, dtype=dtype) + weight0 = relay.var("weight", shape=w1shape, dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1)) + main_f = relay.Function([data0, weight0], out) + ref_mod = tvm.IRModule() + ref_mod['main'] = main_f + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) + + return mod, ref_mod, {"data": i_data, "weight": w1_data}, (1, 32, 14, 14) + + def group_conv2d(): + dtype = 'float32' + ishape = (1, 32, 14, 14) + w2shape = (32, 1, 3, 3) + + data0 = relay.var("data", shape=(ishape), dtype=dtype) + weight0 = relay.var("weight", shape=(w2shape), dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=32) + + func = relay.Function([data0, weight0], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data = relay.var("data", shape=(ishape), dtype=dtype) + weight = relay.var("weight", shape=(w2shape), dtype=dtype) + main_f = relay.Function([data, weight], glb_var(data, weight)) + mod["main"] = main_f + + data0 = relay.var("data", shape=(ishape), dtype=dtype) + weight0 = relay.var("weight", shape=(w2shape), dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=32) + main_f = relay.Function([data0, weight0], out) + ref_mod = tvm.IRModule() + ref_mod['main'] = main_f + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w_data = np.random.uniform(0, 1, w2shape).astype(dtype) + + return mod, ref_mod, {"data": i_data, "weight": w_data}, (1, 32, 14, 14) + + for mod, ref_mod, map_inputs, out_shape in [conv2d_direct(), group_conv2d()]: + # FIXME: Check accuracy. 
Current avg error: ~0.03 + check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-1) + + +def test_add(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + shape = (10, 10) + + def gen_add(): + data0 = relay.var("data0", shape=shape, dtype=dtype) + data1 = relay.var("data1", shape=shape, dtype=dtype) + out = relay.add(data0, data1) + + func = relay.Function([data0, data1], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data0 = relay.var("data0", shape=shape, dtype=dtype) + data1 = relay.var("data1", shape=shape, dtype=dtype) + main_f = relay.Function([data0, data1], glb_var(data0, data1)) + mod["main"] = main_f + + data0 = relay.var("data0", shape=shape, dtype=dtype) + data1 = relay.var("data1", shape=shape, dtype=dtype) + out = relay.add(data0, data1) + main_f = relay.Function([data0, data1], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_add() + + data0 = np.random.uniform(0, 1, shape).astype(dtype) + data1 = np.random.uniform(0, 1, shape).astype(dtype) + check_result(mod, ref_mod, {"data0": data0, "data1": data1}, shape, tol=1e-5) + + +def test_relu(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + shape = (1, 32, 14, 14) + + def gen_relu(): + data0 = relay.var("data0", shape=shape, dtype=dtype) + out = relay.nn.relu(data0) + + func = relay.Function([data0], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data0 = relay.var("data0", shape=shape, dtype=dtype) + main_f = relay.Function([data0], glb_var(data0)) + mod["main"] = main_f + + data0 = relay.var("data0", shape=shape, dtype=dtype) + out = relay.nn.relu(data0) + main_f = relay.Function([data0], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_relu() + + data0 = np.random.uniform(-1, 1, shape).astype(dtype) + check_result(mod, ref_mod, {"data0": data0,}, (1, 32, 14, 14), tol=1e-5) + + +def test_dense(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + a_shape = (1, 512) + b_shape = (1024, 512) + + def gen_dense(): + a = relay.var("A", shape=a_shape, dtype=dtype) + b = relay.var("B", shape=b_shape, dtype=dtype) + out = relay.nn.dense(a, b) + + func = relay.Function([a, b], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + a = relay.var("A", shape=a_shape, dtype=dtype) + b = relay.var("B", shape=b_shape, dtype=dtype) + main_f = relay.Function([a, b], glb_var(a, b)) + mod["main"] = main_f + + a = relay.var("A", shape=a_shape, dtype=dtype) + b = relay.var("B", shape=b_shape, dtype=dtype) + out = relay.nn.dense(a, b) + main_f = relay.Function([a, b], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_dense() + + data_a = np.random.uniform(0, 1, a_shape).astype(dtype) + data_b = np.random.uniform(0, 1, b_shape).astype(dtype) + check_result(mod, ref_mod, {"A": data_a, "B": data_b}, (1, 1024), tol=1e-5) + + +def test_bn(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not 
available") + return + + dtype = 'float32' + d_shape = (1, 8) + c_shape = (8, ) + + def gen_bn(): + data = relay.var('data', shape=d_shape) + gamma = relay.var("gamma", shape=c_shape) + beta = relay.var("beta", shape=c_shape) + moving_mean = relay.var("moving_mean", shape=c_shape) + moving_var = relay.var("moving_var", shape=c_shape) + bn = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var) + out = bn[0] + + func = relay.Function([data, gamma, beta, moving_mean, moving_var], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data = relay.var('data', shape=d_shape) + gamma = relay.var("gamma", shape=c_shape) + beta = relay.var("beta", shape=c_shape) + moving_mean = relay.var("moving_mean", shape=c_shape) + moving_var = relay.var("moving_var", shape=c_shape) + main_f = relay.Function([data, gamma, beta, moving_mean, moving_var], + glb_var(data, gamma, beta, moving_mean, moving_var)) + mod["main"] = main_f + + data = relay.var('data', shape=d_shape) + gamma = relay.var("gamma", shape=c_shape) + beta = relay.var("beta", shape=c_shape) + moving_mean = relay.var("moving_mean", shape=c_shape) + moving_var = relay.var("moving_var", shape=c_shape) + bn = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var) + out = bn[0] + main_f = relay.Function([data, gamma, beta, moving_mean, moving_var], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_bn() + + data = np.random.uniform(-1, 1, d_shape).astype(dtype) + gamma = np.random.uniform(-1, 1, c_shape).astype(dtype) + beta = np.random.uniform(-1, 1, c_shape).astype(dtype) + moving_mean = np.random.uniform(-1, 1, c_shape).astype(dtype) + moving_var = np.random.uniform(-1, 1, c_shape).astype(dtype) + check_result(mod, + ref_mod, { + "data": data, + "gamma": gamma, + "beta": beta, + "moving_mean": moving_mean, + "moving_var": moving_var + }, + d_shape, + tol=1e-5) + + +def test_composite(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + + def after_partition(): + # Composite function + in_1 = relay.var("in_1", shape=ishape, dtype=dtype) + in_2 = relay.var("in_2", shape=w1shape, dtype=dtype) + conv2d = relay.nn.conv2d(in_1, in_2, kernel_size=(3, 3), padding=(1, 1)) + relu = relay.nn.relu(conv2d) + func = relay.Function([in_1, in_2], relu) + func = func.with_attr('Composite', 'conv2d_relu') + func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_nn.relu_') + + # Partition function + arg_1 = relay.var("arg_1", shape=ishape, dtype=dtype) + arg_2 = relay.var("arg_2", shape=w1shape, dtype=dtype) + call = relay.Call(func, [arg_1, arg_2]) + p_func = relay.Function([arg_1, arg_2], call) + p_func = set_func_attr(p_func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = p_func + + # Main function + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("input", shape=w1shape, dtype=dtype) + main_func = relay.Function([data, weight], glb_var(data, weight)) + mod["main"] = main_func + return mod + + mod = after_partition() + for global_var, func in mod.functions.items(): + if global_var.name_hint != 'main': + print(global_var) + print(to_json(func)) + + + +if __name__ == "__main__": + test_conv2d() + test_add() + test_relu() + test_dense() + test_bn() + #test_composite() From 
e1353a8ec12a72677a414c4ac5c3a537d5aab9ed Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Tue, 16 Jun 2020 01:11:35 +0000 Subject: [PATCH 06/30] add a more complex example --- tests/python/relay/test_json_runtime.py | 56 +++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index dd318f1a47fa..afd5d56ab5ed 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -350,6 +350,61 @@ def gen_bn(): tol=1e-5) +def test_multiple_ops(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + w2shape = (64, 32, 5, 5) + + def get_net(): + data = relay.var("data", relay.TensorType(ishape, dtype)) + w1 = relay.var("w1", relay.TensorType(w1shape, dtype)) + w2 = relay.var("w2", relay.TensorType(w2shape, dtype)) + + layer = relay.nn.conv2d(data=data, weight=w1, kernel_size=(3, 3), padding=(1, 1)) + layer = relay.nn.relu(layer) + layer = relay.nn.conv2d(data=layer, weight=w2, kernel_size=(5, 5), padding=(2, 2)) + layer = relay.nn.relu(layer) + + main_f = relay.Function([data, w1, w2], layer) + mod = tvm.IRModule() + mod["main"] = main_f + return mod + + def get_partitoned_mod(mod): + remove_bn_pass = tvm.transform.Sequential([ + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + ]) + byoc_pass = tvm.transform.Sequential([ + remove_bn_pass, + transform.AnnotateTarget("dnnl"), + transform.MergeCompilerRegions(), + transform.PartitionGraph() + ]) + + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): + return byoc_pass(mod) + + ref_mod = get_net() + mod = get_partitoned_mod(ref_mod) + + data = np.random.uniform(0, 1, ishape).astype(dtype) + w1 = np.random.uniform(0, 1, w1shape).astype(dtype) + w2 = np.random.uniform(0, 1, w2shape).astype(dtype) + check_result(mod, ref_mod, { + "data": data, + "w1": w1, + "w2": w2, + }, (1, 64, 14, 14), tol=1e-5) + + def test_composite(): if not tvm.get_global_func("runtime.ext.dnnl", True): print("skip because DNNL codegen is not available") @@ -400,4 +455,5 @@ def after_partition(): test_relu() test_dense() test_bn() + test_multiple_ops() #test_composite() From 4a11ffc7dfd58f9c25c31f1dbf26b9ee771b01fa Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 19 Jun 2020 01:36:45 +0000 Subject: [PATCH 07/30] fix bias memory issue --- src/relay/backend/contrib/dnnl/codegen.cc | 3 +++ src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index be37899b7180..bf0db8863ef9 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -408,6 +408,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { const auto* pf = runtime::Registry::Get("runtime.CSourceModuleCreate"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; return (*pf)(code, "c", sym, variables); + std::cout << code_stream_.str(); } private: @@ -436,6 +437,8 @@ std::string GetExtSymbol(const Function& func) { * compile it into a runtime module. 
 */
 runtime::Module DNNLCompiler(const ObjectRef& ref) {
+  // DNNLModuleCodegen dnnl;
+  // return dnnl.CreateCSourceModule(ref);
   std::string func_name;
   std::string graph_json;
   if (ref->IsInstance()) {
     auto func = Downcast(ref);
diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
index 05d308245a45..7dc2988d5b53 100644
--- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
+++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -207,7 +207,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
         this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_];
     std::vector str_strides = node.GetAttr>("strides");
     std::vector str_padding = node.GetAttr>("padding");
-    int groups = std::stoi(node.GetAttr>("groups")[0]);
+    dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]);

     dnnl::memory::dim N = input_shape[0],  // batch size
         IC = input_shape[1],               // input channels
@@ -245,8 +245,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
     // Memory descriptions.
     auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, tag::any);
     auto conv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, tag::any);
+    auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::any);
     auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, tag::nchw);
-    auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::a);

     // Conv2d description.
@@ -270,7 +270,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase {

     // Bias memory (useless for now as TVM conv2d op has no bias).
     std::vector bias(OC, 0);
-    auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_, bias.data());
+    auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_);
+    write_to_dnnl_memory(bias.data(), conv2d_bias_memory, OC * 4);

     // Output memory.
     JSONGraphNodeEntry out_entry(nid, 0);
@@ -433,7 +434,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
   inline void read_from_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size,
                                     size_t offset = 0) {
     uint8_t* src = static_cast(mem.get_data_handle());
-    std::copy(src + offset, src + size, reinterpret_cast(handle));
+    std::copy(src + offset, src + offset + size, (uint8_t*)handle);
   }

   // Read from the handle and write to DNNL memory (+offset).
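For reference, the helper fix in the patch above changes the copied source range from
[offset, size) to [offset, offset + size). A minimal standalone sketch of the corrected
semantics follows; the function name and signature are illustrative and not part of the patch:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Copy `size` bytes starting at byte `offset` of a DNNL-owned buffer.
    // The pre-fix version ended the copy at `src + size`, which drops the last
    // `offset` bytes whenever a nonzero offset is used (e.g. the beta half of a
    // batch_norm scale-shift memory).
    void read_bytes(const void* dnnl_buf, void* dst, std::size_t size, std::size_t offset) {
      const std::uint8_t* src = static_cast<const std::uint8_t*>(dnnl_buf) + offset;
      std::memcpy(dst, src, size);
    }
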
From ae7517c00c79a3e6a2c301b83ad37a03f600ae73 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Fri, 19 Jun 2020 06:05:29 +0000 Subject: [PATCH 08/30] rebase to upstream --- src/relay/backend/contrib/codegen_json/codegen_json.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 093d076c43b7..1c916348c594 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -253,7 +253,8 @@ class JSONSerializer : public MemoizedExprTranslatorop.as()) { OpAttrExtractor extractor(node); - extractor.Extract(const_cast(cn->attrs.get())); + const Object* call_attr = cn->attrs.get(); + extractor.Extract(const_cast(call_attr)); } else if (const auto* fn = cn->op.as()) { auto pattern = fn->GetAttr(attr::kPartitionedFromPattern); CHECK(pattern.defined()); From 67ff4e3e0e16b8dbb50819241feed626c7fd3efc Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Fri, 19 Jun 2020 17:23:31 +0000 Subject: [PATCH 09/30] merge to metadata module, remove the unused driver --- python/tvm/relay/analysis/analysis.py | 4 - .../contrib/codegen_json/codegen_json.h | 107 ++++----- src/relay/backend/contrib/dnnl/codegen.cc | 27 +-- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 23 +- src/runtime/json/json_runtime.h | 2 +- src/runtime/json/json_runtime_driver.cc | 218 ------------------ tests/python/relay/test_json_runtime.py | 3 - 7 files changed, 67 insertions(+), 317 deletions(-) delete mode 100644 src/runtime/json/json_runtime_driver.cc diff --git a/python/tvm/relay/analysis/analysis.py b/python/tvm/relay/analysis/analysis.py index adbc2e6222b1..c237859eb987 100644 --- a/python/tvm/relay/analysis/analysis.py +++ b/python/tvm/relay/analysis/analysis.py @@ -313,10 +313,6 @@ def detect_feature(a, b=None): return {Feature(int(x)) for x in _ffi_api.detect_feature(a, b)} -def to_json(expr): - return _ffi_api.ToJSON(expr) - - def extract_fused_functions(mod): """Pass to extract IRModule of only fused primitive functions. diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 1c916348c594..14cb14ecf781 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -50,12 +50,6 @@ using ShapeVector = std::vector >; using TypeVector = std::vector; using JSONGraphObjectPtr = std::shared_ptr; -/*! \brief The artifacts that needs to be serialized. */ -struct JSONOutput { - std::string graph_json; - std::unordered_map params; -}; - /*! * \brief Helper class to extract all attributes of a certain op and save them * into text format. @@ -163,8 +157,13 @@ class OpAttrExtractor : public AttrVisitor { /*! \brief Serialize a Relay expression to JSON. */ class JSONSerializer : public MemoizedExprTranslator> { public: - void Serialize(const Expr& expr) { - relay::Function func = Downcast(expr); + /*! + * \brief Constructor + * + * \param expr The Relay expression to be converted to the JSON form. + */ + JSONSerializer(const std::string& symbol, const Expr& expr) : symbol_(symbol) { + relay::Function func = Downcast(expr); // First we convert all the parameters into input nodes. for (const auto& param : func->params) { auto node_ptr = std::make_shared(param->name_hint(), "input" /* op_type_ */); @@ -173,35 +172,17 @@ class JSONSerializer : public MemoizedExprTranslatorbody); } - /*! 
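     // (Each parameter becomes one JSON node of op_type "input"; graph node
     //  entries later refer back to these nodes by id and output index.)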
- * \brief Save to JSON graph - * - * \param writer A json writer - */ - void Save(dmlc::JSONWriter* writer) { - std::vector arg_nodes; - for (size_t i = 0; i < nodes_.size(); ++i) { - auto node = nodes_[i]; - if (node->IsLeaf()) { - arg_nodes.push_back(i); - } - } - size_t num_entry = 0; - std::vector node_row_ptr{0}; - for (auto node : nodes_) { - num_entry += node->GetNumOutput(); - node_row_ptr.push_back(num_entry); - } - writer->BeginObject(); - writer->WriteObjectKeyValue("nodes", nodes_); - writer->WriteObjectKeyValue("arg_nodes", arg_nodes); - writer->WriteObjectKeyValue("heads", heads_); - writer->WriteObjectKeyValue("node_row_ptr", node_row_ptr); - writer->EndObject(); + /*!\brief Return the required params. */ + Array GetParams() const { + return params_; } - std::unordered_map GetParams() const { - return params_; + /*!\brief Return the generated json. */ + std::string GetJSON() { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + Save(&writer); + return os.str(); } protected: @@ -277,8 +258,8 @@ class JSONSerializer : public MemoizedExprTranslator VisitExpr_(const ConstantNode* cn) final { - std::string name = "const_" + std::to_string(params_.size()); - params_[name] = cn->data; + std::string name = symbol_ + "_const_" + std::to_string(params_.size()); + params_.push_back(name); auto node = std::make_shared(name, "const" /* op_type_ */); return AddNode(node, GetRef(cn)); } @@ -336,36 +317,46 @@ class JSONSerializer : public MemoizedExprTranslator arg_nodes; + for (size_t i = 0; i < nodes_.size(); ++i) { + auto node = nodes_[i]; + if (node->IsLeaf()) { + arg_nodes.push_back(i); + } + } + size_t num_entry = 0; + std::vector node_row_ptr{0}; + for (auto node : nodes_) { + num_entry += node->GetNumOutput(); + node_row_ptr.push_back(num_entry); + } + writer->BeginObject(); + writer->WriteObjectKeyValue("nodes", nodes_); + writer->WriteObjectKeyValue("arg_nodes", arg_nodes); + writer->WriteObjectKeyValue("heads", heads_); + writer->WriteObjectKeyValue("node_row_ptr", node_row_ptr); + writer->EndObject(); + } + private: + /*! \brief The symbol that represents the json graph. */ + std::string symbol_; /*! \brief JSON graph nodes. */ std::vector nodes_; /*! \brief Output of the JSON graph. */ std::vector heads_; - /*! \brief Constants. */ - std::unordered_map params_; + /*! \brief The list of required constants. 
*/ + Array params_; }; } // namespace contrib } // namespace backend - -std::string ToJSON(const Expr& expr) { - backend::contrib::JSONSerializer converter; - converter.Serialize(expr); - - std::ostringstream os; - dmlc::JSONWriter writer(&os); - converter.Save(&writer); - backend::contrib::JSONOutput ret; - ret.graph_json = os.str(); - ret.params = converter.GetParams(); - - backend::contrib::JSONRuntimeBase jr(ret.graph_json); - return ret.graph_json; -} - -TVM_REGISTER_GLOBAL("relay.analysis.ToJSON") -.set_body_typed(ToJSON); - } // namespace relay } // namespace tvm #endif // TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index bf0db8863ef9..3b6d88b31427 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -439,27 +439,16 @@ std::string GetExtSymbol(const Function& func) { runtime::Module DNNLCompiler(const ObjectRef& ref) { // DNNLModuleCodegen dnnl; // return dnnl.CreateCSourceModule(ref); - std::string func_name; - std::string graph_json; - if (ref->IsInstance()) { - auto func = Downcast(ref); - func_name = GetExtSymbol(func); - graph_json = ToJSON(func); - } else if (ref->IsInstance()) { - IRModule mod = Downcast(ref); - CHECK_EQ(mod->functions.size(), 1U) << "Only support single subgraph"; - for (const auto& it : mod->functions) { - auto func = Downcast(it.second); - func_name = GetExtSymbol(func); - graph_json = ToJSON(func); - } - } else { - LOG(FATAL) << "The input ref is expected to be a Relay function or module\n"; - } + CHECK(ref->IsInstance()); + auto func = Downcast(ref); + auto func_name = GetExtSymbol(func); + backend::contrib::JSONSerializer converter(func_name, func); + std::string graph_json = converter.GetJSON(); + auto params = converter.GetParams(); const auto* pf = runtime::Registry::Get("runtime.DNNLJSONRuntimeCreate"); - CHECK(pf != nullptr) << "Cannot find JSON runtime driver module to create"; - auto mod = (*pf)(func_name, graph_json); + CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; + auto mod = (*pf)(func_name, graph_json, params); return mod; } diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 7dc2988d5b53..c7bc3889240f 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -44,11 +44,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { using dt = dnnl::memory::data_type; public: - explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json) - : JSONRuntimeBase(graph_json), func_name_(func_name) {} - ~DNNLJSONRuntime() = default; - - const char* type_key() const { return "dnnljsonruntime"; } + explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json, + const Array const_names) + : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { if (!this->is_init_) { @@ -494,17 +492,14 @@ inline size_t GetNDArraySize(const NDArray& arr) { bool is_init_ = false; /* The only subgraph name for this module. */ std::string func_name_; + /* The required constant names. 
*/ + Array const_names_; }; -TVM_REGISTER_GLOBAL("runtime.ext.dnnl") -.set_body([](TVMArgs args, TVMRetValue* rv) { - auto n = tvm::runtime::make_object( - args[0].operator std::string(), args[1].operator std::string()); - *rv = Module(n); -}); - -runtime::Module DNNLJSONRuntimeCreate(std::string func_name, std::string graph_json) { - auto n = make_object(func_name, graph_json); +runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, + const Array& const_names) { + auto n = make_object(func_name.operator std::string(), + graph_json.operator std::string(), const_names); return runtime::Module(n); } diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index a8182a205eae..38e195297e96 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -54,7 +54,7 @@ class JSONRuntimeBase : public ModuleNode { // The type key of each subclass can be saved to the json file and them // used to create the specific runtime during deserialization. // virtual const char* type_key() const = 0; - const char* type_key() const { return "jsonruntime"; } + const char* type_key() const { return "json"; } virtual void Init() { LOG(FATAL) << "NYI"; } diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc deleted file mode 100644 index 8436a477e914..000000000000 --- a/src/runtime/json/json_runtime_driver.cc +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/json/json_runtime_driver.cc - * \brief The driver for json runtime. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "json_node.h" -#include "json_runtime.h" - -namespace tvm { -namespace runtime { -namespace json { - -/*! - * \brief The class represents a json runtime driver. It is mainly responsible - * for 1) serializing and deserializing the json runtime artifacts, 2) - * dispatching and invoking the actual runtime that intepretes the json - * artifacts. - */ -class JSONRuntimeDriver : public ModuleNode { - public: - struct Subgraph { - std::string symbol_name; - std::string graph_json; - std::unordered_map weights; - }; - - explicit JSONRuntimeDriver(const std::string& graph_json) { - this->graph_json_ = graph_json; - Deserialize(); - } - - const char* type_key() const { return "jsonruntime"; } - - /*! - * \brief Get a packed function. - * \param name The name/symbol of the function. - * \param sptr_to_self The pointer to the module node. - * \return The packed function. 
- */ - PackedFunc GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { - if (this->subgraphs_.count(name)) { - return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { - auto json_rt = this->subgraphs_[name]; - auto* json_rt_node = static_cast(json_rt.operator->()); - CHECK(json_rt_node); - // Set input, how to make sure it is only invoked once? Likely we don't - // really need this as we could directly set input when creating the - // engine, but what if the input for each inference varies. - // json_rt_node->SetInput(); - // - // Execute the egine - json_rt_node->Run(); - - // Get the output, set rv or fill directly to args? - *rv = json_rt_node->GetOutput(); - }); - } else { - // Issue a warning when we don't find the symbol from the module. Note - // we don't kill the execution here as the symbol may exist in other - // runtime modules. - LOG(WARNING) << "Cannot find " << name << " from json runtime"; - return PackedFunc(); - } - } - - void Deserialize() { - std::vector subgraphs; - dmlc::MemoryStringStream memstrm(&graph_json_); - dmlc::Stream* strm = &memstrm; - // Header - uint64_t header; - CHECK(strm->Read(&header)) << "Invalid serialized file format"; - - // Compiler name - std::string compiler_name; - CHECK(strm->Read(&compiler_name)) << "Invalid serialized file format"; - - uint64_t num_subgraphs; - CHECK(strm->Read(&num_subgraphs)) << "Invalid serialized file format"; - // CHECK(header == kTVMJSONRuntimeMagic) << "Invalid serialized file format"; - - for (uint64_t i = 0; i < num_subgraphs; i++) { - Subgraph g; - // Load the symbol for runtime lookup. - std::string symbol_name; - CHECK(strm->Read(&symbol_name)) << "Invalid serialized file format"; - g.symbol_name = symbol_name; - - // Load the graph representation. - std::string json_graph; - CHECK(strm->Read(&json_graph)) << "Invalid serialized file format"; - g.graph_json = json_graph; - - // Load the weights for the graph. - uint64_t num_params; - CHECK(strm->Read(&num_params)) << "Invalid serialized file format"; - - std::vector names; - CHECK(strm->Read(&names)) << "Invalid serialized file format"; - CHECK_EQ(names.size(), num_params) << "Invalid serialized file format"; - - for (size_t i = 0; i < static_cast(num_params); i++) { - NDArray tmp; - tmp.Load(strm); - g.weights[names[i]] = tmp; - } - subgraphs.push_back(g); - } - CreateSubgraphs(subgraphs, compiler_name); - } - - // Create subgraphs for a specific runtime and cache it, therefore, we can - // invoke them without the need to repeatedly create them at runtime. - void CreateSubgraphs(const std::vector& subgraphs, - const std::string& compiler_name) { - // How do we know which runtime to create? Should we bake something in the - // json to indicate this? i.e. we can register a runtime "runtime.ext.dnnl" - // and save dnnl. Now we can just get it from the registry using dnnl. This - // requires us to have single place to invoke different external codegens - // and serialize them. - // - std::string ext_runtime_name = "runtime.ext." + compiler_name; - auto pf = tvm::runtime::Registry::Get(ext_runtime_name); - CHECK(pf) << "Failed to find the extern runtime for " << ext_runtime_name; - for (const auto& sg : subgraphs) { - CHECK_EQ(subgraphs_.count(sg.graph_json), 0U) - << "Found duplicated symbol: " << sg.graph_json; - - Module ext_mod = (*pf)(sg.graph_json); - const auto* json_rt_node = ext_mod.as(); - CHECK(json_rt_node); - // Set up the params that are constants. 
- for (const auto& it : sg.weights) { - CallPakcedFunc(ext_mod, "set_input", it.first, it.second); - } - // Init the engine - CallPakcedFunc(ext_mod, "init"); - - subgraphs_[sg.graph_json] = ext_mod; - } - } - - static Module LoadFromBinary(void* strm) { - dmlc::Stream* stream = static_cast(strm); - std::string graph; - stream->Read(&graph); - auto n = make_object(graph); - return Module(n); - } - - void SaveToBinary(dmlc::Stream* stream) override { - stream->Write(this->graph_json_); - } - - private: - template - void CallPakcedFunc(Module mod, const std::string& name, Args... args) { - auto pf = mod.GetFunction(name); - pf(std::forward(args)...); - } - - /*! \brief The graph json. Weights are also baked in. */ - std::string graph_json_; - /*! - * \brief Cache the created runtime module that can be directly invoked. - * - * The runtime could be a csource runtime or a any user defined runtime that - * is extend from the JSONRuntimeBase class. - */ - std::unordered_map subgraphs_; -}; - -TVM_REGISTER_GLOBAL("runtime.module.loadbinary_jsonruntime") -.set_body_typed(JSONRuntimeDriver::LoadFromBinary); - -runtime::Module JSONRuntimeDriverCreate(std::string graph_json) { - auto n = make_object(graph_json); - return runtime::Module(n); -} - -TVM_REGISTER_GLOBAL("runtime.JSONRuntimeDriverCreate") -.set_body_typed(JSONRuntimeDriverCreate); - -} // namespace json -} // namespace runtime -} // namespace tvm - diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index afd5d56ab5ed..dda9bb70ccdc 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -26,7 +26,6 @@ from tvm import relay, runtime from tvm.contrib import util from tvm.relay import transform -from tvm.relay.analysis.analysis import to_json from tvm.relay.backend import compile_engine from tvm.relay.build_module import bind_params_by_name @@ -445,8 +444,6 @@ def after_partition(): for global_var, func in mod.functions.items(): if global_var.name_hint != 'main': print(global_var) - print(to_json(func)) - if __name__ == "__main__": From e76860feba4364fe4f2deff78751303605053b67 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 19 Jun 2020 21:27:25 +0000 Subject: [PATCH 10/30] support composite functions --- .../contrib/codegen_json/codegen_json.h | 24 +++-- src/relay/backend/contrib/dnnl/codegen.cc | 49 ++++++++- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 41 ++++--- tests/python/relay/test_json_runtime.py | 100 ++++++++++++++---- 4 files changed, 168 insertions(+), 46 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 14cb14ecf781..69140435ac82 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -162,8 +162,10 @@ class JSONSerializer : public MemoizedExprTranslator(expr); + JSONSerializer(const std::string& symbol, const Expr& expr) : symbol_(symbol), func_(expr) {} + + void serialize() { + relay::Function func = Downcast(func_); // First we convert all the parameters into input nodes. 
for (const auto& param : func->params) { auto node_ptr = std::make_shared(param->name_hint(), "input" /* op_type_ */); @@ -247,24 +249,24 @@ class JSONSerializer : public MemoizedExprTranslator VisitExprDefault_(const Object* op) final { + std::vector VisitExprDefault_(const Object* op) { LOG(FATAL) << "JSON runtime currently doesn't support " << op->GetTypeKey(); return {}; } - std::vector VisitExpr_(const VarNode* vn) final { + std::vector VisitExpr_(const VarNode* vn) { CHECK(memo_.count(GetRef(vn))); return memo_[GetRef(vn)]; } - std::vector VisitExpr_(const ConstantNode* cn) final { + std::vector VisitExpr_(const ConstantNode* cn) { std::string name = symbol_ + "_const_" + std::to_string(params_.size()); params_.push_back(name); auto node = std::make_shared(name, "const" /* op_type_ */); return AddNode(node, GetRef(cn)); } - std::vector VisitExpr_(const TupleNode* tn) final { + std::vector VisitExpr_(const TupleNode* tn) { std::vector fields; for (const auto& field : tn->fields) { auto ref = VisitExpr(field); @@ -273,7 +275,7 @@ class JSONSerializer : public MemoizedExprTranslator VisitExpr_(const CallNode* cn) final { + std::vector VisitExpr_(const CallNode* cn) { Expr expr = GetRef(cn); std::string name; if (const auto* op_node = cn->op.as()) { @@ -299,18 +301,18 @@ class JSONSerializer : public MemoizedExprTranslator(cn)); } - std::vector VisitExpr_(const LetNode* ln) final { + std::vector VisitExpr_(const LetNode* ln) { CHECK_EQ(memo_.count(ln->var), 0); memo_[ln->var] = VisitExpr(ln->value); return VisitExpr(ln->body); } - std::vector VisitExpr_(const TupleGetItemNode* gtn) final { + std::vector VisitExpr_(const TupleGetItemNode* gtn) { auto vtuple = VisitExpr(gtn->tuple); return {vtuple[gtn->index]}; } - std::vector VisitExpr_(const FunctionNode* fn) final { + std::vector VisitExpr_(const FunctionNode* fn) { CHECK(fn->GetAttr(attr::kComposite).defined()) << "JSON runtime only supports composite functions"; // FunctionNode should be handled by the caller. @@ -347,6 +349,8 @@ class JSONSerializer : public MemoizedExprTranslator nodes_; /*! \brief Output of the JSON graph. 
*/ diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 3b6d88b31427..6be176456921 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -33,6 +33,7 @@ #include #include +#include "../../../../runtime/json/json_node.h" #include "../../utils.h" #include "../codegen_c/codegen_c.h" #include "../codegen_json/codegen_json.h" @@ -42,6 +43,7 @@ namespace relay { namespace contrib { using namespace backend; +using namespace tvm::runtime::json; inline size_t GetShape1DSize(const Type& type) { const auto shape = GetShape(type); @@ -419,6 +421,46 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { std::ostringstream code_stream_; }; +class DNNLJSONSerializer : public backend::contrib::JSONSerializer { + public: + DNNLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {} + + std::vector VisitExpr_(const CallNode* cn) override { + Expr expr = GetRef(cn); + std::string name; + const CallNode* call = cn; + if (const auto* op_node = cn->op.as()) { + name = op_node->name; + } else if (const auto* fn = cn->op.as()) { + auto comp = fn->GetAttr(attr::kComposite); + CHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions."; + name = comp.value().operator std::string(); + + if (name == "conv2d_bias_relu") { + call = GetRootCall(fn->body.as(), 2, {"nn.conv2d", "add", "nn.relu"}); + } else if (name == "conv2d_relu") { + call = GetRootCall(fn->body.as(), 1, {"nn.conv2d", "nn.relu"}); + CHECK(call->op.as()) << "Not op node"; + } else { + LOG(FATAL) << "Unrecognized DNNL pattern: " << name; + } + } else { + LOG(FATAL) << "DNNL JSON runtime does not support calls to " << cn->op->GetTypeKey(); + } + + std::vector inputs; + for (const auto& arg : cn->args) { + auto res = VisitExpr(arg); + inputs.insert(inputs.end(), res.begin(), res.end()); + } + auto node = std::make_shared(name, /* name_ */ + "kernel", /* op_type_ */ + inputs, 1 /* num_outputs_ */); + SetCallNodeAttribute(node, call); + return AddNode(node, GetRef(cn)); + } +}; + /*! * \brief Get the external symbol of the Relay function name. 
* @@ -442,9 +484,10 @@ runtime::Module DNNLCompiler(const ObjectRef& ref) { CHECK(ref->IsInstance()); auto func = Downcast(ref); auto func_name = GetExtSymbol(func); - backend::contrib::JSONSerializer converter(func_name, func); - std::string graph_json = converter.GetJSON(); - auto params = converter.GetParams(); + DNNLJSONSerializer serializer(func_name, func); + serializer.serialize(); + std::string graph_json = serializer.GetJSON(); + auto params = serializer.GetParams(); const auto* pf = runtime::Registry::Get("runtime.DNNLJSONRuntimeCreate"); CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index c7bc3889240f..fac9765c421d 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -49,11 +49,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - if (!this->is_init_) { - Init(); - BuildEngine(); - } - this->is_init_ = true; if (this->func_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -86,10 +81,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { this->data_entry_[idx].CopyTo(arg); } } - - // FIXME: Multiple outputs. - //*rv = data_entry_.back(); }); + } else if ("__init_" + this->func_name_ == name) { + if (!this->is_init_) { + Init(); + } + this->is_init_ = true; + return PackedFunc(); } else { LOG(WARNING) << "Unknown DNNL symbol " << name; return PackedFunc(); @@ -128,9 +126,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { void Init() override { engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); stream_ = dnnl::stream(engine_); - } - void BuildEngine() { // Build subgraph engine. for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { const auto& node = nodes_[nid]; @@ -139,6 +135,10 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto op_name = node.GetOpName(); if ("nn.conv2d" == op_name) { Conv2d(nid); + } else if ("conv2d_relu" == op_name) { + Conv2d(nid, true, false); + } else if ("conv2d_bias_relu" == op_name) { + Conv2d(nid, true, true); } else if ("nn.dense" == op_name) { Dense(nid); } else if ("nn.batch_norm" == op_name) { @@ -194,7 +194,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { return node_out_mem_[entry.id_][entry.index_].first; } - void Conv2d(const size_t& nid) { + void Conv2d(const size_t& nid, const bool has_relu=false, const bool has_bias=false) { auto node = this->nodes_[nid]; // Setup attributes. @@ -250,7 +250,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv_desc = dnnl::convolution_forward::desc( dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md, conv_weights_md, conv_bias_md, conv_dst_md, strides_dims, padding_dims_l, padding_dims_r); + + // Enable ReLU dnnl::primitive_attr attr; + if (has_relu) { + dnnl::post_ops ops; + ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f); + attr.set_post_ops(ops); + } + auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_); // Push to the network. @@ -266,10 +274,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv2d_weights_memory = BindDNNLMemory( weight_entry, {weights_dims, dt::f32, (groups > 1) ? tag::goihw : tag::oihw}); - // Bias memory (useless for now as TVM conv2d op has no bias). 
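The `primitive_attr`/`post_ops` pair introduced above is the stock DNNL v1.x fusion mechanism, condensed here for reference (all of these calls appear verbatim in the hunk):

// Appending an eltwise post-op folds the ReLU into the convolution kernel.
dnnl::post_ops ops;
ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f);  // scale, alg, alpha, beta
dnnl::primitive_attr attr;
attr.set_post_ops(ops);
// Building the primitive_desc with `attr` yields one fused primitive, so no
// separate ReLU primitive is pushed to the network.
auto prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_);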
- std::vector bias(OC, 0); + // Bias memory. auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_); - write_to_dnnl_memory(bias.data(), conv2d_bias_memory, OC * 4); + if (has_bias) { + auto bias_entry = node.GetInputs()[2]; + BindDNNLMemory(bias_entry, conv2d_bias_memory); + } else { + float bias[OC] = {0}; + write_to_dnnl_memory(bias, conv2d_bias_memory, OC * sizeof(float)); + } // Output memory. JSONGraphNodeEntry out_entry(nid, 0); diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index dda9bb70ccdc..6ffe0995a413 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -94,7 +94,7 @@ def check_graph_runtime_result(): def test_conv2d(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -163,12 +163,11 @@ def group_conv2d(): return mod, ref_mod, {"data": i_data, "weight": w_data}, (1, 32, 14, 14) for mod, ref_mod, map_inputs, out_shape in [conv2d_direct(), group_conv2d()]: - # FIXME: Check accuracy. Current avg error: ~0.03 - check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-1) + check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-5) def test_add(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -208,7 +207,7 @@ def gen_add(): def test_relu(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -244,7 +243,7 @@ def gen_relu(): def test_dense(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -285,7 +284,7 @@ def gen_dense(): def test_bn(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -350,7 +349,7 @@ def gen_bn(): def test_multiple_ops(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -405,15 +404,16 @@ def get_partitoned_mod(mod): def test_composite(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return dtype = 'float32' - ishape = (1, 32, 14, 14) - w1shape = (32, 32, 3, 3) - def after_partition(): + def conv2d_relu(): + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + # Composite function in_1 = relay.var("in_1", shape=ishape, dtype=dtype) in_2 = relay.var("in_2", shape=w1shape, dtype=dtype) @@ -435,15 +435,77 @@ def after_partition(): # Main function data = relay.var("data", shape=ishape, dtype=dtype) - weight = relay.var("input", shape=w1shape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) main_func = relay.Function([data, weight], glb_var(data, weight)) mod["main"] = main_func - return mod - mod = after_partition() - for global_var, func in mod.functions.items(): - if global_var.name_hint != 'main': - print(global_var) + # Reference module + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) + conv2d = relay.nn.conv2d(data, 
weight, kernel_size=(3, 3), padding=(1, 1)) + relu = relay.nn.relu(conv2d) + main_func = relay.Function([data, weight], relu) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_func + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) + + return mod, ref_mod, {'data': i_data, 'weight': w1_data}, (1, 32, 14, 14) + + def conv2d_bias_relu(): + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + bshape = (32, 1, 1) + + # Composite function + in_1 = relay.var("in_1", shape=ishape, dtype=dtype) + in_2 = relay.var("in_2", shape=w1shape, dtype=dtype) + in_3 = relay.var("in_3", shape=bshape, dtype=dtype) + conv2d = relay.nn.conv2d(in_1, in_2, kernel_size=(3, 3), padding=(1, 1)) + add = relay.add(conv2d, in_3) + relu = relay.nn.relu(add) + func = relay.Function([in_1, in_2, in_3], relu) + func = func.with_attr('Composite', 'conv2d_bias_relu') + func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_add_nn.relu_') + + # Partition function + arg_1 = relay.var("arg_1", shape=ishape, dtype=dtype) + arg_2 = relay.var("arg_2", shape=w1shape, dtype=dtype) + arg_3 = relay.var("arg_3", shape=bshape, dtype=dtype) + call = relay.Call(func, [arg_1, arg_2, arg_3]) + p_func = relay.Function([arg_1, arg_2, arg_3], call) + p_func = set_func_attr(p_func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = p_func + + # Main function + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) + bias = relay.var('bias', shape=bshape, dtype=dtype) + main_func = relay.Function([data, weight, bias], glb_var(data, weight, bias)) + mod["main"] = main_func + + # Reference module + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) + bias = relay.var('bias', shape=bshape, dtype=dtype) + conv2d = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1)) + add = relay.add(conv2d, bias) + relu = relay.nn.relu(add) + main_func = relay.Function([data, weight, bias], relu) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_func + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) + b_data = np.random.uniform(0, 1, bshape).astype(dtype) + + return mod, ref_mod, {'data': i_data, 'weight': w1_data, 'bias': b_data}, (1, 32, 14, 14) + + for mod, ref_mod, input_maps, out_shape in [conv2d_relu(), conv2d_bias_relu()]: + check_result(mod, ref_mod, input_maps, out_shape, tol=1e-5) if __name__ == "__main__": @@ -453,4 +515,4 @@ def after_partition(): test_dense() test_bn() test_multiple_ops() - #test_composite() + test_composite() From 71f9e965ef5ad93c595f99d839dd04bd4316d725 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 19 Jun 2020 23:36:54 +0000 Subject: [PATCH 11/30] support DNNL constant --- src/relay/backend/contrib/dnnl/codegen.cc | 4 +- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 120 +++++++++++++----- tests/python/relay/test_json_runtime.py | 54 +++++++- 3 files changed, 142 insertions(+), 36 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 6be176456921..67dba2969923 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -436,9 +436,9 @@ class DNNLJSONSerializer : public backend::contrib::JSONSerializer { CHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions."; name = 
comp.value().operator std::string(); - if (name == "conv2d_bias_relu") { + if (name == "dnnl.conv2d_bias_relu") { call = GetRootCall(fn->body.as(), 2, {"nn.conv2d", "add", "nn.relu"}); - } else if (name == "conv2d_relu") { + } else if (name == "dnnl.conv2d_relu") { call = GetRootCall(fn->body.as(), 1, {"nn.conv2d", "nn.relu"}); CHECK(call->op.as()) << "Not op node"; } else { diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index fac9765c421d..34646e52dae7 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -49,45 +49,73 @@ class DNNLJSONRuntime : public JSONRuntimeBase { : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - - if (this->func_name_ == name) { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->func_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if (this->func_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - for (auto i = 0; i < args.size(); ++i) { - // Setup data entries. - CHECK(args[i].type_code() == kTVMNDArrayHandle || - args[i].type_code() == kTVMDLTensorHandle) + size_t arg_idx = 0; + + // Set input data entries. + for (size_t i = 0; i < this->input_nodes_.size(); ++i) { + if (this->is_const_input_[i]) { + continue; + } + auto nid = this->input_nodes_[i]; + + CHECK(args[arg_idx].type_code() == kTVMNDArrayHandle || + args[arg_idx].type_code() == kTVMDLTensorHandle) << "Expect NDArray or DLTensor as inputs\n"; - if (args[i].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[i]; - this->data_entry_[i].CopyFrom(arg); + if (args[arg_idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[arg_idx]; + this->data_entry_[nid][0].CopyFrom(arg); } else { - NDArray arg = args[i]; - this->data_entry_[i].CopyFrom(arg); + NDArray arg = args[arg_idx]; + this->data_entry_[nid][0].CopyFrom(arg); } + CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); + arg_idx++; } // Execute the subgraph. this->Run(); - // Get result. - auto offset = this->input_nodes_.size(); + // Copy result to output buffer. for (size_t i = 0; i < this->outputs_.size(); ++i) { - size_t idx = i + offset; - if (args[idx].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[idx]; - this->data_entry_[idx].CopyTo(arg); + auto entry = this->outputs_[i]; + + if (args[arg_idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[arg_idx]; + this->data_entry_[entry.id_][entry.index_].CopyTo(arg); } else { - NDArray arg = args[idx]; - this->data_entry_[idx].CopyTo(arg); + NDArray arg = args[arg_idx]; + this->data_entry_[entry.id_][entry.index_].CopyTo(arg); } + CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); + arg_idx++; } }); } else if ("__init_" + this->func_name_ == name) { + // Build DNNL engine and memory. if (!this->is_init_) { Init(); } this->is_init_ = true; - return PackedFunc(); + + // The function to initialize constant tensors. 
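The `__init_<symbol>` branch below completes a two-step calling convention: bind the constants once, then invoke the symbol repeatedly. A rough driver-side sketch; the module handle, symbol name, and tensors are placeholders, not part of the patch:

// Assuming `mod` was produced via the runtime.DNNLJSONRuntimeCreate global
// and `consts` holds the weights in the order recorded by the serializer.
tvm::runtime::PackedFunc init = mod.GetFunction("__init_dnnl_0");
init(consts);                    // one-time constant binding
tvm::runtime::PackedFunc run = mod.GetFunction("dnnl_0");
run(data, out);                  // per-inference invocation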
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size(), 1U); + Array consts = args[0]; + for (size_t i = 0; i < consts.size(); ++i) { + CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; + auto nid = const_idx_to_nid_[i]; + this->data_entry_[nid][0].CopyFrom(consts[i]); + } + *rv = 0; + }); } else { LOG(WARNING) << "Unknown DNNL symbol " << name; return PackedFunc(); @@ -100,8 +128,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto nid = this->input_nodes_[i]; // TODO: Support other data lengths. size_t offset_in_bytes = this->node_out_mem_[nid][0].second * 4; - write_to_dnnl_memory(this->data_entry_[i]->data, this->node_out_mem_[nid][0].first, - GetNDArraySize(this->data_entry_[i]), offset_in_bytes); + write_to_dnnl_memory(this->data_entry_[nid][0]->data, this->node_out_mem_[nid][0].first, + GetNDArraySize(this->data_entry_[nid][0]), offset_in_bytes); } // Invoke the engine. @@ -111,15 +139,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { stream_.wait(); // Read output buffers. - auto offset = this->input_nodes_.size(); for (size_t i = 0; i < this->outputs_.size(); ++i) { auto out_entry = this->outputs_[i]; auto nid = out_entry.id_; auto idx = out_entry.index_; size_t offset_in_bytes = this->node_out_mem_[nid][idx].second * 4; - read_from_dnnl_memory(this->data_entry_[offset + i]->data, - this->node_out_mem_[nid][idx].first, - GetNDArraySize(this->data_entry_[offset + i]), offset_in_bytes); + read_from_dnnl_memory(this->data_entry_[nid][idx]->data, this->node_out_mem_[nid][idx].first, + GetNDArraySize(this->data_entry_[nid][idx]), offset_in_bytes); } } @@ -135,9 +161,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto op_name = node.GetOpName(); if ("nn.conv2d" == op_name) { Conv2d(nid); - } else if ("conv2d_relu" == op_name) { + } else if ("dnnl.conv2d_relu" == op_name) { Conv2d(nid, true, false); - } else if ("conv2d_bias_relu" == op_name) { + } else if ("dnnl.conv2d_bias_relu" == op_name) { Conv2d(nid, true, true); } else if ("nn.dense" == op_name) { Dense(nid); @@ -150,6 +176,30 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } else { LOG(FATAL) << "Unsupported op: " << op_name; } + } else if (node.GetOpType() == "const") { + auto name = node.GetOpName(); + bool found = false; + for (size_t cid = 0; cid < const_names_.size(); ++cid) { + if (name == const_names_[cid]) { + found = true; + const_idx_to_nid_[cid] = nid; + break; + } + } + if (!found) { + LOG(FATAL) << "Unrecognized constant node: " << name; + } + } + } + + this->is_const_input_.resize(this->input_nodes_.size()); + for (size_t nid = 0; nid < this->input_nodes_.size(); ++nid) { + const auto& node = nodes_[nid]; + if (node.GetOpType() == "input") { + this->is_const_input_[nid] = false; + } else { + CHECK_EQ(node.GetOpType(), "const"); + this->is_const_input_[nid] = true; } } @@ -159,12 +209,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { ctx.device_id = 0; for (size_t i = 0; i < this->input_nodes_.size(); ++i) { auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; - this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); + auto nid = this->input_nodes_[i]; + this->data_entry_[nid][0] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } for (size_t i = 0; i < this->outputs_.size(); ++i) { auto entry = this->outputs_[i]; auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); 
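The surrounding change replaces the flat `data_entry_` vector with a two-level map, so an output is addressed by node id plus output index instead of a computed offset. A toy model of that pool:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

// Outer key: JSON node id; inner key: output index. Multi-output nodes such
// as batch_norm then need no offset bookkeeping.
int main() {
  std::unordered_map<uint32_t, std::unordered_map<uint32_t, std::string>> pool;
  pool[5][0] = "bn output";
  pool[5][1] = "bn running mean";
  std::cout << pool[5][1] << std::endl;
  return 0;
}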
+ this->data_entry_[entry.id_][entry.index_] = + NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } } @@ -492,8 +544,8 @@ inline size_t GetNDArraySize(const NDArray& arr) { dnnl::engine engine_; /* The dnnl stream. */ dnnl::stream stream_; - /* \brief A simple pool to contain the tensor for each node in the graph. */ - std::vector data_entry_; + /* \brief A simple pool to map from node ID to the output tensors. */ + std::unordered_map> data_entry_; /* The network layers that are represented in dnnl primitives. */ std::vector net_; /* The memory that is consumed by arguments. */ @@ -507,6 +559,10 @@ inline size_t GetNDArraySize(const NDArray& arr) { std::string func_name_; /* The required constant names. */ Array const_names_; + /* Indicate if an input node is a constant node. */ + std::vector is_const_input_; + /* Map from constant index to JSON constant node ID. */ + std::unordered_map const_idx_to_nid_; }; runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index 6ffe0995a413..cdba3c92a69f 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -28,6 +28,7 @@ from tvm.relay import transform from tvm.relay.backend import compile_engine from tvm.relay.build_module import bind_params_by_name +from tvm.relay.op.contrib.register import get_pattern_table def set_func_attr(func, compile_name, symbol_name): @@ -420,7 +421,7 @@ def conv2d_relu(): conv2d = relay.nn.conv2d(in_1, in_2, kernel_size=(3, 3), padding=(1, 1)) relu = relay.nn.relu(conv2d) func = relay.Function([in_1, in_2], relu) - func = func.with_attr('Composite', 'conv2d_relu') + func = func.with_attr('Composite', 'dnnl.conv2d_relu') func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_nn.relu_') # Partition function @@ -466,7 +467,7 @@ def conv2d_bias_relu(): add = relay.add(conv2d, in_3) relu = relay.nn.relu(add) func = relay.Function([in_1, in_2, in_3], relu) - func = func.with_attr('Composite', 'conv2d_bias_relu') + func = func.with_attr('Composite', 'dnnl.conv2d_bias_relu') func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_add_nn.relu_') # Partition function @@ -508,6 +509,54 @@ def conv2d_bias_relu(): check_result(mod, ref_mod, input_maps, out_shape, tol=1e-5) +def test_constant(): + if not tvm.get_global_func("relay.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (1, 32, 14, 14) + wshape = (32, 32, 3, 3) + + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=wshape, dtype=dtype) + bn_gamma = relay.var("bn_gamma") + bn_beta = relay.var("bn_beta") + bn_mmean = relay.var("bn_mean") + bn_mvar = relay.var("bn_var") + + layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), padding=(1, 1)) + bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar) + out = bn_output[0] + out = relay.nn.relu(out) + + func = relay.Function(relay.analysis.free_vars(out), out) + ref_mod, params = tvm.relay.testing.create_workload(func) + ref_mod["main"] = bind_params_by_name(ref_mod["main"], params) + + remove_bn_pass = tvm.transform.Sequential([ + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + ]) + + dnnl_patterns = get_pattern_table("dnnl") + composite_partition = tvm.transform.Sequential([ + transform.MergeComposite(dnnl_patterns), + transform.AnnotateTarget("dnnl"), 
+ transform.PartitionGraph() + ]) + + with tvm.transform.PassContext(opt_level=3, + disabled_pass=["AlterOpLayout"]): + ref_mod = remove_bn_pass(ref_mod) + mod = composite_partition(ref_mod) + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + check_result(mod, ref_mod, {'data': i_data}, (1, 32, 14, 14), tol=1e-5) + + if __name__ == "__main__": test_conv2d() test_add() @@ -516,3 +565,4 @@ def conv2d_bias_relu(): test_bn() test_multiple_ops() test_composite() + test_constant() From 896e4d0ffba706aa602bbb599bfebabf4f93bc4c Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Fri, 19 Jun 2020 17:50:02 +0000 Subject: [PATCH 12/30] handle constant --- src/relay/backend/contrib/codegen_json/codegen_json.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 69140435ac82..97dc2b333ca5 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -160,6 +160,7 @@ class JSONSerializer : public MemoizedExprTranslator GetParams() const { - return params_; - } + Array GetParams() const { return params_; } /*!\brief Return the generated json. */ std::string GetJSON() { From f4c023a79c9dd869afc684b123a8a91ea4beca3c Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Sat, 20 Jun 2020 01:54:43 +0000 Subject: [PATCH 13/30] clean up --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 182 +++++++++--------- src/runtime/json/json_runtime.h | 106 +++++----- 2 files changed, 145 insertions(+), 143 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 34646e52dae7..fb74929a209b 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -28,9 +28,9 @@ #include #include -#include "dnnl.hpp" #include "../../json/json_node.h" #include "../../json/json_runtime.h" +#include "dnnl.hpp" namespace tvm { namespace runtime { @@ -44,19 +44,17 @@ class DNNLJSONRuntime : public JSONRuntimeBase { using dt = dnnl::memory::data_type; public: - explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json, - const Array const_names) - : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} + DNNLJSONRuntime(const std::string& symbol_name, const std::string& graph_json, + const Array const_names) + : JSONRuntimeBase(symbol_name, graph_json, const_names) {} + + const char* type_key() const { return "dnnl_json"; } PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - if (name == "get_symbol") { - return PackedFunc( - [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->func_name_; }); - } else if (name == "get_const_vars") { - return PackedFunc( - [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); - } else if (this->func_name_ == name) { + if (this->symbol_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(this->is_const_input_.size(), this->input_nodes_.size()) + << "The module has not been initialized"; size_t arg_idx = 0; // Set input data entries. @@ -98,27 +96,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { arg_idx++; } }); - } else if ("__init_" + this->func_name_ == name) { - // Build DNNL engine and memory. 
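The `type_key()` of "dnnl_json" added just above is load-bearing: when a saved module is loaded back, TVM resolves the deserializer by looking up the global `runtime.module.loadbinary_<type_key>`. The pairing, instantiated for this runtime (the registration itself appears later in this patch):

// Module side: identifies the binary format.
const char* type_key() const { return "dnnl_json"; }

// Loader side: lets tvm.runtime.load_module reconstruct the module from the
// blob written by SaveToBinary.
TVM_REGISTER_GLOBAL("runtime.module.loadbinary_dnnl_json")
    .set_body_typed(JSONRuntimeBase::LoadFromBinary<DNNLJSONRuntime>);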
- if (!this->is_init_) { - Init(); - } - this->is_init_ = true; - + } else if ("__init_" + this->symbol_name_ == name) { // The function to initialize constant tensors. return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { CHECK_EQ(args.size(), 1U); - Array consts = args[0]; - for (size_t i = 0; i < consts.size(); ++i) { - CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; - auto nid = const_idx_to_nid_[i]; - this->data_entry_[nid][0].CopyFrom(consts[i]); - } + this->Init(args[0]); *rv = 0; }); } else { - LOG(WARNING) << "Unknown DNNL symbol " << name; - return PackedFunc(); + return JSONRuntimeBase::GetFunction(name, sptr_to_self); } } @@ -149,7 +135,18 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } - void Init() override { + void Init(const Array& consts) override { + BuildEngine(); + + // Initialize consts + for (size_t i = 0; i < consts.size(); ++i) { + CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; + auto nid = const_idx_to_nid_[i]; + this->data_entry_[nid][0].CopyFrom(consts[i]); + } + } + + void BuildEngine() { engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); stream_ = dnnl::stream(engine_); @@ -193,7 +190,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } this->is_const_input_.resize(this->input_nodes_.size()); - for (size_t nid = 0; nid < this->input_nodes_.size(); ++nid) { + for (auto nid : this->input_nodes_) { const auto& node = nodes_[nid]; if (node.GetOpType() == "input") { this->is_const_input_[nid] = false; @@ -220,33 +217,33 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } -private: + private: // Bind a JSON graph node entry to a DNNL memory. - dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, - size_t offset = 0) { - if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) { - return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset); - } - return node_out_mem_[entry.id_][entry.index_].first; - } - - // Bind a JSON graph node entry to a given DNNL memory. - dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem, - size_t offset = 0) { - // Since the DNNL memory has been created before calling this function, we assume the entry - // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. - CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0); - - // TODO: Support other data types (i.e., int8). - auto data_node = nodes_[entry.id_]; - auto dltype = data_node.GetOpDataType()[entry.index_]; - CHECK_EQ(dltype.bits, 32); - - node_out_mem_[entry.id_][entry.index_] = {mem, offset}; - return node_out_mem_[entry.id_][entry.index_].first; - } - - void Conv2d(const size_t& nid, const bool has_relu=false, const bool has_bias=false) { + dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, + size_t offset = 0) { + if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) { + return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset); + } + return node_out_mem_[entry.id_][entry.index_].first; + } + + // Bind a JSON graph node entry to a given DNNL memory. 
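The two `BindDNNLMemory` overloads (the second continues below) form a get-or-create cache over graph entries: binding is one-shot, and looking up an unbound entry allocates a fresh DNNL memory on demand. A dependency-free toy of the same pattern:

#include <iostream>
#include <string>
#include <unordered_map>

std::unordered_map<int, std::string> bound;

// One-shot binding; the runtime CHECKs that an entry is never bound twice.
const std::string& Bind(int eid, std::string mem) {
  return bound.emplace(eid, std::move(mem)).first->second;
}

const std::string& GetOrCreate(int eid) {
  auto it = bound.find(eid);
  return it == bound.end() ? Bind(eid, "fresh dnnl::memory") : it->second;
}

int main() {
  Bind(0, "buffer of an external input");
  std::cout << GetOrCreate(0) << "\n" << GetOrCreate(1) << std::endl;
  return 0;
}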
+ dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem, + size_t offset = 0) { + // Since the DNNL memory has been created before calling this function, we assume the entry + // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. + CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0); + + // TODO: Support other data types (i.e., int8). + auto data_node = nodes_[entry.id_]; + auto dltype = data_node.GetOpDataType()[entry.index_]; + CHECK_EQ(dltype.bits, 32); + + node_out_mem_[entry.id_][entry.index_] = {mem, offset}; + return node_out_mem_[entry.id_][entry.index_].first; + } + + void Conv2d(const size_t& nid, const bool has_relu = false, const bool has_bias = false) { auto node = this->nodes_[nid]; // Setup attributes. @@ -310,7 +307,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f); attr.set_post_ops(ops); } - + auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_); // Push to the network. @@ -507,38 +504,38 @@ class DNNLJSONRuntime : public JSONRuntimeBase { std::copy((uint8_t*)handle, (uint8_t*)handle + size, dst + offset); } -// Generate DNNL memory description and infer the data layout by the given shape. -inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape, dt dtype) { - dnnl::memory::desc data_md; - switch (shape.size()) { - case 2: - data_md = dnnl::memory::desc({shape, dtype, tag::ab}); - break; - case 3: - data_md = dnnl::memory::desc({shape, dtype, tag::abc}); - break; - case 4: - data_md = dnnl::memory::desc({shape, dtype, tag::abcd}); - break; - case 5: - data_md = dnnl::memory::desc({shape, dtype, tag::abcde}); - break; - default: - LOG(FATAL) << "Unsupported data shape dimension: " << shape.size(); - break; + // Generate DNNL memory description and infer the data layout by the given shape. + inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape, dt dtype) { + dnnl::memory::desc data_md; + switch (shape.size()) { + case 2: + data_md = dnnl::memory::desc({shape, dtype, tag::ab}); + break; + case 3: + data_md = dnnl::memory::desc({shape, dtype, tag::abc}); + break; + case 4: + data_md = dnnl::memory::desc({shape, dtype, tag::abcd}); + break; + case 5: + data_md = dnnl::memory::desc({shape, dtype, tag::abcde}); + break; + default: + LOG(FATAL) << "Unsupported data shape dimension: " << shape.size(); + break; + } + return data_md; } - return data_md; -} -// Calculate the size of a given NDArray in bytes. -inline size_t GetNDArraySize(const NDArray& arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= static_cast(arr->shape[i]); + // Calculate the size of a given NDArray in bytes. + inline size_t GetNDArraySize(const NDArray& arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= static_cast(arr->shape[i]); + } + size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8; + return size; } - size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8; - return size; -} /* The dnnl engine. */ dnnl::engine engine_; @@ -551,23 +548,17 @@ inline size_t GetNDArraySize(const NDArray& arr) { /* The memory that is consumed by arguments. */ std::vector> net_args_; /* The node ID to its corresponding output memory. */ - std::unordered_map < uint32_t, - std::unordered_map>> node_out_mem_; - /* Indicate if the DNNL engine has been initialized. 
*/ - bool is_init_ = false; - /* The only subgraph name for this module. */ - std::string func_name_; - /* The required constant names. */ - Array const_names_; + std::unordered_map>> + node_out_mem_; /* Indicate if an input node is a constant node. */ std::vector is_const_input_; /* Map from constant index to JSON constant node ID. */ std::unordered_map const_idx_to_nid_; }; -runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, +runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, const Array& const_names) { - auto n = make_object(func_name.operator std::string(), + auto n = make_object(symbol_name.operator std::string(), graph_json.operator std::string(), const_names); return runtime::Module(n); } @@ -575,6 +566,9 @@ runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate") .set_body_typed(DNNLJSONRuntimeCreate); +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_dnnl_json") + .set_body_typed(JSONRuntimeBase::LoadFromBinary); + } // namespace contrib } // namespace runtime } // namespace tvm diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index 38e195297e96..f4633ac9a693 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -29,11 +29,12 @@ #include #include +#include #include +#include #include #include #include -#include #include "json_node.h" @@ -47,16 +48,19 @@ namespace json { */ class JSONRuntimeBase : public ModuleNode { public: - explicit JSONRuntimeBase(const std::string& graph_json) { - LoadGraph(graph_json); + JSONRuntimeBase(const std::string& symbol_name, const std::string& graph_json, + const Array const_names) + : symbol_name_(symbol_name), graph_json_(graph_json), const_names_(const_names) { + LoadGraph(graph_json_); } - // The type key of each subclass can be saved to the json file and them - // used to create the specific runtime during deserialization. - // virtual const char* type_key() const = 0; const char* type_key() const { return "json"; } - virtual void Init() { LOG(FATAL) << "NYI"; } + /*! \brief Initialize a specific json runtime. */ + virtual void Init(const Array& consts) = 0; + + /*! \brief Invoke the execution engine to inteprete a specific json runtime. */ + virtual void Run() = 0; /*! * \brief Get a packed function. @@ -64,43 +68,48 @@ class JSONRuntimeBase : public ModuleNode { * \param sptr_to_self The pointer to the module node. * \return The packed function. */ - virtual PackedFunc GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { - return PackedFunc(); - } - - // Run(TVMValue*,value, int* type_code, int nargs), or - // Run(TVMArgs arg, TVMRetValue rv) ? 
- virtual void Run() { LOG(FATAL) << "NYI"; } - - void SetInput(const std::string& name, const NDArray& data) { - auto it = input_map_.find(name); - CHECK(it != input_map_.end()) << "Not found input: " << name; - SetInput(it->second, data); - } - - void SetInput(uint32_t index, const NDArray& data) { - CHECK_LT(static_cast(index), input_nodes_.size()); - uint32_t eid = EntryID(input_nodes_[index], 0); - data_entry_[eid] = data; + virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else { + return PackedFunc(nullptr); + } } - size_t NumOutputs() const { return outputs_.size(); } - - ObjectRef GetOutput() { - // Return the NDArray directly if there is only one outpput. - if (NumOutputs() == 1) { - uint32_t eid = EntryID(outputs_[0]); - return data_entry_[eid]; + virtual void SaveToBinary(dmlc::Stream* stream) { + // Save the symbol + stream->Write(symbol_name_); + // Save the graph + stream->Write(graph_json_); + // Save the required const names + std::vector consts; + for (const auto& it : const_names_) { + consts.push_back(it); } + stream->Write(consts); + } - // We need to return an ADTObj if there are multiple outputs. - std::vector outs; - for (size_t i = 0; i < NumOutputs(); i++) { - uint32_t eid = EntryID(outputs_[i]); - outs.push_back(data_entry_[eid]); + template ::value>::type> + static Module LoadFromBinary(void* strm) { + dmlc::Stream* stream = static_cast(strm); + std::string symbol; + std::string graph_json; + std::vector consts; + // Load the symbol + CHECK(stream->Read(&symbol)) << "Loading symbol name failed"; + CHECK(stream->Read(&graph_json)) << "Loading graph json failed"; + CHECK(stream->Read(&consts)) << "Loading the const name list failed"; + Array const_names; + for (const auto& it : consts) { + const_names.push_back(it); } - return ADT::Tuple(outs); + auto n = make_object(symbol, graph_json, const_names); + return Module(n); } protected: @@ -108,7 +117,6 @@ class JSONRuntimeBase : public ModuleNode { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); - for (size_t i = 0; i < input_nodes_.size(); i++) { uint32_t nid = input_nodes_[i]; std::string& name = nodes_[nid].name_; @@ -135,21 +143,21 @@ class JSONRuntimeBase : public ModuleNode { } // Get the node entry index. - uint32_t EntryID(uint32_t nid, uint32_t index) const { - return node_row_ptr_[nid] + index; - } + uint32_t EntryID(uint32_t nid, uint32_t index) const { return node_row_ptr_[nid] + index; } // Get the node entry index. - uint32_t EntryID(const JSONGraphNodeEntry& e) const { - return EntryID(e.id_, e.index_); - } + uint32_t EntryID(const JSONGraphNodeEntry& e) const { return EntryID(e.id_, e.index_); } // Number of node entries. - uint32_t NumEntries() const { - return node_row_ptr_.back(); - } + uint32_t NumEntries() const { return node_row_ptr_.back(); } protected: + /* The only subgraph name for this module. */ + std::string symbol_name_; + /* The graph. */ + std::string graph_json_; + /* The required constant names. */ + Array const_names_; /*! \brief The json graph nodes. */ std::vector nodes_; /*! \brief The input nodes, including variables and constants. 
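`SaveToBinary` and the templated `LoadFromBinary` above are strict mirrors: fields must be read in exactly the order they were written (symbol name, graph JSON, constant-name list). A plain-iostream illustration of that symmetry, independent of dmlc::Stream:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // "Save": symbol, graph, count, then the names.
  std::ostringstream os;
  os << "dnnl_0" << '\n' << R"({"nodes": []})" << '\n' << 2 << '\n'
     << "dnnl_0_const_0" << '\n' << "dnnl_0_const_1" << '\n';

  // "Load": identical order, or the stream desynchronizes.
  std::istringstream is(os.str());
  std::string symbol, graph, name;
  size_t n = 0;
  std::getline(is, symbol);
  std::getline(is, graph);
  is >> n;
  is.ignore();  // consume the newline after the count
  std::vector<std::string> consts;
  while (n-- > 0 && std::getline(is, name)) consts.push_back(name);
  std::cout << symbol << ": " << consts.size() << " constants" << std::endl;
  return 0;
}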
*/ From edd8d36c2ebac8aa1662a33c2ba5936f853b9739 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Sat, 20 Jun 2020 18:48:56 +0000 Subject: [PATCH 14/30] Simplify dnnl user code --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 125 +++++------------- src/runtime/json/json_runtime.h | 63 ++++++++- 2 files changed, 93 insertions(+), 95 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index fb74929a209b..3b3a320c03be 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -53,48 +53,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { if (this->symbol_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK_EQ(this->is_const_input_.size(), this->input_nodes_.size()) - << "The module has not been initialized"; - size_t arg_idx = 0; - - // Set input data entries. - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - if (this->is_const_input_[i]) { - continue; - } - auto nid = this->input_nodes_[i]; - - CHECK(args[arg_idx].type_code() == kTVMNDArrayHandle || - args[arg_idx].type_code() == kTVMDLTensorHandle) - << "Expect NDArray or DLTensor as inputs\n"; - if (args[arg_idx].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[arg_idx]; - this->data_entry_[nid][0].CopyFrom(arg); - } else { - NDArray arg = args[arg_idx]; - this->data_entry_[nid][0].CopyFrom(arg); - } - CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); - arg_idx++; - } + CHECK(this->initialized_) << "The module has not been initialized"; + // Set inputs. + SetInputs(args); // Execute the subgraph. this->Run(); - // Copy result to output buffer. - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto entry = this->outputs_[i]; - - if (args[arg_idx].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[arg_idx]; - this->data_entry_[entry.id_][entry.index_].CopyTo(arg); - } else { - NDArray arg = args[arg_idx]; - this->data_entry_[entry.id_][entry.index_].CopyTo(arg); - } - CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); - arg_idx++; - } + GetOutput(args); }); } else if ("__init_" + this->symbol_name_ == name) { // The function to initialize constant tensors. @@ -112,10 +78,11 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Fill in the input buffers. for (size_t i = 0; i < this->input_nodes_.size(); ++i) { auto nid = this->input_nodes_[i]; + auto eid = EntryID(nid, 0); // TODO: Support other data lengths. - size_t offset_in_bytes = this->node_out_mem_[nid][0].second * 4; - write_to_dnnl_memory(this->data_entry_[nid][0]->data, this->node_out_mem_[nid][0].first, - GetNDArraySize(this->data_entry_[nid][0]), offset_in_bytes); + size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; + write_to_dnnl_memory(this->data_entry_[nid]->data, this->entry_out_mem_[eid].first, + GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); } // Invoke the engine. @@ -127,23 +94,26 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Read output buffers. 
for (size_t i = 0; i < this->outputs_.size(); ++i) { auto out_entry = this->outputs_[i]; - auto nid = out_entry.id_; - auto idx = out_entry.index_; - size_t offset_in_bytes = this->node_out_mem_[nid][idx].second * 4; - read_from_dnnl_memory(this->data_entry_[nid][idx]->data, this->node_out_mem_[nid][idx].first, - GetNDArraySize(this->data_entry_[nid][idx]), offset_in_bytes); + auto eid = EntryID(out_entry); + size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; + read_from_dnnl_memory(this->data_entry_[eid]->data, this->entry_out_mem_[eid].first, + GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); } } void Init(const Array& consts) override { + data_entry_.resize(NumEntries()); BuildEngine(); + CHECK_EQ(consts.size(), const_idx_.size()) + << "The number of input constants must match the number of required."; + // Initialize consts for (size_t i = 0; i < consts.size(); ++i) { - CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; - auto nid = const_idx_to_nid_[i]; - this->data_entry_[nid][0].CopyFrom(consts[i]); + this->data_entry_[const_idx_[i]].CopyFrom(consts[i]); } + + initialized_ = true; } void BuildEngine() { @@ -173,30 +143,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } else { LOG(FATAL) << "Unsupported op: " << op_name; } - } else if (node.GetOpType() == "const") { - auto name = node.GetOpName(); - bool found = false; - for (size_t cid = 0; cid < const_names_.size(); ++cid) { - if (name == const_names_[cid]) { - found = true; - const_idx_to_nid_[cid] = nid; - break; - } - } - if (!found) { - LOG(FATAL) << "Unrecognized constant node: " << name; - } - } - } - - this->is_const_input_.resize(this->input_nodes_.size()); - for (auto nid : this->input_nodes_) { - const auto& node = nodes_[nid]; - if (node.GetOpType() == "input") { - this->is_const_input_[nid] = false; - } else { - CHECK_EQ(node.GetOpType(), "const"); - this->is_const_input_[nid] = true; } } @@ -207,13 +153,12 @@ class DNNLJSONRuntime : public JSONRuntimeBase { for (size_t i = 0; i < this->input_nodes_.size(); ++i) { auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; auto nid = this->input_nodes_[i]; - this->data_entry_[nid][0] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } for (size_t i = 0; i < this->outputs_.size(); ++i) { auto entry = this->outputs_[i]; auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_[entry.id_][entry.index_] = - NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } } @@ -221,26 +166,28 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Bind a JSON graph node entry to a DNNL memory. dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, size_t offset = 0) { - if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) { + auto eid = EntryID(entry); + if (entry_out_mem_.count(eid) == 0) { return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset); } - return node_out_mem_[entry.id_][entry.index_].first; + return entry_out_mem_[eid].first; } // Bind a JSON graph node entry to a given DNNL memory. 
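Both `BindDNNLMemory` overloads here key on `EntryID`, the flat index derived from `node_row_ptr_`, which is a prefix sum of per-node output counts. A tiny standalone demo of that indexing scheme:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Three nodes with 1, 1 and 3 outputs; NumEntries() is the back element.
  std::vector<uint32_t> node_row_ptr = {0, 1, 2, 5};
  auto entry_id = [&](uint32_t nid, uint32_t index) {
    return node_row_ptr[nid] + index;
  };
  std::cout << entry_id(2, 1) << std::endl;  // 3: second output of node 2
  return 0;
}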
dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem, size_t offset = 0) { + auto eid = EntryID(entry); // Since the DNNL memory has been created before calling this function, we assume the entry // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. - CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0); + CHECK(entry_out_mem_.count(eid) == 0); // TODO: Support other data types (i.e., int8). auto data_node = nodes_[entry.id_]; auto dltype = data_node.GetOpDataType()[entry.index_]; CHECK_EQ(dltype.bits, 32); - node_out_mem_[entry.id_][entry.index_] = {mem, offset}; - return node_out_mem_[entry.id_][entry.index_].first; + entry_out_mem_[eid] = {mem, offset}; + return entry_out_mem_[eid].first; } void Conv2d(const size_t& nid, const bool has_relu = false, const bool has_bias = false) { @@ -541,19 +488,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::engine engine_; /* The dnnl stream. */ dnnl::stream stream_; - /* \brief A simple pool to map from node ID to the output tensors. */ - std::unordered_map> data_entry_; /* The network layers that are represented in dnnl primitives. */ std::vector net_; /* The memory that is consumed by arguments. */ std::vector> net_args_; - /* The node ID to its corresponding output memory. */ - std::unordered_map>> - node_out_mem_; - /* Indicate if an input node is a constant node. */ - std::vector is_const_input_; - /* Map from constant index to JSON constant node ID. */ - std::unordered_map const_idx_to_nid_; + /* The entry ID to its corresponding output memory. */ + std::unordered_map> entry_out_mem_; + /* Indicate if the DNNL engine has been initialized. */ + bool initialized_{false}; }; runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, @@ -563,8 +505,7 @@ runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, return runtime::Module(n); } -TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate") -.set_body_typed(DNNLJSONRuntimeCreate); +TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate").set_body_typed(DNNLJSONRuntimeCreate); TVM_REGISTER_GLOBAL("runtime.module.loadbinary_dnnl_json") .set_body_typed(JSONRuntimeBase::LoadFromBinary); diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index f4633ac9a693..2e5bddcd3a07 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -113,14 +114,68 @@ class JSONRuntimeBase : public ModuleNode { } protected: + void SetInputs(const TVMArgs& args) { + CHECK_EQ(args.size(), input_var_idx_.size() + outputs_.size()) + << "Found mismatch in the number of provided data entryies and required."; + + for (size_t i = 0; i < input_var_idx_.size(); i++) { + auto eid = EntryID(input_var_idx_[i], 0); + CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle) + << "Expect NDArray or DLTensor as inputs"; + if (args[i].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[i]; + this->data_entry_[eid].CopyFrom(arg); + } else { + // Zero copy for input because the tensor is managed by the host. + this->data_entry_[eid] = args[i]; + } + } + } + + void GetOutput(const TVMArgs& args) { + // Copy result to output buffer. 
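`SetInputs` above and the `GetOutput` body that follows share one flat packed-argument list: every non-constant input first, then one destination buffer per output. A toy walk of that layout:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> args = {"data", "weight", "out0"};
  size_t num_input_vars = 2;  // input_var_idx_.size() in the runtime
  size_t num_outputs = 1;     // outputs_.size()
  assert(args.size() == num_input_vars + num_outputs);
  for (size_t i = 0; i < num_input_vars; ++i)
    std::cout << "input  <- " << args[i] << "\n";
  for (size_t i = num_input_vars; i < args.size(); ++i)
    std::cout << "output -> " << args[i] << "\n";
  return 0;
}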
+ size_t arg_idx = input_var_idx_.size(); + CHECK_EQ(args.size(), arg_idx + outputs_.size()) + << "Found mismatch in the number of provided data entryies and required."; + + for (size_t i = 0; i < this->outputs_.size(); i++) { + auto eid = EntryID(outputs_[i]); + + if (args[arg_idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[arg_idx]; + this->data_entry_[eid].CopyTo(arg); + } else { + NDArray arg = args[arg_idx]; + this->data_entry_[eid].CopyTo(arg); + } + arg_idx++; + } + } + void LoadGraph(const std::string& graph_json) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); + std::vector consts; for (size_t i = 0; i < input_nodes_.size(); i++) { uint32_t nid = input_nodes_[i]; - std::string& name = nodes_[nid].name_; - input_map_[name] = i; + std::string name = nodes_[nid].name_; + if (nodes_[nid].op_type_ == "input") { + input_var_idx_.push_back(nid); + } else { + CHECK_EQ(nodes_[nid].op_type_, "const"); + auto pos = std::find(std::begin(const_names_), std::end(const_names_), name); + CHECK(pos != std::end(const_names_)) << "Found non-existent constant: " << name; + const_idx_.push_back(nid); + consts.push_back(name); + } + } + CHECK_EQ(consts.size(), const_names_.size()) + << "Found mismatch for the number of constants in the graph and required."; + + for (size_t i = 0; i < consts.size(); i++) { + CHECK_EQ(consts[i], const_names_[i]) + << "The position of constant in the graph must be the same as the required."; } } @@ -169,7 +224,9 @@ class JSONRuntimeBase : public ModuleNode { /*! \brief Data of that entry. */ std::vector data_entry_; /*! \brief Map the input name to index. */ - std::unordered_map input_map_; + std::vector input_var_idx_; + /*! \brief input const index. */ + std::vector const_idx_; }; } // namespace json From ededaf1bbe4e0c065a0fc540ebba85022b45956a Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Sun, 21 Jun 2020 01:12:07 +0000 Subject: [PATCH 15/30] GetDataSize --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 116 +++++++----------- src/runtime/json/json_runtime.h | 68 +++++++++- 2 files changed, 110 insertions(+), 74 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 3b3a320c03be..8f24c2a5054b 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -22,6 +22,7 @@ * \brief A simple JSON runtime for DNNL. */ +#include #include #include @@ -74,54 +75,59 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } + void Init(const Array& consts) override { + BuildEngine(); + + CHECK_EQ(consts.size(), const_idx_.size()) + << "The number of input constants must match the number of required."; + + // Pre-allocate buffers on CPU for input and output entries. + DLContext ctx; + ctx.device_type = static_cast(kDLCPU); + ctx.device_id = 0; + AllocateInputOutputBuffer(ctx); + + // Setup constants entries for weights. + SetupConstants(consts); + + initialized_ = true; + } + void Run() override { // Fill in the input buffers. - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - auto nid = this->input_nodes_[i]; - auto eid = EntryID(nid, 0); + for (size_t i = 0; i < input_nodes_.size(); ++i) { + auto eid = EntryID(input_nodes_[i], 0); // TODO: Support other data lengths. 
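The `* 4` just below turns a float32 element offset into bytes, which is why the TODO above flags other data widths. A self-contained picture of this staging copy:

#include <cstring>
#include <iostream>
#include <vector>

// Stand-in for write_to_dnnl_memory: place a tensor into a shared backing
// buffer at an element offset, scaled by sizeof(float).
int main() {
  std::vector<float> dnnl_buffer(16, 0.f);
  std::vector<float> tensor = {1.f, 2.f, 3.f};
  size_t offset_elems = 4;  // entry_out_mem_[eid].second in the runtime
  std::memcpy(dnnl_buffer.data() + offset_elems, tensor.data(),
              tensor.size() * sizeof(float));
  std::cout << dnnl_buffer[4] << " " << dnnl_buffer[6] << std::endl;  // 1 3
  return 0;
}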
- size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; - write_to_dnnl_memory(this->data_entry_[nid]->data, this->entry_out_mem_[eid].first, - GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); + size_t offset_in_bytes = entry_out_mem_[eid].second * 4; + size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->())); + write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size, + offset_in_bytes); } - // Invoke the engine. + // Invoke the engine through intepreting the stream. for (size_t i = 0; i < net_.size(); ++i) { net_.at(i).execute(stream_, net_args_.at(i)); } stream_.wait(); // Read output buffers. - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto out_entry = this->outputs_[i]; - auto eid = EntryID(out_entry); - size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; - read_from_dnnl_memory(this->data_entry_[eid]->data, this->entry_out_mem_[eid].first, - GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); - } - } - - void Init(const Array& consts) override { - data_entry_.resize(NumEntries()); - BuildEngine(); - - CHECK_EQ(consts.size(), const_idx_.size()) - << "The number of input constants must match the number of required."; - - // Initialize consts - for (size_t i = 0; i < consts.size(); ++i) { - this->data_entry_[const_idx_[i]].CopyFrom(consts[i]); + for (size_t i = 0; i < outputs_.size(); ++i) { + auto eid = EntryID(outputs_[i]); + size_t offset_in_bytes = entry_out_mem_[eid].second * 4; + size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->())); + read_from_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size, + offset_in_bytes); } - - initialized_ = true; } + private: + // Build up the engine based on the input graph. void BuildEngine() { engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); stream_ = dnnl::stream(engine_); // Build subgraph engine. - for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { + for (size_t nid = 0; nid < nodes_.size(); ++nid) { const auto& node = nodes_[nid]; if (node.GetOpType() == "kernel") { CHECK_EQ(node.GetOpType(), "kernel"); @@ -145,24 +151,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } } - - // Initialize input/output entries. - DLContext ctx; - ctx.device_type = static_cast(1); - ctx.device_id = 0; - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; - auto nid = this->input_nodes_[i]; - this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); - } - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto entry = this->outputs_[i]; - auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); - } } - private: // Bind a JSON graph node entry to a DNNL memory. dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, size_t offset = 0) { @@ -191,14 +181,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Conv2d(const size_t& nid, const bool has_relu = false, const bool has_bias = false) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; // Setup attributes. 
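The attribute block that follows converts the JSON string attributes (strides, padding, groups) into DNNL dims. For reference, the conventional output-size arithmetic those dims imply:

#include <iostream>

int main() {
  // With a 3x3 kernel, padding 1 and stride 1, as in the tests above,
  // the spatial size is preserved.
  int IH = 14, KH = 3, pad_top = 1, pad_bottom = 1, stride = 1;
  int OH = (IH + pad_top + pad_bottom - KH) / stride + 1;
  std::cout << "OH = " << OH << std::endl;  // OH = 14
  return 0;
}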
auto data_entry = node.GetInputs()[0]; auto weight_entry = node.GetInputs()[1]; - dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; - dnnl::memory::dims weight_shape = - this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; + dnnl::memory::dims input_shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims weight_shape = nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; std::vector str_strides = node.GetAttr>("strides"); std::vector str_padding = node.GetAttr>("padding"); dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]); @@ -292,14 +281,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Dense(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; // Setup attributes. auto data_entry = node.GetInputs()[0]; auto weight_entry = node.GetInputs()[1]; - dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; - dnnl::memory::dims weight_shape = - this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; + dnnl::memory::dims input_shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims weight_shape = nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; dnnl::memory::dim B = input_shape[0], // batch size IC = input_shape[1], // input channels @@ -340,14 +328,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void BatchNorm(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; auto data_entry = node.GetInputs()[0]; auto gamma_entry = node.GetInputs()[1]; auto beta_entry = node.GetInputs()[2]; auto mean_entry = node.GetInputs()[3]; auto variance_entry = node.GetInputs()[4]; - dnnl::memory::dims data_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims data_shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; dnnl::memory::dim IC = data_shape[1]; float epsilon = std::stof(node.GetAttr>("epsilon")[0]); @@ -382,10 +370,10 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Relu(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; auto data_entry = node.GetInputs()[0]; - dnnl::memory::dims shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; auto data_md = dnnl::memory::desc{{shape}, dt::f32, tag::abcd}; auto relu_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, @@ -405,7 +393,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Add(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; // Memory and compute description. std::vector data_dims; @@ -414,7 +402,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { CHECK_EQ(node.GetInputs().size(), 2U); for (auto entry : node.GetInputs()) { - auto data_shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; + auto data_shape = nodes_[entry.id_].GetOpShape()[entry.index_]; dnnl::memory::desc data_md = GenDNNLMemDescByShape(data_shape, dt::f32); data_dims.push_back(data_shape); @@ -474,16 +462,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { return data_md; } - // Calculate the size of a given NDArray in bytes. 
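The helper below is deleted because `tvm::runtime::GetDataSize` already computes the same quantity from a DLTensor. The arithmetic, reproduced standalone:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Element count times bytes per element, rounding bit widths up to bytes.
size_t TensorBytes(const std::vector<int64_t>& shape, int bits, int lanes) {
  size_t size = 1;
  for (int64_t dim : shape) size *= static_cast<size_t>(dim);
  return size * ((bits * lanes + 7) / 8);
}

int main() {
  std::cout << TensorBytes({1, 32, 14, 14}, 32, 1) << std::endl;  // 25088
  return 0;
}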
-  inline size_t GetNDArraySize(const NDArray& arr) {
-    size_t size = 1;
-    for (tvm_index_t i = 0; i < arr->ndim; ++i) {
-      size *= static_cast(arr->shape[i]);
-    }
-    size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8;
-    return size;
-  }
-
  /* The dnnl engine. */
  dnnl::engine engine_;
  /* The dnnl stream. */
diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h
index 2e5bddcd3a07..8314dd9a2a36 100644
--- a/src/runtime/json/json_runtime.h
+++ b/src/runtime/json/json_runtime.h
@@ -114,6 +114,11 @@ class JSONRuntimeBase : public ModuleNode {
  }

 protected:
+  /*!
+   * \brief Set up the inputs for inference.
+   *
+   * \param args The packed args.
+   */
  void SetInputs(const TVMArgs& args) {
    CHECK_EQ(args.size(), input_var_idx_.size() + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";
@@ -122,36 +127,73 @@
      auto eid = EntryID(input_var_idx_[i], 0);
      CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle)
          << "Expect NDArray or DLTensor as inputs";
+      size_t to_size = GetDataSize(*(data_entry_[eid].operator->()));
      if (args[i].type_code() == kTVMDLTensorHandle) {
        DLTensor* arg = args[i];
-        this->data_entry_[eid].CopyFrom(arg);
+        data_entry_[eid].CopyFrom(arg);
      } else {
        // Zero copy for input because the tensor is managed by the host.
-        this->data_entry_[eid] = args[i];
+        NDArray arg = args[i];
+        size_t from_size = GetDataSize(*(arg.operator->()));
+        CHECK_EQ(from_size, to_size);
+        if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
+          data_entry_[eid] = args[i];
+        } else {
+          data_entry_[eid].CopyFrom(arg);
+        }
      }
    }
  }

+  /*!
+   * \brief Return the results through packed args.
+   *
+   * \param args The packed args.
+   */
  void GetOutput(const TVMArgs& args) {
    // Copy result to output buffer.
    size_t arg_idx = input_var_idx_.size();
    CHECK_EQ(args.size(), arg_idx + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < this->outputs_.size(); i++) {
+    for (size_t i = 0; i < outputs_.size(); i++) {
      auto eid = EntryID(outputs_[i]);

      if (args[arg_idx].type_code() == kTVMDLTensorHandle) {
        DLTensor* arg = args[arg_idx];
-        this->data_entry_[eid].CopyTo(arg);
+        data_entry_[eid].CopyTo(arg);
      } else {
        NDArray arg = args[arg_idx];
-        this->data_entry_[eid].CopyTo(arg);
+        data_entry_[eid].CopyTo(arg);
      }
      arg_idx++;
    }
  }

+  /*!
+   * \brief Pre-allocate empty buffers for input and output entries.
+   *
+   * \param ctx The context for the pre-allocated buffer.
+   */
+  void AllocateInputOutputBuffer(const DLContext& ctx) {
+    for (size_t i = 0; i < this->input_nodes_.size(); ++i) {
+      auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0];
+      auto nid = this->input_nodes_[i];
+      this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx);
+    }
+
+    for (size_t i = 0; i < this->outputs_.size(); ++i) {
+      auto entry = this->outputs_[i];
+      auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_];
+      this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx);
+    }
+  }
+
+  /*!
+   * \brief Load the graph and record the entries for inputs and constants.
+   *
+   * \param graph_json The graph in the json format.
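+   * A minimal sketch of that format (keys follow the Load() parser below; the
+   * node list and concrete values here are illustrative only):
+   *   {"nodes": [{"op": "input", "name": "data", ...},
+   *              {"op": "kernel", "name": "nn.relu", "inputs": [[0, 0, 0]], ...}],
+   *    "arg_nodes": [0], "heads": [[1, 0, 0]], "node_row_ptr": [0, 1, 2]}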
+ */ void LoadGraph(const std::string& graph_json) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); @@ -177,8 +219,24 @@ class JSONRuntimeBase : public ModuleNode { CHECK_EQ(consts[i], const_names_[i]) << "The position of constant in the graph must be the same as the required."; } + + // Reserve data entries. + data_entry_.resize(NumEntries()); + } + + /*! + * \brief Set up the constants/weights for inference. + * + * \param consts The constant to be filled. + */ + void SetupConstants(const Array& consts) { + // Initialize consts + for (size_t i = 0; i < consts.size(); ++i) { + data_entry_[const_idx_[i]].CopyFrom(consts[i]); + } } + // Load the graph. void Load(dmlc::JSONReader* reader) { reader->BeginObject(); std::string key; From 8f9215a9568ed8b3515ddb370b02b48659044a6d Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Mon, 22 Jun 2020 23:55:41 +0000 Subject: [PATCH 16/30] fix dense bug --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 5 +++-- tests/python/relay/test_pass_partition_graph.py | 12 ++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 8f24c2a5054b..3a3425a8d52c 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -314,10 +314,11 @@ class DNNLJSONRuntime : public JSONRuntimeBase { net_.push_back(dense); // Memories. - std::vector bias(OC, 0); auto data_memory = BindDNNLMemory(data_entry, data_md); auto weight_memory = BindDNNLMemory(weight_entry, weight_md); - auto bias_memory = dnnl::memory(bias_md, engine_, bias.data()); + auto bias_memory = dnnl::memory(bias_md, engine_); + float bias[OC] = {0}; + write_to_dnnl_memory(bias, bias_memory, OC * sizeof(float)); JSONGraphNodeEntry out_entry(nid, 0); auto dst_memory = BindDNNLMemory(out_entry, dense_prim_desc.dst_desc()); diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 473ca9d66106..8dc5344b00be 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -462,21 +462,17 @@ def test_extern_dnnl_mobilenet(): dtype = 'float32' ishape = (1, 3, 224, 224) - mod, params = relay.testing.mobilenet.get_workload( - batch_size=1, dtype='float32') - - mod = transform.AnnotateTarget(["dnnl"])(mod) + ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype='float32') + mod = transform.AnnotateTarget(["dnnl"])(ref_mod) mod = transform.MergeCompilerRegions()(mod) mod = transform.PartitionGraph()(mod) i_data = np.random.uniform(0, 1, ishape).astype(dtype) - ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, - dtype='float32') ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) ref_res = ref_ex.evaluate()(i_data, **params) + compile_engine.get().clear() - check_result(mod, {"data": i_data}, - (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params) + check_result(mod, {"data": i_data}, (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params) def test_function_lifting(): From 57a90fc22d4127a9ea19941d0163ece4d990b895 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Tue, 23 Jun 2020 00:52:35 +0000 Subject: [PATCH 17/30] improve cmake --- cmake/modules/contrib/DNNL.cmake | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index ac03fbcc8dd1..8b863a0b764b 100644 --- 
a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -16,6 +16,10 @@ # under the License. if(USE_DNNL_CODEGEN STREQUAL "ON") + if(USE_JSON_RUNTIME STREQUAL "OFF") + message(FATAL_ERROR "USE_JSON_RUNTIME must be ON to use DNNL JSON RUNTIME") + endif() + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) file(GLOB JSON_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.h) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) @@ -25,6 +29,16 @@ if(USE_DNNL_CODEGEN STREQUAL "ON") list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) - message(STATUS "Build with DNNL codegen: " ${EXTERN_LIBRARY_DNNL}) + message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL}) +elseif(USE_DNNL_CODEGEN STREQUAL "C_SRC") + add_definitions(-DDNNL_WITH_C_SOURCE_MODULE=1) + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) + list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) + + find_library(EXTERN_LIBRARY_DNNL dnnl) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) + file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) + list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) + message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL}) endif() From cd843d1ee845e6ddd1558d2ed51c665eccf1dbdf Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Tue, 23 Jun 2020 15:40:27 +0000 Subject: [PATCH 18/30] zero copy --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 4 +- src/runtime/json/json_runtime.h | 66 +++++++++++-------- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 3a3425a8d52c..21d0dd66e2b2 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -99,7 +99,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto eid = EntryID(input_nodes_[i], 0); // TODO: Support other data lengths. 
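      // Note the hard-coded element size in the next line: "* 4" assumes
      // 4-byte fp32 entries (hence the TODO above). A dtype-aware variant
      // would derive it from the node instead, e.g. (sketch):
      //   DLDataType t = nodes_[input_nodes_[i]].GetOpDataType()[0];
      //   size_t elem_bytes = (t.bits * t.lanes + 7) / 8;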
      size_t offset_in_bytes = entry_out_mem_[eid].second * 4;
-      size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->()));
+      size_t buffer_size = GetDataSize(*data_entry_[eid]);
      write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size,
                           offset_in_bytes);
    }
@@ -114,7 +114,7 @@
    for (size_t i = 0; i < outputs_.size(); ++i) {
      auto eid = EntryID(outputs_[i]);
      size_t offset_in_bytes = entry_out_mem_[eid].second * 4;
-      size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->()));
+      size_t buffer_size = GetDataSize(*data_entry_[eid]);
      read_from_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size,
                            offset_in_bytes);
    }
diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h
index 8314dd9a2a36..96d569730c97 100644
--- a/src/runtime/json/json_runtime.h
+++ b/src/runtime/json/json_runtime.h
@@ -127,20 +127,24 @@ class JSONRuntimeBase : public ModuleNode {
      auto eid = EntryID(input_var_idx_[i], 0);
      CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle)
          << "Expect NDArray or DLTensor as inputs";
-      size_t to_size = GetDataSize(*(data_entry_[eid].operator->()));
-      if (args[i].type_code() == kTVMDLTensorHandle) {
-        DLTensor* arg = args[i];
-        data_entry_[eid].CopyFrom(arg);
+
+      const DLTensor* arg;
+      if (args[i].IsObjectRef()) {
+        NDArray arr = args[i];
+        arg = arr.operator->();
      } else {
+        arg = args[i].operator DLTensor*();
+      }
+
+      size_t from_size = GetDataSize(*arg);
+      size_t to_size = GetDataSize(*data_entry_[eid]);
+      CHECK_EQ(from_size, to_size);
+
+      if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
        // Zero copy for input because the tensor is managed by the host.
-        NDArray arg = args[i];
-        size_t from_size = GetDataSize(*(arg.operator->()));
-        CHECK_EQ(from_size, to_size);
-        if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
-          data_entry_[eid] = args[i];
-        } else {
-          data_entry_[eid].CopyFrom(arg);
-        }
+        data_entry_[eid]->data = arg->data;
+      } else {
+        NDArray::CopyFromTo(arg, data_entry_[eid]);
      }
    }
  }
@@ -156,17 +160,17 @@
    CHECK_EQ(args.size(), arg_idx + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < outputs_.size(); i++) {
+    for (size_t i = 0; i < outputs_.size(); i++, arg_idx++) {
      auto eid = EntryID(outputs_[i]);

      if (args[arg_idx].type_code() == kTVMDLTensorHandle) {
        DLTensor* arg = args[arg_idx];
-        data_entry_[eid].CopyTo(arg);
+        NDArray::CopyFromTo(data_entry_[eid], arg);
      } else {
+        CHECK(args[arg_idx].IsObjectRef());
        NDArray arg = args[arg_idx];
-        data_entry_[eid].CopyTo(arg);
+        arg.CopyFrom(data_entry_[eid]);
      }
-      arg_idx++;
    }
  }
@@ -176,16 +180,26 @@ class JSONRuntimeBase : public ModuleNode {
   * \param ctx The context for the pre-allocated buffer.
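   * For the common host path this is simply the CPU context, e.g.:
   *   DLContext cpu_ctx{kDLCPU, 0};
   *   AllocateInputOutputBuffer(cpu_ctx);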
*/ void AllocateInputOutputBuffer(const DLContext& ctx) { - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; - auto nid = this->input_nodes_[i]; - this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + for (size_t i = 0; i < input_nodes_.size(); ++i) { + auto nid = input_nodes_[i]; + auto shape = nodes_[nid].GetOpShape()[0]; + auto dtype = nodes_[nid].GetOpDataType()[0]; + DLTensor* tensor; + int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, + ctx.device_type, ctx.device_id, &tensor); + CHECK_EQ(ret, 0) << TVMGetLastError(); + data_entry_[EntryID(nid, 0)] = tensor; } - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto entry = this->outputs_[i]; - auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + for (size_t i = 0; i < outputs_.size(); ++i) { + auto entry = outputs_[i]; + auto shape = nodes_[entry.id_].GetOpShape()[entry.index_]; + auto dtype = nodes_[entry.id_].GetOpDataType()[entry.index_]; + DLTensor* tensor; + int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, + ctx.device_type, ctx.device_id, &tensor); + CHECK_EQ(ret, 0) << TVMGetLastError(); + data_entry_[EntryID(entry)] = tensor; } } @@ -232,7 +246,7 @@ class JSONRuntimeBase : public ModuleNode { void SetupConstants(const Array& consts) { // Initialize consts for (size_t i = 0; i < consts.size(); ++i) { - data_entry_[const_idx_[i]].CopyFrom(consts[i]); + consts[i].CopyTo(data_entry_[const_idx_[i]]); } } @@ -280,7 +294,7 @@ class JSONRuntimeBase : public ModuleNode { /*! \brief Output entries. */ std::vector outputs_; /*! \brief Data of that entry. */ - std::vector data_entry_; + std::vector data_entry_; /*! \brief Map the input name to index. */ std::vector input_var_idx_; /*! \brief input const index. 
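   * For a partitioned function f(data, w0, w1) whose weights were bound as
   * constants, for example, this would hold the JSON node indices of w0 and
   * w1 in the same order the codegen recorded them in const_names_.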
*/

From d8929a67f9e308336cc8d6e6aa241fb78a131ef8 Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Tue, 23 Jun 2020 19:11:13 +0000
Subject: [PATCH 19/30] add unit test

---
 tests/python/relay/test_json_runtime.py | 61 ++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py
index cdba3c92a69f..26a418864d3a 100644
--- a/tests/python/relay/test_json_runtime.py
+++ b/tests/python/relay/test_json_runtime.py
@@ -95,6 +95,7 @@ def check_graph_runtime_result():

 def test_conv2d():
+    """Test a subgraph with a single conv2d operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -168,6 +169,7 @@ def group_conv2d():

 def test_add():
+    """Test a subgraph with a single add operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -208,6 +210,7 @@ def gen_add():

 def test_relu():
+    """Test a subgraph with a single ReLU operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -244,6 +247,7 @@ def gen_relu():

 def test_dense():
+    """Test a subgraph with a single dense operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -285,6 +289,7 @@ def gen_dense():

 def test_bn():
+    """Test a subgraph with a single batch_norm operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -350,6 +355,7 @@ def gen_bn():

 def test_multiple_ops():
+    """Test a subgraph with multiple operators."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -405,6 +411,7 @@ def get_partitoned_mod(mod):

 def test_composite():
+    """Test DNNL patterns and their composite functions."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -510,6 +517,7 @@ def conv2d_bias_relu():

 def test_constant():
+    """Test the subgraph with (var, const, ...)
arguments.""" if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -548,14 +556,62 @@ def test_constant(): transform.PartitionGraph() ]) - with tvm.transform.PassContext(opt_level=3, - disabled_pass=["AlterOpLayout"]): + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): ref_mod = remove_bn_pass(ref_mod) mod = composite_partition(ref_mod) i_data = np.random.uniform(0, 1, ishape).astype(dtype) check_result(mod, ref_mod, {'data': i_data}, (1, 32, 14, 14), tol=1e-5) +def test_partial_constant(): + """Test the subgraph with (const, var, const, var) arguments.""" + if not tvm.get_global_func("relay.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (10, 10) + + in_1 = relay.var("in_1", shape=ishape, dtype=dtype) + in_2 = relay.var("in_2", shape=ishape, dtype=dtype) + in_3 = relay.var("in_3", shape=ishape, dtype=dtype) + in_4 = relay.var("in_4", shape=ishape, dtype=dtype) + + add1 = relay.add(in_1, in_2) + add2 = relay.add(add1, in_3) + add3 = relay.add(add2, in_3) + add4 = relay.add(add3, in_3) + + func = relay.Function([in_1, in_2, in_3, in_4], add4) + ref_mod = tvm.IRModule.from_expr(func) + ref_mod = relay.transform.InferType()(ref_mod) + + data1 = np.random.uniform(0, 1, ishape).astype(dtype) + data3 = np.random.uniform(0, 1, ishape).astype(dtype) + + params = { + 'in_1': tvm.nd.array(data1, ctx=tvm.cpu(0)), + 'in_3': tvm.nd.array(data3, ctx=tvm.cpu(0)) + } + ref_mod["main"] = bind_params_by_name(ref_mod["main"], params) + + opt_pass = tvm.transform.Sequential([ + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + transform.AnnotateTarget("dnnl"), + transform.MergeCompilerRegions(), + transform.PartitionGraph() + ]) + + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): + mod = opt_pass(ref_mod) + + data2 = np.random.uniform(0, 1, ishape).astype(dtype) + data4 = np.random.uniform(0, 1, ishape).astype(dtype) + check_result(mod, ref_mod, {'in_2': data2, 'in_4': data4}, (10, 10), tol=1e-5) + if __name__ == "__main__": test_conv2d() @@ -566,3 +622,4 @@ def test_constant(): test_multiple_ops() test_composite() test_constant() + test_partial_constant() From 5e34210a24f4fd70f02998192c5fb36e25089e72 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Tue, 23 Jun 2020 23:50:05 +0000 Subject: [PATCH 20/30] move json to contrib/json --- cmake/modules/JSON.cmake | 2 +- .../contrib/codegen_json/codegen_json.h | 4 +- src/relay/backend/contrib/dnnl/codegen.cc | 2 +- src/relay/backend/graph_runtime_codegen.cc | 66 +++++++++++++++++- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 9 +-- src/runtime/{ => contrib}/json/json_node.h | 69 ------------------- src/runtime/{ => contrib}/json/json_runtime.h | 0 7 files changed, 69 insertions(+), 83 deletions(-) rename src/runtime/{ => contrib}/json/json_node.h (78%) rename src/runtime/{ => contrib}/json/json_runtime.h (100%) diff --git a/cmake/modules/JSON.cmake b/cmake/modules/JSON.cmake index bfc07bb1b13a..16a6e59697bc 100644 --- a/cmake/modules/JSON.cmake +++ b/cmake/modules/JSON.cmake @@ -17,6 +17,6 @@ if(USE_JSON_RUNTIME) message(STATUS "Build with JSON runtime support") - file(GLOB RUNTIME_JSON_SRCS src/runtime/json/*.cc) + file(GLOB RUNTIME_JSON_SRCS src/runtime/contrib/json/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_JSON_SRCS}) endif(USE_JSON_RUNTIME) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h 
b/src/relay/backend/contrib/codegen_json/codegen_json.h index 97dc2b333ca5..1cc26014501b 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -35,8 +35,8 @@ #include #include -#include "../../../../runtime/json/json_node.h" -#include "../../../../runtime/json/json_runtime.h" +#include "../../../../runtime/contrib/json/json_node.h" +#include "../../../../runtime/contrib/json/json_runtime.h" #include "../../utils.h" namespace tvm { diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 67dba2969923..c5dc6a520f14 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -33,7 +33,7 @@ #include #include -#include "../../../../runtime/json/json_node.h" +#include "../../../../runtime/contrib/json/json_node.h" #include "../../utils.h" #include "../codegen_c/codegen_c.h" #include "../codegen_json/codegen_json.h" diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 16f95a1b79df..b5024d55633c 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -28,15 +28,12 @@ #include #include -#include -#include #include #include #include #include "compile_engine.h" #include "utils.h" -#include "../../runtime/json/json_node.h" namespace tvm { namespace relay { @@ -628,6 +625,12 @@ TVM_REGISTER_GLOBAL("relay.build_module._GraphRuntimeCodegen") namespace dmlc { namespace json { +// JSON utils +template +inline bool SameType(const dmlc::any& data) { + return std::type_index(data.type()) == std::type_index(typeid(T)); +} + template <> struct Handler> { inline static void Write(dmlc::JSONWriter* writer, @@ -639,5 +642,62 @@ struct Handler> { LOG(FATAL) << "Not implemented."; } }; +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, + const std::unordered_map& data) { + writer->BeginObject(); + for (const auto& kv : data) { + auto k = kv.first; + const dmlc::any& v = kv.second; + if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndObject(); + } + inline static void Read(dmlc::JSONReader* reader, + std::unordered_map* data) { + LOG(FATAL) << "Not implemented."; + } +}; + +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, const std::vector& data) { + writer->BeginArray(); + for (const auto& v : data) { + if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteArrayItem(dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndArray(); + } + inline static void Read(dmlc::JSONReader* reader, std::vector* data) { + LOG(FATAL) << "Not implemented."; + } +}; } // namespace json } // namespace dmlc diff --git 
a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 21d0dd66e2b2..bd425439bd2c 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -29,8 +29,8 @@ #include #include -#include "../../json/json_node.h" -#include "../../json/json_runtime.h" +#include "../json/json_node.h" +#include "../json/json_runtime.h" #include "dnnl.hpp" namespace tvm { @@ -207,11 +207,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { SW = std::stoi(str_strides[0]), // weight-wise stride OH = (IH - KH + PH_L + PH_R) / SH + 1, // output height OW = (IW - KW + PW_L + PW_R) / SW + 1; // output width - // std::cerr << N << ", " << IC << ", " << IH << ", " << IW << "\n"; - // std::cerr << OC << ", " << IC << ", " << KH << ", " << KW << "\n"; - // std::cerr << PH_L << ", " << PH_R << ", " << PW_L << ", " << PW_R << "\n"; - // std::cerr << SH << ", " << SW << "\n"; - // std::cerr << OH << ", " << OW << "\n"; // Memory shapes. dnnl::memory::dims src_dims = {N, IC, IH, IW}; diff --git a/src/runtime/json/json_node.h b/src/runtime/contrib/json/json_node.h similarity index 78% rename from src/runtime/json/json_node.h rename to src/runtime/contrib/json/json_node.h index fd45ab226e19..035394078bd2 100644 --- a/src/runtime/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -317,12 +317,6 @@ class JSONGraphNode { namespace dmlc { namespace json { -// JSON utils -template -inline bool SameType(const dmlc::any& data) { - return std::type_index(data.type()) == std::type_index(typeid(T)); -} - template <> struct Handler> { inline static void Write( @@ -336,69 +330,6 @@ struct Handler> { (*data)->Load(reader); } }; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, - const std::vector& data) { - writer->BeginArray(); - for (const auto& v : data) { - if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteArrayItem(dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndArray(); - } - - inline static void Read(dmlc::JSONReader* reader, - std::vector* data) { - LOG(FATAL) << "Not implemented."; - } -}; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, - const std::unordered_map& data) { - writer->BeginObject(); - for (const auto& kv : data) { - auto k = kv.first; - const dmlc::any& v = kv.second; - if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndObject(); - } - - inline static void Read(dmlc::JSONReader* reader, - std::unordered_map* data) { - LOG(FATAL) << "Not implemented."; - } -}; - } // namespace json } // namespace dmlc diff --git a/src/runtime/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h similarity index 100% rename from src/runtime/json/json_runtime.h rename to 
src/runtime/contrib/json/json_runtime.h From ade0d27d37c44a7e2f73029fe044d2f9ccfba2d8 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 01:02:09 +0000 Subject: [PATCH 21/30] fix cmake --- CMakeLists.txt | 2 -- cmake/modules/JSON.cmake | 22 ------------ cmake/modules/contrib/DNNL.cmake | 13 +++---- .../contrib/codegen_json/codegen_json.h | 36 ++++++------------- src/relay/backend/contrib/dnnl/codegen.cc | 24 +++++++++---- src/runtime/contrib/json/json_node.h | 25 ++++++------- src/runtime/contrib/json/json_runtime.h | 6 ++-- tests/python/relay/test_json_runtime.py | 18 +++++----- 8 files changed, 55 insertions(+), 91 deletions(-) delete mode 100644 cmake/modules/JSON.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c0fa1cbf4230..aaddebdfe3c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,6 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) -tvm_option(USE_JSON_RUNTIME "Build with JSON runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -306,7 +305,6 @@ if(USE_EXAMPLE_EXT_RUNTIME) endif(USE_EXAMPLE_EXT_RUNTIME) # Module rules -include(cmake/modules/JSON.cmake) include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) include(cmake/modules/Hexagon.cmake) diff --git a/cmake/modules/JSON.cmake b/cmake/modules/JSON.cmake deleted file mode 100644 index 16a6e59697bc..000000000000 --- a/cmake/modules/JSON.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(USE_JSON_RUNTIME) - message(STATUS "Build with JSON runtime support") - file(GLOB RUNTIME_JSON_SRCS src/runtime/contrib/json/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_JSON_SRCS}) -endif(USE_JSON_RUNTIME) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index 8b863a0b764b..85a6bea0ede1 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -15,29 +15,24 @@ # specific language governing permissions and limitations # under the License. 
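# Note that USE_DNNL_CODEGEN is effectively tri-state after this change; a
# user's config.cmake would opt in with, for example:
#   set(USE_DNNL_CODEGEN ON)     # DNNL JSON runtime ("JSON" behaves the same)
#   set(USE_DNNL_CODEGEN C_SRC)  # keep the original C source module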
-if(USE_DNNL_CODEGEN STREQUAL "ON") - if(USE_JSON_RUNTIME STREQUAL "OFF") - message(FATAL_ERROR "USE_JSON_RUNTIME must be ON to use DNNL JSON RUNTIME") - endif() - +if((USE_DNNL_CODEGEN STREQUAL "ON") OR (USE_DNNL_CODEGEN STREQUAL "JSON")) + add_definitions(-DUSE_JSON_RUNTIME=1) file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) - file(GLOB JSON_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.h) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) list(APPEND COMPILER_SRCS ${JSON_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) - file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) + file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc) list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL}) elseif(USE_DNNL_CODEGEN STREQUAL "C_SRC") - add_definitions(-DDNNL_WITH_C_SOURCE_MODULE=1) file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) - file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) + file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc) list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL}) endif() diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 1cc26014501b..8300cbedb38e 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -26,10 +26,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -46,7 +46,7 @@ namespace contrib { using namespace tvm::runtime::json; -using ShapeVector = std::vector >; +using ShapeVector = std::vector>; using TypeVector = std::vector; using JSONGraphObjectPtr = std::shared_ptr; @@ -58,8 +58,7 @@ class OpAttrExtractor : public AttrVisitor { public: explicit OpAttrExtractor(JSONGraphObjectPtr node) : node_(node) {} - template ::value>> + template ::value>> std::string Fp2String(const T value, int n = 16) { std::ostringstream out; out.precision(n); @@ -73,29 +72,17 @@ class OpAttrExtractor : public AttrVisitor { node_->SetAttr(key, attr); } - void Visit(const char* key, double* value) final { - SetNodeAttr(key, {Fp2String(*value)}); - } + void Visit(const char* key, double* value) final { SetNodeAttr(key, {Fp2String(*value)}); } - void Visit(const char* key, int64_t* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, int64_t* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, uint64_t* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, uint64_t* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, int* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, int* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, bool* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, bool* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, std::string* value) final { - SetNodeAttr(key, {*value}); - } + void Visit(const char* key, std::string* 
value) final { SetNodeAttr(key, {*value}); } void Visit(const char* key, DataType* value) final { if (!value->is_void()) { @@ -292,10 +279,9 @@ class JSONSerializer : public MemoizedExprTranslator(name, /* name_ */ + auto node = std::make_shared(name, /* name_ */ "kernel", /* op_type_ */ - inputs, - 1 /* num_outputs_ */); + inputs, 1 /* num_outputs_ */); SetCallNodeAttribute(node, cn); return AddNode(node, GetRef(cn)); } diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index c5dc6a520f14..9d4041af6395 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -33,18 +33,22 @@ #include #include -#include "../../../../runtime/contrib/json/json_node.h" #include "../../utils.h" -#include "../codegen_c/codegen_c.h" + +#ifdef USE_JSON_RUNTIME +#include "../../../../runtime/contrib/json/json_node.h" #include "../codegen_json/codegen_json.h" +#else +#include "../codegen_c/codegen_c.h" +#endif namespace tvm { namespace relay { namespace contrib { using namespace backend; -using namespace tvm::runtime::json; +#ifndef USE_JSON_RUNTIME // C source runtime inline size_t GetShape1DSize(const Type& type) { const auto shape = GetShape(type); return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); @@ -410,7 +414,6 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { const auto* pf = runtime::Registry::Get("runtime.CSourceModuleCreate"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; return (*pf)(code, "c", sym, variables); - std::cout << code_stream_.str(); } private: @@ -421,7 +424,12 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { std::ostringstream code_stream_; }; +#else // DNNL JSON runtime + class DNNLJSONSerializer : public backend::contrib::JSONSerializer { + using JSONGraphNode = tvm::runtime::json::JSONGraphNode; + using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry; + public: DNNLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {} @@ -473,14 +481,14 @@ std::string GetExtSymbol(const Function& func) { CHECK(name_node.defined()) << "Fail to retrieve external symbol."; return std::string(name_node.value()); } +#endif /*! * \brief The external compiler/codegen tool. It takes a Relay expression/module and * compile it into a runtime module. */ runtime::Module DNNLCompiler(const ObjectRef& ref) { - // DNNLModuleCodegen dnnl; - // return dnnl.CreateCSourceModule(ref); +#ifdef USE_JSON_RUNTIME CHECK(ref->IsInstance()); auto func = Downcast(ref); auto func_name = GetExtSymbol(func); @@ -493,6 +501,10 @@ runtime::Module DNNLCompiler(const ObjectRef& ref) { CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; auto mod = (*pf)(func_name, graph_json, params); return mod; +#else + DNNLModuleCodegen dnnl; + return dnnl.CreateCSourceModule(ref); +#endif } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); diff --git a/src/runtime/contrib/json/json_node.h b/src/runtime/contrib/json/json_node.h index 035394078bd2..6bae5c5d6bf2 100644 --- a/src/runtime/contrib/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -22,8 +22,8 @@ * \brief The graph nodes used by JSON runtime. 
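 * A node entry serializes to the triple [node_id, output_index, version];
 * the second output of node 3, for instance, would appear in "heads" as
 * [3, 1, 0].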
*/ -#ifndef TVM_RUNTIME_JSON_JSON_NODE_H_ -#define TVM_RUNTIME_JSON_JSON_NODE_H_ +#ifndef TVM_RUNTIME_CONTRIB_JSON_JSON_NODE_H_ +#define TVM_RUNTIME_CONTRIB_JSON_JSON_NODE_H_ #include #include @@ -32,10 +32,10 @@ #include #include +#include #include #include #include -#include namespace tvm { namespace runtime { @@ -52,7 +52,7 @@ class JSONGraphNodeEntry { // Constructors. JSONGraphNodeEntry() = default; JSONGraphNodeEntry(int id, int index, int version = 0) - : id_(id), index_(index), version_(version) {} + : id_(id), index_(index), version_(version) {} /*! * \brief Serialize a node entry. @@ -99,10 +99,8 @@ class JSONGraphNode { public: // Constructors. JSONGraphNode() = default; - JSONGraphNode(const std::string& name, - const std::string& op_type, - const std::vector& inputs = {}, - size_t num_outputs = 1) { + JSONGraphNode(const std::string& name, const std::string& op_type, + const std::vector& inputs = {}, size_t num_outputs = 1) { name_ = name; op_type_ = op_type; num_inputs_ = inputs.size(); @@ -245,9 +243,7 @@ class JSONGraphNode { * * \param num_outputs The number of output. */ - void SetNumOutput(uint32_t num_outputs) { - num_outputs_ = num_outputs; - } + void SetNumOutput(uint32_t num_outputs) { num_outputs_ = num_outputs; } /*! * \brief Get the value of an attribute in the node. @@ -319,9 +315,8 @@ namespace dmlc { namespace json { template <> struct Handler> { - inline static void Write( - dmlc::JSONWriter* writer, - const std::shared_ptr& data) { + inline static void Write(dmlc::JSONWriter* writer, + const std::shared_ptr& data) { data->Save(writer); } @@ -333,4 +328,4 @@ struct Handler> { } // namespace json } // namespace dmlc -#endif // TVM_RUNTIME_JSON_JSON_NODE_H_ +#endif // TVM_RUNTIME_CONTRIB_JSON_JSON_NODE_H_ diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h index 96d569730c97..dec9abf378f9 100644 --- a/src/runtime/contrib/json/json_runtime.h +++ b/src/runtime/contrib/json/json_runtime.h @@ -22,8 +22,8 @@ * \brief Utilities for json runtime. 
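 * A concrete backend subclasses JSONRuntimeBase and only provides Init() and
 * Run(); a minimal sketch (the class name here is illustrative):
 *   class MyJSONRuntime : public JSONRuntimeBase {
 *     void Init(const Array<NDArray>& consts) override { SetupConstants(consts); }
 *     void Run() override { ... }  // interpret nodes_ in topological order
 *   };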
*/
-#ifndef TVM_RUNTIME_JSON_JSON_RUNTIME_H_
-#define TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+#ifndef TVM_RUNTIME_CONTRIB_JSON_JSON_RUNTIME_H_
+#define TVM_RUNTIME_CONTRIB_JSON_JSON_RUNTIME_H_

#include
#include
@@ -304,4 +304,4 @@
 } // namespace json
 } // namespace runtime
 } // namespace tvm
-#endif // TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+#endif // TVM_RUNTIME_CONTRIB_JSON_JSON_RUNTIME_H_
diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py
index 26a418864d3a..d15468c6a942 100644
--- a/tests/python/relay/test_json_runtime.py
+++ b/tests/python/relay/test_json_runtime.py
@@ -96,7 +96,7 @@

 def test_conv2d():
     """Test a subgraph with a single conv2d operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -170,7 +170,7 @@

 def test_add():
     """Test a subgraph with a single add operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -211,7 +211,7 @@ def gen_add():

 def test_relu():
     """Test a subgraph with a single ReLU operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -248,7 +248,7 @@ def gen_relu():

 def test_dense():
     """Test a subgraph with a single dense operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -290,7 +290,7 @@ def gen_dense():

 def test_bn():
     """Test a subgraph with a single batch_norm operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -356,7 +356,7 @@ def gen_bn():

 def test_multiple_ops():
     """Test a subgraph with multiple operators."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -412,7 +412,7 @@ def get_partitoned_mod(mod):

 def test_composite():
     """Test DNNL patterns and their composite functions."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -518,7 +518,7 @@ def conv2d_bias_relu():

 def test_constant():
     """Test the subgraph with (var, const, ...)
arguments.""" - if not tvm.get_global_func("relay.ext.dnnl", True): + if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True): print("skip because DNNL codegen is not available") return @@ -565,7 +565,7 @@ def test_constant(): def test_partial_constant(): """Test the subgraph with (const, var, const, var) arguments.""" - if not tvm.get_global_func("relay.ext.dnnl", True): + if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True): print("skip because DNNL codegen is not available") return From 3ec8b841dac0950de578c673204eef9572459fa1 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 16:14:28 +0000 Subject: [PATCH 22/30] lint --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index bd425439bd2c..ce422c3543d3 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -97,7 +97,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Fill in the input buffers. for (size_t i = 0; i < input_nodes_.size(); ++i) { auto eid = EntryID(input_nodes_[i], 0); - // TODO: Support other data lengths. + // TODO(@comanic): Support other data lengths. size_t offset_in_bytes = entry_out_mem_[eid].second * 4; size_t buffer_size = GetDataSize(*data_entry_[eid]); write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size, @@ -169,9 +169,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto eid = EntryID(entry); // Since the DNNL memory has been created before calling this function, we assume the entry // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. - CHECK(entry_out_mem_.count(eid) == 0); + CHECK_EQ(entry_out_mem_.count(eid), 0); - // TODO: Support other data types (i.e., int8). + // TODO(@comanic): Support other data types (i.e., int8). auto data_node = nodes_[entry.id_]; auto dltype = data_node.GetOpDataType()[entry.index_]; CHECK_EQ(dltype.bits, 32); @@ -425,14 +425,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { inline void read_from_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size, size_t offset = 0) { uint8_t* src = static_cast(mem.get_data_handle()); - std::copy(src + offset, src + offset + size, (uint8_t*)handle); + std::copy(src + offset, src + offset + size, static_cast(handle)); } // Read from the handle and write to DNNL memory (+offset). - inline void write_to_dnnl_memory(void* handle, dnnl::memory& mem, size_t size, + inline void write_to_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size, size_t offset = 0) { uint8_t* dst = static_cast(mem.get_data_handle()); - std::copy((uint8_t*)handle, (uint8_t*)handle + size, dst + offset); + std::copy(reinterpret_cast(handle), reinterpret_cast(handle) + size, + dst + offset); } // Generate DNNL memory description and infer the data layout by the given shape. 
From 8535921c80cb5c1e6ee5afa057668197d2f452e0 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 16:38:24 +0000 Subject: [PATCH 23/30] max_digits10 for fp serialization --- src/relay/backend/contrib/codegen_json/codegen_json.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 8300cbedb38e..1e643e561613 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -59,9 +60,9 @@ class OpAttrExtractor : public AttrVisitor { explicit OpAttrExtractor(JSONGraphObjectPtr node) : node_(node) {} template ::value>> - std::string Fp2String(const T value, int n = 16) { + std::string Fp2String(const T value) { std::ostringstream out; - out.precision(n); + out.precision(std::numeric_limits::max_digits10); out << value; return out.str(); } From 20786071455798d58557903886e662107ea0e380 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 16:55:55 +0000 Subject: [PATCH 24/30] only keep base getfunction --- .../contrib/codegen_json/codegen_json.h | 2 +- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 28 ------------------- src/runtime/contrib/json/json_runtime.h | 21 ++++++++++++++ 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 1e643e561613..bb8ea0c6162e 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -119,7 +119,7 @@ class OpAttrExtractor : public AttrVisitor { String s = GetRef(str); SetNodeAttr(key, std::vector{s.operator std::string()}); } else { - LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey() << ": " << *value; + LOG(FATAL) << "Not yet supported type: " << (*value)->GetTypeKey() << ": " << *value; } } diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index ce422c3543d3..87eb74490dc3 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -51,30 +51,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { const char* type_key() const { return "dnnl_json"; } - PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - if (this->symbol_name_ == name) { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK(this->initialized_) << "The module has not been initialized"; - - // Set inputs. - SetInputs(args); - // Execute the subgraph. - this->Run(); - // Copy result to output buffer. - GetOutput(args); - }); - } else if ("__init_" + this->symbol_name_ == name) { - // The function to initialize constant tensors. - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK_EQ(args.size(), 1U); - this->Init(args[0]); - *rv = 0; - }); - } else { - return JSONRuntimeBase::GetFunction(name, sptr_to_self); - } - } - void Init(const Array& consts) override { BuildEngine(); @@ -89,8 +65,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Setup constants entries for weights. SetupConstants(consts); - - initialized_ = true; } void Run() override { @@ -469,8 +443,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { std::vector> net_args_; /* The entry ID to its corresponding output memory. 
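   * The size_t half of the pair is an element offset into that memory, so
   * entry_out_mem_[eid] = {mem, 2} places the entry at byte offset 2 * 4 in
   * the fp32-only case handled above.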
*/ std::unordered_map> entry_out_mem_; - /* Indicate if the DNNL engine has been initialized. */ - bool initialized_{false}; }; runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h index dec9abf378f9..d716929478f7 100644 --- a/src/runtime/contrib/json/json_runtime.h +++ b/src/runtime/contrib/json/json_runtime.h @@ -76,6 +76,25 @@ class JSONRuntimeBase : public ModuleNode { } else if (name == "get_const_vars") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if (this->symbol_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK(this->initialized_) << "The module has not been initialized"; + + // Set inputs. + this->SetInputs(args); + // Execute the subgraph. + this->Run(); + // Copy result to output buffer. + this->GetOutput(args); + }); + } else if ("__init_" + this->symbol_name_ == name) { + // The function to initialize constant tensors. + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size(), 1U); + this->Init(args[0]); + this->initialized_ = true; + *rv = 0; + }); } else { return PackedFunc(nullptr); } @@ -299,6 +318,8 @@ class JSONRuntimeBase : public ModuleNode { std::vector input_var_idx_; /*! \brief input const index. */ std::vector const_idx_; + /* Indicate if the engine has been initialized. */ + bool initialized_{false}; }; } // namespace json From 7a78ff8c9ae054fe936731a4fa3158b404a29778 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 24 Jun 2020 18:11:45 +0000 Subject: [PATCH 25/30] fix lint --- src/relay/backend/contrib/codegen_json/codegen_json.h | 1 + src/runtime/contrib/json/json_node.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index bb8ea0c6162e..b9cba3656d80 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -33,6 +33,7 @@ #include #include +#include #include #include diff --git a/src/runtime/contrib/json/json_node.h b/src/runtime/contrib/json/json_node.h index 6bae5c5d6bf2..53c36b383ea2 100644 --- a/src/runtime/contrib/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -32,6 +32,7 @@ #include #include +#include #include #include #include From c5af62d800f8db1f5b39ba752dc3cec5fc5175eb Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Thu, 25 Jun 2020 00:26:51 +0000 Subject: [PATCH 26/30] zero copy for all data entries --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 8 +- src/runtime/contrib/json/json_runtime.h | 93 ++++--------------- 2 files changed, 17 insertions(+), 84 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 87eb74490dc3..c2747d145857 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -57,12 +57,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { CHECK_EQ(consts.size(), const_idx_.size()) << "The number of input constants must match the number of required."; - // Pre-allocate buffers on CPU for input and output entries. - DLContext ctx; - ctx.device_type = static_cast(kDLCPU); - ctx.device_id = 0; - AllocateInputOutputBuffer(ctx); - // Setup constants entries for weights. 
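    // (zero copy: SetupConstants only records each weight's DLTensor pointer
    // into data_entry_, so no host buffer is allocated or copied for the
    // constants)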
    SetupConstants(consts);
  }
@@ -71,7 +65,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
    // Fill in the input buffers.
    for (size_t i = 0; i < input_nodes_.size(); ++i) {
      auto eid = EntryID(input_nodes_[i], 0);
-      // TODO(@comanic): Support other data lengths.
+      // TODO(@comaniac): Support other data lengths.
      size_t offset_in_bytes = entry_out_mem_[eid].second * 4;
      size_t buffer_size = GetDataSize(*data_entry_[eid]);
      write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size,
diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h
index d716929478f7..da7d8952d150 100644
--- a/src/runtime/contrib/json/json_runtime.h
+++ b/src/runtime/contrib/json/json_runtime.h
@@ -80,12 +80,10 @@ class JSONRuntimeBase : public ModuleNode {
      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
        CHECK(this->initialized_) << "The module has not been initialized";

-        // Set inputs.
-        this->SetInputs(args);
+        // Bind argument tensors to data entries.
+        this->SetInputOutputBuffers(args);
        // Execute the subgraph.
        this->Run();
-        // Copy result to output buffer.
-        this->GetOutput(args);
      });
    } else if ("__init_" + this->symbol_name_ == name) {
      // The function to initialize constant tensors.
@@ -134,16 +132,18 @@
 protected:
  /*!
-   * \brief Set up the inputs for inference.
+   * \brief Set up the input and output buffers by binding their DLTensor pointers to the
+   * corresponding data entry.
   *
   * \param args The packed args.
   */
-  void SetInputs(const TVMArgs& args) {
+  void SetInputOutputBuffers(const TVMArgs& args) {
    CHECK_EQ(args.size(), input_var_idx_.size() + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < input_var_idx_.size(); i++) {
-      auto eid = EntryID(input_var_idx_[i], 0);
+    for (size_t i = 0; i < static_cast(args.size()); i++) {
+      auto eid = i < input_var_idx_.size() ? EntryID(input_var_idx_[i], 0)
+                                           : EntryID(outputs_[i - input_var_idx_.size()]);
      CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle)
          << "Expect NDArray or DLTensor as inputs";
@@ -155,70 +155,9 @@
        arg = args[i].operator DLTensor*();
      }

-      size_t from_size = GetDataSize(*arg);
-      size_t to_size = GetDataSize(*data_entry_[eid]);
-      CHECK_EQ(from_size, to_size);
-
-      if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
-        // Zero copy for input because the tensor is managed by the host.
-        data_entry_[eid]->data = arg->data;
-      } else {
-        NDArray::CopyFromTo(arg, data_entry_[eid]);
-      }
+      // Assign input/output the NDArray pointers to data entry so that we can directly
+      // read/write host buffers.
+      data_entry_[eid] = arg;
    }
  }

-  /*!
-   * \brief Return the results through packed args.
-   *
-   * \param args The packed args.
-   */
-  void GetOutput(const TVMArgs& args) {
-    // Copy result to output buffer.
-    size_t arg_idx = input_var_idx_.size();
-    CHECK_EQ(args.size(), arg_idx + outputs_.size())
-        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < outputs_.size(); i++, arg_idx++) {
-      auto eid = EntryID(outputs_[i]);

-      if (args[arg_idx].type_code() == kTVMDLTensorHandle) {
-        DLTensor* arg = args[arg_idx];
-        NDArray::CopyFromTo(data_entry_[eid], arg);
-      } else {
-        CHECK(args[arg_idx].IsObjectRef());
-        NDArray arg = args[arg_idx];
-        arg.CopyFrom(data_entry_[eid]);
-      }
-    }
-  }

-  /*!
-   * \brief Pre-allocate empty buffers for input and output entries.
-   *
-   * \param ctx The context for the pre-allocated buffer.
- */ - void AllocateInputOutputBuffer(const DLContext& ctx) { - for (size_t i = 0; i < input_nodes_.size(); ++i) { - auto nid = input_nodes_[i]; - auto shape = nodes_[nid].GetOpShape()[0]; - auto dtype = nodes_[nid].GetOpDataType()[0]; - DLTensor* tensor; - int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, - ctx.device_type, ctx.device_id, &tensor); - CHECK_EQ(ret, 0) << TVMGetLastError(); - data_entry_[EntryID(nid, 0)] = tensor; - } - - for (size_t i = 0; i < outputs_.size(); ++i) { - auto entry = outputs_[i]; - auto shape = nodes_[entry.id_].GetOpShape()[entry.index_]; - auto dtype = nodes_[entry.id_].GetOpDataType()[entry.index_]; - DLTensor* tensor; - int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, - ctx.device_type, ctx.device_id, &tensor); - CHECK_EQ(ret, 0) << TVMGetLastError(); - data_entry_[EntryID(entry)] = tensor; + // Assign input/output the NDArray pointers to data entry so that we can directly + // read/write host buffers. + data_entry_[eid] = arg; } } @@ -258,14 +197,14 @@ class JSONRuntimeBase : public ModuleNode { } /*! - * \brief Set up the constants/weights for inference. + * \brief Set up the constants/weights for inference by binding their DLTensor pointer to + * the corresponding data entry. * - * \param consts The constant to be filled. + * \param consts A list of constant NDArray to be used. */ void SetupConstants(const Array& consts) { - // Initialize consts for (size_t i = 0; i < consts.size(); ++i) { - consts[i].CopyTo(data_entry_[const_idx_[i]]); + data_entry_[const_idx_[i]] = consts[i].operator->(); } } @@ -313,7 +252,7 @@ class JSONRuntimeBase : public ModuleNode { /*! \brief Output entries. */ std::vector outputs_; /*! \brief Data of that entry. */ - std::vector data_entry_; + std::vector data_entry_; /*! \brief Map the input name to index. */ std::vector input_var_idx_; /*! \brief input const index. */ From 5d37beb2ef16c2cde33fbd77acd6a873dac86c34 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Thu, 25 Jun 2020 15:51:14 +0000 Subject: [PATCH 27/30] address comments --- .../contrib/codegen_json/codegen_json.h | 2 +- src/runtime/contrib/json/json_node.h | 26 +++++++++++++++++++ src/runtime/contrib/json/json_runtime.h | 12 ++++----- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index b9cba3656d80..8bf5fbc013c2 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -206,7 +206,7 @@ class JSONSerializer : public MemoizedExprTranslatorSetNumOutput(tuple_type->fields.size()); } else { const auto* tensor_type = checked_type.as(); - CHECK(tensor_type) << "Expect TensorType, but received: ." 
From 872a60d7250b7151602009ed6fa1dd0d9f39522a Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Fri, 26 Jun 2020 18:34:15 +0000
Subject: [PATCH 28/30] enable ci

---
 tests/scripts/task_config_build_cpu.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh
index ce545bde6609..d64bcab7bc8f 100755
--- a/tests/scripts/task_config_build_cpu.sh
+++ b/tests/scripts/task_config_build_cpu.sh
@@ -29,6 +29,7 @@ echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
 echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
+echo set\(USE_DNNL_CODEGEN ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-10\) >> config.cmake
 echo set\(USE_NNPACK ON\) >> config.cmake
 echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake

From 12312f5672afb7b974b68c8c283c94474853500e Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Mon, 29 Jun 2020 20:08:05 +0000
Subject: [PATCH 29/30] address comment; fix bug

---
 src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 3 +--
 src/runtime/contrib/json/json_runtime.h       | 6 +++---
 tests/python/relay/test_json_runtime.py       | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
index c2747d145857..a79537f46e74 100644
--- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
+++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -441,8 +441,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
 
 runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json,
                                       const Array<String>& const_names) {
-  auto n = make_object<DNNLJSONRuntime>(symbol_name.operator std::string(),
-                                        graph_json.operator std::string(), const_names);
+  auto n = make_object<DNNLJSONRuntime>(symbol_name, graph_json, const_names);
   return runtime::Module(n);
 }
 
diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h
index a2769568cf04..c4f126e8ccba 100644
--- a/src/runtime/contrib/json/json_runtime.h
+++ b/src/runtime/contrib/json/json_runtime.h
@@ -204,7 +204,7 @@ class JSONRuntimeBase : public ModuleNode {
    */
   void SetupConstants(const Array<NDArray>& consts) {
     for (size_t i = 0; i < consts.size(); ++i) {
-      data_entry_[const_idx_[i]] = consts[i].operator->();
+      data_entry_[EntryID(const_idx_[i], 0)] = consts[i].operator->();
     }
   }
 
@@ -253,9 +253,9 @@ class JSONRuntimeBase : public ModuleNode {
   std::vector<JSONGraphNodeEntry> outputs_;
   /*! \brief Data of that entry. */
   std::vector<const DLTensor*> data_entry_;
-  /*! \brief Map the input name to index. */
+  /*! \brief Map the input name to node index. */
   std::vector<uint32_t> input_var_idx_;
-  /*! \brief input const index. */
+  /*! \brief input const node index. */
   std::vector<uint32_t> const_idx_;
   /*! \brief Indicate if the engine has been initialized. */
   bool initialized_{false};
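The json_runtime.h fix above hinges on the difference between node ids and entry ids: const_idx_ stores node ids, while data_entry_ is indexed by entry id, i.e. the node's offset in node_row_ptr_ plus the output index (consistent with NumEntries() returning node_row_ptr_.back()). Indexing data_entry_ with a raw node id therefore hits the wrong slot as soon as any preceding node has more than one output. A small self-contained illustration of the assumed indexing scheme:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Three nodes with 1, 2, and 1 outputs -> row pointer [0, 1, 3, 4].
      std::vector<uint32_t> node_row_ptr = {0, 1, 3, 4};
      auto entry_id = [&](uint32_t nid, uint32_t index) { return node_row_ptr[nid] + index; };

      // Node 1's second output lives at entry 2.
      assert(entry_id(1, 1) == 2);
      // Node 2's first output lives at entry 3, not at index 2: using the raw
      // node id to index the data-entry table would touch the wrong slot.
      assert(entry_id(2, 0) == 3);
      return 0;
    }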
diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py
index d15468c6a942..a886692c5838 100644
--- a/tests/python/relay/test_json_runtime.py
+++ b/tests/python/relay/test_json_runtime.py
@@ -53,7 +53,7 @@ def check_result(mod,
 
     # Run the reference result
     compile_engine.get().clear()
-    with relay.build_config(opt_level=3):
+    with tvm.transform.PassContext(opt_level=3):
         json, lib, param = relay.build(ref_mod, target=target, params=params)
     rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx)

From b0129049ba3f29c16bf76e22ef1fd05b3be1a0de Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Wed, 1 Jul 2020 16:48:43 +0000
Subject: [PATCH 30/30] address comment

---
 src/relay/backend/contrib/codegen_json/codegen_json.h | 8 ++++----
 src/relay/backend/contrib/dnnl/codegen.cc             | 2 +-
 src/runtime/contrib/dnnl/dnnl_json_runtime.cc         | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h
index 8bf5fbc013c2..9ed15a88c72a 100644
--- a/src/relay/backend/contrib/codegen_json/codegen_json.h
+++ b/src/relay/backend/contrib/codegen_json/codegen_json.h
@@ -104,7 +104,7 @@ class OpAttrExtractor : public AttrVisitor {
         attr.push_back(Fp2String(fm->value));
       } else if (const auto* str = (*an)[i].as<StringObj>()) {
         String s = GetRef<String>(str);
-        attr.push_back(s.operator std::string());
+        attr.push_back(s);
       } else {
         LOG(FATAL) << "Not supported type: " << (*an)[i]->GetTypeKey();
       }
@@ -118,7 +118,7 @@ class OpAttrExtractor : public AttrVisitor {
       SetNodeAttr(key, std::vector<std::string>{Fp2String(fm->value)});
     } else if (const auto* str = (*value).as<StringObj>()) {
       String s = GetRef<String>(str);
-      SetNodeAttr(key, std::vector<std::string>{s.operator std::string()});
+      SetNodeAttr(key, std::vector<std::string>{s});
     } else {
       LOG(FATAL) << "Not yet supported type: " << (*value)->GetTypeKey() << ": " << *value;
     }
@@ -230,7 +230,7 @@ class JSONSerializer : public MemoizedExprTranslator<std::vector<JSONGraphNodeEntry>> {
     auto pattern = fn->GetAttr<String>(attr::kPartitionedFromPattern);
     CHECK(pattern.defined());
     std::vector<std::string> values;
-    values.push_back(pattern.value().operator std::string());
+    values.push_back(pattern.value());
     std::vector<dmlc::any> attr;
     attr.emplace_back(values);
     node->SetAttr("PartitionedFromPattern", attr);
@@ -271,7 +271,7 @@ class JSONSerializer : public MemoizedExprTranslator<std::vector<JSONGraphNodeEntry>> {
     } else if (const auto* fn = cn->op.as<FunctionNode>()) {
       auto comp = fn->GetAttr<String>(attr::kComposite);
       CHECK(comp.defined()) << "JSON runtime only supports composite functions.";
-      name = comp.value().operator std::string();
+      name = comp.value();
     } else {
       LOG(FATAL) << "JSON runtime does not support calls to " << cn->op->GetTypeKey();
     }
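The codegen_json.h hunks drop the explicit .operator std::string() calls; they are redundant because a String-style wrapper with an implicit conversion operator is accepted anywhere a std::string is expected. A toy sketch of that mechanism, where MyString is a hypothetical stand-in rather than tvm::String:

    #include <iostream>
    #include <string>
    #include <vector>

    class MyString {
     public:
      explicit MyString(std::string s) : data_(std::move(s)) {}
      operator std::string() const { return data_; }  // implicit conversion

     private:
      std::string data_;
    };

    int main() {
      MyString s{"dnnl.conv2d_bias_relu"};
      std::vector<std::string> attr;
      attr.push_back(s);         // converts implicitly, no explicit .operator call
      std::string name = s;      // same here
      std::cout << name << " / " << attr[0] << "\n";
      return 0;
    }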
diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc
index 9d4041af6395..d5a483d0d112 100644
--- a/src/relay/backend/contrib/dnnl/codegen.cc
+++ b/src/relay/backend/contrib/dnnl/codegen.cc
@@ -442,7 +442,7 @@ class DNNLJSONSerializer : public backend::contrib::JSONSerializer {
     } else if (const auto* fn = cn->op.as<FunctionNode>()) {
       auto comp = fn->GetAttr<String>(attr::kComposite);
       CHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions.";
-      name = comp.value().operator std::string();
+      name = comp.value();
 
       if (name == "dnnl.conv2d_bias_relu") {
         call = GetRootCall(fn->body.as<CallNode>(), 2, {"nn.conv2d", "add", "nn.relu"});
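For composite functions, the serializer maps the composite name to an expected chain of operators and walks from the outermost call down to the root (here nn.relu -> add -> nn.conv2d). The sketch below shows the idea with a toy single-argument call chain; Node and FindRoot are hypothetical stand-ins for Relay's CallNode and the GetRootCall helper, whose actual traversal may differ:

    #include <cassert>
    #include <memory>
    #include <string>
    #include <vector>

    struct Node {
      std::string op;
      std::shared_ptr<Node> arg;  // first argument only, enough for a chain
    };

    // expected is ordered from the innermost (root) op to the outermost one.
    const Node* FindRoot(const Node* current, int depth,
                         const std::vector<std::string>& expected) {
      for (int i = depth; i >= 0; --i) {
        if (current == nullptr || current->op != expected[i]) return nullptr;
        if (i == 0) break;
        current = current->arg.get();
      }
      return current;
    }

    int main() {
      auto conv = std::make_shared<Node>(Node{"nn.conv2d", nullptr});
      auto add = std::make_shared<Node>(Node{"add", conv});
      auto relu = std::make_shared<Node>(Node{"nn.relu", add});
      const Node* root = FindRoot(relu.get(), 2, {"nn.conv2d", "add", "nn.relu"});
      assert(root != nullptr && root->op == "nn.conv2d");
      return 0;
    }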
diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
index a79537f46e74..bda9f1a44932 100644
--- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
+++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -136,7 +136,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
                       size_t offset = 0) {
     auto eid = EntryID(entry);
     // Since the DNNL memory has been created before calling this function, we assume the entry
-    // has not yet been bind to the other DNNL memory; otherwise it may have memory leak.
+    // has not yet been bound to another DNNL memory; otherwise it may cause a memory leak.
     CHECK_EQ(entry_out_mem_.count(eid), 0);
 
     // TODO(@comaniac): Support other data types (i.e., int8).
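The CHECK_EQ(entry_out_mem_.count(eid), 0) guard above enforces a bind-once invariant: each entry id may be associated with at most one DNNL memory, so a second bind is reported instead of silently overwriting (and leaking) the first association. A minimal sketch of that invariant, with MemoryPool as a hypothetical stand-in for the entry_out_mem_ bookkeeping:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <utility>

    class MemoryPool {
     public:
      // Returns false (instead of overwriting) when the entry is already bound.
      bool Bind(uint32_t eid, void* buffer, size_t offset) {
        if (entry_out_mem_.count(eid) != 0) return false;
        entry_out_mem_.emplace(eid, std::make_pair(buffer, offset));
        return true;
      }

     private:
      std::unordered_map<uint32_t, std::pair<void*, size_t>> entry_out_mem_;
    };

    int main() {
      float buf[4];
      MemoryPool pool;
      bool first = pool.Bind(0, buf, 0);
      bool second = pool.Bind(0, buf, 0);
      assert(first && !second);  // first bind succeeds, rebinding is rejected
      return 0;
    }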