From 4b0116f295dcd7df6ac1e1c55e85fd634ce77698 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 6 Apr 2020 01:01:02 +0000 Subject: [PATCH 01/30] json runtime --- CMakeLists.txt | 2 + cmake/modules/JSON.cmake | 22 ++ cmake/modules/contrib/DNNL.cmake | 3 +- python/tvm/relay/analysis/analysis.py | 4 + .../contrib/codegen_json/codegen_json.cc | 352 +++++++++++++++++ src/relay/backend/graph_runtime_codegen.cc | 65 +--- src/relay/backend/utils.h | 17 + src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 65 ++++ src/runtime/json/json_node.h | 368 ++++++++++++++++++ src/runtime/json/json_runtime.h | 170 ++++++++ src/runtime/json/json_runtime_driver.cc | 210 ++++++++++ 11 files changed, 1215 insertions(+), 63 deletions(-) create mode 100644 cmake/modules/JSON.cmake create mode 100644 src/relay/backend/contrib/codegen_json/codegen_json.cc create mode 100644 src/runtime/contrib/dnnl/dnnl_json_runtime.cc create mode 100644 src/runtime/json/json_node.h create mode 100644 src/runtime/json/json_runtime.h create mode 100644 src/runtime/json/json_runtime_driver.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index aaddebdfe3c5..c0fa1cbf4230 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) +tvm_option(USE_JSON_RUNTIME "Build with JSON runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -305,6 +306,7 @@ if(USE_EXAMPLE_EXT_RUNTIME) endif(USE_EXAMPLE_EXT_RUNTIME) # Module rules +include(cmake/modules/JSON.cmake) include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) include(cmake/modules/Hexagon.cmake) diff --git a/cmake/modules/JSON.cmake b/cmake/modules/JSON.cmake new file mode 100644 index 000000000000..bfc07bb1b13a --- /dev/null +++ b/cmake/modules/JSON.cmake @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(USE_JSON_RUNTIME) + message(STATUS "Build with JSON runtime support") + file(GLOB RUNTIME_JSON_SRCS src/runtime/json/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_JSON_SRCS}) +endif(USE_JSON_RUNTIME) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index 3fd3f7cbc887..0b56bc9a9241 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -16,7 +16,8 @@ # under the License. 
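 # A sketch of how this path is enabled (assuming the usual config.cmake
 # workflow): set(USE_JSON_RUNTIME ON) together with
 # set(USE_DNNL_CODEGEN ON), then rebuild TVM.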
if(USE_DNNL_CODEGEN STREQUAL "ON") - file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/codegen.cc) + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.cc) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) diff --git a/python/tvm/relay/analysis/analysis.py b/python/tvm/relay/analysis/analysis.py index c237859eb987..adbc2e6222b1 100644 --- a/python/tvm/relay/analysis/analysis.py +++ b/python/tvm/relay/analysis/analysis.py @@ -313,6 +313,10 @@ def detect_feature(a, b=None): return {Feature(int(x)) for x in _ffi_api.detect_feature(a, b)} +def to_json(expr): + return _ffi_api.ToJSON(expr) + + def extract_fused_functions(mod): """Pass to extract IRModule of only fused primitive functions. diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc new file mode 100644 index 000000000000..9fc880ad3521 --- /dev/null +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file relay/backend/contrib/codegen_json.cc + * \brief Utilities for json codegen and runtime + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../../../runtime/json/json_node.h" +#include "../../../../runtime/json/json_runtime.h" +#include "../../utils.h" + +namespace tvm { +namespace relay { +namespace backend { +namespace contrib { + +using namespace tvm::runtime::json; + +using ShapeVector = std::vector >; +using TypeVector = std::vector; +using JSONGraphObjectPtr = std::shared_ptr; + +/*! \brief The artifacts that needs to be serialized. */ +struct JSONOutput { + std::string graph_json; + std::unordered_map params; +}; + +/*! + * \brief Helper class to extract all attributes of a certain op and save them + * into text format. 
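+ * Attribute values are flattened into vectors of strings; e.g. conv2d
+ * strides of (1, 1) are saved as ["1", "1"].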
+ */ +class OpAttrExtractor : public AttrVisitor { + public: + explicit OpAttrExtractor(JSONGraphObjectPtr node) : node_(node) {} + + template ::value>> + std::string Fp2String(const T value, int n = 16) { + std::ostringstream out; + out.precision(n); + out << value; + return out.str(); + } + + void SetNodeAttr(const char* key, const std::vector& value) { + std::vector attr; + attr.emplace_back(value); + node_->SetAttr(key, attr); + } + + void Visit(const char* key, double* value) final { + SetNodeAttr(key, {Fp2String(*value)}); + } + + void Visit(const char* key, int64_t* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, uint64_t* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, int* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, bool* value) final { + SetNodeAttr(key, {std::to_string(*value)}); + } + + void Visit(const char* key, std::string* value) final { + SetNodeAttr(key, {*value}); + } + + void Visit(const char* key, DataType* value) final { + if (!value->is_void()) { + SetNodeAttr(key, {runtime::DLDataType2String(*value)}); + } else { + SetNodeAttr(key, {""}); + } + } + + void Visit(const char* key, runtime::ObjectRef* value) final { + if (const auto* an = (*value).as()) { + std::vector attr; + for (size_t i = 0; i < an->data.size(); ++i) { + if (const auto* im = an->data[i].as()) { + attr.push_back(std::to_string(im->value)); + } else if (const auto* fm = an->data[i].as()) { + attr.push_back(Fp2String(fm->value)); + } else if (const auto* str = an->data[i].as()) { + String s = GetRef(str); + attr.push_back(s.operator std::string()); + } else { + LOG(FATAL) << "Not supported type: " << an->data[i]->GetTypeKey(); + } + } + SetNodeAttr(key, attr); + } else if (!(*value).defined()) { // Skip NullValue + SetNodeAttr(key, std::vector{""}); + } else { + LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey(); + } + } + + void Visit(const char* key, runtime::NDArray* value) final { + LOG(FATAL) << "NDArray is not allowed in op attribute"; + } + + void Visit(const char* key, void** value) final { + LOG(FATAL) << "void pointer is not allowed in op attribute"; + } + + void Extract(Object* node) { + if (node) { + reflection_->VisitAttrs(node, this); + } + } + + private: + JSONGraphObjectPtr node_; + ReflectionVTable* reflection_ = ReflectionVTable::Global(); +}; + +/*! \brief Serialize a Relay expression to JSON. */ +class JSONSerializer : public MemoizedExprTranslator> { + public: + void Serialize(const Expr& expr) { + relay::Function func = Downcast(expr); + // First we convert all the parameters into input nodes. + for (const auto& param : func->params) { + auto node_ptr = std::make_shared(param->name_hint(), "input" /* op_type_ */); + memo_[param] = AddNode(node_ptr, param); + } + heads_ = VisitExpr(func->body); + } + + /*! 
+   * \brief Save to JSON graph
+   *
+   * \param writer A json writer
+   */
+  void Save(dmlc::JSONWriter* writer) {
+    std::vector<size_t> arg_nodes;
+    for (size_t i = 0; i < nodes_.size(); ++i) {
+      auto node = nodes_[i];
+      if (node->IsLeaf()) {
+        arg_nodes.push_back(i);
+      }
+    }
+    size_t num_entry = 0;
+    std::vector<size_t> node_row_ptr{0};
+    for (auto node : nodes_) {
+      num_entry += node->GetNumOutput();
+      node_row_ptr.push_back(num_entry);
+    }
+    writer->BeginObject();
+    writer->WriteObjectKeyValue("nodes", nodes_);
+    writer->WriteObjectKeyValue("arg_nodes", arg_nodes);
+    writer->WriteObjectKeyValue("heads", heads_);
+    writer->WriteObjectKeyValue("node_row_ptr", node_row_ptr);
+    writer->EndObject();
+  }
+
+  std::unordered_map<std::string, runtime::NDArray> GetParams() const {
+    return params_;
+  }
+
+ protected:
+  /*!
+   * \brief Add a node to graph.
+   *
+   * \param node A graph node. It is a shared pointer. Some attributes of it
+   * will be added, i.e. shape and type. These attributes are attached to
+   * the JSON graph in the end.
+   * \param expr The relay expression.
+   * \return A list of graph entry nodes. If the relay expr is a tuple type, we
+   * will flatten it.
+   */
+  std::vector<JSONGraphNodeEntry> AddNode(JSONGraphObjectPtr node, const Expr& expr) {
+    auto checked_type = expr->checked_type();
+    auto node_id = nodes_.size();
+    nodes_.push_back(node);
+    std::vector<JSONGraphNodeEntry> ret;
+    ShapeVector shape;
+    TypeVector dtype;
+    // Flatten tuple node.
+    if (const auto* tuple_type = checked_type.as<TupleTypeNode>()) {
+      for (size_t i = 0; i < tuple_type->fields.size(); ++i) {
+        const auto* tensor_type = tuple_type->fields[i].as<TensorTypeNode>();
+        CHECK(tensor_type) << "Expect TensorType, but received: "
+                           << tuple_type->fields[i]->GetTypeKey();
+        ret.push_back(JSONGraphNodeEntry(node_id, i));
+        shape.emplace_back(GetIntShape(tensor_type->shape));
+        dtype.emplace_back(DType2String(tensor_type->dtype));
+      }
+      node->SetNumOutput(tuple_type->fields.size());
+    } else {
+      const auto* tensor_type = checked_type.as<TensorTypeNode>();
+      CHECK(tensor_type) << "Expect TensorType, but received: "
<< checked_type->GetTypeKey(); + shape.emplace_back(GetIntShape(tensor_type->shape)); + dtype.emplace_back(DType2String(tensor_type->dtype)); + ret.push_back(JSONGraphNodeEntry(node_id, 0)); + } + std::vector shape_attrs; + shape_attrs.emplace_back(shape); + node->SetAttr("shape", shape_attrs); + + std::vector type_attrs; + type_attrs.emplace_back(dtype); + node->SetAttr("dtype", type_attrs); + return ret; + } + + void SetCallNodeAttribute(JSONGraphObjectPtr node, const CallNode* cn) { + OpAttrExtractor extractor(node); + extractor.Extract(const_cast(cn->attrs.get())); + } + + std::vector VisitExprDefault_(const Object* op) final { + LOG(FATAL) << "JSON runtime currently doesn't support " << op->GetTypeKey(); + return {}; + } + + std::vector VisitExpr_(const VarNode* vn) final { + CHECK(memo_.count(GetRef(vn))); + return memo_[GetRef(vn)]; + } + + std::vector VisitExpr_(const ConstantNode* cn) final { + std::string name = "const_" + std::to_string(params_.size()); + params_[name] = cn->data; + auto node = std::make_shared(name, "const" /* op_type_ */); + return AddNode(node, GetRef(cn)); + } + + std::vector VisitExpr_(const TupleNode* tn) final { + std::vector fields; + for (const auto& field : tn->fields) { + auto ref = VisitExpr(field); + fields.insert(fields.end(), ref.begin(), ref.end()); + } + return fields; + } + + std::vector VisitExpr_(const CallNode* cn) final { + Expr expr = GetRef(cn); + std::string name; + if (const auto* op_node = cn->op.as()) { + name = op_node->name; + } else if (const auto* fn = cn->op.as()) { + auto comp = fn->GetAttr(attr::kComposite); + CHECK(comp.defined()) << "JSON runtime only supports composite functions."; + name = comp.value().operator std::string(); + // TODO(zhiics) Handle composite function here. + LOG(FATAL) << "Composite function is not handled yet."; + } else { + LOG(FATAL) << "JSON runtime does not support calls to " << cn->op->GetTypeKey(); + } + + std::vector inputs; + for (const auto& arg : cn->args) { + auto res = VisitExpr(arg); + inputs.insert(inputs.end(), res.begin(), res.end()); + } + auto node = std::make_shared(name, /* name_ */ + "kernel", /* op_type_ */ + inputs, + 1 /* num_outputs_ */); + SetCallNodeAttribute(node, cn); + return AddNode(node, GetRef(cn)); + } + + std::vector VisitExpr_(const LetNode* ln) final { + CHECK_EQ(memo_.count(ln->var), 0); + memo_[ln->var] = VisitExpr(ln->value); + return VisitExpr(ln->body); + } + + std::vector VisitExpr_(const TupleGetItemNode* gtn) final { + auto vtuple = VisitExpr(gtn->tuple); + return {vtuple[gtn->index]}; + } + + std::vector VisitExpr_(const FunctionNode* fn) final { + CHECK(fn->GetAttr(attr::kComposite).defined()) + << "JSON runtime only supports composite functions"; + // FunctionNode should be handled by the caller. + return {}; + } + + private: + /*! \brief JSON graph nodes. */ + std::vector nodes_; + /*! \brief Output of the JSON graph. */ + std::vector heads_; + /*! \brief Constants. 
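+   * Names are generated as "const_<i>" in visit order and map to the
+   * NDArray values captured from each ConstantNode.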
*/ + std::unordered_map params_; +}; + +} // namespace contrib +} // namespace backend + +std::string ToJSON(const Expr& expr) { + backend::contrib::JSONSerializer converter; + converter.Serialize(expr); + + std::ostringstream os; + dmlc::JSONWriter writer(&os); + converter.Save(&writer); + backend::contrib::JSONOutput ret; + ret.graph_json = os.str(); + ret.params = converter.GetParams(); + + backend::contrib::JSONRuntimeBase jr(ret.graph_json); + return ret.graph_json; +} + +TVM_REGISTER_GLOBAL("relay.analysis.ToJSON") +.set_body_typed(ToJSON); + +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 19e67703946f..16f95a1b79df 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -28,12 +28,15 @@ #include #include +#include +#include #include #include #include #include "compile_engine.h" #include "utils.h" +#include "../../runtime/json/json_node.h" namespace tvm { namespace relay { @@ -625,12 +628,6 @@ TVM_REGISTER_GLOBAL("relay.build_module._GraphRuntimeCodegen") namespace dmlc { namespace json { -// JSON utils -template -inline bool SameType(const dmlc::any& data) { - return std::type_index(data.type()) == std::type_index(typeid(T)); -} - template <> struct Handler> { inline static void Write(dmlc::JSONWriter* writer, @@ -642,61 +639,5 @@ struct Handler> { LOG(FATAL) << "Not implemented."; } }; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, - const std::unordered_map& data) { - writer->BeginObject(); - for (const auto& kv : data) { - auto k = kv.first; - const dmlc::any& v = kv.second; - if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndObject(); - } - inline static void Read(dmlc::JSONReader* reader, - std::unordered_map* data) { - LOG(FATAL) << "Not implemented."; - } -}; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, const std::vector& data) { - writer->BeginArray(); - for (const auto& v : data) { - if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteArrayItem(dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndArray(); - } - inline static void Read(dmlc::JSONReader* reader, std::vector* data) { - LOG(FATAL) << "Not implemented."; - } -}; } // namespace json } // namespace dmlc diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h index cac6f55329c8..1fe14b8fedf8 100644 --- a/src/relay/backend/utils.h +++ b/src/relay/backend/utils.h @@ -118,6 +118,23 @@ inline const runtime::TypedPackedFunc GetTypedPackedFunc(const std:: CHECK(pf != nullptr) << "can not find packed function"; return runtime::TypedPackedFunc(*pf); } + +/*! 
+ * \brief Extract shape from an IndexExpr array to std::vector + * + * \param shape The shape in Array + * \return The converted shape in std::vector + */ +inline std::vector GetIntShape(const Array& shape) { + std::vector ret; + for (const auto& dim : shape) { + const int64_t* pval = tir::as_const_int(dim); + CHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey(); + ret.push_back(*pval); + } + return ret; +} + /*! * \brief Convert type to string * diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc new file mode 100644 index 000000000000..f9e6affc0196 --- /dev/null +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/dnnl/dnnl_json_runtime.cc + * \brief A simple JSON runtime for DNNL. + */ + +#include + +#include +#include + +#include "../../json/json_node.h" +#include "../../json/json_runtime.h" + +namespace tvm { +namespace runtime { +namespace contrib { + +using namespace tvm::runtime; +using namespace tvm::runtime::json; + +class DNNLJSONRuntime : public JSONRuntimeBase { + public: + explicit DNNLJSONRuntime(const std::string& graph_json) : JSONRuntimeBase(graph_json) {} + ~DNNLJSONRuntime() = default; + + void Run() override { + // Invoke the engine and return the result + } + + void Init() override { + // Create a engine here + } + + private: + // Engine +}; + +TVM_REGISTER_GLOBAL("runtime.ext.dnnl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + auto n = tvm::runtime::make_object(args[0].operator std::string()); + *rv = Module(n); +}); + +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/json/json_node.h b/src/runtime/json/json_node.h new file mode 100644 index 000000000000..3516cd220fbf --- /dev/null +++ b/src/runtime/json/json_node.h @@ -0,0 +1,368 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file src/runtime/json/json_node.h + * \brief The graph nodes used by JSON runtime. + */ + +#ifndef TVM_RUNTIME_JSON_JSON_NODE_H_ +#define TVM_RUNTIME_JSON_JSON_NODE_H_ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace json { + +using namespace tvm::runtime; +using JSONGraphAttrs = std::unordered_map; + +/*! + * \brief The node entry in the serialized json graph. + */ +class JSONGraphNodeEntry { + public: + // Constructors. + JSONGraphNodeEntry() = default; + JSONGraphNodeEntry(int id, int index, int version = 0) + : id_(id), index_(index), version_(version) {} + + /*! + * \brief Serialize a node entry. + * \param writer The json writer. + */ + void Save(dmlc::JSONWriter* writer) const { + writer->BeginArray(); + writer->WriteArrayItem(id_); + writer->WriteArrayItem(index_); + writer->WriteArrayItem(version_); + writer->EndArray(); + } + + /*! + * \brief Deserialize the json string into a node entry. + * \param reader The json reader. + */ + void Load(dmlc::JSONReader* reader) { + reader->BeginArray(); + CHECK(reader->NextArrayItem()) << "invalid json format"; + reader->Read(&id_); + CHECK(reader->NextArrayItem()) << "invalid json format"; + reader->Read(&index_); + if (reader->NextArrayItem()) { + reader->Read(&version_); + CHECK(!reader->NextArrayItem()) << "invalid json format"; + } else { + version_ = 0; + } + } + + uint32_t id_; + uint32_t index_; + uint32_t version_; +}; + +/*! + * \brief The node of the serialized json graph. It includes an array of + * entries. + */ +class JSONGraphNode { + public: + // Constructors. + JSONGraphNode() = default; + JSONGraphNode(const std::string& name, + const std::string& op_type, + const std::vector& inputs = {}, + size_t num_outputs = 1) { + name_ = name; + op_type_ = op_type; + num_inputs_ = inputs.size(); + inputs_ = inputs; + num_outputs_ = num_outputs; + } + + /*! + * \brief Serialize a node so that it can be saved to disk. + * \param writer The json writer. + */ + void Save(dmlc::JSONWriter* writer) { + writer->BeginObject(); + writer->WriteObjectKeyValue("op", op_type_); + writer->WriteObjectKeyValue("name", name_); + if (!inputs_.empty()) { + SetAttr("num_inputs", std::to_string(inputs_.size())); + SetAttr("num_outputs", std::to_string(num_outputs_)); + writer->WriteObjectKeyValue("inputs", this->inputs_); + } + if (!attrs_.empty()) { + writer->WriteObjectKeyValue("attrs", attrs_); + } + writer->EndObject(); + } + + /*! + * \brief Load the attribute of a node in the json string. + * \param reader The json reader. 
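+   * Recognized keys are num_inputs, num_outputs, shape and dtype; any
+   * other key is stored into attrs_ as a vector of strings.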
+ */ + void LoadAttrs(dmlc::JSONReader* reader) { + std::string key, value; + reader->BeginObject(); + while (reader->NextObjectItem(&key)) { + if (key == "num_inputs") { + reader->Read(&value); + num_inputs_ = strtoul(value.c_str(), nullptr, 10); + } else if (key == "num_outputs") { + reader->Read(&value); + num_outputs_ = strtoul(value.c_str(), nullptr, 10); + } else if (key == "dtype") { + std::vector tmp; + reader->BeginArray(); + CHECK(reader->NextArrayItem()); + reader->Read(&tmp); + CHECK(!reader->NextArrayItem()); + for (const auto& it : tmp) { + dtype_.push_back(tvm::runtime::String2DLDataType(it)); + } + } else if (key == "shape") { + reader->BeginArray(); + CHECK(reader->NextArrayItem()); + reader->Read(&shape_); + CHECK(!reader->NextArrayItem()); + } else { + reader->BeginArray(); + CHECK(reader->NextArrayItem()); + std::vector tmp; + reader->Read(&tmp); + attrs_[key] = tmp; + CHECK(!reader->NextArrayItem()); + } + } + CHECK_EQ(shape_.size(), dtype_.size()); + } + + /*! + * \brief Load a node in the json string. + * \param reader The json reader. + */ + void Load(dmlc::JSONReader* reader) { + reader->BeginObject(); + std::string key; + while (reader->NextObjectItem(&key)) { + if (key == "op") { + reader->Read(&op_type_); + } else if (key == "name") { + reader->Read(&name_); + } else if (key == "inputs") { + reader->Read(&inputs_); + } else if (key == "attr" || key == "attrs") { + this->LoadAttrs(reader); + } else { + LOG(FATAL) << "Unknown key: " << key; + } + } + } + + /*! + * \brief Check if a node is a leaf node, i.e. input to the graph. + * + * \return True if the node has no input, otherwise, false. + */ + bool IsLeaf() const { return inputs_.empty(); } + + /*! + * \brief Return the number of outputs of the node. + * + * \return The number of the output. + */ + uint32_t GetNumOutput() const { return num_outputs_; } + + /*! + * \brief Set the number of outputs of the node. + * + * \param num_outputs The number of output. + */ + void SetNumOutput(uint32_t num_outputs) { + num_outputs_ = num_outputs; + } + + /*! + * \brief Get the value of an attribute in the node. + * + * \tparam T The return type. + * \param key The key for lookup. + * + * \return The value. + */ + template + T GetAttr(const std::string& key) const { + CHECK_GT(attrs_.count(key), 0U) << "Key: " << key << "is not found"; + return dmlc::get(attrs_.at(key)); + } + + /*! + * \brief Set an attribute for the node. + * + * \tparam ValueT The type of the value being stored. + * \param key The key of the attribute. + * \param value The value of the attribute. + */ + template + void SetAttr(const std::string& key, const ValueT& value) { + attrs_[key] = value; + } + + virtual ~JSONGraphNode() {} + + private: + /*! \brief The number of input. */ + uint32_t num_inputs_{0}; + /*! \brief The number of output. */ + uint32_t num_outputs_{1}; + /*! \brief The name of the op. It is the symbol that used for runtime lookup. */ + std::string name_; + /*! \brief The operator type, i.e. input is "null". */ + std::string op_type_; + /*! \brief The shape of the node. */ + std::vector> shape_; + /*! \brief The type of the node. */ + std::vector dtype_; + /*! \brief The inputs of the node. */ + std::vector inputs_; + /*! + * \brief Attribute of the node. For simplicity, we store all attribute as + * a list of std::string. It's the developer's resposibility to check the + * required attribute of a certain op and convert it into the needed type. 
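+   * For instance, an integer attribute can be recovered with
+   *   std::stoi(node.GetAttr<std::vector<std::string>>("strides")[0]);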
+ * + * For example, for conv2d, this map could contain: + * attrs_["strides"] = ["1", "1"] + * attrs_["padding"] = ["0", "0", "0", "0"] + * attrs_["data_layout"] = ["NCHW"] + * + * when creating an execution engine, developers may need to use these + * attributes and they can convert it into the needed type, i.e. padding to + * int + */ + JSONGraphAttrs attrs_; + + friend class JSONRuntimeBase; +}; + +} // namespace json +} // namespace runtime +} // namespace tvm + +namespace dmlc { +namespace json { +// JSON utils +template +inline bool SameType(const dmlc::any& data) { + return std::type_index(data.type()) == std::type_index(typeid(T)); +} + +template <> +struct Handler> { + inline static void Write( + dmlc::JSONWriter* writer, + const std::shared_ptr& data) { + data->Save(writer); + } + + inline static void Read(dmlc::JSONReader* reader, + std::shared_ptr* data) { + (*data)->Load(reader); + } +}; + +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, + const std::vector& data) { + writer->BeginArray(); + for (const auto& v : data) { + if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteArrayItem(dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndArray(); + } + + inline static void Read(dmlc::JSONReader* reader, + std::vector* data) { + LOG(FATAL) << "Not implemented."; + } +}; + +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, + const std::unordered_map& data) { + writer->BeginObject(); + for (const auto& kv : data) { + auto k = kv.first; + const dmlc::any& v = kv.second; + if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndObject(); + } + + inline static void Read(dmlc::JSONReader* reader, + std::unordered_map* data) { + LOG(FATAL) << "Not implemented."; + } +}; + +} // namespace json +} // namespace dmlc + +#endif // TVM_RUNTIME_JSON_JSON_NODE_H_ diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h new file mode 100644 index 000000000000..6765fa49e0e1 --- /dev/null +++ b/src/runtime/json/json_runtime.h @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/json/json_runtime.h
+ * \brief Utilities for json runtime.
+ */
+
+#ifndef TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+#define TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "json_node.h"
+
+namespace tvm {
+namespace runtime {
+namespace json {
+
+/*!
+ * \brief A json runtime that executes the serialized JSON format. This runtime
+ * can be extended by user defined runtimes for execution.
+ */
+class JSONRuntimeBase : public ModuleNode {
+ public:
+  explicit JSONRuntimeBase(const std::string& graph_json) {
+    LoadGraph(graph_json);
+  }
+
+  // The type key of each subclass can be saved to the json file and then
+  // used to create the specific runtime during deserialization.
+  // virtual const char* type_key() const = 0;
+  const char* type_key() const { return ""; }
+
+  virtual void Init() { LOG(FATAL) << "NYI"; }
+
+  /*!
+   * \brief Get a packed function.
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  virtual PackedFunc GetFunction(const std::string& name,
+                                 const ObjectPtr<Object>& sptr_to_self) {
+    return PackedFunc();
+  }
+
+  // Run(TVMValue* value, int* type_code, int nargs), or
+  // Run(TVMArgs args, TVMRetValue rv)?
+  virtual void Run() { LOG(FATAL) << "NYI"; }
+
+  void SetInput(const std::string& name, const NDArray& data) {
+    auto it = input_map_.find(name);
+    CHECK(it != input_map_.end()) << "Not found input: " << name;
+    SetInput(it->second, data);
+  }
+
+  void SetInput(uint32_t index, const NDArray& data) {
+    CHECK_LT(static_cast<size_t>(index), input_nodes_.size());
+    uint32_t eid = EntryID(input_nodes_[index], 0);
+    data_entry_[eid] = data;
+  }
+
+  size_t NumOutputs() const { return outputs_.size(); }
+
+  ObjectRef GetOutput() {
+    // Return the NDArray directly if there is only one output.
+    if (NumOutputs() == 1) {
+      uint32_t eid = EntryID(outputs_[0]);
+      return data_entry_[eid];
+    }
+
+    // We need to return an ADTObj if there are multiple outputs.
+    std::vector<ObjectRef> outs;
+    for (size_t i = 0; i < NumOutputs(); i++) {
+      uint32_t eid = EntryID(outputs_[i]);
+      outs.push_back(data_entry_[eid]);
+    }
+    return ADT::Tuple(outs);
+  }
+
+ protected:
+  void LoadGraph(const std::string& graph_json) {
+    std::istringstream is(graph_json);
+    dmlc::JSONReader reader(&is);
+    this->Load(&reader);
+
+    for (size_t i = 0; i < input_nodes_.size(); i++) {
+      uint32_t nid = input_nodes_[i];
+      std::string& name = nodes_[nid].name_;
+      input_map_[name] = i;
+    }
+  }
+
+  void Load(dmlc::JSONReader* reader) {
+    reader->BeginObject();
+    std::string key;
+    while (reader->NextObjectItem(&key)) {
+      if (key == "nodes") {
+        reader->Read(&nodes_);
+      } else if (key == "arg_nodes") {
+        reader->Read(&input_nodes_);
+      } else if (key == "node_row_ptr") {
+        reader->Read(&node_row_ptr_);
+      } else if (key == "heads") {
+        reader->Read(&outputs_);
+      } else {
+        LOG(FATAL) << "Unknown key: " << key;
+      }
+    }
+  }
+
+  // Get the node entry index.
+  uint32_t EntryID(uint32_t nid, uint32_t index) const {
+    return node_row_ptr_[nid] + index;
+  }
+
+  // Get the node entry index.
+  uint32_t EntryID(const JSONGraphNodeEntry& e) const {
+    return EntryID(e.id_, e.index_);
+  }
+
+  // Number of node entries.
+  uint32_t NumEntries() const {
+    return node_row_ptr_.back();
+  }
+
+ protected:
+  /*! \brief The json graph nodes. */
+  std::vector<JSONGraphNode> nodes_;
+  /*! \brief The input nodes, including variables and constants. */
+  std::vector<uint32_t> input_nodes_;
+  /*! \brief Used for quick entry indexing. */
+  std::vector<uint32_t> node_row_ptr_;
+  /*! \brief Output entries. */
+  std::vector<JSONGraphNodeEntry> outputs_;
+  /*! \brief Data of that entry. */
+  std::vector<NDArray> data_entry_;
+  /*! \brief Map the input name to index. */
+  std::unordered_map<std::string, uint32_t> input_map_;
+};
+
+}  // namespace json
+}  // namespace runtime
+}  // namespace tvm
+#endif  // TVM_RUNTIME_JSON_JSON_RUNTIME_H_
diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc
new file mode 100644
index 000000000000..072a184d2f06
--- /dev/null
+++ b/src/runtime/json/json_runtime_driver.cc
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/json/json_runtime_driver.cc
+ * \brief The driver for json runtime.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "json_node.h"
+#include "json_runtime.h"
+
+namespace tvm {
+namespace runtime {
+namespace json {
+
+/*!
+ * \brief The class represents a json runtime driver. It is mainly responsible
+ * for 1) serializing and deserializing the json runtime artifacts, and 2)
+ * dispatching and invoking the actual runtime that interprets the json
+ * artifacts.
+ */
+class JSONRuntimeDriver : public ModuleNode {
+ public:
+  struct Subgraph {
+    std::string symbol_name;
+    std::string graph_json;
+    std::unordered_map<std::string, NDArray> weights;
+  };
+
+  explicit JSONRuntimeDriver(const std::string& graph_json) {
+    this->graph_json_ = graph_json;
+    Deserialize();
+  }
+
+  const char* type_key() const { return "JSONRuntime"; }
+
+  /*!
+   * \brief Get a packed function.
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  PackedFunc GetFunction(const std::string& name,
+                         const ObjectPtr<Object>& sptr_to_self) {
+    if (this->subgraphs_.count(name)) {
+      return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) {
+        auto json_rt = this->subgraphs_[name];
+        auto* json_rt_node = static_cast<JSONRuntimeBase*>(json_rt.operator->());
+        CHECK(json_rt_node);
+        // Set input, how to make sure it is only invoked once? Likely we don't
+        // really need this as we could directly set input when creating the
+        // engine, but what if the input for each inference varies.
+        // json_rt_node->SetInput();
+        //
+        // Execute the engine
+        json_rt_node->Run();
+
+        // Get the output, set rv or fill directly to args?
+        *rv = json_rt_node->GetOutput();
+      });
+    } else {
+      // Issue a warning when we don't find the symbol from the module. Note
+      // we don't kill the execution here as the symbol may exist in other
+      // runtime modules.
+      LOG(WARNING) << "Cannot find " << name << " from json runtime";
+      return PackedFunc();
+    }
+  }
+
+  void Deserialize() {
+    std::vector<Subgraph> subgraphs;
+    dmlc::MemoryStringStream memstrm(&graph_json_);
+    dmlc::Stream* strm = &memstrm;
+    // Header
+    uint64_t header;
+    CHECK(strm->Read(&header)) << "Invalid serialized file format";
+
+    // Compiler name
+    std::string compiler_name;
+    CHECK(strm->Read(&compiler_name)) << "Invalid serialized file format";
+
+    uint64_t num_subgraphs;
+    CHECK(strm->Read(&num_subgraphs)) << "Invalid serialized file format";
+    // CHECK(header == kTVMJSONRuntimeMagic) << "Invalid serialized file format";
+
+    for (uint64_t i = 0; i < num_subgraphs; i++) {
+      Subgraph g;
+      // Load the symbol for runtime lookup.
+      std::string symbol_name;
+      CHECK(strm->Read(&symbol_name)) << "Invalid serialized file format";
+      g.symbol_name = symbol_name;
+
+      // Load the graph representation.
+      std::string json_graph;
+      CHECK(strm->Read(&json_graph)) << "Invalid serialized file format";
+      g.graph_json = json_graph;
+
+      // Load the weights for the graph.
+      uint64_t num_params;
+      CHECK(strm->Read(&num_params)) << "Invalid serialized file format";
+
+      std::vector<std::string> names;
+      CHECK(strm->Read(&names)) << "Invalid serialized file format";
+      CHECK_EQ(names.size(), num_params) << "Invalid serialized file format";
+
+      for (size_t j = 0; j < static_cast<size_t>(num_params); j++) {
+        NDArray tmp;
+        tmp.Load(strm);
+        g.weights[names[j]] = tmp;
+      }
+      subgraphs.push_back(g);
+    }
+    CreateSubgraphs(subgraphs, compiler_name);
+  }
+
+  // Create the subgraphs for a specific runtime and cache them, so that we
+  // can invoke them without repeatedly creating them at runtime.
+  void CreateSubgraphs(const std::vector<Subgraph>& subgraphs,
+                       const std::string& compiler_name) {
+    // How do we know which runtime to create? Should we bake something in the
+    // json to indicate this? i.e. we can register a runtime "runtime.ext.dnnl"
+    // and save dnnl. Now we can just get it from the registry using dnnl. This
+    // requires us to have a single place to invoke different external codegens
+    // and serialize them.
+    std::string ext_runtime_name = "runtime.ext." + compiler_name;
+    auto pf = tvm::runtime::Registry::Get(ext_runtime_name);
+    CHECK(pf) << "Failed to find the extern runtime for " << ext_runtime_name;
+    for (const auto& sg : subgraphs) {
+      CHECK_EQ(subgraphs_.count(sg.symbol_name), 0U)
+          << "Found duplicated symbol: " << sg.symbol_name;
+
+      Module ext_mod = (*pf)(sg.graph_json);
+      const auto* json_rt_node = ext_mod.as<JSONRuntimeBase>();
+      CHECK(json_rt_node);
+      // Set up the params that are constants.
+      for (const auto& it : sg.weights) {
+        CallPackedFunc(ext_mod, "set_input", it.first, it.second);
+      }
+      // Init the engine
+      CallPackedFunc(ext_mod, "init");
+
+      subgraphs_[sg.symbol_name] = ext_mod;
+    }
+  }
+
+  static Module LoadFromBinary(void* strm) {
+    dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
+    std::string graph;
+    stream->Read(&graph);
+    auto n = make_object<JSONRuntimeDriver>(graph);
+    return Module(n);
+  }
+
+  void SaveToBinary(dmlc::Stream* stream) override {
+    stream->Write(this->graph_json_);
+  }
+
+ private:
+  template <typename... Args>
+  void CallPackedFunc(Module mod, const std::string& name, Args... args) {
+    auto pf = mod.GetFunction(name);
+    pf(std::forward<Args>(args)...);
+  }
+
+  /*! \brief The graph json. Weights are also baked in. */
+  std::string graph_json_;
+  /*!
+   * \brief Cache the created runtime modules so they can be directly invoked.
+   *
+   * The runtime could be a csource runtime or any user defined runtime that
+   * extends the JSONRuntimeBase class.
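+   * The map is keyed by each subgraph's symbol name so that GetFunction
+   * can dispatch an incoming call directly to its cached module.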
+ */ + std::unordered_map subgraphs_; +}; + +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_jsonruntime") +.set_body_typed(JSONRuntimeDriver::LoadFromBinary); + +} // namespace json +} // namespace runtime +} // namespace tvm + From 96bcc0c8492f3725efc9c7bf302dd6c5fe00f13a Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 4 May 2020 06:21:46 +0000 Subject: [PATCH 02/30] json dnnl WIP --- .../contrib/codegen_json/codegen_json.cc | 9 +++- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 47 ++++++++++++++++++- src/runtime/json/json_node.h | 35 ++++++++++++++ src/runtime/json/json_runtime_driver.cc | 2 +- 4 files changed, 90 insertions(+), 3 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc index 9fc880ad3521..bed5206aac67 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc @@ -127,8 +127,15 @@ class OpAttrExtractor : public AttrVisitor { SetNodeAttr(key, attr); } else if (!(*value).defined()) { // Skip NullValue SetNodeAttr(key, std::vector{""}); + } else if (const auto* im = (*value).as()) { + SetNodeAttr(key, std::vector{std::to_string(im->value)}); + } else if (const auto* fm = (*value).as()) { + SetNodeAttr(key, std::vector{Fp2String(fm->value)}); + } else if (const auto* str = (*value).as()) { + String s = GetRef(str); + SetNodeAttr(key, std::vector{s.operator std::string()}); } else { - LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey(); + LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey() << ": " << *value; } } diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index f9e6affc0196..503ade7c08a7 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -24,9 +24,11 @@ #include +#include #include #include +#include "dnnl.hpp" #include "../../json/json_node.h" #include "../../json/json_runtime.h" @@ -38,6 +40,9 @@ using namespace tvm::runtime; using namespace tvm::runtime::json; class DNNLJSONRuntime : public JSONRuntimeBase { + using tag = dnnl::memory::format_tag; + using dt = dnnl::memory::data_type; + public: explicit DNNLJSONRuntime(const std::string& graph_json) : JSONRuntimeBase(graph_json) {} ~DNNLJSONRuntime() = default; @@ -50,8 +55,48 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Create a engine here } + void BuildEngine() { + for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { + const auto& node = nodes_[nid]; + if (node.GetOpType() == "input") { + // Handle inputs + } else { + CHECK_EQ(node.GetOpType(), "kernel"); + auto op_name = node.GetOpName(); + // Handle kernel + for (const auto& e : node.GetInputs()) { + // uint32_t eid = this->EntryID(e); + // shape/type for the i-th input + // std::vector shape = node.GetShape()[e.index_]; + // DLDataType dltype = node.GetDataType()[e.index_]; + } + } + } + } + + void Conv2d() { + } + + void Dense() { + } + + void BatchNorm() { + } + + void Relu() { + } + + // Macro for add, subtract, multiply... + private: - // Engine + /* The dnnl engine. */ + dnnl::engine engine_; + /* The dnnl stream. */ + dnnl::stream stream_; + /* The network layers that are represented in dnnl primitives. */ + std::vector net_; + /* The memory that is consumed by arguments. 
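+     Each element maps DNNL_ARG_* slots (e.g. DNNL_ARG_SRC, DNNL_ARG_DST)
+     to the dnnl::memory objects consumed by one primitive in net_.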
*/ + std::vector> net_args_; }; TVM_REGISTER_GLOBAL("runtime.ext.dnnl") diff --git a/src/runtime/json/json_node.h b/src/runtime/json/json_node.h index 3516cd220fbf..dec2d3b8228d 100644 --- a/src/runtime/json/json_node.h +++ b/src/runtime/json/json_node.h @@ -203,6 +203,41 @@ class JSONGraphNode { */ uint32_t GetNumOutput() const { return num_outputs_; } + /*! + * \brief Return the input entries. + * + * \return The input entries. + */ + std::vector GetInputs() const { return inputs_; } + + /*! + * \brief Return the op type. + * + * \return The op type. + */ + std::string GetOpType() const { return op_type_; } + + /*! + * \brief Return the op name. + * + * \return The op name. + */ + std::string GetOpName() const { return name_; } + + /*! + * \brief Return the op output shapes. + * + * \return The shapes. + */ + std::vector> GetOpShape() const { return shape_; } + + /*! + * \brief Return the op types. + * + * \return The types. + */ + std::vector GetOpDataType() const { return dtype_; } + /*! * \brief Set the number of outputs of the node. * diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc index 072a184d2f06..3ed34bbac44c 100644 --- a/src/runtime/json/json_runtime_driver.cc +++ b/src/runtime/json/json_runtime_driver.cc @@ -59,7 +59,7 @@ class JSONRuntimeDriver : public ModuleNode { Deserialize(); } - const char* type_key() const { return "JSONRuntime"; } + const char* type_key() const { return "jsonruntime"; } /*! * \brief Get a packed function. From e273c1068be42c37130ddddb0083e3d2f55860c3 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 10 Jun 2020 18:58:55 +0000 Subject: [PATCH 03/30] fix ArrayNode usages --- src/relay/backend/contrib/codegen_json/codegen_json.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc index bed5206aac67..acdd0d5d8a14 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc @@ -112,16 +112,16 @@ class OpAttrExtractor : public AttrVisitor { void Visit(const char* key, runtime::ObjectRef* value) final { if (const auto* an = (*value).as()) { std::vector attr; - for (size_t i = 0; i < an->data.size(); ++i) { - if (const auto* im = an->data[i].as()) { + for (size_t i = 0; i < an->size(); ++i) { + if (const auto* im = (*an)[i].as()) { attr.push_back(std::to_string(im->value)); - } else if (const auto* fm = an->data[i].as()) { + } else if (const auto* fm = (*an)[i].as()) { attr.push_back(Fp2String(fm->value)); - } else if (const auto* str = an->data[i].as()) { + } else if (const auto* str = (*an)[i].as()) { String s = GetRef(str); attr.push_back(s.operator std::string()); } else { - LOG(FATAL) << "Not supported type: " << an->data[i]->GetTypeKey(); + LOG(FATAL) << "Not supported type: " << (*an)[i]->GetTypeKey(); } } SetNodeAttr(key, attr); From cae8b59167d53d77513fd78ceaae7baf792f2ad2 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 10 Jun 2020 23:21:21 +0000 Subject: [PATCH 04/30] Support composite functions --- .../backend/contrib/codegen_json/codegen_json.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.cc index acdd0d5d8a14..bd42ed568b94 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.cc 
@@ -249,8 +249,18 @@ class JSONSerializer : public MemoizedExprTranslator(cn->attrs.get())); + if (cn->op.as()) { + OpAttrExtractor extractor(node); + extractor.Extract(const_cast(cn->attrs.get())); + } else if (const auto* fn = cn->op.as()) { + auto pattern = fn->GetAttr(attr::kPartitionedFromPattern); + CHECK(pattern.defined()); + std::vector values; + values.push_back(pattern.value().operator std::string()); + std::vector attr; + attr.emplace_back(values); + node->SetAttr("PartitionedFromPattern", attr); + } } std::vector VisitExprDefault_(const Object* op) final { @@ -288,8 +298,6 @@ class JSONSerializer : public MemoizedExprTranslatorGetAttr(attr::kComposite); CHECK(comp.defined()) << "JSON runtime only supports composite functions."; name = comp.value().operator std::string(); - // TODO(zhiics) Handle composite function here. - LOG(FATAL) << "Composite function is not handled yet."; } else { LOG(FATAL) << "JSON runtime does not support calls to " << cn->op->GetTypeKey(); } From 9594047d73d446b0f09d1c262476ad0b9e23a64c Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 12 Jun 2020 01:25:18 +0000 Subject: [PATCH 05/30] DNNL json runtime: conv2d/add/relu/dense/bn --- cmake/modules/contrib/DNNL.cmake | 3 +- .../{codegen_json.cc => codegen_json.h} | 5 +- src/relay/backend/contrib/dnnl/codegen.cc | 38 +- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 443 +++++++++++++++++- src/runtime/json/json_node.h | 2 + src/runtime/json/json_runtime.h | 2 +- src/runtime/json/json_runtime_driver.cc | 8 + tests/python/relay/test_external_runtime.py | 2 +- tests/python/relay/test_json_runtime.py | 403 ++++++++++++++++ 9 files changed, 881 insertions(+), 25 deletions(-) rename src/relay/backend/contrib/codegen_json/{codegen_json.cc => codegen_json.h} (98%) create mode 100644 tests/python/relay/test_json_runtime.py diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index 0b56bc9a9241..ac03fbcc8dd1 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -17,8 +17,9 @@ if(USE_DNNL_CODEGEN STREQUAL "ON") file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) - file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.cc) + file(GLOB JSON_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.h) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) + list(APPEND COMPILER_SRCS ${JSON_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.cc b/src/relay/backend/contrib/codegen_json/codegen_json.h similarity index 98% rename from src/relay/backend/contrib/codegen_json/codegen_json.cc rename to src/relay/backend/contrib/codegen_json/codegen_json.h index bd42ed568b94..093d076c43b7 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.cc +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -18,9 +18,11 @@ */ /*! 
- * \file relay/backend/contrib/codegen_json.cc + * \file relay/backend/contrib/codegen_json.h * \brief Utilities for json codegen and runtime */ +#ifndef TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ +#define TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ #include #include @@ -365,3 +367,4 @@ TVM_REGISTER_GLOBAL("relay.analysis.ToJSON") } // namespace relay } // namespace tvm +#endif // TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 60138ae99b3e..be37899b7180 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -35,6 +35,7 @@ #include "../../utils.h" #include "../codegen_c/codegen_c.h" +#include "../codegen_json/codegen_json.h" namespace tvm { namespace relay { @@ -417,13 +418,46 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { std::ostringstream code_stream_; }; +/*! + * \brief Get the external symbol of the Relay function name. + * + * \param func The provided function. + * + * \return An external symbol. + */ +std::string GetExtSymbol(const Function& func) { + const auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node.defined()) << "Fail to retrieve external symbol."; + return std::string(name_node.value()); +} + /*! * \brief The external compiler/codegen tool. It takes a Relay expression/module and * compile it into a runtime module. */ runtime::Module DNNLCompiler(const ObjectRef& ref) { - DNNLModuleCodegen dnnl; - return dnnl.CreateCSourceModule(ref); + std::string func_name; + std::string graph_json; + if (ref->IsInstance()) { + auto func = Downcast(ref); + func_name = GetExtSymbol(func); + graph_json = ToJSON(func); + } else if (ref->IsInstance()) { + IRModule mod = Downcast(ref); + CHECK_EQ(mod->functions.size(), 1U) << "Only support single subgraph"; + for (const auto& it : mod->functions) { + auto func = Downcast(it.second); + func_name = GetExtSymbol(func); + graph_json = ToJSON(func); + } + } else { + LOG(FATAL) << "The input ref is expected to be a Relay function or module\n"; + } + + const auto* pf = runtime::Registry::Get("runtime.DNNLJSONRuntimeCreate"); + CHECK(pf != nullptr) << "Cannot find JSON runtime driver module to create"; + auto mod = (*pf)(func_name, graph_json); + return mod; } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 503ade7c08a7..05d308245a45 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -44,67 +44,472 @@ class DNNLJSONRuntime : public JSONRuntimeBase { using dt = dnnl::memory::data_type; public: - explicit DNNLJSONRuntime(const std::string& graph_json) : JSONRuntimeBase(graph_json) {} + explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json) + : JSONRuntimeBase(graph_json), func_name_(func_name) {} ~DNNLJSONRuntime() = default; + const char* type_key() const { return "dnnljsonruntime"; } + + PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { + if (!this->is_init_) { + Init(); + BuildEngine(); + } + this->is_init_ = true; + + if (this->func_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + for (auto i = 0; i < args.size(); ++i) { + // Setup data entries. 
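+          // Calling convention assumed here: positional args carry all
+          // graph inputs first, followed by one buffer per output.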
+ CHECK(args[i].type_code() == kTVMNDArrayHandle || + args[i].type_code() == kTVMDLTensorHandle) + << "Expect NDArray or DLTensor as inputs\n"; + if (args[i].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[i]; + this->data_entry_[i].CopyFrom(arg); + } else { + NDArray arg = args[i]; + this->data_entry_[i].CopyFrom(arg); + } + } + + // Execute the subgraph. + this->Run(); + + // Get result. + auto offset = this->input_nodes_.size(); + for (size_t i = 0; i < this->outputs_.size(); ++i) { + size_t idx = i + offset; + if (args[idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[idx]; + this->data_entry_[idx].CopyTo(arg); + } else { + NDArray arg = args[idx]; + this->data_entry_[idx].CopyTo(arg); + } + } + + // FIXME: Multiple outputs. + //*rv = data_entry_.back(); + }); + } else { + LOG(WARNING) << "Unknown DNNL symbol " << name; + return PackedFunc(); + } + } + void Run() override { - // Invoke the engine and return the result + // Fill in the input buffers. + for (size_t i = 0; i < this->input_nodes_.size(); ++i) { + auto nid = this->input_nodes_[i]; + // TODO: Support other data lengths. + size_t offset_in_bytes = this->node_out_mem_[nid][0].second * 4; + write_to_dnnl_memory(this->data_entry_[i]->data, this->node_out_mem_[nid][0].first, + GetNDArraySize(this->data_entry_[i]), offset_in_bytes); + } + + // Invoke the engine. + for (size_t i = 0; i < net_.size(); ++i) { + net_.at(i).execute(stream_, net_args_.at(i)); + } + stream_.wait(); + + // Read output buffers. + auto offset = this->input_nodes_.size(); + for (size_t i = 0; i < this->outputs_.size(); ++i) { + auto out_entry = this->outputs_[i]; + auto nid = out_entry.id_; + auto idx = out_entry.index_; + size_t offset_in_bytes = this->node_out_mem_[nid][idx].second * 4; + read_from_dnnl_memory(this->data_entry_[offset + i]->data, + this->node_out_mem_[nid][idx].first, + GetNDArraySize(this->data_entry_[offset + i]), offset_in_bytes); + } } void Init() override { - // Create a engine here + engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); + stream_ = dnnl::stream(engine_); } void BuildEngine() { + // Build subgraph engine. for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { const auto& node = nodes_[nid]; - if (node.GetOpType() == "input") { - // Handle inputs - } else { + if (node.GetOpType() == "kernel") { CHECK_EQ(node.GetOpType(), "kernel"); auto op_name = node.GetOpName(); - // Handle kernel - for (const auto& e : node.GetInputs()) { - // uint32_t eid = this->EntryID(e); - // shape/type for the i-th input - // std::vector shape = node.GetShape()[e.index_]; - // DLDataType dltype = node.GetDataType()[e.index_]; + if ("nn.conv2d" == op_name) { + Conv2d(nid); + } else if ("nn.dense" == op_name) { + Dense(nid); + } else if ("nn.batch_norm" == op_name) { + BatchNorm(nid); + } else if ("nn.relu" == op_name) { + Relu(nid); + } else if ("add" == op_name) { + Add(nid); + } else { + LOG(FATAL) << "Unsupported op: " << op_name; } } } + + // Initialize input/output entries. 
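+    // Note: entries are allocated as float32 tensors on CPU below; other
+    // dtypes and devices would need the TODOs above to be resolved first.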
+    DLContext ctx;
+    ctx.device_type = static_cast(1);
+    ctx.device_id = 0;
+    for (size_t i = 0; i < this->input_nodes_.size(); ++i) {
+      auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0];
+      this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx));
+    }
+    for (size_t i = 0; i < this->outputs_.size(); ++i) {
+      auto entry = this->outputs_[i];
+      auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_];
+      this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx));
+    }
+  }

-private:
+  // Bind a JSON graph node entry to a DNNL memory.
+  dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc,
+                              size_t offset = 0) {
+    if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) {
+      return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset);
+    }
+    return node_out_mem_[entry.id_][entry.index_].first;
+  }
+
+  // Bind a JSON graph node entry to a given DNNL memory.
+  dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem,
+                              size_t offset = 0) {
+    // Since the DNNL memory has been created before calling this function, we assume the entry
+    // has not yet been bound to another DNNL memory; otherwise memory would be leaked.
+    CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0);
+
+    // TODO: Support other data types (e.g., int8).
+    auto data_node = nodes_[entry.id_];
+    auto dltype = data_node.GetOpDataType()[entry.index_];
+    CHECK_EQ(dltype.bits, 32);
+
+    node_out_mem_[entry.id_][entry.index_] = {mem, offset};
+    return node_out_mem_[entry.id_][entry.index_].first;
+  }
+
+  void Conv2d(const size_t& nid) {
+    auto node = this->nodes_[nid];
+
+    // Setup attributes.
+    auto data_entry = node.GetInputs()[0];
+    auto weight_entry = node.GetInputs()[1];
+    dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_];
+    dnnl::memory::dims weight_shape =
+        this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_];
+    std::vector str_strides = node.GetAttr>("strides");
+    std::vector str_padding = node.GetAttr>("padding");
+    int groups = std::stoi(node.GetAttr>("groups")[0]);
+
+    dnnl::memory::dim N = input_shape[0],        // batch size
+        IC = input_shape[1],                     // input channels
+        IH = input_shape[2],                     // input height
+        IW = input_shape[3],                     // input width
+        OC = weight_shape[0],                    // output channels
+        KH = weight_shape[2],                    // kernel height
+        KW = weight_shape[3],                    // kernel width
+        PH_L = std::stoi(str_padding[1]),        // height padding: left
+        PH_R = std::stoi(str_padding[3]),        // height padding: right
+        PW_L = std::stoi(str_padding[0]),        // width padding: left
+        PW_R = std::stoi(str_padding[2]),        // width padding: right
+        SH = std::stoi(str_strides[0]),          // height-wise stride
+        SW = std::stoi(str_strides[1]),          // width-wise stride
+        OH = (IH - KH + PH_L + PH_R) / SH + 1,   // output height
+        OW = (IW - KW + PW_L + PW_R) / SW + 1;   // output width
+
+    // Memory shapes.
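+    // (Worked example, assuming the 14x14 input, 3x3 kernel, padding 1 and
+    //  stride 1 used by the unit tests: OH = (14 - 3 + 1 + 1) / 1 + 1 = 14,
+    //  so dst_dims below stays {N, OC, 14, 14}.)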
+    dnnl::memory::dims src_dims = {N, IC, IH, IW};
+    dnnl::memory::dims weights_dims = {OC, IC, KH, KW};
+    if (groups > 1) {
+      weights_dims = {groups, 1, IC / groups, KH, KW};
+    }
+    dnnl::memory::dims bias_dims = {OC};
+    dnnl::memory::dims dst_dims = {N, OC, OH, OW};
+    dnnl::memory::dims strides_dims = {SH, SW};
+    dnnl::memory::dims padding_dims_l = {PH_L, PW_L};
+    dnnl::memory::dims padding_dims_r = {PH_R, PW_R};
+
+    // Memory descriptions.
+    auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, tag::any);
+    auto conv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, tag::any);
+    auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, tag::nchw);
+    auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::a);
+
+    // Conv2d description.
+    auto conv_desc = dnnl::convolution_forward::desc(
+        dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md,
+        conv_weights_md, conv_bias_md, conv_dst_md, strides_dims, padding_dims_l, padding_dims_r);
+    dnnl::primitive_attr attr;
+    auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_);
+
+    // Push to the network.
+    auto conv = dnnl::convolution_forward(conv2d_prim_desc);
+    net_.push_back(conv);
+
+    // Data memory.
+    CHECK_EQ(node.GetAttr>("data_layout")[0], "NCHW");
+    auto conv2d_src_memory = BindDNNLMemory(data_entry, {src_dims, dt::f32, tag::nchw});
+
+    // Weight memory.
+    CHECK_EQ(node.GetAttr>("kernel_layout")[0], "OIHW");
+    auto conv2d_weights_memory = BindDNNLMemory(
+        weight_entry, {weights_dims, dt::f32, (groups > 1) ? tag::goihw : tag::oihw});
+
+    // Bias memory (useless for now as TVM conv2d op has no bias).
+    std::vector bias(OC, 0);
+    auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_, bias.data());
+
+    // Output memory.
+    JSONGraphNodeEntry out_entry(nid, 0);
+    auto conv2d_dst_memory = BindDNNLMemory(out_entry, conv2d_prim_desc.dst_desc());
+
+    // Bind memory buffers.
+    net_args_.push_back({{DNNL_ARG_SRC, conv2d_src_memory},
+                         {DNNL_ARG_WEIGHTS, conv2d_weights_memory},
+                         {DNNL_ARG_BIAS, conv2d_bias_memory},
+                         {DNNL_ARG_DST, conv2d_dst_memory}});
+  }

-  void Dense() {
+  void Dense(const size_t& nid) {
+    auto node = this->nodes_[nid];
+
+    // Setup attributes.
+    auto data_entry = node.GetInputs()[0];
+    auto weight_entry = node.GetInputs()[1];
+    dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_];
+    dnnl::memory::dims weight_shape =
+        this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_];
+
+    dnnl::memory::dim B = input_shape[0],  // batch size
+        IC = input_shape[1],               // input channels
+        OC = weight_shape[0];              // output channels
+
+    // Memory shapes.
+    dnnl::memory::dims data_dims = {B, IC};
+    dnnl::memory::dims weight_dims = {OC, IC};
+    dnnl::memory::dims bias_dims = {OC};
+    dnnl::memory::dims out_dims = {B, OC};
+
+    // Memory descriptions.
+    auto data_md = dnnl::memory::desc({data_dims, dt::f32, tag::nc});
+    auto weight_md = dnnl::memory::desc({weight_dims, dt::f32, tag::nc});
+    auto bias_md = dnnl::memory::desc({bias_dims, dt::f32, tag::x});
+    auto dst_md = dnnl::memory::desc({out_dims, dt::f32, tag::nc});
+
+    // Dense description.
+    auto dense_desc = dnnl::inner_product_forward::desc(dnnl::prop_kind::forward_inference, data_md,
+                                                        weight_md, bias_md, dst_md);
+    auto dense_prim_desc = dnnl::inner_product_forward::primitive_desc(dense_desc, engine_);
+
+    auto dense = dnnl::inner_product_forward(dense_prim_desc);
+    net_.push_back(dense);
+
+    // Memories.
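+    // (nn.dense in TVM carries no bias operand, so the zero-filled vector
+    //  below exists only to satisfy DNNL's inner_product interface, the same
+    //  trick the conv2d path above uses.)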
+ std::vector bias(OC, 0); + auto data_memory = BindDNNLMemory(data_entry, data_md); + auto weight_memory = BindDNNLMemory(weight_entry, weight_md); + auto bias_memory = dnnl::memory(bias_md, engine_, bias.data()); + JSONGraphNodeEntry out_entry(nid, 0); + auto dst_memory = BindDNNLMemory(out_entry, dense_prim_desc.dst_desc()); + + net_args_.push_back({{DNNL_ARG_SRC, data_memory}, + {DNNL_ARG_WEIGHTS, weight_memory}, + {DNNL_ARG_BIAS, bias_memory}, + {DNNL_ARG_DST, dst_memory}}); } - void BatchNorm() { + void BatchNorm(const size_t& nid) { + auto node = this->nodes_[nid]; + + auto data_entry = node.GetInputs()[0]; + auto gamma_entry = node.GetInputs()[1]; + auto beta_entry = node.GetInputs()[2]; + auto mean_entry = node.GetInputs()[3]; + auto variance_entry = node.GetInputs()[4]; + dnnl::memory::dims data_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dim IC = data_shape[1]; + float epsilon = std::stof(node.GetAttr>("epsilon")[0]); + + // Memory description. + dnnl::memory::desc data_md = GenDNNLMemDescByShape(data_shape, dt::f32); + + // BN description. + auto bn_desc = dnnl::batch_normalization_forward::desc( + dnnl::prop_kind::forward_inference, data_md, epsilon, + dnnl::normalization_flags::use_global_stats | dnnl::normalization_flags::use_scale_shift); + auto bn_prim_desc = dnnl::batch_normalization_forward::primitive_desc(bn_desc, engine_); + auto bn = dnnl::batch_normalization_forward(bn_prim_desc); + net_.push_back(bn); + + // Memories. + auto data_memory = BindDNNLMemory(data_entry, data_md); + JSONGraphNodeEntry out_entry(nid, 0); + auto out_memory = BindDNNLMemory(out_entry, data_md); + auto mean_memory = BindDNNLMemory(mean_entry, bn_prim_desc.mean_desc()); + auto variance_memory = BindDNNLMemory(variance_entry, bn_prim_desc.variance_desc()); + + // In DNNL, weight is composed of gamma+beta, so we point them to the same DNNL memory but + // assign an offset to beta data for runtime serialization. + auto weight_memory = BindDNNLMemory(gamma_entry, bn_prim_desc.weights_desc(), 0); + BindDNNLMemory(beta_entry, weight_memory, IC); + + net_args_.push_back({{DNNL_ARG_SRC, data_memory}, + {DNNL_ARG_DST, out_memory}, + {DNNL_ARG_SCALE_SHIFT, weight_memory}, + {DNNL_ARG_MEAN, mean_memory}, + {DNNL_ARG_VARIANCE, variance_memory}}); } - void Relu() { + void Relu(const size_t& nid) { + auto node = this->nodes_[nid]; + + auto data_entry = node.GetInputs()[0]; + dnnl::memory::dims shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + auto data_md = dnnl::memory::desc{{shape}, dt::f32, tag::abcd}; + + auto relu_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, + dnnl::algorithm::eltwise_relu, data_md, 0); + auto relu_prim_desc = dnnl::eltwise_forward::primitive_desc(relu_desc, engine_); + CHECK(data_md == relu_prim_desc.dst_desc()); + + auto relu = dnnl::eltwise_forward(relu_prim_desc); + net_.push_back(relu); + + auto data_memory = BindDNNLMemory(data_entry, data_md); + auto out_md = dnnl::memory::desc(shape, dt::f32, tag::abcd); + JSONGraphNodeEntry out_entry(nid, 0); + auto out_memory = BindDNNLMemory(out_entry, out_md); + + net_args_.push_back({{DNNL_ARG_SRC, data_memory}, {DNNL_ARG_DST, out_memory}}); } - // Macro for add, subtract, multiply... + void Add(const size_t& nid) { + auto node = this->nodes_[nid]; + + // Memory and compute description. 
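+    // (binary_add here assumes the two operands share an identical shape; no
+    //  broadcasting is attempted, and the CHECK below enforces this.)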
+ std::vector data_dims; + std::vector data_mds; + std::vector data_memories; + + CHECK_EQ(node.GetInputs().size(), 2U); + for (auto entry : node.GetInputs()) { + auto data_shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; + dnnl::memory::desc data_md = GenDNNLMemDescByShape(data_shape, dt::f32); + + data_dims.push_back(data_shape); + data_mds.push_back(data_md); + data_memories.push_back(BindDNNLMemory(entry, data_md)); + } + CHECK(data_dims[0] == data_dims[1]); + auto out_md = data_mds[0]; + JSONGraphNodeEntry out_entry(nid, 0); + auto out_memory = BindDNNLMemory(out_entry, out_md); + + auto add_desc = + dnnl::binary::desc(dnnl::algorithm::binary_add, data_mds[0], data_mds[1], out_md); + auto add_prim_desc = dnnl::binary::primitive_desc(add_desc, engine_); + auto add = dnnl::binary(add_prim_desc); + net_.push_back(add); + + net_args_.push_back({{DNNL_ARG_SRC_0, data_memories[0]}, + {DNNL_ARG_SRC_1, data_memories[1]}, + {DNNL_ARG_DST, out_memory}}); + } + + // Read from DNNL memory (+offset) and write to the handle. + inline void read_from_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size, + size_t offset = 0) { + uint8_t* src = static_cast(mem.get_data_handle()); + std::copy(src + offset, src + size, reinterpret_cast(handle)); + } + + // Read from the handle and write to DNNL memory (+offset). + inline void write_to_dnnl_memory(void* handle, dnnl::memory& mem, size_t size, + size_t offset = 0) { + uint8_t* dst = static_cast(mem.get_data_handle()); + std::copy((uint8_t*)handle, (uint8_t*)handle + size, dst + offset); + } + +// Generate DNNL memory description and infer the data layout by the given shape. +inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape, dt dtype) { + dnnl::memory::desc data_md; + switch (shape.size()) { + case 2: + data_md = dnnl::memory::desc({shape, dtype, tag::ab}); + break; + case 3: + data_md = dnnl::memory::desc({shape, dtype, tag::abc}); + break; + case 4: + data_md = dnnl::memory::desc({shape, dtype, tag::abcd}); + break; + case 5: + data_md = dnnl::memory::desc({shape, dtype, tag::abcde}); + break; + default: + LOG(FATAL) << "Unsupported data shape dimension: " << shape.size(); + break; + } + return data_md; +} + +// Calculate the size of a given NDArray in bytes. +inline size_t GetNDArraySize(const NDArray& arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= static_cast(arr->shape[i]); + } + size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8; + return size; +} - private: /* The dnnl engine. */ dnnl::engine engine_; /* The dnnl stream. */ dnnl::stream stream_; + /* \brief A simple pool to contain the tensor for each node in the graph. */ + std::vector data_entry_; /* The network layers that are represented in dnnl primitives. */ std::vector net_; /* The memory that is consumed by arguments. */ std::vector> net_args_; + /* The node ID to its corresponding output memory. */ + std::unordered_map < uint32_t, + std::unordered_map>> node_out_mem_; + /* Indicate if the DNNL engine has been initialized. */ + bool is_init_ = false; + /* The only subgraph name for this module. 
*/ + std::string func_name_; }; TVM_REGISTER_GLOBAL("runtime.ext.dnnl") .set_body([](TVMArgs args, TVMRetValue* rv) { - auto n = tvm::runtime::make_object(args[0].operator std::string()); + auto n = tvm::runtime::make_object( + args[0].operator std::string(), args[1].operator std::string()); *rv = Module(n); }); +runtime::Module DNNLJSONRuntimeCreate(std::string func_name, std::string graph_json) { + auto n = make_object(func_name, graph_json); + return runtime::Module(n); +} + +TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate") +.set_body_typed(DNNLJSONRuntimeCreate); + } // namespace contrib } // namespace runtime } // namespace tvm diff --git a/src/runtime/json/json_node.h b/src/runtime/json/json_node.h index dec2d3b8228d..fd45ab226e19 100644 --- a/src/runtime/json/json_node.h +++ b/src/runtime/json/json_node.h @@ -84,7 +84,9 @@ class JSONGraphNodeEntry { } } + /*! \brief The json graph node ID. */ uint32_t id_; + /*! \brief The entry index. */ uint32_t index_; uint32_t version_; }; diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index 6765fa49e0e1..a8182a205eae 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -54,7 +54,7 @@ class JSONRuntimeBase : public ModuleNode { // The type key of each subclass can be saved to the json file and them // used to create the specific runtime during deserialization. // virtual const char* type_key() const = 0; - const char* type_key() const { return ""; } + const char* type_key() const { return "jsonruntime"; } virtual void Init() { LOG(FATAL) << "NYI"; } diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc index 3ed34bbac44c..8436a477e914 100644 --- a/src/runtime/json/json_runtime_driver.cc +++ b/src/runtime/json/json_runtime_driver.cc @@ -204,6 +204,14 @@ class JSONRuntimeDriver : public ModuleNode { TVM_REGISTER_GLOBAL("runtime.module.loadbinary_jsonruntime") .set_body_typed(JSONRuntimeDriver::LoadFromBinary); +runtime::Module JSONRuntimeDriverCreate(std::string graph_json) { + auto n = make_object(graph_json); + return runtime::Module(n); +} + +TVM_REGISTER_GLOBAL("runtime.JSONRuntimeDriverCreate") +.set_body_typed(JSONRuntimeDriverCreate); + } // namespace json } // namespace runtime } // namespace tvm diff --git a/tests/python/relay/test_external_runtime.py b/tests/python/relay/test_external_runtime.py index 7928e4d61b37..7c6199a87bfe 100644 --- a/tests/python/relay/test_external_runtime.py +++ b/tests/python/relay/test_external_runtime.py @@ -479,7 +479,7 @@ def test_engine_extern(): options=["-O2", "-std=c++14", "-I" + tmp_path.relpath("")]) def test_json_extern(): - if not tvm.get_global_func("module.loadfile_examplejson", True): + if not tvm.get_global_func("runtime.module.loadfile_examplejson", True): print("Skip because JSON example runtime is not enabled.") return diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py new file mode 100644 index 000000000000..dd318f1a47fa --- /dev/null +++ b/tests/python/relay/test_json_runtime.py @@ -0,0 +1,403 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for JSON codegen and runtime.""" +import os +import sys + +import numpy as np + +import tvm +import tvm.relay.op as reg +import tvm.relay.testing +from tvm import relay, runtime +from tvm.contrib import util +from tvm.relay import transform +from tvm.relay.analysis.analysis import to_json +from tvm.relay.backend import compile_engine +from tvm.relay.build_module import bind_params_by_name + + +def set_func_attr(func, compile_name, symbol_name): + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", compile_name) + func = func.with_attr("global_symbol", symbol_name) + return func + + +def check_result(mod, + ref_mod, + map_inputs, + out_shape, + tol=1e-5, + target="llvm", + ctx=tvm.cpu(), + params=None): + if sys.platform == "win32": + print("Skip test on Windows for now") + return + + # Run the reference result + compile_engine.get().clear() + with relay.build_config(opt_level=3): + json, lib, param = relay.build(ref_mod, target=target, params=params) + rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) + + for name, data in map_inputs.items(): + rt_mod.set_input(name, data) + rt_mod.set_input(**param) + rt_mod.run() + out = tvm.nd.empty(out_shape, ctx=ctx) + out = rt_mod.get_output(0, out) + ref_result = out.asnumpy() + + def check_vm_result(): + compile_engine.get().clear() + with relay.build_config(opt_level=3): + exe = relay.vm.compile(mod, target=target, params=params) + code, lib = exe.save() + exe = runtime.vm.Executable.load_exec(code, lib) + vm = runtime.vm.VirtualMachine(exe) + vm.init(ctx) + out = vm.run(**map_inputs) + tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) + + def check_graph_runtime_result(): + compile_engine.get().clear() + with relay.build_config(opt_level=3): + json, lib, param = relay.build(mod, target=target, params=params) + rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) + + for name, data in map_inputs.items(): + rt_mod.set_input(name, data) + rt_mod.set_input(**param) + rt_mod.run() + out = tvm.nd.empty(out_shape, ctx=ctx) + out = rt_mod.get_output(0, out) + tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) + + check_vm_result() + check_graph_runtime_result() + + +def test_conv2d(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + def conv2d_direct(): + dtype = 'float32' + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + + data0 = relay.var("data", shape=ishape, dtype=dtype) + weight0 = relay.var("weight", shape=w1shape, dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1)) + + func = relay.Function([data0, weight0], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data = relay.var("data", shape=(ishape), dtype=dtype) + weight = relay.var("weight", shape=(w1shape), dtype=dtype) + main_f = relay.Function([data, weight], glb_var(data, weight)) + mod["main"] = 
main_f + + data0 = relay.var("data", shape=ishape, dtype=dtype) + weight0 = relay.var("weight", shape=w1shape, dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1)) + main_f = relay.Function([data0, weight0], out) + ref_mod = tvm.IRModule() + ref_mod['main'] = main_f + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) + + return mod, ref_mod, {"data": i_data, "weight": w1_data}, (1, 32, 14, 14) + + def group_conv2d(): + dtype = 'float32' + ishape = (1, 32, 14, 14) + w2shape = (32, 1, 3, 3) + + data0 = relay.var("data", shape=(ishape), dtype=dtype) + weight0 = relay.var("weight", shape=(w2shape), dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=32) + + func = relay.Function([data0, weight0], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data = relay.var("data", shape=(ishape), dtype=dtype) + weight = relay.var("weight", shape=(w2shape), dtype=dtype) + main_f = relay.Function([data, weight], glb_var(data, weight)) + mod["main"] = main_f + + data0 = relay.var("data", shape=(ishape), dtype=dtype) + weight0 = relay.var("weight", shape=(w2shape), dtype=dtype) + out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=32) + main_f = relay.Function([data0, weight0], out) + ref_mod = tvm.IRModule() + ref_mod['main'] = main_f + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w_data = np.random.uniform(0, 1, w2shape).astype(dtype) + + return mod, ref_mod, {"data": i_data, "weight": w_data}, (1, 32, 14, 14) + + for mod, ref_mod, map_inputs, out_shape in [conv2d_direct(), group_conv2d()]: + # FIXME: Check accuracy. 
Current avg error: ~0.03 + check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-1) + + +def test_add(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + shape = (10, 10) + + def gen_add(): + data0 = relay.var("data0", shape=shape, dtype=dtype) + data1 = relay.var("data1", shape=shape, dtype=dtype) + out = relay.add(data0, data1) + + func = relay.Function([data0, data1], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data0 = relay.var("data0", shape=shape, dtype=dtype) + data1 = relay.var("data1", shape=shape, dtype=dtype) + main_f = relay.Function([data0, data1], glb_var(data0, data1)) + mod["main"] = main_f + + data0 = relay.var("data0", shape=shape, dtype=dtype) + data1 = relay.var("data1", shape=shape, dtype=dtype) + out = relay.add(data0, data1) + main_f = relay.Function([data0, data1], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_add() + + data0 = np.random.uniform(0, 1, shape).astype(dtype) + data1 = np.random.uniform(0, 1, shape).astype(dtype) + check_result(mod, ref_mod, {"data0": data0, "data1": data1}, shape, tol=1e-5) + + +def test_relu(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + shape = (1, 32, 14, 14) + + def gen_relu(): + data0 = relay.var("data0", shape=shape, dtype=dtype) + out = relay.nn.relu(data0) + + func = relay.Function([data0], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data0 = relay.var("data0", shape=shape, dtype=dtype) + main_f = relay.Function([data0], glb_var(data0)) + mod["main"] = main_f + + data0 = relay.var("data0", shape=shape, dtype=dtype) + out = relay.nn.relu(data0) + main_f = relay.Function([data0], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_relu() + + data0 = np.random.uniform(-1, 1, shape).astype(dtype) + check_result(mod, ref_mod, {"data0": data0,}, (1, 32, 14, 14), tol=1e-5) + + +def test_dense(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + a_shape = (1, 512) + b_shape = (1024, 512) + + def gen_dense(): + a = relay.var("A", shape=a_shape, dtype=dtype) + b = relay.var("B", shape=b_shape, dtype=dtype) + out = relay.nn.dense(a, b) + + func = relay.Function([a, b], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + a = relay.var("A", shape=a_shape, dtype=dtype) + b = relay.var("B", shape=b_shape, dtype=dtype) + main_f = relay.Function([a, b], glb_var(a, b)) + mod["main"] = main_f + + a = relay.var("A", shape=a_shape, dtype=dtype) + b = relay.var("B", shape=b_shape, dtype=dtype) + out = relay.nn.dense(a, b) + main_f = relay.Function([a, b], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_dense() + + data_a = np.random.uniform(0, 1, a_shape).astype(dtype) + data_b = np.random.uniform(0, 1, b_shape).astype(dtype) + check_result(mod, ref_mod, {"A": data_a, "B": data_b}, (1, 1024), tol=1e-5) + + +def test_bn(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not 
available") + return + + dtype = 'float32' + d_shape = (1, 8) + c_shape = (8, ) + + def gen_bn(): + data = relay.var('data', shape=d_shape) + gamma = relay.var("gamma", shape=c_shape) + beta = relay.var("beta", shape=c_shape) + moving_mean = relay.var("moving_mean", shape=c_shape) + moving_var = relay.var("moving_var", shape=c_shape) + bn = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var) + out = bn[0] + + func = relay.Function([data, gamma, beta, moving_mean, moving_var], out) + func = set_func_attr(func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = func + + data = relay.var('data', shape=d_shape) + gamma = relay.var("gamma", shape=c_shape) + beta = relay.var("beta", shape=c_shape) + moving_mean = relay.var("moving_mean", shape=c_shape) + moving_var = relay.var("moving_var", shape=c_shape) + main_f = relay.Function([data, gamma, beta, moving_mean, moving_var], + glb_var(data, gamma, beta, moving_mean, moving_var)) + mod["main"] = main_f + + data = relay.var('data', shape=d_shape) + gamma = relay.var("gamma", shape=c_shape) + beta = relay.var("beta", shape=c_shape) + moving_mean = relay.var("moving_mean", shape=c_shape) + moving_var = relay.var("moving_var", shape=c_shape) + bn = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var) + out = bn[0] + main_f = relay.Function([data, gamma, beta, moving_mean, moving_var], out) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_f + + return mod, ref_mod + + mod, ref_mod = gen_bn() + + data = np.random.uniform(-1, 1, d_shape).astype(dtype) + gamma = np.random.uniform(-1, 1, c_shape).astype(dtype) + beta = np.random.uniform(-1, 1, c_shape).astype(dtype) + moving_mean = np.random.uniform(-1, 1, c_shape).astype(dtype) + moving_var = np.random.uniform(-1, 1, c_shape).astype(dtype) + check_result(mod, + ref_mod, { + "data": data, + "gamma": gamma, + "beta": beta, + "moving_mean": moving_mean, + "moving_var": moving_var + }, + d_shape, + tol=1e-5) + + +def test_composite(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + + def after_partition(): + # Composite function + in_1 = relay.var("in_1", shape=ishape, dtype=dtype) + in_2 = relay.var("in_2", shape=w1shape, dtype=dtype) + conv2d = relay.nn.conv2d(in_1, in_2, kernel_size=(3, 3), padding=(1, 1)) + relu = relay.nn.relu(conv2d) + func = relay.Function([in_1, in_2], relu) + func = func.with_attr('Composite', 'conv2d_relu') + func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_nn.relu_') + + # Partition function + arg_1 = relay.var("arg_1", shape=ishape, dtype=dtype) + arg_2 = relay.var("arg_2", shape=w1shape, dtype=dtype) + call = relay.Call(func, [arg_1, arg_2]) + p_func = relay.Function([arg_1, arg_2], call) + p_func = set_func_attr(p_func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = p_func + + # Main function + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("input", shape=w1shape, dtype=dtype) + main_func = relay.Function([data, weight], glb_var(data, weight)) + mod["main"] = main_func + return mod + + mod = after_partition() + for global_var, func in mod.functions.items(): + if global_var.name_hint != 'main': + print(global_var) + print(to_json(func)) + + + +if __name__ == "__main__": + test_conv2d() + test_add() + test_relu() + test_dense() + test_bn() + #test_composite() From 
e1353a8ec12a72677a414c4ac5c3a537d5aab9ed Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Tue, 16 Jun 2020 01:11:35 +0000 Subject: [PATCH 06/30] add a more complex example --- tests/python/relay/test_json_runtime.py | 56 +++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index dd318f1a47fa..afd5d56ab5ed 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -350,6 +350,61 @@ def gen_bn(): tol=1e-5) +def test_multiple_ops(): + if not tvm.get_global_func("runtime.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + w2shape = (64, 32, 5, 5) + + def get_net(): + data = relay.var("data", relay.TensorType(ishape, dtype)) + w1 = relay.var("w1", relay.TensorType(w1shape, dtype)) + w2 = relay.var("w2", relay.TensorType(w2shape, dtype)) + + layer = relay.nn.conv2d(data=data, weight=w1, kernel_size=(3, 3), padding=(1, 1)) + layer = relay.nn.relu(layer) + layer = relay.nn.conv2d(data=layer, weight=w2, kernel_size=(5, 5), padding=(2, 2)) + layer = relay.nn.relu(layer) + + main_f = relay.Function([data, w1, w2], layer) + mod = tvm.IRModule() + mod["main"] = main_f + return mod + + def get_partitoned_mod(mod): + remove_bn_pass = tvm.transform.Sequential([ + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + ]) + byoc_pass = tvm.transform.Sequential([ + remove_bn_pass, + transform.AnnotateTarget("dnnl"), + transform.MergeCompilerRegions(), + transform.PartitionGraph() + ]) + + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): + return byoc_pass(mod) + + ref_mod = get_net() + mod = get_partitoned_mod(ref_mod) + + data = np.random.uniform(0, 1, ishape).astype(dtype) + w1 = np.random.uniform(0, 1, w1shape).astype(dtype) + w2 = np.random.uniform(0, 1, w2shape).astype(dtype) + check_result(mod, ref_mod, { + "data": data, + "w1": w1, + "w2": w2, + }, (1, 64, 14, 14), tol=1e-5) + + def test_composite(): if not tvm.get_global_func("runtime.ext.dnnl", True): print("skip because DNNL codegen is not available") @@ -400,4 +455,5 @@ def after_partition(): test_relu() test_dense() test_bn() + test_multiple_ops() #test_composite() From 4a11ffc7dfd58f9c25c31f1dbf26b9ee771b01fa Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 19 Jun 2020 01:36:45 +0000 Subject: [PATCH 07/30] fix bias memory issue --- src/relay/backend/contrib/dnnl/codegen.cc | 3 +++ src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index be37899b7180..bf0db8863ef9 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -408,6 +408,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { const auto* pf = runtime::Registry::Get("runtime.CSourceModuleCreate"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; return (*pf)(code, "c", sym, variables); + std::cout << code_stream_.str(); } private: @@ -436,6 +437,8 @@ std::string GetExtSymbol(const Function& func) { * compile it into a runtime module. 
 */
 runtime::Module DNNLCompiler(const ObjectRef& ref) {
+  // DNNLModuleCodegen dnnl;
+  // return dnnl.CreateCSourceModule(ref);
   std::string func_name;
   std::string graph_json;
   if (ref->IsInstance()) {
     auto func = Downcast(ref);
diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
index 05d308245a45..7dc2988d5b53 100644
--- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
+++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -207,7 +207,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
         this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_];
     std::vector str_strides = node.GetAttr>("strides");
     std::vector str_padding = node.GetAttr>("padding");
-    int groups = std::stoi(node.GetAttr>("groups")[0]);
+    dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]);

     dnnl::memory::dim N = input_shape[0],  // batch size
         IC = input_shape[1],               // input channels
@@ -245,8 +245,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
     // Memory descriptions.
     auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, tag::any);
     auto conv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, tag::any);
+    auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::any);
     auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, tag::nchw);
-    auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::a);

     // Conv2d description.
@@ -270,7 +270,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase {

     // Bias memory (useless for now as TVM conv2d op has no bias).
     std::vector bias(OC, 0);
-    auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_, bias.data());
+    auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_);
+    write_to_dnnl_memory(bias.data(), conv2d_bias_memory, OC * 4);

     // Output memory.
     JSONGraphNodeEntry out_entry(nid, 0);
@@ -433,7 +434,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
   inline void read_from_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size,
                                     size_t offset = 0) {
     uint8_t* src = static_cast(mem.get_data_handle());
-    std::copy(src + offset, src + size, reinterpret_cast(handle));
+    std::copy(src + offset, src + offset + size, (uint8_t*)handle);
   }

   // Read from the handle and write to DNNL memory (+offset).
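For reference, the helper fix in the patch above changes the copied source range from
[offset, size) to [offset, offset + size). A minimal standalone sketch of the corrected
semantics follows; the function name and signature are illustrative and not part of the patch:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Copy `size` bytes starting at byte `offset` of a DNNL-owned buffer.
    // The pre-fix version ended the copy at `src + size`, which drops the last
    // `offset` bytes whenever a nonzero offset is used (e.g. the beta half of a
    // batch_norm scale-shift memory).
    void read_bytes(const void* dnnl_buf, void* dst, std::size_t size, std::size_t offset) {
      const std::uint8_t* src = static_cast<const std::uint8_t*>(dnnl_buf) + offset;
      std::memcpy(dst, src, size);
    }
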
From ae7517c00c79a3e6a2c301b83ad37a03f600ae73 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Fri, 19 Jun 2020 06:05:29 +0000 Subject: [PATCH 08/30] rebase to upstream --- src/relay/backend/contrib/codegen_json/codegen_json.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 093d076c43b7..1c916348c594 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -253,7 +253,8 @@ class JSONSerializer : public MemoizedExprTranslatorop.as()) { OpAttrExtractor extractor(node); - extractor.Extract(const_cast(cn->attrs.get())); + const Object* call_attr = cn->attrs.get(); + extractor.Extract(const_cast(call_attr)); } else if (const auto* fn = cn->op.as()) { auto pattern = fn->GetAttr(attr::kPartitionedFromPattern); CHECK(pattern.defined()); From 67ff4e3e0e16b8dbb50819241feed626c7fd3efc Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Fri, 19 Jun 2020 17:23:31 +0000 Subject: [PATCH 09/30] merge to metadata module, remove the unused driver --- python/tvm/relay/analysis/analysis.py | 4 - .../contrib/codegen_json/codegen_json.h | 107 ++++----- src/relay/backend/contrib/dnnl/codegen.cc | 27 +-- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 23 +- src/runtime/json/json_runtime.h | 2 +- src/runtime/json/json_runtime_driver.cc | 218 ------------------ tests/python/relay/test_json_runtime.py | 3 - 7 files changed, 67 insertions(+), 317 deletions(-) delete mode 100644 src/runtime/json/json_runtime_driver.cc diff --git a/python/tvm/relay/analysis/analysis.py b/python/tvm/relay/analysis/analysis.py index adbc2e6222b1..c237859eb987 100644 --- a/python/tvm/relay/analysis/analysis.py +++ b/python/tvm/relay/analysis/analysis.py @@ -313,10 +313,6 @@ def detect_feature(a, b=None): return {Feature(int(x)) for x in _ffi_api.detect_feature(a, b)} -def to_json(expr): - return _ffi_api.ToJSON(expr) - - def extract_fused_functions(mod): """Pass to extract IRModule of only fused primitive functions. diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 1c916348c594..14cb14ecf781 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -50,12 +50,6 @@ using ShapeVector = std::vector >; using TypeVector = std::vector; using JSONGraphObjectPtr = std::shared_ptr; -/*! \brief The artifacts that needs to be serialized. */ -struct JSONOutput { - std::string graph_json; - std::unordered_map params; -}; - /*! * \brief Helper class to extract all attributes of a certain op and save them * into text format. @@ -163,8 +157,13 @@ class OpAttrExtractor : public AttrVisitor { /*! \brief Serialize a Relay expression to JSON. */ class JSONSerializer : public MemoizedExprTranslator> { public: - void Serialize(const Expr& expr) { - relay::Function func = Downcast(expr); + /*! + * \brief Constructor + * + * \param expr The Relay expression to be converted to the JSON form. + */ + JSONSerializer(const std::string& symbol, const Expr& expr) : symbol_(symbol) { + relay::Function func = Downcast(expr); // First we convert all the parameters into input nodes. for (const auto& param : func->params) { auto node_ptr = std::make_shared(param->name_hint(), "input" /* op_type_ */); @@ -173,35 +172,17 @@ class JSONSerializer : public MemoizedExprTranslatorbody); } - /*! 
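     // (Each parameter becomes one JSON node of op_type "input"; graph node
     //  entries later refer back to these nodes by id and output index.)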
- * \brief Save to JSON graph - * - * \param writer A json writer - */ - void Save(dmlc::JSONWriter* writer) { - std::vector arg_nodes; - for (size_t i = 0; i < nodes_.size(); ++i) { - auto node = nodes_[i]; - if (node->IsLeaf()) { - arg_nodes.push_back(i); - } - } - size_t num_entry = 0; - std::vector node_row_ptr{0}; - for (auto node : nodes_) { - num_entry += node->GetNumOutput(); - node_row_ptr.push_back(num_entry); - } - writer->BeginObject(); - writer->WriteObjectKeyValue("nodes", nodes_); - writer->WriteObjectKeyValue("arg_nodes", arg_nodes); - writer->WriteObjectKeyValue("heads", heads_); - writer->WriteObjectKeyValue("node_row_ptr", node_row_ptr); - writer->EndObject(); + /*!\brief Return the required params. */ + Array GetParams() const { + return params_; } - std::unordered_map GetParams() const { - return params_; + /*!\brief Return the generated json. */ + std::string GetJSON() { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + Save(&writer); + return os.str(); } protected: @@ -277,8 +258,8 @@ class JSONSerializer : public MemoizedExprTranslator VisitExpr_(const ConstantNode* cn) final { - std::string name = "const_" + std::to_string(params_.size()); - params_[name] = cn->data; + std::string name = symbol_ + "_const_" + std::to_string(params_.size()); + params_.push_back(name); auto node = std::make_shared(name, "const" /* op_type_ */); return AddNode(node, GetRef(cn)); } @@ -336,36 +317,46 @@ class JSONSerializer : public MemoizedExprTranslator arg_nodes; + for (size_t i = 0; i < nodes_.size(); ++i) { + auto node = nodes_[i]; + if (node->IsLeaf()) { + arg_nodes.push_back(i); + } + } + size_t num_entry = 0; + std::vector node_row_ptr{0}; + for (auto node : nodes_) { + num_entry += node->GetNumOutput(); + node_row_ptr.push_back(num_entry); + } + writer->BeginObject(); + writer->WriteObjectKeyValue("nodes", nodes_); + writer->WriteObjectKeyValue("arg_nodes", arg_nodes); + writer->WriteObjectKeyValue("heads", heads_); + writer->WriteObjectKeyValue("node_row_ptr", node_row_ptr); + writer->EndObject(); + } + private: + /*! \brief The symbol that represents the json graph. */ + std::string symbol_; /*! \brief JSON graph nodes. */ std::vector nodes_; /*! \brief Output of the JSON graph. */ std::vector heads_; - /*! \brief Constants. */ - std::unordered_map params_; + /*! \brief The list of required constants. 
*/ + Array params_; }; } // namespace contrib } // namespace backend - -std::string ToJSON(const Expr& expr) { - backend::contrib::JSONSerializer converter; - converter.Serialize(expr); - - std::ostringstream os; - dmlc::JSONWriter writer(&os); - converter.Save(&writer); - backend::contrib::JSONOutput ret; - ret.graph_json = os.str(); - ret.params = converter.GetParams(); - - backend::contrib::JSONRuntimeBase jr(ret.graph_json); - return ret.graph_json; -} - -TVM_REGISTER_GLOBAL("relay.analysis.ToJSON") -.set_body_typed(ToJSON); - } // namespace relay } // namespace tvm #endif // TVM_RELAY_BACKEND_CONTRIB_CODEGEN_JSON_CODEGEN_JSON_H_ diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index bf0db8863ef9..3b6d88b31427 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -439,27 +439,16 @@ std::string GetExtSymbol(const Function& func) { runtime::Module DNNLCompiler(const ObjectRef& ref) { // DNNLModuleCodegen dnnl; // return dnnl.CreateCSourceModule(ref); - std::string func_name; - std::string graph_json; - if (ref->IsInstance()) { - auto func = Downcast(ref); - func_name = GetExtSymbol(func); - graph_json = ToJSON(func); - } else if (ref->IsInstance()) { - IRModule mod = Downcast(ref); - CHECK_EQ(mod->functions.size(), 1U) << "Only support single subgraph"; - for (const auto& it : mod->functions) { - auto func = Downcast(it.second); - func_name = GetExtSymbol(func); - graph_json = ToJSON(func); - } - } else { - LOG(FATAL) << "The input ref is expected to be a Relay function or module\n"; - } + CHECK(ref->IsInstance()); + auto func = Downcast(ref); + auto func_name = GetExtSymbol(func); + backend::contrib::JSONSerializer converter(func_name, func); + std::string graph_json = converter.GetJSON(); + auto params = converter.GetParams(); const auto* pf = runtime::Registry::Get("runtime.DNNLJSONRuntimeCreate"); - CHECK(pf != nullptr) << "Cannot find JSON runtime driver module to create"; - auto mod = (*pf)(func_name, graph_json); + CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; + auto mod = (*pf)(func_name, graph_json, params); return mod; } diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 7dc2988d5b53..c7bc3889240f 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -44,11 +44,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { using dt = dnnl::memory::data_type; public: - explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json) - : JSONRuntimeBase(graph_json), func_name_(func_name) {} - ~DNNLJSONRuntime() = default; - - const char* type_key() const { return "dnnljsonruntime"; } + explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json, + const Array const_names) + : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { if (!this->is_init_) { @@ -494,17 +492,14 @@ inline size_t GetNDArraySize(const NDArray& arr) { bool is_init_ = false; /* The only subgraph name for this module. */ std::string func_name_; + /* The required constant names. 
*/ + Array const_names_; }; -TVM_REGISTER_GLOBAL("runtime.ext.dnnl") -.set_body([](TVMArgs args, TVMRetValue* rv) { - auto n = tvm::runtime::make_object( - args[0].operator std::string(), args[1].operator std::string()); - *rv = Module(n); -}); - -runtime::Module DNNLJSONRuntimeCreate(std::string func_name, std::string graph_json) { - auto n = make_object(func_name, graph_json); +runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, + const Array& const_names) { + auto n = make_object(func_name.operator std::string(), + graph_json.operator std::string(), const_names); return runtime::Module(n); } diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index a8182a205eae..38e195297e96 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -54,7 +54,7 @@ class JSONRuntimeBase : public ModuleNode { // The type key of each subclass can be saved to the json file and them // used to create the specific runtime during deserialization. // virtual const char* type_key() const = 0; - const char* type_key() const { return "jsonruntime"; } + const char* type_key() const { return "json"; } virtual void Init() { LOG(FATAL) << "NYI"; } diff --git a/src/runtime/json/json_runtime_driver.cc b/src/runtime/json/json_runtime_driver.cc deleted file mode 100644 index 8436a477e914..000000000000 --- a/src/runtime/json/json_runtime_driver.cc +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/json/json_runtime_driver.cc - * \brief The driver for json runtime. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "json_node.h" -#include "json_runtime.h" - -namespace tvm { -namespace runtime { -namespace json { - -/*! - * \brief The class represents a json runtime driver. It is mainly responsible - * for 1) serializing and deserializing the json runtime artifacts, 2) - * dispatching and invoking the actual runtime that intepretes the json - * artifacts. - */ -class JSONRuntimeDriver : public ModuleNode { - public: - struct Subgraph { - std::string symbol_name; - std::string graph_json; - std::unordered_map weights; - }; - - explicit JSONRuntimeDriver(const std::string& graph_json) { - this->graph_json_ = graph_json; - Deserialize(); - } - - const char* type_key() const { return "jsonruntime"; } - - /*! - * \brief Get a packed function. - * \param name The name/symbol of the function. - * \param sptr_to_self The pointer to the module node. - * \return The packed function. 
- */ - PackedFunc GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { - if (this->subgraphs_.count(name)) { - return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { - auto json_rt = this->subgraphs_[name]; - auto* json_rt_node = static_cast(json_rt.operator->()); - CHECK(json_rt_node); - // Set input, how to make sure it is only invoked once? Likely we don't - // really need this as we could directly set input when creating the - // engine, but what if the input for each inference varies. - // json_rt_node->SetInput(); - // - // Execute the egine - json_rt_node->Run(); - - // Get the output, set rv or fill directly to args? - *rv = json_rt_node->GetOutput(); - }); - } else { - // Issue a warning when we don't find the symbol from the module. Note - // we don't kill the execution here as the symbol may exist in other - // runtime modules. - LOG(WARNING) << "Cannot find " << name << " from json runtime"; - return PackedFunc(); - } - } - - void Deserialize() { - std::vector subgraphs; - dmlc::MemoryStringStream memstrm(&graph_json_); - dmlc::Stream* strm = &memstrm; - // Header - uint64_t header; - CHECK(strm->Read(&header)) << "Invalid serialized file format"; - - // Compiler name - std::string compiler_name; - CHECK(strm->Read(&compiler_name)) << "Invalid serialized file format"; - - uint64_t num_subgraphs; - CHECK(strm->Read(&num_subgraphs)) << "Invalid serialized file format"; - // CHECK(header == kTVMJSONRuntimeMagic) << "Invalid serialized file format"; - - for (uint64_t i = 0; i < num_subgraphs; i++) { - Subgraph g; - // Load the symbol for runtime lookup. - std::string symbol_name; - CHECK(strm->Read(&symbol_name)) << "Invalid serialized file format"; - g.symbol_name = symbol_name; - - // Load the graph representation. - std::string json_graph; - CHECK(strm->Read(&json_graph)) << "Invalid serialized file format"; - g.graph_json = json_graph; - - // Load the weights for the graph. - uint64_t num_params; - CHECK(strm->Read(&num_params)) << "Invalid serialized file format"; - - std::vector names; - CHECK(strm->Read(&names)) << "Invalid serialized file format"; - CHECK_EQ(names.size(), num_params) << "Invalid serialized file format"; - - for (size_t i = 0; i < static_cast(num_params); i++) { - NDArray tmp; - tmp.Load(strm); - g.weights[names[i]] = tmp; - } - subgraphs.push_back(g); - } - CreateSubgraphs(subgraphs, compiler_name); - } - - // Create subgraphs for a specific runtime and cache it, therefore, we can - // invoke them without the need to repeatedly create them at runtime. - void CreateSubgraphs(const std::vector& subgraphs, - const std::string& compiler_name) { - // How do we know which runtime to create? Should we bake something in the - // json to indicate this? i.e. we can register a runtime "runtime.ext.dnnl" - // and save dnnl. Now we can just get it from the registry using dnnl. This - // requires us to have single place to invoke different external codegens - // and serialize them. - // - std::string ext_runtime_name = "runtime.ext." + compiler_name; - auto pf = tvm::runtime::Registry::Get(ext_runtime_name); - CHECK(pf) << "Failed to find the extern runtime for " << ext_runtime_name; - for (const auto& sg : subgraphs) { - CHECK_EQ(subgraphs_.count(sg.graph_json), 0U) - << "Found duplicated symbol: " << sg.graph_json; - - Module ext_mod = (*pf)(sg.graph_json); - const auto* json_rt_node = ext_mod.as(); - CHECK(json_rt_node); - // Set up the params that are constants. 
- for (const auto& it : sg.weights) { - CallPakcedFunc(ext_mod, "set_input", it.first, it.second); - } - // Init the engine - CallPakcedFunc(ext_mod, "init"); - - subgraphs_[sg.graph_json] = ext_mod; - } - } - - static Module LoadFromBinary(void* strm) { - dmlc::Stream* stream = static_cast(strm); - std::string graph; - stream->Read(&graph); - auto n = make_object(graph); - return Module(n); - } - - void SaveToBinary(dmlc::Stream* stream) override { - stream->Write(this->graph_json_); - } - - private: - template - void CallPakcedFunc(Module mod, const std::string& name, Args... args) { - auto pf = mod.GetFunction(name); - pf(std::forward(args)...); - } - - /*! \brief The graph json. Weights are also baked in. */ - std::string graph_json_; - /*! - * \brief Cache the created runtime module that can be directly invoked. - * - * The runtime could be a csource runtime or a any user defined runtime that - * is extend from the JSONRuntimeBase class. - */ - std::unordered_map subgraphs_; -}; - -TVM_REGISTER_GLOBAL("runtime.module.loadbinary_jsonruntime") -.set_body_typed(JSONRuntimeDriver::LoadFromBinary); - -runtime::Module JSONRuntimeDriverCreate(std::string graph_json) { - auto n = make_object(graph_json); - return runtime::Module(n); -} - -TVM_REGISTER_GLOBAL("runtime.JSONRuntimeDriverCreate") -.set_body_typed(JSONRuntimeDriverCreate); - -} // namespace json -} // namespace runtime -} // namespace tvm - diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index afd5d56ab5ed..dda9bb70ccdc 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -26,7 +26,6 @@ from tvm import relay, runtime from tvm.contrib import util from tvm.relay import transform -from tvm.relay.analysis.analysis import to_json from tvm.relay.backend import compile_engine from tvm.relay.build_module import bind_params_by_name @@ -445,8 +444,6 @@ def after_partition(): for global_var, func in mod.functions.items(): if global_var.name_hint != 'main': print(global_var) - print(to_json(func)) - if __name__ == "__main__": From e76860feba4364fe4f2deff78751303605053b67 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 19 Jun 2020 21:27:25 +0000 Subject: [PATCH 10/30] support composite functions --- .../contrib/codegen_json/codegen_json.h | 24 +++-- src/relay/backend/contrib/dnnl/codegen.cc | 49 ++++++++- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 41 ++++--- tests/python/relay/test_json_runtime.py | 100 ++++++++++++++---- 4 files changed, 168 insertions(+), 46 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 14cb14ecf781..69140435ac82 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -162,8 +162,10 @@ class JSONSerializer : public MemoizedExprTranslator(expr); + JSONSerializer(const std::string& symbol, const Expr& expr) : symbol_(symbol), func_(expr) {} + + void serialize() { + relay::Function func = Downcast(func_); // First we convert all the parameters into input nodes. 
for (const auto& param : func->params) { auto node_ptr = std::make_shared(param->name_hint(), "input" /* op_type_ */); @@ -247,24 +249,24 @@ class JSONSerializer : public MemoizedExprTranslator VisitExprDefault_(const Object* op) final { + std::vector VisitExprDefault_(const Object* op) { LOG(FATAL) << "JSON runtime currently doesn't support " << op->GetTypeKey(); return {}; } - std::vector VisitExpr_(const VarNode* vn) final { + std::vector VisitExpr_(const VarNode* vn) { CHECK(memo_.count(GetRef(vn))); return memo_[GetRef(vn)]; } - std::vector VisitExpr_(const ConstantNode* cn) final { + std::vector VisitExpr_(const ConstantNode* cn) { std::string name = symbol_ + "_const_" + std::to_string(params_.size()); params_.push_back(name); auto node = std::make_shared(name, "const" /* op_type_ */); return AddNode(node, GetRef(cn)); } - std::vector VisitExpr_(const TupleNode* tn) final { + std::vector VisitExpr_(const TupleNode* tn) { std::vector fields; for (const auto& field : tn->fields) { auto ref = VisitExpr(field); @@ -273,7 +275,7 @@ class JSONSerializer : public MemoizedExprTranslator VisitExpr_(const CallNode* cn) final { + std::vector VisitExpr_(const CallNode* cn) { Expr expr = GetRef(cn); std::string name; if (const auto* op_node = cn->op.as()) { @@ -299,18 +301,18 @@ class JSONSerializer : public MemoizedExprTranslator(cn)); } - std::vector VisitExpr_(const LetNode* ln) final { + std::vector VisitExpr_(const LetNode* ln) { CHECK_EQ(memo_.count(ln->var), 0); memo_[ln->var] = VisitExpr(ln->value); return VisitExpr(ln->body); } - std::vector VisitExpr_(const TupleGetItemNode* gtn) final { + std::vector VisitExpr_(const TupleGetItemNode* gtn) { auto vtuple = VisitExpr(gtn->tuple); return {vtuple[gtn->index]}; } - std::vector VisitExpr_(const FunctionNode* fn) final { + std::vector VisitExpr_(const FunctionNode* fn) { CHECK(fn->GetAttr(attr::kComposite).defined()) << "JSON runtime only supports composite functions"; // FunctionNode should be handled by the caller. @@ -347,6 +349,8 @@ class JSONSerializer : public MemoizedExprTranslator nodes_; /*! \brief Output of the JSON graph. 
*/ diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 3b6d88b31427..6be176456921 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -33,6 +33,7 @@ #include #include +#include "../../../../runtime/json/json_node.h" #include "../../utils.h" #include "../codegen_c/codegen_c.h" #include "../codegen_json/codegen_json.h" @@ -42,6 +43,7 @@ namespace relay { namespace contrib { using namespace backend; +using namespace tvm::runtime::json; inline size_t GetShape1DSize(const Type& type) { const auto shape = GetShape(type); @@ -419,6 +421,46 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { std::ostringstream code_stream_; }; +class DNNLJSONSerializer : public backend::contrib::JSONSerializer { + public: + DNNLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {} + + std::vector VisitExpr_(const CallNode* cn) override { + Expr expr = GetRef(cn); + std::string name; + const CallNode* call = cn; + if (const auto* op_node = cn->op.as()) { + name = op_node->name; + } else if (const auto* fn = cn->op.as()) { + auto comp = fn->GetAttr(attr::kComposite); + CHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions."; + name = comp.value().operator std::string(); + + if (name == "conv2d_bias_relu") { + call = GetRootCall(fn->body.as(), 2, {"nn.conv2d", "add", "nn.relu"}); + } else if (name == "conv2d_relu") { + call = GetRootCall(fn->body.as(), 1, {"nn.conv2d", "nn.relu"}); + CHECK(call->op.as()) << "Not op node"; + } else { + LOG(FATAL) << "Unrecognized DNNL pattern: " << name; + } + } else { + LOG(FATAL) << "DNNL JSON runtime does not support calls to " << cn->op->GetTypeKey(); + } + + std::vector inputs; + for (const auto& arg : cn->args) { + auto res = VisitExpr(arg); + inputs.insert(inputs.end(), res.begin(), res.end()); + } + auto node = std::make_shared(name, /* name_ */ + "kernel", /* op_type_ */ + inputs, 1 /* num_outputs_ */); + SetCallNodeAttribute(node, call); + return AddNode(node, GetRef(cn)); + } +}; + /*! * \brief Get the external symbol of the Relay function name. 
* @@ -442,9 +484,10 @@ runtime::Module DNNLCompiler(const ObjectRef& ref) { CHECK(ref->IsInstance()); auto func = Downcast(ref); auto func_name = GetExtSymbol(func); - backend::contrib::JSONSerializer converter(func_name, func); - std::string graph_json = converter.GetJSON(); - auto params = converter.GetParams(); + DNNLJSONSerializer serializer(func_name, func); + serializer.serialize(); + std::string graph_json = serializer.GetJSON(); + auto params = serializer.GetParams(); const auto* pf = runtime::Registry::Get("runtime.DNNLJSONRuntimeCreate"); CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index c7bc3889240f..fac9765c421d 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -49,11 +49,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - if (!this->is_init_) { - Init(); - BuildEngine(); - } - this->is_init_ = true; if (this->func_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -86,10 +81,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { this->data_entry_[idx].CopyTo(arg); } } - - // FIXME: Multiple outputs. - //*rv = data_entry_.back(); }); + } else if ("__init_" + this->func_name_ == name) { + if (!this->is_init_) { + Init(); + } + this->is_init_ = true; + return PackedFunc(); } else { LOG(WARNING) << "Unknown DNNL symbol " << name; return PackedFunc(); @@ -128,9 +126,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { void Init() override { engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); stream_ = dnnl::stream(engine_); - } - void BuildEngine() { // Build subgraph engine. for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { const auto& node = nodes_[nid]; @@ -139,6 +135,10 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto op_name = node.GetOpName(); if ("nn.conv2d" == op_name) { Conv2d(nid); + } else if ("conv2d_relu" == op_name) { + Conv2d(nid, true, false); + } else if ("conv2d_bias_relu" == op_name) { + Conv2d(nid, true, true); } else if ("nn.dense" == op_name) { Dense(nid); } else if ("nn.batch_norm" == op_name) { @@ -194,7 +194,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { return node_out_mem_[entry.id_][entry.index_].first; } - void Conv2d(const size_t& nid) { + void Conv2d(const size_t& nid, const bool has_relu=false, const bool has_bias=false) { auto node = this->nodes_[nid]; // Setup attributes. @@ -250,7 +250,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv_desc = dnnl::convolution_forward::desc( dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md, conv_weights_md, conv_bias_md, conv_dst_md, strides_dims, padding_dims_l, padding_dims_r); + + // Enable ReLU dnnl::primitive_attr attr; + if (has_relu) { + dnnl::post_ops ops; + ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f); + attr.set_post_ops(ops); + } + auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_); // Push to the network. @@ -266,10 +274,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv2d_weights_memory = BindDNNLMemory( weight_entry, {weights_dims, dt::f32, (groups > 1) ? tag::goihw : tag::oihw}); - // Bias memory (useless for now as TVM conv2d op has no bias). 
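The `primitive_attr`/`post_ops` pair introduced above is the stock DNNL v1.x fusion mechanism, condensed here for reference (all of these calls appear verbatim in the hunk):

// Appending an eltwise post-op folds the ReLU into the convolution kernel.
dnnl::post_ops ops;
ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f);  // scale, alg, alpha, beta
dnnl::primitive_attr attr;
attr.set_post_ops(ops);
// Building the primitive_desc with `attr` yields one fused primitive, so no
// separate ReLU primitive is pushed to the network.
auto prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_);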
- std::vector bias(OC, 0); + // Bias memory. auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_); - write_to_dnnl_memory(bias.data(), conv2d_bias_memory, OC * 4); + if (has_bias) { + auto bias_entry = node.GetInputs()[2]; + BindDNNLMemory(bias_entry, conv2d_bias_memory); + } else { + float bias[OC] = {0}; + write_to_dnnl_memory(bias, conv2d_bias_memory, OC * sizeof(float)); + } // Output memory. JSONGraphNodeEntry out_entry(nid, 0); diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index dda9bb70ccdc..6ffe0995a413 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -94,7 +94,7 @@ def check_graph_runtime_result(): def test_conv2d(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -163,12 +163,11 @@ def group_conv2d(): return mod, ref_mod, {"data": i_data, "weight": w_data}, (1, 32, 14, 14) for mod, ref_mod, map_inputs, out_shape in [conv2d_direct(), group_conv2d()]: - # FIXME: Check accuracy. Current avg error: ~0.03 - check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-1) + check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-5) def test_add(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -208,7 +207,7 @@ def gen_add(): def test_relu(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -244,7 +243,7 @@ def gen_relu(): def test_dense(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -285,7 +284,7 @@ def gen_dense(): def test_bn(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -350,7 +349,7 @@ def gen_bn(): def test_multiple_ops(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -405,15 +404,16 @@ def get_partitoned_mod(mod): def test_composite(): - if not tvm.get_global_func("runtime.ext.dnnl", True): + if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return dtype = 'float32' - ishape = (1, 32, 14, 14) - w1shape = (32, 32, 3, 3) - def after_partition(): + def conv2d_relu(): + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + # Composite function in_1 = relay.var("in_1", shape=ishape, dtype=dtype) in_2 = relay.var("in_2", shape=w1shape, dtype=dtype) @@ -435,15 +435,77 @@ def after_partition(): # Main function data = relay.var("data", shape=ishape, dtype=dtype) - weight = relay.var("input", shape=w1shape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) main_func = relay.Function([data, weight], glb_var(data, weight)) mod["main"] = main_func - return mod - mod = after_partition() - for global_var, func in mod.functions.items(): - if global_var.name_hint != 'main': - print(global_var) + # Reference module + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) + conv2d = relay.nn.conv2d(data, 
weight, kernel_size=(3, 3), padding=(1, 1)) + relu = relay.nn.relu(conv2d) + main_func = relay.Function([data, weight], relu) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_func + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) + + return mod, ref_mod, {'data': i_data, 'weight': w1_data}, (1, 32, 14, 14) + + def conv2d_bias_relu(): + ishape = (1, 32, 14, 14) + w1shape = (32, 32, 3, 3) + bshape = (32, 1, 1) + + # Composite function + in_1 = relay.var("in_1", shape=ishape, dtype=dtype) + in_2 = relay.var("in_2", shape=w1shape, dtype=dtype) + in_3 = relay.var("in_3", shape=bshape, dtype=dtype) + conv2d = relay.nn.conv2d(in_1, in_2, kernel_size=(3, 3), padding=(1, 1)) + add = relay.add(conv2d, in_3) + relu = relay.nn.relu(add) + func = relay.Function([in_1, in_2, in_3], relu) + func = func.with_attr('Composite', 'conv2d_bias_relu') + func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_add_nn.relu_') + + # Partition function + arg_1 = relay.var("arg_1", shape=ishape, dtype=dtype) + arg_2 = relay.var("arg_2", shape=w1shape, dtype=dtype) + arg_3 = relay.var("arg_3", shape=bshape, dtype=dtype) + call = relay.Call(func, [arg_1, arg_2, arg_3]) + p_func = relay.Function([arg_1, arg_2, arg_3], call) + p_func = set_func_attr(p_func, "dnnl", "dnnl_0") + glb_var = relay.GlobalVar("dnnl_0") + mod = tvm.IRModule() + mod[glb_var] = p_func + + # Main function + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) + bias = relay.var('bias', shape=bshape, dtype=dtype) + main_func = relay.Function([data, weight, bias], glb_var(data, weight, bias)) + mod["main"] = main_func + + # Reference module + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=w1shape, dtype=dtype) + bias = relay.var('bias', shape=bshape, dtype=dtype) + conv2d = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1)) + add = relay.add(conv2d, bias) + relu = relay.nn.relu(add) + main_func = relay.Function([data, weight, bias], relu) + ref_mod = tvm.IRModule() + ref_mod["main"] = main_func + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) + b_data = np.random.uniform(0, 1, bshape).astype(dtype) + + return mod, ref_mod, {'data': i_data, 'weight': w1_data, 'bias': b_data}, (1, 32, 14, 14) + + for mod, ref_mod, input_maps, out_shape in [conv2d_relu(), conv2d_bias_relu()]: + check_result(mod, ref_mod, input_maps, out_shape, tol=1e-5) if __name__ == "__main__": @@ -453,4 +515,4 @@ def after_partition(): test_dense() test_bn() test_multiple_ops() - #test_composite() + test_composite() From 71f9e965ef5ad93c595f99d839dd04bd4316d725 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Fri, 19 Jun 2020 23:36:54 +0000 Subject: [PATCH 11/30] support DNNL constant --- src/relay/backend/contrib/dnnl/codegen.cc | 4 +- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 120 +++++++++++++----- tests/python/relay/test_json_runtime.py | 54 +++++++- 3 files changed, 142 insertions(+), 36 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 6be176456921..67dba2969923 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -436,9 +436,9 @@ class DNNLJSONSerializer : public backend::contrib::JSONSerializer { CHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions."; name = 
comp.value().operator std::string(); - if (name == "conv2d_bias_relu") { + if (name == "dnnl.conv2d_bias_relu") { call = GetRootCall(fn->body.as(), 2, {"nn.conv2d", "add", "nn.relu"}); - } else if (name == "conv2d_relu") { + } else if (name == "dnnl.conv2d_relu") { call = GetRootCall(fn->body.as(), 1, {"nn.conv2d", "nn.relu"}); CHECK(call->op.as()) << "Not op node"; } else { diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index fac9765c421d..34646e52dae7 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -49,45 +49,73 @@ class DNNLJSONRuntime : public JSONRuntimeBase { : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - - if (this->func_name_ == name) { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->func_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if (this->func_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - for (auto i = 0; i < args.size(); ++i) { - // Setup data entries. - CHECK(args[i].type_code() == kTVMNDArrayHandle || - args[i].type_code() == kTVMDLTensorHandle) + size_t arg_idx = 0; + + // Set input data entries. + for (size_t i = 0; i < this->input_nodes_.size(); ++i) { + if (this->is_const_input_[i]) { + continue; + } + auto nid = this->input_nodes_[i]; + + CHECK(args[arg_idx].type_code() == kTVMNDArrayHandle || + args[arg_idx].type_code() == kTVMDLTensorHandle) << "Expect NDArray or DLTensor as inputs\n"; - if (args[i].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[i]; - this->data_entry_[i].CopyFrom(arg); + if (args[arg_idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[arg_idx]; + this->data_entry_[nid][0].CopyFrom(arg); } else { - NDArray arg = args[i]; - this->data_entry_[i].CopyFrom(arg); + NDArray arg = args[arg_idx]; + this->data_entry_[nid][0].CopyFrom(arg); } + CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); + arg_idx++; } // Execute the subgraph. this->Run(); - // Get result. - auto offset = this->input_nodes_.size(); + // Copy result to output buffer. for (size_t i = 0; i < this->outputs_.size(); ++i) { - size_t idx = i + offset; - if (args[idx].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[idx]; - this->data_entry_[idx].CopyTo(arg); + auto entry = this->outputs_[i]; + + if (args[arg_idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[arg_idx]; + this->data_entry_[entry.id_][entry.index_].CopyTo(arg); } else { - NDArray arg = args[idx]; - this->data_entry_[idx].CopyTo(arg); + NDArray arg = args[arg_idx]; + this->data_entry_[entry.id_][entry.index_].CopyTo(arg); } + CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); + arg_idx++; } }); } else if ("__init_" + this->func_name_ == name) { + // Build DNNL engine and memory. if (!this->is_init_) { Init(); } this->is_init_ = true; - return PackedFunc(); + + // The function to initialize constant tensors. 
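The `__init_<symbol>` branch below completes a two-step calling convention: bind the constants once, then invoke the symbol repeatedly. A rough driver-side sketch; the module handle, symbol name, and tensors are placeholders, not part of the patch:

// Assuming `mod` was produced via the runtime.DNNLJSONRuntimeCreate global
// and `consts` holds the weights in the order recorded by the serializer.
tvm::runtime::PackedFunc init = mod.GetFunction("__init_dnnl_0");
init(consts);                    // one-time constant binding
tvm::runtime::PackedFunc run = mod.GetFunction("dnnl_0");
run(data, out);                  // per-inference invocation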
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size(), 1U); + Array consts = args[0]; + for (size_t i = 0; i < consts.size(); ++i) { + CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; + auto nid = const_idx_to_nid_[i]; + this->data_entry_[nid][0].CopyFrom(consts[i]); + } + *rv = 0; + }); } else { LOG(WARNING) << "Unknown DNNL symbol " << name; return PackedFunc(); @@ -100,8 +128,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto nid = this->input_nodes_[i]; // TODO: Support other data lengths. size_t offset_in_bytes = this->node_out_mem_[nid][0].second * 4; - write_to_dnnl_memory(this->data_entry_[i]->data, this->node_out_mem_[nid][0].first, - GetNDArraySize(this->data_entry_[i]), offset_in_bytes); + write_to_dnnl_memory(this->data_entry_[nid][0]->data, this->node_out_mem_[nid][0].first, + GetNDArraySize(this->data_entry_[nid][0]), offset_in_bytes); } // Invoke the engine. @@ -111,15 +139,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { stream_.wait(); // Read output buffers. - auto offset = this->input_nodes_.size(); for (size_t i = 0; i < this->outputs_.size(); ++i) { auto out_entry = this->outputs_[i]; auto nid = out_entry.id_; auto idx = out_entry.index_; size_t offset_in_bytes = this->node_out_mem_[nid][idx].second * 4; - read_from_dnnl_memory(this->data_entry_[offset + i]->data, - this->node_out_mem_[nid][idx].first, - GetNDArraySize(this->data_entry_[offset + i]), offset_in_bytes); + read_from_dnnl_memory(this->data_entry_[nid][idx]->data, this->node_out_mem_[nid][idx].first, + GetNDArraySize(this->data_entry_[nid][idx]), offset_in_bytes); } } @@ -135,9 +161,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto op_name = node.GetOpName(); if ("nn.conv2d" == op_name) { Conv2d(nid); - } else if ("conv2d_relu" == op_name) { + } else if ("dnnl.conv2d_relu" == op_name) { Conv2d(nid, true, false); - } else if ("conv2d_bias_relu" == op_name) { + } else if ("dnnl.conv2d_bias_relu" == op_name) { Conv2d(nid, true, true); } else if ("nn.dense" == op_name) { Dense(nid); @@ -150,6 +176,30 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } else { LOG(FATAL) << "Unsupported op: " << op_name; } + } else if (node.GetOpType() == "const") { + auto name = node.GetOpName(); + bool found = false; + for (size_t cid = 0; cid < const_names_.size(); ++cid) { + if (name == const_names_[cid]) { + found = true; + const_idx_to_nid_[cid] = nid; + break; + } + } + if (!found) { + LOG(FATAL) << "Unrecognized constant node: " << name; + } + } + } + + this->is_const_input_.resize(this->input_nodes_.size()); + for (size_t nid = 0; nid < this->input_nodes_.size(); ++nid) { + const auto& node = nodes_[nid]; + if (node.GetOpType() == "input") { + this->is_const_input_[nid] = false; + } else { + CHECK_EQ(node.GetOpType(), "const"); + this->is_const_input_[nid] = true; } } @@ -159,12 +209,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { ctx.device_id = 0; for (size_t i = 0; i < this->input_nodes_.size(); ++i) { auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; - this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); + auto nid = this->input_nodes_[i]; + this->data_entry_[nid][0] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } for (size_t i = 0; i < this->outputs_.size(); ++i) { auto entry = this->outputs_[i]; auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); 
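The surrounding change replaces the flat `data_entry_` vector with a two-level map, so an output is addressed by node id plus output index instead of a computed offset. A toy model of that pool:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

// Outer key: JSON node id; inner key: output index. Multi-output nodes such
// as batch_norm then need no offset bookkeeping.
int main() {
  std::unordered_map<uint32_t, std::unordered_map<uint32_t, std::string>> pool;
  pool[5][0] = "bn output";
  pool[5][1] = "bn running mean";
  std::cout << pool[5][1] << std::endl;
  return 0;
}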
+ this->data_entry_[entry.id_][entry.index_] = + NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } } @@ -492,8 +544,8 @@ inline size_t GetNDArraySize(const NDArray& arr) { dnnl::engine engine_; /* The dnnl stream. */ dnnl::stream stream_; - /* \brief A simple pool to contain the tensor for each node in the graph. */ - std::vector data_entry_; + /* \brief A simple pool to map from node ID to the output tensors. */ + std::unordered_map> data_entry_; /* The network layers that are represented in dnnl primitives. */ std::vector net_; /* The memory that is consumed by arguments. */ @@ -507,6 +559,10 @@ inline size_t GetNDArraySize(const NDArray& arr) { std::string func_name_; /* The required constant names. */ Array const_names_; + /* Indicate if an input node is a constant node. */ + std::vector is_const_input_; + /* Map from constant index to JSON constant node ID. */ + std::unordered_map const_idx_to_nid_; }; runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index 6ffe0995a413..cdba3c92a69f 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -28,6 +28,7 @@ from tvm.relay import transform from tvm.relay.backend import compile_engine from tvm.relay.build_module import bind_params_by_name +from tvm.relay.op.contrib.register import get_pattern_table def set_func_attr(func, compile_name, symbol_name): @@ -420,7 +421,7 @@ def conv2d_relu(): conv2d = relay.nn.conv2d(in_1, in_2, kernel_size=(3, 3), padding=(1, 1)) relu = relay.nn.relu(conv2d) func = relay.Function([in_1, in_2], relu) - func = func.with_attr('Composite', 'conv2d_relu') + func = func.with_attr('Composite', 'dnnl.conv2d_relu') func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_nn.relu_') # Partition function @@ -466,7 +467,7 @@ def conv2d_bias_relu(): add = relay.add(conv2d, in_3) relu = relay.nn.relu(add) func = relay.Function([in_1, in_2, in_3], relu) - func = func.with_attr('Composite', 'conv2d_bias_relu') + func = func.with_attr('Composite', 'dnnl.conv2d_bias_relu') func = func.with_attr('PartitionedFromPattern', 'nn.conv2d_add_nn.relu_') # Partition function @@ -508,6 +509,54 @@ def conv2d_bias_relu(): check_result(mod, ref_mod, input_maps, out_shape, tol=1e-5) +def test_constant(): + if not tvm.get_global_func("relay.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (1, 32, 14, 14) + wshape = (32, 32, 3, 3) + + data = relay.var("data", shape=ishape, dtype=dtype) + weight = relay.var("weight", shape=wshape, dtype=dtype) + bn_gamma = relay.var("bn_gamma") + bn_beta = relay.var("bn_beta") + bn_mmean = relay.var("bn_mean") + bn_mvar = relay.var("bn_var") + + layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), padding=(1, 1)) + bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar) + out = bn_output[0] + out = relay.nn.relu(out) + + func = relay.Function(relay.analysis.free_vars(out), out) + ref_mod, params = tvm.relay.testing.create_workload(func) + ref_mod["main"] = bind_params_by_name(ref_mod["main"], params) + + remove_bn_pass = tvm.transform.Sequential([ + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + ]) + + dnnl_patterns = get_pattern_table("dnnl") + composite_partition = tvm.transform.Sequential([ + transform.MergeComposite(dnnl_patterns), + transform.AnnotateTarget("dnnl"), 
+ transform.PartitionGraph() + ]) + + with tvm.transform.PassContext(opt_level=3, + disabled_pass=["AlterOpLayout"]): + ref_mod = remove_bn_pass(ref_mod) + mod = composite_partition(ref_mod) + + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + check_result(mod, ref_mod, {'data': i_data}, (1, 32, 14, 14), tol=1e-5) + + if __name__ == "__main__": test_conv2d() test_add() @@ -516,3 +565,4 @@ def conv2d_bias_relu(): test_bn() test_multiple_ops() test_composite() + test_constant() From 896e4d0ffba706aa602bbb599bfebabf4f93bc4c Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Fri, 19 Jun 2020 17:50:02 +0000 Subject: [PATCH 12/30] handle constant --- src/relay/backend/contrib/codegen_json/codegen_json.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 69140435ac82..97dc2b333ca5 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -160,6 +160,7 @@ class JSONSerializer : public MemoizedExprTranslator GetParams() const { - return params_; - } + Array GetParams() const { return params_; } /*!\brief Return the generated json. */ std::string GetJSON() { From f4c023a79c9dd869afc684b123a8a91ea4beca3c Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Sat, 20 Jun 2020 01:54:43 +0000 Subject: [PATCH 13/30] clean up --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 182 +++++++++--------- src/runtime/json/json_runtime.h | 106 +++++----- 2 files changed, 145 insertions(+), 143 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 34646e52dae7..fb74929a209b 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -28,9 +28,9 @@ #include #include -#include "dnnl.hpp" #include "../../json/json_node.h" #include "../../json/json_runtime.h" +#include "dnnl.hpp" namespace tvm { namespace runtime { @@ -44,19 +44,17 @@ class DNNLJSONRuntime : public JSONRuntimeBase { using dt = dnnl::memory::data_type; public: - explicit DNNLJSONRuntime(const std::string& func_name, const std::string& graph_json, - const Array const_names) - : JSONRuntimeBase(graph_json), func_name_(func_name), const_names_(const_names) {} + DNNLJSONRuntime(const std::string& symbol_name, const std::string& graph_json, + const Array const_names) + : JSONRuntimeBase(symbol_name, graph_json, const_names) {} + + const char* type_key() const { return "dnnl_json"; } PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - if (name == "get_symbol") { - return PackedFunc( - [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->func_name_; }); - } else if (name == "get_const_vars") { - return PackedFunc( - [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); - } else if (this->func_name_ == name) { + if (this->symbol_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(this->is_const_input_.size(), this->input_nodes_.size()) + << "The module has not been initialized"; size_t arg_idx = 0; // Set input data entries. @@ -98,27 +96,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { arg_idx++; } }); - } else if ("__init_" + this->func_name_ == name) { - // Build DNNL engine and memory. 
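The `type_key()` of "dnnl_json" added just above is load-bearing: when a saved module is loaded back, TVM resolves the deserializer by looking up the global `runtime.module.loadbinary_<type_key>`. The pairing, instantiated for this runtime (the registration itself appears later in this patch):

// Module side: identifies the binary format.
const char* type_key() const { return "dnnl_json"; }

// Loader side: lets tvm.runtime.load_module reconstruct the module from the
// blob written by SaveToBinary.
TVM_REGISTER_GLOBAL("runtime.module.loadbinary_dnnl_json")
    .set_body_typed(JSONRuntimeBase::LoadFromBinary<DNNLJSONRuntime>);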
- if (!this->is_init_) { - Init(); - } - this->is_init_ = true; - + } else if ("__init_" + this->symbol_name_ == name) { // The function to initialize constant tensors. return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { CHECK_EQ(args.size(), 1U); - Array consts = args[0]; - for (size_t i = 0; i < consts.size(); ++i) { - CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; - auto nid = const_idx_to_nid_[i]; - this->data_entry_[nid][0].CopyFrom(consts[i]); - } + this->Init(args[0]); *rv = 0; }); } else { - LOG(WARNING) << "Unknown DNNL symbol " << name; - return PackedFunc(); + return JSONRuntimeBase::GetFunction(name, sptr_to_self); } } @@ -149,7 +135,18 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } - void Init() override { + void Init(const Array& consts) override { + BuildEngine(); + + // Initialize consts + for (size_t i = 0; i < consts.size(); ++i) { + CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; + auto nid = const_idx_to_nid_[i]; + this->data_entry_[nid][0].CopyFrom(consts[i]); + } + } + + void BuildEngine() { engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); stream_ = dnnl::stream(engine_); @@ -193,7 +190,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } this->is_const_input_.resize(this->input_nodes_.size()); - for (size_t nid = 0; nid < this->input_nodes_.size(); ++nid) { + for (auto nid : this->input_nodes_) { const auto& node = nodes_[nid]; if (node.GetOpType() == "input") { this->is_const_input_[nid] = false; @@ -220,33 +217,33 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } -private: + private: // Bind a JSON graph node entry to a DNNL memory. - dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, - size_t offset = 0) { - if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) { - return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset); - } - return node_out_mem_[entry.id_][entry.index_].first; - } - - // Bind a JSON graph node entry to a given DNNL memory. - dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem, - size_t offset = 0) { - // Since the DNNL memory has been created before calling this function, we assume the entry - // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. - CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0); - - // TODO: Support other data types (i.e., int8). - auto data_node = nodes_[entry.id_]; - auto dltype = data_node.GetOpDataType()[entry.index_]; - CHECK_EQ(dltype.bits, 32); - - node_out_mem_[entry.id_][entry.index_] = {mem, offset}; - return node_out_mem_[entry.id_][entry.index_].first; - } - - void Conv2d(const size_t& nid, const bool has_relu=false, const bool has_bias=false) { + dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, + size_t offset = 0) { + if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) { + return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset); + } + return node_out_mem_[entry.id_][entry.index_].first; + } + + // Bind a JSON graph node entry to a given DNNL memory. 
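The two `BindDNNLMemory` overloads (the second continues below) form a get-or-create cache over graph entries: binding is one-shot, and looking up an unbound entry allocates a fresh DNNL memory on demand. A dependency-free toy of the same pattern:

#include <iostream>
#include <string>
#include <unordered_map>

std::unordered_map<int, std::string> bound;

// One-shot binding; the runtime CHECKs that an entry is never bound twice.
const std::string& Bind(int eid, std::string mem) {
  return bound.emplace(eid, std::move(mem)).first->second;
}

const std::string& GetOrCreate(int eid) {
  auto it = bound.find(eid);
  return it == bound.end() ? Bind(eid, "fresh dnnl::memory") : it->second;
}

int main() {
  Bind(0, "buffer of an external input");
  std::cout << GetOrCreate(0) << "\n" << GetOrCreate(1) << std::endl;
  return 0;
}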
+ dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem, + size_t offset = 0) { + // Since the DNNL memory has been created before calling this function, we assume the entry + // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. + CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0); + + // TODO: Support other data types (i.e., int8). + auto data_node = nodes_[entry.id_]; + auto dltype = data_node.GetOpDataType()[entry.index_]; + CHECK_EQ(dltype.bits, 32); + + node_out_mem_[entry.id_][entry.index_] = {mem, offset}; + return node_out_mem_[entry.id_][entry.index_].first; + } + + void Conv2d(const size_t& nid, const bool has_relu = false, const bool has_bias = false) { auto node = this->nodes_[nid]; // Setup attributes. @@ -310,7 +307,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f); attr.set_post_ops(ops); } - + auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_); // Push to the network. @@ -507,38 +504,38 @@ class DNNLJSONRuntime : public JSONRuntimeBase { std::copy((uint8_t*)handle, (uint8_t*)handle + size, dst + offset); } -// Generate DNNL memory description and infer the data layout by the given shape. -inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape, dt dtype) { - dnnl::memory::desc data_md; - switch (shape.size()) { - case 2: - data_md = dnnl::memory::desc({shape, dtype, tag::ab}); - break; - case 3: - data_md = dnnl::memory::desc({shape, dtype, tag::abc}); - break; - case 4: - data_md = dnnl::memory::desc({shape, dtype, tag::abcd}); - break; - case 5: - data_md = dnnl::memory::desc({shape, dtype, tag::abcde}); - break; - default: - LOG(FATAL) << "Unsupported data shape dimension: " << shape.size(); - break; + // Generate DNNL memory description and infer the data layout by the given shape. + inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape, dt dtype) { + dnnl::memory::desc data_md; + switch (shape.size()) { + case 2: + data_md = dnnl::memory::desc({shape, dtype, tag::ab}); + break; + case 3: + data_md = dnnl::memory::desc({shape, dtype, tag::abc}); + break; + case 4: + data_md = dnnl::memory::desc({shape, dtype, tag::abcd}); + break; + case 5: + data_md = dnnl::memory::desc({shape, dtype, tag::abcde}); + break; + default: + LOG(FATAL) << "Unsupported data shape dimension: " << shape.size(); + break; + } + return data_md; } - return data_md; -} -// Calculate the size of a given NDArray in bytes. -inline size_t GetNDArraySize(const NDArray& arr) { - size_t size = 1; - for (tvm_index_t i = 0; i < arr->ndim; ++i) { - size *= static_cast(arr->shape[i]); + // Calculate the size of a given NDArray in bytes. + inline size_t GetNDArraySize(const NDArray& arr) { + size_t size = 1; + for (tvm_index_t i = 0; i < arr->ndim; ++i) { + size *= static_cast(arr->shape[i]); + } + size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8; + return size; } - size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8; - return size; -} /* The dnnl engine. */ dnnl::engine engine_; @@ -551,23 +548,17 @@ inline size_t GetNDArraySize(const NDArray& arr) { /* The memory that is consumed by arguments. */ std::vector> net_args_; /* The node ID to its corresponding output memory. */ - std::unordered_map < uint32_t, - std::unordered_map>> node_out_mem_; - /* Indicate if the DNNL engine has been initialized. 
*/ - bool is_init_ = false; - /* The only subgraph name for this module. */ - std::string func_name_; - /* The required constant names. */ - Array const_names_; + std::unordered_map>> + node_out_mem_; /* Indicate if an input node is a constant node. */ std::vector is_const_input_; /* Map from constant index to JSON constant node ID. */ std::unordered_map const_idx_to_nid_; }; -runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, +runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, const Array& const_names) { - auto n = make_object(func_name.operator std::string(), + auto n = make_object(symbol_name.operator std::string(), graph_json.operator std::string(), const_names); return runtime::Module(n); } @@ -575,6 +566,9 @@ runtime::Module DNNLJSONRuntimeCreate(String func_name, String graph_json, TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate") .set_body_typed(DNNLJSONRuntimeCreate); +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_dnnl_json") + .set_body_typed(JSONRuntimeBase::LoadFromBinary); + } // namespace contrib } // namespace runtime } // namespace tvm diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index 38e195297e96..f4633ac9a693 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -29,11 +29,12 @@ #include #include +#include #include +#include #include #include #include -#include #include "json_node.h" @@ -47,16 +48,19 @@ namespace json { */ class JSONRuntimeBase : public ModuleNode { public: - explicit JSONRuntimeBase(const std::string& graph_json) { - LoadGraph(graph_json); + JSONRuntimeBase(const std::string& symbol_name, const std::string& graph_json, + const Array const_names) + : symbol_name_(symbol_name), graph_json_(graph_json), const_names_(const_names) { + LoadGraph(graph_json_); } - // The type key of each subclass can be saved to the json file and them - // used to create the specific runtime during deserialization. - // virtual const char* type_key() const = 0; const char* type_key() const { return "json"; } - virtual void Init() { LOG(FATAL) << "NYI"; } + /*! \brief Initialize a specific json runtime. */ + virtual void Init(const Array& consts) = 0; + + /*! \brief Invoke the execution engine to inteprete a specific json runtime. */ + virtual void Run() = 0; /*! * \brief Get a packed function. @@ -64,43 +68,48 @@ class JSONRuntimeBase : public ModuleNode { * \param sptr_to_self The pointer to the module node. * \return The packed function. */ - virtual PackedFunc GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { - return PackedFunc(); - } - - // Run(TVMValue*,value, int* type_code, int nargs), or - // Run(TVMArgs arg, TVMRetValue rv) ? 
- virtual void Run() { LOG(FATAL) << "NYI"; } - - void SetInput(const std::string& name, const NDArray& data) { - auto it = input_map_.find(name); - CHECK(it != input_map_.end()) << "Not found input: " << name; - SetInput(it->second, data); - } - - void SetInput(uint32_t index, const NDArray& data) { - CHECK_LT(static_cast(index), input_nodes_.size()); - uint32_t eid = EntryID(input_nodes_[index], 0); - data_entry_[eid] = data; + virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else { + return PackedFunc(nullptr); + } } - size_t NumOutputs() const { return outputs_.size(); } - - ObjectRef GetOutput() { - // Return the NDArray directly if there is only one outpput. - if (NumOutputs() == 1) { - uint32_t eid = EntryID(outputs_[0]); - return data_entry_[eid]; + virtual void SaveToBinary(dmlc::Stream* stream) { + // Save the symbol + stream->Write(symbol_name_); + // Save the graph + stream->Write(graph_json_); + // Save the required const names + std::vector consts; + for (const auto& it : const_names_) { + consts.push_back(it); } + stream->Write(consts); + } - // We need to return an ADTObj if there are multiple outputs. - std::vector outs; - for (size_t i = 0; i < NumOutputs(); i++) { - uint32_t eid = EntryID(outputs_[i]); - outs.push_back(data_entry_[eid]); + template ::value>::type> + static Module LoadFromBinary(void* strm) { + dmlc::Stream* stream = static_cast(strm); + std::string symbol; + std::string graph_json; + std::vector consts; + // Load the symbol + CHECK(stream->Read(&symbol)) << "Loading symbol name failed"; + CHECK(stream->Read(&graph_json)) << "Loading graph json failed"; + CHECK(stream->Read(&consts)) << "Loading the const name list failed"; + Array const_names; + for (const auto& it : consts) { + const_names.push_back(it); } - return ADT::Tuple(outs); + auto n = make_object(symbol, graph_json, const_names); + return Module(n); } protected: @@ -108,7 +117,6 @@ class JSONRuntimeBase : public ModuleNode { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); - for (size_t i = 0; i < input_nodes_.size(); i++) { uint32_t nid = input_nodes_[i]; std::string& name = nodes_[nid].name_; @@ -135,21 +143,21 @@ class JSONRuntimeBase : public ModuleNode { } // Get the node entry index. - uint32_t EntryID(uint32_t nid, uint32_t index) const { - return node_row_ptr_[nid] + index; - } + uint32_t EntryID(uint32_t nid, uint32_t index) const { return node_row_ptr_[nid] + index; } // Get the node entry index. - uint32_t EntryID(const JSONGraphNodeEntry& e) const { - return EntryID(e.id_, e.index_); - } + uint32_t EntryID(const JSONGraphNodeEntry& e) const { return EntryID(e.id_, e.index_); } // Number of node entries. - uint32_t NumEntries() const { - return node_row_ptr_.back(); - } + uint32_t NumEntries() const { return node_row_ptr_.back(); } protected: + /* The only subgraph name for this module. */ + std::string symbol_name_; + /* The graph. */ + std::string graph_json_; + /* The required constant names. */ + Array const_names_; /*! \brief The json graph nodes. */ std::vector nodes_; /*! \brief The input nodes, including variables and constants. 
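`SaveToBinary` and the templated `LoadFromBinary` above are strict mirrors: fields must be read in exactly the order they were written (symbol name, graph JSON, constant-name list). A plain-iostream illustration of that symmetry, independent of dmlc::Stream:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // "Save": symbol, graph, count, then the names.
  std::ostringstream os;
  os << "dnnl_0" << '\n' << R"({"nodes": []})" << '\n' << 2 << '\n'
     << "dnnl_0_const_0" << '\n' << "dnnl_0_const_1" << '\n';

  // "Load": identical order, or the stream desynchronizes.
  std::istringstream is(os.str());
  std::string symbol, graph, name;
  size_t n = 0;
  std::getline(is, symbol);
  std::getline(is, graph);
  is >> n;
  is.ignore();  // consume the newline after the count
  std::vector<std::string> consts;
  while (n-- > 0 && std::getline(is, name)) consts.push_back(name);
  std::cout << symbol << ": " << consts.size() << " constants" << std::endl;
  return 0;
}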
*/ From edd8d36c2ebac8aa1662a33c2ba5936f853b9739 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Sat, 20 Jun 2020 18:48:56 +0000 Subject: [PATCH 14/30] Simplify dnnl user code --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 125 +++++------------- src/runtime/json/json_runtime.h | 63 ++++++++- 2 files changed, 93 insertions(+), 95 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index fb74929a209b..3b3a320c03be 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -53,48 +53,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { if (this->symbol_name_ == name) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK_EQ(this->is_const_input_.size(), this->input_nodes_.size()) - << "The module has not been initialized"; - size_t arg_idx = 0; - - // Set input data entries. - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - if (this->is_const_input_[i]) { - continue; - } - auto nid = this->input_nodes_[i]; - - CHECK(args[arg_idx].type_code() == kTVMNDArrayHandle || - args[arg_idx].type_code() == kTVMDLTensorHandle) - << "Expect NDArray or DLTensor as inputs\n"; - if (args[arg_idx].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[arg_idx]; - this->data_entry_[nid][0].CopyFrom(arg); - } else { - NDArray arg = args[arg_idx]; - this->data_entry_[nid][0].CopyFrom(arg); - } - CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); - arg_idx++; - } + CHECK(this->initialized_) << "The module has not been initialized"; + // Set inputs. + SetInputs(args); // Execute the subgraph. this->Run(); - // Copy result to output buffer. - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto entry = this->outputs_[i]; - - if (args[arg_idx].type_code() == kTVMDLTensorHandle) { - DLTensor* arg = args[arg_idx]; - this->data_entry_[entry.id_][entry.index_].CopyTo(arg); - } else { - NDArray arg = args[arg_idx]; - this->data_entry_[entry.id_][entry.index_].CopyTo(arg); - } - CHECK_LT(arg_idx, args.size()) << "Too less arguments: " << args.size(); - arg_idx++; - } + GetOutput(args); }); } else if ("__init_" + this->symbol_name_ == name) { // The function to initialize constant tensors. @@ -112,10 +78,11 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Fill in the input buffers. for (size_t i = 0; i < this->input_nodes_.size(); ++i) { auto nid = this->input_nodes_[i]; + auto eid = EntryID(nid, 0); // TODO: Support other data lengths. - size_t offset_in_bytes = this->node_out_mem_[nid][0].second * 4; - write_to_dnnl_memory(this->data_entry_[nid][0]->data, this->node_out_mem_[nid][0].first, - GetNDArraySize(this->data_entry_[nid][0]), offset_in_bytes); + size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; + write_to_dnnl_memory(this->data_entry_[nid]->data, this->entry_out_mem_[eid].first, + GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); } // Invoke the engine. @@ -127,23 +94,26 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Read output buffers. 
for (size_t i = 0; i < this->outputs_.size(); ++i) { auto out_entry = this->outputs_[i]; - auto nid = out_entry.id_; - auto idx = out_entry.index_; - size_t offset_in_bytes = this->node_out_mem_[nid][idx].second * 4; - read_from_dnnl_memory(this->data_entry_[nid][idx]->data, this->node_out_mem_[nid][idx].first, - GetNDArraySize(this->data_entry_[nid][idx]), offset_in_bytes); + auto eid = EntryID(out_entry); + size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; + read_from_dnnl_memory(this->data_entry_[eid]->data, this->entry_out_mem_[eid].first, + GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); } } void Init(const Array& consts) override { + data_entry_.resize(NumEntries()); BuildEngine(); + CHECK_EQ(consts.size(), const_idx_.size()) + << "The number of input constants must match the number of required."; + // Initialize consts for (size_t i = 0; i < consts.size(); ++i) { - CHECK_GT(const_idx_to_nid_.count(i), 0U) << "Const #" << i << " is not initialized"; - auto nid = const_idx_to_nid_[i]; - this->data_entry_[nid][0].CopyFrom(consts[i]); + this->data_entry_[const_idx_[i]].CopyFrom(consts[i]); } + + initialized_ = true; } void BuildEngine() { @@ -173,30 +143,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } else { LOG(FATAL) << "Unsupported op: " << op_name; } - } else if (node.GetOpType() == "const") { - auto name = node.GetOpName(); - bool found = false; - for (size_t cid = 0; cid < const_names_.size(); ++cid) { - if (name == const_names_[cid]) { - found = true; - const_idx_to_nid_[cid] = nid; - break; - } - } - if (!found) { - LOG(FATAL) << "Unrecognized constant node: " << name; - } - } - } - - this->is_const_input_.resize(this->input_nodes_.size()); - for (auto nid : this->input_nodes_) { - const auto& node = nodes_[nid]; - if (node.GetOpType() == "input") { - this->is_const_input_[nid] = false; - } else { - CHECK_EQ(node.GetOpType(), "const"); - this->is_const_input_[nid] = true; } } @@ -207,13 +153,12 @@ class DNNLJSONRuntime : public JSONRuntimeBase { for (size_t i = 0; i < this->input_nodes_.size(); ++i) { auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; auto nid = this->input_nodes_[i]; - this->data_entry_[nid][0] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } for (size_t i = 0; i < this->outputs_.size(); ++i) { auto entry = this->outputs_[i]; auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_[entry.id_][entry.index_] = - NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); } } @@ -221,26 +166,28 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Bind a JSON graph node entry to a DNNL memory. dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, size_t offset = 0) { - if (node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0) { + auto eid = EntryID(entry); + if (entry_out_mem_.count(eid) == 0) { return BindDNNLMemory(entry, dnnl::memory(mem_desc, engine_), offset); } - return node_out_mem_[entry.id_][entry.index_].first; + return entry_out_mem_[eid].first; } // Bind a JSON graph node entry to a given DNNL memory. 
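Both `BindDNNLMemory` overloads here key on `EntryID`, the flat index derived from `node_row_ptr_`, which is a prefix sum of per-node output counts. A tiny standalone demo of that indexing scheme:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Three nodes with 1, 1 and 3 outputs; NumEntries() is the back element.
  std::vector<uint32_t> node_row_ptr = {0, 1, 2, 5};
  auto entry_id = [&](uint32_t nid, uint32_t index) {
    return node_row_ptr[nid] + index;
  };
  std::cout << entry_id(2, 1) << std::endl;  // 3: second output of node 2
  return 0;
}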
dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory mem, size_t offset = 0) { + auto eid = EntryID(entry); // Since the DNNL memory has been created before calling this function, we assume the entry // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. - CHECK(node_out_mem_.count(entry.id_) == 0 || node_out_mem_[entry.id_].count(entry.index_) == 0); + CHECK(entry_out_mem_.count(eid) == 0); // TODO: Support other data types (i.e., int8). auto data_node = nodes_[entry.id_]; auto dltype = data_node.GetOpDataType()[entry.index_]; CHECK_EQ(dltype.bits, 32); - node_out_mem_[entry.id_][entry.index_] = {mem, offset}; - return node_out_mem_[entry.id_][entry.index_].first; + entry_out_mem_[eid] = {mem, offset}; + return entry_out_mem_[eid].first; } void Conv2d(const size_t& nid, const bool has_relu = false, const bool has_bias = false) { @@ -541,19 +488,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::engine engine_; /* The dnnl stream. */ dnnl::stream stream_; - /* \brief A simple pool to map from node ID to the output tensors. */ - std::unordered_map> data_entry_; /* The network layers that are represented in dnnl primitives. */ std::vector net_; /* The memory that is consumed by arguments. */ std::vector> net_args_; - /* The node ID to its corresponding output memory. */ - std::unordered_map>> - node_out_mem_; - /* Indicate if an input node is a constant node. */ - std::vector is_const_input_; - /* Map from constant index to JSON constant node ID. */ - std::unordered_map const_idx_to_nid_; + /* The entry ID to its corresponding output memory. */ + std::unordered_map> entry_out_mem_; + /* Indicate if the DNNL engine has been initialized. */ + bool initialized_{false}; }; runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, @@ -563,8 +505,7 @@ runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, return runtime::Module(n); } -TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate") -.set_body_typed(DNNLJSONRuntimeCreate); +TVM_REGISTER_GLOBAL("runtime.DNNLJSONRuntimeCreate").set_body_typed(DNNLJSONRuntimeCreate); TVM_REGISTER_GLOBAL("runtime.module.loadbinary_dnnl_json") .set_body_typed(JSONRuntimeBase::LoadFromBinary); diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h index f4633ac9a693..2e5bddcd3a07 100644 --- a/src/runtime/json/json_runtime.h +++ b/src/runtime/json/json_runtime.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -113,14 +114,68 @@ class JSONRuntimeBase : public ModuleNode { } protected: + void SetInputs(const TVMArgs& args) { + CHECK_EQ(args.size(), input_var_idx_.size() + outputs_.size()) + << "Found mismatch in the number of provided data entryies and required."; + + for (size_t i = 0; i < input_var_idx_.size(); i++) { + auto eid = EntryID(input_var_idx_[i], 0); + CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle) + << "Expect NDArray or DLTensor as inputs"; + if (args[i].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[i]; + this->data_entry_[eid].CopyFrom(arg); + } else { + // Zero copy for input because the tensor is managed by the host. + this->data_entry_[eid] = args[i]; + } + } + } + + void GetOutput(const TVMArgs& args) { + // Copy result to output buffer. 
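`SetInputs` above and the `GetOutput` body that follows share one flat packed-argument list: every non-constant input first, then one destination buffer per output. A toy walk of that layout:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> args = {"data", "weight", "out0"};
  size_t num_input_vars = 2;  // input_var_idx_.size() in the runtime
  size_t num_outputs = 1;     // outputs_.size()
  assert(args.size() == num_input_vars + num_outputs);
  for (size_t i = 0; i < num_input_vars; ++i)
    std::cout << "input  <- " << args[i] << "\n";
  for (size_t i = num_input_vars; i < args.size(); ++i)
    std::cout << "output -> " << args[i] << "\n";
  return 0;
}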
+ size_t arg_idx = input_var_idx_.size(); + CHECK_EQ(args.size(), arg_idx + outputs_.size()) + << "Found mismatch in the number of provided data entryies and required."; + + for (size_t i = 0; i < this->outputs_.size(); i++) { + auto eid = EntryID(outputs_[i]); + + if (args[arg_idx].type_code() == kTVMDLTensorHandle) { + DLTensor* arg = args[arg_idx]; + this->data_entry_[eid].CopyTo(arg); + } else { + NDArray arg = args[arg_idx]; + this->data_entry_[eid].CopyTo(arg); + } + arg_idx++; + } + } + void LoadGraph(const std::string& graph_json) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); + std::vector consts; for (size_t i = 0; i < input_nodes_.size(); i++) { uint32_t nid = input_nodes_[i]; - std::string& name = nodes_[nid].name_; - input_map_[name] = i; + std::string name = nodes_[nid].name_; + if (nodes_[nid].op_type_ == "input") { + input_var_idx_.push_back(nid); + } else { + CHECK_EQ(nodes_[nid].op_type_, "const"); + auto pos = std::find(std::begin(const_names_), std::end(const_names_), name); + CHECK(pos != std::end(const_names_)) << "Found non-existent constant: " << name; + const_idx_.push_back(nid); + consts.push_back(name); + } + } + CHECK_EQ(consts.size(), const_names_.size()) + << "Found mismatch for the number of constants in the graph and required."; + + for (size_t i = 0; i < consts.size(); i++) { + CHECK_EQ(consts[i], const_names_[i]) + << "The position of constant in the graph must be the same as the required."; } } @@ -169,7 +224,9 @@ class JSONRuntimeBase : public ModuleNode { /*! \brief Data of that entry. */ std::vector data_entry_; /*! \brief Map the input name to index. */ - std::unordered_map input_map_; + std::vector input_var_idx_; + /*! \brief input const index. */ + std::vector const_idx_; }; } // namespace json From ededaf1bbe4e0c065a0fc540ebba85022b45956a Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Sun, 21 Jun 2020 01:12:07 +0000 Subject: [PATCH 15/30] GetDataSize --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 116 +++++++----------- src/runtime/json/json_runtime.h | 68 +++++++++- 2 files changed, 110 insertions(+), 74 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 3b3a320c03be..8f24c2a5054b 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -22,6 +22,7 @@ * \brief A simple JSON runtime for DNNL. */ +#include #include #include @@ -74,54 +75,59 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } + void Init(const Array& consts) override { + BuildEngine(); + + CHECK_EQ(consts.size(), const_idx_.size()) + << "The number of input constants must match the number of required."; + + // Pre-allocate buffers on CPU for input and output entries. + DLContext ctx; + ctx.device_type = static_cast(kDLCPU); + ctx.device_id = 0; + AllocateInputOutputBuffer(ctx); + + // Setup constants entries for weights. + SetupConstants(consts); + + initialized_ = true; + } + void Run() override { // Fill in the input buffers. - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - auto nid = this->input_nodes_[i]; - auto eid = EntryID(nid, 0); + for (size_t i = 0; i < input_nodes_.size(); ++i) { + auto eid = EntryID(input_nodes_[i], 0); // TODO: Support other data lengths. 
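The `* 4` just below turns a float32 element offset into bytes, which is why the TODO above flags other data widths. A self-contained picture of this staging copy:

#include <cstring>
#include <iostream>
#include <vector>

// Stand-in for write_to_dnnl_memory: place a tensor into a shared backing
// buffer at an element offset, scaled by sizeof(float).
int main() {
  std::vector<float> dnnl_buffer(16, 0.f);
  std::vector<float> tensor = {1.f, 2.f, 3.f};
  size_t offset_elems = 4;  // entry_out_mem_[eid].second in the runtime
  std::memcpy(dnnl_buffer.data() + offset_elems, tensor.data(),
              tensor.size() * sizeof(float));
  std::cout << dnnl_buffer[4] << " " << dnnl_buffer[6] << std::endl;  // 1 3
  return 0;
}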
- size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; - write_to_dnnl_memory(this->data_entry_[nid]->data, this->entry_out_mem_[eid].first, - GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); + size_t offset_in_bytes = entry_out_mem_[eid].second * 4; + size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->())); + write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size, + offset_in_bytes); } - // Invoke the engine. + // Invoke the engine through intepreting the stream. for (size_t i = 0; i < net_.size(); ++i) { net_.at(i).execute(stream_, net_args_.at(i)); } stream_.wait(); // Read output buffers. - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto out_entry = this->outputs_[i]; - auto eid = EntryID(out_entry); - size_t offset_in_bytes = this->entry_out_mem_[eid].second * 4; - read_from_dnnl_memory(this->data_entry_[eid]->data, this->entry_out_mem_[eid].first, - GetNDArraySize(this->data_entry_[eid]), offset_in_bytes); - } - } - - void Init(const Array& consts) override { - data_entry_.resize(NumEntries()); - BuildEngine(); - - CHECK_EQ(consts.size(), const_idx_.size()) - << "The number of input constants must match the number of required."; - - // Initialize consts - for (size_t i = 0; i < consts.size(); ++i) { - this->data_entry_[const_idx_[i]].CopyFrom(consts[i]); + for (size_t i = 0; i < outputs_.size(); ++i) { + auto eid = EntryID(outputs_[i]); + size_t offset_in_bytes = entry_out_mem_[eid].second * 4; + size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->())); + read_from_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size, + offset_in_bytes); } - - initialized_ = true; } + private: + // Build up the engine based on the input graph. void BuildEngine() { engine_ = dnnl::engine(dnnl::engine::kind::cpu, 0); stream_ = dnnl::stream(engine_); // Build subgraph engine. - for (size_t nid = 0; nid < this->nodes_.size(); ++nid) { + for (size_t nid = 0; nid < nodes_.size(); ++nid) { const auto& node = nodes_[nid]; if (node.GetOpType() == "kernel") { CHECK_EQ(node.GetOpType(), "kernel"); @@ -145,24 +151,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } } - - // Initialize input/output entries. - DLContext ctx; - ctx.device_type = static_cast(1); - ctx.device_id = 0; - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; - auto nid = this->input_nodes_[i]; - this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); - } - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto entry = this->outputs_[i]; - auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); - } } - private: // Bind a JSON graph node entry to a DNNL memory. dnnl::memory BindDNNLMemory(const JSONGraphNodeEntry& entry, dnnl::memory::desc mem_desc, size_t offset = 0) { @@ -191,14 +181,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Conv2d(const size_t& nid, const bool has_relu = false, const bool has_bias = false) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; // Setup attributes. 
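The attribute block that follows converts the JSON string attributes (strides, padding, groups) into DNNL dims. For reference, the conventional output-size arithmetic those dims imply:

#include <iostream>

int main() {
  // With a 3x3 kernel, padding 1 and stride 1, as in the tests above,
  // the spatial size is preserved.
  int IH = 14, KH = 3, pad_top = 1, pad_bottom = 1, stride = 1;
  int OH = (IH + pad_top + pad_bottom - KH) / stride + 1;
  std::cout << "OH = " << OH << std::endl;  // OH = 14
  return 0;
}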
auto data_entry = node.GetInputs()[0]; auto weight_entry = node.GetInputs()[1]; - dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; - dnnl::memory::dims weight_shape = - this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; + dnnl::memory::dims input_shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims weight_shape = nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; std::vector str_strides = node.GetAttr>("strides"); std::vector str_padding = node.GetAttr>("padding"); dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]); @@ -292,14 +281,13 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Dense(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; // Setup attributes. auto data_entry = node.GetInputs()[0]; auto weight_entry = node.GetInputs()[1]; - dnnl::memory::dims input_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; - dnnl::memory::dims weight_shape = - this->nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; + dnnl::memory::dims input_shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims weight_shape = nodes_[weight_entry.id_].GetOpShape()[weight_entry.index_]; dnnl::memory::dim B = input_shape[0], // batch size IC = input_shape[1], // input channels @@ -340,14 +328,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void BatchNorm(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; auto data_entry = node.GetInputs()[0]; auto gamma_entry = node.GetInputs()[1]; auto beta_entry = node.GetInputs()[2]; auto mean_entry = node.GetInputs()[3]; auto variance_entry = node.GetInputs()[4]; - dnnl::memory::dims data_shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims data_shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; dnnl::memory::dim IC = data_shape[1]; float epsilon = std::stof(node.GetAttr>("epsilon")[0]); @@ -382,10 +370,10 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Relu(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; auto data_entry = node.GetInputs()[0]; - dnnl::memory::dims shape = this->nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; + dnnl::memory::dims shape = nodes_[data_entry.id_].GetOpShape()[data_entry.index_]; auto data_md = dnnl::memory::desc{{shape}, dt::f32, tag::abcd}; auto relu_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, @@ -405,7 +393,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } void Add(const size_t& nid) { - auto node = this->nodes_[nid]; + auto node = nodes_[nid]; // Memory and compute description. std::vector data_dims; @@ -414,7 +402,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { CHECK_EQ(node.GetInputs().size(), 2U); for (auto entry : node.GetInputs()) { - auto data_shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; + auto data_shape = nodes_[entry.id_].GetOpShape()[entry.index_]; dnnl::memory::desc data_md = GenDNNLMemDescByShape(data_shape, dt::f32); data_dims.push_back(data_shape); @@ -474,16 +462,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { return data_md; } - // Calculate the size of a given NDArray in bytes. 
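The helper below is deleted because `tvm::runtime::GetDataSize` already computes the same quantity from a DLTensor. The arithmetic, reproduced standalone:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Element count times bytes per element, rounding bit widths up to bytes.
size_t TensorBytes(const std::vector<int64_t>& shape, int bits, int lanes) {
  size_t size = 1;
  for (int64_t dim : shape) size *= static_cast<size_t>(dim);
  return size * ((bits * lanes + 7) / 8);
}

int main() {
  std::cout << TensorBytes({1, 32, 14, 14}, 32, 1) << std::endl;  // 25088
  return 0;
}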
-  inline size_t GetNDArraySize(const NDArray& arr) {
-    size_t size = 1;
-    for (tvm_index_t i = 0; i < arr->ndim; ++i) {
-      size *= static_cast(arr->shape[i]);
-    }
-    size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8;
-    return size;
-  }
-
  /* The dnnl engine. */
  dnnl::engine engine_;
  /* The dnnl stream. */
diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h
index 2e5bddcd3a07..8314dd9a2a36 100644
--- a/src/runtime/json/json_runtime.h
+++ b/src/runtime/json/json_runtime.h
@@ -114,6 +114,11 @@ class JSONRuntimeBase : public ModuleNode {
  }

 protected:
+  /*!
+   * \brief Set up the inputs for inference.
+   *
+   * \param args The packed args.
+   */
  void SetInputs(const TVMArgs& args) {
    CHECK_EQ(args.size(), input_var_idx_.size() + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";
@@ -122,36 +127,73 @@
      auto eid = EntryID(input_var_idx_[i], 0);
      CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle)
          << "Expect NDArray or DLTensor as inputs";
+      size_t to_size = GetDataSize(*(data_entry_[eid].operator->()));
      if (args[i].type_code() == kTVMDLTensorHandle) {
        DLTensor* arg = args[i];
-        this->data_entry_[eid].CopyFrom(arg);
+        data_entry_[eid].CopyFrom(arg);
      } else {
        // Zero copy for input because the tensor is managed by the host.
-        this->data_entry_[eid] = args[i];
+        NDArray arg = args[i];
+        size_t from_size = GetDataSize(*(arg.operator->()));
+        CHECK_EQ(from_size, to_size);
+        if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
+          data_entry_[eid] = args[i];
+        } else {
+          data_entry_[eid].CopyFrom(arg);
+        }
      }
    }
  }

+  /*!
+   * \brief Return the results through packed args.
+   *
+   * \param args The packed args.
+   */
  void GetOutput(const TVMArgs& args) {
    // Copy result to output buffer.
    size_t arg_idx = input_var_idx_.size();
    CHECK_EQ(args.size(), arg_idx + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < this->outputs_.size(); i++) {
+    for (size_t i = 0; i < outputs_.size(); i++) {
      auto eid = EntryID(outputs_[i]);

      if (args[arg_idx].type_code() == kTVMDLTensorHandle) {
        DLTensor* arg = args[arg_idx];
-        this->data_entry_[eid].CopyTo(arg);
+        data_entry_[eid].CopyTo(arg);
      } else {
        NDArray arg = args[arg_idx];
-        this->data_entry_[eid].CopyTo(arg);
+        data_entry_[eid].CopyTo(arg);
      }
      arg_idx++;
    }
  }

+  /*!
+   * \brief Pre-allocate empty buffers for input and output entries.
+   *
+   * \param ctx The context for the pre-allocated buffer.
+   */
+  void AllocateInputOutputBuffer(const DLContext& ctx) {
+    for (size_t i = 0; i < this->input_nodes_.size(); ++i) {
+      auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0];
+      auto nid = this->input_nodes_[i];
+      this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx);
+    }
+
+    for (size_t i = 0; i < this->outputs_.size(); ++i) {
+      auto entry = this->outputs_[i];
+      auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_];
+      this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx);
+    }
+  }
+
+  /*!
+   * \brief Load the graph and record the entries for inputs and constants.
+   *
+   * \param graph_json The graph in the json format.
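+   * A minimal sketch of that format (keys follow the Load() parser below; the
+   * node list and concrete values here are illustrative only):
+   *   {"nodes": [{"op": "input", "name": "data", ...},
+   *              {"op": "kernel", "name": "nn.relu", "inputs": [[0, 0, 0]], ...}],
+   *    "arg_nodes": [0], "heads": [[1, 0, 0]], "node_row_ptr": [0, 1, 2]}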
+ */ void LoadGraph(const std::string& graph_json) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); @@ -177,8 +219,24 @@ class JSONRuntimeBase : public ModuleNode { CHECK_EQ(consts[i], const_names_[i]) << "The position of constant in the graph must be the same as the required."; } + + // Reserve data entries. + data_entry_.resize(NumEntries()); + } + + /*! + * \brief Set up the constants/weights for inference. + * + * \param consts The constant to be filled. + */ + void SetupConstants(const Array& consts) { + // Initialize consts + for (size_t i = 0; i < consts.size(); ++i) { + data_entry_[const_idx_[i]].CopyFrom(consts[i]); + } } + // Load the graph. void Load(dmlc::JSONReader* reader) { reader->BeginObject(); std::string key; From 8f9215a9568ed8b3515ddb370b02b48659044a6d Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Mon, 22 Jun 2020 23:55:41 +0000 Subject: [PATCH 16/30] fix dense bug --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 5 +++-- tests/python/relay/test_pass_partition_graph.py | 12 ++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 8f24c2a5054b..3a3425a8d52c 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -314,10 +314,11 @@ class DNNLJSONRuntime : public JSONRuntimeBase { net_.push_back(dense); // Memories. - std::vector bias(OC, 0); auto data_memory = BindDNNLMemory(data_entry, data_md); auto weight_memory = BindDNNLMemory(weight_entry, weight_md); - auto bias_memory = dnnl::memory(bias_md, engine_, bias.data()); + auto bias_memory = dnnl::memory(bias_md, engine_); + float bias[OC] = {0}; + write_to_dnnl_memory(bias, bias_memory, OC * sizeof(float)); JSONGraphNodeEntry out_entry(nid, 0); auto dst_memory = BindDNNLMemory(out_entry, dense_prim_desc.dst_desc()); diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 473ca9d66106..8dc5344b00be 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -462,21 +462,17 @@ def test_extern_dnnl_mobilenet(): dtype = 'float32' ishape = (1, 3, 224, 224) - mod, params = relay.testing.mobilenet.get_workload( - batch_size=1, dtype='float32') - - mod = transform.AnnotateTarget(["dnnl"])(mod) + ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype='float32') + mod = transform.AnnotateTarget(["dnnl"])(ref_mod) mod = transform.MergeCompilerRegions()(mod) mod = transform.PartitionGraph()(mod) i_data = np.random.uniform(0, 1, ishape).astype(dtype) - ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, - dtype='float32') ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) ref_res = ref_ex.evaluate()(i_data, **params) + compile_engine.get().clear() - check_result(mod, {"data": i_data}, - (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params) + check_result(mod, {"data": i_data}, (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params) def test_function_lifting(): From 57a90fc22d4127a9ea19941d0163ece4d990b895 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Tue, 23 Jun 2020 00:52:35 +0000 Subject: [PATCH 17/30] improve cmake --- cmake/modules/contrib/DNNL.cmake | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index ac03fbcc8dd1..8b863a0b764b 100644 --- 
a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -16,6 +16,10 @@ # under the License. if(USE_DNNL_CODEGEN STREQUAL "ON") + if(USE_JSON_RUNTIME STREQUAL "OFF") + message(FATAL_ERROR "USE_JSON_RUNTIME must be ON to use DNNL JSON RUNTIME") + endif() + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) file(GLOB JSON_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.h) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) @@ -25,6 +29,16 @@ if(USE_DNNL_CODEGEN STREQUAL "ON") list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) - message(STATUS "Build with DNNL codegen: " ${EXTERN_LIBRARY_DNNL}) + message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL}) +elseif(USE_DNNL_CODEGEN STREQUAL "C_SRC") + add_definitions(-DDNNL_WITH_C_SOURCE_MODULE=1) + file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) + list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) + + find_library(EXTERN_LIBRARY_DNNL dnnl) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) + file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) + list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) + message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL}) endif() From cd843d1ee845e6ddd1558d2ed51c665eccf1dbdf Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Tue, 23 Jun 2020 15:40:27 +0000 Subject: [PATCH 18/30] zero copy --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 4 +- src/runtime/json/json_runtime.h | 66 +++++++++++-------- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 3a3425a8d52c..21d0dd66e2b2 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -99,7 +99,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto eid = EntryID(input_nodes_[i], 0); // TODO: Support other data lengths. 
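      // Note the hard-coded element size in the next line: "* 4" assumes
      // 4-byte fp32 entries (hence the TODO above). A dtype-aware variant
      // would derive it from the node instead, e.g. (sketch):
      //   DLDataType t = nodes_[input_nodes_[i]].GetOpDataType()[0];
      //   size_t elem_bytes = (t.bits * t.lanes + 7) / 8;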
      size_t offset_in_bytes = entry_out_mem_[eid].second * 4;
-      size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->()));
+      size_t buffer_size = GetDataSize(*data_entry_[eid]);
      write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size,
                           offset_in_bytes);
    }
@@ -114,7 +114,7 @@
    for (size_t i = 0; i < outputs_.size(); ++i) {
      auto eid = EntryID(outputs_[i]);
      size_t offset_in_bytes = entry_out_mem_[eid].second * 4;
-      size_t buffer_size = GetDataSize(*(data_entry_[eid].operator->()));
+      size_t buffer_size = GetDataSize(*data_entry_[eid]);
      read_from_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size,
                            offset_in_bytes);
    }
diff --git a/src/runtime/json/json_runtime.h b/src/runtime/json/json_runtime.h
index 8314dd9a2a36..96d569730c97 100644
--- a/src/runtime/json/json_runtime.h
+++ b/src/runtime/json/json_runtime.h
@@ -127,20 +127,24 @@ class JSONRuntimeBase : public ModuleNode {
      auto eid = EntryID(input_var_idx_[i], 0);
      CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle)
          << "Expect NDArray or DLTensor as inputs";
-      size_t to_size = GetDataSize(*(data_entry_[eid].operator->()));
-      if (args[i].type_code() == kTVMDLTensorHandle) {
-        DLTensor* arg = args[i];
-        data_entry_[eid].CopyFrom(arg);
+
+      const DLTensor* arg;
+      if (args[i].IsObjectRef()) {
+        NDArray arr = args[i];
+        arg = arr.operator->();
      } else {
+        arg = args[i].operator DLTensor*();
+      }
+
+      size_t from_size = GetDataSize(*arg);
+      size_t to_size = GetDataSize(*data_entry_[eid]);
+      CHECK_EQ(from_size, to_size);
+
+      if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
        // Zero copy for input because the tensor is managed by the host.
-        NDArray arg = args[i];
-        size_t from_size = GetDataSize(*(arg.operator->()));
-        CHECK_EQ(from_size, to_size);
-        if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
-          data_entry_[eid] = args[i];
-        } else {
-          data_entry_[eid].CopyFrom(arg);
-        }
+        data_entry_[eid]->data = arg->data;
+      } else {
+        NDArray::CopyFromTo(arg, data_entry_[eid]);
      }
    }
  }
@@ -156,17 +160,17 @@
    CHECK_EQ(args.size(), arg_idx + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < outputs_.size(); i++) {
+    for (size_t i = 0; i < outputs_.size(); i++, arg_idx++) {
      auto eid = EntryID(outputs_[i]);

      if (args[arg_idx].type_code() == kTVMDLTensorHandle) {
        DLTensor* arg = args[arg_idx];
-        data_entry_[eid].CopyTo(arg);
+        NDArray::CopyFromTo(data_entry_[eid], arg);
      } else {
+        CHECK(args[arg_idx].IsObjectRef());
        NDArray arg = args[arg_idx];
-        data_entry_[eid].CopyTo(arg);
+        arg.CopyFrom(data_entry_[eid]);
      }
-      arg_idx++;
    }
  }
@@ -176,16 +180,26 @@ class JSONRuntimeBase : public ModuleNode {
   * \param ctx The context for the pre-allocated buffer.
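   * For the common host path this is simply the CPU context, e.g.:
   *   DLContext cpu_ctx{kDLCPU, 0};
   *   AllocateInputOutputBuffer(cpu_ctx);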
*/ void AllocateInputOutputBuffer(const DLContext& ctx) { - for (size_t i = 0; i < this->input_nodes_.size(); ++i) { - auto shape = this->nodes_[this->input_nodes_[i]].GetOpShape()[0]; - auto nid = this->input_nodes_[i]; - this->data_entry_[EntryID(nid, 0)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + for (size_t i = 0; i < input_nodes_.size(); ++i) { + auto nid = input_nodes_[i]; + auto shape = nodes_[nid].GetOpShape()[0]; + auto dtype = nodes_[nid].GetOpDataType()[0]; + DLTensor* tensor; + int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, + ctx.device_type, ctx.device_id, &tensor); + CHECK_EQ(ret, 0) << TVMGetLastError(); + data_entry_[EntryID(nid, 0)] = tensor; } - for (size_t i = 0; i < this->outputs_.size(); ++i) { - auto entry = this->outputs_[i]; - auto shape = this->nodes_[entry.id_].GetOpShape()[entry.index_]; - this->data_entry_[EntryID(entry)] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx); + for (size_t i = 0; i < outputs_.size(); ++i) { + auto entry = outputs_[i]; + auto shape = nodes_[entry.id_].GetOpShape()[entry.index_]; + auto dtype = nodes_[entry.id_].GetOpDataType()[entry.index_]; + DLTensor* tensor; + int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, + ctx.device_type, ctx.device_id, &tensor); + CHECK_EQ(ret, 0) << TVMGetLastError(); + data_entry_[EntryID(entry)] = tensor; } } @@ -232,7 +246,7 @@ class JSONRuntimeBase : public ModuleNode { void SetupConstants(const Array& consts) { // Initialize consts for (size_t i = 0; i < consts.size(); ++i) { - data_entry_[const_idx_[i]].CopyFrom(consts[i]); + consts[i].CopyTo(data_entry_[const_idx_[i]]); } } @@ -280,7 +294,7 @@ class JSONRuntimeBase : public ModuleNode { /*! \brief Output entries. */ std::vector outputs_; /*! \brief Data of that entry. */ - std::vector data_entry_; + std::vector data_entry_; /*! \brief Map the input name to index. */ std::vector input_var_idx_; /*! \brief input const index. 
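   * For a partitioned function f(data, w0, w1) whose weights were bound as
   * constants, for example, this would hold the JSON node indices of w0 and
   * w1 in the same order the codegen recorded them in const_names_.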
*/

From d8929a67f9e308336cc8d6e6aa241fb78a131ef8 Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Tue, 23 Jun 2020 19:11:13 +0000
Subject: [PATCH 19/30] add unit test

---
 tests/python/relay/test_json_runtime.py | 61 ++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py
index cdba3c92a69f..26a418864d3a 100644
--- a/tests/python/relay/test_json_runtime.py
+++ b/tests/python/relay/test_json_runtime.py
@@ -95,6 +95,7 @@ def check_graph_runtime_result():

 def test_conv2d():
+    """Test a subgraph with a single conv2d operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -168,6 +169,7 @@ def group_conv2d():

 def test_add():
+    """Test a subgraph with a single add operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -208,6 +210,7 @@ def gen_add():

 def test_relu():
+    """Test a subgraph with a single ReLU operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -244,6 +247,7 @@ def gen_relu():

 def test_dense():
+    """Test a subgraph with a single dense operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -285,6 +289,7 @@ def gen_dense():

 def test_bn():
+    """Test a subgraph with a single batch_norm operator."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -350,6 +355,7 @@ def gen_bn():

 def test_multiple_ops():
+    """Test a subgraph with multiple operators."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -405,6 +411,7 @@ def get_partitoned_mod(mod):

 def test_composite():
+    """Test DNNL patterns and their composite functions."""
     if not tvm.get_global_func("relay.ext.dnnl", True):
         print("skip because DNNL codegen is not available")
         return
@@ -510,6 +517,7 @@ def conv2d_bias_relu():

 def test_constant():
+    """Test the subgraph with (var, const, ...)
arguments.""" if not tvm.get_global_func("relay.ext.dnnl", True): print("skip because DNNL codegen is not available") return @@ -548,14 +556,62 @@ def test_constant(): transform.PartitionGraph() ]) - with tvm.transform.PassContext(opt_level=3, - disabled_pass=["AlterOpLayout"]): + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): ref_mod = remove_bn_pass(ref_mod) mod = composite_partition(ref_mod) i_data = np.random.uniform(0, 1, ishape).astype(dtype) check_result(mod, ref_mod, {'data': i_data}, (1, 32, 14, 14), tol=1e-5) +def test_partial_constant(): + """Test the subgraph with (const, var, const, var) arguments.""" + if not tvm.get_global_func("relay.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + + dtype = 'float32' + ishape = (10, 10) + + in_1 = relay.var("in_1", shape=ishape, dtype=dtype) + in_2 = relay.var("in_2", shape=ishape, dtype=dtype) + in_3 = relay.var("in_3", shape=ishape, dtype=dtype) + in_4 = relay.var("in_4", shape=ishape, dtype=dtype) + + add1 = relay.add(in_1, in_2) + add2 = relay.add(add1, in_3) + add3 = relay.add(add2, in_3) + add4 = relay.add(add3, in_3) + + func = relay.Function([in_1, in_2, in_3, in_4], add4) + ref_mod = tvm.IRModule.from_expr(func) + ref_mod = relay.transform.InferType()(ref_mod) + + data1 = np.random.uniform(0, 1, ishape).astype(dtype) + data3 = np.random.uniform(0, 1, ishape).astype(dtype) + + params = { + 'in_1': tvm.nd.array(data1, ctx=tvm.cpu(0)), + 'in_3': tvm.nd.array(data3, ctx=tvm.cpu(0)) + } + ref_mod["main"] = bind_params_by_name(ref_mod["main"], params) + + opt_pass = tvm.transform.Sequential([ + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + transform.AnnotateTarget("dnnl"), + transform.MergeCompilerRegions(), + transform.PartitionGraph() + ]) + + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): + mod = opt_pass(ref_mod) + + data2 = np.random.uniform(0, 1, ishape).astype(dtype) + data4 = np.random.uniform(0, 1, ishape).astype(dtype) + check_result(mod, ref_mod, {'in_2': data2, 'in_4': data4}, (10, 10), tol=1e-5) + if __name__ == "__main__": test_conv2d() @@ -566,3 +622,4 @@ def test_constant(): test_multiple_ops() test_composite() test_constant() + test_partial_constant() From 5e34210a24f4fd70f02998192c5fb36e25089e72 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Tue, 23 Jun 2020 23:50:05 +0000 Subject: [PATCH 20/30] move json to contrib/json --- cmake/modules/JSON.cmake | 2 +- .../contrib/codegen_json/codegen_json.h | 4 +- src/relay/backend/contrib/dnnl/codegen.cc | 2 +- src/relay/backend/graph_runtime_codegen.cc | 66 +++++++++++++++++- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 9 +-- src/runtime/{ => contrib}/json/json_node.h | 69 ------------------- src/runtime/{ => contrib}/json/json_runtime.h | 0 7 files changed, 69 insertions(+), 83 deletions(-) rename src/runtime/{ => contrib}/json/json_node.h (78%) rename src/runtime/{ => contrib}/json/json_runtime.h (100%) diff --git a/cmake/modules/JSON.cmake b/cmake/modules/JSON.cmake index bfc07bb1b13a..16a6e59697bc 100644 --- a/cmake/modules/JSON.cmake +++ b/cmake/modules/JSON.cmake @@ -17,6 +17,6 @@ if(USE_JSON_RUNTIME) message(STATUS "Build with JSON runtime support") - file(GLOB RUNTIME_JSON_SRCS src/runtime/json/*.cc) + file(GLOB RUNTIME_JSON_SRCS src/runtime/contrib/json/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_JSON_SRCS}) endif(USE_JSON_RUNTIME) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h 
b/src/relay/backend/contrib/codegen_json/codegen_json.h index 97dc2b333ca5..1cc26014501b 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -35,8 +35,8 @@ #include #include -#include "../../../../runtime/json/json_node.h" -#include "../../../../runtime/json/json_runtime.h" +#include "../../../../runtime/contrib/json/json_node.h" +#include "../../../../runtime/contrib/json/json_runtime.h" #include "../../utils.h" namespace tvm { diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 67dba2969923..c5dc6a520f14 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -33,7 +33,7 @@ #include #include -#include "../../../../runtime/json/json_node.h" +#include "../../../../runtime/contrib/json/json_node.h" #include "../../utils.h" #include "../codegen_c/codegen_c.h" #include "../codegen_json/codegen_json.h" diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 16f95a1b79df..b5024d55633c 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -28,15 +28,12 @@ #include #include -#include -#include #include #include #include #include "compile_engine.h" #include "utils.h" -#include "../../runtime/json/json_node.h" namespace tvm { namespace relay { @@ -628,6 +625,12 @@ TVM_REGISTER_GLOBAL("relay.build_module._GraphRuntimeCodegen") namespace dmlc { namespace json { +// JSON utils +template +inline bool SameType(const dmlc::any& data) { + return std::type_index(data.type()) == std::type_index(typeid(T)); +} + template <> struct Handler> { inline static void Write(dmlc::JSONWriter* writer, @@ -639,5 +642,62 @@ struct Handler> { LOG(FATAL) << "Not implemented."; } }; +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, + const std::unordered_map& data) { + writer->BeginObject(); + for (const auto& kv : data) { + auto k = kv.first; + const dmlc::any& v = kv.second; + if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteObjectKeyValue(k, dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else if (SameType>(v)) { + writer->WriteObjectKeyValue(k, dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndObject(); + } + inline static void Read(dmlc::JSONReader* reader, + std::unordered_map* data) { + LOG(FATAL) << "Not implemented."; + } +}; + +template <> +struct Handler> { + inline static void Write(dmlc::JSONWriter* writer, const std::vector& data) { + writer->BeginArray(); + for (const auto& v : data) { + if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType(v)) { + writer->WriteArrayItem(dmlc::get(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else if (SameType>>(v)) { + writer->WriteArrayItem(dmlc::get>>(v)); + } else if (SameType>(v)) { + writer->WriteArrayItem(dmlc::get>(v)); + } else { + LOG(FATAL) << "Not supported"; + } + } + writer->EndArray(); + } + inline static void Read(dmlc::JSONReader* reader, std::vector* data) { + LOG(FATAL) << "Not implemented."; + } +}; } // namespace json } // namespace dmlc diff --git 
a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 21d0dd66e2b2..bd425439bd2c 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -29,8 +29,8 @@ #include #include -#include "../../json/json_node.h" -#include "../../json/json_runtime.h" +#include "../json/json_node.h" +#include "../json/json_runtime.h" #include "dnnl.hpp" namespace tvm { @@ -207,11 +207,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { SW = std::stoi(str_strides[0]), // weight-wise stride OH = (IH - KH + PH_L + PH_R) / SH + 1, // output height OW = (IW - KW + PW_L + PW_R) / SW + 1; // output width - // std::cerr << N << ", " << IC << ", " << IH << ", " << IW << "\n"; - // std::cerr << OC << ", " << IC << ", " << KH << ", " << KW << "\n"; - // std::cerr << PH_L << ", " << PH_R << ", " << PW_L << ", " << PW_R << "\n"; - // std::cerr << SH << ", " << SW << "\n"; - // std::cerr << OH << ", " << OW << "\n"; // Memory shapes. dnnl::memory::dims src_dims = {N, IC, IH, IW}; diff --git a/src/runtime/json/json_node.h b/src/runtime/contrib/json/json_node.h similarity index 78% rename from src/runtime/json/json_node.h rename to src/runtime/contrib/json/json_node.h index fd45ab226e19..035394078bd2 100644 --- a/src/runtime/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -317,12 +317,6 @@ class JSONGraphNode { namespace dmlc { namespace json { -// JSON utils -template -inline bool SameType(const dmlc::any& data) { - return std::type_index(data.type()) == std::type_index(typeid(T)); -} - template <> struct Handler> { inline static void Write( @@ -336,69 +330,6 @@ struct Handler> { (*data)->Load(reader); } }; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, - const std::vector& data) { - writer->BeginArray(); - for (const auto& v : data) { - if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteArrayItem(dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteArrayItem(dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteArrayItem(dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndArray(); - } - - inline static void Read(dmlc::JSONReader* reader, - std::vector* data) { - LOG(FATAL) << "Not implemented."; - } -}; - -template <> -struct Handler> { - inline static void Write(dmlc::JSONWriter* writer, - const std::unordered_map& data) { - writer->BeginObject(); - for (const auto& kv : data) { - auto k = kv.first; - const dmlc::any& v = kv.second; - if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType(v)) { - writer->WriteObjectKeyValue(k, dmlc::get(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else if (SameType>>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>>(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else if (SameType>(v)) { - writer->WriteObjectKeyValue(k, dmlc::get>(v)); - } else { - LOG(FATAL) << "Not supported"; - } - } - writer->EndObject(); - } - - inline static void Read(dmlc::JSONReader* reader, - std::unordered_map* data) { - LOG(FATAL) << "Not implemented."; - } -}; - } // namespace json } // namespace dmlc diff --git a/src/runtime/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h similarity index 100% rename from src/runtime/json/json_runtime.h rename to 
src/runtime/contrib/json/json_runtime.h From ade0d27d37c44a7e2f73029fe044d2f9ccfba2d8 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 01:02:09 +0000 Subject: [PATCH 21/30] fix cmake --- CMakeLists.txt | 2 -- cmake/modules/JSON.cmake | 22 ------------ cmake/modules/contrib/DNNL.cmake | 13 +++---- .../contrib/codegen_json/codegen_json.h | 36 ++++++------------- src/relay/backend/contrib/dnnl/codegen.cc | 24 +++++++++---- src/runtime/contrib/json/json_node.h | 25 ++++++------- src/runtime/contrib/json/json_runtime.h | 6 ++-- tests/python/relay/test_json_runtime.py | 18 +++++----- 8 files changed, 55 insertions(+), 91 deletions(-) delete mode 100644 cmake/modules/JSON.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c0fa1cbf4230..aaddebdfe3c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,6 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) -tvm_option(USE_JSON_RUNTIME "Build with JSON runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -306,7 +305,6 @@ if(USE_EXAMPLE_EXT_RUNTIME) endif(USE_EXAMPLE_EXT_RUNTIME) # Module rules -include(cmake/modules/JSON.cmake) include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) include(cmake/modules/Hexagon.cmake) diff --git a/cmake/modules/JSON.cmake b/cmake/modules/JSON.cmake deleted file mode 100644 index 16a6e59697bc..000000000000 --- a/cmake/modules/JSON.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(USE_JSON_RUNTIME) - message(STATUS "Build with JSON runtime support") - file(GLOB RUNTIME_JSON_SRCS src/runtime/contrib/json/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_JSON_SRCS}) -endif(USE_JSON_RUNTIME) diff --git a/cmake/modules/contrib/DNNL.cmake b/cmake/modules/contrib/DNNL.cmake index 8b863a0b764b..85a6bea0ede1 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/cmake/modules/contrib/DNNL.cmake @@ -15,29 +15,24 @@ # specific language governing permissions and limitations # under the License. 
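# Note that USE_DNNL_CODEGEN is effectively tri-state after this change; a
# user's config.cmake would opt in with, for example:
#   set(USE_DNNL_CODEGEN ON)     # DNNL JSON runtime ("JSON" behaves the same)
#   set(USE_DNNL_CODEGEN C_SRC)  # keep the original C source module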
-if(USE_DNNL_CODEGEN STREQUAL "ON") - if(USE_JSON_RUNTIME STREQUAL "OFF") - message(FATAL_ERROR "USE_JSON_RUNTIME must be ON to use DNNL JSON RUNTIME") - endif() - +if((USE_DNNL_CODEGEN STREQUAL "ON") OR (USE_DNNL_CODEGEN STREQUAL "JSON")) + add_definitions(-DUSE_JSON_RUNTIME=1) file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) - file(GLOB JSON_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_json/*.h) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) list(APPEND COMPILER_SRCS ${JSON_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) - file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) + file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc) list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL}) elseif(USE_DNNL_CODEGEN STREQUAL "C_SRC") - add_definitions(-DDNNL_WITH_C_SOURCE_MODULE=1) file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc) list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) find_library(EXTERN_LIBRARY_DNNL dnnl) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) - file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) + file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc) list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL}) endif() diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 1cc26014501b..8300cbedb38e 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -26,10 +26,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -46,7 +46,7 @@ namespace contrib { using namespace tvm::runtime::json; -using ShapeVector = std::vector >; +using ShapeVector = std::vector>; using TypeVector = std::vector; using JSONGraphObjectPtr = std::shared_ptr; @@ -58,8 +58,7 @@ class OpAttrExtractor : public AttrVisitor { public: explicit OpAttrExtractor(JSONGraphObjectPtr node) : node_(node) {} - template ::value>> + template ::value>> std::string Fp2String(const T value, int n = 16) { std::ostringstream out; out.precision(n); @@ -73,29 +72,17 @@ class OpAttrExtractor : public AttrVisitor { node_->SetAttr(key, attr); } - void Visit(const char* key, double* value) final { - SetNodeAttr(key, {Fp2String(*value)}); - } + void Visit(const char* key, double* value) final { SetNodeAttr(key, {Fp2String(*value)}); } - void Visit(const char* key, int64_t* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, int64_t* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, uint64_t* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, uint64_t* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, int* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, int* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, bool* value) final { - SetNodeAttr(key, {std::to_string(*value)}); - } + void Visit(const char* key, bool* value) final { SetNodeAttr(key, {std::to_string(*value)}); } - void Visit(const char* key, std::string* value) final { - SetNodeAttr(key, {*value}); - } + void Visit(const char* key, std::string* 
value) final { SetNodeAttr(key, {*value}); } void Visit(const char* key, DataType* value) final { if (!value->is_void()) { @@ -292,10 +279,9 @@ class JSONSerializer : public MemoizedExprTranslator(name, /* name_ */ + auto node = std::make_shared(name, /* name_ */ "kernel", /* op_type_ */ - inputs, - 1 /* num_outputs_ */); + inputs, 1 /* num_outputs_ */); SetCallNodeAttribute(node, cn); return AddNode(node, GetRef(cn)); } diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index c5dc6a520f14..9d4041af6395 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -33,18 +33,22 @@ #include #include -#include "../../../../runtime/contrib/json/json_node.h" #include "../../utils.h" -#include "../codegen_c/codegen_c.h" + +#ifdef USE_JSON_RUNTIME +#include "../../../../runtime/contrib/json/json_node.h" #include "../codegen_json/codegen_json.h" +#else +#include "../codegen_c/codegen_c.h" +#endif namespace tvm { namespace relay { namespace contrib { using namespace backend; -using namespace tvm::runtime::json; +#ifndef USE_JSON_RUNTIME // C source runtime inline size_t GetShape1DSize(const Type& type) { const auto shape = GetShape(type); return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); @@ -410,7 +414,6 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { const auto* pf = runtime::Registry::Get("runtime.CSourceModuleCreate"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; return (*pf)(code, "c", sym, variables); - std::cout << code_stream_.str(); } private: @@ -421,7 +424,12 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { std::ostringstream code_stream_; }; +#else // DNNL JSON runtime + class DNNLJSONSerializer : public backend::contrib::JSONSerializer { + using JSONGraphNode = tvm::runtime::json::JSONGraphNode; + using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry; + public: DNNLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {} @@ -473,14 +481,14 @@ std::string GetExtSymbol(const Function& func) { CHECK(name_node.defined()) << "Fail to retrieve external symbol."; return std::string(name_node.value()); } +#endif /*! * \brief The external compiler/codegen tool. It takes a Relay expression/module and * compile it into a runtime module. */ runtime::Module DNNLCompiler(const ObjectRef& ref) { - // DNNLModuleCodegen dnnl; - // return dnnl.CreateCSourceModule(ref); +#ifdef USE_JSON_RUNTIME CHECK(ref->IsInstance()); auto func = Downcast(ref); auto func_name = GetExtSymbol(func); @@ -493,6 +501,10 @@ runtime::Module DNNLCompiler(const ObjectRef& ref) { CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; auto mod = (*pf)(func_name, graph_json, params); return mod; +#else + DNNLModuleCodegen dnnl; + return dnnl.CreateCSourceModule(ref); +#endif } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); diff --git a/src/runtime/contrib/json/json_node.h b/src/runtime/contrib/json/json_node.h index 035394078bd2..6bae5c5d6bf2 100644 --- a/src/runtime/contrib/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -22,8 +22,8 @@ * \brief The graph nodes used by JSON runtime. 
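 * A node entry serializes to the triple [node_id, output_index, version];
 * the second output of node 3, for instance, would appear in "heads" as
 * [3, 1, 0].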
*/ -#ifndef TVM_RUNTIME_JSON_JSON_NODE_H_ -#define TVM_RUNTIME_JSON_JSON_NODE_H_ +#ifndef TVM_RUNTIME_CONTRIB_JSON_JSON_NODE_H_ +#define TVM_RUNTIME_CONTRIB_JSON_JSON_NODE_H_ #include #include @@ -32,10 +32,10 @@ #include #include +#include #include #include #include -#include namespace tvm { namespace runtime { @@ -52,7 +52,7 @@ class JSONGraphNodeEntry { // Constructors. JSONGraphNodeEntry() = default; JSONGraphNodeEntry(int id, int index, int version = 0) - : id_(id), index_(index), version_(version) {} + : id_(id), index_(index), version_(version) {} /*! * \brief Serialize a node entry. @@ -99,10 +99,8 @@ class JSONGraphNode { public: // Constructors. JSONGraphNode() = default; - JSONGraphNode(const std::string& name, - const std::string& op_type, - const std::vector& inputs = {}, - size_t num_outputs = 1) { + JSONGraphNode(const std::string& name, const std::string& op_type, + const std::vector& inputs = {}, size_t num_outputs = 1) { name_ = name; op_type_ = op_type; num_inputs_ = inputs.size(); @@ -245,9 +243,7 @@ class JSONGraphNode { * * \param num_outputs The number of output. */ - void SetNumOutput(uint32_t num_outputs) { - num_outputs_ = num_outputs; - } + void SetNumOutput(uint32_t num_outputs) { num_outputs_ = num_outputs; } /*! * \brief Get the value of an attribute in the node. @@ -319,9 +315,8 @@ namespace dmlc { namespace json { template <> struct Handler> { - inline static void Write( - dmlc::JSONWriter* writer, - const std::shared_ptr& data) { + inline static void Write(dmlc::JSONWriter* writer, + const std::shared_ptr& data) { data->Save(writer); } @@ -333,4 +328,4 @@ struct Handler> { } // namespace json } // namespace dmlc -#endif // TVM_RUNTIME_JSON_JSON_NODE_H_ +#endif // TVM_RUNTIME_CONTRIB_JSON_JSON_NODE_H_ diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h index 96d569730c97..dec9abf378f9 100644 --- a/src/runtime/contrib/json/json_runtime.h +++ b/src/runtime/contrib/json/json_runtime.h @@ -22,8 +22,8 @@ * \brief Utilities for json runtime. 
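 * A concrete backend subclasses JSONRuntimeBase and only provides Init() and
 * Run(); a minimal sketch (the class name here is illustrative):
 *   class MyJSONRuntime : public JSONRuntimeBase {
 *     void Init(const Array<NDArray>& consts) override { SetupConstants(consts); }
 *     void Run() override { ... }  // interpret nodes_ in topological order
 *   };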
*/
-#ifndef TVM_RUNTIME_JSON_JSON_RUNTIME_H_
-#define TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+#ifndef TVM_RUNTIME_CONTRIB_JSON_JSON_RUNTIME_H_
+#define TVM_RUNTIME_CONTRIB_JSON_JSON_RUNTIME_H_

#include
#include
@@ -304,4 +304,4 @@
 } // namespace json
 } // namespace runtime
 } // namespace tvm
-#endif // TVM_RUNTIME_JSON_JSON_RUNTIME_H_
+#endif // TVM_RUNTIME_CONTRIB_JSON_JSON_RUNTIME_H_
diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py
index 26a418864d3a..d15468c6a942 100644
--- a/tests/python/relay/test_json_runtime.py
+++ b/tests/python/relay/test_json_runtime.py
@@ -96,7 +96,7 @@

 def test_conv2d():
     """Test a subgraph with a single conv2d operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -170,7 +170,7 @@

 def test_add():
     """Test a subgraph with a single add operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -211,7 +211,7 @@ def gen_add():

 def test_relu():
     """Test a subgraph with a single ReLU operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -248,7 +248,7 @@ def gen_relu():

 def test_dense():
     """Test a subgraph with a single dense operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -290,7 +290,7 @@ def gen_dense():

 def test_bn():
     """Test a subgraph with a single batch_norm operator."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -356,7 +356,7 @@ def gen_bn():

 def test_multiple_ops():
     """Test a subgraph with multiple operators."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -412,7 +412,7 @@ def get_partitoned_mod(mod):

 def test_composite():
     """Test DNNL patterns and their composite functions."""
-    if not tvm.get_global_func("relay.ext.dnnl", True):
+    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
         print("skip because DNNL codegen is not available")
         return
@@ -518,7 +518,7 @@ def conv2d_bias_relu():

 def test_constant():
     """Test the subgraph with (var, const, ...)
arguments.""" - if not tvm.get_global_func("relay.ext.dnnl", True): + if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True): print("skip because DNNL codegen is not available") return @@ -565,7 +565,7 @@ def test_constant(): def test_partial_constant(): """Test the subgraph with (const, var, const, var) arguments.""" - if not tvm.get_global_func("relay.ext.dnnl", True): + if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True): print("skip because DNNL codegen is not available") return From 3ec8b841dac0950de578c673204eef9572459fa1 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 16:14:28 +0000 Subject: [PATCH 22/30] lint --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index bd425439bd2c..ce422c3543d3 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -97,7 +97,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Fill in the input buffers. for (size_t i = 0; i < input_nodes_.size(); ++i) { auto eid = EntryID(input_nodes_[i], 0); - // TODO: Support other data lengths. + // TODO(@comanic): Support other data lengths. size_t offset_in_bytes = entry_out_mem_[eid].second * 4; size_t buffer_size = GetDataSize(*data_entry_[eid]); write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size, @@ -169,9 +169,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto eid = EntryID(entry); // Since the DNNL memory has been created before calling this function, we assume the entry // has not yet been bind to the other DNNL memory; otherwise it may have memory leak. - CHECK(entry_out_mem_.count(eid) == 0); + CHECK_EQ(entry_out_mem_.count(eid), 0); - // TODO: Support other data types (i.e., int8). + // TODO(@comanic): Support other data types (i.e., int8). auto data_node = nodes_[entry.id_]; auto dltype = data_node.GetOpDataType()[entry.index_]; CHECK_EQ(dltype.bits, 32); @@ -425,14 +425,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { inline void read_from_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size, size_t offset = 0) { uint8_t* src = static_cast(mem.get_data_handle()); - std::copy(src + offset, src + offset + size, (uint8_t*)handle); + std::copy(src + offset, src + offset + size, static_cast(handle)); } // Read from the handle and write to DNNL memory (+offset). - inline void write_to_dnnl_memory(void* handle, dnnl::memory& mem, size_t size, + inline void write_to_dnnl_memory(void* handle, const dnnl::memory& mem, size_t size, size_t offset = 0) { uint8_t* dst = static_cast(mem.get_data_handle()); - std::copy((uint8_t*)handle, (uint8_t*)handle + size, dst + offset); + std::copy(reinterpret_cast(handle), reinterpret_cast(handle) + size, + dst + offset); } // Generate DNNL memory description and infer the data layout by the given shape. 
From 8535921c80cb5c1e6ee5afa057668197d2f452e0 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 16:38:24 +0000 Subject: [PATCH 23/30] max_digits10 for fp serialization --- src/relay/backend/contrib/codegen_json/codegen_json.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 8300cbedb38e..1e643e561613 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -59,9 +60,9 @@ class OpAttrExtractor : public AttrVisitor { explicit OpAttrExtractor(JSONGraphObjectPtr node) : node_(node) {} template ::value>> - std::string Fp2String(const T value, int n = 16) { + std::string Fp2String(const T value) { std::ostringstream out; - out.precision(n); + out.precision(std::numeric_limits::max_digits10); out << value; return out.str(); } From 20786071455798d58557903886e662107ea0e380 Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 24 Jun 2020 16:55:55 +0000 Subject: [PATCH 24/30] only keep base getfunction --- .../contrib/codegen_json/codegen_json.h | 2 +- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 28 ------------------- src/runtime/contrib/json/json_runtime.h | 21 ++++++++++++++ 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index 1e643e561613..bb8ea0c6162e 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -119,7 +119,7 @@ class OpAttrExtractor : public AttrVisitor { String s = GetRef(str); SetNodeAttr(key, std::vector{s.operator std::string()}); } else { - LOG(FATAL) << "Not yet supprted type: " << (*value)->GetTypeKey() << ": " << *value; + LOG(FATAL) << "Not yet supported type: " << (*value)->GetTypeKey() << ": " << *value; } } diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index ce422c3543d3..87eb74490dc3 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -51,30 +51,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { const char* type_key() const { return "dnnl_json"; } - PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { - if (this->symbol_name_ == name) { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK(this->initialized_) << "The module has not been initialized"; - - // Set inputs. - SetInputs(args); - // Execute the subgraph. - this->Run(); - // Copy result to output buffer. - GetOutput(args); - }); - } else if ("__init_" + this->symbol_name_ == name) { - // The function to initialize constant tensors. - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK_EQ(args.size(), 1U); - this->Init(args[0]); - *rv = 0; - }); - } else { - return JSONRuntimeBase::GetFunction(name, sptr_to_self); - } - } - void Init(const Array& consts) override { BuildEngine(); @@ -89,8 +65,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Setup constants entries for weights. SetupConstants(consts); - - initialized_ = true; } void Run() override { @@ -469,8 +443,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { std::vector> net_args_; /* The entry ID to its corresponding output memory. 
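   * The size_t half of the pair is an element offset into that memory, so
   * entry_out_mem_[eid] = {mem, 2} places the entry at byte offset 2 * 4 in
   * the fp32-only case handled above.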
*/ std::unordered_map> entry_out_mem_; - /* Indicate if the DNNL engine has been initialized. */ - bool initialized_{false}; }; runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json, diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h index dec9abf378f9..d716929478f7 100644 --- a/src/runtime/contrib/json/json_runtime.h +++ b/src/runtime/contrib/json/json_runtime.h @@ -76,6 +76,25 @@ class JSONRuntimeBase : public ModuleNode { } else if (name == "get_const_vars") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if (this->symbol_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK(this->initialized_) << "The module has not been initialized"; + + // Set inputs. + this->SetInputs(args); + // Execute the subgraph. + this->Run(); + // Copy result to output buffer. + this->GetOutput(args); + }); + } else if ("__init_" + this->symbol_name_ == name) { + // The function to initialize constant tensors. + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size(), 1U); + this->Init(args[0]); + this->initialized_ = true; + *rv = 0; + }); } else { return PackedFunc(nullptr); } @@ -299,6 +318,8 @@ class JSONRuntimeBase : public ModuleNode { std::vector input_var_idx_; /*! \brief input const index. */ std::vector const_idx_; + /* Indicate if the engine has been initialized. */ + bool initialized_{false}; }; } // namespace json From 7a78ff8c9ae054fe936731a4fa3158b404a29778 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 24 Jun 2020 18:11:45 +0000 Subject: [PATCH 25/30] fix lint --- src/relay/backend/contrib/codegen_json/codegen_json.h | 1 + src/runtime/contrib/json/json_node.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index bb8ea0c6162e..b9cba3656d80 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -33,6 +33,7 @@ #include #include +#include #include #include diff --git a/src/runtime/contrib/json/json_node.h b/src/runtime/contrib/json/json_node.h index 6bae5c5d6bf2..53c36b383ea2 100644 --- a/src/runtime/contrib/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -32,6 +32,7 @@ #include #include +#include #include #include #include From c5af62d800f8db1f5b39ba752dc3cec5fc5175eb Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Thu, 25 Jun 2020 00:26:51 +0000 Subject: [PATCH 26/30] zero copy for all data entries --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 8 +- src/runtime/contrib/json/json_runtime.h | 93 ++++--------------- 2 files changed, 17 insertions(+), 84 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 87eb74490dc3..c2747d145857 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -57,12 +57,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { CHECK_EQ(consts.size(), const_idx_.size()) << "The number of input constants must match the number of required."; - // Pre-allocate buffers on CPU for input and output entries. - DLContext ctx; - ctx.device_type = static_cast(kDLCPU); - ctx.device_id = 0; - AllocateInputOutputBuffer(ctx); - // Setup constants entries for weights. 
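    // (zero copy: SetupConstants only records each weight's DLTensor pointer
    // into data_entry_, so no host buffer is allocated or copied for the
    // constants)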
    SetupConstants(consts);
  }
@@ -71,7 +65,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
    // Fill in the input buffers.
    for (size_t i = 0; i < input_nodes_.size(); ++i) {
      auto eid = EntryID(input_nodes_[i], 0);
-      // TODO(@comanic): Support other data lengths.
+      // TODO(@comaniac): Support other data lengths.
      size_t offset_in_bytes = entry_out_mem_[eid].second * 4;
      size_t buffer_size = GetDataSize(*data_entry_[eid]);
      write_to_dnnl_memory(data_entry_[eid]->data, entry_out_mem_[eid].first, buffer_size,
diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h
index d716929478f7..da7d8952d150 100644
--- a/src/runtime/contrib/json/json_runtime.h
+++ b/src/runtime/contrib/json/json_runtime.h
@@ -80,12 +80,10 @@ class JSONRuntimeBase : public ModuleNode {
      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
        CHECK(this->initialized_) << "The module has not been initialized";

-        // Set inputs.
-        this->SetInputs(args);
+        // Bind argument tensors to data entries.
+        this->SetInputOutputBuffers(args);
        // Execute the subgraph.
        this->Run();
-        // Copy result to output buffer.
-        this->GetOutput(args);
      });
    } else if ("__init_" + this->symbol_name_ == name) {
      // The function to initialize constant tensors.
@@ -134,16 +132,18 @@
 protected:
  /*!
-   * \brief Set up the inputs for inference.
+   * \brief Set up the input and output buffers by binding their DLTensor pointers to the
+   * corresponding data entry.
   *
   * \param args The packed args.
   */
-  void SetInputs(const TVMArgs& args) {
+  void SetInputOutputBuffers(const TVMArgs& args) {
    CHECK_EQ(args.size(), input_var_idx_.size() + outputs_.size())
        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < input_var_idx_.size(); i++) {
-      auto eid = EntryID(input_var_idx_[i], 0);
+    for (size_t i = 0; i < static_cast(args.size()); i++) {
+      auto eid = i < input_var_idx_.size() ? EntryID(input_var_idx_[i], 0)
+                                           : EntryID(outputs_[i - input_var_idx_.size()]);
      CHECK(args[i].type_code() == kTVMNDArrayHandle || args[i].type_code() == kTVMDLTensorHandle)
          << "Expect NDArray or DLTensor as inputs";
@@ -155,70 +155,9 @@
        arg = args[i].operator DLTensor*();
      }

-      size_t from_size = GetDataSize(*arg);
-      size_t to_size = GetDataSize(*data_entry_[eid]);
-      CHECK_EQ(from_size, to_size);
-
-      if (data_entry_[eid]->ctx.device_type == arg->ctx.device_type) {
-        // Zero copy for input because the tensor is managed by the host.
-        data_entry_[eid]->data = arg->data;
-      } else {
-        NDArray::CopyFromTo(arg, data_entry_[eid]);
-      }
+      // Assign input/output the NDArray pointers to data entry so that we can directly
+      // read/write host buffers.
+      data_entry_[eid] = arg;
    }
  }

-  /*!
-   * \brief Return the results through packed args.
-   *
-   * \param args The packed args.
-   */
-  void GetOutput(const TVMArgs& args) {
-    // Copy result to output buffer.
-    size_t arg_idx = input_var_idx_.size();
-    CHECK_EQ(args.size(), arg_idx + outputs_.size())
-        << "Found mismatch in the number of provided data entries and required.";

-    for (size_t i = 0; i < outputs_.size(); i++, arg_idx++) {
-      auto eid = EntryID(outputs_[i]);

-      if (args[arg_idx].type_code() == kTVMDLTensorHandle) {
-        DLTensor* arg = args[arg_idx];
-        NDArray::CopyFromTo(data_entry_[eid], arg);
-      } else {
-        CHECK(args[arg_idx].IsObjectRef());
-        NDArray arg = args[arg_idx];
-        arg.CopyFrom(data_entry_[eid]);
-      }
-    }
-  }

-  /*!
-   * \brief Pre-allocate empty buffers for input and output entries.
-   *
-   * \param ctx The context for the pre-allocated buffer.
- */ - void AllocateInputOutputBuffer(const DLContext& ctx) { - for (size_t i = 0; i < input_nodes_.size(); ++i) { - auto nid = input_nodes_[i]; - auto shape = nodes_[nid].GetOpShape()[0]; - auto dtype = nodes_[nid].GetOpDataType()[0]; - DLTensor* tensor; - int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, - ctx.device_type, ctx.device_id, &tensor); - CHECK_EQ(ret, 0) << TVMGetLastError(); - data_entry_[EntryID(nid, 0)] = tensor; - } - - for (size_t i = 0; i < outputs_.size(); ++i) { - auto entry = outputs_[i]; - auto shape = nodes_[entry.id_].GetOpShape()[entry.index_]; - auto dtype = nodes_[entry.id_].GetOpDataType()[entry.index_]; - DLTensor* tensor; - int ret = TVMArrayAlloc(shape.data(), shape.size(), dtype.code, dtype.bits, dtype.lanes, - ctx.device_type, ctx.device_id, &tensor); - CHECK_EQ(ret, 0) << TVMGetLastError(); - data_entry_[EntryID(entry)] = tensor; + // Assign input/output the NDArray pointers to data entry so that we can directly + // read/write host buffers. + data_entry_[eid] = arg; } } @@ -258,14 +197,14 @@ class JSONRuntimeBase : public ModuleNode { } /*! - * \brief Set up the constants/weights for inference. + * \brief Set up the constants/weights for inference by binding their DLTensor pointer to + * the corresponding data entry. * - * \param consts The constant to be filled. + * \param consts A list of constant NDArray to be used. */ void SetupConstants(const Array& consts) { - // Initialize consts for (size_t i = 0; i < consts.size(); ++i) { - consts[i].CopyTo(data_entry_[const_idx_[i]]); + data_entry_[const_idx_[i]] = consts[i].operator->(); } } @@ -313,7 +252,7 @@ class JSONRuntimeBase : public ModuleNode { /*! \brief Output entries. */ std::vector outputs_; /*! \brief Data of that entry. */ - std::vector data_entry_; + std::vector data_entry_; /*! \brief Map the input name to index. */ std::vector input_var_idx_; /*! \brief input const index. */ From 5d37beb2ef16c2cde33fbd77acd6a873dac86c34 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Thu, 25 Jun 2020 15:51:14 +0000 Subject: [PATCH 27/30] address comments --- .../contrib/codegen_json/codegen_json.h | 2 +- src/runtime/contrib/json/json_node.h | 26 +++++++++++++++++++ src/runtime/contrib/json/json_runtime.h | 12 ++++----- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h index b9cba3656d80..8bf5fbc013c2 100644 --- a/src/relay/backend/contrib/codegen_json/codegen_json.h +++ b/src/relay/backend/contrib/codegen_json/codegen_json.h @@ -206,7 +206,7 @@ class JSONSerializer : public MemoizedExprTranslatorSetNumOutput(tuple_type->fields.size()); } else { const auto* tensor_type = checked_type.as(); - CHECK(tensor_type) << "Expect TensorType, but received: ." 
From 872a60d7250b7151602009ed6fa1dd0d9f39522a Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Fri, 26 Jun 2020 18:34:15 +0000
Subject: [PATCH 28/30] enable ci

---
 tests/scripts/task_config_build_cpu.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh
index ce545bde6609..d64bcab7bc8f 100755
--- a/tests/scripts/task_config_build_cpu.sh
+++ b/tests/scripts/task_config_build_cpu.sh
@@ -29,6 +29,7 @@ echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
 echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
+echo set\(USE_DNNL_CODEGEN ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-10\) >> config.cmake
 echo set\(USE_NNPACK ON\) >> config.cmake
 echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake

From 12312f5672afb7b974b68c8c283c94474853500e Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Mon, 29 Jun 2020 20:08:05 +0000
Subject: [PATCH 29/30] address comment; fix bug

---
 src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 3 +--
 src/runtime/contrib/json/json_runtime.h       | 6 +++---
 tests/python/relay/test_json_runtime.py       | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
index c2747d145857..a79537f46e74 100644
--- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
+++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -441,8 +441,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
 
 runtime::Module DNNLJSONRuntimeCreate(String symbol_name, String graph_json,
                                       const Array<String>& const_names) {
-  auto n = make_object<DNNLJSONRuntime>(symbol_name.operator std::string(),
-                                        graph_json.operator std::string(), const_names);
+  auto n = make_object<DNNLJSONRuntime>(symbol_name, graph_json, const_names);
   return runtime::Module(n);
 }
 
diff --git a/src/runtime/contrib/json/json_runtime.h b/src/runtime/contrib/json/json_runtime.h
index a2769568cf04..c4f126e8ccba 100644
--- a/src/runtime/contrib/json/json_runtime.h
+++ b/src/runtime/contrib/json/json_runtime.h
@@ -204,7 +204,7 @@ class JSONRuntimeBase : public ModuleNode {
    */
   void SetupConstants(const Array<NDArray>& consts) {
     for (size_t i = 0; i < consts.size(); ++i) {
-      data_entry_[const_idx_[i]] = consts[i].operator->();
+      data_entry_[EntryID(const_idx_[i], 0)] = consts[i].operator->();
     }
   }
 
@@ -253,9 +253,9 @@ class JSONRuntimeBase : public ModuleNode {
   std::vector<JSONGraphNodeEntry> outputs_;
   /*! \brief Data of that entry. */
   std::vector<const DLTensor*> data_entry_;
-  /*! \brief Map the input name to index. */
+  /*! \brief Map the input name to node index. */
   std::vector<uint32_t> input_var_idx_;
-  /*! \brief input const index. */
+  /*! \brief input const node index. */
   std::vector<uint32_t> const_idx_;
   /*! \brief Indicate if the engine has been initialized. */
   bool initialized_{false};
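The json_runtime.h fix above hinges on the difference between node ids and entry ids: const_idx_ stores node ids, while data_entry_ is indexed by entry id, i.e. the node's offset in node_row_ptr_ plus the output index (consistent with NumEntries() returning node_row_ptr_.back()). Indexing data_entry_ with a raw node id therefore hits the wrong slot as soon as any preceding node has more than one output. A small self-contained illustration of the assumed indexing scheme:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Three nodes with 1, 2, and 1 outputs -> row pointer [0, 1, 3, 4].
      std::vector<uint32_t> node_row_ptr = {0, 1, 3, 4};
      auto entry_id = [&](uint32_t nid, uint32_t index) { return node_row_ptr[nid] + index; };

      // Node 1's second output lives at entry 2.
      assert(entry_id(1, 1) == 2);
      // Node 2's first output lives at entry 3, not at index 2: using the raw
      // node id to index the data-entry table would touch the wrong slot.
      assert(entry_id(2, 0) == 3);
      return 0;
    }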
diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py
index d15468c6a942..a886692c5838 100644
--- a/tests/python/relay/test_json_runtime.py
+++ b/tests/python/relay/test_json_runtime.py
@@ -53,7 +53,7 @@ def check_result(mod,
 
     # Run the reference result
     compile_engine.get().clear()
-    with relay.build_config(opt_level=3):
+    with tvm.transform.PassContext(opt_level=3):
         json, lib, param = relay.build(ref_mod, target=target, params=params)
     rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx)

From b0129049ba3f29c16bf76e22ef1fd05b3be1a0de Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Wed, 1 Jul 2020 16:48:43 +0000
Subject: [PATCH 30/30] address comment

---
 src/relay/backend/contrib/codegen_json/codegen_json.h | 8 ++++----
 src/relay/backend/contrib/dnnl/codegen.cc             | 2 +-
 src/runtime/contrib/dnnl/dnnl_json_runtime.cc         | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/relay/backend/contrib/codegen_json/codegen_json.h b/src/relay/backend/contrib/codegen_json/codegen_json.h
index 8bf5fbc013c2..9ed15a88c72a 100644
--- a/src/relay/backend/contrib/codegen_json/codegen_json.h
+++ b/src/relay/backend/contrib/codegen_json/codegen_json.h
@@ -104,7 +104,7 @@ class OpAttrExtractor : public AttrVisitor {
         attr.push_back(Fp2String(fm->value));
       } else if (const auto* str = (*an)[i].as<StringObj>()) {
         String s = GetRef<String>(str);
-        attr.push_back(s.operator std::string());
+        attr.push_back(s);
       } else {
         LOG(FATAL) << "Not supported type: " << (*an)[i]->GetTypeKey();
       }
@@ -118,7 +118,7 @@ class OpAttrExtractor : public AttrVisitor {
       SetNodeAttr(key, std::vector<std::string>{Fp2String(fm->value)});
     } else if (const auto* str = (*value).as<StringObj>()) {
       String s = GetRef<String>(str);
-      SetNodeAttr(key, std::vector<std::string>{s.operator std::string()});
+      SetNodeAttr(key, std::vector<std::string>{s});
     } else {
       LOG(FATAL) << "Not yet supported type: " << (*value)->GetTypeKey() << ": " << *value;
     }
@@ -230,7 +230,7 @@ class JSONSerializer : public MemoizedExprTranslator<std::vector<JSONGraphNodeEntry>> {
     auto pattern = fn->GetAttr<String>(attr::kPartitionedFromPattern);
     CHECK(pattern.defined());
     std::vector<std::string> values;
-    values.push_back(pattern.value().operator std::string());
+    values.push_back(pattern.value());
     std::vector<dmlc::any> attr;
     attr.emplace_back(values);
     node->SetAttr("PartitionedFromPattern", attr);
@@ -271,7 +271,7 @@ class JSONSerializer : public MemoizedExprTranslator<std::vector<JSONGraphNodeEntry>> {
     } else if (const auto* fn = cn->op.as<FunctionNode>()) {
       auto comp = fn->GetAttr<String>(attr::kComposite);
       CHECK(comp.defined()) << "JSON runtime only supports composite functions.";
-      name = comp.value().operator std::string();
+      name = comp.value();
     } else {
       LOG(FATAL) << "JSON runtime does not support calls to " << cn->op->GetTypeKey();
     }
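The codegen_json.h hunks drop the explicit .operator std::string() calls; they are redundant because a String-style wrapper with an implicit conversion operator is accepted anywhere a std::string is expected. A toy sketch of that mechanism, where MyString is a hypothetical stand-in rather than tvm::String:

    #include <iostream>
    #include <string>
    #include <vector>

    class MyString {
     public:
      explicit MyString(std::string s) : data_(std::move(s)) {}
      operator std::string() const { return data_; }  // implicit conversion

     private:
      std::string data_;
    };

    int main() {
      MyString s{"dnnl.conv2d_bias_relu"};
      std::vector<std::string> attr;
      attr.push_back(s);         // converts implicitly, no explicit .operator call
      std::string name = s;      // same here
      std::cout << name << " / " << attr[0] << "\n";
      return 0;
    }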
diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc
index 9d4041af6395..d5a483d0d112 100644
--- a/src/relay/backend/contrib/dnnl/codegen.cc
+++ b/src/relay/backend/contrib/dnnl/codegen.cc
@@ -442,7 +442,7 @@ class DNNLJSONSerializer : public backend::contrib::JSONSerializer {
     } else if (const auto* fn = cn->op.as<FunctionNode>()) {
       auto comp = fn->GetAttr<String>(attr::kComposite);
       CHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions.";
-      name = comp.value().operator std::string();
+      name = comp.value();
 
       if (name == "dnnl.conv2d_bias_relu") {
         call = GetRootCall(fn->body.as<CallNode>(), 2, {"nn.conv2d", "add", "nn.relu"});
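For composite functions, the serializer maps the composite name to an expected chain of operators and walks from the outermost call down to the root (here nn.relu -> add -> nn.conv2d). The sketch below shows the idea with a toy single-argument call chain; Node and FindRoot are hypothetical stand-ins for Relay's CallNode and the GetRootCall helper, whose actual traversal may differ:

    #include <cassert>
    #include <memory>
    #include <string>
    #include <vector>

    struct Node {
      std::string op;
      std::shared_ptr<Node> arg;  // first argument only, enough for a chain
    };

    // expected is ordered from the innermost (root) op to the outermost one.
    const Node* FindRoot(const Node* current, int depth,
                         const std::vector<std::string>& expected) {
      for (int i = depth; i >= 0; --i) {
        if (current == nullptr || current->op != expected[i]) return nullptr;
        if (i == 0) break;
        current = current->arg.get();
      }
      return current;
    }

    int main() {
      auto conv = std::make_shared<Node>(Node{"nn.conv2d", nullptr});
      auto add = std::make_shared<Node>(Node{"add", conv});
      auto relu = std::make_shared<Node>(Node{"nn.relu", add});
      const Node* root = FindRoot(relu.get(), 2, {"nn.conv2d", "add", "nn.relu"});
      assert(root != nullptr && root->op == "nn.conv2d");
      return 0;
    }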
diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
index a79537f46e74..bda9f1a44932 100644
--- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
+++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -136,7 +136,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase {
                       size_t offset = 0) {
     auto eid = EntryID(entry);
     // Since the DNNL memory has been created before calling this function, we assume the entry
-    // has not yet been bind to the other DNNL memory; otherwise it may have memory leak.
+    // has not yet been bound to another DNNL memory; otherwise it may cause a memory leak.
     CHECK_EQ(entry_out_mem_.count(eid), 0);
 
     // TODO(@comaniac): Support other data types (i.e., int8).
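The CHECK_EQ(entry_out_mem_.count(eid), 0) guard above enforces a bind-once invariant: each entry id may be associated with at most one DNNL memory, so a second bind is reported instead of silently overwriting (and leaking) the first association. A minimal sketch of that invariant, with MemoryPool as a hypothetical stand-in for the entry_out_mem_ bookkeeping:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <utility>

    class MemoryPool {
     public:
      // Returns false (instead of overwriting) when the entry is already bound.
      bool Bind(uint32_t eid, void* buffer, size_t offset) {
        if (entry_out_mem_.count(eid) != 0) return false;
        entry_out_mem_.emplace(eid, std::make_pair(buffer, offset));
        return true;
      }

     private:
      std::unordered_map<uint32_t, std::pair<void*, size_t>> entry_out_mem_;
    };

    int main() {
      float buf[4];
      MemoryPool pool;
      bool first = pool.Bind(0, buf, 0);
      bool second = pool.Bind(0, buf, 0);
      assert(first && !second);  // first bind succeeds, rebinding is rejected
      return 0;
    }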